/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/xmlwriter.c
* Authors: Janne Grunau
*
* Copyright (C) 1998-2006 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2006 Max-Planck-Institut fuer Molekulare Genetik,
* Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 2306 $
* from $Date: 2013-06-03 12:35:43 -0400 (Mon, 03 Jun 2013) $
* last change by $Author: ejb177 $
*
*******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "../config.h"
#endif
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <libxml/encoding.h>
#include <libxml/xmlwriter.h>
#include "ghmm.h"
#include "mes.h"
#include "mprintf.h"
#include "ghmm_internals.h"
#include "xmlwriter.h"
#define kAlphabet 0
#define kLabelAlphabet 1
/* Bitmask to test the modeltype against to choose the type of the model pointer
we use in the union */
#define PTR_TYPE_MASK (GHMM_kDiscreteHMM + GHMM_kTransitionClasses + GHMM_kPairHMM + GHMM_kContinuousHMM)
#if defined(LIBXML_WRITER_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
#define MY_ENCODING "ISO-8859-1"
#define DTD_VERSION "1.0"
/* Write VALUE as attribute NAME through the text writer XMLW, formatted with
 * "%.8f".  If the write fails the macro logs an error and jumps to the label
 * STOP, so it can only be used inside a function that defines such a label.
 * The expansion ends in a dangling `else`: the semicolon written after the
 * macro invocation becomes the (empty) else branch. */
#define WRITE_DOUBLE_ATTRIBUTE(XMLW, NAME, VALUE) \
if (0 > xmlTextWriterWriteFormatAttribute(XMLW, BAD_CAST (NAME), \
"%.8f", (VALUE))) { \
GHMM_LOG_PRINTF(LERROR, LOC, "failed to write attribute %s (%.8f)", \
(NAME), (VALUE)); \
goto STOP;} else
/* ========================================================================= */
/* Return a newly allocated copy of str in which every '<' is replaced by
 * "&lt;" and every '>' by "&gt;"; all other characters are copied unchanged.
 * The result is obtained from malloc/realloc and must be released with
 * free() by the caller.  Returns NULL if str is NULL or if no memory could be
 * allocated. */
static char *replaceXMLEntity(char *str) {
#define CUR_PROC "replaceXMLEntity"
  size_t i, len, written = 0;
  char *retval, *shrunk;

  if (!str)
    return NULL;

  len = strlen(str);
  /* every input character expands to at most four output characters */
  retval = malloc(len*4+1);
  if (!retval)
    return NULL;

  for (i=0; i<len; ++i) {
    switch (str[i]) {
    case '<':
      /* append at the current write position, not at the buffer start */
      memcpy(retval+written, "&lt;", 4);
      written += 4;
      break;
    case '>':
      memcpy(retval+written, "&gt;", 4);
      written += 4;
      break;
    default:
      retval[written++] = str[i];
    }
  }
  retval[written++] = '\0';

  /* shrink to the size actually used; if that fails the larger buffer is
     still valid, so hand it out instead of losing it */
  shrunk = realloc(retval, written);
  return shrunk ? shrunk : retval;
#undef CUR_PROC
}
/* ========================================================================= */
/* Build a space separated list of keywords, one for each known flag that is
 * set in modelType (e.g. "left-right silent discrete").
 * Returns a newly allocated string that the caller has to free.  The string is
 * empty if modelType is positive but contains none of the known flags.
 * Returns NULL (after logging an error) if modelType is not positive.
 * NOTE(review): ARRAY_CALLOC presumably jumps to STOP when the allocation
 * fails -- confirm against its definition. */
static char * strModeltype(int modelType) {
#define CUR_PROC "strModeltype"
  size_t end;
  char * mt;

  /* 200 bytes are enough for all keywords together (112 characters) */
  ARRAY_CALLOC(mt, 200);

  if (modelType > 0) {
    if (modelType & GHMM_kLeftRight)
      strcat(mt, "left-right ");
    if (modelType & GHMM_kSilentStates)
      strcat(mt, "silent ");
    if (modelType & GHMM_kTiedEmissions)
      strcat(mt, "tied ");
    if (modelType & GHMM_kHigherOrderEmissions)
      strcat(mt, "higher-order ");
    if (modelType & GHMM_kBackgroundDistributions)
      strcat(mt, "background ");
    if (modelType & GHMM_kLabeledStates)
      strcat(mt, "labeled ");
    if (modelType & GHMM_kTransitionClasses)
      strcat(mt, "transition-classes ");
    if (modelType & GHMM_kDiscreteHMM)
      strcat(mt, "discrete ");
    if (modelType & GHMM_kContinuousHMM)
      strcat(mt, "continuous ");
    if (modelType & GHMM_kPairHMM)
      strcat(mt, "pair ");
    if (modelType & GHMM_kMultivariate)
      strcat(mt, "multivariate ");
  } else {
    GHMM_LOG(LERROR, "can't write models with unspecified modeltype");
    goto STOP;
  }

  /* overwrite the last space; nothing to strip if no keyword was appended,
     in which case end-1 would index before the buffer */
  end = strlen(mt);
  if (end > 0)
    mt[end-1] = '\0';
  return mt;
STOP:
  m_free(mt);
  return NULL;
#undef CUR_PROC
}
/* ========================================================================= */
/* Format the first size entries of array as a comma separated list
 * ("v0, v1, ..., vn"), each value printed with "%.8g".
 * Returns a newly allocated string that the caller has to free, or NULL (after
 * logging an error) if array is NULL, size is not positive or the text does
 * not fit into the buffer reserved for it. */
static char * doubleArrayToCSV(double * array, int size) {
#define CUR_PROC "doubleArrayToCSV"
  int i, n, pos=0;
  char *csv=NULL;
  int singlelength = (2 +       /* comma and space */
                      8 +       /* 8 significant digits */
                      1 +       /* sign */
                      5 +       /* 'e' and signed exponent */
                      3);       /* safety (covers the decimal point) */
  int maxlength;

  /* nothing sensible can be written for an empty or missing array */
  if (!array || size <= 0) {
    GHMM_LOG(LERROR, "writing CSV failed");
    goto STOP;
  }

  maxlength = size * singlelength;
  ARRAY_MALLOC(csv, maxlength);

  for (i=0; i < size; i++) {
    /* all entries but the last are followed by the separator */
    if (i < size-1)
      n = snprintf(csv+pos, maxlength-pos, "%.8g, ", array[i]);
    else
      n = snprintf(csv+pos, maxlength-pos, "%.8g", array[i]);

    /* n >= remaining space means the output was truncated */
    if (n < 0 || n >= maxlength-pos) {
      GHMM_LOG(LERROR, "writing CSV failed");
      goto STOP;
    }
    pos += n;
  }

  return csv;
STOP:
  free(csv);
  return NULL;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write the alphabet alfa as an <alphabet> element (type == kAlphabet) or as
 * a <classAlphabet> element (any other type).  Every symbol becomes a <symbol>
 * child carrying its index as "code" attribute and the symbol text, with '<'
 * and '>' replaced by replaceXMLEntity, as content.  Only <alphabet> elements
 * get an "id" attribute; a failure to write it is logged but not fatal.
 * Returns 0 on success and -1 on failure. */
static int writeAlphabet(xmlTextWriterPtr writer, ghmm_alphabet * alfa, int type) {
#define CUR_PROC "writeAlphabet"
  int i;
  char *escaped = NULL;

  if (0 > xmlTextWriterStartElement(writer, BAD_CAST (type == kAlphabet ? "alphabet" : "classAlphabet"))) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement");
    goto STOP;
  }

  if (type == kAlphabet) {
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", alfa->id)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write id-attribute for alphabet"
                      "with id %d", alfa->id);
    }
  }

  for (i=0; i<alfa->size; i++) {
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "symbol")) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to start symbol-tag no %d", i);
      goto STOP;
    }

    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "code", "%d", i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write code-attribute for symbol %s"
                      "with code %d", alfa->symbols[i], i);
      goto STOP;
    }

    /* keep the escaped copy in a variable so that it can be released again */
    escaped = replaceXMLEntity(alfa->symbols[i]);
    if (!escaped || 0 > xmlTextWriterWriteRaw(writer, BAD_CAST escaped)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write symbol %s with code %d",
                      alfa->symbols[i], i);
      goto STOP;
    }
    free(escaped);
    escaped = NULL;

    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to end symbol-tag no %d", i);
      goto STOP;
    }
  }

  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at ending alphabet");
    goto STOP;
  }

  return 0;
STOP:
  /* escaped is only non-NULL here if writing the current symbol failed */
  free(escaped);
  return -1;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write one <background> element for each of the bg->n background
 * distributions.  The "key" attribute is the stored name or, if there is
 * none, "bg_<index>"; an "order" attribute is added for orders greater than
 * zero; the content is the distribution as comma separated list.
 * Problems with the attributes are only logged.
 * Returns 0 on success and -1 on failure. */
static int writeBackground(xmlTextWriterPtr writer, ghmm_dbackground* bg) {
#define CUR_PROC "writeBackground"
  int idx, status;
  char * csv=NULL;

  for (idx=0; idx<bg->n; idx++) {

    if (xmlTextWriterStartElement(writer, BAD_CAST "background") < 0) {
      GHMM_LOG_PRINTF(LERROR, LOC, "Error at starting backgroung %d", idx);
      return -1;
    }

    /* key: use the name if one is stored, a generated key otherwise */
    if (bg->name && bg->name[idx]) {
      status = xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST (bg->name[idx]));
    } else {
      status = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "key", "bg_%d", idx);
    }
    if (status < 0) {
      GHMM_LOG(LERROR, "Error at writing background key");
    }

    /* order is only written when it is greater than zero */
    if (bg->order[idx] > 0) {
      status = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "order", "%d", bg->order[idx]);
      if (status < 0) {
        GHMM_LOG(LERROR, "can't write background order attribute");
      }
    }

    /* the distribution has m^(order+1) entries */
    csv = doubleArrayToCSV(bg->b[idx], pow(bg->m, bg->order[idx]+1));
    if (!csv) {
      GHMM_LOG(LERROR, "converting array to CSV failed for background distribution");
      return -1;
    }

    if (xmlTextWriterWriteRaw(writer, BAD_CAST csv) < 0) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing"
               "background distribution CSV");
      m_free(csv);
      return -1;
    }
    m_free(csv);

    if (xmlTextWriterEndElement(writer) < 0) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement while ending"
               "background distribution");
      return -1;
    }
  }

  return 0;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write the children of the <state> element for state sNo of the discrete
 * model f->model.d[moNo]:
 *  - an empty <silent> element for silent states, otherwise a <discrete>
 *    element holding the emission probabilities as comma separated list,
 *  - <backgroundKey>, <class>, <position> and <tiedTo> where the model type
 *    and the state data call for them.
 * Returns 0 on success and -1 on failure. */
static int writeDiscreteStateContents(xmlTextWriterPtr writer, ghmm_xmlfile* f,
                                      int moNo, int sNo) {
#define CUR_PROC "writeDiscreteStateContents"
  int bgId, cLabel, rc, order, tied;
  char * tmp=NULL;

  if (f->model.d[moNo]->model_type & GHMM_kSilentStates && f->model.d[moNo]->silent[sNo])
  {
    /* silent states have no emissions, they are marked by an empty element */
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "silent")) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (silent)");
      goto STOP;
    }
    /* end silent */
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (silent)");
      goto STOP;
    }
  }
  else
  {
    /* writing discrete distribution */
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "discrete")) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (discrete)");
      goto STOP;
    }

    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST "0")) {
      GHMM_LOG(LERROR, "failed to write alphabet id");
      goto STOP;
    }

    if (f->model.d[moNo]->s[sNo].fix) {
      if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "fixed", BAD_CAST "1")) {
        GHMM_LOG(LERROR, "failed to write fixed attribute");
        goto STOP;
      }
    }

    if ((f->model.d[moNo]->model_type & GHMM_kHigherOrderEmissions)
        && f->model.d[moNo]->order[sNo]) {
      order = f->model.d[moNo]->order[sNo];
      if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "order", "%d", order)) {
        GHMM_LOG(LERROR, "failed to write order attribute for discrete distribution");
        goto STOP;
      }
    } else {
      order = 0;
    }

    /* the emission array has M^(order+1) entries */
    tmp = doubleArrayToCSV(f->model.d[moNo]->s[sNo].b, pow(f->model.d[moNo]->M, order+1));
    if (tmp) {
      if (0 > xmlTextWriterWriteRaw(writer, BAD_CAST tmp)) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing"
                 "discrete distribution CSV");
        m_free(tmp);
        goto STOP;
      }
      m_free(tmp);
    } else {
      GHMM_LOG(LERROR, "converting array to CSV failed for discrete distribution");
      goto STOP;
    }

    /* end discrete distribution */
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (discrete)");
      goto STOP;
    }
  }

  /* writing background key */
  if (f->model.d[moNo]->model_type & GHMM_kBackgroundDistributions) {
    bgId = f->model.d[moNo]->background_id[sNo];
    if (bgId != GHMM_kNoBackgroundDistribution) {
      if (f->model.d[moNo]->bp->name && f->model.d[moNo]->bp->name[bgId]) {
        /* the name is data, it must never be used as a format string */
        rc = xmlTextWriterWriteElement(writer, BAD_CAST "backgroundKey",
                                       BAD_CAST f->model.d[moNo]->bp->name[bgId]);
      }
      else {
        /* unnamed distribution: same generated key as in writeBackground */
        rc = xmlTextWriterWriteFormatElement(writer, BAD_CAST "backgroundKey", "bg_%d", bgId);
      }
      if (rc<0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteElement (backgroundKey)");
        goto STOP;
      }
    }
  }

  /* writing class label */
  if (f->model.d[moNo]->model_type & GHMM_kLabeledStates) {
    cLabel = f->model.d[moNo]->label[sNo];
    rc = xmlTextWriterWriteFormatElement(writer, BAD_CAST "class", "%d", cLabel);
    if (rc<0) {
      GHMM_LOG(LERROR, "failed to write class label");
      goto STOP;
    }
  }

  /* durations are not written (not implemented) */

  /* writing positions, only if both coordinates are positive */
  if ((f->model.d[moNo]->s[sNo].xPosition > 0)
      && (f->model.d[moNo]->s[sNo].yPosition > 0)) {
    if (xmlTextWriterStartElement(writer, BAD_CAST "position") < 0) {
      GHMM_LOG(LERROR, "failed to start position element (position)");
      goto STOP;
    }
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "x", "%d",
                                              f->model.d[moNo]->s[sNo].xPosition)) {
      GHMM_LOG(LERROR, "failed to write x position");
      goto STOP;
    }
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "y", "%d",
                                              f->model.d[moNo]->s[sNo].yPosition)) {
      GHMM_LOG(LERROR, "failed to write y position");
      goto STOP;
    }
    if (xmlTextWriterEndElement(writer) < 0) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (position)");
      goto STOP;
    }
  }

  /* writing tied states */
  if (f->model.d[moNo]->model_type & GHMM_kTiedEmissions) {
    tied = f->model.d[moNo]->tied_to[sNo];
    if (tied != GHMM_kUntied) {
      rc = xmlTextWriterWriteFormatElement(writer, BAD_CAST "tiedTo", "%d", tied);
      if (rc<0) {
        GHMM_LOG(LERROR, "failed to write tiedTo element");
        goto STOP;
      }
    }
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write the children of the <state> element for state sNo of the discrete
 * switching model f->model.ds[moNo]: a <discrete> element holding the
 * emission probabilities as comma separated list, followed by
 * <backgroundKey>, <class>, <position> and <tiedTo> where the model type and
 * the state data call for them.
 * Returns 0 on success and -1 on failure. */
static int writeDiscreteSwitchingStateContents(xmlTextWriterPtr writer,
                                               ghmm_xmlfile* f, int moNo,
                                               int sNo) {
#define CUR_PROC "writeDiscreteSwitchingStateContents"
  int bgId, cLabel, rc, order, tied;
  char * tmp=NULL;

  /* writing discrete distribution */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "discrete")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (discrete)");
    goto STOP;
  }

  if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST "0")) {
    GHMM_LOG(LERROR, "failed to write alphabet id");
    goto STOP;
  }

  if (f->model.ds[moNo]->s[sNo].fix) {
    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "fixed", BAD_CAST "1")) {
      GHMM_LOG(LERROR, "failed to write fixed attriute");
      goto STOP;
    }
  }

  if ((f->model.ds[moNo]->model_type & GHMM_kHigherOrderEmissions)
      && f->model.ds[moNo]->order[sNo]) {
    order = f->model.ds[moNo]->order[sNo];
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "order", "%d", order)) {
      GHMM_LOG(LERROR, "failed to write order attribute for discrete distribution");
      goto STOP;
    }
  } else {
    order = 0;
  }

  /* the emission array has M^(order+1) entries */
  tmp = doubleArrayToCSV(f->model.ds[moNo]->s[sNo].b, pow(f->model.ds[moNo]->M, order+1));
  if (tmp) {
    if (0 > xmlTextWriterWriteRaw(writer, BAD_CAST tmp)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing"
               "discrete distribution CSV");
      m_free(tmp);
      goto STOP;
    }
    m_free(tmp);
  } else {
    GHMM_LOG(LERROR, "converting array to CSV failed for discrete distribution");
    goto STOP;
  }

  /* end discrete distribution */
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (discrete)");
    goto STOP;
  }

  /* writing background key */
  if (f->model.ds[moNo]->model_type & GHMM_kBackgroundDistributions) {
    bgId = f->model.ds[moNo]->background_id[sNo];
    if (bgId != GHMM_kNoBackgroundDistribution) {
      if (f->model.ds[moNo]->bp->name && f->model.ds[moNo]->bp->name[bgId]) {
        rc = xmlTextWriterWriteElement(writer, BAD_CAST "backgroundKey",
                                       BAD_CAST f->model.ds[moNo]->bp->name[bgId]);
      }
      else {
        /* unnamed distribution: same generated key as in writeBackground */
        rc = xmlTextWriterWriteFormatElement(writer, BAD_CAST "backgroundKey", "bg_%d", bgId);
      }
      /* check the result of whichever branch was taken */
      if (rc<0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteElement (backgroundKey)");
        goto STOP;
      }
    }
  }

  /* writing class label */
  if (f->model.ds[moNo]->model_type & GHMM_kLabeledStates) {
    cLabel = f->model.ds[moNo]->label[sNo];
    rc = xmlTextWriterWriteFormatElement(writer, BAD_CAST "class", "%d", cLabel);
    if (rc<0) {
      GHMM_LOG(LERROR, "failed to write class label");
      goto STOP;
    }
  }

  /* durations are not written (not implemented) */

  /* writing positions, only if both coordinates are positive */
  if ((f->model.ds[moNo]->s[sNo].xPosition > 0)
      && (f->model.ds[moNo]->s[sNo].yPosition > 0)) {
    if (xmlTextWriterStartElement(writer, BAD_CAST "position") < 0) {
      GHMM_LOG(LERROR, "failed to start position element (position)");
      goto STOP;
    }
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "x", "%d",
                                              f->model.ds[moNo]->s[sNo].xPosition)) {
      GHMM_LOG(LERROR, "failed to write x position");
      goto STOP;
    }
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "y", "%d",
                                              f->model.ds[moNo]->s[sNo].yPosition)) {
      GHMM_LOG(LERROR, "failed to write y position");
      goto STOP;
    }
    if (xmlTextWriterEndElement(writer) < 0) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (position)");
      goto STOP;
    }
  }

  /* writing tied states */
  if (f->model.ds[moNo]->model_type & GHMM_kTiedEmissions) {
    tied = f->model.ds[moNo]->tied_to[sNo];
    if (tied != GHMM_kUntied) {
      rc = xmlTextWriterWriteFormatElement(writer, BAD_CAST "tiedTo", "%d", tied);
      if (rc<0) {
        GHMM_LOG(LERROR, "failed to write tiedTo element");
        goto STOP;
      }
    }
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/*===========================================================================*/
/* Write the two child elements of a multinormal density: <mean> with the
 * mean vector (dimension entries) and <variance> with the covariance matrix
 * (dimension * dimension entries), both as comma separated lists.
 * Returns 0 on success and -1 on failure. */
static int writeMultiNormal(xmlTextWriterPtr writer, ghmm_c_emission *emission)
{
#define CUR_PROC "writeMultiNormal"
  char *csv=NULL;

  /* ---- mean vector ---- */
  if (xmlTextWriterStartElement(writer, BAD_CAST "mean") < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (mean)");
    goto STOP;
  }

  csv = doubleArrayToCSV(emission->mean.vec, emission->dimension);
  if (!csv) {
    GHMM_LOG(LERROR, "converting array to CSV failed for mean vector");
    goto STOP;
  }
  if (xmlTextWriterWriteRaw(writer, BAD_CAST csv) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing mean vector CSV");
    goto STOP;
  }
  m_free(csv);
  csv = NULL;

  if (xmlTextWriterEndElement(writer) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement mean");
    goto STOP;
  }

  /* ---- covariance matrix ---- */
  if (xmlTextWriterStartElement(writer, BAD_CAST "variance") < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (variance)");
    goto STOP;
  }

  csv = doubleArrayToCSV(emission->variance.mat, emission->dimension * emission->dimension);
  if (!csv) {
    GHMM_LOG(LERROR, "converting array to CSV failed for covariance matrix");
    goto STOP;
  }
  if (xmlTextWriterWriteRaw(writer, BAD_CAST csv) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing variance matrix CSV");
    goto STOP;
  }
  m_free(csv);
  csv = NULL;

  if (xmlTextWriterEndElement(writer) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement variance");
    goto STOP;
  }

  return 0;

STOP:
  /* releases the CSV buffer of the step that failed */
  m_free(csv);
  return -1;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write the children of the <state> element for state sNo of the continuous
 * model f->model.c[moNo]: a <mixture> element with one child per density
 * (normal, multinormal, normalLeftTail, normalRightTail or uniform) and, if
 * both coordinates are positive, a <position> element.
 * Every density gets a "fixed" attribute if the state or the density itself is
 * fixed, and a "prior" attribute if the state has more than one density.
 * Returns 0 on success and -1 on failure. */
static int writeContinuousStateContents(xmlTextWriterPtr writer, ghmm_xmlfile* f,
                                        int moNo, int sNo) {
#define CUR_PROC "writeContinuousStateContents"
  int i;
  ghmm_cstate *state = f->model.c[moNo]->s + sNo;
  /* a fixed state fixes all of its densities */
  int allFixed = (state->fix != 0);
  ghmm_c_emission *emission;

  /* writing continuous distribution */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "mixture")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (mixture)");
    goto STOP;
  }

  for (i=0; i < state->M; i++) {
    emission = state->e+i;

    /* open the density element and write its type specific attributes */
    switch (emission->type) {
    case normal:
      if (0 > xmlTextWriterStartElement(writer, BAD_CAST "normal")) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normal)");
        goto STOP;
      }
      WRITE_DOUBLE_ATTRIBUTE(writer, "mean", emission->mean.val);
      WRITE_DOUBLE_ATTRIBUTE(writer, "variance", emission->variance.val);
      break;
    case multinormal:
      if (0 > xmlTextWriterStartElement(writer, BAD_CAST "multinormal")) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (multinormal)");
        goto STOP;
      }
      if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "dimension",
                                                "%d", emission->dimension)) {
        GHMM_LOG(LERROR, "failed to write dimension attribute");
        goto STOP;
      }
      break;
    case normal_left:
      if (0 > xmlTextWriterStartElement(writer, BAD_CAST "normalLeftTail")) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normalLeftTail)");
        goto STOP;
      }
      WRITE_DOUBLE_ATTRIBUTE(writer, "mean", emission->mean.val);
      WRITE_DOUBLE_ATTRIBUTE(writer, "variance", emission->variance.val);
      /* the "max" attribute carries the max field, as for uniform below */
      WRITE_DOUBLE_ATTRIBUTE(writer, "max", emission->max);
      break;
    case normal_right:
      if (0 > xmlTextWriterStartElement(writer, BAD_CAST "normalRightTail")) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normalRightTail)");
        goto STOP;
      }
      WRITE_DOUBLE_ATTRIBUTE(writer, "mean", emission->mean.val);
      WRITE_DOUBLE_ATTRIBUTE(writer, "variance", emission->variance.val);
      /* the "min" attribute carries the min field, as for uniform below */
      WRITE_DOUBLE_ATTRIBUTE(writer, "min", emission->min);
      break;
    case uniform:
      if (0 > xmlTextWriterStartElement(writer, BAD_CAST "uniform")) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (uniform)");
        goto STOP;
      }
      WRITE_DOUBLE_ATTRIBUTE(writer, "min", emission->min);
      WRITE_DOUBLE_ATTRIBUTE(writer, "max", emission->max);
      break;
    default:
      GHMM_LOG_PRINTF(LERROR, LOC, "invalid density %d at position %d", emission->type, i);
      goto STOP;
    }

    /* optional values */
    if (allFixed || emission->fixed) {
      if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "fixed", BAD_CAST "1")) {
        GHMM_LOG(LERROR, "failed to set fixed attribute");
        goto STOP;
      }
    }
    if (state->M > 1) {
      WRITE_DOUBLE_ATTRIBUTE(writer, "prior", state->c[i]);
    }

    /* write mean vector and covariance matrix as children for multinormal */
    if (emission->type == multinormal) {
      if (0 > writeMultiNormal(writer, emission)) {
        GHMM_LOG(LERROR, "failed to write mean and covariance childs");
        goto STOP;
      }
    }

    /* close the density element */
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (all densities)");
      goto STOP;
    }
  }

  /* end mixture tag */
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (mixture)");
    goto STOP;
  }

  /* writing positions */
  if ((state->xPosition > 0) && (state->yPosition > 0)) {
    if (xmlTextWriterStartElement(writer, BAD_CAST "position") < 0) {
      GHMM_LOG(LERROR, "failed to start position element (position)");
      goto STOP;
    }
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "x", "%d",
                                              state->xPosition)) {
      GHMM_LOG(LERROR, "failed to write x position");
      goto STOP;
    }
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "y", "%d",
                                              state->yPosition)) {
      GHMM_LOG(LERROR, "failed to write y position");
      goto STOP;
    }
    if (xmlTextWriterEndElement(writer) < 0) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (position)");
      goto STOP;
    }
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write state sNo of model moNo as a <state> element with the attributes
 * "id", "initial" (the initial probability) and, if the state has a
 * description, "desc".  The children are produced by the state content writer
 * that matches the model type.
 * Pair HMMs are not supported: there is no code that reads their states, so
 * the function fails for them instead of working on undefined values.
 * Returns 0 on success and -1 on failure. */
static int writeState(xmlTextWriterPtr writer, ghmm_xmlfile* f, int moNo, int sNo) {
#define CUR_PROC "writeState"
  int rc = -1;
  double w_pi = 0.0;
  char *w_desc=NULL;
  char *escaped=NULL;
  char *typestr=NULL;

  /* start state */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "state")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (state)");
    goto STOP;
  }

  /* write id attribute */
  if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", sNo)) {
    GHMM_LOG(LERROR, "failed to write statte id attribute");
  }

  /* read state attribute from different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    w_pi = f->model.d[moNo]->s[sNo].pi;
    w_desc = f->model.d[moNo]->s[sNo].desc;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    w_pi = f->model.ds[moNo]->s[sNo].pi;
    w_desc = f->model.ds[moNo]->s[sNo].desc;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    GHMM_LOG(LERROR, "writing states of pair HMMs is not supported");
    goto STOP;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    w_pi = f->model.c[moNo]->s[sNo].pi;
    w_desc = f->model.c[moNo]->s[sNo].desc;
    break;
  default:
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  /* write initial probability as attribute */
  WRITE_DOUBLE_ATTRIBUTE(writer, "initial", w_pi);

  /* write state description; a failure is logged but not fatal.
     xmlTextWriterWriteAttribute returns the number of bytes written on
     success, only negative values indicate an error.
     NOTE(review): libxml2 escapes attribute values itself, so the additional
     replaceXMLEntity pass may escape '<' and '>' twice -- confirm. */
  if (w_desc) {
    escaped = replaceXMLEntity(w_desc);
    if (!escaped
        || 0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "desc", BAD_CAST escaped)) {
      GHMM_LOG(LERROR, "writing state description failed");
    }
    free(escaped);
    escaped = NULL;
  }

  /* write state contents for different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    rc = writeDiscreteStateContents(writer, f, moNo, sNo);
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    rc = writeDiscreteSwitchingStateContents(writer, f, moNo, sNo);
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    rc = writeContinuousStateContents(writer, f, moNo, sNo);
    break;
  default:
    /* pair HMMs and unknown types were already rejected above */
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  if (rc) {
    /* strModeltype allocates its result, so keep it to release it again */
    typestr = strModeltype(f->modelType & PTR_TYPE_MASK);
    GHMM_LOG_PRINTF(LERROR, LOC, "writing state contents failed. model_type = %s",
                    typestr ? typestr : "(unknown)");
    if (typestr) {
      m_free(typestr);
    }
    goto STOP;
  }

  /* end state*/
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (state)");
    goto STOP;
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write one <transition> element for every outgoing transition of state sNo
 * in model moNo.  Each element has the attributes "source" and "target" and a
 * <probability> child that lists the transition probability of every
 * transition class as comma separated values.
 * Pair HMMs are not supported: there is no code that reads their transitions,
 * so the function fails for them instead of working on undefined values.
 * Returns 0 on success and -1 on failure. */
static int writeTransition(xmlTextWriterPtr writer, ghmm_xmlfile* f, int moNo,
                           int sNo) {
#define CUR_PROC "writeTransition"
  int cos, i, j;
  int retval = -1;
  int out_states, * out_id;
  double * * out_a;
  double * w_out_a = NULL;
  char * tmp = NULL;

  /* fetch the transition data from the different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    out_states = f->model.d[moNo]->s[sNo].out_states;
    out_id = f->model.d[moNo]->s[sNo].out_id;
    /* plain discrete models have a single probability vector; taking its
       address lets it be indexed like the per-class arrays below */
    out_a = &(f->model.d[moNo]->s[sNo].out_a);
    cos = 1;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    out_states = f->model.ds[moNo]->s[sNo].out_states;
    out_id = f->model.ds[moNo]->s[sNo].out_id;
    out_a = f->model.ds[moNo]->s[sNo].out_a;
    cos = f->model.ds[moNo]->cos;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    GHMM_LOG(LERROR, "writing transitions of pair HMMs is not supported");
    goto STOP;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    out_states = f->model.c[moNo]->s[sNo].out_states;
    out_id = f->model.c[moNo]->s[sNo].out_id;
    out_a = f->model.c[moNo]->s[sNo].out_a;
    cos = f->model.c[moNo]->cos;
    break;
  default:
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  /* scratch buffer for the probabilities of one transition in all classes */
  ARRAY_MALLOC(w_out_a, cos);

  for (i=0; i<out_states; i++) {
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "transition")) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (transition)");
      goto STOP;
    }

    /* write source id (current state) as attribute */
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "source", "%d", sNo)) {
      GHMM_LOG(LERROR, "failed to write transition source attribute");
    }

    /* write target id as attribute */
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "target", "%d", out_id[i])) {
      GHMM_LOG(LERROR, "failed to write transition target attribute");
    }

    for (j=0; j<cos; j++)
      w_out_a[j] = out_a[j][i];

    tmp = doubleArrayToCSV(w_out_a, cos);
    if (tmp) {
      if (0 > xmlTextWriterWriteElement(writer, BAD_CAST "probability", BAD_CAST tmp)) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteElement (transition probabilities)");
        m_free(tmp);
        goto STOP;
      }
      m_free(tmp);
    } else {
      GHMM_LOG(LERROR, "converting transition probabilities array to CSV failed");
      goto STOP;
    }

    /* end transition */
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (transition)");
      goto STOP;
    }
  }

  retval = 0;
STOP:
  /* the scratch buffer is released on the success and on the error path */
  if (w_out_a) {
    m_free(w_out_a);
  }
  return retval;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write model number `number` of f as an <HMM> element: the attributes
 * "name" (if set), "type", "prior" (if not negative) and "transitionClasses"
 * (if greater than one), then the alphabet, label alphabet and background
 * distributions where applicable, then all states and all transitions.
 * Failures to write the name, type or transitionClasses attributes, the label
 * alphabet or the background distributions are logged but not fatal.
 * Pair HMMs are not supported and make the function fail.
 * Returns 0 on success and -1 on failure. */
static int writeHMM(xmlTextWriterPtr writer, ghmm_xmlfile* f, int number) {
#define CUR_PROC "writeHMM"
  int rc=0, i, N=0;
  int retval = -1;
  int w_cos = 0;
  double w_prior = 0.0;
  char *w_name = NULL;
  char * w_type = NULL;
  char * typestr = NULL;

  /* start HMM */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "HMM")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (HMM)");
    goto STOP;
  }

  /* collect the HMM attributes from the different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    w_name = f->model.d[number]->name;
    w_type = strModeltype(f->model.d[number]->model_type);
    w_prior = f->model.d[number]->prior;
    N = f->model.d[number]->N;
    w_cos = 1;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    w_name = f->model.ds[number]->name;
    w_type = strModeltype(f->model.ds[number]->model_type);
    w_prior = f->model.ds[number]->prior;
    N = f->model.ds[number]->N;
    /* NOTE(review): writeTransition uses ds->cos for this model type while
       the "transitionClasses" attribute is suppressed here -- confirm that
       this is intended */
    w_cos = 0;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    GHMM_LOG(LERROR, "writing of pair HMMs is not supported");
    goto STOP;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    w_name = f->model.c[number]->name;
    /* fall back to the type of the file if the model carries none */
    if (f->model.c[number]->model_type)
      w_type = strModeltype(f->model.c[number]->model_type);
    else
      w_type = strModeltype(f->modelType);
    w_prior = f->model.c[number]->prior;
    N = f->model.c[number]->N;
    w_cos = f->model.c[number]->cos;
    break;
  default:
    GHMM_LOG(LERROR, "invalid modelType");
    goto STOP;
  }

  if (!w_type) {
    GHMM_LOG(LERROR, "can't determine HMM type");
    goto STOP;
  }

  /* xmlTextWriterWriteAttribute returns the number of bytes written on
     success, only negative values indicate an error */
  if (w_name) {
    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "name", BAD_CAST w_name)) {
      GHMM_LOG(LERROR, "writing HMM name failed");
    }
  }

  if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST w_type)) {
    GHMM_LOG(LERROR, "writing HMM type failed");
  }

  if (w_prior >= 0.0) {
    WRITE_DOUBLE_ATTRIBUTE(writer, "prior", w_prior);
  }

  if (w_cos > 1) {
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "transitionClasses",
                                              "%d", w_cos)) {
      GHMM_LOG(LERROR, "failed to write no of transitionClasses");
    }
  }

  /* write alphabet if applicable; only plain discrete models have one that
     is written here */
  if ((f->modelType & (GHMM_kDiscreteHMM + GHMM_kTransitionClasses + GHMM_kPairHMM))
      == GHMM_kDiscreteHMM) {
    rc = writeAlphabet(writer, f->model.d[number]->alphabet, kAlphabet);
  }

  if (rc) {
    /* strModeltype allocates its result, so keep it to release it again */
    typestr = strModeltype(f->modelType);
    GHMM_LOG_PRINTF(LERROR, LOC, "writing alphabet for HMM %d (type %s) failed",
                    number, typestr ? typestr : "(unknown)");
    if (typestr) {
      m_free(typestr);
    }
    goto STOP;
  }

  /* write label alphabet if applicable */
  if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM
      && f->modelType & GHMM_kLabeledStates) {
    if (writeAlphabet(writer, f->model.d[number]->label_alphabet, kLabelAlphabet)) {
      GHMM_LOG(LERROR, "writing of label alphabet failed");
    }
  }

  /* write background distributions if applicable */
  if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM
      && f->modelType & GHMM_kBackgroundDistributions) {
    if (writeBackground(writer, f->model.d[number]->bp)) {
      GHMM_LOG(LERROR, "writing of background distributions failed");
    }
  }

  /* write all states */
  for (i=0; i<N; i++) {
    if (writeState(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing of state %d in HMM %d failed", i, number);
      goto STOP;
    }
  }

  /* write all outgoing transitions */
  for (i=0; i<N; i++) {
    if (writeTransition(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing transitions of state %d in HMM %d failed",
                      i, number);
      goto STOP;
    }
  }

  /*end HMM*/
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (HMM)");
    goto STOP;
  }

  retval = 0;
STOP:
  /* the type string is owned by this function on every path */
  if (w_type) {
    m_free(w_type);
  }
  return retval;
#undef CUR_PROC
}
/* ========================================================================= */
/* Write all models of f as one <mixture> document to the file named `file`,
 * encoded as MY_ENCODING.
 * The document is built in memory and only saved if every model could be
 * written; all problems are reported through the logging macros, the function
 * itself returns nothing.  The writer and the document are released on every
 * path. */
void ghmm_xmlfile_write(ghmm_xmlfile* f, const char *file) {
#define CUR_PROC "ghmm_xmlfile_write"
  int rc, i;
  xmlTextWriterPtr writer = NULL;
  /* stays NULL if the writer can't be created, so the cleanup is safe */
  xmlDocPtr doc = NULL;

  /*
   * this initialize the library and check potential ABI mismatches
   * between the version it was compiled for and the actual shared
   * library used.
   */
  LIBXML_TEST_VERSION

  xmlSubstituteEntitiesDefault(1);

  /* Create a new XmlWriter for DOM, with no compression. */
  writer = xmlNewTextWriterDoc(&doc, 0);
  if (writer == NULL) {
    GHMM_LOG(LERROR, "can not create the xml writer");
    goto STOP;
  }

  /* indenting writer to circumvent no space between SYSTEM and PUBLIC identifier */
  xmlTextWriterSetIndent(writer, 1);

  /* Start the document with the xml default for the version,
   * encoding ISO 8859-1 and the default for the standalone
   * declaration. */
  rc = xmlTextWriterStartDocument(writer, NULL, MY_ENCODING, NULL);
  if (rc < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartDocument\n");
    goto STOP;
  }

  /* Set the Document type declaration at the beginning of the document */
  rc = xmlTextWriterWriteDTD(writer, BAD_CAST "mixture",
                             BAD_CAST "-//ghmm.org//DOCUMENT ghmm V"DTD_VERSION"//EN",
                             BAD_CAST "http://ghmm.sourceforge.net/xml/"DTD_VERSION"/ghmm.dtd",
                             NULL);
  if (rc < 0) {
    GHMM_LOG(LERROR, "failed to write the DocType");
    goto STOP;
  }

  /* start real contents */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "mixture")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (mixture)");
    goto STOP;
  }

  if (xmlTextWriterWriteAttribute(writer, BAD_CAST "version", BAD_CAST DTD_VERSION) < 0) {
    GHMM_LOG(LERROR, "failed to write version 1.0");
    goto STOP;
  }

  if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "noComponents", "%d", f->noModels)) {
    GHMM_LOG(LERROR, "failed to write the number of components");
    goto STOP;
  }

  /* write all models; a model that can't be written would leave the document
     incomplete, so nothing is saved in that case */
  for (i=0; i<f->noModels; i++) {
    if (writeHMM(writer, f, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing of HMM %d failed", i);
      goto STOP;
    }
  }

  /* end mixture */
  if (0 > xmlTextWriterEndDocument(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndDocument (mixture)");
    goto STOP;
  }

  /* release the writer, then save the finished document */
  xmlFreeTextWriter(writer);
  writer = NULL;

  if (0 > xmlSaveFormatFileEnc(file, doc, MY_ENCODING, 1)) {
    GHMM_LOG_PRINTF(LERROR, LOC, "failed to save xml document to %s", file);
  }

STOP:
  /* the writer is still alive here only if an error occurred */
  if (writer)
    xmlFreeTextWriter(writer);
  if (doc)
    xmlFreeDoc(doc);

  /*
   * Cleanup function for the XML library.
   */
  xmlCleanupParser();
  /*
   * this is to debug memory for regression tests
   */
  xmlMemoryDump();
#undef CUR_PROC
}
#endif