/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/smodel.h
* Authors: Bernhard Knab, Benjamin Georgi
*
* Copyright (C) 1998-2004 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
* Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 2277 $
* from $Date: 2009-04-28 08:44:31 -0400 (Tue, 28 Apr 2009) $
* last change by $Author: grunau $.
*
*******************************************************************************/
#ifndef GHMM_SMODEL_H
#define GHMM_SMODEL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include "ghmm.h"
#include "scanner.h"
/**@name SHMM-Modell */
/*@{ (Doc++-Group: smodel) */
/** Continuous HMM. Structures and function.
ghmm_cmodel includes continuous ghmm_dmodel with one transition matrix
(COS is set to 1) and an extension for
models with several matrices
(COS is set to a value greater than 1). In the latter case
a suitable (depending on the spezific application) function
has to be set in the get_class function pointer of
ghmm_cmodel_class_change_context */
/**
density types for ghmm_cmodel.
*/
typedef enum {
normal, /**< gaussian */
normal_right, /**< right tail */
normal_approx, /**< approximated gaussian */
normal_left, /**< left tail */
uniform,
binormal, /**< two dimensional gaussian */
multinormal, /**< multivariate gaussian */
density_number /**< number of density types, has to stay last */
} ghmm_density_t;
/**
ghmm_c_emission bundles all emission parameters
*/
typedef struct ghmm_c_emission {
/** specify the type of the density */
ghmm_density_t type;
/** dimension > 1 for multivariate normals */
int dimension;
/** mean for output functions (pointer to mean vector
for multivariate) */
union {
double val;
double *vec;
} mean;
/** variance or pointer to a covariance matrix
for multivariate normals */
union {
double val;
double *mat;
} variance;
/** pointer to inverse of covariance matrix if multivariate normal
else NULL */
double *sigmainv;
/** determinant of covariance matrix for multivariate normal */
double det;
/** Cholesky decomposition of covariance matrix A,
if A = GG' sigmacd only holds G */
double *sigmacd;
/** minimum of uniform distribution
or left boundary for rigth-tail gaussians */
double min;
/** maximum of uniform distribution
or right boundary for left-tail gaussians */
double max;
/** if fixed != 0 the parameters of the density are fixed */
int fixed;
} ghmm_c_emission;
/**
State struct for continuous emission HMMs.
*/
typedef struct ghmm_cstate {
/** Number of output densities per state */
int M;
/** initial prob. */
double pi;
/** IDs of successor states */
int *out_id;
/** IDs of predecessor states */
int *in_id;
/** transition probs to successor states. It is a
matrix in case of mult. transition matrices (COS > 1)*/
double **out_a;
/** transition probs from predecessor states. It is a
matrix in case of mult. transition matrices (COS > 1) */
double **in_a;
/** number of successor states */
int out_states;
/** number of predecessor states */
int in_states;
/** weight vector for output function components */
double *c;
/** flag for fixation of parameter. If fix = 1 do not change parameters of
output functions, if fix = 0 do normal training. Default is 0. */
int fix;
/** vector of ghmm_c_emission
(type and parameters of output function components) */
ghmm_c_emission *e;
/** contains a description of the state (null terminated utf-8)*/
char *desc;
/** x coordinate position for graph representation plotting **/
int xPosition;
/** y coordinate position for graph representation plotting **/
int yPosition;
} ghmm_cstate;
struct ghmm_cmodel;
/** Class change context for continuous emission HMMs.
*/
typedef struct ghmm_cmodel_class_change_context {
/* Names of class change module/function (for python callback) */
char *python_module;
char *python_function;
/* index of current sequence */
int k;
/** pointer to class function */
int (*get_class) (struct ghmm_cmodel *, const double *, int, int);
/* space for any data necessary for class switch, USER is RESPONSIBLE */
void *user_data;
} ghmm_cmodel_class_change_context;
/** Model struct for continuous emission HMMs.
*/
typedef struct ghmm_cmodel {
/** Number of states */
int N;
/** Maximun number of components in the states */
int M;
/** Number of dimensions of the emission components.
All emissions must have the same number of dimensions */
int dim;
/** ghmm_cmodel includes continuous model with one transition matrix
(cos is set to 1) and an extension for models with several matrices
(cos is set to a positive integer value > 1).*/
int cos;
/** prior for a priori prob. of the model. -1 means no prior specified (all
models have equal prob. a priori. */
double prior;
/* contains a arbitrary name for the model (null terminated utf-8) */
char *name;
/** Contains bit flags for varios model extensions such as
kSilentStates (see ghmm.h for a complete list)
*/
int model_type;
/** All states of the model. Transition probs are part of the states. */
ghmm_cstate *s;
/* pointer to a ghmm_cmodel_class_change_context struct necessary for multiple transition
classes */
ghmm_cmodel_class_change_context *class_change;
} ghmm_cmodel;
/* don't include this earlier: in sequence.h ghmm_cmodel has to be known */
#include "sequence.h"
int ghmm_cmodel_class_change_alloc (ghmm_cmodel * smo);
/** Allocates the multivariate arrays of a ghmm_c_emmission struct
@return 0: success, -1: error
@param emissions address of ghmm_c_emission
@param dim dimension for multivariate emissions
*/
int ghmm_c_emission_alloc(ghmm_c_emission *emissions, int dim);
/** Allocates a cstate
@return 0: success, -1: error
@param s pointer to the allocated states
@param M maximal number of densities of the state
@param in_states number of incoming transitions
@param out_states number of outgoing transitions
@param cos number of transition classes
*/
int ghmm_cstate_alloc (ghmm_cstate * s, int M,
int in_states, int out_states, int cos);
/** Alloc model
@return allocated cmodel, -1: error
@param N number of states in the model
@param modeltype type of the model
*/
ghmm_cmodel * ghmm_cmodel_calloc( int N, int modeltype);
/** Free memory of multivariate ghmm_c_emission arrays
@param emission pointer of ghmm_c_emission to free */
void ghmm_c_emission_free(ghmm_c_emission *emission);
/** Free memory ghmm_cmodel
@return 0: success, -1: error
@param smo pointer pointer of ghmm_cmodel */
int ghmm_cmodel_free (ghmm_cmodel ** smo);
/**
Copies one smodel. Memory alloc is here.
@return pointer to ghmm_cmodel copy
@param smo ghmm_cmodel to be copied */
ghmm_cmodel *ghmm_cmodel_copy (const ghmm_cmodel * smo);
/**
Checks if ghmm_cmodel is well definded. E.g. sum pi = 1, only positive values
etc.
@return 0 if ghmm_cmodel is ok, -1 for error
@param smo ghmm_cmodel for checking
*/
int ghmm_cmodel_check (const ghmm_cmodel * smo);
/**
For a vector of smodels: check that the number of states and the number
of output function components are the same in each smodel.
@return 0 if smodels are ok, -1 for error
@param smo: vector of smodels for checking
@param smodel_number: number of smodels
*/
int ghmm_cmodel_check_compatibility (ghmm_cmodel ** smo, int smodel_number);
/**
Generates random symbol.
Generates random number(s) for a specified state and specified
output component of the given smodel.
@return 0 on success
@param smo: ghmm_cmodel
@param state: state
@param m: index of output component
@param x: pointer to double to store result
*/
int ghmm_cmodel_get_random_var(ghmm_cmodel *smo, int state, int m, double *x);
/**
Produces sequences to a given model. All memory that is needed for the
sequences is allocated inside the function. It is possible to define
the length of the sequences globally (global_len > 0) or it can be set
inside the function, when a final state in the model is reached (a state
with no output). If the model has no final state, the sequences will
have length MAX_SEQ_LEN.
@return pointer to an array of sequences
@param smo: model
@param seed: initial parameter for the random value generator
(an integer). If seed == 0, then the random value
generator is not initialized.
@param global_len: length of sequences (=0: automatically via final states)
@param seq_number: number of sequences
@param Tmax: maximal sequence length, set to MAX_SEQ_LEN if -1
*/
ghmm_cseq *ghmm_cmodel_generate_sequences (ghmm_cmodel * smo, int seed,
int global_len, long seq_number,
int Tmax);
/**
Computes sum over all sequence of
seq_w * log( P ( O|lambda ) ). If a sequence can't be generated by smo
error cost of seq_w * PRENALTY_LOGP are imposed.
@return n: number of evaluated sequences, -1: error
@param smo ghmm_cmodel
@param sqd sequence struct
@param log_p evaluated log likelihood
*/
int ghmm_cmodel_likelihood (ghmm_cmodel * smo, ghmm_cseq * sqd, double *log_p);
/**
Computes log likelihood for all sequence of
seq_w * log( P ( O|lambda ) ). If a sequence can't be generated by smo
error cost of seq_w * PRENALTY_LOGP are imposed.
@return n: number of evaluated sequences, -1: error
@param smo ghmm_cmodel
@param sqd sequence struct
@param log_ps array of evaluated likelihoods
*/
int ghmm_cmodel_individual_likelihoods (ghmm_cmodel * smo, ghmm_cseq * sqd,
double *log_ps);
/** Reads an XML file with specifications for one or more smodels.
All parameters in matrix or vector form.
@return vector of read smodels
@param filename input xml file
@param smo_number number of smodels to read*/
ghmm_cmodel **ghmm_cmodel_xml_read (const char *filename, int *smo_number);
/** Writes an XML file with specifications for one or more smodels.
@return 0:sucess, -1:error
@param file output xml filename
@param smo ghmm_cmodel(s)
@param smo_number number of smodels to write*/
int ghmm_cmodel_xml_write(ghmm_cmodel** smo, const char *file, int smo_number);
/**
Prints one ghmm_cmodel in matrix form.
@param file output file
@param smo ghmm_cmodel
*/
void ghmm_cmodel_print (FILE * file, ghmm_cmodel * smo);
/**
Prints one ghmm_cmodel with only one transition Matrix A (=Ak_0).
@param file output file
@param smo ghmm_cmodel
*/
void ghmm_cmodel_print_oneA (FILE * file, ghmm_cmodel * smo);
/**
Prints transition matrix of specified class.
@param file output file
@param smo ghmm_cmodel
@param k transition class
@param tab format: leading tab
@param separator format: seperator
@param ending format: end of data in line
*/
void ghmm_cmodel_Ak_print (FILE * file, ghmm_cmodel * smo, int k, char *tab,
char *separator, char *ending);
/**
Prints weight matrix of output functions of an smodel.
@param file output file
@param smo ghmm_cmodel
@param tab format: leading tab
@param separator format: seperator
@param ending format: end of data in line
*/
void ghmm_cmodel_C_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,
char *ending);
/**
Prints mean matrix of output functions of an smodel.
@param file output file
@param smo ghmm_cmodel
@param tab format: leading tab
@param separator format: seperator
@param ending format: end of data in line
*/
void ghmm_cmodel_Mue_print (FILE * file, ghmm_cmodel * smo, char *tab,
char *separator, char *ending);
/**
Prints variance matrix of output functions of an smodel.
@param file output file
@param smo ghmm_cmodel
@param tab format: leading tab
@param separator format: seperator
@param ending format: end of data in line
*/
void ghmm_cmodel_U_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,
char *ending);
/**
Prints initial prob vector of an smodel.
@param file output file
@param smo ghmm_cmodel
@param tab format: leading tab
@param separator format: seperator
@param ending format: end of data in line
*/
void ghmm_cmodel_Pi_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,
char *ending);
/**
Prints vector of fix_states.
@param file output file
@param smo ghmm_cmodel
@param tab format: leading tab
@param separator format: seperator
@param ending format: end of data in line
*/
void ghmm_cmodel_fix_print (FILE * file, ghmm_cmodel * smo, char *tab,
char *separator, char *ending);
/** Computes the density of one symbol (omega) in a given state and a
given output component
@return calculated density
@param state ghmm_cstate
@param m output component
@param omega given symbol
*/
double ghmm_cmodel_calc_cmbm(const ghmm_cstate *state, int m, double *omega);
/** Computes the density of one symbol (omega) in a given state (sums over
all output components
@return calculated density
@param state state
@param omega given symbol
*/
double ghmm_cmodel_calc_b(ghmm_cstate *state, const double *omega);
/** Computes probabilistic distance of two models
@return the distance
@param cm0 ghmm_cmodel used for generating random output
@param cm ghmm_cmodel to compare with
@param maxT maximum output length (for HMMs with absorbing states multiple
sequences with a toal length of at least maxT will be
generated)
@param symmetric flag, whether to symmetrize distance (not implemented yet)
@param verbose flag, whether to monitor distance in 40 steps.
Prints to stdout (yuk!)
*/
double ghmm_cmodel_prob_distance (ghmm_cmodel * cm0, ghmm_cmodel * cm, int maxT,
int symmetric, int verbose);
/**
Computes value of distribution function for a given symbol omega, a given
ghmm_cstate and a given output component.
@return value of distribution function
@param state ghmm_cstate
@param m component
@param omega symbol
*/
double ghmm_cmodel_calc_cmBm(const ghmm_cstate *state, int m, const double *omega);
/**
Computes value of distribution function for a given symbol omega and
a given state. Sums over all components.
@return value of distribution function
@param state ghmm_cstate
@param omega symbol
*/
double ghmm_cmodel_calc_B(ghmm_cstate *state, const double *omega);
/** Computes the number of free parameters in an array of
smodels. E.g. if the number of parameter from pi is N - 1.
Counts only those parameters, that can be changed during
training. If pi[i] = 0 it is not counted, since it can't be changed.
@return number of free parameters
@param smo ghmm_cmodel
@param smo_number number of smodels
*/
int ghmm_cmodel_count_free_parameter (ghmm_cmodel ** smo, int smo_number);
/*============================================================================*/
/* keep the following functions for first distribution???
--> BK ?
*/
/** Generates interval(a,b) with B(a) < 0.01, B(b) > 0.99
@param smo continous HMM
@param state given state
@param a return-value: left side
@param b return-value: right side
*/
void ghmm_cmodel_get_interval_B (ghmm_cmodel * smo, int state, double *a, double *b);
/** Normalizes initial and transisition probablilities and mixture weights
@return 0 on success / -1 on error
@param smo model to normalize
*/
int ghmm_cmodel_normalize(ghmm_cmodel *smo);
/**
Get transition probabality from state 'i' to state 'j' to value 'prob'.
NOTE: No internal checks
@return 1 if there is a transition, 0 otherwise
@param mo model
@param i state index (source)
@param j state index (target)
@param c transition class
*/
double ghmm_cmodel_get_transition(ghmm_cmodel* mo, int i, int j, int c);
/**
Checks if a non zero transition exists between state 'i' to state 'j'.
NOTE: No internal checks
@return 0 if there is no non zero transition from state i to state j, 1 else
@param mo model
@param i state index (source)
@param j state index (target)
@param c transition class
*/
int ghmm_cmodel_check_transition(ghmm_cmodel* mo, int i, int j, int c);
/**
Set transition from state 'i' to state 'j' to value 'prob'.
NOTE: No internal checks - model might get broken if used without care.
@param mo model
@param i state index (source)
@param j state index (target)
@param prob probability
@param c transition class
*/
void ghmm_cmodel_set_transition (ghmm_cmodel* mo, int i, int j, int c, double prob);
#ifdef __cplusplus
}
#endif
#endif
/*@} (Doc++-Group: smodel) */