/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/sfoba.c
* Authors: Bernhard Knab, Benjamin Georgi
*
* Copyright (C) 1998-2004 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
* Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 2243 $
* from $Date: 2008-11-05 08:05:05 -0500 (Wed, 05 Nov 2008) $
* last change by $Author: christoph_mpg $.
*
*******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "../config.h"
#endif
#include <math.h>
#include <float.h>
#include "ghmm.h"
#include "mprintf.h"
#include "mes.h"
#include "sfoba.h"
#include "matrix.h"
#include "randvar.h"
#include "ghmm_internals.h"
/*----------------------------------------------------------------------------*/
static int sfoba_initforward (ghmm_cmodel * smo, double *alpha_1, double *omega,
double *scale, double **b)
{
# define CUR_PROC "sfoba_initforward"
int i;
double c_0;
scale[0] = 0.0;
if (b == NULL){
for (i = 0; i < smo->N; i++) {
alpha_1[i] = smo->s[i].pi * ghmm_cmodel_calc_b(smo->s+i, omega);
scale[0] += alpha_1[i];
}
}
else {
for (i = 0; i < smo->N; i++) {
alpha_1[i] = smo->s[i].pi * b[i][smo->M];
scale[0] += alpha_1[i];
}
}
if (scale[0] > DBL_MIN) { /* >= EPS_PREC */
c_0 = 1 / scale[0];
for (i = 0; i < smo->N; i++)
alpha_1[i] *= c_0;
}
return (0);
# undef CUR_PROC
} /* sfoba_initforward */
/*----------------------------------------------------------------------------*/
static double sfoba_stepforward (ghmm_cstate * s, double *alpha_t, int osc,
double b_omega)
{
int i, id;
double value = 0.0;
for (i = 0; i < s->in_states; i++) {
id = s->in_id[i];
value += s->in_a[osc][i] * alpha_t[id];
}
value *= b_omega; /* b_omega outside the sum */
return (value);
} /* sfoba_stepforward */
/*============================================================================*/
int ghmm_cmodel_forward (ghmm_cmodel * smo, double *O, int T, double ***b,
double **alpha, double *scale, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_forward"
int res = -1;
int i, t = 0, osc = 0;
double c_t;
int pos;
/* T is length of sequence; divide by dimension to represent the number of time points */
T /= smo->dim;
/* calculate alpha and scale for t = 0 */
if (b == NULL)
sfoba_initforward(smo, alpha[0], O, scale, NULL);
else
sfoba_initforward(smo, alpha[0], O, scale, b[0]);
if (scale[0] <= DBL_MIN) {
/* means f(O[0], mue, u) << 0, first symbol very unlikely */
/* GHMM_LOG(LCONVERTED, "scale[0] == 0.0!\n"); */
goto STOP;
}
else {
*log_p = -log (1 / scale[0]);
if (smo->cos == 1) {
osc = 0;
}
else {
if (!smo->class_change->get_class) {
printf ("ERROR: get_class not initialized\n");
return (-1);
}
/* printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t); */
osc = smo->class_change->get_class (smo, O, smo->class_change->k, t);
if (osc >= smo->cos){
printf("ERROR: get_class returned index %d but model has only %d classes !\n",osc,smo->cos);
goto STOP;
}
}
for (t = 1; t < T; t++) {
scale[t] = 0.0;
pos = t * smo->dim;
/* b not calculated yet */
if (b == NULL) {
for (i = 0; i < smo->N; i++) {
alpha[t][i] = sfoba_stepforward(smo->s+i, alpha[t-1], osc,
ghmm_cmodel_calc_b(smo->s+i, O+pos));
scale[t] += alpha[t][i];
}
}
/* b precalculated */
else {
for (i = 0; i < smo->N; i++) {
alpha[t][i] = sfoba_stepforward (smo->s+i, alpha[t - 1], osc,
b[t][i][smo->M]);
scale[t] += alpha[t][i];
}
}
if (scale[t] <= DBL_MIN) { /* seq. can't be build */
goto STOP;
break;
}
c_t = 1 / scale[t];
/* scale alpha */
for (i = 0; i < smo->N; i++)
alpha[t][i] *= c_t;
/* summation of log(c[t]) for calculation of log( P(O|lambda) ) */
*log_p -= log (c_t);
if (smo->cos == 1) {
osc = 0;
}
else {
if (!smo->class_change->get_class) {
printf ("ERROR: get_class not initialized\n");
return (-1);
}
/* printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t); */
osc = smo->class_change->get_class (smo, O, smo->class_change->k, t);
if (osc >= smo->cos){
printf("ERROR: get_class returned index %d but model has only %d classes !\n",osc,smo->cos);
goto STOP;
}
}
}
}
/* log_p should not be smaller than value used for seqs. that
can't be build ???
if (*log_p < (double)PENALTY_LOGP)
*log_p = (double)PENALTY_LOGP;
*/
return 0;
STOP:
*log_p = (double) -DBL_MAX;
return (res);
# undef CUR_PROC
} /* ghmm_cmodel_forward */
#define LOWER_SCALE_BOUND 3.4811068399043105e-57 /* exp(-130) */
/*============================================================================*/
int ghmm_cmodel_backward (ghmm_cmodel * smo, double *O, int T, double ***b,
double **beta, const double *scale)
{
# define CUR_PROC "ghmm_cmodel_backward"
double *beta_tmp, sum, c_t;
int i, j, j_id, t, osc;
int res = -1;
int pos;
/* T is length of sequence; divide by dimension to represent the number of time points */
T /= smo->dim;
ARRAY_CALLOC (beta_tmp, smo->N);
for (t = 0; t < T; t++) {
/* try differenent bounds here in case of problems
like beta[t] = NaN */
if (scale[t] < LOWER_SCALE_BOUND) {
/* printf("backward scale(%d) = %e\n", t , scale[t]); */
goto STOP;
}
}
/* initialize */
c_t = 1 / scale[T - 1];
for (i = 0; i < smo->N; i++) {
beta[T - 1][i] = 1;
beta_tmp[i] = c_t;
}
/* Backward Step for t = T-2, ..., 0 */
/* beta_tmp: Vector for storage of scaled beta in one time step */
if (smo->cos == 1) {
osc = 0;
}
else {
if (!smo->class_change->get_class) {
printf ("ERROR: get_class not initialized\n");
goto STOP;
}
osc = smo->class_change->get_class (smo, O, smo->class_change->k, T - 2);
/* printf("osc(%d) = %d\n",T-2,osc); */
if (osc >= smo->cos){
printf("ERROR: get_class returned index %d but model has only %d classes !\n",osc,smo->cos);
goto STOP;
}
}
for (t = T - 2; t >= 0; t--) {
pos = t * smo->dim;
if (b == NULL)
for (i = 0; i < smo->N; i++) {
sum = 0.0;
for (j = 0; j < smo->s[i].out_states; j++) {
j_id = smo->s[i].out_id[j];
sum += smo->s[i].out_a[osc][j]
* ghmm_cmodel_calc_b(smo->s+j_id, O+pos+smo->dim)
* beta_tmp[j_id];
}
beta[t][i] = sum;
}
else
for (i = 0; i < smo->N; i++) {
sum = 0.0;
for (j = 0; j < smo->s[i].out_states; j++) {
j_id = smo->s[i].out_id[j];
sum +=
smo->s[i].out_a[osc][j] * b[t + 1][j_id][smo->M] * beta_tmp[j_id];
/*printf(" smo->s[%d].out_a[%d][%d] * b[%d] * beta_tmp[%d] = %f * %f *
%f\n",i,osc,j,t+1,j_id,smo->s[i].out_a[osc][j], b[t + 1][j_id][smo->M], beta_tmp[j_id]); */
}
beta[t][i] = sum;
/* printf(" -> beta[%d][%d] = %f\n",t,i,beta[t][i]); */
}
c_t = 1 / scale[t];
for (i = 0; i < smo->N; i++)
beta_tmp[i] = beta[t][i] * c_t;
if (smo->cos == 1) {
osc = 0;
}
else {
if (!smo->class_change->get_class) {
printf ("ERROR: get_class not initialized\n");
goto STOP;
}
/* if t=1 the next iteration will be the last */
if (t >= 1){
osc = smo->class_change->get_class (smo, O, smo->class_change->k, t-1);
/* printf("osc(%d) = %d\n",t-1,osc); */
if (osc >= smo->cos){
printf("ERROR: get_class returned index %d but model has only %d classes !\n",osc,smo->cos);
goto STOP;
}
}
}
}
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (beta_tmp);
return (res);
# undef CUR_PROC
} /* ghmm_cmodel_backward */
/*============================================================================*/
int ghmm_cmodel_logp (ghmm_cmodel * smo, double *O, int T, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_logp"
int res = -1;
double **alpha, *scale = NULL;
alpha = ighmm_cmatrix_stat_alloc (T, smo->N);
if (!alpha) {
GHMM_LOG_QUEUED(LCONVERTED);
goto STOP;
}
ARRAY_CALLOC (scale, T);
/* run forward alg. */
if (ghmm_cmodel_forward (smo, O, T, NULL, alpha, scale, log_p) == -1) {
/* GHMM_LOG_QUEUED(LCONVERTED); */
goto STOP;
}
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
ighmm_cmatrix_stat_free (&alpha);
m_free (scale);
return (res);
# undef CUR_PROC
} /* ghmm_cmodel_logp */
/*============================================================================*/
int ghmm_cmodel_logp_joint(ghmm_cmodel *mo, const double *O, int len,
const int *S, int slen, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_logp_joint"
int prevstate, state, state_pos=0, pos=0, j, osc=0;
int dim = mo->dim;
prevstate = state = S[0];
*log_p = log(mo->s[state].pi);
if (!(mo->model_type & GHMM_kSilentStates) || 1 /* XXX !mo->silent[state] */ )
{
*log_p += log(ghmm_cmodel_calc_b(mo->s+state, O+pos));
pos+=dim;
}
for (state_pos=1; state_pos < slen || pos+dim <= len; state_pos++) {
state = S[state_pos];
for (j=0; j < mo->s[state].in_states; ++j) {
if (prevstate == mo->s[state].in_id[j])
break;
}
if (mo->cos > 1) {
if (!mo->class_change->get_class) {
GHMM_LOG(LERROR, "get_class not initialized");
goto STOP;
}
osc = mo->class_change->get_class(mo, O, mo->class_change->k, pos);
if (osc >= mo->cos) {
GHMM_LOG_PRINTF(LERROR, LOC, "get_class returned index %d "
"but model has only %d classes!", osc, mo->cos);
goto STOP;
}
}
if (j == mo->s[state].in_states ||
fabs(mo->s[state].in_a[osc][j]) < GHMM_EPS_PREC) {
GHMM_LOG_PRINTF(LERROR, LOC, "Sequence can't be built. There is no "
"transition from state %d to %d.", prevstate, state);
goto STOP;
}
*log_p += log(mo->s[state].in_a[osc][j]);
if (!(mo->model_type & GHMM_kSilentStates) || 1 /* XXX !mo->silent[state] */) {
*log_p += log(ghmm_cmodel_calc_b(mo->s+state, O+pos));
pos+=dim;
}
prevstate = state;
}
if (pos < len)
GHMM_LOG_PRINTF(LINFO, LOC, "state sequence too short! processed only %d symbols", pos/dim);
if (state_pos < slen)
GHMM_LOG_PRINTF(LINFO, LOC, "sequence too short! visited only %d states", state_pos);
return 0;
STOP:
return -1;
# undef CUR_PROC
}