/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/foba.c
* Authors: Utz J. Pape, Benjamin Georgi, Janne Grunau
*
* Copyright (C) 1998-2004 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
* Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 1997 $
* from $Date: 2007-12-10 11:07:24 -0500 (Mon, 10 Dec 2007) $
* last change by $Author: grunau $.
*
*******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "../config.h"
#endif
#include <assert.h>
#include "ghmm.h"
#include "model.h"
#include "math.h"
#include "matrix.h"
#include "mes.h"
#include "mprintf.h"
#include "foba.h"
#include "ghmm_internals.h"
int ghmm_dmodel_forward_init (ghmm_dmodel * mo, double *alpha_1, int symb, double *scale)
{
# define CUR_PROC "ghmm_dmodel_forward_init"
int res = -1;
int i, j, id, in_id;
double c_0;
scale[0] = 0.0;
/*printf(" *** foba_initforward\n");*/
/*iterate over non-silent states*/
/*printf(" *** iterate over non-silent states \n");*/
for (i = 0; i < mo->N; i++) {
if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[i])) {
/*no starting in states with order > 0 !!!*/
if (!(mo->model_type & GHMM_kHigherOrderEmissions) || mo->order[i] == 0) {
alpha_1[i] = mo->s[i].pi * mo->s[i].b[symb];
/*printf("\nalpha1[%i]=%f\n",i,alpha_1[i]);*/
scale[0] += alpha_1[i];
}
else {
alpha_1[i] = 0;
}
}
}
/*iterate over silent states*/
/*printf(" *** iterate over silent states \n");*/
if (mo->model_type & GHMM_kSilentStates) {
for (i = 0; i < mo->topo_order_length; i++) {
id = mo->topo_order[i];
alpha_1[id] = mo->s[id].pi;
/*printf("\nsilent_start alpha1[%i]=%f\n",id,alpha_1[id]);*/
for (j = 0; j < mo->s[id].in_states; j++) {
in_id = mo->s[id].in_id[j];
alpha_1[id] += mo->s[id].in_a[j] * alpha_1[in_id];
/*printf("\n\tsilent_run alpha1[%i]=%f\n",id,alpha_1[id]);*/
}
scale[0] += alpha_1[id];
}
}
/* printf("\nwo label: scale[0] = %f\n",scale[0]); */
if (scale[0] >= GHMM_EPS_PREC) {
c_0 = 1 / scale[0];
for (i = 0; i < mo->N; i++)
alpha_1[i] *= c_0;
}
res = 0;
return (0); /* attention: scale[0] might be 0 */
# undef CUR_PROC
} /* ghmm_dmodel_forward_init */
/*----------------------------------------------------------------------------*/
/** modified by Casillux to improve performance */
double ghmm_dmodel_forward_step (ghmm_dstate * s, double *alpha_t, const double b_symb)
{
int i, id;
double value = 0.0;
if (b_symb < GHMM_EPS_PREC)
return 0.;
/*printf(" *** foba_stepforward\n");*/
for (i = 0; i < s->in_states; i++) {
id = s->in_id[i];
value += s->in_a[i] * alpha_t[id];
/*printf(" state %d, value %f, p_symb %f\n",id, value, b_symb); */
}
value *= b_symb;
return (value);
} /* ghmm_dmodel_forward_step */
/*============================================================================*/
int ghmm_dmodel_forward (ghmm_dmodel * mo, const int *O, int len, double **alpha,
double *scale, double *log_p)
{
# define CUR_PROC "ghmm_dmodel_forward"
char * str;
int res = -1;
int i, t, id;
int e_index;
double c_t;
double log_scale_sum = 0.0;
double non_silent_salpha_sum = 0.0;
double salpha_log = 0.0;
if (mo->model_type & GHMM_kSilentStates)
ghmm_dmodel_order_topological(mo);
ghmm_dmodel_forward_init (mo, alpha[0], O[0], scale);
if (scale[0] < GHMM_EPS_PREC) {
/* means: first symbol can't be generated by hmm */
/*printf("\nscale kleiner als eps (line_no: 123)\n");*/
*log_p = +1;
}
else {
*log_p = -log (1 / scale[0]);
for (t = 1; t < len; t++) {
scale[t] = 0.0;
update_emission_history (mo, O[t - 1]);
/* printf("\n\nStep t=%i mit len=%i, O[i]=%i\n",t,len,O[t]);
printf("iterate over non-silent state\n"); */
/* iterate over non-silent states */
for (i = 0; i < mo->N; i++) {
if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[i])) {
e_index = get_emission_index (mo, i, O[t], t);
if (e_index != -1) {
alpha[t][i] =
ghmm_dmodel_forward_step (&mo->s[i], alpha[t - 1], mo->s[i].b[e_index]);
scale[t] += alpha[t][i];
}
else {
alpha[t][i] = 0;
}
}
}
/* iterate over silent states */
/* printf("iterate over silent state\n"); */
if (mo->model_type & GHMM_kSilentStates) {
for (i = 0; i < mo->topo_order_length; i++) {
/*printf("\nget id\n");*/
id = mo->topo_order[i];
/*printf(" akt_ state %d\n",id);*/
/*printf("\nin stepforward\n");*/
alpha[t][id] = ghmm_dmodel_forward_step (&mo->s[id], alpha[t], 1);
/*printf("\nnach stepforward\n");*/
scale[t] += alpha[t][id];
}
}
if (scale[t] < GHMM_EPS_PREC) {
/* O-string can't be generated by hmm */
str = ighmm_mprintf(NULL, 0, "scale smaller than epsilon (%g < %g) in position %d. Can't generate symbol %d\n", scale[t], GHMM_EPS_PREC, t, O[t]);
GHMM_LOG(LCONVERTED, str);
m_free (str);
*log_p = +1.0;
break;
}
c_t = 1 / scale[t];
for (i = 0; i < mo->N; i++) {
alpha[t][i] *= c_t;
}
if (!(mo->model_type & GHMM_kSilentStates) && *log_p != +1) {
/* sum log(c[t]) scaling values to get log( P(O|lambda) ) */
/*printf("log_p %f -= log(%f) = ",*log_p,c_t);*/
*log_p -= log (c_t);
/*printf(" %f\n",*log_p); */
}
}
if (mo->model_type & GHMM_kSilentStates && *log_p != +1) {
/*printf("silent model\n");*/
for (i = 0; i < len; i++) {
log_scale_sum += log (scale[i]);
}
for (i = 0; i < mo->N; i++) {
if (!(mo->silent[i])) {
non_silent_salpha_sum += alpha[len - 1][i];
}
}
salpha_log = log (non_silent_salpha_sum);
*log_p = log_scale_sum + salpha_log;
}
}
/*printf("\nin forward: log_p = %f\n",*log_p);*/
if (*log_p == 1.0) {
res = -1;
}
else {
res = 0;
}
return res;
# undef CUR_PROC
} /* ghmm_dmodel_forward */
/*============================================================================*/
int ghmm_dmodel_forward_descale (double **alpha, double *scale, int t, int n,
double **newalpha)
{
# define CUR_PROC "ghmm_dmodel_forward_descale"
int i, j, k;
/*printf("\nAngekommen, t=%i, n=%i\n",t,n);*/
for (i = 0; i < t; i++) {
/*printf("i=%i\n",i);*/
for (j = 0; j < n; j++) {
/*printf("\tj=%i\n",j);*/
newalpha[i][j] = alpha[i][j];
/*newalpha[i][j] *= scale[j];*/
/*printf(".");*/
for (k = 0; k <= i; k++) {
/*printf(",");*/
newalpha[i][j] *= scale[k];
}
}
}
/*printf("\ndescale geschafft\n");*/
return 0;
# undef CUR_PROC
} /* ghmm_dmodel_forward_descale */
/***************************** Backward Algorithm ******************************/
int ghmm_dmodel_backward (ghmm_dmodel * mo, const int *O, int len, double **beta,
const double *scale)
{
# define CUR_PROC "ghmm_dmodel_backward"
/* beta_tmp holds beta-variables for silent states */
double *beta_tmp=NULL;
double sum, emission;
int i, j, j_id, t, k, id;
int res = -1;
int e_index;
for (t = 0; t < len; t++)
mes_check_0 (scale[t], goto STOP);
/* topological ordering for models with silent states and allocating
temporary array needed for silent states */
if (mo->model_type & GHMM_kSilentStates) {
ARRAY_CALLOC (beta_tmp, mo->N);
ghmm_dmodel_order_topological(mo);
}
/* initialize all states */
for (i = 0; i < mo->N; i++) {
beta[len - 1][i] = 1.0;
}
if (!(mo->model_type & GHMM_kHigherOrderEmissions)) {
mo->maxorder = 0;
}
/* initialize emission history */
for (t = len - (mo->maxorder); t < len; t++) {
update_emission_history (mo, O[t]);
}
/* Backward Step for t = T-1, ..., 0 */
/* loop over reverse topological ordering of silent states, non-silent states */
for (t = len - 2; t >= 0; t--) {
/* printf(" ----------- *** t = %d *** ---------- \n",t); */
/* printf("\n*** O(%d) = %d\n",t+1,O[t+1]); */
/* updating of emission_history with O[t] such that emission_history memorizes
O[t - maxorder ... t] */
if (0 <= t - mo->maxorder + 1)
update_emission_history_front (mo, O[t - mo->maxorder + 1]);
/* iterating over the the silent states and filling beta_tmp */
if (mo->model_type & GHMM_kSilentStates) {
for (k = mo->topo_order_length - 1; k >= 0; k--) {
id = mo->topo_order[k];
/* printf(" silent[%d] = %d\n",id,mo->silent[id]); */
assert (mo->silent[id] == 1);
sum = 0.0;
for (j = 0; j < mo->s[id].out_states; j++) {
j_id = mo->s[id].out_id[j];
/* out_state is not silent */
if (!mo->silent[j_id]) {
e_index = get_emission_index (mo, j_id, O[t + 1], t + 1);
if (e_index != -1) {
sum += mo->s[id].out_a[j] * mo->s[j_id].b[e_index] * beta[t+1][j_id];
}
}
/* out_state is silent, beta_tmp[j_id] is useful since we go through
the silent states in reversed topological order */
else {
sum += mo->s[id].out_a[j] * beta_tmp[j_id];
}
}
/* setting beta_tmp for the silent state
don't scale the betas for silent states now
wait until the betas for non-silent states are complete to avoid
multiple scaling with the same scalingfactor in one term */
beta_tmp[id] = sum;
}
}
/* iterating over the the non-silent states */
for (i = 0; i < mo->N; i++) {
if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[i])) {
sum = 0.0;
for (j = 0; j < mo->s[i].out_states; j++) {
j_id = mo->s[i].out_id[j];
/* out_state is not silent: get the emission probability
and use beta[t+1]*/
if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[j_id])) {
e_index = get_emission_index (mo, j_id, O[t + 1], t + 1);
if (e_index != -1)
emission = mo->s[j_id].b[e_index];
else
emission = 0;
sum += mo->s[i].out_a[j] * emission * beta[t+1][j_id];
}
/* out_state is silent: use beta_tmp */
else {
sum += mo->s[i].out_a[j] * beta_tmp[j_id];
}
}
/* updating beta[t] for non-silent state */
beta[t][i] = sum / scale[t+1];
}
}
/* updating beta[t] for silent states, finally scale them
and resetting beta_tmp */
if (mo->model_type & GHMM_kSilentStates)
for (i=0; i<mo->N; i++) {
if (mo->silent[i]) {
beta[t][i] = beta_tmp[i] / scale[t+1];
beta_tmp[i] = 0.0;
}
}
}
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
if (mo->model_type & GHMM_kSilentStates) m_free (beta_tmp);
return (res);
# undef CUR_PROC
} /* ghmm_dmodel_backward */
/*============================================================================*/
int ghmm_dmodel_backward_termination (ghmm_dmodel *mo, const int *O, int length,
double **beta, double *scale, double *log_p) {
#define CUR_PROC "ghmm_dmodel_backward_termination"
int i, id, j, j_id, k, res=-1;
double log_scale_sum, sum;
double * beta_tmp=NULL;
/* topological ordering for models with silent states and precomputing
the beta_tmp for silent states */
if (mo->model_type & GHMM_kSilentStates) {
ghmm_dmodel_order_topological(mo);
ARRAY_CALLOC (beta_tmp, mo->N);
for (k = mo->topo_order_length - 1; k >= 0; k--) {
id = mo->topo_order[k];
assert (mo->silent[id] == 1);
sum = 0.0;
for (j = 0; j < mo->s[id].out_states; j++) {
j_id = mo->s[id].out_id[j];
/* out_state is not silent */
if (!mo->silent[j_id]) {
/* no emission history for the first symbol */
if (!(mo->model_type & GHMM_kHigherOrderEmissions) || mo->order[id]==0) {
sum += mo->s[id].out_a[j] * mo->s[j_id].b[O[0]] * beta[0][j_id];
}
}
/* out_state is silent, beta_tmp[j_id] is useful since we go through
the silent states in reversed topological order */
else {
sum += mo->s[id].out_a[j] * beta_tmp[j_id];
}
}
/* setting beta_tmp for the silent state
don't scale the betas for silent states now */
beta_tmp[id] = sum;
}
}
sum = 0.0;
/* iterating over all states with pi > 0.0 */
for (i = 0; i < mo->N; i++) {
if (mo->s[i].pi > 0.0) {
/* silent states */
if ((mo->model_type & GHMM_kSilentStates) && mo->silent[i]) {
sum += mo->s[i].pi * beta_tmp[i];
}
/* non-silent states */
else {
/* no emission history for the first symbol */
if (!(mo->model_type & GHMM_kHigherOrderEmissions) || mo->order[i]==0) {
sum += mo->s[i].pi * mo->s[i].b[O[0]] * beta[0][i];
}
}
}
}
*log_p = log (sum / scale[0]);
log_scale_sum = 0.0;
for (i = 0; i < length; i++) {
log_scale_sum += log (scale[i]);
}
*log_p += log_scale_sum;
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
if (mo->model_type & GHMM_kSilentStates) m_free (beta_tmp);
return (res);
# undef CUR_PROC
}
/*============================================================================*/
int ghmm_dmodel_logp (ghmm_dmodel * mo, const int *O, int len, double *log_p)
{
# define CUR_PROC "ghmm_dmodel_logp"
int res = -1;
double **alpha, *scale = NULL;
alpha = ighmm_cmatrix_stat_alloc (len, mo->N);
if (!alpha) {
GHMM_LOG_QUEUED(LCONVERTED);
goto STOP;
}
ARRAY_CALLOC (scale, len);
/* run ghmm_dmodel_forward */
if (ghmm_dmodel_forward (mo, O, len, alpha, scale, log_p) == -1) {
GHMM_LOG_QUEUED(LCONVERTED);
goto STOP;
}
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
ighmm_cmatrix_stat_free (&alpha);
m_free (scale);
return (res);
# undef CUR_PROC
} /* ghmm_dmodel_logp */
/*============================================================================*/
int ghmm_dmodel_logp_joint(ghmm_dmodel * mo, const int *O, int len,
const int *S, int slen, double *log_p)
{
# define CUR_PROC "ghmm_dmodel_logp_joint"
int prevstate, state, state_pos=0, pos=0, j;
prevstate = state = S[0];
*log_p = log(mo->s[state].pi);
if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[state])
*log_p += log(mo->s[state].b[O[pos++]]);
for (state_pos=1; state_pos < slen || pos < len; state_pos++) {
state = S[state_pos];
for (j=0; j < mo->s[state].in_states; ++j) {
if (prevstate == mo->s[state].in_id[j])
break;
}
if (j == mo->s[state].in_states ||
fabs(mo->s[state].in_a[j]) < GHMM_EPS_PREC) {
GHMM_LOG_PRINTF(LERROR, LOC, "Sequence can't be built. There is no "
"transition from state %d to %d.", prevstate, state);
goto STOP;
}
*log_p += log(mo->s[state].in_a[j]);
if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[state]) {
*log_p += log(mo->s[state].b[O[pos++]]);
}
prevstate = state;
}
if (pos < len)
GHMM_LOG_PRINTF(LINFO, LOC, "state sequence too short! processed only %d symbols", pos);
if (state_pos < slen)
GHMM_LOG_PRINTF(LINFO, LOC, "sequence too short! visited only %d states", state_pos);
return 0;
STOP:
return -1;
# undef CUR_PROC
}
/*============================================================================*/
/*====================== Lean forward algorithm ====================*/
int ghmm_dmodel_forward_lean (ghmm_dmodel * mo, const int *O, int len, double *log_p)
{
# define CUR_PROC "ghmm_dmodel_forward_lean"
int res = -1;
int i, t, id, e_index;
double c_t;
double log_scale_sum = 0.0;
double non_silent_salpha_sum = 0.0;
double salpha_log = 0.0;
double *alpha_last_col=NULL;
double *alpha_curr_col=NULL;
double *switching_tmp;
double *scale=NULL;
/* Allocating */
ARRAY_CALLOC (alpha_last_col, mo->N);
ARRAY_CALLOC (alpha_curr_col, mo->N);
ARRAY_CALLOC (scale, len);
if (mo->model_type & GHMM_kSilentStates)
ghmm_dmodel_order_topological(mo);
ghmm_dmodel_forward_init (mo, alpha_last_col, O[0], scale);
if (scale[0] < GHMM_EPS_PREC) {
/* means: first symbol can't be generated by hmm */
*log_p = +1;
}
else {
*log_p = -log (1 / scale[0]);
for (t = 1; t < len; t++) {
scale[t] = 0.0;
update_emission_history (mo, O[t - 1]);
/* iterate over non-silent states */
for (i = 0; i < mo->N; i++) {
if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[i])) {
e_index = get_emission_index (mo, i, O[t], t);
if (e_index != -1) {
alpha_curr_col[i] = ghmm_dmodel_forward_step (&mo->s[i], alpha_last_col,
mo->s[i].b[e_index]);
scale[t] += alpha_curr_col[i];
}
else
alpha_curr_col[i] = 0;
}
}
/* iterate over silent states */
if (mo->model_type & GHMM_kSilentStates) {
for (i = 0; i < mo->topo_order_length; i++) {
id = mo->topo_order[i];
alpha_curr_col[id] = ghmm_dmodel_forward_step (&mo->s[id], alpha_curr_col, 1);
scale[t] += alpha_curr_col[id];
}
}
if (scale[t] < GHMM_EPS_PREC) {
GHMM_LOG(LCONVERTED, "scale smaller than epsilon\n");
/* O-string can't be generated by hmm */
*log_p = +1.0;
break;
}
c_t = 1 / scale[t];
for (i = 0; i < mo->N; i++)
alpha_curr_col[i] *= c_t;
if (!(mo->model_type & GHMM_kSilentStates)) {
/*sum log(c[t]) scaling values to get log( P(O|lambda) ) */
*log_p -= log (c_t);
}
/* switching pointers of alpha_curr_col and alpha_last_col
don't set alpha_curr_col[i] to zero since its overwritten */
switching_tmp = alpha_last_col;
alpha_last_col = alpha_curr_col;
alpha_curr_col = switching_tmp;
}
/* Termination step: compute log likelihood */
if ((mo->model_type & GHMM_kSilentStates) && *log_p != +1) {
/*printf("silent model\n");*/
for (i = 0; i < len; i++) {
log_scale_sum += log (scale[i]);
}
for (i = 0; i < mo->N; i++) {
/* use alpha_last_col since the columms are also in the last step
switched */
if (!(mo->silent[i]))
non_silent_salpha_sum += alpha_last_col[i];
}
salpha_log = log (non_silent_salpha_sum);
*log_p = log_scale_sum + salpha_log;
}
}
/*printf("\nin forward: log_p = %f\n",*log_p);*/
if (*log_p == 1.0)
res = -1;
else
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
/* Deallocation */
m_free (alpha_last_col);
m_free (alpha_curr_col);
m_free (scale);
return res;
#undef CUR_PROC
} /* ghmm_dmodel_forward_lean */
/*======================= labeled HMMs =======================================*/
static int foba_label_initforward (ghmm_dmodel * mo, double *alpha_1, int symb,
int label, double *scale)
{
# define CUR_PROC "foba_label_initforward"
int res = -1;
int i;
double c_0;
scale[0] = 0.0;
/* iterate over non-silent states */
for (i = 0; i < mo->N; i++) {
if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[i])) {
if (mo->label[i] == label)
alpha_1[i] = mo->s[i].pi * mo->s[i].b[symb];
else
alpha_1[i] = 0.0;
}
else
alpha_1[i] = 0.0;
/* printf("\nalpha1[%i]=%f\n",i,alpha_1[i]); */
scale[0] += alpha_1[i];
}
/* printf("\nlabel: scale[0] = %f\n",scale[0]); */
if (scale[0] >= GHMM_EPS_PREC) {
c_0 = 1 / scale[0];
for (i = 0; i < mo->N; i++)
alpha_1[i] *= c_0;
}
res = 0;
return (0); /* attention: scale[0] might be 0 */
# undef CUR_PROC
} /* foba_label_initforward */
/*============================================================================*/
int ghmm_dmodel_label_forward (ghmm_dmodel * mo, const int *O, const int *label, int len,
double **alpha, double *scale, double *log_p)
{
# define CUR_PROC "ghmm_dl_forward"
int res = -1;
int i, t;
int e_index;
double c_t;
char *str;
foba_label_initforward (mo, alpha[0], O[0], label[0], scale);
if (scale[0] < GHMM_EPS_PREC) {
/* means: first symbol can't be generated by hmm */
*log_p = +1;
}
else {
*log_p = -log (1 / scale[0]);
for (t = 1; t < len; t++) {
update_emission_history (mo, O[t - 1]);
scale[t] = 0.0;
/* printf("\n\nStep t=%i mit len=%i, O[i]=%i\n",t,len,O[t]); */
for (i = 0; i < mo->N; i++) {
if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[i])) {
if (mo->label[i] == label[t]) {
/*printf("%d: akt_ state %d, label: %d \t current Label: %d\n",
t, i, mo->label[i], label[t]);*/
e_index = get_emission_index (mo, i, O[t], t);
if (-1 != e_index) {
alpha[t][i] = ghmm_dmodel_forward_step(&mo->s[i], alpha[t-1], mo->s[i].b[e_index]);
/*if (alpha[t][i] < GHMM_EPS_PREC) {
printf("alpha[%d][%d] = %g \t ", t, i, alpha[t][i]);
printf("mo->s[%d].b[%d] = %g\n", i, e_index, mo->s[i].b[e_index]);
}
else printf("alpha[%d][%d] = %g\n", t, i, alpha[t][i]);*/
}
else {
alpha[t][i] = 0;
}
}
else {
alpha[t][i] = 0;
}
scale[t] += alpha[t][i];
}
else {
GHMM_LOG(LCONVERTED, "ERROR: Silent state in foba_label_forward.\n");
}
}
if (scale[t] < GHMM_EPS_PREC) {
if (t > 4) {
str = ighmm_mprintf (NULL, 0, "%g\t%g\t%g\t%g\t%g\n", scale[t-5],
scale[t-4], scale[t-3], scale[t-2], scale[t-1]);
GHMM_LOG(LCONVERTED, str);
m_free (str);
}
str = ighmm_mprintf (NULL, 0, "scale = %g smaller than eps = EPS_PREC "
"in the %d-th char.\ncannot generate emission: %d "
"with label: %d in sequence of length %d\n",
scale[t], t, O[t], label[t], len);
GHMM_LOG(LCONVERTED, str);
m_free (str);
/* O-string can't be generated by hmm */
*log_p = +1.0;
break;
}
c_t = 1 / scale[t];
for (i = 0; i < mo->N; i++) {
alpha[t][i] *= c_t;
}
if (!(mo->model_type & GHMM_kSilentStates) && *log_p != +1) {
/*sum log(c[t]) scaling values to get log( P(O|lambda) ) */
/*printf("log_p %f -= log(%f) = ",*log_p,c_t);*/
*log_p -= log (c_t);
/*printf(" %f\n",*log_p);*/
}
}
}
/*printf("\nin forward: log_p = %f\n",*log_p);*/
if (*log_p == 1.0) {
res = -1;
}
else {
res = 0;
}
return res;
# undef CUR_PROC
} /* ghmm_dmodel_forward */
/*============================================================================*/
int ghmm_dmodel_label_logp (ghmm_dmodel * mo, const int *O, const int *label, int len,
double *log_p)
{
# define CUR_PROC "ghmm_dl_logp"
int res = -1;
double **alpha, *scale = NULL;
alpha = ighmm_cmatrix_stat_alloc (len, mo->N);
if (!alpha) {
GHMM_LOG_QUEUED(LCONVERTED);
goto STOP;
}
ARRAY_CALLOC (scale, len);
/* run ghmm_dmodel_forward */
if (ghmm_dmodel_label_forward (mo, O, label, len, alpha, scale, log_p) == -1) {
GHMM_LOG_QUEUED(LCONVERTED);
goto STOP;
}
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
ighmm_cmatrix_stat_free (&alpha);
m_free (scale);
return (res);
# undef CUR_PROC
} /* ghmm_dl_logp */
/*============================================================================*/
int ghmm_dmodel_label_backward (ghmm_dmodel * mo, const int *O, const int *label, int len,
double **beta, double *scale, double *log_p)
{
# define CUR_PROC "ghmm_dl_backward"
double *beta_tmp, sum;
int i, j, j_id, t;
int res = -1;
int e_index;
/* int beta_out=0; */
double emission;
ARRAY_CALLOC (beta_tmp, mo->N);
for (t = 0; t < len; t++)
mes_check_0 (scale[t], goto STOP);
/* check for silent states */
if (mo->model_type & GHMM_kSilentStates) {
GHMM_LOG(LCONVERTED, "ERROR: No silent states allowed in labelled HMM!\n");
goto STOP;
}
/* initialize */
for (i = 0; i < mo->N; i++) {
/* start only in states with the correct label */
if (label[len - 1] == mo->label[i])
beta[len - 1][i] = 1.0;
else
beta[len - 1][i] = 0.0;
beta_tmp[i] = beta[len - 1][i] / scale[len - 1];
}
/* initialize emission history */
if (!(mo->model_type & GHMM_kHigherOrderEmissions))
mo->maxorder = 0;
for (t = len - (mo->maxorder); t < len; t++) {
update_emission_history (mo, O[t]);
}
/* Backward Step for t = T-1, ..., 0
beta_tmp: Vector for storage of scaled beta in one time step
loop over reverse topological ordering of silent states, non-silent states */
for (t = len - 2; t >= 0; t--) {
/* updating of emission_history with O[t] such that emission_history
memorizes O[t - maxorder ... t] */
if (0 <= t - mo->maxorder + 1)
update_emission_history_front (mo, O[t - mo->maxorder + 1]);
for (i = 0; i < mo->N; i++) {
sum = 0.0;
for (j = 0; j < mo->s[i].out_states; j++) {
j_id = mo->s[i].out_id[j];
/* The state has only a emission with probability > 0, if the label matches */
if (label[t] == mo->label[i]) {
e_index = get_emission_index (mo, j_id, O[t + 1], t + 1);
if (e_index != -1)
emission = mo->s[j_id].b[e_index];
else
emission = 0.0;
}
else
emission = 0.0;
sum += mo->s[i].out_a[j] * emission * beta_tmp[j_id];
}
beta[t][i] = sum;
/* if ((beta[t][i] > 0) && ((beta[t][i] < .01) || (beta[t][i] > 100))) beta_out++; */
}
for (i = 0; i < mo->N; i++)
beta_tmp[i] = beta[t][i] / scale[t];
}
/* printf("labeled betas out of [.01 100]: %d (%f %)\n", beta_out, (float)beta_out/(mo->N*len)); */
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (beta_tmp);
return (res);
# undef CUR_PROC
}
/*=========== Lean forward algorithm for labeled states ====================*/
int ghmm_dl_forward_lean (ghmm_dmodel * mo, const int *O, const int *label,
int len, double *log_p)
{
# define CUR_PROC "ghmm_dl_forward_lean"
int res = -1;
int i, t, id, e_index;
double c_t;
double log_scale_sum = 0.0;
double non_silent_salpha_sum = 0.0;
double salpha_log = 0.0;
double *alpha_last_col=NULL;
double *alpha_curr_col=NULL;
double *switching_tmp=NULL;
double *scale=NULL;
/* Allocating */
ARRAY_CALLOC (alpha_last_col, mo->N);
ARRAY_CALLOC (alpha_curr_col, mo->N);
ARRAY_CALLOC (scale, len);
if (mo->model_type & GHMM_kSilentStates)
ghmm_dmodel_order_topological(mo);
foba_label_initforward (mo, alpha_last_col, O[0], label[0], scale);
if (scale[0] < GHMM_EPS_PREC) {
/* means: first symbol can't be generated by hmm */
*log_p = +1;
goto STOP;
}
*log_p = -log (1 / scale[0]);
for (t = 1; t < len; t++) {
scale[t] = 0.0;
/* iterate over non-silent states */
for (i = 0; i < mo->N; i++) {
if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[i])) {
/* printf(" akt_ state %d\n",i);*/
if (mo->label[i] == label[t]) {
e_index = get_emission_index (mo, i, O[t], t);
if (e_index != -1) {
alpha_curr_col[i] = ghmm_dmodel_forward_step (&mo->s[i], alpha_last_col,
mo->s[i].b[e_index]);
scale[t] += alpha_curr_col[i];
}
else
alpha_curr_col[i] = 0;
}
else
alpha_curr_col[i] = 0;
}
}
/* iterate over silent states */
if (mo->model_type & GHMM_kSilentStates) {
for (i = 0; i < mo->topo_order_length; i++) {
id = mo->topo_order[i];
alpha_curr_col[id] = ghmm_dmodel_forward_step (&mo->s[id], alpha_last_col, 1);
scale[t] += alpha_curr_col[id];
}
}
if (scale[t] < GHMM_EPS_PREC) {
GHMM_LOG(LCONVERTED, "scale smaller than epsilon\n");
/* O-string can't be generated by hmm */
*log_p = +1.0;
break;
}
c_t = 1 / scale[t];
for (i = 0; i < mo->N; i++)
alpha_curr_col[i] *= c_t;
if (!(mo->model_type & GHMM_kSilentStates)) {
/*sum log(c[t]) scaling values to get log( P(O|lambda) ) */
*log_p -= log (c_t);
}
/* switching pointers of alpha_curr_col and alpha_last_col
don't set alpha_curr_col[i] to zero since its overwritten */
switching_tmp = alpha_last_col;
alpha_last_col = alpha_curr_col;
alpha_curr_col = switching_tmp;
}
/* Termination step: compute log likelihood */
if (mo->model_type & GHMM_kSilentStates && *log_p != +1) {
/*printf("silent model\n");*/
for (i = 0; i < len; i++)
log_scale_sum += log (scale[i]);
for (i = 0; i < mo->N; i++)
if (!(mo->silent[i]))
non_silent_salpha_sum += alpha_curr_col[i];
salpha_log = log (non_silent_salpha_sum);
*log_p = log_scale_sum + salpha_log;
}
/*printf("\nin forward: log_p = %f\n",*log_p);*/
if (*log_p == 1.0)
res = -1;
else
res = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
/* Deallocation */
m_free (alpha_last_col);
m_free (alpha_curr_col);
m_free (scale);
return res;
#undef CUR_PROC
} /* foba_forward_label_lean */