/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/sdviterbi.c
* Authors: Wasinee Rungsarityotin
*
* Copyright (C) 1998-2004 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
* Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 1713 $
* from $Date: 2006-10-16 10:06:28 -0400 (Mon, 16 Oct 2006) $
* last change by $Author: grunau $.
*
*******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "../config.h"
#endif
#include <float.h>
#include <math.h>
#include <assert.h>
#include "ghmm.h"
#include "mprintf.h"
#include "mes.h"
#include "matrix.h"
#include "sdmodel.h"
#include "ghmm_internals.h"
typedef enum DFSFLAG { DONE, NOTVISITED, VISITED } DFSFLAG;
typedef struct local_store_t {
double ***log_in_a;
double **log_b;
double *phi;
double *phi_new;
int **psi;
int *topo_order;
int topo_order_length;
} local_store_t;
static local_store_t *sdviterbi_alloc (ghmm_dsmodel * mo, int len);
static int sdviterbi_free (local_store_t ** v, int n, int cos, int len);
/*----------------------------------------------------------------------------*/
static local_store_t *sdviterbi_alloc (ghmm_dsmodel * mo, int len)
{
#define CUR_PROC "sdviterbi_alloc"
local_store_t *v = NULL;
int j;
ARRAY_CALLOC (v, 1);
/* Allocate the log_in_a's -> individal lenghts */
ARRAY_CALLOC (v->log_in_a, mo->N);
for (j = 0; j < mo->N; j++)
v->log_in_a[j] = ighmm_cmatrix_stat_alloc (mo->cos, mo->s[j].in_states);
v->log_b = ighmm_cmatrix_stat_alloc (mo->N, len);
if (!(v->log_b)) {
GHMM_LOG_QUEUED(LCONVERTED);
goto STOP;
}
ARRAY_CALLOC (v->phi, mo->N);
ARRAY_CALLOC (v->phi_new, mo->N);
v->psi = ighmm_dmatrix_alloc (len, mo->N);
if (!(v->psi)) {
GHMM_LOG_QUEUED(LCONVERTED);
goto STOP;
}
v->topo_order_length = 0;
ARRAY_CALLOC (v->topo_order, mo->N);
return (v);
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
sdviterbi_free (&v, mo->N, mo->cos, len);
return (NULL);
#undef CUR_PROC
} /* viterbi_alloc */
/*----------------------------------------------------------------------------*/
static int sdviterbi_free (local_store_t ** v, int n, int cos, int len)
{
#define CUR_PROC "sdviterbi_free"
int j;
mes_check_ptr (v, return (-1));
if (!*v)
return (0);
for (j = 0; j < n; j++)
ighmm_cmatrix_stat_free (&((*v)->log_in_a[j]));
m_free ((*v)->log_in_a);
ighmm_cmatrix_stat_free (&((*v)->log_b));
m_free ((*v)->phi);
m_free ((*v)->phi_new);
ighmm_dmatrix_free (&((*v)->psi), len);
m_free ((*v)->topo_order);
m_free (*v);
return (0);
#undef CUR_PROC
} /* viterbi_free */
/*----------------------------------------------------------------------------*/
static void Viterbi_precompute (ghmm_dsmodel * mo, int *o, int len,
local_store_t * v)
{
#define CUR_PROC "viterbi_precompute"
int i, j, k, t;
/* Precomputing the log(a_ij) */
/*for (j = 0; j < mo->N; j++)*/
/* for (i = 0; i < mo->s[j].in_states; i++)*/
/* if ( mo->s[j].in_a[i] == 0.0 ) */ /* DBL_EPSILON ? */
/*log_in_a[j][i] = +1; */ /* Not used any further in the calculations */
/* else*/
/*log_in_a[j][i] = log( mo->s[j].in_a[i] );*/
for (j = 0; j < mo->N; j++) {
for (k = 0; k < mo->cos; k++)
for (i = 0; i < mo->s[j].in_states; i++)
if (mo->s[j].in_a[k][i] == 0.0) /* DBL_EPSILON ? */
v->log_in_a[j][k][i] = +1; /* Not used any further in the calculations */
else
v->log_in_a[j][k][i] = log (mo->s[j].in_a[k][i]);
}
/* Precomputing the log(bj(ot)) */
for (j = 0; j < mo->N; j++)
for (t = 0; t < len; t++) {
if (mo->s[j].b[o[t]] == 0.0) /* DBL_EPSILON ? */
v->log_b[j][t] = +1;
else
v->log_b[j][t] = log (mo->s[j].b[o[t]]);
}
#undef CUR_PROC
} /* viterbi_precompute */
/** */
static void __viterbi_silent (ghmm_dsmodel * mo, int t, local_store_t * v,
int *recent_matchcount, int *countstates,
int nr_of_countstates)
{
int topocount;
int i, k, osc;
double max_value, value;
osc = 0;
for (topocount = 0; topocount < mo->topo_order_length; topocount++) {
k = mo->topo_order[topocount];
if (mo->silent[k]) { /* Silent states */
/* Determine the maximum */
/* max_phi = phi[i] + log_in_a[j][i] ... */
max_value = -DBL_MAX;
v->psi[t][k] = -1;
for (i = 0; i < mo->s[k].in_states; i++) {
/* printf("\nBerrechnung von transclass von Zustand %d", mo->s[k].in_id[i]);*/
if (mo->cos != 1) {
osc = mo->get_class (NULL, mo->N);
}
if (v->phi[mo->s[k].in_id[i]] != +1 && v->log_in_a[k][osc][i] != +1) {
value = v->phi[mo->s[k].in_id[i]] + v->log_in_a[k][osc][i];
if (value > max_value) {
max_value = value;
v->psi[t][k] = mo->s[k].in_id[i];
}
}
}
/*find out, if we are in a delete state unless this state isn't reached anyway */
if (v->psi[t][k] != -1) {
for (i = 0; i < nr_of_countstates; i++) {
if (k == countstates[i]) {
recent_matchcount[k] = 1;
break;
}
}
recent_matchcount[k] += recent_matchcount[v->psi[t][k]];
/* No maximum found (that is, state never reached)
or the output O[t] = 0.0: */
if (max_value == -DBL_MAX) {
v->phi[k] = +1;
}
else {
v->phi[k] = max_value;
}
}
}
}
}
/** Return the log score of the sequence */
int *ghmm_dsmodel_viterbi (ghmm_dsmodel * mo, int *o, int len, double *log_p)
{
#define CUR_PROC "ghmm_dsmodel_viterbi"
int *state_seq = NULL;
int t, j, i, k, St, osc=0;
int last_osc = -1;
double value, max_value;
local_store_t *v;
int len_path = mo->N * len;
/*lists to remember how long we have been staying in the circular part of the model */
int *former_matchcount = NULL;
int *recent_matchcount = NULL;
int *tmp_matchcount = NULL;
int *countstates = NULL;
int nr_of_countstates = 2 * ((mo->N - 5) / 3); /* # of matchstates + deletestates*/
int lastemState;
int *tmp_path;
if ((mo->model_type & GHMM_kSilentStates) && mo->topo_order == NULL) {
/* ghmm_dsmodel_topo_order (mo); */
/* Should we call it here ???? */
fprintf (stderr,
"Viterbi Error: Contain silent states, but no topological ordering\n");
goto STOP;
}
/* Allocate the matrices log_in_a, log_b,Vektor phi, phi_new, Matrix psi */
v = sdviterbi_alloc (mo, len);
if (!v) {
GHMM_LOG_QUEUED(LCONVERTED);
goto STOP;
}
ARRAY_CALLOC (state_seq, len_path);
for (i = 0; i < len_path; i++) {
state_seq[i] = -1;
}
ARRAY_CALLOC (former_matchcount, mo->N);
ARRAY_CALLOC (recent_matchcount, mo->N);
/*We always start outside of the circle, no way to get in in t=0 => matchcounts = 0 */
for (i = 0; i < mo->N; i++) {
former_matchcount[i] = 0;
recent_matchcount[i] = 0;
}
ARRAY_CALLOC (countstates, nr_of_countstates);
for (i = 0; i < nr_of_countstates / 2; i++) {
/* 5th state is first matchstate, then come all other matchstates and afterwards all deletestates
so we have a list of states that inkrement the counter of how long we have been in the circle
*/
countstates[i] = i + 5;
countstates[i + nr_of_countstates / 2] = i + nr_of_countstates + 4;
}
/* Precomputing the log(a_ij) and log(bj(ot)) */
Viterbi_precompute (mo, o, len, v);
/* Initialization, that is t = 0 */
for (j = 0; j < mo->N; j++) {
if (mo->s[j].pi == 0.0 || v->log_b[j][0] == +1) /* instead of 0, DBL_EPS.? */
v->phi[j] = +1;
else
v->phi[j] = log (mo->s[j].pi) + v->log_b[j][0];
}
if (mo->model_type & GHMM_kSilentStates) { /* could go into silent state at t=0 */
__viterbi_silent (mo, t =
0, v, recent_matchcount, countstates,
nr_of_countstates);
}
/*
for (t=0, j = 0; j < mo->N; j++)
{
printf("\npsi[%d],in:%f, phi=%f\n", t, v->psi[t][j], v->phi[j]);
} */
/* t > 0 */
for (t = 1; t < len; t++) {
/*osc = mo->get_class(mo->N,t);*/
for (j = 0; j < mo->N; j++) {
/** initialization of phi, psi **/
v->phi_new[j] = +1;
v->psi[t][j] = -1;
}
for (k = 0; k < mo->N; k++) {
recent_matchcount[k] = 0;
/* Determine the maximum */
/* max_phi = phi[i] + log_in_a[j][i] ... */
if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[k]) {
St = k;
max_value = -DBL_MAX;
v->psi[t][St] = -1;
for (i = 0; i < mo->s[St].in_states; i++) {
/* get_class of in state*/
/* printf("\nBerechnung von transclass fuer Zustand %d", mo->s[St].in_id[i]);*/
if (mo->cos > 1) {
osc = mo->get_class (NULL, mo->N);
}
if (v->phi[mo->s[St].in_id[i]] != +1 &&
v->log_in_a[St][osc][i] != +1) {
value = v->phi[mo->s[St].in_id[i]] + v->log_in_a[St][osc][i];
if (value > max_value) {
max_value = value;
v->psi[t][St] = mo->s[St].in_id[i];
}
}
else {
/* fprintf(stderr, " %d --> %d = %f, \n", i,St,v->log_in_a[St][osc][i]);*/
}
}
/* No maximum found (that is, state never reached)
or the output O[t] = 0.0: */
if (max_value == -DBL_MAX || /* and then also: (v->psi[t][j] == -1) */
v->log_b[St][t] == +1) {
v->phi_new[St] = +1;
}
else
v->phi_new[St] = max_value + v->log_b[St][t];
/*find out how long we have been staying in the circle unless we didn't reach this state */
if (v->psi[t][St] != -1) {
for (i = 0; i < nr_of_countstates; i++) {
if (countstates[i] == St) {
recent_matchcount[St] = 1;
break;
}
}
recent_matchcount[St] += former_matchcount[v->psi[t][St]];
}
}
} /* complete time step for emitting states */
/* First now replace the old phi with the new phi */
for (j = 0; j < mo->N; j++) {
v->phi[j] = v->phi_new[j];
/* printf("\npsi[%d],%d, phi, %f\n", t, v->psi[t][j], v->phi[j]); */
}
last_osc = osc; /* save last transition class */
if (mo->model_type & GHMM_kSilentStates) {
__viterbi_silent (mo, t, v, recent_matchcount, countstates,
nr_of_countstates);
} /* complete time step for silent states */
/*
for (j = 0; j < mo->N; j++)
{
printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]);
}
*/
} /* Next observation , increment time-step */
/* Termination */
max_value = -DBL_MAX;
state_seq[len_path - 1] = -1;
for (j = 0; j < mo->N; j++)
if (v->phi[j] != +1 && v->phi[j] > max_value) {
max_value = v->phi[j];
state_seq[len_path - 1] = j;
}
if (max_value == -DBL_MAX) {
/* Sequence can't be generated from the model! */
*log_p = +1;
/* Backtracing doesn't work, because state_seq[*] allocated with -1 */
for (t = len - 2; t >= 0; t--)
state_seq[t] = -1;
}
else {
/* Backtracing, should put DEL path nicely */
*log_p = max_value;
lastemState = state_seq[len_path - 1];
for (t = len - 2, i = len_path - 2; t >= 0; t--) {
if ((mo->model_type & GHMM_kSilentStates) &&
mo->silent[v->psi[t + 1][lastemState]]) {
St = v->psi[t + 1][lastemState];
/* fprintf(stderr, "t=%d: DEL St=%d\n", t+1, St ); */
while (St != -1 && mo->silent[St]) { /* trace back up to the last emitting state */
/* fprintf(stderr, "t=%d: DEL St=%d\n", t, St ); */
state_seq[i--] = St;
St = v->psi[t][St];
}
state_seq[i--] = St;
lastemState = St;
}
else {
state_seq[i--] = v->psi[t + 1][lastemState];
lastemState = v->psi[t + 1][lastemState];
}
}
}
/* COPY PATH */
tmp_path = state_seq;
if (!(mo->model_type & GHMM_kSilentStates)) {
state_seq = (int *) malloc (sizeof (int) * len);
for (i = 0, t = 0; t < len_path && i < len; t++) {
if (tmp_path[t] >= 0) {
state_seq[i++] = tmp_path[t];
}
}
m_free (tmp_path); /* free the old state sequence */
}
/* Free the memory space */
m_free (former_matchcount);
m_free (recent_matchcount);
m_free (tmp_matchcount);
m_free (countstates);
sdviterbi_free (&v, mo->N, mo->cos, len);
return (state_seq);
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
/* Free the memory space */
sdviterbi_free (&v, mo->N, mo->cos, len);
m_free (state_seq);
m_free (former_matchcount);
m_free (recent_matchcount);
m_free (tmp_matchcount);
m_free (countstates);
return NULL;
#undef CUR_PROC
} /* viterbi */
int *ghmm_dsmodel_viterbi_silent (ghmm_dsmodel * mo, int *o, int len, double *log_p)
{
#define CUR_PROC "ghmm_dsmodel_viterbi_silent"
return 0;
#undef CUR_PROC
}