Codebase list ghmm / HEAD ghmm / viterbi.c
HEAD

Tree @HEAD (Download .tar.gz)

viterbi.c @HEADraw · history · blame

/*******************************************************************************
*
*       This file is part of the General Hidden Markov Model Library,
*       GHMM version __VERSION__, see http://ghmm.org
*
*       Filename: ghmm/ghmm/viterbi.c
*       Authors:  Wasinee Rungsarityotin, Benjamin Georgi
*
*       Copyright (C) 1998-2004 Alexander Schliep
*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
*                               Berlin
*
*       Contact: schliep@ghmm.org
*
*       This library is free software; you can redistribute it and/or
*       modify it under the terms of the GNU Library General Public
*       License as published by the Free Software Foundation; either
*       version 2 of the License, or (at your option) any later version.
*
*       This library is distributed in the hope that it will be useful,
*       but WITHOUT ANY WARRANTY; without even the implied warranty of
*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*       Library General Public License for more details.
*
*       You should have received a copy of the GNU Library General Public
*       License along with this library; if not, write to the Free
*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
*       This file is version $Revision: 1991 $
*                       from $Date: 2007-12-05 11:39:04 -0500 (Wed, 05 Dec 2007) $
*             last change by $Author: grunau $.
*
*******************************************************************************/

#ifdef HAVE_CONFIG_H
#  include "../config.h"
#endif

#include <float.h>
#include <math.h>
#include <assert.h>

#include "ghmm.h"
#include "mes.h"
#include "mprintf.h"
#include "matrix.h"
#include "sdmodel.h"
#include "ghmm_internals.h"

typedef struct local_store_t {
    double **log_in_a;
    double **log_b;
    double *phi;
    double *phi_new;
    int **psi;

    int *path_len;

    int *topo_order;
    int topo_order_length;
} local_store_t;


/*----------------------------------------------------------------------------*/
static int viterbi_free(local_store_t **v, int n, int len)
{
#define CUR_PROC "viterbi_free"
    int j;
    mes_check_ptr(v, return -1);

    if (!*v)
        return 0;

    for (j = 0; j < n; j++)
        m_free((*v)->log_in_a[j]);

    m_free((*v)->log_in_a);
    ighmm_cmatrix_free(&((*v)->log_b), n);
    m_free((*v)->phi);
    m_free((*v)->phi_new);
    /*ighmm_dmatrix_free( &((*v)->psi), len ); */
    ighmm_dmatrix_stat_free(&((*v)->psi));

    m_free((*v)->path_len);
    m_free((*v)->topo_order);

    m_free(*v);

    return 0;
#undef CUR_PROC
}                               /* viterbi_free */

/*----------------------------------------------------------------------------*/
static local_store_t *viterbi_alloc(ghmm_dmodel *mo, int len)
{
#define CUR_PROC "sdviterbi_alloc"
    local_store_t *v = NULL;
    int j;
    ARRAY_CALLOC(v, 1);

    /* Allocate the log_in_a's -> individal lenghts */
    ARRAY_CALLOC(v->log_in_a, mo->N);
    for (j = 0; j < mo->N; j++) {
        ARRAY_CALLOC(v->log_in_a[j], mo->s[j].in_states);
    }

    v->log_b = ighmm_cmatrix_alloc(mo->N, mo->M);
    if (!(v->log_b)) {
        GHMM_LOG_QUEUED(LCONVERTED);
        goto STOP;
    }
    ARRAY_CALLOC(v->phi, mo->N);
    ARRAY_CALLOC(v->phi_new, mo->N);
    v->psi = ighmm_dmatrix_stat_alloc(len, mo->N);
    if (!(v->psi)) {
        GHMM_LOG_QUEUED(LCONVERTED);
        goto STOP;
    }

    ARRAY_CALLOC(v->path_len, mo->N);

    v->topo_order_length = 0;
    ARRAY_CALLOC(v->topo_order, mo->N);

    return v;
  STOP:                        /* Label STOP from ARRAY_[CM]ALLOC */
    viterbi_free(&v, mo->N, len);
    return NULL;
#undef CUR_PROC
}                               /* viterbi_alloc */

/*----------------------------------------------------------------------------*/
static void Viterbi_precompute(ghmm_dmodel *mo, int *o, int len, local_store_t *v)
{
#define CUR_PROC "viterbi_precompute"
    int i, j, t;

    /* Precomputing the log(a_ij) */
    for (j = 0; j < mo->N; j++) {
        for (i = 0; i < mo->s[j].in_states; i++) {
            if (mo->s[j].in_a[i] == 0.0)        /* DBL_EPSILON ? */
                v->log_in_a[j][i] = +1; /* Not used any further in the calculations */
            else
                v->log_in_a[j][i] = log(mo->s[j].in_a[i]);
        }
    }

    /* Precomputing the log(bj(ot)) */
    for (j = 0; j < mo->N; j++) {
        for (t = 0; t < mo->M; t++) {
            if (mo->s[j].b[t] == 0.0)    /* DBL_EPSILON ? */
                v->log_b[j][t] = +1;
            else
                v->log_b[j][t] = log(mo->s[j].b[t]);
        }
    }
#undef CUR_PROC
}                               /* viterbi_precompute */

/*----------------------------------------------------------------------------*/
static void viterbi_silent(ghmm_dmodel *mo, int t, local_store_t *v)
{
#define CUR_PROC "viterbi_silent"
    int topocount;
    int i, St, i_id, max_id;
    double max_value, value;

    for (topocount=0; topocount < mo->topo_order_length; topocount++) {
        St = mo->topo_order[topocount];
        if (mo->silent[St]) {    /* Silent states */
            /* Determine the maximum */
            /* max_phi = phi[i] + log_in_a[j][i] ... */
            max_value = -DBL_MAX;
            max_id = -1;
            for (i = 0; i < mo->s[St].in_states; i++) {
                i_id = mo->s[St].in_id[i];
                if (v->phi[i_id] != +1 && v->log_in_a[St][i] != +1) {
                    value = v->phi[i_id] + v->log_in_a[St][i];
                    if (value > max_value) {
                        max_value = value;
                        max_id = i_id;
                    }
                }
            }
            /* No maximum found (that is, state never reached)
               or the output O[t] = 0.0: */
            if (max_id < 0) {
                v->phi[St] = +1;
            } else {
                v->phi[St] = max_value;
                v->psi[t][St] = max_id;
                v->path_len[St] = v->path_len[max_id] + 1;
            }
        }
    }
#undef CUR_PROC
}

/*============================================================================*/
/** Return the viterbi path of the sequence.  */
int *ghmm_dmodel_viterbi(ghmm_dmodel * mo, int *o, int len, int *pathlen, double *log_p)
{
#define CUR_PROC "ghmm_dmodel_viterbi"

    int *state_seq = NULL;
    int t, j, i, i_id, St, max_id;
    int end_state, next_state, prev_state;
    int len_path, state_seq_index;
    int *plen, *exchange;
    double value, max_value, *temp;
    local_store_t *v;

    /* for silent states: initializing path length with a multiple
       of the sequence length
       and sort the silent states topological */
    if (mo->model_type & GHMM_kSilentStates) {
        ghmm_dmodel_order_topological(mo);
    }

    /* Allocate the matrices log_in_a, log_b,Vektor phi, phi_new, Matrix psi */
    v = viterbi_alloc(mo, len);
    if (!v) {
        GHMM_LOG_QUEUED(LCONVERTED);
        goto STOP;
    }

    ARRAY_CALLOC(plen, mo->N);

    /* Precomputing the log(a_ij) and log(bj(ot)) */
    Viterbi_precompute(mo, o, len, v);

    /* Initialization, that is t = 0 */
    for (j = 0; j < mo->N; j++) {
        if (mo->s[j].pi == 0.0 || v->log_b[j][o[0]] == +1) /* instead of 0, DBL_EPS.? */
            v->phi[j] = +1;
        else {
            v->phi[j] = log(mo->s[j].pi) + v->log_b[j][o[0]];
            v->path_len[j] = 1;
        }
    }
    if (mo->model_type & GHMM_kSilentStates) {  /* could go into silent state at t=0 */
        viterbi_silent(mo, t = 0, v);
    }

    /* t > 0 */
    for (t = 1; t < len; t++) {
        for (j = 0; j < mo->N; j++) {
            /* initialization of phi, psi */
            v->phi_new[j] = +1;
            v->psi[t][j] = -1;
        }

        for (St=0; St < mo->N; St++) {
            /* Determine the maximum */
            /* max_phi = phi[i] + log_in_a[j][i] ... */
            if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[St]) {
                max_value = -DBL_MAX;
                max_id = -1;
                for (i = 0; i < mo->s[St].in_states; i++) {
                    i_id = mo->s[St].in_id[i];
                    if (v->phi[i_id] != +1 && v->log_in_a[St][i] != +1) {
                        value = v->phi[i_id] + v->log_in_a[St][i];
                        if (value > max_value) {
                            max_value = value;
                            max_id = i_id;
                        }
                    }
                }

                /* No maximum found (that is, state never reached)
                   or the output O[t] = 0.0: */
                if (max_id >= 0 && v->log_b[St][o[t]] != +1) {
                    v->phi_new[St] = max_value + v->log_b[St][o[t]];
                    v->psi[t][St]  = max_id;
                    plen[St] = v->path_len[max_id] + 1;
                }
            }
        }                       /* complete time step for emitting states */

        /* Exchange pointers */
        temp = v->phi; v->phi = v->phi_new; v->phi_new = temp;
        exchange = v->path_len; v->path_len = plen; plen = exchange;

        /* complete time step for silent states */
        if (mo->model_type & GHMM_kSilentStates) {
            viterbi_silent(mo, t, v);
        }
    }                           /* Next observation , increment time-step */

    /* Termination - find end state */
    max_value = -DBL_MAX;
    end_state = -1;
    for (j=0; j < mo->N; j++) {
        if (v->phi[j] != +1 && v->phi[j] > max_value) {
            max_value = v->phi[j];
            end_state = j;
        }
    }
    if (end_state < 0) {
        /* Sequence can't be generated from the model! */
        *log_p = +1;
        len_path = 1;
    }
    else {
        *log_p = max_value;
        len_path = v->path_len[end_state];
    }

    /* allocating state_seq array */
    ARRAY_CALLOC(state_seq, len_path+1);
    t = len-1;
    state_seq_index = len_path-1;
    state_seq[len_path] = -1;
    state_seq[state_seq_index--] = end_state;
    prev_state = end_state;

    /* backtrace is simple if the path length is known */
    for (; state_seq_index >= 0;  state_seq_index--) {
        next_state = v->psi[t][prev_state];
        state_seq[state_seq_index] = prev_state = next_state;
        if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[next_state])
            t--;
    }
    *pathlen = len_path;
    if (state_seq_index >= 0 || t > 0)
        GHMM_LOG_PRINTF(LERROR, LOC, "state_seq_index = %d, t = %d", state_seq_index, t);

    /* Free the memory space */
    viterbi_free(&v, mo->N, len);
    m_free(plen);
    return state_seq;
  STOP:                        /* Label STOP from ARRAY_[CM]ALLOC */
    /* Free the memory space */
    *pathlen = -1;
    viterbi_free(&v, mo->N, len);
    m_free(plen);
    m_free(state_seq);
    return NULL;
#undef CUR_PROC
}                               /* ghmm_dmodel_viterbi */

/*============================================================================*/
double ghmm_dmodel_viterbi_logp(ghmm_dmodel * mo, int *o, int len, int *state_seq)
{
#define CUR_PROC "ghmm_dmodel_viterbi_logp"
    double log_p = 0.0;
    int unused;
    int *vpath = ghmm_dmodel_viterbi(mo, o, len, &unused, &log_p);
    m_free(vpath);
    return log_p;
#undef CUR_PROC
}                               /* ghmm_dmodel_viterbi_logp */