/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/viterbi.c
* Authors: Wasinee Rungsarityotin, Benjamin Georgi
*
* Copyright (C) 1998-2004 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
* Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 1713 $
* from $Date: 2006-10-16 10:06:28 -0400 (Mon, 16 Oct 2006) $
* last change by $Author: grunau $.
*
*******************************************************************************/
/* Possible sources of errors: initialisation, pushback (the loop after) */
#ifdef HAVE_CONFIG_H
# include "../config.h"
#endif
#include <float.h>
#include <math.h>
#include <assert.h>
#include <stdlib.h>
#include "ghmm.h"
#include "mprintf.h"
#include "mes.h"
#include "matrix.h"
#include "pmodel.h"
#include "psequence.h"
#include "pviterbi.h"
#include "ghmm_internals.h"
typedef struct plocal_store_t {
/** precomputed log probabilities for transitions into the states
for each transition class **/
double *** log_in_a;
/** precomputed log probabilities for each state for the emissions **/
double ** log_b;
/** lookback matrix for the last offset steps **/
double *** phi;
/** log probabilities for the current u, v and every state **/
double *phi_new;
/** traceback matrix **/
int ***psi;
/** for convinience store a pointer to the model **/
ghmm_dpmodel * mo;
/** for the debug mode store information of matrix sizes **/
/** length of sequence X determines size of psi **/
int len_x;
/** length of sequence Y determines size of phi and psi **/
int len_y;
/** non functional stuff **/
int *topo_order;
int topo_order_length;
} plocal_store_t;
static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv);
static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y);
static int pviterbi_free(plocal_store_t **v, int n, int len_x, int len_y,
int max_offset_x, int max_offset_y);
static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y);
static double get_phi(plocal_store_t * pv, int x, int y, int offset_x,
int offset_y, int state);
static void set_phi(plocal_store_t * pv, int x, int y, int ghmm_dstate,
double prob);
static void push_back_phi(plocal_store_t * pv, int length_y);
static void set_psi(plocal_store_t * pv, int x, int y, int ghmm_dstate,
int from_state);
/*============================================================================*/
static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y) {
#define CUR_PROC "pviterbi_alloc"
plocal_store_t* v = NULL;
int i, j;
ARRAY_CALLOC (v, 1);
v->mo = mo;
v->len_y = len_y;
v->len_x = len_x;
/* Allocate the log_in_a's -> individal lenghts */
ARRAY_CALLOC (v->log_in_a, mo->N);
/* first index of log_in_a: target state */
for (j = 0; j < mo->N; j++){
/* second index: source state */
ARRAY_CALLOC (v->log_in_a[j], mo->s[j].in_states);
for (i=0; i<mo->s[j].in_states; i++) {
/* third index: transition classes of source state */
ARRAY_CALLOC (v->log_in_a[j][i], mo->s[mo->s[j].in_id[i]].kclasses);
}
}
ARRAY_CALLOC (v->log_b, mo->N);
for (j=0; j<mo->N; j++) {
ARRAY_CALLOC (v->log_b[j], ghmm_dpmodel_emission_table_size(mo, j) + 1);
}
if (!(v->log_b)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
v->phi = ighmm_cmatrix_3d_alloc(mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N);
if (!(v->phi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
ARRAY_CALLOC (v->phi_new, mo->N);
v->psi = ighmm_dmatrix_3d_alloc(len_x + mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N);
if (!(v->psi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
v->topo_order_length = 0;
ARRAY_CALLOC (v->topo_order, mo->N);
return(v);
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
pviterbi_free((&v), mo->N, len_x, len_y, mo->max_offset_x, mo->max_offset_y);
return(NULL);
#undef CUR_PROC
} /* viterbi_alloc */
/*============================================================================*/
static int pviterbi_free(plocal_store_t **v, int n, int len_x, int len_y,
int max_offset_x, int max_offset_y) {
#define CUR_PROC "pviterbi_free"
int i, j;
mes_check_ptr(v, return(-1));
if( !*v ) return(0);
for (j = 0; j < n; j++) {
for (i=0; i < (*v)->mo->s[j].in_states; i++)
m_free((*v)->log_in_a[j][i]);
m_free((*v)->log_in_a[j]);
}
m_free((*v)->log_in_a);
for (j=0; j<n; j++)
m_free((*v)->log_b[j]);
m_free((*v)->log_b);
ighmm_cmatrix_3d_free( &((*v)->phi), max_offset_x + 1,
len_y + max_offset_y + 1);
m_free((*v)->phi_new);
ighmm_dmatrix_3d_free( &((*v)->psi), len_x + max_offset_x + 1, len_y + max_offset_y + 1);
m_free((*v)->topo_order);
(*v)->mo = NULL;
m_free(*v);
return(0);
#undef CUR_PROC
} /* viterbi_free */
/*============================================================================*/
static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv) {
int j, k;
ghmm_dpmodel * mo;
printf("Local store for pair HMM viterbi algorithm\n");
printf("Log in a:\n");
mo = pv->mo;
for (j = 0; j < mo->N; j++){
printf("state %i in states %i\n", j, mo->s[j].in_states);
for (k=0; k<mo->s[j].in_states; k++)
printf("FIXME: log_in_a has three dimensions!"/*From: %i %f\n", mo->s[j].in_id[k], pv->log_in_a[j][k]*/);
}
printf("Log b:\n");
for (j = 0; j < mo->N; j++){
printf("state %i #chars: %i\n", j, ghmm_dpmodel_emission_table_size(mo, j));
for (k=0; k<ghmm_dpmodel_emission_table_size(mo, j); k++)
printf("Emission prob char: %i %f\n", k, pv->log_b[j][k]);
}
}
/*============================================================================*/
static void pviterbi_precompute( ghmm_dpmodel *mo, plocal_store_t *v) {
#define CUR_PROC "pviterbi_precompute"
int i, j, emission, t_class;
/* Precomputing the log(a_ij) */
for (j = 0; j < mo->N; j++)
for (i = 0; i < mo->s[j].in_states; i++)
for (t_class = 0; t_class < mo->s[mo->s[j].in_id[i]].kclasses; t_class++){
if ( mo->s[j].in_a[i][t_class] == 0.0 ) /* DBL_EPSILON ? */
v->log_in_a[j][i][t_class] = +1; /*Not used any further in the calculations */
else
v->log_in_a[j][i][t_class] = log( mo->s[j].in_a[i][t_class] );
}
/* Precomputing the log emission probabilities for each state*/
for (j = 0; j < mo->N; j++) {
for (emission = 0; emission < ghmm_dpmodel_emission_table_size(mo,j); emission++) {
if (1) {
if ( mo->s[j].b[emission] == 0.0 ) /* DBL_EPSILON ? */
v->log_b[j][emission] = +1;
else
v->log_b[j][emission] = log( mo->s[j].b[emission] );
}
else{
v->log_b[j][emission] = 0.0;
}
}
v->log_b[j][emission] = 1; /* the last field is for invalid emissions */
}
#undef CUR_PROC
}/* viterbi_precompute */
/*============================================================================*/
/** */
/* static void p__viterbi_silent( ghmm_dmodel *mo, int t, plocal_store_t *v ) */
/* { */
/* int topocount; */
/* int i, k; */
/* double max_value, value; */
/* for ( topocount = 0; topocount < mo->topo_order_length; topocount++) { */
/* k = mo->topo_order[topocount]; */
/* if ( mo->silent[k] ) { /\* Silent states *\/ */
/* /\* Determine the maximum *\/ */
/* /\* max_phi = phi[i] + log_in_a[j][i] ... *\/ */
/* max_value = -DBL_MAX; */
/* v->psi[t][k] = -1; */
/* for (i = 0; i < mo->s[k].in_states; i++) { */
/* if ( v->phi[ mo->s[k].in_id[i] ] != +1 && */
/* v->log_in_a[k][i] != +1) { */
/* value = v->phi[ mo->s[k].in_id[i] ] + v->log_in_a[k][i]; */
/* if (value > max_value) { */
/* max_value = value; */
/* v->psi[t][k] = mo->s[k].in_id[i]; */
/* } */
/* } */
/* } */
/* /\* No maximum found (that is, state never reached) */
/* or the output O[t] = 0.0: *\/ */
/* if (max_value == -DBL_MAX) { */
/* v->phi[k] = +1; */
/* } else { */
/* v->phi[k] = max_value; */
/* } */
/* } */
/* } */
/* } */
/*============================================================================*/
static double sget_log_in_a(plocal_store_t * pv, int i, int j, ghmm_dpseq * X, ghmm_dpseq * Y, int index_x, int index_y) {
/* determine the transition class for the source ghmm_dstate */
int id = pv->mo->s[i].in_id[j];
int cl = pv->mo->s[id].class_change->get_class(pv->mo, X, Y, index_x,index_y,
pv->mo->s[id].class_change->user_data);
return pv->log_in_a[i][j][cl];
}
/*============================================================================*/
static double log_b(plocal_store_t * pv, int state, int emission) {
#ifdef DEBUG
if (ghmm_dstate > pv->mo->N)
fprintf(stderr, "log_b: State index out of bounds %i > %i\n", ghmm_dstate, pv->mo->N);
if (emission > ghmm_dpmodel_emission_table_size(pv->mo, state))
fprintf(stderr, "log_b: Emission index out of bounds %i > %i for ghmm_dstate %i\n",
emission, ghmm_dpmodel_emission_table_size(pv->mo, state), state);
#endif
return pv->log_b[state][emission];
}
/*============================================================================*/
static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y) {
#ifdef DEBUG
int emission;
#endif
int u, v, j, i, off_x, y;
double log_in_a_ij;
double value, max_value, previous_prob, log_b_i;
/* printf("ghmm_dpmodel_viterbi init\n"); */
ghmm_dpmodel * mo = pv->mo;
double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*,
int, int);
log_in_a = &sget_log_in_a;
/* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/
for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
for (j=0; j<mo->N; j++) {
pv->phi[off_x][y][j] = +1;
}
if ( mo->model_type & GHMM_kSilentStates ) { /* could go into silent state at t=0 */
/*p__viterbi_silent( mo, t=0, v);*/
}
/*for (j = 0; j < mo->N; j++)
{
printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]);
}
for( i = 0; i < mo->N; i++){
printf("%d\t", former_matchcount[i]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", recent_matchcount[i]);
}*/
/* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */
/* this is in principle the same as the main recurrence but adds initial
probabilities to states that cannot be inhabitated at u=0, v=0 because
of greater offsets than one
iteration start is u=-1 v=-1 to allow states with offset_x == 0
which corresponds to a series of gap states before reading the first
character of x at position x=0, y=v or equally for offset_y == 0 */
/* u, v <= max offsets */
for (u = -1; u <= mo->max_offset_x; u++) {
for (v = -mo->max_offset_y; v < Y->length; v++) {
for (j = 0; j < mo->N; j++)
{
/** initialization of phi (lookback matrix), psi (traceback) **/
set_phi(pv, u, v, j, +1);
set_psi(pv, u, v, j, -1);
}
/* for each state i */
for (i = 0; i < mo->N; i++) {
/* Determine the maximum */
/* max_phi = phi[i] + log_in_a[j][i] ... */
if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
max_value = -DBL_MAX;
set_psi(pv, u, v, i, -1);
for (j = 0; j < mo->s[i].in_states; j++) {
/* look back in the phi matrix at the offsets */
previous_prob = get_phi(pv, u, v, mo->s[i].offset_x,
mo->s[i].offset_y, mo->s[i].in_id[j]);
log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
if ( previous_prob != +1 && log_in_a_ij != +1) {
value = previous_prob + log_in_a_ij;
if (value > max_value) {
max_value = value;
set_psi(pv, u, v, i, mo->s[i].in_id[j]);
}
}
else
{;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */
}
#ifdef DEBUG
emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y);
if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
printf("State %i\n", i);
ghmm_dpmodel_state_print(&(mo->s[i]));
printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
ghmm_dpmodel_emission_table_size(mo, i), emission);
}
#endif
log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y));
/* this is the difference from the main loop:
check whether this state could be an initial state and add the
initial probability */
if (log_b_i == +1 ) {
set_phi(pv, u, v, i, +1);
}
else {
if (max_value == -DBL_MAX)
set_phi(pv, u, v, i, +1);
else
set_phi(pv, u, v, i, max_value);
/* if (mo->s[i].pi != 0 && mo->s[i].offset_x - 1 == u &&
mo->s[i].offset_y - 1 == v) { */
if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u &&
mo->s[i].offset_y - 1 == v) {
set_phi(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG
printf("Initial log prob state %i at (%i, %i) = %f\n", i, u, v, get_phi(pv, u, v, 0, 0, i));
printf("Characters emitted X: %i, Y: %i\n",
ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
}
if (get_phi(pv, u, v, 0, 0, i) != 1)
set_phi(pv, u, v, i, get_phi(pv, u, v, 0, 0, i) + log_b_i);
}
}
/* if (v == 0) {
printf"(%i, %i, %i) preceding %i\n", u, v, i, pv->psi[u][v][i]);
} */
} /* complete time step for emitting states */
/* last_osc = osc; */
/* save last transition class */
/*if (mo->model_type & kSilentStates) {
p__viterbi_silent( mo, t, v );
}*/ /* complete time step for silent states */
/**************
for (j = 0; j < mo->N; j++)
{
printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", former_matchcount[i]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", recent_matchcount[i]);
}
****************/
} /* End for v in Y */
/* Next character in X */
/* push back the old phi values */
push_back_phi(pv, Y->length);
} /* End for u in X */
}
/*============================================================================*/
/* call this function when you are at position (x, y) and want to get the
probability of state 'state' which is offset_x and offset_y away from x,y */
static double get_phi(plocal_store_t * pv, int x, int y, int offset_x, int offset_y, int state){
#ifdef DEBUG
if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y)
fprintf(stderr, "get_phi: out of bounds %i %i %i\n",
x + pv->mo->max_offset_x, y + pv->mo->max_offset_y, state);
#endif
if (y - offset_y + pv->mo->max_offset_y >= 0)
return pv->phi[offset_x][y - offset_y + pv->mo->max_offset_y][state];
else
return 1;
}
/*============================================================================*/
/* set the value of this matrix cell */
static void set_phi(plocal_store_t * pv, int x, int y, int state, double prob){
#ifdef DEBUG
if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y)
fprintf(stderr, "set_phi: out of bounds %i %i %i %f\n",
x + pv->mo->max_offset_x, y + pv->mo->max_offset_y,
state, prob);
#endif
pv->phi[0][y + pv->mo->max_offset_y][state] = prob;
}
/*============================================================================*/
/* since we only keep the frontier we have to push back when ever a row is
complete */
static void push_back_phi(plocal_store_t * pv, int length_y){
int off_x, y, j;
/* push back the old phi values */
for (off_x=pv->mo->max_offset_x; off_x>0; off_x--)
for (y=0; y<length_y + pv->mo->max_offset_y + 1; y++)
for (j=0; j<pv->mo->N; j++)
pv->phi[off_x][y][j] = pv->phi[off_x-1][y][j];
}
/*============================================================================*/
static void set_psi(plocal_store_t * pv, int x, int y, int state, int from_state){
/* shift by max_offsets for negative indices */
#ifdef DEBUG
if (x > pv->len_x || y > pv->len_y || state > pv->mo->N ||
x < - pv->mo->max_offset_x || y < - pv->mo->max_offset_y)
fprintf(stderr, "set_psi: out of bounds %i %i %i %i\n",
x + pv->mo->max_offset_x, y + pv->mo->max_offset_y,
state, from_state);
#endif
pv->psi[x + pv->mo->max_offset_x][y + pv->mo->max_offset_y][state] = from_state;
}
/*============================================================================*/
static int get_psi(plocal_store_t * pv, int x, int y, int state) {
/* shift by max_offsets for negative indices*/
#ifdef DEBUG
if (x > pv->len_x || y > pv->len_y || state > pv->mo->N ||
x < - pv->mo->max_offset_x || y < - pv->mo->max_offset_y)
fprintf(stderr, "get_psi: out of bounds %i %i %i\n",
x + pv->mo->max_offset_x, y + pv->mo->max_offset_y,
state);
#endif
return pv->psi[x + pv->mo->max_offset_x][y + pv->mo->max_offset_y][state];
}
/*============================================================================*/
int *ghmm_dpmodel_viterbi_test(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
double *log_p, int *path_length) {
plocal_store_t *pv;
printf("---- viterbi test -----\n");
/*ghmm_dpmodel_print(mo);*/
pv = pviterbi_alloc(mo, X->length, Y->length);
printf("try free within pviterbi_test\n");
pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x ,
mo->max_offset_y);
printf("OK\n");
return NULL;
}
/*============================================================================*/
int *ghmm_dpmodel_viterbi(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p,
int *path_length) {
return ghmm_dpmodel_viterbi_variable_tb(mo, X, Y, log_p, path_length, -1);
}
/*============================================================================*/
int *ghmm_dpmodel_viterbi_variable_tb(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
double *log_p, int *path_length,
int start_traceback_with) {
#define CUR_PROC "ghmm_dpmodel_viterbi"
int u, v, j, i, off_x, off_y, current_state_index;
double value, max_value, previous_prob;
plocal_store_t *pv;
int *state_seq = NULL;
int emission;
double log_b_i, log_in_a_ij;
double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int);
/* printf("---- viterbi -----\n"); */
i_list * state_list;
state_list = ighmm_list_init_list();
log_in_a = &sget_log_in_a;
/* int len_path = mo->N*len; the length of the path is not known apriori */
/* if (mo->model_type & kSilentStates && */
/* mo->silent != NULL && */
/* mo->topo_order == NULL) { */
/* ghmm_dmodel_topo_order( mo ); */
/* } */
/* Allocate the matrices log_in_a, log_b,Vektor phi, phi_new, Matrix psi */
pv = pviterbi_alloc(mo, X->length, Y->length);
if (!pv) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }
/* Precomputing the log(a_ij) and log(bj(ot)) */
pviterbi_precompute(mo, pv);
/* Initialize the lookback matrix (for positions [-offsetX,0], [-1, len_y]*/
init_phi(pv, X, Y);
/* u > max_offset_x , v starts -1 to allow states with offset_x == 0
which corresponds to a series of gap states before reading the first
character of x at position x=0, y=v */
/** THIS IS THE MAIN RECURRENCE **/
for (u = mo->max_offset_x + 1; u < X->length; u++) {
for (v = -mo->max_offset_y; v < Y->length; v++) {
for (j = 0; j < mo->N; j++)
{
/** initialization of phi (lookback matrix), psi (traceback) **/
set_phi(pv, u, v, j, +1);
set_psi(pv, u, v, j, -1);
}
for (i = 0; i < mo->N; i++) {
/* Determine the maximum */
/* max_phi = phi[i] + log_in_a[j][i] ... */
if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
max_value = -DBL_MAX;
set_psi(pv, u, v, i, -1);
for (j = 0; j < mo->s[i].in_states; j++) {
/* look back in the phi matrix at the offsets */
previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]);
log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
if ( previous_prob != +1 && log_in_a_ij != +1) {
value = previous_prob + log_in_a_ij;
if (value > max_value) {
max_value = value;
set_psi(pv, u, v, i, mo->s[i].in_id[j]);
}
}
else
{;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */
}
emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y);
#ifdef DEBUG
if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
printf("State %i\n", i);
ghmm_dpmodel_state_print(&(mo->s[i]));
printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
ghmm_dpmodel_emission_table_size(mo, i), emission);
}
#endif
log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y));
/* No maximum found (that is, state never reached)
or the output O[t] = 0.0: */
if (max_value == -DBL_MAX ||/* and then also: (v->psi[t][j] == -1) */
log_b_i == +1 ) {
set_phi(pv, u, v, i, +1);
}
else
set_phi(pv, u, v, i, max_value + log_b_i);
}
} /* complete time step for emitting states */
/* last_osc = osc; */
/* save last transition class */
/*if (mo->model_type & kSilentStates) {
p__viterbi_silent( mo, t, v );
}*/ /* complete time step for silent states */
/**************
for (j = 0; j < mo->N; j++)
{
printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", former_matchcount[i]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", recent_matchcount[i]);
}
****************/
} /* End for v in Y */
/* Next character in X */
push_back_phi(pv, Y->length);
} /* End for u in X */
/* Termination */
max_value = -DBL_MAX;
ighmm_list_append(state_list, -1);
/* if start_traceback_with is -1 (it is by default) search for the most
likely state at the end of both sequences */
if (start_traceback_with == -1) {
for (j = 0; j < mo->N; j++){
#ifdef DEBUG
printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, u, Y->length-1, 0, 0, j));
#endif
if ( get_phi(pv, u, Y->length-1, 0, 0, j) != +1 &&
get_phi(pv, u, Y->length-1, 0, 0, j) > max_value) {
max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, j);
state_list->last->val = j;
}
}
}
/* this is the special traceback mode for the d & c algorithm that also
connects the traceback to the first state of the rest of the path */
else {
#ifdef DEBUG
printf("D & C traceback from state %i!\n", start_traceback_with);
printf("Last characters emitted X: %i, Y: %i\n",
ghmm_dpseq_get_char(X, mo->s[start_traceback_with].alphabet,
X->length-1),
ghmm_dpseq_get_char(Y, mo->s[start_traceback_with].alphabet,
Y->length-1));
for (j = 0; j < mo->N; j++){
printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, X->length-1, Y->length-1, 0, 0, j));
}
#endif
max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, start_traceback_with);
if (max_value != 1 && max_value > -DBL_MAX)
state_list->last->val = start_traceback_with;
}
if (max_value == -DBL_MAX) {
/* Sequence can't be generated from the model! */
*log_p = +1;
/* Backtracing doesn't work, because state_seq[*] allocated with -1 */
/* for (t = len - 2; t >= 0; t--)
state_list->last->val = -1; */
}
else {
/* Backtracing, should put DEL path nicely */
*log_p = max_value;
/* removed the handling of silent states here */
/* start trace back at the end of both sequences */
u = X->length - 1;
v = Y->length - 1;
current_state_index = state_list->first->val;
off_x = mo->s[current_state_index].offset_x;
off_y = mo->s[current_state_index].offset_y;
while (u - off_x >= -1 && v - off_y >= -1 && current_state_index != -1) {
/* while (u > 0 && v > 0) { */
/* look up the preceding state and save it in the first position of the
state list */
/* printf("Current state %i at (%i,%i) -> preceding state %i\n",
current_state_index, u, v, get_psi(pv, u, v, current_state_index)); */
/* update the current state */
current_state_index = get_psi(pv, u, v, current_state_index);
if (current_state_index != -1)
ighmm_list_insert(state_list, current_state_index);
/* move in the alignment matrix */
u -= off_x;
v -= off_y;
/* get the next offsets */
off_x = mo->s[current_state_index].offset_x;
off_y = mo->s[current_state_index].offset_y;
}
}
/* Free the memory space */
pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x ,
mo->max_offset_y);
/* printf("After traceback: last state = %i\n", state_list->last->val); */
state_seq = ighmm_list_to_array(state_list);
*path_length = state_list->length;
/* PRINT PATH */
/* fprintf(stderr, "Viterbi path: " ); */
/* int t; */
/* for(t=0; t < *path_length; t++) */
/* if (state_seq[t] >= 0) fprintf(stderr, " %d ", state_seq[t]); */
/* fprintf(stderr, "\n Freeing ... \n"); */
return (state_seq);
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
/* Free the memory space */
pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x,
mo->max_offset_y);
m_free(state_seq);
ighmm_list_free(state_list);
return NULL;
#undef CUR_PROC
} /* viterbi */
/*============================================================================*/
double ghmm_dpmodel_viterbi_logp(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
int *state_seq, int state_seq_len) {
#define CUR_PROC "ghmm_dpmodel_viterbi_logp"
int s, t, i, j, u, v;
double log_p = 0.0;
double log_b_i = 1.0;
double log_in_a = 1.0;
plocal_store_t *pv;
/* Allocate the matrices log_in_a, log_b,Vektor phi, phi_new, Matrix psi */
pv = pviterbi_alloc(mo, 0, 0);
pviterbi_precompute(mo, pv);
/* initial state */
t = 0; u = -1; v = -1;
if (state_seq_len > t) {
i = state_seq[t];
/* initial state probability */
/* log_p += log(mo->s[i].pi); */
log_p += mo->s[i].log_pi;
if (log_p == 1.0) {
pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y);
fprintf(stderr, "the initial probability of state %i is zero\n", i);
return 1.0;/* the initial prob is zero */
}
/* consume the first characters of the sequences */
u += mo->s[i].offset_x;
v += mo->s[i].offset_y;
/* get the emission probability */
log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y));
if (log_b_i == 1.0) { /* chars cant be emitted */
pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y);
fprintf(stderr, "characters (%i, %i) at position (%i, %i) cannot be emitted by state %i (t=%i)\n",
ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), u, v, i, t);
return 1.0;
}
log_p += log_b_i;
}
else { /* there is no path.. */
pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y);
fprintf(stderr, "No path given!\n");
return 1.0;
}
/* rest of the sequence */
for (t=1; t<state_seq_len; t++) {
j = i; /* predecessor state */
i = state_seq[t]; /* current state */
/* consume the next characters */
u += mo->s[i].offset_x;
v += mo->s[i].offset_y;
if (u >= X->length || v >= Y->length) { /* path consumes too many chars */
pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y);
fprintf(stderr, "path consumes too many chars\n");
return 1.0;
}
/* transition prob state j -> i*/
log_in_a = 1.0;
for (s=0; s<mo->s[i].in_states; s++) {
if (mo->s[i].in_id[s] == j) {
log_in_a = sget_log_in_a(pv, i, s, X, Y, u, v);
break;
}
}
if (log_in_a == 1.0) {
pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y);
fprintf(stderr, "transition (%i -> %i) at t=%i not possible\n", j, i,t);
return 1.0; /* transition not possible */
}
/* emission probability */
log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y));
if (log_b_i == 1.0) {
pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y);
fprintf(stderr, "characters (%i, %i) at position (%i, %i) cannot be emitted by state %i (t=%i)\n",
ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), u, v, i, t);
return 1.0; /* characters cant be emitted */
}
log_p += log_in_a + log_b_i;
}
pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x,
mo->max_offset_y);
/* check if all of the sequences has been consumed */
if (u != X->length - 1 && v != Y->length - 1) {
fprintf(stderr, "path consumes not all characters (%i of %i, %i of %i)\n",
u + 1, X->length, v + 1, Y->length);
return 1.0;
}
return log_p;
#undef CUR_PROC
} /* ghmm_dpmodel_viterbi_logp */