/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/pviterbi_propagate.c
* Authors: Matthias Heinig, Janne Grunau
*
* Copyright (C) 1998-2004 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
* Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 1713 $
* from $Date: 2006-10-16 10:06:28 -0400 (Mon, 16 Oct 2006) $
* last change by $Author: grunau $.
*
*******************************************************************************/
/* Possible sources of errors: initialisation, pushback (the loop after) */
#ifdef HAVE_CONFIG_H
# include "../config.h"
#endif
#include <float.h>
#include <math.h>
#include <assert.h>
#include <stdlib.h>
#include "ghmm.h"
#include "mprintf.h"
#include "mes.h"
#include "matrix.h"
#include "pmodel.h"
#include "psequence.h"
#include "pviterbi.h"
#include "pviterbi_propagate.h"
#include "ghmm_internals.h"
#define PROP_EPS 10e-6
/* --- typedefs --- */
/** Matrix cell **/
typedef struct cell {
/** x coordinate of the cell **/
int x;
/** y coordinate of the cell **/
int y;
/** state coordinate of the cell **/
int state;
/** state that was inhabited before **/
int previous_state;
/** probability up to the previous cell **/
double log_p;
/** transition from the provious state to this state **/
double log_a;
int ref_count;
} cell;
typedef struct plocal_propagate_store_t {
/** precomputed log probabilities for transitions into the states
for each transition class of the source state **/
double *** log_in_a;
/** precomputed log probabilities for each state for the emissions **/
double **log_b;
/** lookback matrix for the last offset steps **/
double *** phi;
/** log probabilities for the current u, v and every state **/
double *phi_new;
/** traceback matrix of cell pointers **/
cell **** end_of_first;
/** for convinience store a pointer to the model **/
ghmm_dpmodel * mo;
/** for the debug mode store information of matrix sizes **/
/** length of sequence X determines size of psi **/
int len_x;
/** length of sequence Y determines size of phi and psi **/
int len_y;
/** for the recursion reuse memory here is the start index **/
int start_x;
int start_y;
/** non functional stuff **/
int *topo_order;
int topo_order_length;
} plocal_propagate_store_t;
/* --==-- local declarations --==-- */
static plocal_propagate_store_t * pviterbi_propagate_alloc (ghmm_dpmodel *mo, int len_y);
static int pviterbi_propagate_free (plocal_propagate_store_t **v, int n, int max_offset_x,
int max_offset_y, int len_y);
static cell * init_cell(int x, int y, int ghmm_dstate, int previous_state, double log_p,
double log_a);
static void pviterbi_prop_precompute (ghmm_dpmodel *mo, plocal_propagate_store_t *pv);
static cell * pviterbi_propagate_step (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
cell * start, cell * stop, double * log_p,
plocal_propagate_store_t * pv);
static int * pviterbi_propagate_recursion(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
double *log_p, int *path_length, cell *start,
cell *stop, double max_size,
plocal_propagate_store_t * pv);
/*------------ Here comes the Propagate stuff ------------- */
/*============================================================================*/
static cell * init_cell (int x, int y, int state, int previous_state,
double log_p, double log_a) {
#define CUR_PROC "init_cell"
cell * mcell;
ARRAY_CALLOC (mcell, 1);
/* printf("Alloc cell: %i\n", sizeof(*mcell)); */
mcell->x = x;
mcell->y = y;
mcell->state = state;
mcell->previous_state = previous_state;
mcell->log_p = log_p;
mcell->log_a = log_a;
return mcell;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
return(NULL);
#undef CUR_PROC
}
/*============================================================================*/
cell * ghmm_dpmodel_copy_cell(cell * c) {
if (c)
return init_cell(c->x, c->y, c->state, c->previous_state, c->log_p, c->log_a);
else
return NULL;
}
/*============================================================================*/
void ghmm_dpmodel_print_cell(cell * c) {
if (c)
printf("(%i, %i) state: %i previous state: %i log p: %f log a: %f\n", c->x, c->y, c->state, c->previous_state, c->log_p, c->log_a);
else
printf("No cell\n");
}
/*============================================================================*/
static plocal_propagate_store_t * pviterbi_propagate_alloc (ghmm_dpmodel *mo, int len_y) {
#define CUR_PROC "pviterbi_propagate_alloc"
plocal_propagate_store_t* v = NULL;
int i, j, k;
ARRAY_CALLOC (v, 1);
v->mo = mo;
v->len_y = len_y;
/* Allocate the log_in_a's -> individal lenghts */
ARRAY_CALLOC (v->log_in_a, mo->N);
/* first index of log_in_a: target state */
for (j = 0; j < mo->N; j++){
/* second index: source state */
ARRAY_CALLOC (v->log_in_a[j], mo->s[j].in_states);
for (i=0; i<mo->s[j].in_states; i++) {
/* third index: transition classes of source state */
ARRAY_CALLOC (v->log_in_a[j][i], mo->s[mo->s[j].in_id[i]].kclasses);
}
}
ARRAY_CALLOC (v->log_b, mo->N);
for (j=0; j<mo->N; j++) {
ARRAY_CALLOC (v->log_b[j], ghmm_dpmodel_emission_table_size(mo, j) + 1);
}
if (!(v->log_b)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
v->phi = ighmm_cmatrix_3d_alloc(mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N);
if (!(v->phi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
ARRAY_CALLOC (v->phi_new, mo->N);
ARRAY_CALLOC (v->end_of_first, mo->max_offset_x + 1);
for (j=0; j<mo->max_offset_x + 1; j++) {
ARRAY_CALLOC (v->end_of_first[j], len_y + mo->max_offset_y + 1);
for (i=0; i<len_y + mo->max_offset_y + 1; i++) {
ARRAY_CALLOC (v->end_of_first[j][i], mo->N);
for (k=0; k<mo->N; k++)
v->end_of_first[j][i][k] = NULL;
/*ARRAY_CALLOC (v->end_of_first[j][i][k], 1);*/
}
}
v->topo_order_length = 0;
ARRAY_CALLOC (v->topo_order, mo->N);
return(v);
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
pviterbi_propagate_free(&v, mo->N, mo->max_offset_x, mo->max_offset_y, len_y);
return(NULL);
#undef CUR_PROC
}
/*============================================================================*/
static void pviterbi_prop_precompute (ghmm_dpmodel *mo, plocal_propagate_store_t *pv)
{
#define CUR_PROC "pviterbi_precompute"
int i, j, emission, t_class;
/* Precomputing the log(a_ij) */
for (j = 0; j < mo->N; j++)
for (i = 0; i < mo->s[j].in_states; i++)
for (t_class = 0; t_class < mo->s[mo->s[j].in_id[i]].kclasses; t_class++){
if ( mo->s[j].in_a[i][t_class] == 0.0 )
pv->log_in_a[j][i][t_class] = +1;
else
pv->log_in_a[j][i][t_class] = log( mo->s[j].in_a[i][t_class] );
}
/* Precomputing the log emission probabilities for each state*/
for (j = 0; j < mo->N; j++) {
for (emission = 0; emission < ghmm_dpmodel_emission_table_size(mo,j); emission++) {
if (1) {
if ( mo->s[j].b[emission] == 0.0 ) /* DBL_EPSILON ? */
pv->log_b[j][emission] = +1;
else
pv->log_b[j][emission] = log( mo->s[j].b[emission] );
}
else{
pv->log_b[j][emission] = 0.0;
}
}
pv->log_b[j][emission] = 1; /* the last field is for invalid emissions */
}
#undef CUR_PROC
}/* viterbi_precompute */
/*============================================================================*/
static int pviterbi_propagate_free (plocal_propagate_store_t **v, int n,
int max_offset_x, int max_offset_y,
int len_y) {
#define CUR_PROC "pviterbi_propagate_free"
int j, i;
mes_check_ptr(v, return(-1));
if( !*v ) return(0);
for (j = 0; j < n; j++) {
for (i=0; i<(*v)->mo->s[j].in_states; i++)
m_free((*v)->log_in_a[j][i]);
m_free((*v)->log_in_a[j]);
}
m_free((*v)->log_in_a);
for (j=0; j<n; j++)
m_free((*v)->log_b[j]);
m_free((*v)->log_b);
ighmm_cmatrix_3d_free(&((*v)->phi), max_offset_x + 1, len_y + max_offset_y + 1);
m_free((*v)->phi_new);
if ((*v)->end_of_first) {
for (j=0; j<max_offset_x + 1; j++) {
if ((*v)->end_of_first[j]) {
for (i=0; i<len_y + max_offset_y + 1; i++)
if ((*v)->end_of_first[j][i])
m_free((*v)->end_of_first[j][i]);
m_free((*v)->end_of_first[j]);
}
}
m_free((*v)->end_of_first);
}
m_free((*v)->topo_order);
m_free(*v);
return(0);
#undef CUR_PROC
}
/*============================================================================*/
static double sget_log_in_a_prop(plocal_propagate_store_t * pv, int i, int j,
ghmm_dpseq * X, ghmm_dpseq * Y, int index_x,
int index_y) {
/* determine the transition class for the source state */
int id = pv->mo->s[i].in_id[j];
int cl = pv->mo->s[id].class_change->get_class(pv->mo, X, Y, index_x,index_y,
pv->mo->s[id].class_change->user_data);
return pv->log_in_a[i][j][cl];
}
/*============================================================================*/
static double log_b_prop(plocal_propagate_store_t * pv, int state, int emission) {
#ifdef DEBUG
if (state > pv->mo->N)
fprintf(stderr, "log_b_prop: State index out of bounds %i > %i\n", state, pv->mo->N);
if (emission > ghmm_dpmodel_emission_table_size(pv->mo, state))
fprintf(stderr, "log_b_prop: Emission index out of bounds %i > %i for state %i\n",
emission, ghmm_dpmodel_emission_table_size(pv->mo, state), state);
#endif
return pv->log_b[state][emission];
}
/*============================================================================*/
static double get_phi_prop(plocal_propagate_store_t * pv, int x, int y,
int offset_x, int offset_y, int state){
#ifdef DEBUG
if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y)
fprintf(stderr, "get_phi_prop: out of bounds %i %i %i\n",
x + pv->mo->max_offset_x, y + pv->mo->max_offset_y, state);
#endif
if (y - offset_y + pv->mo->max_offset_y >= pv->start_y)
return pv->phi[offset_x][y - offset_y + pv->mo->max_offset_y][state];
else
return 1;
}
/*============================================================================*/
/* set the value of this matrix cell */
static void set_phi_prop (plocal_propagate_store_t * pv, int x, int y,
int state, double prob){
#ifdef DEBUG
if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y)
fprintf(stderr, "set_phi_prop: out of bounds %i %i %i %f\n",
x + pv->mo->max_offset_x, y + pv->mo->max_offset_y,
state, prob);
#endif
pv->phi[0][y + pv->mo->max_offset_y][state] = prob;
}
/*============================================================================*/
static void set_end_of_first (plocal_propagate_store_t * pv, int x, int y,
int state, cell * end_of_first){
#ifdef DEBUG
if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y) {
fprintf(stderr, "set_end_of_first: out of bounds %i %i %i\n",
x + pv->mo->max_offset_x, y + pv->mo->max_offset_y,
state);
ghmm_dpmodel_print_cell(end_of_first);
}
#endif
pv->end_of_first[0][y + pv->mo->max_offset_y][state] = end_of_first;
}
/*============================================================================*/
static cell * get_end_of_first (plocal_propagate_store_t * pv, int x, int y,
int offset_x, int offset_y, int state) {
#ifdef DEBUG
if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y)
fprintf(stderr, "get_end_of_first: out of bounds %i %i %i\n",
x + pv->mo->max_offset_x, y + pv->mo->max_offset_y,
state);
#endif
if (y - offset_y >= 0)
return pv->end_of_first[offset_x][y - offset_y + pv->mo->max_offset_y][state];
else
return NULL;
}
/*============================================================================*/
/* since we only keep the frontier we have to push back when ever a row is
complete */
static void push_back_phi_prop (plocal_propagate_store_t * pv, int length_y){
int off_x, y, j;
/* push back the old phi values */
for (off_x=pv->mo->max_offset_x; off_x>0; off_x--)
for (y=0; y<length_y + pv->mo->max_offset_y + 1; y++)
for (j=0; j<pv->mo->N; j++) {
pv->phi[off_x][y][j] = pv->phi[off_x-1][y][j];
pv->end_of_first[off_x][y][j] = pv->end_of_first[off_x-1][y][j];
}
}
/*============================================================================*/
static void init_start_stop (cell * start, cell *stop, ghmm_dpseq * X,
ghmm_dpseq * Y, int * start_x, int * start_y,
int * stop_x, int * stop_y) {
if (start != NULL) {
*start_x = start->x;
*start_y = start->y;
}
else {
*start_x = 0;
*start_y = 0;
}
if (stop != NULL) {
*stop_x = stop->x;
*stop_y = stop->y;
}
else {
*stop_x = X->length;
*stop_y = Y->length;
}
}
/*============================================================================*/
int * ghmm_dpmodel_viterbi_propagate (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
double *log_p, int *path_length, double max_size) {
#define CUR_PROC "ghmm_dpmodel_viterbi_propagate"
/* Divide and conquer algorithm to reduce the memory requirement */
/* 1. Compute the alignment of X vs Y and propagate the middle point*/
/* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and
X[len/2+1:len] vs Y[m+1:len] */
/* Break the recursion if the lengths of both sequences become tractable
for the normal ghmm_dpmodel_viterbi algorithm */
/* start of the implementation */
/* give sequence length of X = 0 to ghmm_dpmodel_viterbi alloc so traceback matrix
will not be allocated */
plocal_propagate_store_t * pv = pviterbi_propagate_alloc(mo, Y->length);
/* check if it worked */
if (!pv) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }
/* Precomputing the log(a_ij) and log(bj(ot)) */
pviterbi_prop_precompute(mo, pv);
/* Launch the recursion */
/* double step_log;
pviterbi_propagate_step(mo, X, Y, NULL, NULL, &step_log,pv);
printf("step log for the whole sequence: %f\n", step_log); */
return pviterbi_propagate_recursion(mo, X, Y, log_p, path_length,
NULL, NULL,
max_size, pv);
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
/* Free the memory space */
pviterbi_propagate_free(&pv, mo->N, mo->max_offset_x, mo->max_offset_y, Y->length);
return NULL;
#undef CUR_PROC
}
/*============================================================================*/
static int * pviterbi_propagate_recursion (ghmm_dpmodel *mo, ghmm_dpseq * X,
ghmm_dpseq * Y, double *log_p,
int *path_length, cell *start,
cell *stop, double max_size,
plocal_propagate_store_t * pv) {
#define CUR_PROC "pviterbi_propagate_recursion"
/* Divide and conquer algorithm to reduce the memory requirement */
/* 1. Compute the alignment of X vs Y and propagate the middle point */
/* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and
X[len/2+1:len] vs Y[m+1:len] */
/* Break the recursion if the lengths of both sequences become tractable
for the normal ghmm_dpmodel_viterbi algorithm */
/* start of the implementation */
int * path_seq = NULL;
int start_x, start_y, stop_x, stop_y;
double * original_pi=NULL;
int i;
cell * middle;
int length1, length2;
double log_p1, log_p2;
int * path1, * path2;
init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
#ifdef DEBUG
printf("Recursion: start, stop cells\n");
if(start)
ghmm_dpmodel_print_cell(start);
else
printf("(0, 0) first segment\n");
if(stop)
ghmm_dpmodel_print_cell(stop);
else
printf("(%i, %i) last segment\n", stop_x, stop_y);
#endif
/* Break the recursion if the lengths of both sequences become tractable
for the normal ghmm_dpmodel_viterbi algorithm */
if ((double)(stop_x - start_x) * (double)(stop_y - start_y) < max_size) {
/* to use the unchanged ghmm_dpmodel_viterbi algorithm take slices of the sequences */
ghmm_dpseq * tractable_X = ghmm_dpseq_slice(X, start_x, stop_x);
ghmm_dpseq * tractable_Y = ghmm_dpseq_slice(Y, start_y, stop_y);
/* if this is not the very first path segment starting at zero:
temporarily change the initial probabability to go into state k+1 */
#ifdef DEBUG
printf("ghmm_dpmodel_viterbi on slice x[%i:%i], y[%i:%i]\n",
start_x, stop_x, start_y, stop_y);
#endif
if (start != NULL) {
ARRAY_CALLOC (original_pi, mo->N);
/* save original pi and set all to zero */
for (i=0; i<mo->N; i++) {
original_pi[i] = mo->s[i].log_pi;
mo->s[i].log_pi = 1;
}
/* set the initial prob. for the state that was in the middle of path */
mo->s[start->state].log_pi = start->log_p + start->log_a;
/* compute the partial path */
if (stop != NULL)
path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, stop->previous_state);
else
path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, -1);
/* restore the original model */
for (i=0; i<mo->N; i++)
mo->s[i].log_pi = original_pi[i];
m_free(original_pi);
}
else {
if (stop != NULL)
path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, stop->previous_state);
else
path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, -1);
}
if (*log_p == 1) {
fprintf(stderr, "Problem with slice x[%i:%i], y[%i:%i]\n",
start_x, stop_x, start_y, stop_y);
}
return path_seq;
}
else {
/* 1. Compute the alignment of X vs Y and propagate the middle point */
double step_log_p = 1;
/* if this is not the very first path segment starting at zero:
temporarily change the initial probabability to go into state k+1 */
if (start != NULL) {
ARRAY_CALLOC(original_pi, mo->N);
/* save original pi and set all to zero */
for (i=0; i<mo->N; i++) {
original_pi[i] = mo->s[i].log_pi;
mo->s[i].log_pi = 1;
}
/* set the initial prob. for the state that was in the middle of path */
mo->s[start->state].log_pi = start->log_p + start->log_a;
}
middle = pviterbi_propagate_step(mo, X, Y, start, stop, &step_log_p, pv);
if (start != NULL) {
/* restore the original model */
for (i=0; i<mo->N; i++)
mo->s[i].log_pi = original_pi[i];
m_free(original_pi);
}
/* check if there is a middle */
if (!middle) {
fprintf(stderr, "(%i, %i)->(%i, %i) No middle found!\n",
start_x, start_y, stop_x, stop_y);
ARRAY_CALLOC(path_seq, 1);
path_seq[0] = -1;
*path_length = 1;
*log_p = 1;
return path_seq;
}
#ifdef DEBUG
else {
printf("(%i, %i)->(%i, %i) Middlepoint ", start_x, start_y,
stop_x, stop_y);
ghmm_dpmodel_print_cell(middle);
}
#endif
/* check if there is a path */
if (step_log_p == 1) {
ARRAY_CALLOC(path_seq, 1);
path_seq[0] = -1;
*path_length = 1;
*log_p = 1;
return path_seq;
}
/* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and
X[len/2+1:len] vs Y[m+1:len] */
length1 = 0;
log_p1 = 0;
path1 = pviterbi_propagate_recursion(mo, X, Y, &log_p1, &length1,
start, middle,
max_size, pv);
length2 = 0;
log_p2 = 0;
path2 = pviterbi_propagate_recursion(mo, X, Y, &log_p2, &length2,
middle, stop,
max_size, pv);
/* check the paths */
if (log_p1 == 1 || log_p2 == 1) {
ARRAY_CALLOC (path_seq, 1);
path_seq[0] = -1;
*path_length = 1;
*log_p = 1;
return path_seq;
}
/* concatenate the paths */
*path_length = length1 + length2;
*log_p = log_p2;
#ifdef DEBUG
/* check if the transition between the ends of the paths is possible */
for (i=0; i<mo->s[path1[length1-1]].in_states; i++){
if (mo->s[path1[length1-1]].in_id[i] == path2[0])
break;
}
if (i == mo->s[path1[length1-1]].in_states) {
printf("no transition between states %i -> %i\n", path1[length1-1], path2[0]);
}
printf("Conquer: start, stop cells\n");
if(start)
ghmm_dpmodel_print_cell(start);
else
printf("(0, 0) first segment\n");
if(stop)
ghmm_dpmodel_print_cell(stop);
else
printf("(%i, %i) last segment\n", stop_x, stop_y);
printf("Path 1:\n[");
for (i=0; i<length1; i++)
printf("%i,", path1[i]);
printf("\nPath 2:\n[");
for (i=0; i<length2; i++)
printf("%i,", path2[i]);
printf("]\n");
#endif
ARRAY_CALLOC (path_seq, *path_length);
for (i=0; i<length1; i++)
path_seq[i] = path1[i];
for (i=0; i<length2; i++)
path_seq[length1 + i] = path2[i];
return path_seq;
}
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
return NULL;
#undef CUR_PROC
}
/*============================================================================*/
static void init_phi_prop (plocal_propagate_store_t * pv, ghmm_dpseq * X,
ghmm_dpseq * Y, cell * start, cell * stop) {
#define CUR_PROC "init_phi_prop"
int u, v, j, i, off_x, y;
double value, max_value, previous_prob, log_b_i, log_in_a_ij ;
int start_x, start_y, stop_x, stop_y, middle_x;
ghmm_dpmodel * mo = pv->mo;
double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*,
ghmm_dpseq*, int, int);
log_in_a = &sget_log_in_a_prop;
init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
pv->start_x = start_x;
pv->start_y = start_y;
middle_x = start_x + (stop_x - start_x) / 2;
/* to be sure that we do not look up something out of the bounds set the
whole matrix to 1 */
/* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/
for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
for (j=0; j<mo->N; j++) {
pv->phi[off_x][y][j] = +1;
}
/* Inititalize the end_of_first matrix */
for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
for (j=0; j<mo->N; j++)
if (pv->end_of_first[off_x][y][j]) {
/* m_free(pv->end_of_first[off_x][y][j]); */
pv->end_of_first[off_x][y][j] = NULL;
}
if (mo->model_type & GHMM_kSilentStates) { /* could go into silent state at t=0 */
/*p__viterbi_silent( mo, t=0, v);*/
}
/*for (j = 0; j < mo->N; j++)
{
printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]);
}
for( i = 0; i < mo->N; i++){
printf("%d\t", former_matchcount[i]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", recent_matchcount[i]);
}*/
/* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */
/* this is in principle the same as the main recurrence but adds initial
probabilities to states that cannot be inhabitated at u=0, v=0 because
of greater offsets than one */
/* u, v <= max offsets */
for (u = -1; u <= mo->max_offset_x; u++) {
for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
for (j = 0; j < mo->N; j++)
{
/** initialization of phi (lookback matrix) **/
set_phi_prop(pv, u, v, j, 1);
/** traceback for the propagate algorithm **/
set_end_of_first(pv, u, v, j, NULL);
}
for (i = 0; i < mo->N; i++) {
/* Determine the maximum */
/* max_phi = phi[i] + log_in_a[j][i] ... */
if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
max_value = -DBL_MAX;
set_end_of_first(pv, u, v, i, NULL);
for (j = 0; j < mo->s[i].in_states; j++) {
/* look back in the phi matrix at the offsets */
previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x,
mo->s[i].offset_y, mo->s[i].in_id[j]);
log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
if ( previous_prob != +1 && log_in_a_ij != +1) {
value = previous_prob + log_in_a_ij;
if (value > max_value) {
max_value = value;
/* Critical point for the propagate algorithm if we are at the
middle point of sequence X store this at the end point of
the first alignment */
if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1),
v - (mo->s[i].offset_y - 1),
i, mo->s[i].in_id[j],
previous_prob,
log_in_a_ij);
if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
cell * old = get_end_of_first(pv, u, v, 0, 0, i);
m_free(old);
}
set_end_of_first(pv, u, v, i, end_of_first);
}
else {
/* at all other points simply propagate the values on */
set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v,
mo->s[i].offset_x,
mo->s[i].offset_y,
mo->s[i].in_id[j]));
}
}
}
else
{;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */
}
#ifdef DEBUG
int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y);
if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
printf("State %i\n", i);
ghmm_dpmodel_state_print(&(mo->s[i]));
printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
ghmm_dpmodel_emission_table_size(mo, i), emission);
}
#endif
log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X,
mo->s[i].alphabet,
u + start_x),
ghmm_dpseq_get_char(Y,
mo->s[i].alphabet,
v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y));
/* this is the difference from the main loop:
check whether this state could be an initial state and add the
initial probability */
if (log_b_i == +1 ) {
set_phi_prop(pv, u, v, i, +1);
}
else {
if (max_value == -DBL_MAX)
set_phi_prop(pv, u, v, i, +1);
else
set_phi_prop(pv, u, v, i, max_value);
/* if (mo->s[i].pi != 0 && mo->s[i].offset_x - 1 == u &&
mo->s[i].offset_y - 1 + start_y == v) { */
if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u &&
mo->s[i].offset_y - 1 + start_y == v){
set_phi_prop(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG
printf("Initial log prob state %i at (%i, %i) = %f\n", i, start_x + u, v, get_phi_prop(pv, u, v, 0, 0, i));
printf("Characters emitted X: %i, Y: %i\n",
ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
}
if (get_phi_prop(pv, u, v, 0, 0, i) != 1) {
set_phi_prop(pv, u, v, i, get_phi_prop(pv, u, v, 0, 0, i) + log_b_i);
}
}
}
} /* complete time step for emitting states */
/* last_osc = osc; */ /* save last transition class */
/*if (mo->model_type & kSilentStates) {
p__viterbi_silent( mo, t, v );
}*/ /* complete time step for silent states */
/**************
for (j = 0; j < mo->N; j++)
{
printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", former_matchcount[i]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", recent_matchcount[i]);
}
****************/
} /* End for v in Y */
/* Next character in X */
/* push back the old phi values */
push_back_phi_prop(pv, Y->length);
#ifdef DEBUG
if (start != NULL && mo->s[start->state].offset_y == 0) {
max_value = -DBL_MAX;
i = -1;
y = -1;
off_x = -1;
int x;
for (x = 0; x<=mo->max_offset_x; x++)
for (v = - mo->max_offset_y; v<Y->length; v++) {
for (j=0; j<mo->N; j++) {
if (get_phi_prop(pv, x, v, x, 0, j) >= max_value &&
get_phi_prop(pv, x, v, x, 0, j) < 1 - PROP_EPS) {
max_value = get_phi_prop(pv, x, v, x, 0, j);
i = j;
off_x = x;
y = v;
}
}
}
printf("u = %i start_x = %i off_x = %i ", u, start_x, off_x);
printf("max log prob state %i at (%i, %i) = %f after pushback\n",
i, start_x + u - (off_x - 1), y, get_phi_prop(pv, u, y, off_x, 0, i));
}
#endif
} /* End for u in X */
#undef CUR_PROC
}
/*============================================================================*/
static cell * pviterbi_propagate_step (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
cell * start, cell * stop,
double * log_p,
plocal_propagate_store_t * pv) {
#define CUR_PROC "pviterbi_step"
/* printf("---- propagate step -----\n"); */
int u, v, j, i;
double value, max_value, previous_prob;
/* int len_path = mo->N*len; the length of the path is not known apriori */
int start_x, start_y, stop_x, stop_y;
double log_b_i, log_in_a_ij;
cell * middle = NULL;
int middle_x;
double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*,
ghmm_dpseq*, int, int);
log_in_a = &sget_log_in_a_prop;
init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
middle_x = start_x + (stop_x - start_x) / 2;
/* if (mo->model_type & kSilentStates && */
/* mo->silent != NULL && */
/* mo->topo_order == NULL) { */
/* ghmm_dmodel_topo_order( mo ); */
/* } */
init_phi_prop(pv, X, Y, start, stop);
#ifdef DEBUG
if (start != NULL && mo->s[start->state].offset_y == 0) {
for (u = 0; u<=mo->max_offset_x; u++) {
printf("row %i of phi\n", u);
for (v = start_y - 1; v < stop_y; v++) {
printf("phi(0, %i, %i): %f, ", v, start->state, get_phi_prop(pv, 0, v, 0, 0, start->state));
}
printf("\n\n");
}
}
#endif
/* u, v > 0 */
/** THIS IS THE MAIN RECURRENCE **/
/* printf("Main loop x from %i to %i and y from %i to %i\n",
start_x + mo->max_offset_x + 1, stop_x, start_y, stop_y);*/
for (u = start_x + mo->max_offset_x + 1; u < stop_x; u++) {
for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
for (j = 0; j < mo->N; j++)
{
/** initialization of phi (lookback matrix) **/
set_phi_prop(pv, u, v, j, +1);
set_end_of_first(pv, 0, v, j, NULL);
}
for (i = 0; i < mo->N; i++) {
/* Determine the maximum */
/* max_phi = phi[i] + log_in_a[j][i] ... */
if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i]) {
max_value = -DBL_MAX;
set_end_of_first(pv, 0, v, i, NULL);
for (j = 0; j < mo->s[i].in_states; j++) {
/* look back in the phi matrix at the offsets */
previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x,
mo->s[i].offset_y, mo->s[i].in_id[j]);
log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
if ( previous_prob != +1 &&
log_in_a_ij != +1) {
value = previous_prob + log_in_a_ij;
if (value > max_value) {
max_value = value;
/* Critical point for the propagate algorithm if we are at the
middle point of sequence X store this at the end point of
the first alignment */
if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1),
v - (mo->s[i].offset_y - 1),
i, mo->s[i].in_id[j],
previous_prob,
log_in_a_ij);
if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
cell * old = get_end_of_first(pv, u, v, 0, 0, i);
m_free(old);
}
set_end_of_first(pv, u, v, i, end_of_first);
}
else {
/* at all other points simply propagate the values on */
set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v,
mo->s[i].offset_x,
mo->s[i].offset_y,
mo->s[i].in_id[j]));
}
}
}
else
{;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */
}
#ifdef DEBUG
int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y);
if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
printf("State %i\n", i);
ghmm_dpmodel_state_print(&(mo->s[i]));
printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
ghmm_dpmodel_emission_table_size(mo, i), emission);
}
#endif
log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
mo->size_of_alphabet[mo->s[i].alphabet],
mo->s[i].offset_x, mo->s[i].offset_y));
/* No maximum found (that is, state never reached)
or the output O[t] = 0.0: */
if (max_value == -DBL_MAX ||/* and then also: (v->psi[t][j] == -1) */
log_b_i == +1 ) {
set_phi_prop(pv, u, v, i, 1);
}
else
set_phi_prop(pv, u, v, i, max_value + log_b_i);
}
} /* complete time step for emitting states */
/* last_osc = osc; */ /* save last transition class */
/*if (mo->model_type & kSilentStates) {
p__viterbi_silent( mo, t, v );
}*/ /* complete time step for silent states */
/**************
for (j = 0; j < mo->N; j++)
{
printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", former_matchcount[i]);
}
for (i = 0; i < mo->N; i++){
printf("%d\t", recent_matchcount[i]);
}
****************/
} /* End for v in Y */
/* Next character in X */
/* push back the old phi values */
push_back_phi_prop(pv, Y->length);
} /* End for u in X */
/* Termination */
max_value = -DBL_MAX;
/* for the last segment search for the maximum probability at the end of the
two sequences */
if (stop == NULL){
for (j = 0; j < mo->N; j++){
#ifdef DEBUG
/* printf("phi(len_x)(len_y)(%i)=%f\n", j, pv->phi[0][stop_y-1][j]);
ghmm_dpmodel_print_cell(pv->end_of_first[0][stop_y - 1][j]); */
#endif
if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) != +1 &&
get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) > max_value) {
max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j);
middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, j);
}
}
}
/* traceback for the interior segments have to start with the previous state
of the middle beacuse the path has to be connected */
else {
if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state) != +1 ) {
max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state);
middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state);
}
}
if (max_value == -DBL_MAX) {
/* Sequence can't be generated from the model! */
*log_p = +1;
}
else {
*log_p = max_value;
}
return middle;
#undef CUR_PROC
}
/*===========================================================================*/
int * ghmm_dpmodel_viterbi_propagate_segment (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
double *log_p, int *path_length,
double max_size, int start_x, int start_y,
int stop_x, int stop_y, int start_state,
int stop_state, double start_log_p,
double stop_log_p) {
#define CUR_PROC "ghmm_dpmodel_viterbi_propagate_segment"
int * path_seq = NULL;
cell * start, * stop;
plocal_propagate_store_t * pv = pviterbi_propagate_alloc(mo, Y->length);
/* Precomputing the log(a_ij) and log(bj(ot)) */
pviterbi_prop_precompute(mo, pv);
/* init start and stop */
start = init_cell (start_x, start_y, start_state, -1, start_log_p, 0);
stop = init_cell (stop_x, stop_y, stop_state, stop_state, stop_log_p, 0);
/* Launch the recursion */
path_seq = pviterbi_propagate_recursion (mo, X, Y, log_p, path_length,
start, stop, max_size, pv);
pviterbi_propagate_free (&pv, mo->N, mo->max_offset_x, mo->max_offset_y, Y->length);
return path_seq;
#undef CUR_PROC
}