Codebase list ghmm / HEAD ghmm / sfoba.c
HEAD

Tree @HEAD (Download .tar.gz)

sfoba.c @HEADraw · history · blame

/*******************************************************************************
*
*       This file is part of the General Hidden Markov Model Library,
*       GHMM version __VERSION__, see http://ghmm.org
*
*       Filename: ghmm/ghmm/sfoba.c
*       Authors:  Bernhard Knab, Benjamin Georgi
*
*       Copyright (C) 1998-2004 Alexander Schliep
*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
*                               Berlin
*
*       Contact: schliep@ghmm.org
*
*       This library is free software; you can redistribute it and/or
*       modify it under the terms of the GNU Library General Public
*       License as published by the Free Software Foundation; either
*       version 2 of the License, or (at your option) any later version.
*
*       This library is distributed in the hope that it will be useful,
*       but WITHOUT ANY WARRANTY; without even the implied warranty of
*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*       Library General Public License for more details.
*
*       You should have received a copy of the GNU Library General Public
*       License along with this library; if not, write to the Free
*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
*       This file is version $Revision: 2243 $
*                       from $Date: 2008-11-05 08:05:05 -0500 (Wed, 05 Nov 2008) $
*             last change by $Author: christoph_mpg $.
*
*******************************************************************************/

#ifdef HAVE_CONFIG_H
#  include "../config.h"
#endif

#include <math.h>
#include <float.h>
#include "ghmm.h"
#include "mprintf.h"
#include "mes.h"
#include "sfoba.h"
#include "matrix.h"
#include "randvar.h"
#include "ghmm_internals.h"



/*----------------------------------------------------------------------------*/
static int sfoba_initforward (ghmm_cmodel * smo, double *alpha_1, double *omega,
                              double *scale, double **b)
{
# define CUR_PROC "sfoba_initforward"
  int i;
  double c_0;
  scale[0] = 0.0;
  if (b == NULL){
    for (i = 0; i < smo->N; i++) {
      alpha_1[i] = smo->s[i].pi * ghmm_cmodel_calc_b(smo->s+i, omega);
      scale[0] += alpha_1[i];
    }
  }
  else {
    for (i = 0; i < smo->N; i++) {
      alpha_1[i] = smo->s[i].pi * b[i][smo->M];
      scale[0] += alpha_1[i];
    }
  }
  if (scale[0] > DBL_MIN) {     /* >= EPS_PREC */
    c_0 = 1 / scale[0];
    for (i = 0; i < smo->N; i++)
      alpha_1[i] *= c_0;
  }
  return (0);
# undef CUR_PROC
}                               /* sfoba_initforward */

/*----------------------------------------------------------------------------*/
static double sfoba_stepforward (ghmm_cstate * s, double *alpha_t, int osc,
                                 double b_omega)
{
  int i, id;
  double value = 0.0;
  for (i = 0; i < s->in_states; i++) {
    id = s->in_id[i];
    value += s->in_a[osc][i] * alpha_t[id];
  }
  value *= b_omega;             /* b_omega outside the sum */
  return (value);
}                               /* sfoba_stepforward */


/*============================================================================*/
int ghmm_cmodel_forward (ghmm_cmodel * smo, double *O, int T, double ***b,
                   double **alpha, double *scale, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_forward"
  int res = -1;
  int i, t = 0, osc = 0;
  double c_t;
  int pos;

  /* T is length of sequence; divide by dimension to represent the number of time points */
  T /= smo->dim;
  /* calculate alpha and scale for t = 0 */
  if (b == NULL)
    sfoba_initforward(smo, alpha[0], O, scale, NULL);
  else
    sfoba_initforward(smo, alpha[0], O, scale, b[0]);
  if (scale[0] <= DBL_MIN) {
    /* means f(O[0], mue, u) << 0, first symbol very unlikely */
    /* GHMM_LOG(LCONVERTED, "scale[0] == 0.0!\n"); */
    goto STOP;
  }
  else {
    *log_p = -log (1 / scale[0]);

    if (smo->cos == 1) {
      osc = 0;
    }
    else {
      if (!smo->class_change->get_class) {
        printf ("ERROR: get_class not initialized\n");
        return (-1);
      }
      /* printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t); */
      osc = smo->class_change->get_class (smo, O, smo->class_change->k, t);
      if (osc >= smo->cos){
        printf("ERROR: get_class returned index %d but model has only %d classes !\n",osc,smo->cos);
        goto STOP;
      }

    }


    for (t = 1; t < T; t++) {
      scale[t] = 0.0;
      pos = t * smo->dim;
      /* b not calculated yet */
      if (b == NULL) {
        for (i = 0; i < smo->N; i++) {
          alpha[t][i] = sfoba_stepforward(smo->s+i, alpha[t-1], osc,
                                          ghmm_cmodel_calc_b(smo->s+i, O+pos));
          scale[t] += alpha[t][i];
        }
      }
      /* b precalculated */
      else {
        for (i = 0; i < smo->N; i++) {
          alpha[t][i] = sfoba_stepforward (smo->s+i, alpha[t - 1], osc,
                                           b[t][i][smo->M]);
          scale[t] += alpha[t][i];
        }
      }
      if (scale[t] <= DBL_MIN) {        /* seq. can't be build */
        goto STOP;
        break;
      }
      c_t = 1 / scale[t];
      /* scale alpha */
      for (i = 0; i < smo->N; i++)
        alpha[t][i] *= c_t;
      /* summation of log(c[t]) for calculation of log( P(O|lambda) ) */
      *log_p -= log (c_t);

      if (smo->cos == 1) {
        osc = 0;
      }
      else {
        if (!smo->class_change->get_class) {
          printf ("ERROR: get_class not initialized\n");
          return (-1);
        }
        /* printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t); */
        osc = smo->class_change->get_class (smo, O, smo->class_change->k, t);
        if (osc >= smo->cos){
          printf("ERROR: get_class returned index %d but model has only %d classes !\n",osc,smo->cos);
          goto STOP;
        }		
      }



    }
  }
  /* log_p should not be smaller than value used for seqs. that 
     can't be build ???
     if (*log_p < (double)PENALTY_LOGP)
     *log_p = (double)PENALTY_LOGP;
   */
  return 0;
STOP:
  *log_p = (double) -DBL_MAX;
  return (res);
# undef CUR_PROC
}                               /* ghmm_cmodel_forward */

#define LOWER_SCALE_BOUND 3.4811068399043105e-57 /* exp(-130) */

/*============================================================================*/
int ghmm_cmodel_backward (ghmm_cmodel * smo, double *O, int T, double ***b,
                    double **beta, const double *scale)
{
# define CUR_PROC "ghmm_cmodel_backward"
  double *beta_tmp, sum, c_t;
  int i, j, j_id, t, osc;
  int res = -1;
  int pos;

  /* T is length of sequence; divide by dimension to represent the number of time points */
  T /= smo->dim;
  
  ARRAY_CALLOC (beta_tmp, smo->N);

  for (t = 0; t < T; t++) {
    /* try differenent bounds here in case of problems 
       like beta[t] = NaN */
    if (scale[t] < LOWER_SCALE_BOUND) {
      /* printf("backward scale(%d) = %e\n", t , scale[t]); */
      goto STOP;
    }
  }
  /* initialize */
  c_t = 1 / scale[T - 1];
  for (i = 0; i < smo->N; i++) {
    beta[T - 1][i] = 1;
    beta_tmp[i] = c_t;
  }
  /* Backward Step for t = T-2, ..., 0 */
  /* beta_tmp: Vector for storage of scaled beta in one time step */

  if (smo->cos == 1) {
    osc = 0;
  }
  else {
    if (!smo->class_change->get_class) {
      printf ("ERROR: get_class not initialized\n");
      goto STOP;
    }
     osc = smo->class_change->get_class (smo, O, smo->class_change->k, T - 2);
     /* printf("osc(%d) = %d\n",T-2,osc);  */
    if (osc >= smo->cos){
      printf("ERROR: get_class returned index %d but model has only %d classes !\n",osc,smo->cos);
      goto STOP;
    }     
  }


  for (t = T - 2; t >= 0; t--) {
    pos = t * smo->dim;
    if (b == NULL)
      for (i = 0; i < smo->N; i++) {
        sum = 0.0;
        for (j = 0; j < smo->s[i].out_states; j++) {
          j_id = smo->s[i].out_id[j];
          sum += smo->s[i].out_a[osc][j]
              * ghmm_cmodel_calc_b(smo->s+j_id, O+pos+smo->dim)
              * beta_tmp[j_id];
        }
        beta[t][i] = sum;
      }
    else
      for (i = 0; i < smo->N; i++) {
        sum = 0.0;
        for (j = 0; j < smo->s[i].out_states; j++) {
          j_id = smo->s[i].out_id[j];
          sum +=
            smo->s[i].out_a[osc][j] * b[t + 1][j_id][smo->M] * beta_tmp[j_id];
          
            /*printf("  smo->s[%d].out_a[%d][%d] * b[%d] * beta_tmp[%d] = %f * %f *
            %f\n",i,osc,j,t+1,j_id,smo->s[i].out_a[osc][j], b[t + 1][j_id][smo->M], beta_tmp[j_id]); */
          
        }
        beta[t][i] = sum;
        /* printf(" ->   beta[%d][%d] = %f\n",t,i,beta[t][i]); */
      }
    c_t = 1 / scale[t];
    for (i = 0; i < smo->N; i++)
      beta_tmp[i] = beta[t][i] * c_t;

    if (smo->cos == 1) {
      osc = 0;
    }
    else {
      if (!smo->class_change->get_class) {
        printf ("ERROR: get_class not initialized\n");
        goto STOP;
      }
      /* if t=1 the next iteration will be the last */        
      if (t >= 1){
        osc = smo->class_change->get_class (smo, O, smo->class_change->k, t-1);
        /* printf("osc(%d) = %d\n",t-1,osc);  */
        if (osc >= smo->cos){
          printf("ERROR: get_class returned index %d but model has only %d classes !\n",osc,smo->cos);
          goto STOP;
        }	
      }
    }
  }
  res = 0;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (beta_tmp);
  return (res);
# undef CUR_PROC
}                               /* ghmm_cmodel_backward */

/*============================================================================*/
int ghmm_cmodel_logp (ghmm_cmodel * smo, double *O, int T, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_logp"
  int res = -1;
  double **alpha, *scale = NULL;

  alpha = ighmm_cmatrix_stat_alloc (T, smo->N);
  if (!alpha) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  ARRAY_CALLOC (scale, T);
  /* run forward alg. */
  if (ghmm_cmodel_forward (smo, O, T, NULL, alpha, scale, log_p) == -1) {
    /* GHMM_LOG_QUEUED(LCONVERTED); */
    goto STOP;
  }
  res = 0;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  ighmm_cmatrix_stat_free (&alpha);
  m_free (scale);
  return (res);
# undef CUR_PROC
}                               /* ghmm_cmodel_logp */

/*============================================================================*/
int ghmm_cmodel_logp_joint(ghmm_cmodel *mo, const double *O, int len,
                            const int *S, int slen, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_logp_joint"
    int prevstate, state, state_pos=0, pos=0, j, osc=0;
    int dim = mo->dim;

    prevstate = state = S[0];
    *log_p = log(mo->s[state].pi);
    if (!(mo->model_type & GHMM_kSilentStates) || 1 /* XXX !mo->silent[state] */ )
    {
        *log_p += log(ghmm_cmodel_calc_b(mo->s+state, O+pos));
        pos+=dim;
    }
        
    for (state_pos=1; state_pos < slen || pos+dim <= len; state_pos++) {
        state = S[state_pos];
        for (j=0; j < mo->s[state].in_states; ++j) {
            if (prevstate == mo->s[state].in_id[j])
                break;
        }

        if (mo->cos > 1) {
            if (!mo->class_change->get_class) {
                GHMM_LOG(LERROR, "get_class not initialized");
                goto STOP;
            }
            osc = mo->class_change->get_class(mo, O, mo->class_change->k, pos);
            if (osc >= mo->cos) {
                GHMM_LOG_PRINTF(LERROR, LOC, "get_class returned index %d "
                                "but model has only %d classes!", osc, mo->cos);
                goto STOP;
            }
        }

        if (j == mo->s[state].in_states ||
            fabs(mo->s[state].in_a[osc][j]) < GHMM_EPS_PREC) {
            GHMM_LOG_PRINTF(LERROR, LOC, "Sequence can't be built. There is no "
                            "transition from state %d to %d.", prevstate, state);
            goto STOP;
        }

        *log_p += log(mo->s[state].in_a[osc][j]);

        if (!(mo->model_type & GHMM_kSilentStates) || 1 /* XXX !mo->silent[state] */) {
            *log_p += log(ghmm_cmodel_calc_b(mo->s+state, O+pos));
            pos+=dim;
        }
        
        prevstate = state;
    }

    if (pos < len)
        GHMM_LOG_PRINTF(LINFO, LOC, "state sequence too short! processed only %d symbols", pos/dim);
    if (state_pos < slen)
        GHMM_LOG_PRINTF(LINFO, LOC, "sequence too short! visited only %d states", state_pos);

    return 0;
  STOP:
    return -1;
# undef CUR_PROC
}