Codebase list ghmm / HEAD ghmm / smap_classify.c
HEAD

Tree @HEAD (Download .tar.gz)

smap_classify.c @HEADraw · history · blame

/*******************************************************************************
*
*       This file is part of the General Hidden Markov Model Library,
*       GHMM version __VERSION__, see http://ghmm.org
*
*       Filename: ghmm/ghmm/smap_classify.c
*       Authors:  Bernd Wichern
*
*       Copyright (C) 1998-2004 Alexander Schliep
*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
*                               Berlin
*
*       Contact: schliep@ghmm.org
*
*       This library is free software; you can redistribute it and/or
*       modify it under the terms of the GNU Library General Public
*       License as published by the Free Software Foundation; either
*       version 2 of the License, or (at your option) any later version.
*
*       This library is distributed in the hope that it will be useful,
*       but WITHOUT ANY WARRANTY; without even the implied warranty of
*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*       Library General Public License for more details.
*
*       You should have received a copy of the GNU Library General Public
*       License along with this library; if not, write to the Free
*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
*       This file is version $Revision: 1977 $
*                       from $Date: 2007-11-16 10:49:06 -0500 (Fri, 16 Nov 2007) $
*             last change by $Author: grunau $.
*
*******************************************************************************/

#ifdef HAVE_CONFIG_H
#  include "../config.h"
#endif

#include "ghmmconfig.h"

#ifdef GHMM_OBSOLETE

#include <math.h>
#include <float.h>
#include "ghmm.h"
#include "mes.h"
#include "sfoba.h"
#include "matrix.h"
#include "smap_classify.h"
#include "ghmm_internals.h"

typedef struct local_store_t {
  double ***alpha;
  double **scale;
  double *prob;
  double **p;
  double **sum_alpha;
  double *alpha_1;
  double *prior;
  int *error;
} local_store_t;

/*----------------------------------------------------------------------------*/
static local_store_t *smap_classify_alloc (int mo_number, int states, int T)
{
#define CUR_PROC "smap_classify_alloc"
  int i;
  local_store_t *map = NULL;
  ARRAY_CALLOC (map, 1);
  ARRAY_CALLOC (map->alpha, mo_number);
  for (i = 0; i < mo_number; i++) {
    map->alpha[i] = ighmm_cmatrix_alloc (T, states);
    if (!map->alpha[i]) {
      GHMM_LOG_QUEUED(LCONVERTED);
      goto STOP;
    }
  }
  map->scale = ighmm_cmatrix_alloc (mo_number, T);
  if (!map->scale) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  map->p = ighmm_cmatrix_alloc (mo_number, T + 1);
  if (!map->p) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  map->sum_alpha = ighmm_cmatrix_alloc (mo_number, T);
  if (!map->sum_alpha) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  ARRAY_CALLOC (map->prob, mo_number);
  ARRAY_CALLOC (map->alpha_1, mo_number);
  ARRAY_CALLOC (map->error, mo_number);
  ARRAY_CALLOC (map->prior, mo_number);
  return map;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  return NULL;
#undef CUR_PROC
}

/*----------------------------------------------------------------------------*/
static int smap_classify_free (local_store_t ** map, int mo_number, int T)
{
# define CUR_PROC "smap_classify_free"
  int i;
  mes_check_ptr (map, return (-1));
  if (!*map)
    return (0);
  m_free ((*map)->prob);
  m_free ((*map)->alpha_1);
  m_free ((*map)->error);
  m_free ((*map)->prior);
  ighmm_cmatrix_free (&((*map)->scale), mo_number);
  ighmm_cmatrix_free (&((*map)->p), mo_number);
  ighmm_cmatrix_free (&((*map)->sum_alpha), mo_number);
  for (i = 0; i < mo_number; i++)
    ighmm_cmatrix_free (&((*map)->alpha[i]), T);
  m_free (*map);

  return 0;
#undef CUR_PROC
}

/*============================================================================*/

/* Maximum A Posteriori Classification Algorithm (MAPCA): 
   given a field of models and one sequence and suppose the sequence
   has been produced by one of these models. This algorithm calculates
   for each model the probability, that the seq. comes from the model.
   This bayesian approach uses a prior for the models. If none is specified
   equal prob. is assumed.
   The maps are copied into "result", which has to be of dimension "smo_number"
   Ref.: A. Kehagias: Bayesian Classification of HMM, Math. Comp. Modelling
   (1995)
*/

int ghmm_smap_classify (ghmm_cmodel ** smo, double *result, int smo_number,
                   double *O, int T)
{
#define CUR_PROC "ghmm_smap_classify"
  int t, n, m, max_model = -1, build = 0;       /* time, states, models */
  double log_p, denom_26, sum = 0.0, max_result = 0;
  local_store_t *map = NULL;


  if (smo == NULL || smo_number <= 0 || O == NULL || T < 0) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }

  map = smap_classify_alloc (smo_number, smo[0]->N, T);

  /* no prior defined --> same prior for all models */
  for (m = 0; m < smo_number; m++)
    if (smo[m]->prior == -1)
      map->prior[m] = 1 / (double) smo_number;
    else
      map->prior[m] = smo[m]->prior;

  for (m = 0; m < smo_number; m++)
    sum += map->prior[m];
  if (fabs (1 - sum) > 0.0001) {
    GHMM_LOG(LCONVERTED,
      "Sum of model priors != 1 or mixing of priors and non-priors \n");
    goto STOP;
  }


  /* for all models */
  for (m = 0; m < smo_number; m++) {
    map->p[m][0] = map->prior[m];       /* (22) */
    map->alpha_1[m] = 1 / (double) smo_number;  /* (23) */
    /* forward alg.                                     (24) */
    if (ghmm_cmodel_forward (smo[m], O, T, NULL, map->alpha[m], map->scale[m],
                       &log_p) == -1) {
      /* The sequence can't be generated from this model. */
      map->error[m] = 1;
      continue;
    }
    build = 1;
    for (t = 0; t < T; t++) {
      for (n = 0; n < smo[0]->N; n++)
        map->sum_alpha[m][t] += map->alpha[m][t][n];    /* Sums (25) */
      if (map->sum_alpha[m][t] == 0) {
        GHMM_LOG(LCONVERTED, "sum_alpha = 0\n");
        goto STOP;
      }                         /* Another solution ??? */
    }
  }
  if (!build) {
    GHMM_LOG(LCONVERTED, "Prob. = 0 for all models\n");
    goto STOP;
  }

  for (t = 0; t < T; t++) {
    denom_26 = 0.0;
    for (m = 0; m < smo_number; m++) {
      if (!map->error[m]) {
        if (t != 0)
          map->prob[m] = map->sum_alpha[m][t] * map->scale[m][t] /      /* (25) */
            map->sum_alpha[m][t - 1];
        else
          map->prob[m] = map->sum_alpha[m][t] * map->scale[m][t];       /* (25) */
        denom_26 += map->prob[m] * map->p[m][t];
      }
      else
        map->prob[m] = 0.0;     /* Seq. cannot be generated by this model */
    }
    for (m = 0; m < smo_number; m++) {
      if (denom_26 != 0)
        map->p[m][t + 1] = map->prob[m] * map->p[m][t] / denom_26;      /* (26) */
      else {
        GHMM_LOG(LCONVERTED, "denom_26 == 0!\n");
        goto STOP;
      }
    }
  }
  /* copy into result and find best model */
  max_result = 0.0;
  for (m = 0; m < smo_number; m++) {
    result[m] = map->p[m][T];
    if (result[m] > max_result) {
      max_result = result[m];
      max_model = m;
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  smap_classify_free (&map, smo_number, T);
  return max_model;

#undef CUR_PROC
}

/* Alternative to MAPCA (smap_classify); calculate p[m] directly using 
   Bayes' theorem, instead of recursive over t.
   p(m | O) = p(O | m) * p(m) / (sum_i p(O | i) * p(i))
   Same result?
*/

int ghmm_smap_bayes (ghmm_cmodel ** smo, double *result, int smo_number, double *O,
                int T)
{
#define CUR_PROC "ghmm_smap_bayes"
  double *prior, *log_p;
  double sum = 0.0, p_von_O = 0.0, max_result = 0.0;
  int *error=NULL;
  int found_model = 0, err = 0;
  int m, max_model = -1;

  /* nothing to do here */
  if (smo_number == 1) {
    result[0] = 1.0;
    return 0;
  }

  for (m = 0; m < smo_number; m++)
    result[m] = 0;

  ARRAY_CALLOC (prior, smo_number);
  ARRAY_CALLOC (log_p, smo_number);
  ARRAY_CALLOC (error, smo_number);

  if (smo == NULL || smo_number <= 0 || O == NULL || T < 0) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }

  /* no prior defined --> same prior for all models */
  for (m = 0; m < smo_number; m++)
    if (smo[m]->prior == -1)
      prior[m] = 1 / (double) smo_number;
    else
      prior[m] = smo[m]->prior;

  for (m = 0; m < smo_number; m++)
    sum += prior[m];
  if (fabs (1 - sum) > 0.0001) {
    GHMM_LOG(LCONVERTED,
      "Sum of model priors != 1 or mixing of priors and non-priors \n");
    for (m = 0; m < smo_number; m++)
      printf ("%.6f  ", prior[m]);
    printf ("\n");
    goto STOP;
  }

  /* Calculate log_p for every model; combined with prior gives 
     the likelihood for O, given all models */
  for (m = 0; m < smo_number; m++)
    if (ghmm_cmodel_logp (smo[m], O, T, &log_p[m]) == -1) {
      error[m] = 1;
    }
    else {
      error[m] = 0;
      p_von_O += exp (log_p[m]) * prior[m];
      found_model = 1;
    }

  if (p_von_O == 0) {
    GHMM_LOG(LCONVERTED, "P(O) = 0!\n");
    err = 1;
  }
  if (!found_model) {
    GHMM_LOG(LCONVERTED, "-1 from ghmm_cmodel_logp for all models\n");
    err = 1;
  }
  if (err)
    goto STOP;

  /* Likelihood for the models, given O */
  for (m = 0; m < smo_number; m++) {
    if (!error[m]) {
      result[m] = exp (log_p[m]) * prior[m] / p_von_O;
      if (result[m] > max_result) {
        max_model = m;
        max_result = result[m];
      }
    }
  }
  /* TEST */
  if (max_model == -1) {
    printf ("smap_bayes: max_model == -1\n");
    for (m = 0; m < smo_number; m++)
      if (!error[m])
        printf ("%f %.4f %.4f;  ", log_p[m], prior[m], p_von_O);
    printf ("\n");
  }
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */

  m_free (prior);
  m_free (log_p);
  m_free (error);
  return max_model;
#undef CUR_PROC
}

#endif /* GHMM_OBSOLETE */