/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/discrime.c
* Authors: Janne Grunau
*
* Copyright (C) 1998-2004 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
* Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
*       License along with this library; if not, write to the Free Software
*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 1754 $
* from $Date: 2006-11-03 11:29:44 -0500 (Fri, 03 Nov 2006) $
* last change by $Author: grunau $.
*
*******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "../config.h"
#endif
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "ghmm.h"
#include "mes.h"
#include "matrix.h"
#include "model.h"
#include "foba.h"
#include "gradescent.h"
#include "discrime.h"
#include "ghmm_internals.h"
#define LAMBDA 0.14
#define TRIM(o, n) ((1-LAMBDA)*(o) + LAMBDA*(n))
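/* TRIM(o, n) smooths a re-estimated parameter n towards its old value o:
   the result is the convex combination (1-LAMBDA)*o + LAMBDA*n, so an
   updated row of probabilities, being a convex combination of two
   stochastic rows, stays stochastic itself. */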
/* pre-C99 compilers provide no long double math functions (logl, expl);
   fall back to the double precision versions */
#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))
#define logl(A) log(A)
#define expl(A) exp(A)
#endif
/* forward declarations */
static int discrime_galloc (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
double ******matrix_b, double *****matrix_a,
double *****matrix_pi, long double ***omega,
long double ****omegati, double ****log_p);
static void discrime_gfree (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
double *****matrix_b, double ****matrix_a,
double ****matrix_pi, long double **omega,
long double ***omegati, double ***log_p);
static void discrime_trim_gradient (double *new, int length);
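/* discrime_lambda is the step size for the gradient updates (set in
   ghmm_dmodel_label_discriminative as lambda scaled by the total number of
   training symbols); discrime_alpha controls the steepness of the sigmoid
   in the discriminative objective. */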
static double discrime_lambda = 0.0;
static double discrime_alpha = 1.0;
/*----------------------------------------------------------------------------*/
/** allocates the expectation matrices (b, a, pi), the outer derivatives
    (omega, omegati) and the log-likelihood matrix for all classes and
    training sequences */
static int discrime_galloc (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
double ******matrix_b, double *****matrix_a,
double *****matrix_pi, long double ***omega,
long double ****omegati, double ****log_p)
{
#define CUR_PROC "discrime_galloc"
int i, k, l, m;
/* first allocate memory for matrix_b: */
ARRAY_CALLOC (*matrix_b, noC);
for (k = 0; k < noC; k++) {
ARRAY_CALLOC ((*matrix_b)[k], sqs[k]->seq_number);
for (l = 0; l < sqs[k]->seq_number; l++) {
ARRAY_CALLOC ((*matrix_b)[k][l], noC);
for (m = 0; m < noC; m++) {
ARRAY_CALLOC ((*matrix_b)[k][l][m], mo[m]->N);
for (i = 0; i < mo[m]->N; i++)
ARRAY_CALLOC ((*matrix_b)[k][l][m][i], ghmm_ipow (mo[m], mo[m]->M, mo[m]->order[i] + 1));
}
}
}
  /* matrix_a(k,l,m,i,j) = matrix_a[k][l][m][i*mo[m]->N + j] */
ARRAY_CALLOC (*matrix_a, noC);
for (k = 0; k < noC; k++) {
ARRAY_CALLOC ((*matrix_a)[k], sqs[k]->seq_number);
for (l = 0; l < sqs[k]->seq_number; l++) {
ARRAY_CALLOC ((*matrix_a)[k][l], noC);
for (m = 0; m < noC; m++)
ARRAY_CALLOC ((*matrix_a)[k][l][m], mo[m]->N * mo[m]->N);
}
}
/* allocate memory for matrix_pi */
ARRAY_CALLOC (*matrix_pi, noC);
for (k = 0; k < noC; k++) {
ARRAY_CALLOC ((*matrix_pi)[k], sqs[k]->seq_number);
for (l = 0; l < sqs[k]->seq_number; l++) {
ARRAY_CALLOC ((*matrix_pi)[k][l], noC);
for (m = 0; m < noC; m++)
ARRAY_CALLOC ((*matrix_pi)[k][l][m], mo[m]->N);
}
}
/* allocate memory for matrices of likelihoods
log_p[k][l][m] =
log_prob of l-th sequence of k-th class under the m-th ghmm_dmodel */
ARRAY_CALLOC (*log_p, noC);
for (k = 0; k < noC; k++) {
ARRAY_CALLOC ((*log_p)[k], sqs[k]->seq_number);
for (l = 0; l < sqs[k]->seq_number; l++)
ARRAY_CALLOC ((*log_p)[k][l], noC);
}
/* allocate memory for outer derivatives */
ARRAY_CALLOC (*omega, noC);
for (k = 0; k < noC; k++)
ARRAY_CALLOC ((*omega)[k], sqs[k]->seq_number);
/* allocate memory for omega tilde. NB: size(omega)*noC == size(omegati) */
ARRAY_CALLOC (*omegati, noC);
for (k = 0; k < noC; k++) {
ARRAY_CALLOC ((*omegati)[k], sqs[k]->seq_number);
for (l = 0; l < sqs[k]->seq_number; l++)
ARRAY_CALLOC ((*omegati)[k][l], noC);
}
return 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
discrime_gfree (mo, sqs, noC, *matrix_b, *matrix_a, *matrix_pi,
*omega, *omegati, *log_p);
return -1;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
/** frees memory for expectation matrices for all classes & training sequences*/
static void discrime_gfree (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
double *****matrix_b, double ****matrix_a,
double ****matrix_pi, long double **omega,
long double ***omegati, double ***log_p)
{
#define CUR_PROC "discrime_gfree"
int i, k, l, m;
for (k = 0; k < noC; k++) {
for (l = 0; l < sqs[k]->seq_number; l++) {
for (m = 0; m < noC; m++) {
for (i = 0; i < mo[m]->N; i++)
m_free (matrix_b[k][l][m][i]);
m_free (matrix_b[k][l][m]);
}
m_free (matrix_b[k][l]);
}
m_free (matrix_b[k]);
}
m_free (matrix_b);
for (k = 0; k < noC; k++) {
for (l = 0; l < sqs[k]->seq_number; l++) {
for (m = 0; m < noC; m++)
m_free (matrix_a[k][l][m]);
m_free (matrix_a[k][l]);
}
m_free (matrix_a[k]);
}
m_free (matrix_a);
for (k = 0; k < noC; k++) {
for (l = 0; l < sqs[k]->seq_number; l++) {
for (m = 0; m < noC; m++)
m_free (matrix_pi[k][l][m]);
m_free (matrix_pi[k][l]);
}
m_free (matrix_pi[k]);
}
m_free (matrix_pi);
for (k = 0; k < noC; k++) {
for (l = 0; l < sqs[k]->seq_number; l++)
m_free (log_p[k][l]);
m_free (log_p[k]);
}
m_free (log_p);
for (k = 0; k < noC; k++)
m_free (omega[k]);
m_free (omega);
for (k = 0; k < noC; k++) {
for (l = 0; l < sqs[k]->seq_number; l++)
m_free (omegati[k][l]);
m_free (omegati[k]);
}
m_free (omegati);
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
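/**
 * Computes the outer derivatives of the discriminative objective.
 * For the l-th sequence O of class k let
 *
 *   d = log P(O | mo[k]) + log prior[k]
 *       - log( sum_{m != k} P(O | mo[m]) * prior[m] )
 *
 * (the log-sum is evaluated with the usual max-shift for numerical
 * stability). With the sigmoid s = 1 / (1 + exp(-alpha * d)) the outer
 * derivative is
 *
 *   omega[k][l] = alpha * s * (1 - s),
 *
 * and for each competing class m != k
 *
 *   omegati[k][l][m] = omega[k][l] * P(O | mo[m]) * prior[m]
 *                      / sum_{m' != k} P(O | mo[m']) * prior[m'].
 */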
static int discrime_calculate_omega (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
long double **omega, long double ***omegati,
double ***log_p)
{
#define CUR_PROC "discrime_calculate_omega"
int k, l, m;
int seq_no, argmax = 0;
double sum, max;
double exponent;
long double sigmoid;
/* compute outer derivatives */
/* iterate over all classes */
for (k = 0; k < noC; k++) {
seq_no = sqs[k]->seq_number;
/*iterate over all training sequences */
for (l = 0; l < seq_no; l++) {
      max = 1.0;
      /* evaluate log(sum(exp(log_p + log prior))) over the competing
         classes; for numerical stability first determine the maximum */
for (m = 0; m < noC; m++) {
if (m != k
&& (1.0 == max || max < (log_p[k][l][m] + log (mo[m]->prior)))) {
max = log_p[k][l][m] + log (mo[m]->prior);
argmax = m;
}
}
/* sum */
sum = 1.0;
for (m = 0; m < noC; m++) {
if (m != k && m != argmax)
sum += exp (log_p[k][l][m] + log (mo[m]->prior) - max);
}
sum = log (sum) + max;
exponent = log_p[k][l][k] + log (mo[k]->prior) - sum;
      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent too small (%g), clamping it to log(LDBL_MIN)!\n",
                exponent);
        exponent = (double) logl (LDBL_MIN);
      }
sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));
omega[k][l] = discrime_alpha * sigmoid * (1.0 - sigmoid);
/* printf("omega[%d][%d] = %Lg\n",k ,l, omega[k][l]); */
      /* omegati is not defined for m == k, cf. equation (2.9) */
for (m = 0; m < noC; m++) {
if (m == k)
omegati[k][l][m] = 0;
else
omegati[k][l][m] =
omega[k][l] * expl (log_p[k][l][m] -
sum) * (long double) mo[m]->prior;
/* printf("%Lg, ", expl(log_p[k][l][m] - sum) * (long double)mo[m]->prior); */
/* printf("omegati[%d][%d][%d] = %Lg\n",k ,l, m, omegati[k][l][m]); */
}
/* printf("\n"); */
}
}
return 0;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
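/**
 * Runs the forward/backward algorithm for each training sequence under
 * each model, storing the log-likelihoods in log_p[k][l][m] and the
 * expected parameter usage counts in the expect_* matrices.
 */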
static int discrime_precompute (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
double *****expect_b, double ****expect_a,
double ****expect_pi, double ***log_p)
{
#define CUR_PROC "discrime_precompute"
int k, l, m;
int seq_len, success = 1;
/* forward, backward variables and scaler */
double **alpha, **beta, *scale;
ghmm_dseq *sq;
/* iterate over all classes */
for (k = 0; k < noC; k++) {
sq = sqs[k];
/*iterate over all training sequences */
for (l = 0; l < sq->seq_number; l++) {
seq_len = sq->seq_len[l];
/* iterate over all classes */
for (m = 0; m < noC; m++) {
if (-1 ==
ighmm_reestimate_alloc_matvek (&alpha, &beta, &scale, seq_len,
mo[m]->N))
return -1;
/* calculate forward and backward variables without labels: */
if (-1 == ghmm_dmodel_forward (mo[m], sq->seq[l], seq_len, alpha, scale,
&(log_p[k][l][m]))) {
success = 0;
printf ("forward\n");
goto FREE;
}
if (-1 == ghmm_dmodel_backward (mo[m], sq->seq[l], seq_len, beta, scale)) {
success = 0;
printf ("backward\n");
goto FREE;
}
        /* compute expectation matrices:
           expect_*[k][l][m] holds the expected number of times a particular
           parameter of model m is used by the l-th training sequence of
           class k */
ghmm_dmodel_label_gradient_expectations (mo[m], alpha, beta, scale,
sq->seq[l], seq_len,
expect_b[k][l][m], expect_a[k][l][m],
expect_pi[k][l][m]);
FREE:
ighmm_reestimate_free_matvek (alpha, beta, scale, seq_len);
if (!success)
return -1;
}
}
}
return 0;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
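/**
 * Computes the current value of the discriminative performance measure:
 * the sum over all training sequences of the sigmoid
 * 1 / (1 + exp(-alpha * d)), with d the log-odds of the correct class as
 * in discrime_calculate_omega. Each summand lies in (0,1), so the value
 * is a smooth approximation of the number of correctly classified
 * training sequences.
 */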
double ghmm_dmodel_label_discrim_perf(ghmm_dmodel** mo, ghmm_dseq** sqs, int noC)
{
#define CUR_PROC "ghmm_dmodel_label_discrim_perf"
int k, l, m, temp;
int argmax = 0;
double sum, max;
double *logp;
double exponent;
long double sigmoid;
double performance = 0.0;
ghmm_dseq *sq;
ARRAY_CALLOC (logp, noC);
/* iterate over all classes */
for (k = 0; k < noC; k++) {
sq = sqs[k];
/*iterate over all training sequences */
for (l = 0; l < sq->seq_number; l++) {
/* iterate over all classes */
for (m = 0; m < noC; m++) {
temp = ghmm_dmodel_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
if (0 != temp)
printf ("ghmm_dmodel_logp error in sequence[%d][%d] under model %d (%g)\n",
k, l, m, logp[m]);
/*printf("ghmm_dmodel_logp sequence[%d][%d] under model %d (%g)\n", k, l, m, logp[m]);*/
}
max = 1.0;
for (m = 0; m < noC; m++) {
if (m != k && (1.0 == max || max < (logp[m] + log (mo[m]->prior)))) {
max = logp[m] + log (mo[m]->prior);
argmax = m;
}
}
/* sum */
sum = 1.0;
for (m = 0; m < noC; m++) {
if (m != k && m != argmax)
sum += exp (logp[m] + log (mo[m]->prior) - max);
}
sum = log (sum) + max;
exponent = logp[k] + log (mo[k]->prior) - sum;
      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent too small (%g), clamping it to log(LDBL_MIN)!\n",
                exponent);
        exponent = (double) logl (LDBL_MIN);
      }
sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));
performance += (double) sigmoid;
}
}
m_free (logp);
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
return performance;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
static void discrime_print_statistics(ghmm_dmodel** mo, ghmm_dseq** sqs, int noC,
int* falseP, int* falseN)
{
#define CUR_PROC "discrime_print_statistics"
int k, l, m;
int argmax;
double *logp, max;
ghmm_dseq *sq;
ARRAY_CALLOC (logp, noC);
for (k = 0; k < noC; k++) {
falseP[k] = 0;
falseN[k] = 0;
}
for (k = 0; k < noC; k++) {
sq = sqs[k];
printf ("Looking at training tokens of Class %d\n", k);
for (l = 0; l < sq->seq_number; l++) {
argmax = 0, max = -DBL_MAX;
for (m = 0; m < noC; m++) {
ghmm_dmodel_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
if (m == 0 || max < logp[m]) {
max = logp[m];
argmax = m;
}
}
if (sq->seq_number < 11 && noC < 8) {
/* printing fancy statistics */
printf ("%2d: %8.4g", l, logp[0] - logp[argmax]);
for (m = 1; m < noC; m++)
printf (", %8.4g", logp[m] - logp[argmax]);
printf (" \t+(%g)\n", logp[argmax]);
}
/* counting false positives and false negatives */
if (argmax != k) {
falseP[argmax]++;
falseN[k]++;
}
}
printf ("%d false negatives in class %d.\n", falseN[k], k);
}
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (logp);
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
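/**
 * Renormalises an updated parameter vector so that it is a valid
 * distribution again: if any entry is negative, all entries are shifted
 * up by 1.1 * |min|, then the vector is scaled to sum to one.
 * E.g. (0.5, -0.1, 0.2) is shifted to (0.61, 0.01, 0.31) and scaled by
 * 1/0.93 to roughly (0.656, 0.011, 0.333).
 */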
static void discrime_trim_gradient (double *new, int length)
{
#define CUR_PROC "discrime_trim_gradient"
int i, argmin = 0;
double min, sum = 0;
/* printf("unnormalised: %g", new[0]); */
/* for (i=1; i<length; i++) */
/* printf(", %g", new[i]); */
/* printf("\n"); */
for (i = 1; i < length; i++)
if (new[i] < new[argmin])
argmin = i;
min = new[argmin];
if (new[argmin] < 0.0)
for (i = 0; i < length; i++)
new[i] -= (1.1 * min);
for (i = 0; i < length; i++)
sum += new[i];
for (i = 0; i < length; i++)
new[i] /= sum;
/* printf(" normalised: %g", new[0]); */
/* for (i=1; i<length; i++) */
/* printf(", %g", new[i]); */
/* printf("\n"); */
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
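/**
 * Gradient update of the initial state probabilities of model `class'
 * (the a and b gradient updates below follow the same scheme): each
 * parameter x is moved by discrime_lambda * (sum / x), where sum is the
 * expectation-weighted difference between the model's own class and the
 * competing classes, i.e. an approximate partial derivative of the
 * objective; the result is renormalised by discrime_trim_gradient.
 */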
static void discrime_update_pi_gradient (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
int class, double ****expect_pi,
long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_gradient"
int i, l, m;
double *pi_old = NULL;
double *pi_new = NULL;
double sum;
ghmm_dseq *sq = NULL;
ARRAY_CALLOC (pi_old, mo[class]->N);
ARRAY_CALLOC (pi_new, mo[class]->N);
  /* iterate over all states of the current model */
for (i = 0; i < mo[class]->N; i++) {
/* iterate over all classes */
sum = 0.0;
for (m = 0; m < noC; m++) {
sq = sqs[m];
/*iterate over all training sequences */
if (m == class)
for (l = 0; l < sq->seq_number; l++)
sum += omega[class][l] * expect_pi[class][l][class][i];
else
for (l = 0; l < sq->seq_number; l++)
sum -= omegati[m][l][class] * expect_pi[m][l][class][i];
}
/* check for valid new parameter */
pi_old[i] = mo[class]->s[i].pi;
if (fabs (pi_old[i]) == 0.0)
pi_new[i] = pi_old[i];
else
pi_new[i] = pi_old[i] + discrime_lambda * (sum / pi_old[i]);
}
  /* change parameters to fit into the valid parameter range */
discrime_trim_gradient (pi_new, mo[class]->N);
/* update parameters */
for (i = 0; i < mo[class]->N; i++) {
/* printf("update parameter Pi in state %d of model %d with: %5.3g",
i, class, pi_new[i]);
printf(" (%5.3g) old value.\n", pi_old[i]); */
mo[class]->s[i].pi = pi_new[i];
}
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (pi_old);
m_free (pi_new);
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
static void discrime_update_a_gradient (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
int class, double ****expect_a,
long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_a_gradient"
int i, j, g, l, m;
int j_id;
double *a_old = NULL;
double *a_new = NULL;
double sum;
ghmm_dseq *sq = NULL;
ARRAY_CALLOC (a_old, mo[class]->N);
ARRAY_CALLOC (a_new, mo[class]->N);
/* updating current class */
  /* iterate over all states of the current model */
for (i = 0; i < mo[class]->N; i++) {
for (j = 0; j < mo[class]->s[i].out_states; j++) {
j_id = mo[class]->s[i].out_id[j];
sum = 0.0;
/* iterate over all classes and training sequences */
for (m = 0; m < noC; m++) {
sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            sum += omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum -= omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
}
/* check for valid new parameter */
a_old[j] = mo[class]->s[i].out_a[j];
if (fabs (a_old[j]) == 0.0)
a_new[j] = a_old[j];
else
a_new[j] = a_old[j] + discrime_lambda * (sum / a_old[j]);
}
    /* change parameters to fit into the valid parameter range */
discrime_trim_gradient (a_new, mo[class]->s[i].out_states);
/* update parameter */
for (j = 0; j < mo[class]->s[i].out_states; j++) {
/* printf("update parameter A[%d] in state %d of model %d with: %5.3g",
j, i, class, a_new[j]);
printf(" (%5.3g) old value.\n", a_old[j]); */
mo[class]->s[i].out_a[j] = a_new[j];
/* mirror out_a to corresponding in_a */
j_id = mo[class]->s[i].out_id[j];
for (g = 0; g < mo[class]->s[j_id].in_states; g++)
if (i == mo[class]->s[j_id].in_id[g]) {
mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
break;
}
}
}
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (a_old);
m_free (a_new);
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
static void discrime_update_b_gradient (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
int class, double *****expect_b,
long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_b_gradient"
int i, h, l, m;
int hist, size;
double *b_old = NULL;
double *b_new = NULL;
double sum;
ARRAY_CALLOC (b_old, mo[class]->M);
ARRAY_CALLOC (b_new, mo[class]->M);
/* updating current class */
  /* iterate over all states of the current model */
for (i = 0; i < mo[class]->N; i++) {
if (mo[class]->s[i].fix)
continue;
size = ghmm_ipow (mo[class], mo[class]->M, mo[class]->order[i] + 1);
for (hist = 0; hist < size; hist += mo[class]->M) {
for (h = hist; h < hist + mo[class]->M; h++) {
/* iterate over all classes and training sequences */
sum = 0.0;
for (m = 0; m < noC; m++) {
if (m == class)
for (l = 0; l < sqs[m]->seq_number; l++)
sum += omega[class][l] * expect_b[class][l][class][i][h];
else
for (l = 0; l < sqs[m]->seq_number; l++)
sum -= omegati[m][l][class] * expect_b[m][l][class][i][h];
}
/* check for valid new parameters */
b_old[h] = mo[class]->s[i].b[h];
if (fabs (b_old[h]) == 0.0)
b_new[h] = b_old[h];
else
b_new[h] = b_old[h] + discrime_lambda * (sum / b_old[h]);
}
/* change parameters to fit into valid parameter range */
      discrime_trim_gradient (b_new, mo[class]->M);
for (h = hist; h < hist + mo[class]->M; h++) {
/* update parameters */
/*printf("update parameter B[%d] in state %d of model %d with: %5.3g",
h, i, class, b_new[h]);
printf(" (%5.3g) old value.\n", b_old[h]); */
mo[class]->s[i].b[h] = b_new[h];
}
}
}
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (b_old);
m_free (b_new);
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
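/**
 * Closed-form re-estimation of the initial state probabilities of model
 * `class' (the a and b updates below follow the same scheme). Each
 * parameter becomes the ratio of its accumulated expectation (own-class
 * terms minus lfactor-scaled competing-class terms) and a Lagrangian
 * multiplier that restores the sum-to-one constraint; the result is then
 * smoothed towards the old value with TRIM.
 */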
static void discrime_update_pi_closed (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
int class, double lfactor,
double ****expect_pi, long double **omega,
long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_closed"
int i, l, m;
double *pi_old = NULL;
double *pi_new = NULL;
double sum, lagrangian;
ghmm_dseq *sq = NULL;
ARRAY_CALLOC (pi_old, mo[class]->N);
ARRAY_CALLOC (pi_new, mo[class]->N);
  /* updating current class (all or only specified) */
  /* iterate over all states of the current model to compute the
     Lagrangian multiplier */
lagrangian = 0.0;
for (i = 0; i < mo[class]->N; i++) {
/* iterate over all classes */
for (m = 0; m < noC; m++) {
sq = sqs[m];
/* if current class and class of training sequence match, add the first term */
if (m == class)
for (l = 0; l < sq->seq_number; l++)
lagrangian -= omega[class][l] * expect_pi[class][l][class][i];
/* otherwise the second */
else
for (l = 0; l < sq->seq_number; l++)
lagrangian +=
lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
}
}
for (i = 0; i < mo[class]->N; i++) {
/* iterate over all classes */
sum = 0.0;
for (m = 0; m < noC; m++) {
sq = sqs[m];
/*iterate over all training sequences */
if (m == class)
for (l = 0; l < sq->seq_number; l++)
sum -= omega[class][l] * expect_pi[class][l][class][i];
else
for (l = 0; l < sq->seq_number; l++)
sum += lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
}
/* check for valid new parameter */
pi_old[i] = mo[class]->s[i].pi;
if (fabs (lagrangian) == 0.0)
pi_new[i] = pi_old[i];
else
pi_new[i] = sum / lagrangian;
}
/* update parameters */
for (i = 0; i < mo[class]->N; i++) {
/* printf("update parameter Pi in state %d of model %d with: %5.3g",
i, class, pi_new[i]);
printf(" (%5.3g) old value.\n", pi_old[i]); */
mo[class]->s[i].pi = TRIM (pi_old[i], pi_new[i]);
}
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (pi_old);
m_free (pi_new);
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
static void discrime_update_a_closed (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
int class, double lfactor, double ****expect_a,
long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_a_closed"
int i, j, g, l, m;
int j_id;
double *a_old = NULL;
double *a_new = NULL;
double sum, lagrangian;
ghmm_dseq *sq = NULL;
ARRAY_CALLOC (a_old, mo[class]->N);
ARRAY_CALLOC (a_new, mo[class]->N);
/* updating current class (all or only specified) */
  /* iterate over all states of the current model */
for (i = 0; i < mo[class]->N; i++) {
/* compute lagrangian multiplier */
lagrangian = 0.0;
for (j = 0; j < mo[class]->s[i].out_states; j++) {
j_id = mo[class]->s[i].out_id[j];
/* iterate over all classes and training sequences */
for (m = 0; m < noC; m++) {
sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            lagrangian -= omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            lagrangian += lfactor * omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
}
}
for (j = 0; j < mo[class]->s[i].out_states; j++) {
j_id = mo[class]->s[i].out_id[j];
sum = 0.0;
/* iterate over all classes and training sequences */
for (m = 0; m < noC; m++) {
sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            sum -= omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum += lfactor * omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
}
/* check for valid new parameter */
a_old[j] = mo[class]->s[i].out_a[j];
if (fabs (lagrangian) == 0.0)
a_new[j] = a_old[j];
else
a_new[j] = sum / lagrangian;
}
/* update parameter */
for (j = 0; j < mo[class]->s[i].out_states; j++) {
mo[class]->s[i].out_a[j] = TRIM (a_old[j], a_new[j]);
/*printf("update parameter A[%d] in state %d of model %d with: %5.3g",
j, i, class, mo[class]->s[i].out_a[j]);
printf(" (%5.3g) old value, %5.3g estimted\n", a_old[j], a_new[j]); */
/* mirror out_a to corresponding in_a */
j_id = mo[class]->s[i].out_id[j];
for (g = 0; g < mo[class]->s[j_id].in_states; g++)
if (i == mo[class]->s[j_id].in_id[g]) {
mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
break;
}
}
}
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (a_old);
m_free (a_new);
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
static void discrime_update_b_closed (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
int class, double lfactor,
double *****expect_b, long double **omega,
long double ***omegati)
{
#define CUR_PROC "discrime_update_b_closed"
int i, h, l, m;
int hist, size;
double *b_old = NULL;
double *b_new = NULL;
double sum, lagrangian;
ARRAY_CALLOC (b_old, mo[class]->M);
ARRAY_CALLOC (b_new, mo[class]->M);
  /* iterate over all states of the current model */
for (i = 0; i < mo[class]->N; i++) {
if (mo[class]->s[i].fix)
continue;
size = ghmm_ipow (mo[class], mo[class]->M, mo[class]->order[i] + 1);
for (hist = 0; hist < size; hist += mo[class]->M) {
/* compute lagrangian multiplier */
lagrangian = 0.0;
for (h = hist; h < hist + mo[class]->M; h++) {
/* iterate over all classes and training sequences */
for (m = 0; m < noC; m++) {
if (m == class)
for (l = 0; l < sqs[m]->seq_number; l++)
lagrangian -= omega[class][l] * expect_b[class][l][class][i][h];
else
for (l = 0; l < sqs[m]->seq_number; l++)
lagrangian +=
lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
}
}
for (h = hist; h < hist + mo[class]->M; h++) {
/* iterate over all classes and training sequences */
sum = 0.0;
for (m = 0; m < noC; m++) {
if (m == class)
for (l = 0; l < sqs[m]->seq_number; l++)
sum -= omega[class][l] * expect_b[class][l][class][i][h];
else
for (l = 0; l < sqs[m]->seq_number; l++)
sum +=
lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
}
/* check for valid new parameters */
b_old[h] = mo[class]->s[i].b[h];
if (fabs (lagrangian) == 0.0)
b_new[h] = b_old[h];
else
b_new[h] = sum / lagrangian;
}
/* update parameters */
for (h = hist; h < hist + mo[class]->M; h++) {
mo[class]->s[i].b[h] = TRIM (b_old[h], b_new[h]);
/*printf("update parameter B[%d] in state %d of model %d with: %5.3g",
h, i, class, mo[class]->s[i].b[h]);
printf(" (%5.3g) old value, estimate %5.3g\n", b_old[h], b_new[h]); */
}
}
}
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (b_old);
m_free (b_new);
return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
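/**
 * Returns the largest scaling factor (at most the given lfactor) for the
 * competing-class terms such that for every parameter of the k-th model
 * the own-class expectation still dominates the scaled competing-class
 * expectation. This keeps the numerators and Lagrangians of the
 * closed-form updates non-negative, so the re-estimated values remain
 * valid probabilities.
 */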
static double discrime_find_factor (ghmm_dmodel * mo, ghmm_dseq ** sqs, int noC, int k,
double lfactor, double ****expect_pi,
double ****expect_a, double *****expect_b,
long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_find_factor"
int h, i, j, l, m;
int size, hist, j_id;
double self, other;
ghmm_dseq *sq;
  /* iterate over all states of the k-th model */
for (i = 0; i < mo->N; i++) {
/* PI */
/* iterate over all classes */
self = other = 0.0;
for (m = 0; m < noC; m++) {
sq = sqs[m];
/* if current class and class of training sequence match, add the first term */
if (m == k)
for (l = 0; l < sq->seq_number; l++)
self += omega[k][l] * expect_pi[k][l][k][i];
/* otherwise the second */
else
for (l = 0; l < sq->seq_number; l++)
other += lfactor * omegati[m][l][k] * expect_pi[m][l][k][i];
}
if (self < other)
lfactor *= self / other;
/*printf("PI[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/
/* A */
for (j = 0; j < mo->s[i].out_states; j++) {
j_id = mo->s[i].out_id[j];
/* iterate over all classes and training sequences */
self = other = 0.0;
for (m = 0; m < noC; m++) {
sq = sqs[m];
if (m == k)
for (l = 0; l < sq->seq_number; l++)
self += omega[k][l] * expect_a[k][l][k][i * mo->N + j_id];
else
for (l = 0; l < sq->seq_number; l++)
other +=
lfactor * omegati[m][l][k] * expect_a[m][l][k][i * mo->N +
j_id];
}
if (self < other)
lfactor *= self / other;
/*printf(" A[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/
}
/* B */
if (mo->s[i].fix)
continue;
size = ghmm_ipow(mo, mo->M, mo->order[i] + 1);
for (hist = 0; hist < size; hist += mo->M) {
for (h = hist; h < hist + mo->M; h++) {
self = other = 0.0;
/* iterate over all classes and training sequences */
for (m = 0; m < noC; m++) {
if (m == k)
for (l = 0; l < sqs[m]->seq_number; l++)
self += omega[k][l] * expect_b[k][l][k][i][h];
else
for (l = 0; l < sqs[m]->seq_number; l++)
other += lfactor * omegati[m][l][k] * expect_b[m][l][k][i][h];
}
if (self < other)
lfactor *= self / other;
/*printf(" B[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/
}
}
}
  return lfactor;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
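/**
 * Performs one discriminative training step for model `class': allocate
 * the expectation matrices, run forward/backward on every training
 * sequence under every model (discrime_precompute), compute the outer
 * derivatives (discrime_calculate_omega), and update pi, a and b either
 * by gradient ascent or by the closed-form re-estimation formulas.
 */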
static int discrime_onestep (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC, int grad,
int class)
{
#define CUR_PROC "discrime_onestep"
int j, l, retval = -1;
/* expected use of parameter (for partial derivatives) */
double *****expect_b, ****expect_a, ****expect_pi;
/* matrix of log probabilities */
double ***log_p;
/* matrix of outer derivatives */
long double **omega, ***omegati;
long double psum = 0, nsum = 0;
double lfactor;
if (-1 == discrime_galloc (mo, sqs, noC, &expect_b, &expect_a,
&expect_pi, &omega, &omegati, &log_p)) {
printf ("Allocation Error.\n");
return -1;
}
if (-1 ==
discrime_precompute (mo, sqs, noC, expect_b, expect_a, expect_pi,
log_p)) {
printf ("precompute Error.\n");
goto FREE;
}
if (-1 == discrime_calculate_omega (mo, sqs, noC, omega, omegati, log_p)) {
printf ("omega Error.\n");
goto FREE;
}
if (grad) {
    /* updating parameters */
discrime_update_pi_gradient (mo, sqs, noC, class, expect_pi, omega,
omegati);
discrime_update_a_gradient (mo, sqs, noC, class, expect_a, omega,
omegati);
discrime_update_b_gradient (mo, sqs, noC, class, expect_b, omega,
omegati);
}
else {
    /* calculate a reasonable lfactor */
for (j = 0; j < noC; j++) {
for (l = 0; l < sqs[j]->seq_number; l++) {
if (j == class)
psum += omega[class][l];
else
nsum += omegati[j][l][class];
}
}
if (fabs (nsum) > 1e-200 && psum < nsum)
lfactor = (psum / nsum);
else
lfactor = 1.0;
    /* determine the maximal admissible lfactor */
    lfactor = discrime_find_factor (mo[class], sqs, noC, class, lfactor,
                                    expect_pi, expect_a, expect_b, omega,
                                    omegati);
    lfactor *= .99;
printf ("Calculated L: %g\n", lfactor);
/* updating parameters */
discrime_update_pi_closed (mo, sqs, noC, class, lfactor, expect_pi, omega,
omegati);
discrime_update_a_closed (mo, sqs, noC, class, lfactor, expect_a, omega,
omegati);
discrime_update_b_closed (mo, sqs, noC, class, lfactor, expect_b, omega,
omegati);
}
retval = 0;
FREE:
discrime_gfree (mo, sqs, noC, expect_b, expect_a, expect_pi, omega,
omegati, log_p);
return retval;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
int ghmm_dmodel_label_discriminative(ghmm_dmodel** mo, ghmm_dseq** sqs, int noC, int max_steps,
int gradient)
{
#define CUR_PROC "ghmm_dmodel_label_discriminative"
double last_perf, cur_perf;
int retval=-1, last_cer, cur_cer;
double lambda=0.0;
double noiselevel = 0.0667;
int fp, fn, totalseqs = 0, totalsyms = 0;
int *falseP=NULL, *falseN=NULL;
double * prior_backup=NULL;
int i, k, step;
ghmm_dmodel * last;
ARRAY_CALLOC (falseP, noC);
ARRAY_CALLOC (falseN, noC);
ARRAY_CALLOC (prior_backup, noC);
for (i = 0; i < noC; i++) {
totalseqs += sqs[i]->seq_number;
for (k = 0; k < sqs[i]->seq_number; k++)
totalsyms += sqs[i]->seq_len[k];
}
  /* set the priors from the number of training sequences and
     remember the old values */
for (i=0; i<noC; i++) {
prior_backup[i] = mo[i]->prior;
mo[i]->prior = (double)sqs[i]->seq_number / totalseqs;
printf("original prior: %g \t new prior %g\n", prior_backup[i], mo[i]->prior);
}
last_perf = ghmm_dmodel_label_discrim_perf (mo, sqs, noC);
cur_perf = last_perf;
discrime_print_statistics (mo, sqs, noC, falseP, falseN);
fp = fn = 0;
for (i = 0; i < noC; i++) {
printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
ghmm_dmodel_likelihood (mo[i], sqs[i]), falseP[i]);
fn += falseN[i];
fp += falseP[i];
}
printf
("\n%d false positives and %d false negatives after ML-initialisation; Performance: %g.\n",
fp, fn, last_perf);
last_cer = cur_cer = fp;
for (k = 0; k < noC; k++) {
last = NULL;
step = 0;
if (gradient)
lambda = .3;
do {
if (last)
ghmm_dmodel_free (&last);
/* save a copy of the currently trained model */
last = ghmm_dmodel_copy (mo[k]);
last_cer = cur_cer;
last_perf = cur_perf;
noiselevel /= 1.8;
discrime_lambda = lambda / totalsyms;
printf
("==============================================================\n");
printf
("Optimising class %d, current step %d, lambda: %g noise: %g, gradient: %d\n",
k, step, discrime_lambda, noiselevel, gradient);
/* if (gradient) */
/* ghmm_dmodel_add_noise(mo[k], noiselevel, 0); */
discrime_onestep (mo, sqs, noC, gradient, k);
cur_perf = ghmm_dmodel_label_discrim_perf (mo, sqs, noC);
discrime_print_statistics (mo, sqs, noC, falseP, falseN);
fp = fn = 0;
for (i = 0; i < noC; i++) {
printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
ghmm_dmodel_likelihood (mo[i], sqs[i]), falseP[i]);
fn += falseN[i];
fp += falseP[i];
}
cur_cer = fp;
printf ("MAP=%12g -> training -> MAP=%12g", last_perf, cur_perf);
printf (" %d false positives, %d false negatives\n", fp, fn);
printf
("==============================================================\n");
} while ((last_perf < cur_perf || cur_cer < last_cer) && (step++ < max_steps));
    /* revert to the last parameter set that still improved performance */
    ghmm_dmodel_free (&(mo[k]));
    mo[k] = ghmm_dmodel_copy (last);
ghmm_dmodel_free (&last);
cur_perf = last_perf;
cur_cer = last_cer;
}
/* resetting priors to old values */
for (i = 0; i < noC; i++)
mo[i]->prior = prior_backup[i];
retval = 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
m_free (prior_backup);
m_free (falseP);
m_free (falseN);
return retval;
#undef CUR_PROC
}
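/*
 * Usage sketch (illustrative only): one model and one ghmm_dseq set of
 * labelled training sequences per class. How the models are created and
 * ML-initialised (e.g. by Baum-Welch) and how the sequence sets are
 * loaded is application specific and only hinted at here.
 *
 *   ghmm_dmodel *mo[2];   // one model per class
 *   ghmm_dseq *sqs[2];    // training sequences, sqs[k] for class k
 *   // ... create and ML-train mo[0], mo[1]; load sqs[0], sqs[1] ...
 *   if (-1 == ghmm_dmodel_label_discriminative (mo, sqs, 2, 25, 0))
 *     fprintf (stderr, "discriminative training failed\n");
 */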