Codebase list tigr-glimmer / upstream/3.02b SimpleMake / score-fixed.cc
upstream/3.02b

Tree @upstream/3.02b (Download .tar.gz)

score-fixed.cc @upstream/3.02braw · history · blame

//    Programmer:  Arthur L. Delcher
//          File:  score-fixed.cc
//  Last Updated:  Mon Jun  7 10:35:06 EDT 2004
//
//  Compute scores for a set of fixed length strings using
//  the model in the file on the command line.


#include  "score-fixed.hh"


static char  * Pos_Model_Path;
  // Name of file containing the positive model
static char  * Neg_Model_Path;
  // Name of file containing the negative model
static bool  Simple_Output = false;
  // If true, output is just string number (starting with 0)
  // and either 1 or -1 indicating which model scores higher
static bool  Use_Neg_ICM_Model = false;
  // If true then negative model is usual (streaming) ICM
  // instead of fixed-length ICM
static bool  Use_Null_Neg_Model = false;
  // If true then negative model is ignored and value
  // from it is automatically zero


//**ALD  Gets rid of make undefined reference error
int  Unused = Filter ('a');



int  main
    (int argc, char * argv [])

  {
   Fixed_Length_ICM_t  pos_model;
   ICM_t  neg_icm_model;
   Fixed_Length_ICM_t  neg_fixed_model;
   char  * string = NULL, * tag = NULL;
   long int  string_size = 0, tag_size = 0;
   int  string_num = 0;

   Parse_Command_Line (argc, argv);

   pos_model . read (Pos_Model_Path);
   fprintf (stderr, "pos model  len = %d  special = %d  type = %d\n",
            pos_model . getModelLength (),
            pos_model . getSpecialPosition (),
            pos_model . getModelType ());
   if  (Use_Null_Neg_Model)
       fprintf (stderr, "Using null negative model\n");
   else if  (Use_Neg_ICM_Model)
       neg_icm_model . Read (Neg_Model_Path);
     else
       {
        neg_fixed_model . read (Neg_Model_Path);
        fprintf (stderr, "neg model  len = %d  special = %d  type = %d\n",
                 neg_fixed_model . getModelLength (),
                 neg_fixed_model . getSpecialPosition (),
                 neg_fixed_model . getModelType ());
       }

   while  (Read_String (stdin, string, string_size, tag, tag_size))
     {
      double  pos_score, neg_score;
      double  avg_pos_score, avg_neg_score;
      int  len;

      string_num ++;
      len = strlen (string);

      pos_score = pos_model . score (string);
      if  (Use_Null_Neg_Model)
          neg_score = 0.0;
      else if  (Use_Neg_ICM_Model)
          neg_score = neg_icm_model . Score_String (string, strlen (string), 1);
        else
          neg_score = neg_fixed_model . score (string);

      avg_pos_score = pos_score / len;
      avg_neg_score = neg_score / len;

      if  (Simple_Output)
          printf ("%6d %3d\n", string_num - 1,
                  pos_score >= neg_score ? 1 : -1);
        else
          printf ("%5d:  %10.4f %9.5f   %10.4f %9.5f   %9.5f\n",
                  string_num, pos_score, avg_pos_score,
                  neg_score, avg_neg_score, avg_pos_score - avg_neg_score );
     }

   return  0;
  }



static void  Parse_Command_Line
    (int argc, char * argv [])

//  Get options and parameters from command line with  argc
//  arguments in  argv [0 .. (argc - 1)] .

  {
   bool  errflg = false;
   int  ch;

   optarg = NULL;

   while  (! errflg
             && ((ch = getopt (argc, argv, "hINs")) != EOF))
     switch  (ch)
       {
        case  'h' :
          errflg = true;
          break;

        case  'I' :
          Use_Neg_ICM_Model = true;
          break;

        case  'N' :
          Use_Null_Neg_Model = true;
          break;

        case  's' :
          Simple_Output = true;
          break;

        case  '?' :
          fprintf (stderr, "Unrecognized option -%c\n", optopt);

        default :
          errflg = TRUE;
       }

   if  (errflg || (Use_Null_Neg_Model && optind > argc - 1)
          || (! Use_Null_Neg_Model && optind != argc - 2))
       {
        Usage (argv [0]);
        exit (EXIT_FAILURE);
       }

   Pos_Model_Path = argv [optind ++];
   if  (! Use_Null_Neg_Model)
       Neg_Model_Path = argv [optind ++];

   return;
  }



int  Read_String
    (FILE * fp, char * & s, long int & s_size, char * & tag,
     long int & tag_size)

//  Read next string from  fp  (assuming FASTA format) into  s [0 .. ]
//  which has  s_size  characters.  Allocate extra memory if needed
//  and adjust  s_size  accordingly.  Return  TRUE  if successful,  FALSE
//  otherwise (e.g., EOF).  Put FASTA header line into  tag [0 .. ]
//  (and adjust  tag_size  if needed).

  {
   int  ch, ct;

   while  ((ch = fgetc (fp)) != EOF && ch != '>')
     ;

   if  (ch == EOF)
       return  FALSE;

   ct = 0;
   while  ((ch = fgetc (fp)) != EOF && ch != '\n' && isspace (ch))
     ;
   if  (ch == EOF)
       return  FALSE;
   if  (ch != '\n' && ! isspace (ch))
       ungetc (ch, fp);
   while  ((ch = fgetc (fp)) != EOF && ch != '\n')
     {
      if  (ct >= tag_size - 1)
          {
           tag_size += INCR_SIZE;
           tag = (char *) Safe_realloc (tag, tag_size);
          }
      tag [ct ++] = char (ch);
     }
   tag [ct ++] = '\0';

   ct = 0;
   while  ((ch = fgetc (fp)) != EOF && ch != '>')
     {
      if  (isspace (ch))
          continue;

      if  (ct >= s_size - 1)
          {
           s_size += INCR_SIZE;
           s = (char *) Safe_realloc (s, s_size);
          }
      s [ct ++] = char (ch);
     }
   s [ct ++] = '\0';

   if  (ch == '>')
       ungetc (ch, fp);

   return  TRUE;
  }



static void  Usage
    (char * command)

//  Print to stderr description of options and command line for
//  this program.   command  is the command that was used to
//  invoke it.

  {
   fprintf (stderr,
           "USAGE:  %s [options]  <pos-model>  <neg-model>  < input-file\n"
           "\n"
           "Read sequences from  stdin  and score them using fixed-length\n"
           "model in file  <model> .  Output scores to  stdout.\n"
           "Output columns are:  sequence number, positive total score,\n"
           "  positive score per base, negative total score,\n"
           "  negative score per base, delta positive/negative per-base scores.\n"
           "\n"
           "Options:\n"
           " -h        Print this message\n"
           " -I        Negative model is regular ICM, not fixed-length ICM\n"
           " -N        Use NULL negative model, i.e., constant zero\n"
           " -s        Output simple format of string num and 1 or -1\n"
           "\n",
           command);

   return;
  }