Codebase list tigr-glimmer / debian/latest SimpleMake / glimmer3.hh
debian/latest

Tree @debian/latest (Download .tar.gz)

glimmer3.hh @debian/latest · raw · history · blame

//  A. L. Delcher
//
//  File:  glimmer3.hh
//
//  Last Modified:  Tue May  9 10:25:40 EDT 2006
//
//  Declarations for  Glimmer3



#ifndef  __GLIMMER3_HH_INCLUDED
#define  __GLIMMER3_HH_INCLUDED


#include  "delcher.hh"
#include  "fasta.hh"
#include  "gene.hh"
#include  "icm.hh"


// Default values of global variables, plus fixed scoring constants

static const bool  DEFAULT_GENOME_IS_CIRCULAR = true;
static const int  DEFAULT_MIN_GENE_LEN = 100;
static const int  DEFAULT_MAX_OLAP_BASES = 30;
static const int  DEFAULT_RIBOSOME_WINDOW_SIZE = 20;
static const double  DEFAULT_START_PROB []
     = {0.60, 0.30, 0.10};
  // three probabilities that sum to 1.0; presumably one entry per
  // start codon -- TODO confirm against the code that reads them
static const int  DEFAULT_THRESHOLD_SCORE = 30;
static const bool  DEFAULT_USE_FIRST_START_CODON = false;
static const bool  DEFAULT_USE_INDEPENDENT_SCORE = true;
  // both flags hold boolean values, so they are typed  bool  like
  // DEFAULT_GENOME_IS_CIRCULAR  rather than  int
static const int  HI_SCORE = 100;
  // the highest possible ICM score for an orf
static const double  LONG_ORF_SCORE_PER_BASE = 0.03;
  // artificially good score value for sufficiently long orfs
  //**ALD Should maybe change to a lower value like 0.01 ??


//  Kinds of event stored in  Event_Node_t :: e_type .  Enumerators
//  take consecutive values starting at 0 in the order listed.
//  (FWD/REV presumably refer to the forward and reverse strands --
//  confirm against the code that creates events.)
enum  Event_t
  {
   INITIAL,      // 0
   FWD_START,    // 1
   FWD_STOP,     // 2
   REV_START,    // 3
   REV_STOP,     // 4
   TERMINAL      // 5
  };


struct  Event_Node_t
  {
   int  id : 24;
   int  frame : 3;
   unsigned  is_first_start : 1;
   unsigned  disqualified : 1;
   unsigned  truncated : 1;
   Event_t  e_type;
   int  pos, pwm_sep;
     // pos is the last base of the codon, numbered starting at 1
   double  score, pwm_score;
   Event_Node_t  * frame_pred;
   Event_Node_t  * best_pred;

   Event_Node_t  ()   // default constructor
     { is_first_start = disqualified = truncated = 0; }

   void  Set_Frame_From_Pos
       (void);
  };


//  Return  true  iff the node that  a  points to has a strictly
//  smaller  pos  than the node that  b  points to.  Both pointers are
//  dereferenced without a NULL check.
static bool  Event_Pos_Cmp
    (Event_Node_t * const & a, Event_Node_t * const & b)
  {
   const int  a_pos = a -> pos;
   const int  b_pos = b -> pos;

   return  (b_pos > a_pos);
  }


//  Position record for one orf: three integers and a C-string tag.
struct  Orf_Pos_t
  {
   int  start;
   int  stop;
   int  dir;
   char  * tag;
     // NOTE(review): ownership of the string is not visible in this
     // header -- confirm who allocates and frees it
  };


//  A pair of integer bounds;  Range_Cmp  orders these by  lo .
struct  Range_t
  {
   int  lo;
   int  hi;
  };


//  Return  true  iff  a . lo  is strictly less than  b . lo .
//  The  hi  members are not examined.
static bool  Range_Cmp
    (const Range_t & a, const Range_t & b)
  {
   const int  a_lo = a . lo;
   const int  b_lo = b . lo;

   return  (b_lo > a_lo);
  }


//  Three integer fields:  lo ,  hi  and  max_prev .
//  NOTE(review): the meaning of  max_prev  is not shown in this
//  header -- see the code that fills it in.
struct  Position_t
  {
   int  lo;
   int  hi;
   int  max_prev;
  };


//  Record describing one start; vectors of these are passed to
//  Add_Events  and  Output_Extra_Start_Info .
struct  Start_t
  {
   int  j;
   int  pos;
   double  score;
   double  rate;
   int  which : 8;            // 8-bit bit-field
   unsigned  truncated : 1;   // 1-bit flag
   bool  first;
  };



//  Function prototypes.  No definitions for these appear in this
//  header.  Every prototype is declared  static  (internal linkage)
//  except  Position_To_Frame , so each translation unit that includes
//  this header and calls one of the static functions must provide its
//  own definition.

static void  Add_Events
    (const Orf_t & orf, vector <Start_t> & start_list, int id);
static void  Add_PWM_Score
    (Event_Node_t * p);
static void  All_Frame_Score
    (const string & s, int offset, int frame, vector <double> & af);
static void  Clear_Events
    (void);
static void  Complement_Transfer
    (string & buff, const string & s, int lo, int hi);
static void  Disqualify
    (Event_Node_t * p, int cutoff);
static void  Do_Fwd_Stop_Codon
    (int i, int frame, int prev_fwd_stop [3], int first_fwd_start [3],
     int first_fwd_stop [3], int first_base, bool hit_ignore,
     vector <Orf_t> & orf_list);
static void  Echo_General_Settings
    (FILE * fp);
static void  Echo_Specific_Settings
    (FILE * fp, int len);
static double  Entropy_Distance_Ratio
    (int start, int len, int fr);
static int  Find_Uncovered_Position
    (vector <Event_Node_t *> ep);
  // NOTE(review):  ep  is taken by value, so the vector is copied on
  // every call -- confirm this is intended before changing it (the
  // definition's signature must match)
static void  Find_Orfs
    (vector <Orf_t> & orf_list);
static void  Find_Stops_Reverse
    (const string & s, int len, vector <bool> & has_stop);
static void  Finish_Orfs
    (bool use_wraparound, const int prev_rev_stop [3],
     const int last_rev_start [3], int last_position,
     vector <Orf_t> & orf_list);
static void  Fix_Wrap
    (int & p, const int n);
static int  Frame_To_Sub
    (int f);
static void  Get_Ignore_Regions
    (void);
static void  Get_Orf_Pos_List
    (void);
static void  Handle_First_Forward_Stop
    (int fr, int pos, int start_pos, int first_base, int & gene_len,
     int & orf_len, bool use_wraparound);
static void  Handle_First_Reverse_Stop
    (int pos, int last_start, int & gene_len, int & orf_stop, bool hit_ignore);
static void  Handle_Last_Reverse_Stop
    (int fr, const int prev_rev_stop [3], const int last_rev_start [3],
     int & gene_len, int & orf_len, bool use_wraparound, int last_position);
static void  Initialize_Terminal_Events
    (Event_Node_t & first_event, Event_Node_t & final_event,
     Event_Node_t * best_event [6], Event_Node_t * last_event [6]);
static void  Integerize_Scores
    (const vector <double> ds, int hi_score, const vector <bool> set_zero,
    vector <int> & is);
  // NOTE(review):  ds  and  set_zero  are const but passed by value
  // (copied per call); only  is  is passed by reference
static double  Olap_Score_Adjustment
    (int lo, int hi, int f1, int f2);
static int  On_Seq_0
    (int i);
static int  On_Seq_1
    (int i);
static void  Output_Extra_Start_Info
    (FILE * fp, int i, int lo, int hi, int frame,
     vector <Start_t> & start_list);
static void  Parse_Command_Line
    (int argc, char * argv []);
template  <class DT>
static void  Permute_By_Frame
    (vector <DT> & v, int frame);
int  Position_To_Frame
    (int p);
  // NOTE(review): the only prototype here without  static ; it has
  // external linkage -- confirm that is deliberate
static void  Print_Comma_Separated_Strings
    (const vector <const char *> & v, FILE * fp);
static void  Print_Headings
    (FILE * fp);
static void  Print_Orflist_Headings
    (FILE * fp);
static const char  * Print_String
    (Event_t e);
static void  Prob_To_Logs
    (vector <double> & v);
static void  Process_Events
    (void);
static void  Process_Fwd_Start_Event
    (Event_Node_t * ep);
static void  Process_Fwd_Stop_Event
    (Event_Node_t * ep);
static void  Process_Initial_Event
    (Event_Node_t * ep);
static void  Process_Rev_Start_Event
    (Event_Node_t * ep);
static void  Process_Rev_Stop_Event
    (Event_Node_t * ep);
static void  PWM_Score_Fwd_Start
    (int pos, const PWM_t & pwm, int window, double & score, int & separation);
static void  PWM_Score_Rev_Start
    (int pos, const PWM_t & pwm, int window, double & score, int & separation);
static void  Read_Entropy_Profiles
    (const char * fn, bool & errflg);
static void  Read_Sequences
    (FILE * fp, vector <string> & seq_list, vector <string> & hdr_list,
     int & seq_ct);
static void  Requalify
    (Event_Node_t * p, int cutoff);
static void  Reverse_Complement_Transfer
    (string & buff, const string & s, int lo, int hi);
static void  Reverse_Transfer
    (string & buff, const string & s, int start, int len);
static void  Score_Orflist
    (FILE * detail_fp, FILE * summary_fp);
static void  Score_Orfs
    (vector <Orf_t> & orf_list, vector <Gene_t> & gene_list, FILE * fp);
static void  Score_Separate_Input
    (const string & seq, const string & hdr, int seq_num, FILE * detail_fp,
     FILE * predict_fp);
static void  Set_Final_Event
    (Event_Node_t & fe, Event_Node_t * best_event [6],
     int seq_len);
static void  Set_GC_Fraction
    (double & gc, const vector <string> & s);
static void  Set_Ignore_Score_Len
    (void);
static void  Set_Start_And_Stop_Codons
    (void);
static void  Shift_Events
    (vector <Event_Node_t *> & ep, int reference_pos);
static void  Show_Events
    (FILE * fp);
static void  Trace_Back
    (FILE * fp, const Event_Node_t & final_event);
static void  Usage
    (void);
static void  Wrap_Around_Back
    (int wfr, int pos, int & gene_len, int & orf_len);
static void  Wrap_Through_Front
    (int fr, int pos, int & gene_len, int & orf_len);

#endif