Codebase list tigr-glimmer / 3e814cdb-e004-4d37-9919-46690da6c39e/main SimpleMake / gene.hh
3e814cdb-e004-4d37-9919-46690da6c39e/main

Tree @3e814cdb-e004-4d37-9919-46690da6c39e/main (Download .tar.gz)

gene.hh @3e814cdb-e004-4d37-9919-46690da6c39e/mainraw · history · blame

//  A. L. Delcher
//
//  File:  gene.hh
//
//  Last Modified:  23 October 2003
//
//  DNA- and gene-related routines declarations



#ifndef  __GENE_HH_INCLUDED
#define  __GENE_HH_INCLUDED

#include  "xlate_tables.hh"


// Codon bit masks.  Each mask packs three 4-bit nucleotide sets into
// 12 bits, with the first codon character in the highest nibble, using
// a = 1, c = 2, g = 4 and t = 8.  E.g., ATG is 0x184.
// Ambiguity codes are the OR of their members:  R (a or g) is 0x5
// and Y (c or t) is 0xa, so RTG_MASK is 0x584 and CAY_MASK is 0x21a.
const unsigned  ATG_MASK = 0x184;
const unsigned  CAA_MASK = 0x211;
const unsigned  CAC_MASK = 0x212;
const unsigned  CAG_MASK = 0x214;
const unsigned  CAT_MASK = 0x218;
const unsigned  CAY_MASK = 0x21a;
const unsigned  CTA_MASK = 0x281;
const unsigned  CTG_MASK = 0x284;
const unsigned  GTG_MASK = 0x484;
const unsigned  RTG_MASK = 0x584;
const unsigned  TAA_MASK = 0x811;
const unsigned  TAG_MASK = 0x814;
const unsigned  TAR_MASK = 0x815;
const unsigned  TCA_MASK = 0x821;
const unsigned  TGA_MASK = 0x841;
const unsigned  TRA_MASK = 0x851;
const unsigned  TTA_MASK = 0x881;
const unsigned  TTG_MASK = 0x884;
const unsigned  TYA_MASK = 0x8a1;
const unsigned  YTA_MASK = 0xa81;
// Selects the low 8 bits of a codon value, i.e., its last two characters.
// NOTE(review): presumably applied before shifting in a new character --
// confirm against the callers.
const unsigned  SHIFT_MASK = 0xFF;

// Single-bit flag values; each occupies a distinct bit so they can be
// OR-ed together.
// NOTE(review): presumably these are the bits stored in Gene_t's status
// word -- confirm against the callers.
const unsigned  DELETE_FLAG = 0x01;
const unsigned  TRUNCATED_START_FLAG = 0x02;
const unsigned  TRUNCATED_STOP_FLAG = 0x04;

// Size constants.
// NOTE(review): presumably the initial size and growth increment of a
// dynamically grown buffer, and the maximum input line length -- their
// uses are not visible in this header; confirm.
const long int  INCR_SIZE = 10000;
const long int  INIT_SIZE = 10000;
const int  MAX_LINE = 300;

// Default entropy profiles, written as brace-enclosed initializer lists
// of 20 values each so they can initialize a 20-element array.
// NOTE(review): presumably one value per amino acid, for positive
// (gene) and negative (non-gene) models -- confirm.
#define  DEFAULT_POS_ENTROPY_PROF  {0.08468,0.01606,0.05739,0.05752,0.04328,\
  0.07042,0.02942,0.05624,0.04442,0.05620,0.03029,0.03975,0.05116,0.04098,\
  0.05989,0.08224,0.05660,0.06991,0.02044,0.03310}
#define  DEFAULT_NEG_ENTROPY_PROF  {0.07434,0.03035,0.05936,0.04729,0.05662,\
  0.07704,0.05777,0.05328,0.03360,0.05581,0.01457,0.03718,0.04594,0.05977,\
  0.08489,0.05990,0.04978,0.07227,0.01050,0.01974}
// Default start and stop codons, as lower-case three-letter strings.
const char  * const DEFAULT_START_CODON []
     = {"atg", "gtg", "ttg"};
const char  * const DEFAULT_STOP_CODON []
     = {"taa", "tag", "tga"};



class  Codon_t
  {
  // A codon stored as a bit pattern that allows ambiguous characters.

  private:
   static const unsigned  shift_mask = 0xff;
   static const unsigned  reverse_shift_mask = 0xff0;

   unsigned int  data;
     // Represent the codon as a 12-bit string.  Each character
     // is 4 bits, representing whether it can be a, c, g or t.
     // a is 1, c is 2, g is 4 and t is 8.
     // E.g., 'a' is 0001; IUPAC character 's' (which is 'c' or 'g')
     // is 0110.


  public:
   // Create an empty codon (all bits clear).
   Codon_t ()
     : data (0x0)
     {}

   // Reset the codon to all bits clear.
   void  Clear
       ()
     { data = 0x0; }
   bool  Can_Be
       (const vector <Codon_t> & a, int & which);
   bool  Must_Be
       (const vector <Codon_t> & a, int & which);
   // Write the codon bits to  fp  as at least 3 zero-padded lower-case
   // hex digits.  Does not modify the codon, hence  const  so it can
   // also be called on const objects and references.
   void  Print
       (FILE * fp)  const
     { fprintf (fp, "%03x", data); }
   void  Reverse_Complement
       (void);
   void  Reverse_Shift_In
       (char ch);
   void  Set_From
       (const char * s);
   void  Shift_In
       (char ch);
  };


class  Orf_t
  {
  // Position, frame and lengths of an open reading frame.

  protected:
   int  stop_position;
     // first base (i.e., lowest subscript) counting positions
     // starting at 1
   int  frame;
     // is determined by the leftmost position of the stop codon,
     // positions starting at 1, positive for forward, negative for
     // reverse
   int  orf_len;
   int  gene_len;

  public:
   // Start with every field zero so that no getter can return an
   // indeterminate value before the matching setter has been called.
   Orf_t  ()
     : stop_position (0), frame (0), orf_len (0), gene_len (0)
     {}

   int  Get_Frame  (void)  const
     { return  frame; }
   int  Get_Gene_Len  (void)  const
     { return  gene_len; }
   int  Get_Orf_Len  (void)  const
     { return  orf_len; }
   int  Get_Stop_Position  (void)  const
     { return  stop_position; }

   void  Set_Frame  (int i)
     { frame = i; }
   void  Set_Gene_Len  (int i)
     { gene_len = i; }
   void  Set_Orf_Len  (int i)
     { orf_len = i; }
   void  Set_Stop_Position  (int i)
     { stop_position = i; }

  };


// Four double values; used as the column type of PWM_t.
// NOTE(review): presumably one value per nucleotide (a, c, g, t) --
// confirm the index order against Nucleotide_To_Subscript.
struct  DNA_vect_t
  {
   double  p [4];
  };


class  PWM_t
  {
  // A matrix stored as a sequence of DNA_vect_t columns.

  private:
   vector <DNA_vect_t>  col;
     // the columns, in order

  public:
   PWM_t  ()
     {}

   // Inline helpers

   // Report the current number of columns on  cerr .
   void  Check  (void)
     {
      cerr << "PWM_t Check:  size = " << col . size () << endl;
     }
   // True iff the matrix has no columns.
   bool  Is_Empty  (void)  const
     {
      return  col . empty ();
     }
   // Number of columns, as an  int .
   int  Width  (void)  const
     {
      return  static_cast <int> (col . size ());
     }

   // Members defined outside this header

   void  Counts_To_Prob  (void);
   double  Column_Score  (char ch, int col)  const;
   void  Make_Log_Odds_WRT_GC  (double gc_frac);
   void  Print  (FILE * fp);
   void  Probs_To_Logs  (void);
   bool  Read  (FILE * fp);

   PWM_t &  operator =  (const PWM_t & src);
  };


class  Gene_t  :  public Orf_t
  {
  // An Orf_t extended with a status word, an id and a score.

  private:
   unsigned int  status;
     // bit flags; bits are OR-ed in by  Set_Status_Bit
   int  id;
   double  score;

  public:
   // Both constructors zero  status ,  id  and  score  so that no getter
   // can return an indeterminate value before the matching setter has
   // been called.
   Gene_t  ()
     : status (0), id (0), score (0.0)
     {}
   // Copy the Orf_t part from  orf ; the Gene_t-specific fields are zeroed.
   Gene_t  (const Orf_t & orf)
     : Orf_t (orf), status (0), id (0), score (0.0)
     {}

   int  Get_ID  (void)  const
     { return  id; }
   double  Get_Score  (void)  const
     { return  score; }
   unsigned int  Get_Status  (void)  const
     { return  status; }
   unsigned int  Get_Status_Bit
       (unsigned int u)  const;

   void  Set_ID  (int i)
     { id = i; }
   void  Set_Score  (double d)
     { score = d; }
   // Replace the whole status word with  u .
   void  Set_Status  (unsigned int u)
     { status = u; }
   // Turn on the bits of  u  in the status word, leaving others unchanged.
   void  Set_Status_Bit  (unsigned int u)
     { status |= u; }

   // Turn off every status bit.
   void  Clear_Status  (void)
     { status = 0; }
  };



// Free-function declarations.  The definitions are not part of this
// header, so only what the signatures themselves show is noted here.

// Binary predicate on two genes.
// NOTE(review): presumably an ordering by Gene_t id for sorting -- confirm.
bool  By_ID
    (const Gene_t & a, const Gene_t & b);
unsigned  Ch_Mask
    (char);
int  Char_Sub
    (char ch);
// transl_tabl  defaults to 1 when omitted.
char  Codon_Translation
    (const char * c, int transl_tabl = 1);
char  Complement
    (char ch);
// count  is declared with 26 entries and  ep  with 20; as array
// parameters both are passed as pointers, so the sizes are not enforced.
void  Counts_To_Entropy_Profile
    (int count [26], double ep [20]);
int  Filter
    (char ch);
void  Find_Stop_Codons
    (const char * s, int t, int stop []);
int  First_In_Frame_Stop
    (char * s, int frame);
void  Forward_Strand_Transfer
    (string & t, const string & s, int start, int len);
// The next four take a codon as an unsigned value.
// NOTE(review): presumably in the 12-bit mask encoding of the *_MASK
// constants -- confirm.
int  Is_Forward_Start
    (unsigned codon);
int  Is_Forward_Stop
    (unsigned codon);
int  Is_Reverse_Start
    (unsigned codon);
int  Is_Reverse_Stop
    (unsigned codon);
int  Is_Start
    (const char * s);
int  Is_Stop
    (const char * s);
int  Nucleotide_To_Subscript
    (char ch);
// t  and  size  are non-const references, so the callee can change
// the caller's buffer pointer and size.
int  Read_String
    (FILE * fp, char * & t, long int & size, char name [], int partial);
// Two overloads:  one for a mutable C string, one for a  string .
void  Reverse_Complement
    (char * s);
void  Reverse_Complement
    (string & s);
void  Reverse_Strand_Transfer
    (string & t, const string & s, int start, int len);
// stop_codon  and  errflg  are non-const references, i.e., outputs.
void  Set_Stop_Codons_By_Code
    (vector <const char *> & stop_codon, int code, bool & errflg);


#endif