//  Codebase list tigr-glimmer / a2ba7ce SimpleMake / window-acgt.cc
//  a2ba7ce
//
//  Tree @a2ba7ce (Download .tar.gz)
//
//  window-acgt.cc @a2ba7ce  raw · history · blame

//  A. L. Delcher
//
//  File:  window-acgt.cc
//
//  Last Modified:  Tue May 23 10:30:35 EDT 2006
//
//  Read a multifasta file from stdin and report the acgt content
//  of windows in it.  Command line arguments specify the
//  length of windows and their separation.
//  Output goes to stdout in the format
//  window-start  window-len  A's  C's  G's  T's  #other  %GC
//  Note that the last window can be shorter than the specified
//  length.



#include  "window-acgt.hh"


// External variables

extern int  Verbose;
extern int  Global_Debug_Flag;


// Global variables

static bool  Output_Percents = false;
  // If set true (by the -p option) then the A, C, G, T and "other"
  // columns are output as percentages of the window size instead
  // of counts.  The %GC column is a percentage in either mode.
static int  Window_Len;
  // Width of window to process; first positional argument on the
  // command line.  Parse_Command_Line  rejects values below 1.
static int  Window_Skip;
  // Number of characters to slide window before reporting the next
  // result; second positional argument on the command line.
  // Parse_Command_Line  rejects values below 1.



int  main
    (int argc, char * argv [])

//  Read multifasta data from stdin and, for every sequence, print the
//  A/C/G/T/other content and %GC of each reported window to stdout.
//  Window length and spacing are taken from the command line by
//   Parse_Command_Line .  A line whose  First_Non_Blank  character
//  is '>' is treated as a sequence header:  it is echoed, followed by
//  a line of column titles, and all window state is reset.
//  Always returns 0.
//  NOTE(review):   fgets  delivers at most  MAX_LINE - 1  characters per
//  call, so a longer input line arrives in several pieces and each
//  piece is tested for a leading '>' separately.

  {
   vector <char>  window;  // the actual window characters (a ring buffer)
   char  line [MAX_LINE];
   int  win_pos;    // position of the first character in the current window
   int  win_next;   // next window position to be printed
   int  win_size;   // number of characters in the current window
   int  win_sub;    // subscript of next position in  window
   int  last_pos;   // last window position printed
   int  count [5] = {0};
   int  i;
   

   Verbose = 0;

   Parse_Command_Line (argc, argv);

   window . resize (Window_Len);

   win_pos = win_next = 1;
   win_sub = win_size = last_pos = 0;

   while  (fgets (line, MAX_LINE, stdin) != NULL)
     {
      if  (First_Non_Blank (line) == '>')
          {
           // Flush what is left of the previous sequence, unless the
           // window starting at  win_pos  has already been printed
           if  (win_pos != last_pos)
               Finish (win_pos, win_size, win_next, count, window, win_sub);
           fputs (line, stdout);
           printf ("%8s %7s %6s %6s %6s %6s %6s %6s\n", "Position", "Length",
                "As", "Cs", "Gs", "Ts", "Other", "%GC");
           win_pos = win_next = 1;
           win_sub = win_size = last_pos = 0;
           for  (i = 0;  i < 5;  i ++)
             count [i] = 0;
          }
        else
          {
           char  * p;

           for  (p = line;  * p != '\0';  p ++)
             // The cast keeps bytes with the high bit set from reaching
             //  isspace  as negative values, which would be undefined
             if  (! isspace ((unsigned char) (* p)))
                 {
                  if  (win_size == Window_Len)
                      {
                       count [Subscript (window [win_sub])] --;
                         // Subtract for character sliding out of the window;
                         // with a full buffer  win_sub  is the oldest entry
                       win_pos ++;
                      }
                    else
                      win_size ++;
                  count [Subscript (* p)] ++;
                  window [win_sub] = * p;
                  win_sub = (win_sub + 1) % Window_Len;

                  // Only full windows are reported here; a shorter
                  // trailing window is left to  Finish
                  if  (win_size == Window_Len && win_pos == win_next)
                      {
                       Print_Line (win_pos, win_size, count);
                       last_pos = win_pos;
                       win_next += Window_Skip;
                      }
                 }
          }
     }

   if  (win_pos != last_pos)
       Finish (win_pos, win_size, win_next, count, window, win_sub);

   return  0;
  }



static void  Finish
    (int win_pos, int win_size, int win_next, int count [5],
     const vector <char> & window, int win_sub)

//  Report the trailing (possibly short) window of a sequence.
//  The window currently starts at position  win_pos  and holds
//   win_size  characters whose tallies are in  count .  Characters
//  are dropped from the front, read out of the ring buffer  window
//  starting at subscript  win_sub , until the window starts at
//   win_next  or becomes empty.  Whatever remains is printed;
//  nothing is printed for an empty window.
//  The entries of  count  are decremented in place as characters
//  are dropped.

  {
   for  ( ;  win_pos < win_next && win_size > 0;  win_pos ++, win_size --)
     {
      int  leaving = Subscript (window [win_sub]);

      count [leaving] --;
      win_sub = (win_sub + 1) % Window_Len;
     }

   if  (win_size <= 0)
       return;

   Print_Line (win_pos, win_size, count);
  }



static void  Parse_Command_Line
    (int argc, char * argv [])

//  Get options and parameters from command line with  argc
//  arguments in  argv [0 .. (argc - 1)] .
//  Options:  -h / --help  prints the usage message and exits with
//  EXIT_SUCCESS;  -p / --percent  sets  Output_Percents .  Long
//  options are only recognized when  ALLOW_LONG_OPTIONS  is set.
//  Two positional arguments must follow the options:  the window
//  length and the window skip, both read as base-10 integers into
//   Window_Len  and  Window_Skip .  Any further arguments are not
//  examined.  An unknown option or fewer than two positional
//  arguments prints the usage message and exits with EXIT_FAILURE.
//  A length or skip below 1 (which includes text that does not
//  start with a number, since  strtol  yields 0 for it) is reported
//  through  SIMPLE_THROW .

  {
   bool  errflg = false;
   int  ch;

   optarg = NULL;

#if  ALLOW_LONG_OPTIONS
   int  option_index = 0;
   static struct option  long_options [] = {
        {"help", 0, 0, 'h'},
        {"percent", 0, 0, 'p'},
          // Must be the full name:  getopt_long  accepts unambiguous
          // prefixes of a registered name but not anything longer
        {0, 0, 0, 0}
      };

   while  (! errflg
        && ((ch = getopt_long (argc, argv, "hp",
                     long_options, & option_index)) != EOF))
#else
   while  (! errflg
        && ((ch = getopt (argc, argv, "hp")) != EOF))
#endif

     switch  (ch)
       {
        case  'h' :
          Usage ();
          exit (EXIT_SUCCESS);

        case  'p' :
          Output_Percents = true;
          break;

        default :
          errflg = true;
          break;
       }

   // Need at least two arguments left for window-len and window-skip
   if  (errflg || optind > argc - 2)
       {
        Usage ();
        exit (EXIT_FAILURE);
       }

   Window_Len = int (strtol (argv [optind ++], NULL, 10));
   Window_Skip = int (strtol (argv [optind ++], NULL, 10));

   if  (Window_Len < 1)
       {
        sprintf (Clean_Exit_Msg_Line, "ERROR:  Bad window length = %d", Window_Len);
        SIMPLE_THROW (Clean_Exit_Msg_Line);
       }
   if  (Window_Skip < 1)
       {
        sprintf (Clean_Exit_Msg_Line, "ERROR:  Bad window skip = %d", Window_Skip);
        SIMPLE_THROW (Clean_Exit_Msg_Line);
       }

   return;
  }



static void  Print_Line
    (int win_pos, int win_size, int count [5])

//  Write one output line for the window that starts at position
//   win_pos  and holds  win_size  characters.  The five entries of
//   count  follow, either as raw counts or, when  Output_Percents
//  is set, as percentages of  win_size .  The final column is
//  the percentage of  win_size  made up by  count [1] + count [2]
//  (the C and G tallies) and is printed in both modes.

  {
   printf ("%8d %7d", win_pos, win_size);

   for  (int  col = 0;  col < 5;  col ++)
     {
      if  (! Output_Percents)
          printf (" %6d", count [col]);
        else
          printf (" %6.1f", Percent (count [col], win_size));
     }

   int  gc_total = count [1] + count [2];

   printf (" %6.1f", Percent (gc_total, win_size));
   putchar ('\n');
  }



static int  Subscript
    (char ch)

//  Return the subscript (in  count ) corresponding to  ch :
//  0 for a/A, 1 for c/C, 2 for g/G, 3 for t/T and 4 for every
//  other character.

  {
   // Convert through  unsigned char  first:  handing  tolower  a
   // negative value (a byte >= 0x80 where  char  is signed) is undefined
   switch (tolower ((unsigned char) ch))
     {
      case  'a' :
        return  0;
      case  'c' :
        return  1;
      case  'g' :
        return  2;
      case  't' :
        return  3;
      default :
        return  4;
     }
  }



static void  Usage
    (void)

//  Write the help text for this program to stderr:  the command
//  line synopsis, a description of the window parameters and the
//  output columns, and the list of options.

  {
   fprintf
       (stderr,
        "USAGE:  window-acgt [options] window-len window-skip < input-file\n"
        "\n"
        "Read multi-fasta-format file from standard input.\n"
        "Print the acgt-content of windows in each sequence.\n"
        "The width of windows is <window-len>.  The number of\n"
        "positions between windows to report is <window-skip>\n"
        "So the overlap between consecutive windows is\n"
        "<window-len> - <window-skip> positions\n"
        "Output goes to standard output in the format:\n"
        "  window-start  window-len  A's C's G's T's #other %%GC\n"
        "Note the last window in the sequence can be shorter than\n"
        "<window-len> if the sequence ends prematurely\n"
        "\n"
        "Options:\n"
        " -h  or  --help\n"
        "    Print this message\n"
        " -p  or  --percent\n"
        "    Output percentages instead of counts\n"
        "\n");
  }