Codebase list tigr-glimmer / 8356402 scripts / upstream-coords.awk
8356402

Tree @8356402 (Download .tar.gz)

upstream-coords.awk @8356402raw · history · blame

#!/bin/awk -f
# Usage:  upstream-coords.awk  <len>  <separation>
#   Read gene prediction coordinates from standard input
#   and output the coordinates of the region of length
#    <len>  that is  <sep>  bases before the 5' start
#   of the gene.  Input format is:
#     <tag>  <start>  <stop>
#   Output format is the same.
#   If the length of the gene is longer than  MAX_GENE_LEN
#   (default 100000), then the gene is assumed to wrap around
#   a circular genome and its direction is reversed.
#   Note that output coordinates can be negative or larger
#   than the genome length (which is unknown).


# Startup: take  <len>  and  <separation>  from the command line and
# settle the wrap-around threshold before any input record is read.
BEGIN   {
         # Both positional arguments are required; without them
         # print the usage text and stop.
         if  (ARGC <= 2)
             Usage_Exit();

         # Keep a MAX_GENE_LEN that already has a non-zero value
         # (e.g. one assigned with -v); otherwise use the default.
         if  (MAX_GENE_LEN == 0)
             MAX_GENE_LEN = 100000;

         len = ARGV [1];
         sep = ARGV [2];

         # Remove both arguments from ARGV so that awk does not
         # try to open them as input files.
         delete ARGV [1];
         delete ARGV [2];
        }


        # For each input record  <tag> <start> <stop>  print the tag
        # followed by the two ends of the  len -base window that lies
        # sep  bases before the start coordinate of the gene.
        {
         # The gene runs in increasing coordinates when start < stop;
         # 1 * $2 makes the start take part as a number.
         forward = (1 * $2 < $3);
         strand = forward ? 1 : -1;
         span = forward ? (1 + $3 - $2) : (1 + $2 - $3);

         # A span above the threshold is taken to mean the gene wraps
         # around the origin, so its direction is the opposite one.
         if  (span > MAX_GENE_LEN)
             strand = - strand;

         # Window ends, farthest from the gene start first.
         far_end = $2 - strand * (sep + len);
         near_end = $2 - strand * (sep + 1);

         printf "%s %8d %8d\n", $1, far_end, near_end;
        }



# Print the usage message on standard output, then leave the program.
# The lines are joined with ORS and written by a single print, so the
# output is the same as printing each line separately.
function  Usage_Exit  ()
  {
   print "# Usage:  upstream-coords.awk  <len>  <separation>" ORS \
         "#   Read gene prediction coordinates from standard input" ORS \
         "#   and output the coordinates of the region of length" ORS \
         "#    <len>  that is  <sep>  bases before the 5' start" ORS \
         "#   of the gene.  Input format is:" ORS \
         "#     <tag>  <start>  <stop>" ORS \
         "#   Output format is the same." ORS \
         "#   If the length of the gene is longer than  MAX_GENE_LEN ," ORS \
         "#   then the gene is assumed to wrap around a circular genome" ORS \
         "#   Note that output coordinates can be negative or longer" ORS \
         "#   than the genome length (which is unknown).";

   exit;
  }