Codebase list tigr-glimmer / 8356402 scripts / glim-diff.awk
8356402

Tree @8356402 (Download .tar.gz)

glim-diff.awk @8356402raw · history · blame

#!/bin/awk -f
# Usage:  glim-diff.awk  <a-pred> <b-pred>
#   Read gene predictions in <a-pred> and <b-pred>
#   and output them side by side.  Both must be
#   in sorted order by stop codon and the format for
#   each must be:
#     <id>  <start>  <stop>  [additional columns irrelevant]
#   Also print summary info at end.


# Main program.  Walks the two prediction files in step (both are expected
# to be sorted by the stop coordinate in column 3), printing one line per
# gene and a summary at the end.  The marker in the middle column means:
#     <   stop coordinate occurs only in the A file
#     >   stop coordinate occurs only in the B file
#     =   same stop coordinate and same start coordinate
#     |   same stop coordinate but different start coordinate
BEGIN   {
         if  (ARGC < 3)
             Usage_Exit();

         # Remember the two file names and remove them from ARGV; the
         # files are read explicitly through getline in Read_A/Read_B.
         afp = ARGV [1];
         delete ARGV [1];
         bfp = ARGV [2];
         delete ARGV [2];

         # Load the first record of each file.
         Read_A();
         Read_B();

         # Merge phase: runs while both files still have a current record.
         # Multiplying by 1 forces the coordinates to be compared as numbers.
         while  (! (adone || bdone))
           {
            if  (1 * aend < 1 * bend)
                {
                 # A's stop is smaller, so B has no gene ending there.
                 printf "%-8s %7d %7d  <\n", aid, astart, aend;
                 aonly ++;
                 Read_A();
                }
            else if  (1 * bend < 1 * aend)
                {
                 # B's stop is smaller, so A has no gene ending there.
                 printf "%24s  >  %-8s %7d %7d\n", "", bid, bstart, bend;
                 bonly ++;
                 Read_B();
                }
              else
                {
                 # Same stop coordinate.  The sign of the start difference
                 # is chosen from the orientation of the A gene (start
                 # before or after its stop).
                 if  (1 * astart < 1 * aend)
                     diff = bstart - astart;
                   else
                     diff = astart - bstart;
                 if  (diff == 0)
                     {
                      ch = "=";
                      exact_ct ++;
                     }
                   else
                     ch = "|";
                 printf "%-8s %7d %7d  %s  %-8s %7d %7d\n",
                      aid, astart, aend, ch, bid, bstart, bend;
                 match_ct ++;
                 diff_sum += diff;
                 Read_A();
                 Read_B();
                }
           }

         # At most one of the files still has records; flush them.
         while  (! adone)
           {
            printf "%-8s %7d %7d  <\n", aid, astart, aend;
            aonly ++;
            Read_A();
           }
         while  (! bdone)
           {
            printf "%24s  >  %-8s %7d %7d\n", "", bid, bstart, bend;
            bonly ++;
            Read_B();
           }

         # Average signed start difference over all matches.  With no
         # matches the division would be by zero and abort the script
         # before the remaining summary lines, so report 0.0 instead.
         if  (match_ct > 0)
             avg_diff = diff_sum / match_ct;
           else
             avg_diff = 0.0;

         print "";
         printf " A only: %6d  %5.1f%%\n", aonly, Percent(aonly, acount);
         printf " B only: %6d  %5.1f%%\n", bonly, Percent(bonly, bcount);
         printf "Matches: %6d  %5.1f%%  %5.1f%%\n", match_ct,
              Percent(match_ct, acount), Percent(match_ct, bcount);
         printf "  Exact: %6d  %5.1f%%  %5.1f%%\n", exact_ct,
              Percent(exact_ct, match_ct), Percent(exact_ct, acount);
         printf "AvgDiff: %8.1f\n", avg_diff;
         printf "A count: %6d\n", acount;
         printf "B count: %6d\n", bcount;
        }



# Return  part  as a percentage of  whole .  A zero (or never-set)
# whole  yields 0.0 so that callers never divide by zero.
function  Percent  (part, whole)
  {
   if  (whole != 0)
       return  (100.0 * part) / whole;

   return  0.0;
  }



# Read the next record of the A file (name in global  afp ) into the
# globals  aid ,  astart  and  aend  and count it in  acount .
# Sets  adone  when no further record can be read.  A read error
# (getline returning a negative value, e.g. the file does not exist)
# is reported on stderr instead of being silently treated as an
# empty file.   status  is a local variable, not an argument.
function  Read_A  (   status)
  {
   status = (getline < afp);
   if  (status > 0)
       {
        aid = $1;
        astart = $2;
        aend = $3;
        acount ++;
       }
     else
       {
        if  (status < 0)
            printf "glim-diff.awk: cannot read file '%s'\n", afp > "/dev/stderr";
        adone = 1;
       }
  }



# Read the next record of the B file (name in global  bfp ) into the
# globals  bid ,  bstart  and  bend  and count it in  bcount .
# Sets  bdone  when no further record can be read.  A read error
# (getline returning a negative value, e.g. the file does not exist)
# is reported on stderr instead of being silently treated as an
# empty file.   status  is a local variable, not an argument.
function  Read_B  (   status)
  {
   status = (getline < bfp);
   if  (status > 0)
       {
        bid = $1;
        bstart = $2;
        bend = $3;
        bcount ++;
       }
     else
       {
        if  (status < 0)
            printf "glim-diff.awk: cannot read file '%s'\n", bfp > "/dev/stderr";
        bdone = 1;
       }
  }



# Print the usage text and terminate the script.
# The text is emitted by a single print statement; the lines are joined
# with ORS, the same separator print puts after each record.
function  Usage_Exit  ()
  {
   print "# Usage:  glim-diff.awk  <a-pred> <b-pred>" ORS \
         "#   Read gene predictions in <a-pred> and <b-pred>" ORS \
         "#   and output them side by side.  Both must be" ORS \
         "#   in sorted order by stop codon and the format for" ORS \
         "#   each must be:" ORS \
         "#     <id>  <start>  <stop>  [additional columns irrelevant]" ORS \
         "#   Also print summary info at end.";

   exit;
  }