codon_us.c - codonw (lintian-fixes/main)

codon_us.c @lintian-fixes/main — raw · history · blame

/**************************************************************************/
/* CodonW codon usage analysis package                                    */
/* Copyright (C) 2005            John F. Peden                            */
/* This program is free software; you can redistribute                    */
/* it and/or modify it under the terms of the GNU General Public License  */
/* as published by the Free Software Foundation; version 2 of the         */
/* License,                                                               */
/*                                                                        */
/* This program is distributed in the hope that it will be useful, but    */
/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
/* GNU General Public License for more details.                           */
/* You should have received a copy of the GNU General Public License along*/
/* with this program; if not, write to the Free Software Foundation, Inc.,*/
/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
/*                                                                        */
/*                                                                        */
/* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/
/* Spam please change the # in my email to an _)                          */
/*                                                                        */
/* For the latest version and information see                             */
/* http://codonw.sourceforge.net 					  */
/**************************************************************************/
/*                                                                        */
/* -----------------------        codon_us.C     ------------------------ */
/* This file contains most of the codon usage analysis subroutines        */
/* except for the COA analysis                                            */
/* Internal subroutines and functions                                     */
/* initilize_point    assigns genetic code dependent parameters to structs*/
/* initilize_coa      decides which cod/AA to include in a COA by default */
/* codon_usage_tot    Counts codon and amino acid usage                   */
/* ident_codon        Converts codon into a numerical value in range 1-64 */
/* codon_usage_out    Write out Codon Usage to file                       */
/* codon_error        Called after all codons read, checks data was OK    */
/* rscu_usage_out     Write out RSCU                                      */
/* raau_usage_out     Write out normalised amino acid usage               */
/* aa_usage_out       Write out amino acid usage                          */
/* how_synon          Calculates how synonymous each codon is             */
/* how_synon_aa       Calculates how synonymous each AA is                */
/* clean_up           Re-zeros various internal counters and arrays       */
/* base_sil_us_out    Write out base composition at silent sites          */
/* cai_out            Write out CAI usage                                 */
/* cbi_out            Write out codon bias index                          */
/* fop_out            Write out Frequency of Optimal codons               */
/* enc_out            Write out Effective Number of codons                */
/* gc_out             Writes various analyses of base usage               */
/* dot(,X)            prints a period every X times it is called          */
/* get_aa             converts a three base codon into a 1 or 3 letter AA */
/* cutab_out          Write a nice tabulation of the RSCU+CU+AA           */
/* dinuc_count        Count the dinucleotide usage                        */
/* dinuc_out          Write out dinucleotide usage                        */
/* coa_raw_out        Write out raw codon usage for use by COA analysis   */
/* sorted_by_axis1    Sorts genes according to their axis one position    */
/* gen_cusort_fop     COA specific, write out cu of genes by axis1 posit. */
/* highlow            Used sorted cu to calculate high_low chi sq. contin */
/* hydro_out          Write out Protein hydropathicity                    */
/* aromo_out          Write out Protein aromaticity                       */
/*                                                                        */
/*                                                                        */
/* External subroutines to codon_us.c                                     */
/* my_exit            Controls exit from CodonW closes any open files     */
/* tidy               reads the input data                                */
/* output             called from tidy to decide what to do with the data */
/* toutput            handles the reformatting and translation of seqs    */
/* output_long        if sequence is very long then process what we know  */
/*                    and write sequence to disk in fragments             */
/* open_file          Open files, checks for existing files               */
/* fileclose          Closes files and returns a NULL pointer or exits    */
/*                                                                        */
/**************************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include <limits.h>
#include "codonW.h"
/********************* Initilize Pointers**********************************/
/* Various pointers to structures are assigned here dependent on the      */
/* genetic code chosen.                                                   */
/* paa                points to a struct containing Amino Acid names      */
/* pap                points to amino acid properties                     */
/* pcai               points to Adaptation values used to calc CAI        */
/* pfop               points to a struct describing optimal codons        */
/* pcbi               points to the same structure as pfop                */
/* pcu                points to data which has the translation of codons  */
/* ds                 is a struct describing how synonymous a codon is    */
/* da                 is a struct describing the size of each AA family   */
/* pcoa               points to a struct that describes columns to be     */
/*                    included/excluded from any COA analysis             */
/**************************************************************************/
int initilize_point(char code, char fop_species, char cai_species)
{
   paa = &amino_acids;
   pap = &amino_prop;
   pcai = &cai[cai_species];
   pfop = &fop[fop_species];
   pcbi = &fop[fop_species];
   pcu = &cu[code];
   ds = how_synon();                          
   da = how_synon_aa();                                     
   pcoa = &coa;

   printf ("\n");   
   if (pm->codonW)
     printf ("Genetic code is currently set to %s %s\n\n",pcu->des,pcu->typ);

   return 1;
}
/*******************How Synonymous is this codon  *************************/
/* This function discovers at run time how synonymous a codon is by check-*/
/* ing all other codons to see if they encode the same AA                 */
/* This saves a lot of time when new genetic codes are added              */ 
/**************************************************************************/
int *how_synon(void)
{
   static int      dds[65];
   int x,i;
   
   for (x = 0; x < 65; x++)
      dds[x] = 0;

   for (x = 1; x < 65; x++)
      for (i = 1; i < 65; i++)
     if (pcu->ca[x] == pcu->ca[i])
        dds[x]++;
   return dds;                             /* return a structure          */
}
/*******************How Synonymous is this AA     *************************/
/* This function discovers at run time how synonymous an amino acid is by */
/* checking all codons to see if they encode this same AA                 */
/* This saves a lot of time when new genetic codes are added              */ 
/**************************************************************************/
int *how_synon_aa(void)
{
   static int      dda[22];
   int x;
   
   for (x = 0; x < 22; x++)
      dda[x] = 0;

   for (x = 1; x < 65; x++)
      dda[pcu->ca[x]]++;
   return dda;                             /* return a structure          */
}
/********************* Initialise COA     *********************************/
/* Decides which codons or amino acids are to be included in a COA if only*/
/* the default choice is used. For an amino acid COA, only stops are excl */
/* but for a codon usage COA stop codons and non-synonymous codons are    */
/* excluded                                                               */
/* pcoa               points to a struct that describes columns to be     */
/*                    included/excluded from any COA analysis             */
/*                    structure contains AA and Codon information         */
/**************************************************************************/
int initilize_coa(char code)
{
   static char     initilized;
   static char     oldcode;
   int i;
    
   /* if called a second time return unless the genetic code has changed  */
   if (initilized && (oldcode == code)) return 1;

   for (i = 0; i < 22; i++)         /* for each amino acid                */
      if (i == 11 || i == 0)        /* stop codons have the value 11      */
     pcoa->amino[i] = FALSE;        /* see RECODING file for more details */
      else
     pcoa->amino[i] = TRUE;

   for (i = 0; i < 65; i++)         /* for each codon                     */
      if (*(ds + i) == 1 || pcu->ca[i] == 11 || i == 0) 
     pcoa->codons[i] = FALSE;
      else
     pcoa->codons[i] = TRUE;

   initilized = TRUE;               /* we have been called  ...           */
   return 1;
}
/****************** Codon Usage Counting      *****************************/
/* Counts the frequency of usage of each codon and amino acid this data   */
/* is used throughout CodonW                                              */
/* pcu->ca contains codon to amino acid translations for the current code */
/* and is assigned in initialise point                                    */
/**************************************************************************/
int codon_usage_tot(char *seq, long int how_many)
{
   char            codon[4];
   int             icode;
   int             i;
   
   for (i = 0; i < how_many - 2; i += 3) {
      strncpy(codon, (seq + i), 3);
      icode = ident_codon(codon);
      ncod[icode]++;                          /*increment the codon count */  
      naa[pcu->ca[icode]]++;                  /*increment the AA count    */ 
      codon_tot++;                            /*increment the codon total */
   }

   if (how_many % 3) {                        /*if last codon was partial */
      icode = 0;                              /*set icode to zero and     */
      ncod[0]++;                              /*increment untranslated    */ 
   }                                          /*codons                    */
   return icode;                              /*return the last codon     */
}

/****************** Ident codon               *****************************/
/* Converts each codon into a numerical array (codon) and converts this   */
/* array into a numerical value in the range 0-64, zero is reserved for   */
/* codons that contain at least one unrecognised base                     */
/*                                                                        */
/**************************************************************************/
int ident_codon(char *codon)
{
   int             icode = 0;
   int x;
   
   for (x = 0; x < 3; x++) {
      switch (codon[x]) {
      case 'T':
      case 't':
      case 'U':
      case 'u':
     codon[x] = (char) 1;
     continue;
      case 'C':
      case 'c':
     codon[x] = (char) 2;
     continue;
      case 'A':
      case 'a':
     codon[x] = (char) 3;
     continue;
      case 'G':
      case 'g':
     codon[x] = (char) 4;
     continue;
      case '\0':
     return 0;
      default:
     codon[x] = (char) 0;
     break;
      }
   }
   if (codon[0] * codon[1] * codon[2] != 0)
      icode = (codon[0] - 1) * 16 + codon[1]
     + (codon[2] - 1) * 4;
   else
      icode = 0;

   return icode;
}

/****************** Codon error               *****************************/
/* Does some basic error checking for the input data, it can be called    */
/* using different error levels, thus generating different types of       */
/* messages. Basically checks for start, stop codons and internal stop    */
/* codons. As well as non-translatable and partial codons                 */
/**************************************************************************/
long int codon_error(int x, int y, char *ttitle, char error_level)
{
   long int             ns = 0;                  /* number of stops       */
   long int        loc_cod_tot = 0;
   static int      error_lines = 0;
   int i;

   for (i = 1, ns = 0; i < 65; i++) {
     loc_cod_tot += ncod[i];
     if (pcu->ca[i] == 11)
       ns +=  ncod[i];                           /*count  stop codons     */
   }

   switch (error_level) {
     case 1:                                    /*internal stop codons    */
       ns = ns - valid_stops;           
       /* a stop was a valid_stop if it was the last codon of a sequence  */

       if ( ! valid_start && pm->warn ) {
           dot(0,10);   
           fprintf(pm->my_err, "\nWarning: Sequence %3li \"%-20.20s\" does "
               "not begin with a recognised start codon\n"
		     ,num_sequence,ttitle);
           error_lines++;
       }

       if (ns && pm->warn ) {
	        dot(0,10);  
	        if (pm->totals && pm->warn)
	         fprintf(pm->my_err,"\nWarning: some sequences had internal stop"
		     " codons (found %li such codons)\n", ns);
	        else
	         fprintf(pm->my_err, "\nWarning: Sequence %3li \"%-20.20s\" has "
             "%li internal stop codon(s)\n", num_sequence, ttitle, ns);
	        num_seq_int_stop++;
	        error_lines++;
       }
       break;
   case 2:                                
     dot(0,10);  
     if (ncod[0] == 1 && pcu->ca[x] != 11 && pm->warn){ /*  last codon was partial */
       fprintf(pm->my_err, 
	       "\nWarning: Sequence %3li \"%-20.20s\" last codon was partial\n"
	       ,num_sequence, ttitle);
       error_lines++;
     }else {
       if (ncod[0] && pm->warn){                        /* non translatable codons */
	    if (pm->totals)
	      fprintf(pm->my_err, 
		   "\nWarning: some sequences had non translatable"
		   " codons (found %li such codons)\n",  ncod[0]);
	    else
	      fprintf(pm->my_err, 
		   "\nWarning: sequence %3li \"%-20.20s\" has %li non translatable"
		   " codon(s)\n", num_sequence, ttitle, ncod[0]);
	    error_lines++; 
       }
       if (pcu->ca[x] != 11 && pm->warn ) {
	    if (!pm->totals){
	       fprintf(pm->my_err, 
		   "\nWarning: Sequence %3li \"%-20.20s\" is not terminated by"
		   " a stop codon\n", num_sequence, ttitle);
	       error_lines++;
            }     
       }
     }  
     break;
   case 3: 
                                   /* Nc error routines see codon_us      */
     dot(0,10);                    /* dot resetting internal counter      */
     if (x==3) x=4;                /* if x=3 there are no 3 or 4 fold AA  */ 
     fprintf(pm->my_err, 
	     "\nSequence %li \"%-20.20s\" contains ",num_sequence, ttitle);
     (y) ? fprintf(pm->my_err, "only %i ", (int) y) : 
       fprintf(pm->my_err, "no ");
     fprintf(pm->my_err, "amino acids with %i synonymous codons\n", x);
     fprintf(pm->my_err, "\t--Nc was not calculated \n");
     error_lines+=2;      
     break;
   case 4:                         /* run silent                          */
     break;
   default:
      my_exit(99,"Programme error in codon_error\n");
   }
   if ((((error_lines + 2) * 2) > pm->term_length) && pm->verbose 
       && pm->my_err == stderr ) {
     error_lines = 0;              /* count lines of errors               */
     dot(0,10);                     
     pause;
   }       
   return loc_cod_tot;             /* Number of codons counted            */
}

/****************** Codon Usage Out           *****************************/
/* Writes codon usage output to file. Note this subroutine is only called */
/* when machine readable output is selected, otherwise cutab_out is used  */
/**************************************************************************/
int codon_usage_out(FILE * fblkout, long int *nncod, int last_aa, 
                    int vvalid_stops, char *ttitle)
{
  long int ccodon_tot = 0;
  int x;
  char sp=pm->seperator;
  
  ccodon_tot = codon_error(last_aa, vvalid_stops, ""   , (char) 4); /*dummy*/

  /*example of output                                                     */ 
  /*0,0,0,0,3,2,2,0,0,0,0,0,0,3,0,0,                                      */
  /*0,0,0,4,3,4,1,7,0,0,0,0,3,1,3,1,Codons=100                              */       
  /*0,0,0,0,10,6,3,0,0,0,0,0,1,1,12,0,Universal Genetic code              */
  /*0,0,0,3,7,5,7,9,0,1,1,1,8,4,5,0,MLSPCOPER.PE1                         */

  for (x = 1; x < 65; x++) {
    
    fprintf(fblkout, "%i%c",nncod[x],sp);
    
    switch (x) {
    case 16:
      fprintf(fblkout, "\n");
      break;
    case 32:
	fprintf(fblkout, "Codons=%ld\n",ccodon_tot);
      break;
    case 48:
	fprintf(fblkout, "%.30s\n", pcu->des);
      break;
    case 64:
      fprintf(fblkout, "%.20s\n",ttitle);
      break;
    default:
      break;
    }
  }
  return 1;
}
/******************  RSCU  Usage out          *****************************/
/* Writes Relative synonymous codon usage output to file. Note this subrou*/
/* tine is only called if machine readable output is selected             */
/* If human readable format was selected then what the user really wanted */
/* was cutab so this is automatically selected in codons.c                */
/* RSCU values are genetic codon dependent                                */
/**************************************************************************/
int rscu_usage_out(FILE * fblkout, long *nncod, long *nnaa)
{  
 int x;
 char sp=pm->seperator;

 /* ds points to an array[64] of synonym values i.e. how synon its AA is  */

 for (x = 1; x < 65; x++) {
   if (nnaa[pcu->ca[x]] != 0)
     fprintf(fblkout, "%5.3f%c",
	     ( (float) nncod[x] / (float) nnaa[pcu->ca[x]])
	     *  ((float) *(ds + x)), sp );
   else
     fprintf(fblkout, "0.000%c",sp);

   if (x == 64)
     fprintf(fblkout, "%-20.20s", title);

   if (!(x % 16))
     fprintf(fblkout, "\n");
   }
   return 1;
}
/******************   RAAU output             *****************************/
/* Writes Relative amino acid usage output to file. Amino Acid usage is   */
/* normalised for gene length                                             */
/**************************************************************************/
int raau_usage_out(FILE * fblkout, long *nnaa)
{
   long int        aa_tot = 0;
   static char     first_line = TRUE;
   int i,x;
   char sp;

   if (pm->seq_format=='M')                     /*  if machine readable  */
      sp = pm->seperator;
   else
      sp = '\t';   

   if (first_line) {                            /* if true write a header*/
     if ( pm->seq_format=='M')
	 fprintf(fblkout, "%s", "Gene_name");
       else
	 fprintf(fblkout, "%-20.20s", "Gene name");

      for (i = 0; i < 22; i++)
	if ( pm->seq_format=='M')
	    fprintf(fblkout, "%c%s", sp,paa->aa3[i]);/* three letter AA names*/
	  else
	    fprintf(fblkout, "%c %-6.6s", sp,paa->aa3[i]);
      fprintf(fblkout, "\n");
      first_line = FALSE;
   }
   for (i = 1; i < 22; i++)
     if (i != 11)
       aa_tot += nnaa[i];                       /* total No. of AAs      */
   
   if ( pm->seq_format=='M')
     fprintf(fblkout, "%.30s", title);
   else
     fprintf(fblkout, "%-20.20s", title);       /* don't waste spaces    */
   
   for (x = 0; x < 22; x++)
     if (x == 11)
       fprintf(fblkout, "%c0.0000",sp);         /* report 0 for stops    */
     else if (aa_tot)
       if (  pm->seq_format=='M')
	   fprintf(fblkout, "%c%.4f",sp,
		   (double) nnaa[x] / (double) aa_tot);
	 else
	   fprintf(fblkout, "%c%7.4f",sp,
		   (double) nnaa[x] / (double) aa_tot);
     else                                       /*What no AminoAcids!!!! */
       if (  pm->seq_format=='M')
	 fprintf(fblkout, "%c%c",sp,sp);            
	 else
	   fprintf(fblkout, "%c ***** ",sp);        

   fprintf(fblkout, "\n",sp);
   return 1;
}
/******************   AA usage output         *****************************/
/* Writes amino acid usage output to file.                                */
/**************************************************************************/
int aa_usage_out(FILE * fblkout, long *nnaa)
{
  static char     first_line = TRUE;
  int i;
  char sp=pm->seperator;

  if (first_line) {
    (pm->seq_format=='M')?
      fprintf(fblkout, "%s", "Gene_name"):
      fprintf(fblkout, "%-20.20s ", "Gene name");
    
    for (i = 0; i < 22; i++)
      (pm->seq_format=='M')?
	fprintf(fblkout, "%c%s", sp,paa->aa3[i]):    /* 3 letter AA code     */
      fprintf(fblkout, "%-5.5s", paa->aa3[i]);
    fprintf(fblkout, "\n");
    first_line = FALSE;
  }
  (pm->seq_format=='M')?
    fprintf(fblkout, "%.20s", title):
    fprintf(fblkout, "%-20.20s ", title);
  
  for (i = 0; i < 22; i++){
    (pm->seq_format=='M')?
      fprintf(fblkout, "%c%li", sp,nnaa[i]):
      fprintf(fblkout, "%-5li",nnaa[i]);
  }

  fprintf(fblkout, "\n");
  return 1;
}
/******************  Base Silent output     *******************************/
/* Calculates and write the base composition at silent sites              */
/* normalised as a function of the possible usage at that silent site with*/
/* changing the amino acid composition of the protein. It is inspired by  */
/* GC3s but is much more complicated to calculate as not every AA has the */
/* option to use any base at the third position                           */
/* All synonymous AA can select between a G or C though                   */
/**************************************************************************/
void base_sil_us_out(FILE * foutput, long *nncod, long *nnaa)
{
   int             id,i,x,y,z;
   long            bases_s[4];     /* synonymous GCAT bases               */
                                      
   long            cb[4];          /* codons that could have been GCAT    */
   int             done[4];
   char sp=  (char) (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;

   for (x = 0; x < 4; x++) {
     cb[x] = 0;
     bases_s[x] = 0;
   }                               /* blank the arrays                    */

   for (x = 1; x < 5; x++)
     for (y = 1; y < 5; y++)
       for (z = 1; z < 5; z++) {   /* look at all 64 codons               */
	 id = (x - 1) * 16 + y + (z - 1) * 4;

	 if (*(ds + id) == 1 || pcu->ca[id] == 11)
           continue;              /* if no synon skip to next       codon */
	 bases_s[z - 1] += nncod[id]; /* count No. codon ending in base X     */
       }     
			
   for (i = 1; i < 22; i++) {
     for (x = 0; x < 4; x++)      /* don't want to count bases in 6 fold  */
         done[x] = FALSE;         /* sites twice do we so we remember     */   

     if (i == 11 || *(da + i) == 1)
       continue;                  /* if stop codon skip, or AA not synony */

      for (x = 1; x < 5; x++)    /* else add aa to could have ended count */
     for (y = 1; y < 5; y++)
        for (z = 1; z < 5; z++) {
           id = (x - 1) * 16 + y + (z - 1) * 4; 
           /* assign codon values in range 1-64                           */
           if (pcu->ca[id] == i && done[z - 1] == FALSE) {
	   /* encode AA i which we know to be synon so add could_be_x ending*/
         /* by the Number of that amino acid                              */
	     cb[z - 1] += nnaa[i];    
	     done[z - 1] = TRUE;     /* don't look for any more or we might   */
                                 /* process leu+arg+ser twice             */
           }                       
        }
   }

   /* Now the easy bit ... just output the results to file                */      
   for (i = 0; i < 4; i++) {
      if (cb[i] > 0)
     fprintf(foutput, "%6.4f%c", (double) bases_s[i]/(double)cb[i], sp);
      else
     fprintf(foutput, "0.0000%c",sp);
   }
   return;
}
/******************  Clean up               *******************************/
/* Called after each sequence has been completely read from disk          */
/* It re-zeros all the main counters, but is not called when concatenating*/
/* sequences together                                                     */
/**************************************************************************/
int clean_up(long int *nncod, long int *nnaa)
{
   int x;
   int i;
   
   for (x = 0; x < 65; x++)
      nncod[x] = 0;
   for (x = 0; x < 23; x++)
      nnaa[x] = 0;
                                    /* dinucleotide count remembers the   */                                     
   dinuc_count(" ", 1);             /* last_base from the last fragment   */
                                    /* this causes the last base to be "" */
   for (x = 0; x < 3; x++)
      for (i = 0; i < 16; i++)
         din[x][i] = 0;

   dinuc_count(" ", 1);
   master_ic = tot = 
   non_std_char = AT_TOT = GC_TOT = AA_TOT = GAP_TOT = IUBC_TOT = 0; 
   long_seq = FALSE;
   valid_stops = valid_start = codon_tot = tot = fram = 0;                   
   return 1;
}
/*****************Codon Adaptation Index output   *************************/
/* Codon Adaptation Index (CAI) (Sharp and Li 1987). CAI is a measurement */
/* of the relative adaptiveness of the codon usage of a gene towards the  */
/* codon usage of highly expressed genes. The relative adaptiveness (w) of*/
/* each codon is the ratio of the usage of each codon, to that of the most*/
/* abundant codon for the same amino acid. The relative adaptiveness of   */
/* codons for albeit a limited choice of species, can be selected from the*/
/* Menu. The user can also input a personal choice of values. The CAI     */
/* index is defined as the geometric mean of these relative adaptiveness  */
/* values. Non-synonymous codons and termination codons (genetic code     */
/* dependent) are excluded. To aid computation, the CAI is calculated as  */
/* using a natural log summation, To prevent a codon having a relative    */
/* adaptiveness value of zero, which could result in a CAI of zero;       */
/* these codons have fitness of zero (<.0001) are adjusted to 0.01        */
/**************************************************************************/
int cai_out(FILE * foutput, long int *nncod)
{
   long int        totaa = 0;
   double          sigma;
   float           ftemp;
   int x;
   char sp=  (char) (pm->seq_format=='H')? 
       (char) '\t': 
       (char) pm->seperator;
   static char       cai_ttt = FALSE;
   static char       description[61];
   static char       reference[61];
  
   static CAI_STRUCT user_cai;


   if (!cai_ttt ) {                       /* have we been called already   */     
      user_cai.des = description;         /* assign an array to a pointer  */
      user_cai.ref = reference;           /* as above                      */
      
      if ( pm->caifile==NULL && pm->verbose==TRUE 
	   && pm->menu==TRUE && (pcai == cai )){
          /* this is false                                                 */
	  /* if personal caifile is on commandline or                      */
          /* in non-interactive mode or -silent option                     */
          /* or cai values are not the default values                      */
	  

	  printf("\nDo you wish to input a personal choice of CAI"
          " values (y/n) [n] ");
      gets(pm->junk);

      /* This allows a user defined choice of CAI values to be selected    */ 
      if ('Y' == (char) toupper( (int) pm->junk[0])) {
          /* tell the user a little about what we are looking for          */
          printf("\nInput file must contain 64 CAI values\n"
                 "ranging from 0.00 to 1.00\n"
                 "values must be separated by spaces\n");
         /* open the CAI adaptiveness values file                          */
           if (!(pm->caifile = open_file("file with CAI values"
                       ,"cai.coa", "r", 0))) my_exit(6,"cai_out");
      
      }
      }                                          /* matched if pm->caifile=*/
     if (pm->caifile){  
       rewind (pm->caifile);        /* unlikely unless fopfile = caifile   */
       x = 0;
       strcpy(user_cai.des,"User supplied CAI adaptation values ");
       strcpy(user_cai.ref,"No reference");
       user_cai.cai_val[x++] = (float) 0.0;

     while ((fscanf(pm->caifile, "%f ", &ftemp)) != EOF) {
                                    /* if any bad CAI values are read EXIT*/
         if (ftemp < 0 || ftemp > 1.0) {
           printf("\nError CAI %f value out of range\nEXITING",ftemp);
           my_exit(99,"cai_out");
        }                                        
        user_cai.cai_val[x++] = ftemp;                    /* assign value */
     }                                                    /* end of while */
     if (x != 65) {                 /*             wrong number of codons */
        fprintf(pm->my_err, "\nError in CAI file, found %i values"
            " expected 64 values EXITING\n", x - 1);
        my_exit(99,"cai_out");
     }
     pcai = &user_cai;              /* assigns pointer to user CAI values */
      }                             /*        matches if( pm->caifile...  */

    
     printf ("Using %s (%s) w values to calculate "
	        		      "CAI \n",pcai->des,pcai->ref);
     cai_ttt = TRUE;                /*stops this "if" from being entered  */

    }                              /* matches if (!cai_ttt )             */
   
   for (x = 1, sigma = 0; x < 65; x++) {
      if (pcu->ca[x] == 11 || *(ds + x) == 1) continue;
      if (pcai->cai_val[x] < 0.0001)/* if value is effectively zero       */
            pcai->cai_val[x] = (float) 0.01;               /* make it .01 */
      sigma += (double) *(nncod + x) * log((double) pcai->cai_val[x]);
      totaa += *(nncod + x);
   }

   if (totaa) {                     /* catch floating point overflow error*/
      sigma = sigma / (double) totaa;
      sigma = exp(sigma);
   } else
      sigma = 0;

   fprintf(foutput, "%5.3f%c", sigma,sp);
   return 1;
}
/*****************Codon Bias Index output        **************************/
/* Codon bias index is a measure of directional codon bias, it measures   */
/* the extent to which a gene uses a subset of optimal codons.            */
/* CBI = ( Nopt-Nran)/(Nopt-Nran) Where Nopt = number of optimal codons;  */
/* Ntot = number of synonymous codons; Nran = expected number of optimal  */
/* codons if codons were assigned randomly. CBI is similar to Fop as used */
/* by Ikemura, with Nran used as a scaling factor. In a gene with extreme */
/* codon bias, CBI will equal 1.0, in a gene with random codon usage CBI  */
/* will equal 0.0. Note that it is possible for Nopt to be less than Nran.*/
/* This results in a negative value for CBI.                              */
/* ( Bennetzen and Hall 1982 )                                            */
/**************************************************************************/
int cbi_out(FILE * foutput, long int *nncod, long int *nnaa )
{
   long int        tot_cod  = 0;
   long int        opt      = 0; 
   float           exp_cod  = (float) 0.0; 
   float           fcbi;
   int             c,x;
   char            str[2];
   char sp=  (pm->seq_format=='H')? 
       (char) '\t':
       (char) pm->seperator;


   static char       description[61];
   static char       reference[61];
   static char       first_call_cbi  = TRUE;
   static char       has_opt_info[22];
   static FOP_STRUCT user_cbi;

   if (first_call_cbi) {                 /* have we been called already   */

     user_cbi.des = description;         /* assign a pointer to array     */
     user_cbi.ref = reference;    
      
      if ( pm->cbifile == NULL && pm->verbose==TRUE 
	  && pm->menu==TRUE && ( pcbi == fop )){ 
          /* this is false                                                 */
	  /* if personal fopfile is on commandline or                      */
          /* in non-interactive mode or -silent option                     */
          /* or fop values are not the default values                      */

      printf("\nDo you wish to input a personal choice of CBI"
         " values (y/n) [n] ");

      gets(pm->junk);

      if ('Y' == (char) toupper( (int) pm->junk[0])) {

     printf("\nInput file must contain 64 CBI values\n"
        " 1= rare codon\n 2= common codon\n 3= optimal codon\n");

     if (!(pm->cbifile = open_file("file with CBI values"
                       ,"cbi.coa", "r", 0)))
        my_exit(6,"cai_out");
          }                         /* matches if Y==                     */
     }                              /* matches if pm->cbifile==NULL       */


     if ( pm->cbifile ){
       rewind (pm->cbifile);        /* fopfile can be the same as cbifile */
       strcpy(user_cbi.des,"User supplied choice");
       strcpy(user_cbi.ref,"No reference");    
       x = 0;
       user_cbi.fop_cod[x++] = 0;

       while ((c = fgetc(pm->cbifile)) != EOF && x <=66) {
       sprintf (str,"%c",c);	
	 if (isdigit(c) && atoi(str) >= 0 
	     && atoi(str) <= 3) {
           user_cbi.fop_cod[x++] = (char) atoi(str);
	   
	 }                          /*                             isdigit */
       }                            /*                        end of while */

     if (x != 65) {                /*              wrong number of codons */
        sprintf(pm->messages, "\nError in CBI file %i digits found,  "
            "expected 64 EXITING\n", x - 1);
        my_exit(99,pm->messages);
     }                        
       pcbi = (&user_cbi);
    }                              /*             matches if(pm->cbifile)  */

    
     printf ("Using %s (%s) \noptimal codons to calculate "
	        		      "CBI\n",pcbi->des,pcbi->ref);


				   /* initilise has_opt_info             */			      
     for (x = 1; x < 22; x++) has_opt_info[x]=0;
     
     for (x = 1; x < 65; x++)     {
        if (pcu->ca[x] == 11 || *(ds + x) == 1) 
		continue;			      			      
        if (pcbi->fop_cod[x] == 3 ) 
		has_opt_info[pcu->ca[x]]++;        
     }  



     first_call_cbi = FALSE;       /*      this won't be called again      */
   }                               /*          matches if (first_call_cbi) */


   for (x = 1; x < 65; x++) {
      if (! has_opt_info[pcu->ca[x]])      continue;
      switch ((int) pcbi->fop_cod[x]) {
      case 3:
        opt     += nncod[x];
        tot_cod += nncod[x];
        exp_cod += (float) nnaa[pcu->ca[x]]/ (float) da[pcu->ca[x]]; 
      break;
      case 2:
      case 1:
        tot_cod += *(nncod + x);
        break;
      default:
         sprintf(pm->messages, " Serious error in CBI information found"
          " an illegal CBI value of %f for codon %i"
          " permissible values are \n 1 for non-optimal"
          " codons\n 2 for common codons\n"
          " 3 for optimal codons\n" " EXITING ",
          pcbi->fop_cod[x], x);
	 
          my_exit(99,pm->messages);
          break;
      }                             /*                   end of switch     */
   }                                /*                   for (    )        */                     

   if( tot_cod - exp_cod)
     fcbi= (opt - exp_cod) / (tot_cod - exp_cod);     
   else  
     fcbi= (float) 0.0; 
    
   fprintf(foutput, "%5.3f%c", fcbi,sp);                /* CBI     QED     */

   return 1;
}

/****************** Frequency of OPtimal codons output  ********************/
/* Frequency of Optimal codons (Fop) (Ikemura 1981). This index, is ratio  */
/* of optimal codons to synonymous codons (genetic code dependent). Optimal*/
/* codons for several species are in-built and can be selected using Menu 3*/
/* By default, the optimal codons of E. coli are assumed. The user may also*/
/* enter a personal choice of optimal codons. If rare synonymous codons    */
/* have been identified, there is a choice of calculating the original Fop */
/* index or a modified index. Fop values for the original index are always */
/* between 0 (where no optimal codons are used) and 1 (where only optimal  */
/* codons are used). When calculating the modified Fop index, any negative */
/* values are adjusted to zero.                                            */
/***************************************************************************/
int fop_out(FILE * foutput, long int *nncod)
{
   long int        nonopt = 0;
   long int        std = 0;
   long int        opt = 0;
   float           ffop;
   int             c,x;
   char            nonopt_codons = FALSE;
    
   char            str[2];


   char sp=  (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;

   static char     first_call = TRUE;
   static char     description[61];
   static char     reference[61];
   static char     asked_about_fop = FALSE;
   static char     factor_in_rare = FALSE;
   static char     has_opt_info[22];
   static FOP_STRUCT user_fop;

   if (first_call) {                /* have I been called previously      */
     user_fop.des = description;
     user_fop.ref = reference;
     if ( pm->fopfile == NULL && pm->verbose==TRUE 
	  && pm->menu == TRUE && (pfop == fop )) {
          /* this is false                                                 */
	  /* if personal fopfile is on commandline or                      */
          /* in non-interactive mode or -silent option                     */
          /* or fop values are not the default values                      */

         printf("\nDo you wish to input a personal choice of Fop"
	          " values (y/n) [n] ");
         gets(pm->junk);
         if ('Y' == (char) toupper( (int) pm->junk[0])) {
          printf("\nInput file must contain 64 Fop values\n"
                 " 1= rare codon\n 2= common codon\n 3= optimal codon\n");

          if (!(pm->fopfile = open_file("file with Fop values"
                       ,"fop.coa", "r", 0))) my_exit(6,"fop_out");

         }                           /*                         if 'Y' == */
      }                              /* if (pm->fopfile == NULL........ ) */
  
 
    if ( pm->fopfile ) {
      rewind (pm->fopfile);          /*    possible for fopfile = cbifile */
      strcpy(user_fop.des,"User supplied choice");
      strcpy(user_fop.ref,"No reference");
      x = 0;
      user_fop.fop_cod[x++] = 0;
      
      while ((c = fgetc(pm->fopfile)) != EOF && x <=66) {
        sprintf (str,"%c",c);
      
        if (isdigit(c) && atoi(str) >= 0 
            && atoi(str) <= 3) {
	        user_fop.fop_cod[x++] = (char) atoi(str);	
        }                           /*                       test isdigit */
     }                              /*                       end of while */

     if (x != 65) {                 /*             wrong number of codons */
        sprintf(pm->messages, "\nError in Fop file %i values found,  "
            "expected 64 EXITING\n", x - 1);
        my_exit(99,pm->messages);
     }
     pfop = &user_fop;              /*  assigns pointer to user fop values*/
    }
     

     printf ("Using %s (%s)\noptimal codons to calculate "
	        		      "Fop\n",pfop->des,pfop->ref);
	
	
				   /* initilise has_opt_info             */			      
     for (x = 1; x < 22; x++) has_opt_info[x]=0;
        
     for (x = 1; x < 65; x++)     {
        if (pcu->ca[x] == 11 || *(ds + x) == 1) 
		continue;			      			      
        if (pfop->fop_cod[x] == 3 ) 
		has_opt_info[pcu->ca[x]]++;
	
	if (pfop->fop_cod[x] == 1 ){
	   if (!asked_about_fop && pm->verbose) {
             printf("\nIn the set of optimal codons you have selected,\n"
        	  "non-optimal codons have been identified\nThey can be "
        	  "used in the calculation of a modified Fop, "
        	  "(Fop=(opt-rare)/total)\n else the original formulae "
        	  "will be used (Fop=opt/total)\n\n\t\tDo you wish "
        	  "calculate a modified fop (y/n) [n] ");
	     gets(pm->junk);
	     if ( 'Y' == (char) toupper( (int)pm->junk[0]))
	       factor_in_rare = TRUE;
	     asked_about_fop = TRUE;
           }
	   
	   if ( factor_in_rare == TRUE )
	            has_opt_info[pcu->ca[x]]++;
        }  
    }                                 /*    matches for (x=1           */
   first_call = FALSE;
   }                                  /*    matches if ( !first_call ) */
   
   
   
   for (x = 1; x < 65; x++) {
      if (!has_opt_info[pcu->ca[x]] ) 
       continue;
      
      switch ((int) pfop->fop_cod[x]) {
      case 3:
     opt += *(nncod + x);
     break;
      case 2:
     std += *(nncod + x);
     break;
      case 1:
     nonopt_codons = TRUE;
     nonopt += *(nncod + x);
     break;
      default:                      
     sprintf(pm->messages, " Serious error in fop information found"
         " an illegal fop value of %f for codon %l"
         " permissible values are \n 1 for non-optimal"
         " codons\n 2 for common codons\n"
         " 3 for optimal codons\n" " EXITING ",
         pfop->fop_cod[x], x);
	 printf ("opt %l, std %l, nonopt %l\n",opt,std,nonopt); 
     my_exit(99,pm->messages);
     break;
      }
   }
                                    /* only ask this once  ...            */


   if (factor_in_rare && (opt + nonopt + std) )
      ffop = (float) (opt - nonopt) / (float) (opt + nonopt + std);
   else if ((opt + nonopt + std))
      ffop = (float) opt / (float) (opt + nonopt + std);
   else   
      ffop=0.0;


   fprintf(foutput, "%5.3f%c", ffop,sp);

   return 1;
}

/***************  Effective Number of Codons output   *********************/
/* The effective number of codons (NC) (Wright 1990). This index is a     */
/* simple measure of overall codon bias and is analogous to the effective */
/* number of alleles measure used in population genetics. Knowledge of the*/
/* optimal codons or a reference set of highly expressed genes is not     */
/* needed when calculating this index. Initially the homozygosity for each*/
/* amino acid is estimated from the squared codon frequencies.            */
/**************************************************************************/
float enc_out(FILE * foutput, long int *nncod, long int *nnaa) {
   int             numaa[9];
   int             fold[9];
   int             error_t = FALSE;
   int             i,z,x;
   double          totb[9];
   double          averb = 0, bb = 0, k2 = 0, s2 = 0;
   float           enc_tot = 0.0F;
   char sp=  (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;

/* don't assume that 6 is the largest possible amino acid family assume 9*/
   for (i = 0; i < 9; i++) {            
      fold[i] = 0;              /* initialise arrays to zero             */
      totb[i] = 0.0;
      numaa[i] = 0;
   }

   for (i = 1; i < 22; i++) {   /* for each amino acid                  */
      if (i == 11)
     continue;                  /* but not for stop codons              */

      if (*(nnaa + i) <= 1)     /* if this aa occurs once then skip     */
     bb = 0;
      else {
     for (x = 1, s2 = 0; x < 65; x++) { 
         /* Try all codons but we are only looking for those that encode*/
         /* amino amid i, saves having to hard wire in any assumptions  */
        if (pcu->ca[x] != i) continue;           /* skip is not i       */


        if (*(nncod + x) == 0)  /* if codons not used then              */
           k2 = 0.0;            /* k2 = 0                               */
        else
           k2 = pow(((double) *(nncod + x) / (double) *(nnaa + i)),
            (double) 2);

        s2 += k2;               /* sum of all k2's for aa i             */
     }
     bb = (((double) *(nnaa + i) * s2) - 1.0) /  /* homozygosity        */
        (double) (*(nnaa + i) - 1.0);
      }

      if (bb > 0.0000001) {
     totb[*(da + i)] += bb;         /* sum of all bb's for amino acids  */
                                    /* which have z alternative codons  */
     numaa[*(da + i)]++;            /* where z = *(da+i)                */
      }
                                    /* numaa is no of aa that were z    */
      fold[*(da + i)]++;            /* fold z=4 can have 9 in univ code */
   }                                /* but some aa may be absent from   */
                                    /* gene therefore numaa[z] may be 0 */
   enc_tot = (float) fold[1];

   for (z = 2, averb = 0, error_t = FALSE; z <= 8; z++) {   
                                   /* look at all values of z if there  */
      if (fold[z]) {               /* are amino acids that are z fold   */
     if (numaa[z] && totb[z] > 0)
        averb = totb[z] / numaa[z];
     else if (z==3 && numaa[2] && numaa[4] && fold[z]==1 )   
                                   /* special case                      */
        averb = (totb[2] / numaa[2] + totb[4] / numaa[4]) * 0.5;
     else {                        /* write error to stderr             */
        codon_error( z, numaa[z], title, 3 );  
        error_t = TRUE;            /* error catch for strange genes     */
        break;
        }
     enc_tot += (float) fold[z] / (float) averb;    
                                   /* the calculation                   */
      }
   }

   if (error_t)
      fprintf(foutput, "*****%c",sp);
   else if (enc_tot <= 61)
      fprintf(foutput, "%5.2f%c", enc_tot,sp);
   else
      fprintf(foutput, "61.00%c",sp);

   return enc_tot;
}

/*******************   G+C output          *******************************/
/* This function is a real work horse, initially it counts base composit */
/* ion in all frames, length of gene, num synonymous codons, number of   */
/* non synonymous codons. Then dependent on the value for which used in  */
/* switch statement. We return various analyses of this data             */
/* if which ==1 then the output is very detailed, base by base etc.      */
/* if which ==2 then the output is for GC content only                   */
/* if which ==3 then the output is for GC3s (GC at synonymous 3rd posit) */
/* if which ==4 then the output is for L_sym                             */
/* if which ==5 then the output is for L_aa                              */
/* The output from this subroutine is in a tabular format if human read- */
/* able output is selected, and in columns if machine readable. Also the */
/* number of values reported changes as it is assumed the user has access*/
/* to a spreadsheet type programme if they are requesting tabular output */
/*************************************************************************/
void gc_out(FILE * foutput, FILE * fblkout, int which){

   long int        id;
   long int        bases[5];        /* base that are synonymous GCAT     */
   long int        base_tot[5];
   long int        base_1[5];
   long int        base_2[5];
   long int        base_3[5];
   long int        tot_s = 0;
   long int        totalaa = 0;
   static char     header = FALSE;
   int x,y,z;
   char sp=  (pm->seq_format=='H')? 
       (char) '\t': 
       (char) pm->seperator;

   typedef double lf;

   for (x = 0; x < 5; x++) {
      bases[x] = 0;                 /* initialise array values to zero    */
      base_tot[x] = 0;
      base_1[x] = 0;
      base_2[x] = 0;
      base_3[x] = 0;
   }

   for (x = 1; x < 5; x++)
      for (y = 1; y < 5; y++)
     for (z = 1; z < 5; z++) {      /* look at all 64 codons              */
        id = (x - 1) * 16 + y + (z - 1) * 4;

        if (pcu->ca[id] == 11)
           continue;                /* skip if a stop codon               */
        base_tot[x] += ncod[id];    /* we have a codon xyz therefore the  */
        base_1[x] += ncod[id];      /* frequency of each position for base*/
        base_tot[y] += ncod[id];    /* x,y,z are equal to the number of   */
        base_2[y] += ncod[id];      /* xyz codons .... easy               */
        base_tot[z] += ncod[id];    /* will be fooled a little if there   */
        base_3[z] += ncod[id];      /* non translatable codons, but these */
                                    /* are ignored when the avg is calc   */
        totalaa += ncod[id];

        if (*(ds + id) == 1)
           continue;                /* if not synon  skip codon           */

        bases[z] += ncod[id];       /* count no of codons ending in Z     */
                     
        tot_s += ncod[id];          /* count tot no of silent codons      */
                      
     }


   if (!tot_s || !totalaa) {
      fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
      fprintf(pm->my_err, "No output was written to file   \n");
      return;
   }
   switch ((int) which) {
   case 1:                          /* exhaustive output for analysis     */
      if (pm->seq_format == 'M') {  /* machine readable format            */
     if (!header) {                 /* print a first line                 */
        fprintf(fblkout,
         "Gene_description%cLen_aa%cLen_sym%cGC%cGC3s%cGCn3s%cGC1%cGC2"
         "%cGC3%cT1%cT2%cT3%cC1%cC2%cC3%cA1%cA2%cA3%cG1%cG2%cG3\n"
		,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp);
        header = TRUE;
     }
                                    /* now print the information          */
     fprintf(fblkout, "%-.20s%c", title,sp); 
     fprintf(fblkout, 
	     "%ld%c%ld%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c"
         "%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c"
         "%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f\n",
	     totalaa,sp,
	     tot_s,sp,
	     (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),sp,
	     (lf) (bases[2] + bases[4]) / (lf) tot_s,sp,
	     (lf) (base_tot[2] + base_tot[4] - bases[2] - bases[4])
	     / (lf) (totalaa * 3 - tot_s),sp,
	     (lf) (base_1[2] + base_1[4]) / (lf) (totalaa),sp,
	     (lf) (base_2[2] + base_2[4]) / (lf) (totalaa),sp,
	     (lf) (base_3[2] + base_3[4]) / (lf) (totalaa),sp,
	     (lf) base_1[1] / (lf) totalaa,sp, 
	     (lf) base_2[1] / (lf) totalaa,sp, 
	     (lf) base_3[1] / (lf) totalaa,sp,
	     (lf) base_1[2] / (lf) totalaa,sp, 
	     (lf) base_2[2] / (lf) totalaa,sp, 
	     (lf) base_3[2] / (lf) totalaa,sp,
	     (lf) base_1[3] / (lf) totalaa,sp, 
	     (lf) base_2[3] / (lf) totalaa,sp, 
	     (lf) base_3[3] / (lf) totalaa,sp,
	     (lf) base_1[4] / (lf) totalaa,sp, 
	     (lf) base_2[4] / (lf) totalaa,sp, 
	     (lf) base_3[4] / (lf) totalaa);
      } else {                      /* must be human formatted output then*/
     fprintf(fblkout,               /* tabulated output                   */ 
         "Gene Name: %-69.69s\nLength   : %-ld aa"
         " \tNon_synonymous/synonymous codons (%3ld/%5ld)\n"
         " GC=%5.3f\tGC3s=%5.3f\tGC_not_GC3s=%5.3f\n"
         "base\t1\t2\t3\ttotal\t\t1\t2\t3 \ttotal\n"
         "  T\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
         "W\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
         "  C\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
         "S\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
         "  A\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
         "R\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
         "  G\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
         "Y\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n\n",
         title,
         totalaa,
         totalaa - tot_s,
         tot_s,
         (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),
         (lf) (bases[2] + bases[4]) / (lf) tot_s,
         (lf) (base_tot[2] + base_tot[4] - bases[2] - bases[4])
         / (lf) (totalaa * 3 - tot_s),
         (lf) base_1[1] / (lf) totalaa, (lf) base_2[1] / (lf) totalaa, 
         (lf) base_3[1] / (lf) totalaa,
         (lf) base_tot[1] / (lf) (totalaa * 3),
         (lf) (base_1[1] + base_1[3]) / (lf) totalaa,
         (lf) (base_2[1] + base_2[3]) / (lf) totalaa,
         (lf) (base_3[1] + base_3[3]) / (lf) totalaa,
         (lf) (base_tot[1] + base_tot[3]) / (lf) (totalaa * 3),
         (lf) base_1[2] / (lf) totalaa, (lf) base_2[2] / (lf) totalaa, 
         (lf) base_3[2] / (lf) totalaa,
         (lf) base_tot[2] / (lf) (totalaa * 3),
         (lf) (base_1[2] + base_1[4]) / (lf) totalaa,
         (lf) (base_2[2] + base_2[4]) / (lf) totalaa,
         (lf) (base_3[2] + base_3[4]) / (lf) totalaa,
         (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),
         (lf) base_1[3] / (lf) totalaa, (lf) base_2[3] / (lf) totalaa, 
         (lf) base_3[3] / (lf) totalaa,
         (lf) base_tot[3] / (lf) (totalaa * 3),
         (lf) (base_1[3] + base_1[4]) / (lf) totalaa,
         (lf) (base_2[3] + base_2[4]) / (lf) totalaa,
         (lf) (base_3[3] + base_3[4]) / (lf) totalaa,
         (lf) (base_tot[3] + base_tot[4]) / (lf) (totalaa * 3),
         (lf) base_1[4] / (lf) totalaa, (lf) base_2[4] / (lf) totalaa, 
         (lf) base_3[4] / (lf) totalaa,
         (lf) base_tot[4] / (lf) (totalaa * 3),
         (lf) (base_1[1] + base_1[2]) / (lf) totalaa,
         (lf) (base_2[1] + base_2[2]) / (lf) totalaa,
         (lf) (base_3[1] + base_3[2]) / (lf) totalaa,
         (lf) (base_tot[1] + base_tot[2]) / (lf) (totalaa * 3));
                                    /* What hit me, did anyone see a bus  */
      }
      break;
   case 2:                          /* a bit more simple ... GC content   */
      fprintf(foutput, "%5.3f%c", (lf) ((base_tot[2] + base_tot[4]) / (lf) 
          (totalaa * 3)),sp);
      break;
   case 3:                          /* GC3s                               */
      fprintf(foutput, "%5.3f%c", (lf) (bases[2] + bases[4]) / 
          (lf) tot_s,sp);
      break;
   case 4:                          /* Number of synonymous codons        */
      fprintf(foutput, "%3li%c", tot_s,sp);
      break;
   case 5:                          /* Total length in translatable AA    */
      fprintf(foutput, "%3li%c", totalaa,sp);
      break;

#ifdef DEBUG
   default:
      fprintf(stderr, " Programming error in GC_out which (%i) is out of "
          "valid range\n"
          ,(int) which);
      my_exit(99, "gc out");
      break;
#endif
   }
   return;
}

/********************     DOT    ******************************************/
/*   Indicates the progress of a search                                   */
/**************************************************************************/

void dot(int y, long int period)
{
   static long int xx;
   static char     dott=0;

   if (!y) dott = 0;                /* re-zero the width counter          */

   if (++xx % period == 0){         /* every period calls print a .       */
             fprintf(stderr,".");
             dott++;
            }            
   if ( dott == 50) {               /* every 50 dots wrap the line        */
             fprintf(stderr,"\n");
             dott=0;
            }
   return;
}
/**********************  get_aa    *****************************************/
/* get_aa converts a numeric codon value (range 0-64 ) into a amino acid   */
/* and returns that amino acid number                                      */
/* pcu->ca converts the codon number into amino acid number                */
/* paa->aa1 converts  amino acid code into letters                         */
/***************************************************************************/

char *get_aa(int which, char *codon)
{
   char           *amino=NULL;

   if (strlen(codon) == 3) {
      if (which == 1)
     amino = paa->aa1[pcu->ca[ident_codon(codon)]];
      else
     amino = paa->aa3[pcu->ca[ident_codon(codon)]];
   } else {
      amino = amino;
      amino = paa->aa1[0];
   }
   return amino;
}
/**********************   cutab_out     ***********************************/
/* Generates a formatted table of codon, RSCU and amino acid usage        */
/* ds points to an array[64] of synonymous values                         */
/* it reveals how many synonyms there are for each aa                     */
/**************************************************************************/
int cutab_out(FILE * fblkout, long *nncod, long *nnaa)
{
   int             last_row[4];
   int             x;
   char            sp;

   if (pm->seq_format=='M')
      sp = pm->seperator;
   else
      sp = '\t';
                         
   for (x = 0; x < 4; x++)
      last_row[x] = 0;

   codon_tot = codon_error(1, 1, "", (char) 4); /*  dummy*/

   for (x = 1; x < 65; x++) {
     if (last_row[x % 4] != pcu->ca[x]){
       (pm->seq_format=='M')?
	 fprintf(fblkout, "%s%c%s%c", paa->aa3[pcu->ca[x]], sp, paa->cod[x], sp):
	 fprintf(fblkout, "%s %s"  , paa->aa3[pcu->ca[x]], paa->cod[x]);
     }
     else{
       (pm->seq_format=='M')?
	 fprintf(fblkout, "%c%s%c", sp, paa->cod[x], sp):
	 fprintf(fblkout, "    %s",   paa->cod[x]);
     }
/* Sample of output *******************************************************/      
/*Phe UUU    0 0.00 Ser UCU    1 0.24 Tyr UAU    1 0.11 Cys UGU    1 0.67 */ 
/*    UUC   22 2.00     UCC   10 2.40     UAC   17 1.89     UGC    2 1.33 */ 
/*Leu UUA    0 0.00     UCA    1 0.24 TER UAA    0 0.00 TER UGA    1 3.00 */ 
/*    UUG    1 0.12     UCG    6 1.44     UAG    0 0.00 Trp UGG    4 1.00 */
/**************************************************************************/
   (pm->seq_format=='M')?
      fprintf(fblkout, "%i%c%.2f%c",
          (int) nncod[x],
          sp, (nncod[x]) ?
          ((float) nncod[x] / (float) nnaa[pcu->ca[x]])
          * (float) (*(ds + x)):0,sp):           /* end of fprintf        */
      fprintf(fblkout, "%5i%5.2f ",
          (int) nncod[x],
          (nncod[x]) ?
          ((float) nncod[x] / (float) nnaa[pcu->ca[x]])
          * (float) (*(ds + x)):0);              /* end of fprintf        */

      last_row[x % 4] = pcu->ca[x];

      if (!(x % 4))
     fprintf(fblkout, "\n");
      if (!(x % 16))
     fprintf(fblkout, "\n");
   }
   fprintf(fblkout, "%li codons in %16.16s (used %22.22s)\n\n", 
       (long int) codon_tot, title, pcu->des);
   return 1;
}
/********************  Dinuc_count    *************************************/
/* Count the frequency of all 16 dinucleotides in all three possible      */
/* reading frames. This one of the few functions that does not use the    */
/* codon and amino acid usage arrays ncod and naa to measure the parameter*/
/* rather they use the raw sequence data                                  */
/**************************************************************************/
int dinuc_count(char *seq, long int ttot)
{
   static char     a = 0;
   int i;
   
   for (i = 0; i < ttot; i++) {
      last_base = a;
      switch (seq[i]) {
      case 't':
      case 'T':
      case 'u':
      case 'U':
     a = 1;
     break;
      case 'c':
      case 'C':
     a = 2;
     break;
      case 'a':
      case 'A':
     a = 3;
     break;
      case 'g':
      case 'G':
     a = 4;
     break;
      default:
     a = 0;
     break;
      }
      if (!a || !last_base)
     continue;                      /* true if either of the base is not  */
                                    /* a standard UTCG, or the current bas*/
                                    /* is the start of the sequence       */
      din[fram][((last_base - 1) * 4 + a) - 1]++;
      if (++fram == 3) fram = 0;    /* resets the frame to zero           */
   }
   return 1;
}
/***************** Dinuc_out           ************************************/
/* Outputs the frequency of dinucleotides, either in fout rows per seq    */
/* if the output is meant to be in a human readable form, each row repre- */
/* senting a reading frame. The fourth row is the total of the all the    */
/* reading frames. Machine readable format writes all the data into a     */
/* single row                                                             */
/**************************************************************************/
int dinuc_out(FILE * fblkout, char *ttitle)
{
   static char     called = FALSE;
   char            bases[5] = {'T', 'C', 'A', 'G'};
   char            sp = pm->seperator;
   long            dinuc_tot[4];
   int i,x,y;


   for ( x=0 ; x<4 ; x ++)  dinuc_tot[x]=0;
 

   for ( x=0 ; x<3 ; x++ )
       for ( i=0 ; i<16 ; i++ ){
         dinuc_tot[x]+=din[x][i];   /* count dinuc usage in each frame   */
         dinuc_tot[3]+=din[x][i];   /* and total dinuc usage,            */
       }

   if (pm->seq_format=='H' ) sp = ' ';

   if (!called) {                   /* write out the first row as a header*/
      called = TRUE;

      if  (pm->seq_format=='H' ) {
	   fprintf(fblkout,"%-13.13s%cframe%c","title", sp,sp);
	   for (x = 0; x < 4; x++)
	    for (i = 0; i < 4; i++) 
	     fprintf(fblkout,"%c%c%4.4c",bases[x],bases[i],sp);        
      }else{
	   fprintf(fblkout, "%s","title");
        for (y = 0; y < 4; y ++){
	     fprintf(fblkout, "%c%s",sp,"frame");
	     for (x = 0; x < 4; x++) 
	      for (i = 0; i < 4; i++) 
          fprintf(fblkout,"%c%c%c",sp, bases[x],bases[i]);          
          }
          }
      fprintf(fblkout, "\n");
       }                            /* matches if (!called)               */ 

/*Sample output   truncated  **********************************************/
/*title         frame TT    TC    TA    TG    CT    CC    CA    CG    AT  */
/*MLSPCOPER.PE1__ 1:2 0.024 0.041 0.016 0.008 0.049 0.041 0.033 0.098 ... */
/*MLSPCOPER.PE1__ 2:3 0.000 0.195 0.000 0.098 0.000 0.138 0.008 0.073 ... */
/*MLSPCOPER.PE1__ 3:1 0.008 0.016 0.000 0.033 0.033 0.107 0.172 0.262 ... */
/*MLSPCOPER.PE1__ all 0.011 0.084 0.005 0.046 0.027 0.095 0.071 0.144 ... */
/*MLSPCOPER.PE2__ 1:2 0.026 0.026 0.009 0.009 0.053 0.035 0.053 0.061 ... */  
/**************************************************************************/
   for (x = 0; x < 4; x++) {
   if ( pm->seq_format == 'H' || x == 0 )   
     fprintf(fblkout,  (pm->seq_format=='H') ?
	     "%-15.15s%c":"%-.15s%c", ttitle, sp);

     switch (x) {
     case 0:
         fprintf(fblkout, "1:2%c", sp);
       break;
     case 1:
         fprintf(fblkout, "2:3%c", sp);
       break;
     case 2:
         fprintf(fblkout, "3:1%c", sp);
       break;
     case 3:
         fprintf(fblkout, "all%c", sp);
       break;
     }

     if ( x == 3 ){ 
       for (i = 0; i < 16; i++)
          if ( dinuc_tot[x] )
	        fprintf(fblkout,"%5.3f%c",
              (float)(din[0][i]+din[1][i]+din[2][i])/
              (float)dinuc_tot[x], sp);     
          else
            fprintf(fblkout,"%5.3f%c",0.00, sp);              
     }
     else{
       for (i = 0; i < 16; i++)
         if ( dinuc_tot[x] )	
           fprintf(fblkout, "%5.3f%c", 
             (float) din[x][i]/(float)dinuc_tot[x], sp);
           else
               fprintf(fblkout,"%5.3f%c", 0.00, sp);              
     }

     if ( pm->seq_format == 'H' || x == 3) 
       fprintf(fblkout, "\n");
   }
   return 1;
}
/************* Coa_raw_out            *************************************/
/* Write out codon usage in a format compatible with the format required  */
/* by text2bin, i.e. part of the COA analysis suite of subroutines        */
/* rather than storing this data in memory, we first write raw codon usage*/
/* to disk, and then read it in as necessary, the file handle for this    */
/* data is passed via the fcoaout pointer. By default it writes to the    */
/* files coa_raw and coa1_raw                                             */
/**************************************************************************/
char coa_raw_out(FILE * fcoaout, long *nncod, long *nnaa, char *ttitle)
{

   static int      count = 0;
   int i;
      
   for (i = 0; i < (int) strlen(ttitle); i++)  /* don't take any chances  */
      if (isspace( (int) *(ttitle + i)))    *(ttitle + i) = '_';

   strncpy(pm->junk, ttitle, 20);              /* sequence name           */
   fprintf(fcoaout, "%i_%s ", ++count, pm->junk);

   switch (pm->coa) {
   case 'c':
   case 'r':                                  /* if rscu or codon usage   */ 
      for (i = 1; i < 65; i++)
     fprintf(fcoaout, "%i\t", (int) nncod[i]);
      fprintf(fcoaout, "\n");
      break;
   case 'a':                                  /* if amino acid usage      */
      for (i = 1; i < 22; i++)
     fprintf(fcoaout, "%i\t", (int) nnaa[i]);
      fprintf(fcoaout, "\n");
      break;
#ifdef DEBUG                                  /* Debugging code           */
   default:
      fprintf(pm->my_err, " Error in coa_out_raw\n");
#endif
   }
   return 1;
}
/**********  sorted_by_axis1    *******************************************/
/* COA specific routine, after the position of the genes on the first axis*/
/* has been computed the genes are sorted according to there ordination   */
/* this allows us to identify gene positioned at either end of the first  */
/* trend. Then the codon usage of these genes is used to determine the CU */
/* of these two groups. This information is used to identify optimal codon*/
/* calculated putative CAI adaptive values and for the Chi squared con-   */
/* tingency test, used to identify the optimal and non-optimal codons     */
/* The position of each gene on axis 1 is passed via the ax1 pointer      */
/* The integer rank of each sequence is stored in sortax1                 */
/* The number of genes is passed by the integer value lig                 */
/**************************************************************************/
void sorted_by_axis1(double *ax1, int *sortax1, int lig)
{
   double          min;
   int             nmin, *tagged;
   int             i,j;
   
   /* allocated an array such that we can record which genes have been    */
   /* processed already, and are in sortax1                               */
   if ((tagged = (int *) calloc(lig + 1, sizeof(int))) == NULL)
      my_exit(3, "sorted by axis 1");

   /* blank the array, shouldn't have to do this for ANSI C compilers     */
   for (i = 1; i <= lig; i++)
      tagged[i] = FALSE;

   /* for each gene                                                       */
   for (j = 1; j <= lig; j++) {
      i = 0;
      while (tagged[++i]);          /* find the first gene not in sortax1 */
      min = ax1[i];                 /* assign it value to min             */  
      nmin = i;                     /* assign it ordination to nmin       */

      for (i = 1; i <= lig; i++) {  /* for each gene                      */
       if (tagged[i]) continue;     /* gene is already in sortax1 .. next */
       if (ax1[i] < min) {          /* find the min value among the rest  */
        min = ax1[i];               /* assign it value to min             */ 
        nmin = i;                   /* assign it ordination to nmin       */
       }
      }
      sortax1[j] = nmin;            /* gene with lowest ax1 position is   */
      tagged[nmin] = TRUE;          /* assigned to sorax1 and tagged      */
   }
   free(tagged);
}
/***********  gen_cusort_fop                 ******************************/
/* COA specific routine, takes the sorted array of axis 1 positions from  */
/* sort_by_axis1 and passed via the sortax1 pointer. The array contains   */
/* the genes in order of occurrence in the original input file, but the   */
/* ranked order of each gene is recorded as the array value               */
/* This allows us to identify genes position at either end of the main    */
/* trend. Then the codon usage of these genes is used to write out a file */
/* with the genes in a axis1 position order                               */
/* the codon usage of the two groups at either end of the principle axis  */
/* are also counted. This information is then passed to highlow()         */
/* The position of each gene on axis 1 is passed via the ax1 pointer      */
/* The integer rank of each sequence is stored in sortax1                 */
/* The number of genes is passed by the interger value lig                */
/**************************************************************************/
void gen_cusort_fop(int *sortax1, int lig, FILE * fnam, FILE *ssummary)
{
   int             stops;
   long int       *low, *high;
   int             min, max, i ;
   float           v2;
   FILE           *fcusort = NULL;
   int            j;

   
   /* first open the original raw codon usage file                        */
   if ((fcusort = open_file("", "cusort.coa", "w", FALSE)) == NULL)
      my_exit(1, "gen_cusort_fop");                       

   /* calloc enough memory for the codon usage of the low group of genes  */
   if ((low = (long int *) calloc(65, sizeof(long int))) == NULL)
      my_exit(3, "low gen_cusort_fop");
   /* calloc enought memory for the codon usage of the high group of genes*/
   if ((high = (long int *) calloc(65, sizeof(long int))) == NULL)
      my_exit(3, "high gen_cusort_fop");

   /*pcoa->fop_gene is set in the advanced correspondence menu and is used*/
   /*to set the No of genes at either end of the principle axis that are  */
   /*to be used to create the low and high codon bias subsets of genes    */
   if (pcoa->fop_gene < 0) {        /* the number represent a percentage  */
      min = (int) ((float) lig * ((float) pcoa->fop_gene * -0.01));
      max = lig - (int) ((float) lig * ((float) pcoa->fop_gene * -0.01));
   } else {                        /*  the value is an absolute number    */
      min = pcoa->fop_gene;
      max = lig - pcoa->fop_gene;
   }

   if (min <= 0) {                 /* error catch in case % is too low    */
      min = 1;                     /* or fop_gene is set too high         */
      fprintf(pm->my_err, "Problems with the number genes used for"
          " fop adjusting to 1 gene\n");
   }
   if (max <= 0) {                 /* ditto                               */
      max = 1;
      fprintf(pm->my_err, "Problems with the number genes used for"
          " fop adjusting to one gene\n");
   }
   for (j = 1; j < 65; j++) {      /* initialise the blank array          */
      low[j] = 0;
      high[j] = 0;
   }

   /* write explanation about what we are doing to summary.coa            */ 
   fprintf(ssummary, "\ncusort.coa (not shown here) contains CU of "
       "genes sorted by their\n"
       "ordination on the principle axis or factor\n"
       "Genes used to calculate fop were 1 to %i and %i to %i\n"
       "these gene numbers REFER ONLY to the file cusort.coa\n"
       ,min, max + 1, pcoa->rows);

   for (i = 1; i <= lig; i++) {     /* foreach gene                       */
      rewind(fnam);                 /* go to start of codon_raw           */
      clean_up(ncod, naa);          /* blank the codon usage array        */    
      j = 1;
      while (j++ != sortax1[i])     /* find the rank of gene i            */ 
       fgets(pm->junk, BUFSIZ,fnam);/* by scanning for lines of CU in     */ 
      fscanf(fnam, "%s", pm->junk); /* now we know the name of seq i      */

      for (j = 1; j < 64; j++) {    /* now read in the cu of each codon   */
       fscanf(fnam, "%f", &v2);     /* assign it initially to v2          */ 
       ncod[j] = (long int) v2;     /* then place this value in ncod      */
     if (min >= i)                  /* remember the codon usage of the    */ 
        low[j] += (long int) v2;    /* two groups of genes at either end  */
     if (max < i)                   /* of the axis, containing min and    */
        high[j] += (long int) v2;   /* max genes                          */
      }

      fscanf(fnam, "%f\n", &v2);    /* now read the last codon in         */
      ncod[64] = (long int) v2;
      if (min >= i)
       low[64] += (long int) v2;
      if (max < i)
        high[64] += (long int) v2;  /* as above                           */

      /* we want to use codon_us_out to write out the sorted list of CU   */
      /* to cusort.coa. But if we have any internal stops etc, it will    */
      /* generate error messages, but we have already seen this messages  */
      /* on the first pass, so we fool it by saying all the stops are     */
      /* valid stops and not to complain again                            */
      for (j = 1, stops = 0; j < 65; j++)   
                 if (pcu->ca[j] == 11)
                        stops += (int) ncod[j];
      dot( 1 , 10 );  
      codon_usage_out(fcusort, ncod, 11, stops, pm->junk);
   }
   fileclose(&fcusort);              
   highlow(low, high, ssummary);        /* now we call highlow           */
                                        /* to use the sorted cu output   */
   free(low);                           /* release the memory to the OS  */
   free(high);
}

/************ highlow          ********************************************/
/* The codon usage of the two groups on either end of the axis is assigned*/
/* to low and high ... perhaps these would be better called left and right*/
/* as when they are passed to this function it is not know which group is */
/* lowly or highly biased. This is decided within highlow, by calculating */
/* the enc (a measure of bias) for each group and assigning the group with*/
/* the lowest enc as the higher biased genes. This works if the trend     */
/* represented by axis1 is truly selection for optimal translation        */
/* IT'S THE USERS RESPONSIBILITY TO ASSERTAIN IF THIS IS VALID            */
/* This information is used to identify optimal codons, as well as        */
/* calculate  putative CAI adaptive values and for the Chi squared con-   */
/* tingency test, used to identify the optimal and non-optimal codons     */
/**************************************************************************/

void highlow(long int *low, long int *high, FILE * ssummary)
{

   int            *last_row, icode, outer,i,j,x ;

   long int       *aa_low, *aa_high, *left, *right, *left_aa, *right_aa;
   long int       *highest_x;
   long int        right_tot = 0, left_tot = 0;

   float           enc_low, enc_high;
   float           a, b, c, d, e, f, g, h, total, hr, br, *x2;
   float           w;
   char           *flag, sp;

   FILE           *fcai=NULL,*fhilo = NULL, *ffop = NULL;
   FILE           *fcbi=NULL;

   /*calloc to the pointers the required storage                          */
   if ((fhilo = open_file("", "hilo.coa", "w", FALSE)) == NULL)
      my_exit(1, "hilo.coa");
   if ((ffop = open_file("", "fop.coa", "w", FALSE)) == NULL)
      my_exit(1, "fop.coa");
   if ((aa_low = (long int *) calloc(22, sizeof(long int))) == NULL)
      my_exit(3, "aa_low");
   if ((aa_high = (long int *) calloc(22, sizeof(long int))) == NULL)
      my_exit(3, "aa_high");
   if ((highest_x = (long int *) calloc(22, sizeof(long int))) == NULL)
      my_exit(3, "last_row");      
   if ((x2 = (float *) calloc(65, sizeof(float))) == NULL)
      my_exit(3, "x2");
   if ((flag = (char *) calloc(65, sizeof(char))) == NULL)
      my_exit(3, "flag");
   if ((last_row = (int *) calloc(65, sizeof(int))) == NULL)
      my_exit(3, "last_row");
   
  
   if (pm->seq_format=='M')
      sp = pm->seperator;
   else
      sp = '\t';

   /* initialize the various arrays                                       */
   for (x = 0; x < 4; x++) last_row[x] = 0;

   for (x = 0; x < 22; x++){
      highest_x[x]=0;
      aa_low   [x]=0;
      aa_high  [x]=0;
      }
   for (x = 0; x <65 ; x++) {
      x2      [x]= (float) 0.0;
      flag    [x]=0;   
      last_row[x]=0;
      }
      
      
   /*count the amino acid usage for the two datasets, initially we only   */
   /*have the codon usage of the two groups                               */
   for (i = 1; i < 65; i++) {
      aa_low[pcu->ca[i]] += low[i];
      aa_high[pcu->ca[i]] += high[i];
      flag[i] = ' ';                /*flag is used to identify opt codons */
   }

   enc_low = enc_out(fhilo, low, aa_low);         /*calc enc for each  of */
   enc_high = enc_out(fhilo, high, aa_high);      /*datasets              */
   fprintf(fhilo, "\n");

   fprintf(ssummary, "\nenc_left %f enc_right %f\n", enc_low, enc_high);

   for (i = 1; i < 65; i++) {
      if (*(ds + i) == 1 || pcu->ca[i] == 11)     /*skip stop and nonsynon*/
     continue;

      if (enc_low < enc_high) {                  /*decide which is more   */
        left = low;                              /*biased                 */
        right = high;                            /*left and right refer   */
        left_aa = aa_low;                        /*the columns of outputed*/
        right_aa = aa_high;                      /*hilow table            */
        a = (float) low[i];
        b = (float) high[i];
        g = (float) aa_low[pcu->ca[i]];
        h = (float) aa_high[pcu->ca[i]];
      } else {
        left = high;
        right = low;
        left_aa = aa_high;
        right_aa = aa_low;
        a = (float) high[i];
        b = (float) low[i];
        g = (float) aa_high[pcu->ca[i]];
        h = (float) aa_low[pcu->ca[i]];
     }
      /* calculate the chi squared contingency value                      */
      c = g - a;
      d = h - b;
      e = a + b;
      f = c + d;
      total = a + b + c + d;
      if (e * f * h * g)
     x2[i] = ((a * d - c * b) * (a * d - c * b)) * total / (e * f * g * h);
      else
     x2[i] = (float) -99.0;                   /*if 0 assign nonsense value*/

      if (g * h) {
     hr = a / g;
     br = b / h;
     if (hr > br && x2[i] > 6.635)            /* if significant at p<.99  */
        flag[i] = '*';
     else if (hr > br && x2[i] > 3.841)       /* if significant at p<0.05 */
        flag[i] = '@';
      }
   }
   fprintf(ssummary, "Chi squared contingency test of genes from both\n"
                     "extremes of axis 1\n");
/* this created the hi-low codon usage table                              */
/* Sample output truncated (***********************************************/
/*Asp   GAU   0.10 ( 10) 1.68 ( 53)   Gly   GGU   0.21 ( 12) 0.85 ( 11)   */   
/*      GAC*  1.90 (184) 0.32 ( 10)         GGC*  3.13 (176) 2.00 ( 26)   */   
/*Glu   GAA   0.00 (  0) 1.34 ( 55)         GGA   0.05 (  3) 0.69 (  9)   */  
/*      GAG*  2.00 (255) 0.66 ( 27)         GGG   0.60 ( 34) 0.46 (  6)   */   
/*                                                                        */
/*                                                                        */
/*        Number of codons in high bias dataset 2825                      */
/*        Number of codons in low  bias dataset 1194                      */
/*Note: high bias was assigned to the dataset with the lower average Nc   */
/*NO Chi could be calculated for UGU                                      */
/*Codon UUC (Phe) chi value was 70.175                                    */
/*Codon UCC (Ser) chi value was 48.030                                    */
/*Codon UAC (Tyr) chi value was 86.069                                    */
/**************************************************************************/ 

   for (outer = 1; outer <= 3; outer += 2) {
      for (x = 1; x < 5; x++) {
      for (j = 1; j < 5; j++) {
        icode = ((x - 1) * 16) + ((j - 1) * 4) + outer; 


        for (i = icode; i <= icode + 1; i++) {   /*loop twice             */
            /* if the previous entry in this column codes for the same AA */
            if (last_row[i % 2] != pcu->ca[i]) {
	          fprintf(fhilo, "%s%c%s%c%c", paa->aa3[pcu->ca[i]],
		              sp, paa->cod[i], flag[i], sp);
	          fprintf(ssummary, "%s%c%s%c%c", paa->aa3[pcu->ca[i]],
		              sp, paa->cod[i], flag[i], sp);
	        } else {                       
	           fprintf(fhilo, "%c%s%c%c", sp, paa->cod[i], flag[i], sp);
	           fprintf(ssummary, "   %c%s%c%c",sp,paa->cod[i],flag[i],sp);
	        }
            /* write out Codon usage, RSCU and significance for both data */
	       fprintf(fhilo, "%4.2f (%3i) %4.2f (%3i)%c",
		       (left[i]) ?
		       ((float) left[i] / (float) left_aa[pcu->ca[i]])
		       * (float) (*(ds + i))
		       : 0.0,
		       (int) left[i],
		       (right[i]) ?
		       ((float) right[i] / (float) right_aa[pcu->ca[i]])
		       * (float) (*(ds + i))
		       : 0.0,
		       (int) right[i],sp);               /*       end of fprintf  */
	      fprintf(ssummary, "%4.2f (%3i) %4.2f (%3i)%c",
		       (left[i]) ?
		       ((float) left[i] / (float) left_aa[pcu->ca[i]])
		       * (float) (*(ds + i))
		       : 0.0,
		       (int) left[i],
		       (right[i]) ?
		       ((float) right[i] / (float) right_aa[pcu->ca[i]])
		       * (float) (*(ds + i))
		       : 0.0,
		       (int) right[i],sp);               /*        end of fprintf */
          last_row[i % 2] = pcu->ca[i];          /* remember the last row */
        }
        fprintf(fhilo, "\n");
        fprintf(ssummary, "\n");
       }
       fprintf(ssummary, "\n");
       fprintf(fhilo, "\n");
      }
      fprintf(ssummary, "\n");
      fprintf(fhilo, "\n");
   }

   for (i = 1; i < 65; i++) {                    /* count both datasets   */
      right_tot += right[i];
      left_tot += left[i];
   }


   fprintf(fhilo, 
       "\tNumber of codons in high bias dataset %li\n", left_tot);
   fprintf(fhilo, 
       "\tNumber of codons in low  bias dataset %li\n", right_tot);
   fprintf(fhilo, 
       "Note: high bias was assigned to the dataset with the lower"
       " average Nc\n");

   fprintf(ssummary, 
       "\tNumber of codons in high bias dataset %li\n", left_tot);
   fprintf(ssummary, 
       "\tNumber of codons in low  bias dataset %li\n", right_tot);
   fprintf(ssummary, 
       "Note high bias was assigned to the genes with the lower"
       " overall Nc\n");

   /* now printout the Chi Squared values for each significant comparison */
   for (i = 1; i < 65; i++) {
      if (flag[i] == '*' || flag[i] == '@') {
     fprintf(fhilo, "Codon %s (%s) chi value was %.3f\n", paa->cod[i],
         paa->aa3[pcu->ca[i]], x2[i]);
     fprintf(ssummary, "Codon %s (%s) chi value was %.3f\n", paa->cod[i],
         paa->aa3[pcu->ca[i]], x2[i]);
      }
      if (x2[i] == -99)       /* there were no codons in one of the groups*/
     fprintf(fhilo, "NO Chi could be calculated for %s\n", paa->cod[i]);
   }
   fprintf(fhilo, "\n");
   fprintf(ssummary, "\n");

   /* now write out the optimal codons as PUTATIVELY identified by codonW */
   fprintf(ssummary, "These are the PUTATIVE optimal codons\n"
     "This is the format required for Menu 4 option 2 (Fop) "
     "and option 3 (CBI)\n"
     "This data is also duplicated in the files \"fop.coa\" "
     "and \"cbi.coa\"\n"
     "The format of these files is that required for input "
     "as a personal choice\n"
     "of optimal codons for these indexes\n");

   for (i = 1; i < 65; i++) {
      if( left[i] > highest_x[pcu->ca[i]])    /* used for calculating CAI */
                           highest_x[pcu->ca[i]]=left[i]; 
      
      if (*(ds + i) == 1 || pcu->ca[i] == 11) {
     fprintf(ffop, "2");
     fprintf(ssummary, "2");
      } else if (flag[i] == '*') {
     fprintf(ffop, "3");
     fprintf(ssummary, "3");
      } else if (((left[i]) ?
          ((float) left[i] / (float) left_aa[pcu->ca[i]])
          * (float) (*(ds + i))
          : 0.0) < 0.1) {                        /* if RSCU <0.1 its rare */
     fprintf(ffop, "1");
     fprintf(ssummary, "1");
      } else {
     fprintf(ffop, "2");
     fprintf(ssummary, "2");
      }

      if (!(i % 16)) {                           /* handle line wrapping  */ 
     fprintf(ffop, "\n");
     fprintf(ssummary, "\n");
      } else {
     fprintf(ffop, ",");
     fprintf(ssummary, ",");
      }
   }
   fileclose(&ffop);                              /*   close the Fop file  */
  
   if ((fcbi = open_file("", "cbi.coa", "w", FALSE)) == NULL)
      my_exit(1, "cbi.coa");                     /*    open cbi.coa       */
      
 for (i = 1; i < 65; i++) {                      /* write values 2 cbi.coa*/

  if (flag[i] == '*')                       /* Only report optimal codons */
     fprintf(fcbi, "3");
  else
     fprintf(fcbi, "2");                    /* ignore non optimal codons  */

  if (!(i % 16)) 
     fprintf(fcbi, "\n");
  else
     fprintf(fcbi, ",");
    
 }
   
  fileclose(&fcbi);   
   
   fprintf(ssummary, "\n\n");
    
   /* now calculate and write out CAI adaptiveness values                 */
   fprintf(ssummary, "These are PUTATIVE CAI adaptiveness values "
     "identified by this programme\n"
     "This data is also duplicated in the file \"cai.coa\"\n"
     "The format of this file is compatible with the format\n"
     "of the file used to input a personal selection of CAI values\n"
     "That is, the format required for Menu 4 option 1\n"
     "cai.coa\tinput file to be used for CAI calculations\n"
     "\n\nCod AA    Xi\tWi\t\tCod AA    Xi\tWi\n"); 
  
   
   if ((fcai = open_file("", "cai.coa", "w", FALSE)) == NULL)
      my_exit(1, "cai.coa"); 
  
   for (i = 1, x = TRUE ; i < 65 && x ; i++) {
    
    /* if a stop or a non-synonymous codon w = 1                          */
    if (*(ds + i) == 1 || pcu->ca[i] == 11) {  
                    fprintf(fcai, "1.0000000 \n");
                    fprintf(ssummary,"%s %s %6.1f %9.7f\t", 
                      paa->cod[i], 
                      paa->aa3[pcu->ca[i]],
                      (float) left[i], 1.0000000); 
    } else  if ( highest_x[pcu->ca[i]] ) {
      
      /* if a codon is absent then adjust its frequecy to 0.5             */
      if ( left[i] ) 
       w= (float) left[i]/ (float) highest_x[pcu->ca[i]];
      else
       w= (float) 0.5   / (float) highest_x[pcu->ca[i]];
      fprintf(fcai, "%9.7f \n", w);                    /* output CAI W    */
      fprintf(ssummary,"%s %s %6.1f %9.7f\t", 
             paa->cod[i], paa->aa3[pcu->ca[i]],
             (left[i]) ? (float) left[i]:0.5 , w); 
    /* either strange amino acid composition or data sets where too small */               
    } else {                            
      fprintf(pm->my_err, 
          "WARNING An attempt to calculate CAI relative "
          "adaptivnesss FAILED\n no %s amino acids found"
          " in the high bias dataset \n",paa->aa3[pcu->ca[i]]);    
      fprintf(ssummary, 
          "\nWARNING An attempt to calculate CAI relative adaptiveness "
          "FAILED\n no %s amino acids found in the high bias dataset \n",
          paa->aa3[pcu->ca[i]]);
      x=FALSE;
   }  
   if( !(i%2)) fprintf (ssummary  , "\n");
   } /* matches for (i = 1, x = TRUE ; i < 65 && x ; i++)                 */
     
   fileclose(&fcai);                              /* close files           */
   fileclose(&fhilo);
   free(aa_low);                                 /* free memory           */
   free(aa_high);
   free(highest_x);
   free(x2);
   free(flag);
   free(last_row);
   return;
}
/*********************  hydro_out        **********************************/
/* The general average hydropathicity or (GRAVY) score, for the hypothet- */
/* ical translated gene product. It is calculated as the arithmetic mean  */
/* of the sum of the hydropathic indices of each amino acid. This index   */
/* was used to quantify the major COA trends in the amino acid usage of   */
/* E. coli genes (Lobry, 1994).                                           */
/* Calculates and outputs total protein hydropathicity based on the Kyte  */
/* and Dolittle Index of hydropathicity (1982)                            */
/* nnaa               Array with frequency of amino acids                 */
/* paa                points to a struct containing Amino Acid values     */
/* pap->hydro         Pointer to hydropathicity values for each AA        */
/**************************************************************************/
int hydro_out(FILE * foutput, long int *nnaa)
{
   long int        a2_tot = 0;
   float           hydro = (float) 0.0;
   int i;
   char sp=  (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;

   for (i = 1; i < 22; i++)
      if (i != 11) a2_tot += nnaa[i];

   if (!a2_tot) {           /* whow   .. no amino acids what happened     */
      fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
      fprintf(pm->my_err, "No output was written to file   \n", title);
      return 1;
   }
   
   for (i = 1; i < 22; i++)
      if (i != 11)
     hydro += ((float) nnaa[i] / (float) a2_tot) * (float) pap->hydro[i];

   fprintf(foutput, "%8.6f%c", hydro,sp );

   return 1;
}
/**************** Aromo_out ***********************************************/
/* Aromaticity score of protein. This is the frequency of aromatic amino  */
/* acids (Phe, Tyr, Trp) in the hypothetical translated gene product      */
/* nnaa               Array with frequency of amino acids                 */
/* paa                points to a struct containing Amino Acid values     */
/* pap->aromo         Pointer to aromaticity values for each AA           */
/**************************************************************************/
int aromo_out(FILE * foutput, long int *nnaa)
{
   long int        a1_tot = 0;
   float           aromo = (float) 0.0;
   int i;
   char sp=  (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;

   for (i = 1; i < 22; i++)
      if (i != 11)
     a1_tot += nnaa[i];


   if (!a1_tot) {
      fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
      fprintf(pm->my_err, "No output was written to file   \n", title);
      return 1;
   }
   for (i = 1; i < 22; i++)
      if (i != 11)
     aromo += ((float) nnaa[i] / (float) a1_tot) * (float) pap->aromo[i];

   fprintf(foutput, "%8.6f%c", aromo,sp);
   return 1;
}
Tree @lintian-fixes/main (Download .tar.gz)

codon_us.c @lintian-fixes/main — raw · history · blame