Codebase list aegean / upstream/latest src / gaeval.c
upstream/latest

Tree @upstream/latest (Download .tar.gz)

gaeval.c @upstream/latestraw · history · blame

/**

Copyright (c) 2010-2015, Daniel S. Standage and CONTRIBUTORS

The AEGeAn Toolkit is distributed under the ISC License. See
the 'LICENSE' file in the AEGeAn source code distribution or
online at https://github.com/standage/AEGeAn/blob/master/LICENSE.

**/

#include <getopt.h>
#include <math.h>
#include "genometools.h"
#include "AgnGaevalVisitor.h"
#include "AgnInferCDSVisitor.h"
#include "AgnInferExonsVisitor.h"
#include "AgnUtils.h"

typedef struct
{
  const char *alignfile;
  const char **genefiles;
  int numgenefiles;
  AgnGaevalParams params;
} GaevalOptions;

static void default_params(AgnGaevalParams *params)
{
  params->alpha = 0.6;
  params->beta = 0.3;
  params->gamma = 0.05;
  params->epsilon = 0.05;
  params->exp_cds_len = 400;
  params->exp_5putr_len = 200;
  params->exp_3putr_len = 100;
}

static void print_usage(FILE *outstream)
{
  fprintf(outstream,
"\ngaeval: calculate coverage and intergrity scores for gene models based on "
"transcript alignments\n"
"Usage: gaeval [options] alignments.gff3 genes.gff3 [moregenes.gff3 ...]\n"
"  Basic options:\n"
"    -h|--help               print this help message and exit\n"
"    -v|--version            print version number and exit\n\n"
"  Weights for calculating integrity score (must add up to 1.0):\n"
"    -a|--alpha: DOUBLE      introns confirmed, or %% expected CDS length for\n"
"                            single-exon genes; default is 0.6\n"
"    -b|--beta: DOUBLE       exon coverage; default is 0.3\n"
"    -g|--gamma: DOUBLE      %% expected 5' UTR length; default is 0.05\n"
"    -e|--epsilon: DOUBLE    %% expected 3' UTR length; default is 0.05\n\n"
"  Expected feature lengths for calculating integrity score:\n"
"    -c|--exp-cds: INT       expected CDS length (in bp); default is 400\n"
"    -5|--exp-5putr: INT     expected 5' UTR length; default is 200\n"
"    -3|--exp-3putr: INT     expected 3' UTR length; default is 100\n\n");
}

static void parse_options(int argc, char **argv, GaevalOptions *options)
{
  default_params(&options->params);
  int opt = 0;
  int optindex = 0;
  const char *optstr = "hva:b:g:e:c:5:3:";
  const struct option gaeval_options[] =
  {
    { "help",      no_argument,       NULL, 'h' },
    { "version",   no_argument,       NULL, 'v' },
    { "alpha",     required_argument, NULL, 'a' },
    { "beta",      required_argument, NULL, 'b' },
    { "gamma",     required_argument, NULL, 'g' },
    { "epsilon",   required_argument, NULL, 'e' },
    { "exp-cds",   required_argument, NULL, 'c' },
    { "exp-5putr", required_argument, NULL, '5' },
    { "exp-3putr", required_argument, NULL, '3' },
    { NULL,        no_argument,       NULL,  0  },
  };
  for(opt  = getopt_long(argc, argv + 0, optstr, gaeval_options, &optindex);
      opt != -1;
      opt  = getopt_long(argc, argv + 0, optstr, gaeval_options, &optindex))
  {
    if(opt == '3')
      options->params.exp_3putr_len = atoi(optarg);
    else if(opt == '5')
      options->params.exp_5putr_len = atoi(optarg);
    else if(opt == 'a')
      options->params.alpha = atof(optarg);
    else if(opt == 'b')
      options->params.beta = atof(optarg);
    else if(opt == 'c')
      options->params.exp_cds_len = atoi(optarg);
    else if(opt == 'e')
      options->params.epsilon = atof(optarg);
    else if(opt == 'h')
    {
      print_usage(stdout);
      exit(0);
    }
    else if(opt == 'g')
      options->params.gamma = atof(optarg);
    else if(opt == 'v')
    {
      agn_print_version("GAEVAL", stdout);
      exit(0);
    }
  }
  int numargs = argc - optind;
  if(numargs < 2)
  {
    print_usage(stderr);
    fprintf(stderr, "error: must provide at least 2 input files, %d provided\n",
            numargs);
    exit(1);
  }

  double weight_total = options->params.alpha + options->params.beta +
                        options->params.gamma + options->params.epsilon;
  if(fabs(weight_total - 1.0) > 0.00001)
  {
    print_usage(stderr);
    fprintf(stderr, "error: integrity score weights must add up to 1.0; "
            "specified weights total %.2lf\n", weight_total);
    exit(1);
  }

  options->alignfile = argv[optind + 0];
  options->genefiles = (const char **)argv + optind + 1;
  options->numgenefiles = numargs - 1;
}

int main(int argc, char **argv)
{
  GtError *error;
  GtNodeStream *stream, *last_stream, *align_stream;
  GtQueue *streams;
  GaevalOptions options;
  parse_options(argc, argv, &options);

  //----------
  // Set up the processing stream
  //----------
  gt_lib_init();
  streams = gt_queue_new();

  stream = gt_gff3_in_stream_new_unsorted(1, &options.alignfile);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)stream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)stream);
  gt_queue_add(streams, stream);
  align_stream = stream;

  stream = gt_gff3_in_stream_new_unsorted(options.numgenefiles,
                                          options.genefiles);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)stream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)stream);
  gt_queue_add(streams, stream);
  last_stream = stream;

  GtStr *source = gt_str_new_cstr("AEGeAn::GAEVAL");
  GtLogger *logger = gt_logger_new(true, "", stderr);
  stream = agn_infer_cds_stream_new(last_stream, source, logger);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = agn_infer_exons_stream_new(last_stream, source, logger);
  gt_queue_add(streams, stream);
  last_stream = stream;
  gt_str_delete(source);

  stream = agn_gaeval_stream_new(last_stream, align_stream, options.params);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_gff3_out_stream_new(last_stream, NULL);
  gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream *)stream);
  gt_queue_add(streams, stream);
  last_stream = stream;

  //----------
  // Execute the processing stream
  //----------
  error = gt_error_new();
  int had_err = gt_node_stream_pull(last_stream, error);
  if(had_err)
    fprintf(stderr, "Error processing node stream: %s\n", gt_error_get(error));
  gt_error_delete(error);

  //----------
  // Free memory
  //----------
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);
  gt_logger_delete(logger);
  gt_lib_clean();
  return had_err;
}