src/genbank.cc - augustus (984ef698-c606-4157-9207-1d9b10a870de/main)

Tree @984ef698-c606-4157-9207-1d9b10a870de/main (Download .tar.gz)

genbank.cc @984ef698-c606-4157-9207-1d9b10a870de/main — raw · history · blame

/*
 * genbank.cc
 *
 * License: Artistic License, see file LICENSE.TXT or 
 *          https://opensource.org/licenses/artistic-license-1.0
 * 
 * Description: sequence and gene struture input from file
 */

#include "genbank.hh"
#include "fasta.hh"

// project includes
#include "properties.hh"
#include "gene.hh"
#include "projectio.hh"  // for goto_line_after

// standard C/C++ includes
#include <iostream>

#ifdef ZIPINPUT
using boost::iostreams::filtering_istream;
using boost::iostreams::gzip_decompressor;
#endif

GBProcessor::GBProcessor(string filename) :
    gbs(filename)
{
    try{
	gbVerbosity = Properties::getIntProperty("/genbank/verbosity");
    } catch (...){
	gbVerbosity = 3;
    }
}

GBPositions* GBProcessor::nextPosition( ){
    return gbs.nextData( );
}

AnnoSequence* GBProcessor::getAnnoSequence( GBPositions* pos ){
    if( !pos )
        return (AnnoSequence*) 0;
    bool stopCodonExcludedFromCDS = false;
    try {
	stopCodonExcludedFromCDS = Properties::getBoolProperty("stopCodonExcludedFromCDS");
    } catch (ProjectError &e) {
	stopCodonExcludedFromCDS = false;
    }
    bool withUTR;
    try {
      string utr = Properties::getProperty("UTR");
      withUTR = (utr == "both" || utr == "1" || utr == "on" || utr == "5");
    } catch (ProjectError &e) {
      withUTR = false;
    }

    AnnoSequence *annoseq = new AnnoSequence();
    if (!annoseq)
	return NULL;
    annoseq->anno = new Annotation;
    annoseq->sequence = getSequence( *pos );
    annoseq->length = pos->seqlength;
    annoseq->bc.addSequence(annoseq->sequence, strlen(annoseq->sequence) );
    char* idtmp = strstr(pos->buffer, "LOCUS");
    unsigned maxSeqnameLen = 100;
    if (idtmp == NULL) {
	annoseq->seqname = newstrcpy("unknown");
    } else {
	annoseq->seqname = new char[maxSeqnameLen];
	idtmp += strlen("LOCUS");
	while( isspace(*idtmp) )
	    idtmp++;
	int i;
	for( i = 0; i < maxSeqnameLen - 1 && !isspace(*idtmp); i++, idtmp++ ){
	    annoseq->seqname[i] = *idtmp;
	}
	annoseq->seqname[i] = '\0';
    }
    int numGenes = pos->CDS.size();
    if (numGenes == 0 && gbVerbosity > 0) {
      cerr << "No 'CDS' found in sequence " << annoseq->seqname << endl;
    }
    /*
     * make a list of genes from the data in pos
     *
     */
    list<Gene*> allgenes, *allgenesSorted;
    int curGeneNr = 1;
    list<GBFeature>::iterator fit, mRNAit;
    int lastgeneend=-1;
    Gene *gene;

    pos->CDS.sort();
    pos->mRNA.sort();

    // make genes from the CDS first
    
    for (fit = pos->CDS.begin(); fit != pos->CDS.end(); fit++) {
      bool cont=true;
      if (fit->begin <= lastgeneend) {
	cerr << "CDS overlaps with other CDS (" << (fit->begin+1) << " <= " << (lastgeneend+1) << ") in sequence " << annoseq->seqname
	     << ". Ignoring this gene." << endl;
	cont = false;
      } 
      if (cont && gbVerbosity > 0 && (!fit->complete_l || !fit->complete_r)){
	  cerr << "Coding sequence incomplete in sequence " << annoseq->seqname << "." << endl;
	  //cont = false;
      }
      if (cont) {
	gene = new Gene;
	// set the id, geneid, seqname and source
	gene->id = "1";
	
	if (fit->geneid != "") {
	  gene->geneid = fit->geneid;
	} else {
	    gene->geneid = string(annoseq->seqname) + "-" + itoa(curGeneNr);
	    
	  // char *temp = new char[strlen(annoseq->seqname)+6];
	  // sprintf(temp, "%s-%d",annoseq->seqname, curGeneNr );
	  // gene->geneid = temp;
	}
	gene->seqname = annoseq->seqname;

	gene->source = "database";

	gene->length   = pos->seqlength;
	gene->clength  = 0;
	gene->codingstart = gene->codingend = -1;
	gene->strand   = fit->strand;
	gene->bc       = annoseq->bc;
	gene->exons    = fit->ranges;
	/*
	 * construct the coding exons and introns
	 */
	State *exon_old=NULL, *intron_old=NULL, *intron=NULL, *st = gene->exons;
	while( st ){
	  gene->clength += st->length();
	  if (st == gene->exons)
	    if (st->next == NULL)
	      st->type =  (fit->strand == plusstrand)? singleG : rsingleG;
	    else
	      if (fit->strand == plusstrand)
		st->type = initialExon(gene->clength);
	      else 
		st->type = rterminalExon(2 - gene->clength);
	  else if (st->next == NULL)
	    if (fit->strand == plusstrand)
	      st->type = terminal;
	    else 
	      st->type = rinitial;
	  else 
	    if (fit->strand == plusstrand)
	      st->type = internalExon(gene->clength);
	    else 
	      st->type = rinternalExon(2 - gene->clength);
	  
	  if (stopCodonExcludedFromCDS) {
	      if (st->type == singleG || st->type == terminal){
		  	    gene->clength += 3;
			    st->end += 3;
	      }
	      if (st->type == rsingleG || isRTerminalExon(st->type)){
		  gene->clength += 3;
		  st->begin -= 3;
	      }
	      if (st->begin < 0 || st->end >= annoseq->length)
		  throw GBError("Stop codon out of sequence bounds. Ignoring sequence.");
	  }

	  if (gene->codingstart < 0 || gene->codingstart > st->begin)
	    gene->codingstart = st->begin;
	  if (gene->codingend < st->end)
	    gene->codingend = st->end;
	  if( exon_old ){
	    intron = new State();
	    intron->begin = exon_old->end+1;
	    intron->end   = st->begin-1;
	    if (intron->begin > intron->end){
		cerr << "Error: In sequence " << annoseq->seqname << ": One CDS exon does not begin properly after the previous CDS exon."
		     << exon_old->end << " >= " << st->begin << endl;  
		throw GBError("Intron has non-positive length.");
	    }
		
	    intron->type = intron_type;
	    intron->next  = 0;
	    if( !gene->introns )
	      gene->introns = intron;
	    if( intron_old )
	      intron_old->next = intron;
	    intron_old = intron;
	  }
	  exon_old=st;
	  st = st->next;
	}
	if (withUTR){
	  /*
	   * look if there is any fitting mRNA: positions compatible (and same geneid)
	   * take the first fitting but delete all fitting mRNAs
	   * construct the utr
	   */
	  bool mrnafound = false;
	  mRNAit = pos->mRNA.begin();
	  GBFeature mrnaFeature;
	  while(mRNAit != pos->mRNA.end()){
	    if (fit->matches(*mRNAit)){
	      if (!mrnafound)
		mrnaFeature = *mRNAit;
	      mrnafound = true;
	      mRNAit = pos->mRNA.erase(mRNAit);
	    } else {
	      mRNAit++;
	    }
	  }
	  if (mrnafound){
	    // construct the utr
	    gene->addUTR(mrnaFeature.ranges, mrnaFeature.complete_l, mrnaFeature.complete_r);
	  }
	}

	// gene->computeBC(annoseq->sequence);
	allgenes.push_back(gene);
	lastgeneend = fit->end;
	curGeneNr++;
      }
    }

    if (withUTR){
	/*
	 * make incomplete genes from the remaining mRNAs
	 */
	for (mRNAit = pos->mRNA.begin(); mRNAit != pos->mRNA.end(); mRNAit++) {
	    gene = new Gene;
	    // set the id, geneid, seqname and source
	    gene->id = "1";
	    if (mRNAit->geneid != "") {
		gene->geneid = mRNAit->geneid;
	    } else {
		gene->geneid = string(annoseq->seqname) + "-" + itoa(curGeneNr);

		// char *temp = new char[strlen(annoseq->seqname)+6];
		// sprintf(temp, "%s-%d",annoseq->seqname, curGeneNr );
		// gene->geneid = temp;
	    }
	    gene->seqname = annoseq->seqname;
	  
	    gene->source = "database";
	  
	    gene->length   = pos->seqlength;
	    gene->clength  = 0;
	    gene->codingstart = gene->codingend = -1;
	    gene->strand   = mRNAit->strand;
	    gene->bc       = annoseq->bc;
	    gene->exons    = NULL;
	    // add the UTR
	    gene->addUTR(mRNAit->ranges, mRNAit->complete_l,  mRNAit->complete_r);
	    // check whether it overlaps a CDS
	    // If yes, don't use it.
	    bool overlapsCDS = false;
	    for (list<Gene*>::iterator git = allgenes.begin(); git != allgenes.end(); git++)
		if ((gene->transstart <= (*git)->codingstart && gene->transend >= (*git)->codingstart) ||
		    (gene->transstart >= (*git)->codingstart && gene->transstart <= (*git)->codingend)){
		    overlapsCDS = true;
		}

	    if ((gene->utr5exons || gene->utr3exons) && !overlapsCDS){
		// gene->computeBC(annoseq->sequence);
		allgenes.push_back(gene);
		curGeneNr++;
	    }
	}
    }
    if (allgenes.empty()) {
      cerr << "No correct 'CDS'"; 
      if (withUTR)
	cerr << "or 'mRNA'";
      cerr << " found in sequence " << annoseq->seqname << endl;
    }
    allgenesSorted = sortGenePtrList(allgenes);
    for (list<Gene*>::iterator git = allgenesSorted->begin(); git != allgenesSorted->end(); git++)
      annoseq->anno->appendGene(*git);
    //annoseq->printGFF();
    return annoseq;
}

/*
 * GBProcessor::getAnnoSequenceList()
 *  
 * gets the list of all annotated sequences from 
 *
 */
AnnoSequence* GBProcessor::getAnnoSequenceList(){
    AnnoSequence *first = NULL, *annoseq, *last = NULL;
    bool notend = true;
    int annocount = 0;
    while (notend){
	try {
	    annoseq = getAnnoSequence(nextPosition( ));
	    if ((notend = (annoseq != NULL))){
		annoseq->next = NULL;
		if (last)
		    last->next = annoseq;
		else
		    first = annoseq;
		last = annoseq;
		annocount++;
	    }
	} catch (GBError &e) {
	  cerr << "GBProcessor::getGeneList(): " << e.getMessage() << endl;
	  cerr << "Encountered error after reading " << annocount << " annotations." << endl;
 	}
    }
    if (annocount==0) 
	throw ProjectError("No genbank sequences found.");
    if (gbVerbosity)
	cout << "# Read in " << annocount << " genbank sequences." << endl;
    
    return first;
}

AnnoSequence* GBProcessor::getSequenceList(){
    AnnoSequence *seqlist = NULL, *seq, *last = NULL;
    while((seq = gbs.getNextFASTASequence())){
	seq->next = NULL;
	if (last)
	    last->next = seq;
	else
	    seqlist = seq;
	last = seq;
    };
    gbs.clear();
    if (seqlist == NULL)
	throw ProjectError("No sequences found.");
    return seqlist;
}

char* GBProcessor::getSequence( GBPositions& pos ){
    char* seq = new char[pos.seqlength+1];
    if( seq ){
        char* seqp = seq;
	istringstream isstrm( pos.seqbegin );
        char c;
        int  i;
	int count = 0;
        isstrm.getline(seq, pos.seqlength+1);
        while( isstrm ){
            isstrm >> i;  // Zahl zu Begin der Zeile lesen!
            while( isstrm.get( c ) && c != '\n' )
                if( isalpha( c ) ){
		    if (count++ < pos.seqlength) 
			*seqp++ = c; // tolower now postponed to after softmasking detection
		    else {
			char buffer [140];
			sprintf (buffer, "Sequence was longer than the expected %d bp. Please compare the line 'source 1..X' with the actual sequence length.", pos.seqlength);
			throw GBError(buffer);
		    }
		}
	}
	if (count < pos.seqlength) {
	    cerr << "Warning: Sequence was shorter than expected frome the source line of the Genbank entry ( expected " << pos.seqlength 
		 << " bp). Please compare the line 'source 1..X' with the actual sequence length (" << count << ")." << endl;
	  pos.seqlength = count;
	} 
	seq[pos.seqlength]='\0'; 
    }
    return seq;
}


GBFeature::GBFeature(const char *pos){
  begin = end = -1;
  ranges = NULL;
  complete_r = complete_l = true;
  istringstream isstrm( pos );
  char c;
  char tmp[GBMAXLINELEN] = { '\0' }; // need more than 4200 chars for human muscle gene TTN
  char* join = (char*)0;
  int i = 0;

  isstrm >> fkey >> ws;    // read 'CDS','mRNA' or signal FKEY
  c = isstrm.peek( );
  if (c == 'c') { // "complement"
    strand = minusstrand;
    while( isstrm.get( c ) && c != '(' && c != '\n')
      ;
  } else
    strand = plusstrand;
  c = isstrm.peek( );

  if( isalpha( c ) ){
    while( isstrm.get( c ) && c != '(' && c != '\n')
      ;
    if ( c == '\n') {
      cerr << "Feature key = " << fkey << ", c = " << c << endl;
      throw GBError( "GBProcessor::getJoin( ):  failed!!!" );
    }
    while( isstrm.get( c ) && c != ')' )
      if( !isspace( c ) ) {
          if (isdigit(c) || c =='.' || c == ',' || c=='<' || c=='>' ){
              if (i >= GBMAXLINELEN) {  
                  cerr << "Genbank feature description of " << fkey << "  " << string(tmp).substr(0, 40)
                       << " ... reaches the character limit of " << GBMAXLINELEN-1 << endl;
                  throw GBError( "GBProcessor::getJoin( ):  failed!!!" );
              }
              tmp[i++] = c;
          } else {
	  cerr << fkey << " contains character " << c << endl;
	  throw GBError( "GBProcessor::getJoin( ):  failed!!!" );
	}
      }
    while( isstrm.get( c ) && c != '\n' )
      ;         // read up to the end of the line
  } else if( isdigit( c ) || c == '<'){
      while( isstrm.get( c ) && c != ')' && c != '\n' ){
          if (i >= GBMAXLINELEN) {  
              cerr << "Genbank feature description of " << fkey << "  " << string(tmp).substr(0, 40)
                   << " ... reaches the character limit of " << GBMAXLINELEN-1 << endl;
              throw GBError( "GBProcessor::getJoin( ):  failed!!!" );
          }
          tmp[i++] = c;
      }
      
    while (isstrm && (c != '\n'))  // read until end of line
      isstrm.get( c );
  } else {  
    cerr << "Feature key = " << fkey << ", c = " << c << endl;
    throw GBError( "GBProcessor::getJoin( ):  failed!!!" );
  }
  
  join = newstrcpy(tmp,i);

  // try to determine the name of the gene
  char *genename;
  char buf[GBMAXLINELEN];
  bool searchFinished = false;
  char *gtag;
  while( isstrm && !searchFinished){
    isstrm.getline( buf, GBMAXLINELEN-1 );
    if (strncmp(buf, "                     ", 21) != 0) {  // name of the gene not found
      geneid="";
      searchFinished = true;
    }
    else {
      gtag = strstr(buf, "/gene=");
      if (gtag){// found the name of the gene in the annotation
	int cplen = strcspn(gtag + 7, "\"");
	genename = newstrcpy(gtag + 7, cplen);
	geneid = genename;
	searchFinished = true; 
      }
    }    
  }
 
  /*
   * Now construct the ranges state sequence
   */
  try {
    State *exon = 0, *exon_old = 0;
    istringstream joinstrm( join );
    while( joinstrm ){
      char c;
      int ebegin=-1, eend=-1;
      exon = new State();

      c = joinstrm.peek( );
      if (c=='<'){
	complete_l = false;
	joinstrm.get( c );
      }
    
      joinstrm >> ebegin >> c >> c;
      c = joinstrm.peek( );
      if (c=='>'){
	complete_r = false;
	joinstrm.get( c );
      }
      
      joinstrm >> eend >> c;  // format:  'Numer..Number,'
      if (ebegin < 1 || eend < 1)
	throw ProjectError(string("Wrong format for coordinates: ") + join);
      if (ebegin > eend)
	throw ProjectError(string("Feature begins after it ends: ") + join);
      exon->begin = ebegin - 1; // correct for the fact that indices
      exon->end   = eend - 1;   // start with 0 in the c++ sequence
      exon->next = 0;

      if( !ranges )
	ranges = exon;
      if (begin == -1 || exon->begin < begin)
	begin = exon->begin;
      if (end == -1 || exon->end > end)
	end = exon->end;
    
      if( exon_old ){
	exon_old->next = exon;
      }    
      exon_old = exon;
    }
  } catch (ProjectError &e) {
    cerr << "Constructing GenBank feature: " << e.getMessage() << endl;
    throw GBError("GBFeature constructor:Format error when reading genbank format.");
  }
}

bool GBFeature::checkRange(int len){
  return (begin >= 0 && end >= 0 && begin<len && end<len);
}

/*
 * GBFeature::matches
 * checks whether other extends this, as mRNA should extend CDS
 * in the range covered by this, the ranges should be identical
 */

bool GBFeature::matches(GBFeature &other){
  // if genenames exist they should be identical
  if (geneid != "" && other.geneid != "" && geneid != other.geneid)
    return false;
  if (strand != other.strand)
    return false;
  State *st=ranges, *otherst=other.ranges;
  if (!st || !otherst)
    return false;
  while (otherst && otherst->end < st->end)
    otherst = otherst->next;
  if (!(otherst && (otherst->begin <= st->begin) && (otherst->end == st->end || (st->next == NULL && otherst->end >= st->end))))
    return false;
  while (otherst->next && st->next){
    st=st->next;
    otherst=otherst->next;
    if (!(st->next == NULL || st == ranges) // internal exon
	&& !(st->begin == otherst->begin && st->end == otherst->end))// doesn't match exactly
      return false;
  }
  if (st->next)
    return false;
  if (! (otherst->end >= st->end && (otherst->begin == st->begin || (st == ranges && otherst->begin <= st->begin))))
    return false;  
  return true;
}

GBSplitter::GBSplitter( string fname ) : ftype(unknown) {
    if (fname != "-") {
	ifstrm.open(fname.c_str());    
	if( !ifstrm )
	    throw GBError("Could not open input file \"" + fname + "\"!");
    } else { // read from standard input
	throw GBError("Input from STDIN not supported anymore since the introduction of .gzipped input (with version 3.0).");
	//ifstrm.ios::rdbuf(cin.rdbuf());
	//ftype = fasta;
    }
#ifdef ZIPINPUT
    // deflate if gzipped
    boost::iostreams::filtering_istream zin;
    try {
	zin.push(gzip_decompressor());
	zin.push(ifstrm);
	zin.peek();
	if (!zin)
	    throw("Could not read first character assuming gzip format.");
	cout << "# Looks like " << ((fname != "-")? fname: "STDIN") 
	     << " is in gzip format. Deflating..." << endl;
    } catch (...) { // boost::iostreams::gzip_error& 
	// not a gzip file or ill-formatted
	zin.reset();
	ifstrm.seekg(0);
	zin.push(ifstrm);
    }
    boost::iostreams::copy(zin, sin);

#else
    // only normal file input available
    sin << ifstrm.rdbuf();
#endif
    determineFileType();
}

GBSplitter::~GBSplitter( ){
    if (ifstrm.is_open())
        ifstrm.close();
}



void GBSplitter::determineFileType(){
    std::stringstream csin(sin.str());
    char wrongChar=' ';
    ftype = unknown;
    csin >> ws;

    // check whether it could be genbank format
    // search for at least one LOCUS and ORIGIN at the beginning of a line
    csin >> goto_line_after( "LOCUS" );
    csin >> goto_line_after( "ORIGIN" );
    if (csin)
	ftype = genbank;
    else {
	// check, whether it is FASTA or plain sequence
	// only letters allowed in the sequence part, except for the end of the line
	bool whiteSpaceSeen;
	csin.clear();
	csin.seekg(0, ios::beg);
	string line;
	bool haveWrongChar=false;
	while (csin && !haveWrongChar) {
	    getline(csin, line);
	    if (line[0]!='>') {
		whiteSpaceSeen=false;
		for (int i=0; i < line.length() && !haveWrongChar; i++) 
		    if (isspace(line[i]))
			whiteSpaceSeen = true;
		    else if (!isalpha(line[i]) && whiteSpaceSeen){
			haveWrongChar = true;
			wrongChar = line[i];
		    }
	    }
	}
	if (!haveWrongChar)
	    ftype = fasta;
	
	csin.clear();
	csin.seekg(0, ios::beg);
    }
    if (ftype == unknown) {
     	string errmsg = "GBProcessor::determineFileType(): Couldn't determine input file type. Found bad character '";
	errmsg += wrongChar;
     	errmsg += "'";
     	throw GBError(errmsg);
    }

    //    csin.clear();
    //    csin.seekg(0, ios::beg);
}


GBPositions* GBSplitter::nextData( ){
    GBPositions* pos = new GBPositions;
    if( !findPositions( *pos ) ){     // No other data available!
        delete pos;
        return (GBPositions*)0;
    }
    return pos;
}

AnnoSequence *GBSplitter::getNextFASTASequence( ){
    char *sequence = NULL, *name = NULL;
    int length;
    readOneFastaSeq(sin, sequence, name, length);
    if (sequence == NULL || length == 0)
	return NULL;

    AnnoSequence *seq = new AnnoSequence();
    seq->seqname = name;
    seq->sequence = sequence;
    seq->length = length;
    return seq;
}


Boolean GBSplitter::gotoEnd( ){
    char buf[GBMAXLINELEN];
    do{
        int i = 0;
        sin.getline( buf, GBMAXLINELEN );
	if ((sin.rdstate() & ios_base::failbit) && !(sin.rdstate() & ios_base::eofbit))
	    sin.clear(sin.rdstate() & ~ios_base::failbit);
        while( i < GBMAXLINELEN-1 && isspace(buf[i]))
            i++;
        if( buf[i] == '/' && buf[i+1] == '/' )
            return true;
    } while (sin);
    return false;
}


Boolean GBSplitter::findPositions( GBPositions& pos ){
    int fposb, fpose;
    fposb = sin.tellg();
    if( !gotoEnd( ) )
        return false;
    if (sin.eof()){ // happens in rare cases, without newline after
                    // record ending double-slash //
        sin.clear(); // otherwise tellg returns -1
        fpose = sin.tellg();
        sin.clear(ios_base::eofbit); // reset the eofbit
    } else {
        fpose = sin.tellg();
    }
    sin.seekg( fposb );

    pos.length = fpose-fposb /*+1*/;         // Without the '\0'!!
    pos.buffer = new char[pos.length+1];
    sin.read( pos.buffer, pos.length );
    pos.buffer[pos.length] = '\0';
    pos.length++;                       // Now with the '\0'!!!
    pos.seqlength = 0;
    istringstream isstrm( pos.buffer );
    char buf[GBMAXLINELEN];
    while( isstrm ){
        int curpos = isstrm.tellg();
        isstrm >> ws;
        isstrm.getline( buf, GBMAXLINELEN-1 );
	if ((!sin.eof() && (sin.rdstate() & ios_base::failbit)) || strlen(buf) >= GBMAXLINELEN-2){
	    throw GBError(string("Could not read the following line in Genbank file.\n") + buf 
			  + "\nMaximum line length is \n" + itoa(GBMAXLINELEN-2) + ".\n");
	}
        char *src;
        char *rna;
        char *cds;
        char *seq;

        src = strstr( buf, "source" );
        if( src && src == buf ){
            istringstream strm(buf);
            string str;
            int a, b;
            char c;
            strm >> str >> a >> c >> c >> b;
            if (strm.fail())
                throw GBError(string("Syntax error in source line:\n") + src +
                                  "\nShould be of a format like:  source  1..2345");

	    if (b-a+1 > pos.seqlength)
		pos.seqlength = b-a+1;
            continue;
        }
        rna = strstr( buf, "mRNA         " );

        if( rna && rna == buf ){
	    GBFeature mrnaFeature(pos.buffer+curpos);
	    if (mrnaFeature.checkRange(pos.seqlength))
		pos.mRNA.push_back(mrnaFeature);
	    else {
		cout << "mRNA " << mrnaFeature.geneid << " out of range. Ignoring it." << endl;
		cout << "end= " << mrnaFeature.end << " len= " << pos.seqlength << endl;
	    }
        }
	cds = strstr( buf, "CDS             " );
	if( cds && cds == buf ){
	    GBFeature cdsFeature( pos.buffer+curpos );
	    if(cdsFeature.checkRange(pos.seqlength))
		pos.CDS.push_back( cdsFeature );
	    else
	        cout << "CDS " << cdsFeature.geneid << " out of range. Ignoring it." << endl;
	}
	seq = strstr( buf, "ORIGIN" );
	if( seq && seq == buf ){
	    if (curpos >= pos.length)
		cerr << "In gene " << pos.CDS.front().geneid << " a coordinate points after the sequence." << endl;
	  pos.seqbegin = pos.buffer+curpos;
	  break;
	}
    }  // while
    if (pos.seqlength == 0) {
	throw GBError("Sequence has 0 length. Maybe 'source' Feature missing?");
    }
    return true;
}