Codebase list gfapy / run/5aafde3f-6941-45a5-9ffb-a2e45b29cb5a/main bin / gfapy-fillseq
run/5aafde3f-6941-45a5-9ffb-a2e45b29cb5a/main

Tree @run/5aafde3f-6941-45a5-9ffb-a2e45b29cb5a/main (Download .tar.gz)

gfapy-fillseq @run/5aafde3f-6941-45a5-9ffb-a2e45b29cb5a/mainraw · history · blame

#!/usr/bin/env python3
"""
Add sequences from a Fasta file to a GFA file.
"""

import argparse
import sys
import gfapy

op = argparse.ArgumentParser(description=__doc__)
op.add_argument("inputgfa")
op.add_argument("inputfasta")
op.add_argument("-q", "--quiet", action="store_true", help="silence warnings")
op.add_argument("-v", "--verbose", action="store_true", help="verbose output")
op.add_argument("-V", '--version', action='version', version='%(prog)s 0.1')
opts = op.parse_args()

# note when applying to the output of older versions of Canu (1.6)
# the following fix to the GFA VN tag is necessary:
# sed -i s'/VN:Z:bogart\/edges/VN:Z:1.0/' canu.contigs.gfa

g = gfapy.Gfa.from_file(opts.inputgfa)

segment = None
slines = []
with open(opts.inputfasta) as f:
  for line in f:
    line = line.strip()
    if line.startswith(">"):
      if segment:
        segment.sequence = "".join(slines)
      sname = line[1:].split(" ")[0]
      if opts.verbose:
        sys.stderr.write("Processing segment {}...\n".format(sname))
      segment = g.segment(sname)
      if not opts.quiet and not segment:
        sys.stderr.write("Warning: Segment with ID {} ".format(sname)+
                         "found in Fasta but not in GFA file\n")
      slines = []
    else:
      slines.append(line)
if segment:
  segment.sequence = "".join(slines)

if not opts.quiet:
  for s in g.segments:
    if s.sequence == gfapy.Placeholder:
      sys.stderr.write("Warning: Segment with ID {} ".format(s.name)+
                       "found in GFA but not in Fasta file\n")

print(g)