Codebase list gfapy / fresh-snapshots/main tests / extension.py
fresh-snapshots/main

Tree @fresh-snapshots/main (Download .tar.gz)

extension.py @ fresh-snapshots/main — raw · history · blame

import gfapy
import re
from collections import OrderedDict

class Taxon(gfapy.Line):
  """Custom record type ``T`` with a single positional field.

  The only positional field is ``tid`` (declared with the
  ``identifier_gfa2`` datatype); it is also designated as the name field
  of the line.  The tag ``UL`` is predeclared with datatype ``Z``.
  """

  RECORD_TYPE = "T"
  NAME_FIELD = "tid"
  # One-entry ordered mapping: positional field name -> datatype name.
  POSFIELDS = OrderedDict(((NAME_FIELD, "identifier_gfa2"),))
  TAGS_DATATYPE = dict(UL="Z")

# Register the "T" record type with gfapy. No arguments are passed, so no
# cross-references to other line types are declared for this record here.
Taxon.register_extension()

class MetagenomicAssignment(gfapy.Line):
  """Custom record type ``M`` with three positional fields.

  Positional fields, in order:

  * ``mid`` -- ``optional_identifier_gfa2``; designated as the name field
  * ``tid`` -- ``identifier_gfa2``
  * ``sid`` -- ``identifier_gfa2``

  The tag ``SC`` is predeclared with datatype ``i``.
  """

  RECORD_TYPE = "M"
  NAME_FIELD = "mid"

  # Insertion order defines the order of the positional fields in the line.
  POSFIELDS = OrderedDict()
  POSFIELDS["mid"] = "optional_identifier_gfa2"
  POSFIELDS["tid"] = "identifier_gfa2"
  POSFIELDS["sid"] = "identifier_gfa2"

  TAGS_DATATYPE = {"SC": "i"}

# Register the "M" record type. Each reference is a triple of
# (field name, referenced line class, "metagenomic_assignments");
# presumably the last element names the back-reference collection on the
# referenced line -- confirm against the gfapy extension documentation.
MetagenomicAssignment.register_extension(
    references=[
        (field_name, target_class, "metagenomic_assignments")
        for field_name, target_class in (
            ("sid", gfapy.line.segment.GFA2),
            ("tid", Taxon),
        )
    ])

class TaxonID:
  """Validation and conversion routines for taxon identifier fields.

  All methods are static: the class is used purely as a namespace and is
  never expected to hold state.  A valid encoded taxon ID is either
  ``taxon:<digits>`` or a non-empty run of ASCII letters, digits and
  underscores.
  """

  @staticmethod
  def validate_encoded(string):
    """Check that *string* is a well-formed taxon ID.

    The whole string must match; ``re.fullmatch`` is used instead of
    ``re.match`` with a ``$`` anchor, because ``$`` would also accept a
    trailing newline.

    Raises:
      gfapy.ValueError: if *string* matches neither accepted form.
    """
    if re.fullmatch(r"taxon:(\d+)", string) is None and \
        re.fullmatch(r"[a-zA-Z0-9_]+", string) is None:
      raise gfapy.ValueError("Invalid taxon ID: {}".format(string))

  @staticmethod
  def decode(string):
    """Validate *string* and return it unchanged.

    Raises:
      gfapy.ValueError: if *string* is not a well-formed taxon ID.
    """
    TaxonID.validate_encoded(string)
    return string

  @staticmethod
  def validate_decoded(obj):
    """Check that *obj* is a ``Taxon`` line whose name is a valid taxon ID.

    Raises:
      gfapy.TypeError: if *obj* is not a ``Taxon`` instance.
      gfapy.ValueError: if the name of *obj* is not a well-formed taxon ID.
    """
    if not isinstance(obj, Taxon):
      raise gfapy.TypeError(
        "Invalid type for taxon ID: {!r}".format(obj))
    TaxonID.validate_encoded(obj.name)

  @staticmethod
  def encode(obj):
    """Validate *obj* (see ``validate_decoded``) and return it unchanged.

    Raises:
      gfapy.TypeError: if *obj* is not a ``Taxon`` instance.
      gfapy.ValueError: if the name of *obj* is not a well-formed taxon ID.
    """
    TaxonID.validate_decoded(obj)
    return obj

# Make the custom "taxon_id" datatype available, backed by TaxonID.
gfapy.Field.register_datatype("taxon_id", TaxonID)

# Declare the "tid" field of both custom record types as "taxon_id".
for _record_class in (Taxon, MetagenomicAssignment):
  _record_class.DATATYPE["tid"] = "taxon_id"
del _record_class  # do not leave the loop variable in the module namespace