Codebase list ariba / upstream/2.12.1+ds ariba / scaffold_graph.py
upstream/2.12.1+ds

Tree @upstream/2.12.1+ds (Download .tar.gz)

scaffold_graph.py @upstream/2.12.1+dsraw · history · blame

import pyfastaq
from ariba import link

class Error (Exception): pass

class Graph:
    def __init__(self, ref_lengths):
        self.links = {}
        self.partial_links = {}
        self.ref_lengths = ref_lengths


    def update_from_sam(self, sam, sam_reader):
        '''Updates graph info from a pysam.AlignedSegment object'''
        if sam.is_unmapped \
          or sam.mate_is_unmapped \
          or (sam.reference_id == sam.next_reference_id):
            return

        new_link = link.Link(sam, sam_reader, self.ref_lengths)
        read_name = sam.query_name

        if read_name in self.partial_links:
            new_link.merge(self.partial_links[read_name])
            del self.partial_links[read_name]
            key = tuple(sorted((new_link.refnames[0], new_link.refnames[1])))
            if key not in self.links:
                self.links[key] = []
            new_link.sort()
            self.links[key].append(new_link)
        else:
            self.partial_links[read_name] = new_link


    def _make_graph(self, max_insert):
        '''helper function to construct graph from current state of object'''
        if len(self.partial_links) != 0:
            raise Error('Error in _make_graph(). Cannot continue because there are partial links')
        
        self.contig_links = {}
        for key in self.links:
            for l in self.links[key]:
                insert_size = l.insert_size()
                if insert_size <= max_insert:
                    if key not in self.contig_links:
                        self.contig_links[key] = {}
                    dirs = ''.join(l.dirs)
                    self.contig_links[key][dirs] = self.contig_links[key].get(dirs, 0) + 1


    def _remove_low_cov_links(self, min_coverage):
        keys_to_delete = set()

        for key in self.contig_links:
            d = self.contig_links[key]
            self.contig_links[key] = {x:d[x] for x in d if d[x] >= min_coverage}
            if len(self.contig_links[key]) == 0:
                keys_to_delete.add(key)

        for key in keys_to_delete:
            del self.contig_links[key]


    def _contig_graph_is_consistent(self):
        for key in self.contig_links:
            if len(self.contig_links[key]) > 1:
                return False

        all_names_in_keys = set()
        for key in self.contig_links:
            all_names_in_keys.add(key[0])
            all_names_in_keys.add(key[1])

        name_to_links = {name: set() for name in all_names_in_keys}

        for key in self.contig_links:
            assert len(self.contig_links[key]) == 1
            directions = list(self.contig_links[key])[0]
            name1, name2 = key
            dir1, dir2 = directions[0], directions[1]
            for n, d in [(name1, dir1), (name2, dir2)]:
                if d in name_to_links[n]:
                    return False
                else:
                    name_to_links[n].add(d)
        return True


    def is_consistent(self, min_coverage, max_insert):
        self._make_graph(max_insert)
        self._remove_low_cov_links(min_coverage)
        return self._contig_graph_is_consistent()


    def write_all_links_to_file(self, filename):
        f = pyfastaq.utils.open_file_write(filename)
        for key in sorted(self.links):
            to_write = list(self.links[key])
            to_write.sort()
            for l in to_write:
                print(l, file=f)
        pyfastaq.utils.close(f)