Codebase list fastaq / 5b178b3
Imported Upstream version 3.11.1 Sascha Steinbiss 8 years ago
6 changed file(s) with 27 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
7272 | interleave | Interleaves two files, output is alternating between fwd/rev reads |
7373 | make_random_contigs | Make contigs of random sequence |
7474 | merge | Converts multi sequence file to a single sequence |
75 | replace_bases | Replaces all occurences of one letter with another |
75 | replace_bases | Replaces all occurrences of one letter with another |
7676 | reverse_complement | Reverse complement all sequences |
7777 | scaffolds_to_contigs | Creates a file of contigs from a file of scaffolds |
7878 | search_for_seq | Find all exact matches to a string (and its reverse complement) |
0 version = '3.11.0'
0 version = '3.11.1'
247247 self.seq = ''.join(new_seq)
248248
249249 def replace_bases(self, old, new):
250 '''Replaces all occurences of 'old' with 'new' '''
250 '''Replaces all occurrences of 'old' with 'new' '''
251251 self.seq = self.seq.replace(old, new)
252252
253253 def replace_interval(self, start, end, new):
287287 return [intervals.Interval(coords[i], coords[i+1]) for i in range(0, len(coords)-1,2)]
288288
289289
290
291
292290 def orfs(self, frame=0, revcomp=False):
291 '''Returns a list of ORFs that the sequence has, starting on the given
292 frame. Each returned ORF is an intervals.Interval object.
293 If revcomp=True, then finds the ORFs of the reverse complement
294 of the sequence.'''
293295 assert frame in [0,1,2]
294296 if revcomp:
295297 self.revcomp()
313315
314316
315317 def all_orfs(self, min_length=300):
318 '''Finds all open reading frames in the sequence, that are at least as
319 long as min_length. Includes ORFs on the reverse strand.
320 Returns a list of ORFs, where each element is a tuple:
321 (intervals.Interval, bool)
322 where bool=True means on the reverse strand'''
316323 orfs = []
317324 for frame in [0,1,2]:
318325 for revcomp in [False, True]:
334341 return False
335342
336343
337 def looks_like_gene(self, translation_table=1):
344 def looks_like_gene(self):
338345 '''Returns true iff: length >=6, length is a multiple of 3, first codon is start, last codon is a stop and has no other stop codons'''
339 return self.is_complete_orf() and len(self) >= 6 and len(self) %3 == 0 and self.seq[0:3] in genetic_codes.starts[genetic_code]
340
346 return self.is_complete_orf() \
347 and len(self) >= 6 \
348 and len(self) %3 == 0 \
349 and self.seq[0:3].upper() in genetic_codes.starts[genetic_code]
350
341351
342352 # Fills the object with the next sequence in the file. Returns
343353 # True if this was successful, False if no more sequences in the file.
398408 return Fastq(self.id, self.seq, ''.join([chr(max(0, min(x, 93)) + 33) for x in qual_scores]))
399409
400410 def search(self, search_string):
401 '''Finds every occurence (including overlapping ones) of the search_string, including on the reverse strand. Returns a list where each element is a tuple (position, strand) where strand is in ['-', '+']. Positions are zero-based'''
411 '''Finds every occurrence (including overlapping ones) of the search_string, including on the reverse strand. Returns a list where each element is a tuple (position, strand) where strand is in ['-', '+']. Positions are zero-based'''
402412 seq = self.seq.upper()
403413 search_string = search_string.upper()
404414 pos = 0
251251 tests = [
252252 (sequences.Fasta('ID', 'TTT'), False),
253253 (sequences.Fasta('ID', 'TTGTAA'), True),
254 (sequences.Fasta('ID', 'ttgTAA'), True),
254255 (sequences.Fasta('ID', 'TTGTTTTAA'), True),
255256 (sequences.Fasta('ID', 'TTGTAATTTTAA'), False),
256257 (sequences.Fasta('ID', 'TTGTTTTGAA'), False),
258259
259260 for t in tests:
260261 self.assertEqual(t[0].looks_like_gene(), t[1])
262
263 sequences.genetic_code = 1
264 self.assertFalse(sequences.Fasta('ID', 'ATTCAGTAA').looks_like_gene())
265 sequences.genetic_code = 11
266 self.assertTrue(sequences.Fasta('ID', 'ATTCAGTAA').looks_like_gene())
261267
262268
263269 def test_is_all_Ns(self):
1818 'interleave': 'Interleaves two files, output is alternating between fwd/rev reads',
1919 'make_random_contigs': 'Make contigs of random sequence',
2020 'merge': 'Converts multi sequence file to a single sequence',
21 'replace_bases': 'Replaces all occurences of one letter with another',
21 'replace_bases': 'Replaces all occurrences of one letter with another',
2222 'reverse_complement': 'Reverse complement all sequences',
2323 'scaffolds_to_contigs': 'Creates a file of contigs from a file of scaffolds',
2424 'search_for_seq': 'Find all exact matches to a string (and its reverse complement)',
33
44 setup(
55 name='pyfastaq',
6 version='3.11.0',
6 version='3.11.1',
77 description='Script to manipulate FASTA and FASTQ files, plus API for developers',
88 packages = find_packages(),
99 author='Martin Hunt',