247 | 247 |
self.seq = ''.join(new_seq)
|
248 | 248 |
|
249 | 249 |
def replace_bases(self, old, new):
|
250 | |
'''Replaces all occurences of 'old' with 'new' '''
|
|
250 |
'''Replaces all occurrences of 'old' with 'new' '''
|
251 | 251 |
self.seq = self.seq.replace(old, new)
|
252 | 252 |
|
253 | 253 |
def replace_interval(self, start, end, new):
|
|
287 | 287 |
return [intervals.Interval(coords[i], coords[i+1]) for i in range(0, len(coords)-1,2)]
|
288 | 288 |
|
289 | 289 |
|
290 | |
|
291 | |
|
292 | 290 |
def orfs(self, frame=0, revcomp=False):
|
|
291 |
'''Returns a list of ORFs that the sequence has, starting on the given
|
|
292 |
frame. Each returned ORF is an intervals.Interval object.
|
|
293 |
If revcomp=True, then finds the ORFs of the reverse complement
|
|
294 |
of the sequence.'''
|
293 | 295 |
assert frame in [0,1,2]
|
294 | 296 |
if revcomp:
|
295 | 297 |
self.revcomp()
|
|
313 | 315 |
|
314 | 316 |
|
315 | 317 |
def all_orfs(self, min_length=300):
|
|
318 |
'''Finds all open reading frames in the sequence, that are at least as
|
|
319 |
long as min_length. Includes ORFs on the reverse strand.
|
|
320 |
Returns a list of ORFs, where each element is a tuple:
|
|
321 |
(intervals.Interval, bool)
|
|
322 |
where bool=True means on the reverse strand'''
|
316 | 323 |
orfs = []
|
317 | 324 |
for frame in [0,1,2]:
|
318 | 325 |
for revcomp in [False, True]:
|
|
334 | 341 |
return False
|
335 | 342 |
|
336 | 343 |
|
337 | |
def looks_like_gene(self, translation_table=1):
|
|
344 |
def looks_like_gene(self):
|
338 | 345 |
'''Returns true iff: length >=6, length is a multiple of 3, first codon is start, last codon is a stop and has no other stop codons'''
|
339 | |
return self.is_complete_orf() and len(self) >= 6 and len(self) %3 == 0 and self.seq[0:3] in genetic_codes.starts[genetic_code]
|
340 | |
|
|
346 |
return self.is_complete_orf() \
|
|
347 |
and len(self) >= 6 \
|
|
348 |
and len(self) %3 == 0 \
|
|
349 |
and self.seq[0:3].upper() in genetic_codes.starts[genetic_code]
|
|
350 |
|
341 | 351 |
|
342 | 352 |
# Fills the object with the next sequence in the file. Returns
|
343 | 353 |
# True if this was successful, False if no more sequences in the file.
|
|
398 | 408 |
return Fastq(self.id, self.seq, ''.join([chr(max(0, min(x, 93)) + 33) for x in qual_scores]))
|
399 | 409 |
|
400 | 410 |
def search(self, search_string):
|
401 | |
'''Finds every occurence (including overlapping ones) of the search_string, including on the reverse strand. Returns a list where each element is a tuple (position, strand) where strand is in ['-', '+']. Positions are zero-based'''
|
|
411 |
'''Finds every occurrence (including overlapping ones) of the search_string, including on the reverse strand. Returns a list where each element is a tuple (position, strand) where strand is in ['-', '+']. Positions are zero-based'''
|
402 | 412 |
seq = self.seq.upper()
|
403 | 413 |
search_string = search_string.upper()
|
404 | 414 |
pos = 0
|