0 | |
Description: Delay import of Fastaq modules by the python executables
|
1 | |
Man pages for this package are being automatically created through the
|
2 | |
help2man wrapper called usage_to_man. help2man calls the python executables
|
3 | |
with the -h option and converts the usage into a man page.
|
4 | |
.
|
5 | |
The first step done by all the executables is the import of the modules deployed
|
6 | |
by this package. Since the package is not installed in the system at build time,
|
7 | |
the man pages would never be properly created.
|
8 | |
.
|
9 | |
This patch solves this problem by importing the modules in this package after
|
10 | |
the argument parsing code.
|
11 | |
.
|
12 | |
Upstream preferred to keep the code as it is for styling reasons, which is
|
13 | |
perfectly reasonable.
|
14 | |
.
|
15 | |
fastaq (1.5.0-1) UNRELEASED; urgency=low
|
16 | |
.
|
17 | |
* Initial release (Closes: #1234)
|
18 | |
Author: DMPT <debian-med-packaging@lists.alioth.debian.org>
|
19 | |
--- a/scripts/fastaq_capillary_to_pairs
|
20 | |
+++ b/scripts/fastaq_capillary_to_pairs
|
21 | |
@@ -1,7 +1,6 @@
|
22 | |
#!/usr/bin/env python3
|
23 | |
|
24 | |
import argparse
|
25 | |
-from fastaq import tasks
|
26 | |
|
27 | |
parser = argparse.ArgumentParser(
|
28 | |
description = 'Given a fasta/q file of capillary reads, makes an interleaved file of read pairs (where more than read from same ligation, takes the longest read) and a file of unpaired reads. Replaces the .p1k/.q1k part of read names to denote fwd/rev reads with /1 and /2',
|
29 | |
@@ -9,4 +8,8 @@
|
30 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
31 | |
parser.add_argument('outprefix', help='Prefix of output files', metavar='outfiles prefix')
|
32 | |
options = parser.parse_args()
|
33 | |
+
|
34 | |
+
|
35 | |
+from fastaq import tasks
|
36 | |
+
|
37 | |
tasks.capillary_to_pairs(options.infile, options.outprefix)
|
38 | |
--- a/scripts/fastaq_chunker
|
39 | |
+++ b/scripts/fastaq_chunker
|
40 | |
@@ -1,7 +1,6 @@
|
41 | |
#!/usr/bin/env python3
|
42 | |
|
43 | |
import argparse
|
44 | |
-from fastaq import tasks
|
45 | |
|
46 | |
parser = argparse.ArgumentParser(
|
47 | |
description = 'Splits a multi fasta/q file into separate files. Splits sequences into chunks of a fixed size. Aims for chunk_size chunks in each file, but allows a little extra, so chunk can be up to (chunk_size + tolerance), to prevent tiny chunks made from the ends of sequences',
|
48 | |
@@ -12,6 +11,10 @@
|
49 | |
parser.add_argument('tolerance', type=int, help='Tolerance allowed in chunk size')
|
50 | |
parser.add_argument('--skip_all_Ns', action='store_true', help='Do not output any sequence that consists of all Ns')
|
51 | |
options = parser.parse_args()
|
52 | |
+
|
53 | |
+
|
54 | |
+from fastaq import tasks
|
55 | |
+
|
56 | |
tasks.split_by_fixed_size(
|
57 | |
options.infile,
|
58 | |
options.outprefix,
|
59 | |
--- a/scripts/fastaq_count_sequences
|
60 | |
+++ b/scripts/fastaq_count_sequences
|
61 | |
@@ -1,11 +1,14 @@
|
62 | |
#!/usr/bin/env python3
|
63 | |
|
64 | |
import argparse
|
65 | |
-from fastaq import tasks
|
66 | |
|
67 | |
parser = argparse.ArgumentParser(
|
68 | |
description = 'Counts the number of sequences in a fasta/q file',
|
69 | |
usage = '%(prog)s <fasta/q in>')
|
70 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
71 | |
options = parser.parse_args()
|
72 | |
+
|
73 | |
+
|
74 | |
+from fastaq import tasks
|
75 | |
+
|
76 | |
print(tasks.count_sequences(options.infile))
|
77 | |
--- a/scripts/fastaq_deinterleave
|
78 | |
+++ b/scripts/fastaq_deinterleave
|
79 | |
@@ -1,7 +1,6 @@
|
80 | |
#!/usr/bin/env python3
|
81 | |
|
82 | |
import argparse
|
83 | |
-from fastaq import tasks
|
84 | |
|
85 | |
parser = argparse.ArgumentParser(
|
86 | |
description = 'Deinterleaves fasta/q file, so that reads are written alternately between two output files',
|
87 | |
@@ -11,4 +10,8 @@
|
88 | |
parser.add_argument('out_fwd', help='Name of output fasta/q file of forwards reads')
|
89 | |
parser.add_argument('out_rev', help='Name of output fasta/q file of reverse reads')
|
90 | |
options = parser.parse_args()
|
91 | |
+
|
92 | |
+
|
93 | |
+from fastaq import tasks
|
94 | |
+
|
95 | |
tasks.deinterleave(options.infile, options.out_fwd, options.out_rev, fasta_out=options.fasta_out)
|
96 | |
--- a/scripts/fastaq_enumerate_names
|
97 | |
+++ b/scripts/fastaq_enumerate_names
|
98 | |
@@ -1,7 +1,6 @@
|
99 | |
#!/usr/bin/env python3
|
100 | |
|
101 | |
import argparse
|
102 | |
-from fastaq import tasks
|
103 | |
|
104 | |
parser = argparse.ArgumentParser(
|
105 | |
description = 'Renames sequences in a file, calling them 1,2,3... etc',
|
106 | |
@@ -12,6 +11,10 @@
|
107 | |
parser.add_argument('infile', help='Name of fasta/q file to be read')
|
108 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
109 | |
options = parser.parse_args()
|
110 | |
+
|
111 | |
+
|
112 | |
+from fastaq import tasks
|
113 | |
+
|
114 | |
tasks.enumerate_names(options.infile,
|
115 | |
options.outfile,
|
116 | |
start_index=options.start_index,
|
117 | |
--- a/scripts/fastaq_expand_nucleotides
|
118 | |
+++ b/scripts/fastaq_expand_nucleotides
|
119 | |
@@ -1,7 +1,6 @@
|
120 | |
#!/usr/bin/env python3
|
121 | |
|
122 | |
import argparse
|
123 | |
-from fastaq import tasks
|
124 | |
|
125 | |
parser = argparse.ArgumentParser(
|
126 | |
description = 'Makes all combinations of sequences in input file by using all possibilities of redundant bases. e.g. ART could be AAT or AGT. Assumes input is nucleotides, not amino acids',
|
127 | |
@@ -9,6 +8,10 @@
|
128 | |
parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
|
129 | |
parser.add_argument('outfile', help='Name of output file')
|
130 | |
options = parser.parse_args()
|
131 | |
+
|
132 | |
+
|
133 | |
+from fastaq import tasks
|
134 | |
+
|
135 | |
tasks.expand_nucleotides(
|
136 | |
options.infile,
|
137 | |
options.outfile,
|
138 | |
--- a/scripts/fastaq_extend_gaps
|
139 | |
+++ b/scripts/fastaq_extend_gaps
|
140 | |
@@ -1,7 +1,6 @@
|
141 | |
#!/usr/bin/env python3
|
142 | |
|
143 | |
import argparse
|
144 | |
-from fastaq import tasks
|
145 | |
|
146 | |
parser = argparse.ArgumentParser(
|
147 | |
description = 'Extends the length of all gaps (and trims the start/end of sequences) in a fasta/q file. Does this by replacing a set number of bases either side of each gap with Ns. Any sequence that ends up as all Ns is lost',
|
148 | |
@@ -10,4 +9,8 @@
|
149 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
150 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
151 | |
options = parser.parse_args()
|
152 | |
+
|
153 | |
+
|
154 | |
+from fastaq import tasks
|
155 | |
+
|
156 | |
tasks.extend_gaps(options.infile, options.outfile, options.trim_number)
|
157 | |
--- a/scripts/fastaq_fasta_to_fastq
|
158 | |
+++ b/scripts/fastaq_fasta_to_fastq
|
159 | |
@@ -1,7 +1,6 @@
|
160 | |
#!/usr/bin/env python3
|
161 | |
|
162 | |
import argparse
|
163 | |
-from fastaq import tasks
|
164 | |
|
165 | |
parser = argparse.ArgumentParser(
|
166 | |
description = 'Given a fasta and qual file, makes a fastq file',
|
167 | |
@@ -10,4 +9,8 @@
|
168 | |
parser.add_argument('qual', help='Name of input quality scores file', metavar='qual in')
|
169 | |
parser.add_argument('outfile', help='Name of output fastq file', metavar='fastq out')
|
170 | |
options = parser.parse_args()
|
171 | |
+
|
172 | |
+
|
173 | |
+from fastaq import tasks
|
174 | |
+
|
175 | |
tasks.fasta_to_fastq(options.fasta, options.qual, options.outfile)
|
176 | |
--- a/scripts/fastaq_filter
|
177 | |
+++ b/scripts/fastaq_filter
|
178 | |
@@ -1,7 +1,6 @@
|
179 | |
#!/usr/bin/env python3
|
180 | |
|
181 | |
import argparse
|
182 | |
-from fastaq import tasks
|
183 | |
|
184 | |
parser = argparse.ArgumentParser(
|
185 | |
description = 'Filters a fasta/q file by sequence length and/or by name matching a regular expression',
|
186 | |
@@ -14,6 +13,10 @@
|
187 | |
parser.add_argument('infile', help='Name of fasta/q file to be filtered')
|
188 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
189 | |
options = parser.parse_args()
|
190 | |
+
|
191 | |
+
|
192 | |
+from fastaq import tasks
|
193 | |
+
|
194 | |
tasks.filter(options.infile,
|
195 | |
options.outfile,
|
196 | |
minlength=options.min_length,
|
197 | |
--- a/scripts/fastaq_get_ids
|
198 | |
+++ b/scripts/fastaq_get_ids
|
199 | |
@@ -1,7 +1,6 @@
|
200 | |
#!/usr/bin/env python3
|
201 | |
|
202 | |
import argparse
|
203 | |
-from fastaq import tasks
|
204 | |
|
205 | |
parser = argparse.ArgumentParser(
|
206 | |
description = 'Gets IDs from each sequence in a fasta or fastq file',
|
207 | |
@@ -9,4 +8,8 @@
|
208 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
209 | |
parser.add_argument('outfile', help='Name of output file')
|
210 | |
options = parser.parse_args()
|
211 | |
+
|
212 | |
+
|
213 | |
+from fastaq import tasks
|
214 | |
+
|
215 | |
tasks.get_ids(options.infile, options.outfile)
|
216 | |
--- a/scripts/fastaq_get_seq_flanking_gaps
|
217 | |
+++ b/scripts/fastaq_get_seq_flanking_gaps
|
218 | |
@@ -1,7 +1,6 @@
|
219 | |
#!/usr/bin/env python3
|
220 | |
|
221 | |
import argparse
|
222 | |
-from fastaq import tasks
|
223 | |
|
224 | |
parser = argparse.ArgumentParser(
|
225 | |
description = 'Gets the sequences either side of gaps in a fasta/q file',
|
226 | |
@@ -11,4 +10,8 @@
|
227 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
228 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
229 | |
options = parser.parse_args()
|
230 | |
+
|
231 | |
+
|
232 | |
+from fastaq import tasks
|
233 | |
+
|
234 | |
tasks.get_seqs_flanking_gaps(options.infile, options.outfile, options.left, options.right)
|
235 | |
--- a/scripts/fastaq_insert_or_delete_bases
|
236 | |
+++ b/scripts/fastaq_insert_or_delete_bases
|
237 | |
@@ -1,9 +1,6 @@
|
238 | |
#!/usr/bin/env python3
|
239 | |
|
240 | |
import argparse
|
241 | |
-import sys
|
242 | |
-import random
|
243 | |
-from fastaq import sequences, utils, intervals
|
244 | |
|
245 | |
parser = argparse.ArgumentParser(
|
246 | |
description = 'Deletes or inserts bases at given position(s) from a fasta/q file',
|
247 | |
@@ -16,6 +13,11 @@
|
248 | |
parser.add_argument('--insert_range', help='Inserts random bases starting after position P in each sequence of the input file. Inserts start + (n-1)*step bases into sequence n.', metavar='P,start,step')
|
249 | |
options = parser.parse_args()
|
250 | |
|
251 | |
+
|
252 | |
+import sys
|
253 | |
+import random
|
254 | |
+from fastaq import sequences, utils, intervals
|
255 | |
+
|
256 | |
test_ops = [int(x is not None) for x in [options.delete, options.insert, options.delete_range, options.insert_range]]
|
257 | |
|
258 | |
if sum(test_ops) != 1:
|
259 | |
--- a/scripts/fastaq_interleave
|
260 | |
+++ b/scripts/fastaq_interleave
|
261 | |
@@ -1,7 +1,6 @@
|
262 | |
#!/usr/bin/env python3
|
263 | |
|
264 | |
import argparse
|
265 | |
-from fastaq import tasks
|
266 | |
|
267 | |
parser = argparse.ArgumentParser(
|
268 | |
description = 'Interleaves two fasta/q files, so that reads are written alternately first/second in output file',
|
269 | |
@@ -10,4 +9,8 @@
|
270 | |
parser.add_argument('infile_2', help='Name of second input fasta/q file')
|
271 | |
parser.add_argument('outfile', help='Name of output fasta/q file of interleaved reads')
|
272 | |
options = parser.parse_args()
|
273 | |
+
|
274 | |
+
|
275 | |
+from fastaq import tasks
|
276 | |
+
|
277 | |
tasks.interleave(options.infile_1, options.infile_2, options.outfile)
|
278 | |
--- a/scripts/fastaq_long_read_simulate
|
279 | |
+++ b/scripts/fastaq_long_read_simulate
|
280 | |
@@ -1,7 +1,6 @@
|
281 | |
#!/usr/bin/env python3
|
282 | |
|
283 | |
import argparse
|
284 | |
-from fastaq import tasks
|
285 | |
|
286 | |
parser = argparse.ArgumentParser(
|
287 | |
description = 'Simulates long reads from a fasta/q file. Can optionally make insertions into the reads, like pacbio does. If insertions made, coverage calculation is done before the insertions (so total read length may appear longer then expected).',
|
288 | |
@@ -16,7 +15,6 @@
|
289 | |
parser.add_argument('--fixed_read_length', type=int, help='Length of each read. Only applies if method is tile or uniform. [%(default)s]', default=20000, metavar='INT')
|
290 | |
parser.add_argument('--coverage', type=float, help='Read coverage. Only applies if method is gamma or uniform. [%(default)s]', default=2, metavar='FLOAT')
|
291 | |
|
292 | |
-
|
293 | |
tiling_group = parser.add_argument_group('tiling options')
|
294 | |
tiling_group.add_argument('--tile_step', type=int, help='Distance between start of each read [%(default)s]', default=10000, metavar='INT')
|
295 | |
|
296 | |
@@ -29,8 +27,11 @@
|
297 | |
ins_group.add_argument('--ins_skip', type=int, help='Insert a random base every --skip bases plus or minus --ins_window. If this option is used, must also use --ins_window.', metavar='INT')
|
298 | |
ins_group.add_argument('--ins_window', type=int, help='See --ins_skip. If this option is used, must also use --ins_skip.', metavar='INT')
|
299 | |
|
300 | |
-
|
301 | |
options = parser.parse_args()
|
302 | |
+
|
303 | |
+
|
304 | |
+from fastaq import tasks
|
305 | |
+
|
306 | |
tasks.make_long_reads(
|
307 | |
options.infile,
|
308 | |
options.outfile,
|
309 | |
--- a/scripts/fastaq_make_random_contigs
|
310 | |
+++ b/scripts/fastaq_make_random_contigs
|
311 | |
@@ -1,7 +1,6 @@
|
312 | |
#!/usr/bin/env python3
|
313 | |
|
314 | |
import argparse
|
315 | |
-from fastaq import tasks
|
316 | |
|
317 | |
parser = argparse.ArgumentParser(
|
318 | |
description = 'Makes a multi-fasta file of random sequences, all of the same length. Each base has equal chance of being A,C,G or T',
|
319 | |
@@ -14,6 +13,10 @@
|
320 | |
parser.add_argument('length', type=int, help='Length of each contig')
|
321 | |
parser.add_argument('outfile', help='Name of output file')
|
322 | |
options = parser.parse_args()
|
323 | |
+
|
324 | |
+
|
325 | |
+from fastaq import tasks
|
326 | |
+
|
327 | |
tasks.make_random_contigs(
|
328 | |
options.contigs,
|
329 | |
options.length,
|
330 | |
--- a/scripts/fastaq_merge
|
331 | |
+++ b/scripts/fastaq_merge
|
332 | |
@@ -1,7 +1,6 @@
|
333 | |
#!/usr/bin/env python3
|
334 | |
|
335 | |
import argparse
|
336 | |
-from fastaq import tasks
|
337 | |
|
338 | |
parser = argparse.ArgumentParser(
|
339 | |
description = 'Converts multi fasta/q file to single sequence file, preserving original order of sequences',
|
340 | |
@@ -10,6 +9,10 @@
|
341 | |
parser.add_argument('outfile', help='Name of output file')
|
342 | |
parser.add_argument('-n', '--name', help='Name of sequence in output file [%(default)s]', default='union')
|
343 | |
options = parser.parse_args()
|
344 | |
+
|
345 | |
+
|
346 | |
+from fastaq import tasks
|
347 | |
+
|
348 | |
tasks.merge_to_one_seq(
|
349 | |
options.infile,
|
350 | |
options.outfile,
|
351 | |
--- a/scripts/fastaq_replace_bases
|
352 | |
+++ b/scripts/fastaq_replace_bases
|
353 | |
@@ -1,7 +1,6 @@
|
354 | |
#!/usr/bin/env python3
|
355 | |
|
356 | |
import argparse
|
357 | |
-from fastaq import tasks
|
358 | |
|
359 | |
parser = argparse.ArgumentParser(
|
360 | |
description = 'Replaces all occurences of one letter with another in a fasta/q file',
|
361 | |
@@ -11,4 +10,8 @@
|
362 | |
parser.add_argument('old', help='Base to be replaced')
|
363 | |
parser.add_argument('new', help='Replace with this letter')
|
364 | |
options = parser.parse_args()
|
365 | |
+
|
366 | |
+
|
367 | |
+from fastaq import tasks
|
368 | |
+
|
369 | |
tasks.replace_bases(options.infile, options.outfile, options.old, options.new)
|
370 | |
--- a/scripts/fastaq_reverse_complement
|
371 | |
+++ b/scripts/fastaq_reverse_complement
|
372 | |
@@ -1,7 +1,6 @@
|
373 | |
#!/usr/bin/env python3
|
374 | |
|
375 | |
import argparse
|
376 | |
-from fastaq import tasks
|
377 | |
|
378 | |
parser = argparse.ArgumentParser(
|
379 | |
description = 'Reverse complements all sequences in a fasta/q file',
|
380 | |
@@ -9,4 +8,8 @@
|
381 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
382 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
383 | |
options = parser.parse_args()
|
384 | |
+
|
385 | |
+
|
386 | |
+from fastaq import tasks
|
387 | |
+
|
388 | |
tasks.reverse_complement(options.infile, options.outfile)
|
389 | |
--- a/scripts/fastaq_scaffolds_to_contigs
|
390 | |
+++ b/scripts/fastaq_scaffolds_to_contigs
|
391 | |
@@ -1,7 +1,6 @@
|
392 | |
#!/usr/bin/env python3
|
393 | |
|
394 | |
import argparse
|
395 | |
-from fastaq import tasks
|
396 | |
|
397 | |
parser = argparse.ArgumentParser(
|
398 | |
description = 'Creates a file of contigs from a file of scaffolds - i.e. breaks at every gap in the input',
|
399 | |
@@ -10,4 +9,8 @@
|
400 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
401 | |
parser.add_argument('outfile', help='Name of output contigs file')
|
402 | |
options = parser.parse_args()
|
403 | |
+
|
404 | |
+
|
405 | |
+from fastaq import tasks
|
406 | |
+
|
407 | |
tasks.scaffolds_to_contigs(options.infile, options.outfile, number_contigs=options.number_contigs)
|
408 | |
--- a/scripts/fastaq_search_for_seq
|
409 | |
+++ b/scripts/fastaq_search_for_seq
|
410 | |
@@ -1,7 +1,6 @@
|
411 | |
#!/usr/bin/env python3
|
412 | |
|
413 | |
import argparse
|
414 | |
-from fastaq import tasks
|
415 | |
|
416 | |
parser = argparse.ArgumentParser(
|
417 | |
description = 'Searches for an exact match on a given string and its reverese complement, in every sequences of a fasta/q file. Case insensitive. Guaranteed to find all hits',
|
418 | |
@@ -10,4 +9,8 @@
|
419 | |
parser.add_argument('outfile', help='Name of outputfile. Tab-delimited output: sequence name, position, strand')
|
420 | |
parser.add_argument('search_string', help='String to search for in the sequences')
|
421 | |
options = parser.parse_args()
|
422 | |
+
|
423 | |
+
|
424 | |
+from fastaq import tasks
|
425 | |
+
|
426 | |
tasks.search_for_seq(options.infile, options.outfile, options.search_string)
|
427 | |
--- a/scripts/fastaq_sequence_trim
|
428 | |
+++ b/scripts/fastaq_sequence_trim
|
429 | |
@@ -1,7 +1,6 @@
|
430 | |
#!/usr/bin/env python3
|
431 | |
|
432 | |
import argparse
|
433 | |
-from fastaq import tasks
|
434 | |
|
435 | |
parser = argparse.ArgumentParser(
|
436 | |
description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming',
|
437 | |
@@ -13,6 +12,10 @@
|
438 | |
parser.add_argument('outfile_2', help='Name of output reverse fasta/q file', metavar='out_2')
|
439 | |
parser.add_argument('trim_seqs', help='Name of fasta/q file of sequences to search for at the start of each input sequence', metavar='trim_seqs')
|
440 | |
options = parser.parse_args()
|
441 | |
+
|
442 | |
+
|
443 | |
+from fastaq import tasks
|
444 | |
+
|
445 | |
tasks.sequence_trim(
|
446 | |
options.infile_1,
|
447 | |
options.infile_2,
|
448 | |
--- a/scripts/fastaq_split_by_base_count
|
449 | |
+++ b/scripts/fastaq_split_by_base_count
|
450 | |
@@ -1,7 +1,6 @@
|
451 | |
#!/usr/bin/env python3
|
452 | |
|
453 | |
import argparse
|
454 | |
-from fastaq import tasks
|
455 | |
|
456 | |
parser = argparse.ArgumentParser(
|
457 | |
description = 'Splits a multi fasta/q file into separate files. Does not split sequences. Puts up to max_bases into each split file. The exception is that any sequence longer than max_bases is put into its own file.',
|
458 | |
@@ -10,6 +9,9 @@
|
459 | |
parser.add_argument('outprefix', help='Name of output fasta/q file')
|
460 | |
parser.add_argument('max_bases', type=int, help='Max bases in each output split file', metavar='max_bases')
|
461 | |
parser.add_argument('--max_seqs', type=int, help='Max number of sequences in each output split file [no limit]', metavar='INT')
|
462 | |
-
|
463 | |
options = parser.parse_args()
|
464 | |
+
|
465 | |
+
|
466 | |
+from fastaq import tasks
|
467 | |
+
|
468 | |
tasks.split_by_base_count(options.infile, options.outprefix, options.max_bases, options.max_seqs)
|
469 | |
--- a/scripts/fastaq_strip_illumina_suffix
|
470 | |
+++ b/scripts/fastaq_strip_illumina_suffix
|
471 | |
@@ -1,7 +1,6 @@
|
472 | |
#!/usr/bin/env python3
|
473 | |
|
474 | |
import argparse
|
475 | |
-from fastaq import tasks
|
476 | |
|
477 | |
parser = argparse.ArgumentParser(
|
478 | |
description = 'Strips /1 or /2 off the end of every read name in a fasta/q file',
|
479 | |
@@ -9,4 +8,8 @@
|
480 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
481 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
482 | |
options = parser.parse_args()
|
483 | |
+
|
484 | |
+
|
485 | |
+from fastaq import tasks
|
486 | |
+
|
487 | |
tasks.strip_illumina_suffix(options.infile, options.outfile)
|
488 | |
--- a/scripts/fastaq_to_fake_qual
|
489 | |
+++ b/scripts/fastaq_to_fake_qual
|
490 | |
@@ -1,7 +1,6 @@
|
491 | |
#!/usr/bin/env python3
|
492 | |
|
493 | |
import argparse
|
494 | |
-from fastaq import tasks
|
495 | |
|
496 | |
parser = argparse.ArgumentParser(
|
497 | |
description = 'Makes fake quality scores file from a fasta/q file',
|
498 | |
@@ -10,6 +9,10 @@
|
499 | |
parser.add_argument('outfile', help='Name of output file')
|
500 | |
parser.add_argument('-q', '--qual', type=int, help='Quality score to assign to all bases [%(default)s]', default=40)
|
501 | |
options = parser.parse_args()
|
502 | |
+
|
503 | |
+
|
504 | |
+from fastaq import tasks
|
505 | |
+
|
506 | |
tasks.fastaq_to_fake_qual(
|
507 | |
options.infile,
|
508 | |
options.outfile,
|
509 | |
--- a/scripts/fastaq_to_fasta
|
510 | |
+++ b/scripts/fastaq_to_fasta
|
511 | |
@@ -1,7 +1,6 @@
|
512 | |
#!/usr/bin/env python3
|
513 | |
|
514 | |
import argparse
|
515 | |
-from fastaq import tasks
|
516 | |
|
517 | |
parser = argparse.ArgumentParser(
|
518 | |
description = 'Converts sequence file to FASTA format',
|
519 | |
@@ -11,6 +10,10 @@
|
520 | |
parser.add_argument('-l', '--line_length', type=int, help='Number of bases on each sequence line of output file [%(default)s]', default=60)
|
521 | |
parser.add_argument('-s', '--strip_after_whitespace', action='store_true', help='Remove everything after first whitesapce in every sequence name')
|
522 | |
options = parser.parse_args()
|
523 | |
+
|
524 | |
+
|
525 | |
+from fastaq import tasks
|
526 | |
+
|
527 | |
tasks.to_fasta(
|
528 | |
options.infile,
|
529 | |
options.outfile,
|
530 | |
--- a/scripts/fastaq_to_mira_xml
|
531 | |
+++ b/scripts/fastaq_to_mira_xml
|
532 | |
@@ -1,7 +1,6 @@
|
533 | |
#!/usr/bin/env python3
|
534 | |
|
535 | |
import argparse
|
536 | |
-from fastaq import tasks
|
537 | |
|
538 | |
parser = argparse.ArgumentParser(
|
539 | |
description = 'Creates an xml file from a fasta/q file of reads, for use with Mira assembler',
|
540 | |
@@ -9,4 +8,8 @@
|
541 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
542 | |
parser.add_argument('xml_out', help='Name of output xml file')
|
543 | |
options = parser.parse_args()
|
544 | |
+
|
545 | |
+
|
546 | |
+from fastaq import tasks
|
547 | |
+
|
548 | |
tasks.fastaq_to_mira_xml(options.infile, options.xml_out)
|
549 | |
--- a/scripts/fastaq_to_orfs_gff
|
550 | |
+++ b/scripts/fastaq_to_orfs_gff
|
551 | |
@@ -1,7 +1,6 @@
|
552 | |
#!/usr/bin/env python3
|
553 | |
|
554 | |
import argparse
|
555 | |
-from fastaq import tasks
|
556 | |
|
557 | |
parser = argparse.ArgumentParser(
|
558 | |
description = 'Writes a GFF file of open reading frames from a fasta/q file',
|
559 | |
@@ -10,4 +9,8 @@
|
560 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
561 | |
parser.add_argument('gff_out', help='Name of output gff file')
|
562 | |
options = parser.parse_args()
|
563 | |
+
|
564 | |
+
|
565 | |
+from fastaq import tasks
|
566 | |
+
|
567 | |
tasks.fastaq_to_orfs_gff(options.infile, options.gff_out, min_length=options.min_length)
|
568 | |
--- a/scripts/fastaq_to_perfect_reads
|
569 | |
+++ b/scripts/fastaq_to_perfect_reads
|
570 | |
@@ -1,10 +1,6 @@
|
571 | |
#!/usr/bin/env python3
|
572 | |
|
573 | |
import argparse
|
574 | |
-import random
|
575 | |
-from math import floor, ceil
|
576 | |
-from fastaq import sequences, utils
|
577 | |
-import sys
|
578 | |
|
579 | |
parser = argparse.ArgumentParser(
|
580 | |
description = 'Makes perfect paired end fastq reads from a fasta/q file, with insert sizes sampled from a normal distribution. Read orientation is innies. Output is an interleaved fastq file.',
|
581 | |
@@ -20,6 +16,12 @@
|
582 | |
parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None, metavar='INT')
|
583 | |
options = parser.parse_args()
|
584 | |
|
585 | |
+
|
586 | |
+import random
|
587 | |
+from math import floor, ceil
|
588 | |
+from fastaq import sequences, utils
|
589 | |
+import sys
|
590 | |
+
|
591 | |
random.seed(a=options.seed)
|
592 | |
|
593 | |
seq_reader = sequences.file_reader(options.infile)
|
594 | |
--- a/scripts/fastaq_to_quasr_primers_file
|
595 | |
+++ b/scripts/fastaq_to_quasr_primers_file
|
596 | |
@@ -1,7 +1,6 @@
|
597 | |
#!/usr/bin/env python3
|
598 | |
|
599 | |
import argparse
|
600 | |
-from fastaq import tasks
|
601 | |
|
602 | |
parser = argparse.ArgumentParser(
|
603 | |
description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated',
|
604 | |
@@ -9,4 +8,8 @@
|
605 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
606 | |
parser.add_argument('outfile', help='Name of output file')
|
607 | |
options = parser.parse_args()
|
608 | |
+
|
609 | |
+
|
610 | |
+from fastaq import tasks
|
611 | |
+
|
612 | |
tasks.to_quasr_primers(options.infile, options.outfile)
|
613 | |
--- a/scripts/fastaq_to_random_subset
|
614 | |
+++ b/scripts/fastaq_to_random_subset
|
615 | |
@@ -1,9 +1,6 @@
|
616 | |
#!/usr/bin/env python3
|
617 | |
|
618 | |
-import sys
|
619 | |
import argparse
|
620 | |
-import random
|
621 | |
-from fastaq import sequences, utils
|
622 | |
|
623 | |
parser = argparse.ArgumentParser(
|
624 | |
description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' +
|
625 | |
@@ -15,6 +12,11 @@
|
626 | |
parser.add_argument('probability', type=int, help='Probability of keeping any given read (pair) in [0,100]', metavar='INT')
|
627 | |
options = parser.parse_args()
|
628 | |
|
629 | |
+
|
630 | |
+import sys
|
631 | |
+import random
|
632 | |
+from fastaq import sequences, utils
|
633 | |
+
|
634 | |
seq_reader = sequences.file_reader(options.infile)
|
635 | |
fout = utils.open_file_write(options.outfile)
|
636 | |
|
637 | |
--- a/scripts/fastaq_to_tiling_bam
|
638 | |
+++ b/scripts/fastaq_to_tiling_bam
|
639 | |
@@ -1,9 +1,6 @@
|
640 | |
#!/usr/bin/env python3
|
641 | |
|
642 | |
import argparse
|
643 | |
-import sys
|
644 | |
-import os
|
645 | |
-from fastaq import sequences, utils
|
646 | |
|
647 | |
parser = argparse.ArgumentParser(
|
648 | |
description = 'Takes a fasta/q file. Makes a BAM file containing perfect (unpaired) reads tiling the whole genome',
|
649 | |
@@ -17,6 +14,11 @@
|
650 | |
parser.add_argument('--read_group', help='Add the given read group ID to all reads [%(default)s]' ,default='42')
|
651 | |
options = parser.parse_args()
|
652 | |
|
653 | |
+
|
654 | |
+import sys
|
655 | |
+import os
|
656 | |
+from fastaq import sequences, utils
|
657 | |
+
|
658 | |
# make a header first - we need to add the @RG line to the default header made by samtools
|
659 | |
tmp_empty_file = options.outfile + '.tmp.empty'
|
660 | |
f = utils.open_file_write(tmp_empty_file)
|
661 | |
--- a/scripts/fastaq_to_unique_by_id
|
662 | |
+++ b/scripts/fastaq_to_unique_by_id
|
663 | |
@@ -1,7 +1,6 @@
|
664 | |
#!/usr/bin/env python3
|
665 | |
|
666 | |
import argparse
|
667 | |
-from fastaq import tasks
|
668 | |
|
669 | |
parser = argparse.ArgumentParser(
|
670 | |
description = 'Removes duplicate sequences from a fasta/q file, based on their names. If the same name is found more than once, then the longest sequence is kept. Order of sequences is preserved in output',
|
671 | |
@@ -9,4 +8,8 @@
|
672 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
673 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
674 | |
options = parser.parse_args()
|
675 | |
+
|
676 | |
+
|
677 | |
+from fastaq import tasks
|
678 | |
+
|
679 | |
tasks.to_unique_by_id(options.infile, options.outfile)
|
680 | |
--- a/scripts/fastaq_translate
|
681 | |
+++ b/scripts/fastaq_translate
|
682 | |
@@ -1,7 +1,6 @@
|
683 | |
#!/usr/bin/env python3
|
684 | |
|
685 | |
import argparse
|
686 | |
-from fastaq import tasks
|
687 | |
|
688 | |
parser = argparse.ArgumentParser(
|
689 | |
description = 'Translates all sequences in a fasta or fastq file. Output is always fasta format',
|
690 | |
@@ -10,4 +9,8 @@
|
691 | |
parser.add_argument('infile', help='Name of fasta/q file to be translated', metavar='in.fasta/q')
|
692 | |
parser.add_argument('outfile', help='Name of output fasta file', metavar='out.fasta')
|
693 | |
options = parser.parse_args()
|
694 | |
+
|
695 | |
+
|
696 | |
+from fastaq import tasks
|
697 | |
+
|
698 | |
tasks.translate(options.infile, options.outfile, frame=options.frame)
|
699 | |
--- a/scripts/fastaq_trim_Ns_at_end
|
700 | |
+++ b/scripts/fastaq_trim_Ns_at_end
|
701 | |
@@ -1,7 +1,6 @@
|
702 | |
#!/usr/bin/env python3
|
703 | |
|
704 | |
import argparse
|
705 | |
-from fastaq import tasks
|
706 | |
|
707 | |
parser = argparse.ArgumentParser(
|
708 | |
description = 'Trims any Ns off each sequence in a fasta/q file. Does nothing to gaps in the middle, just trims the ends',
|
709 | |
@@ -9,4 +8,8 @@
|
710 | |
parser.add_argument('infile', help='Name of input fasta/q file')
|
711 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
712 | |
options = parser.parse_args()
|
713 | |
+
|
714 | |
+
|
715 | |
+from fastaq import tasks
|
716 | |
+
|
717 | |
tasks.trim_Ns_at_end(options.infile, options.outfile)
|
718 | |
--- a/scripts/fastaq_trim_ends
|
719 | |
+++ b/scripts/fastaq_trim_ends
|
720 | |
@@ -1,7 +1,6 @@
|
721 | |
#!/usr/bin/env python3
|
722 | |
|
723 | |
import argparse
|
724 | |
-from fastaq import tasks
|
725 | |
|
726 | |
parser = argparse.ArgumentParser(
|
727 | |
description = 'Trims set number of bases off each sequence in a fasta/q file',
|
728 | |
@@ -11,4 +10,8 @@
|
729 | |
parser.add_argument('end_trim', type=int, help='Number of bases to trim off end')
|
730 | |
parser.add_argument('outfile', help='Name of output fasta/q file')
|
731 | |
options = parser.parse_args()
|
732 | |
+
|
733 | |
+
|
734 | |
+from fastaq import tasks
|
735 | |
+
|
736 | |
tasks.trim(options.infile, options.outfile, options.start_trim, options.end_trim)
|