diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..89a691d --- /dev/null +++ b/debian/changelog @@ -0,0 +1,11 @@ +fastaq (1.6.0-1) experimental; urgency=medium + + * New upstream release + + -- Jorge Soares Tue, 18 Nov 2014 16:34:01 +0000 + +fastaq (1.5.0-1) unstable; urgency=medium + + * Initial release (Closes: #766321) + + -- Jorge Soares Thu, 23 Oct 2014 20:23:54 +0200 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..ec63514 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +9 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..e5d6731 --- /dev/null +++ b/debian/control @@ -0,0 +1,169 @@ +Source: fastaq +Maintainer: Debian Med Packaging Team +Uploaders: Andreas Tille , + Jorge Soares +Section: science +Priority: optional +Build-Depends: debhelper (>= 9), + python3, + python3-setuptools, + python3-numpy, + python3-nose, + samtools, + help2man +Standards-Version: 3.9.6 +Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/fastaq.git +Homepage: https://github.com/sanger-pathogens/Fastaq + +Package: fastaq +Architecture: all +Depends: ${python3:Depends}, + ${misc:Depends} +Description: FASTA and FASTQ file manipulation tools + A collection of scripts that perform useful and common + fasta/q manipulation tasks. + . + All scripts automatically detect whether the input is + a FASTA or FASTQ file. + . + Input and output files can be gzipped. + . + fastaq_capillary_to_pairs - + Given a fasta/q file of capillary reads, + makes an interleaved file of read pairs + . + fastaq_chunker - + Splits a multi fasta/q file into separate files. + Splits sequences into chunks of a fixed size. + . + fastaq_count_sequences - + Counts the number of sequences in a fasta/q file + . + fastaq_deinterleave - + Deinterleaves fasta/q file, so that reads are written + alternately between two output files + . + fastaq_enumerate_names - + Renames sequences in a file, calling them 1,2,3... + . 
+ fastaq_expand_nucleotides - + Makes all combinations of sequences in input file + by using all possibilities of redundant bases. + e.g. ART could be AAT or AGT. + . + fastaq_extend_gaps - + Extends the length of all gaps (and trims the start/end + of sequences) in a fasta/q file. + . + fastaq_fasta_to_fastq - + Given a fasta and qual file, makes a fastq file. + . + fastaq_filter - + Filters a fasta/q file by sequence length and/or + by name matching a regular expression. + . + fastaq_get_ids - + Gets IDs from each sequence in a fasta or fastq file. + . + fastaq_get_seq_flanking_gaps - + Gets the sequences either side of gaps in a fasta/q file. + . + fastaq_insert_or_delete_bases - + Deletes or inserts bases at given position(s) + from a fasta/q file. + . + fastaq_interleave - + Interleaves two fasta/q files, so that reads are written + alternately first/second in output file. + . + fastaq_long_read_simulate - + Simulates long reads from a fasta/q file. Can optionally + make insertions into the reads, like pacbio does. + . + fastaq_make_random_contigs - + Makes a multi-fasta file of random sequences, + all of the same length. Each base has equal chance of + being A,C,G or T + . + fastaq_merge - + Converts multi fasta/q file to single sequence file, + preserving original order of sequences. + . + fastaq_replace_bases - + Replaces all occurrences of one letter with another in + a fasta/q file. + . + fastaq_reverse_complement - + Reverse complements all sequences in a fasta/q file + . + fastaq_scaffolds_to_contigs - + Creates a file of contigs from a file of scaffolds - i.e. + breaks at every gap in the input. + . + fastaq_search_for_seq - + Searches for an exact match on a given string and its + reverse complement, in every sequence of a fasta/q file. + Case insensitive. Guaranteed to find all hits. + . + fastaq_sequence_trim - + Trims sequences off the start of all sequences in a pair + of fasta/q files, whenever there is a perfect match.
+ Only keeps a read pair if both reads of the pair are at + least a minimum length after any trimming. + . + fastaq_split_by_base_count - + Splits a multi fasta/q file into separate files. + Does not split sequences. Puts up to max_bases + into each split file. The exception is that any + sequence longer than max_bases is put into its own file. + . + fastaq_strip_illumina_suffix - + Strips /1 or /2 off the end of every read name + in a fasta/q file. + . + fastaq_to_fake_qual - + Makes fake quality scores file from a fasta/q file. + . + fastaq_to_fasta - + Converts sequence file to FASTA format. + . + fastaq_to_mira_xml - + Creates an xml file from a fasta/q file of reads, + for use with Mira assembler. + . + fastaq_to_orfs_gff - + Writes a GFF file of open reading frames from a fasta/q file + . + fastaq_to_perfect_reads - + Makes perfect paired end fastq reads from a fasta/q file, + with insert sizes sampled from a normal distribution. + Read orientation is innies. Output is an interleaved fastq file. + . + fastaq_to_random_subset - + Takes a random subset of reads from a fasta/q file and optionally + the corresponding read from a mates file. + Output is interleaved if mates file given. + . + fastaq_to_tiling_bam - + Takes a fasta/q file. Makes a BAM file containing perfect + (unpaired) reads tiling the whole genome. + . + fastaq_to_unique_by_id - + Removes duplicate sequences from a fasta/q file, + based on their names. If the same name is found + more than once, then the longest sequence is kept. + Order of sequences is preserved in output. + . + fastaq_translate - + Translates all sequences in a fasta or fastq file. + Output is always fasta format + . + fastaq_trim_ends - + Trims set number of bases off each sequence in a fasta/q file + . + fastaq_trim_Ns_at_end - + Trims any Ns off each sequence in a fasta/q file. + Does nothing to gaps in the middle, just trims the ends + . + A developer API is also provided by this package.
+ There are plenty of examples in tasks.py diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..ca8eea8 --- /dev/null +++ b/debian/copyright @@ -0,0 +1,22 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: Fastaq +Source: https://github.com/sanger-pathogens/Fastaq + +Files: * +Copyright: © 2012-2013 Martin Hunt +License: GPL-3 + This package is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + . + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + . + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. + On Debian systems, the complete text of the GNU General + Public License version 3 can be found in "/usr/share/common-licenses/GPL-3". diff --git a/debian/fastaq.manpages b/debian/fastaq.manpages new file mode 100644 index 0000000..d2c65e3 --- /dev/null +++ b/debian/fastaq.manpages @@ -0,0 +1 @@ +debian/man/* \ No newline at end of file diff --git a/debian/patches/delay-import-statements-for-manpage-creation.patch b/debian/patches/delay-import-statements-for-manpage-creation.patch new file mode 100644 index 0000000..48dac81 --- /dev/null +++ b/debian/patches/delay-import-statements-for-manpage-creation.patch @@ -0,0 +1,774 @@ +Description: Delay import of Fastaq modules by the python executables + Man pages for this package are automatically created through the + help2man wrapper called usage_to_man. help2man calls the python executables + with the -h option and converts the usage into a man page. + .
+ The first step done by all the executables is the import of the modules deployed + by this package. Since the package is not installed in the system at build time, + the man pages would never be properly created. + . + This patch solves this problem by importing the modules in this package after + the argument parsing code. + . + Upstream preferred to keep the code as it is for styling reasons, which is + perfectly reasonable + . + fastaq (1.5.0-1) UNRELEASED; urgency=low + . + * Initial release (Closes: #766321) +Author: Jorge Soares +Index: fastaq/scripts/fastaq_capillary_to_pairs +=================================================================== +--- fastaq.orig/scripts/fastaq_capillary_to_pairs ++++ fastaq/scripts/fastaq_capillary_to_pairs +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Given a fasta/q file of capillary reads, makes an interleaved file of read pairs (where more than read from same ligation, takes the longest read) and a file of unpaired reads. Replaces the .p1k/.q1k part of read names to denote fwd/rev reads with /1 and /2', +@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser( + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outprefix', help='Prefix of output files', metavar='outfiles prefix') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.capillary_to_pairs(options.infile, options.outprefix) +Index: fastaq/scripts/fastaq_chunker +=================================================================== +--- fastaq.orig/scripts/fastaq_chunker ++++ fastaq/scripts/fastaq_chunker +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Splits a multi fasta/q file into separate files. Splits sequences into chunks of a fixed size.
Aims for chunk_size chunks in each file, but allows a little extra, so chunk can be up to (chunk_size + tolerance), to prevent tiny chunks made from the ends of sequences', +@@ -12,6 +11,10 @@ parser.add_argument('chunk_size', type=i + parser.add_argument('tolerance', type=int, help='Tolerance allowed in chunk size') + parser.add_argument('--skip_all_Ns', action='store_true', help='Do not output any sequence that consists of all Ns') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.split_by_fixed_size( + options.infile, + options.outprefix, +Index: fastaq/scripts/fastaq_count_sequences +=================================================================== +--- fastaq.orig/scripts/fastaq_count_sequences ++++ fastaq/scripts/fastaq_count_sequences +@@ -1,11 +1,14 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Counts the number of sequences in a fasta/q file', + usage = '%(prog)s ') + parser.add_argument('infile', help='Name of input fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + print(tasks.count_sequences(options.infile)) +Index: fastaq/scripts/fastaq_deinterleave +=================================================================== +--- fastaq.orig/scripts/fastaq_deinterleave ++++ fastaq/scripts/fastaq_deinterleave +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Deinterleaves fasta/q file, so that reads are written alternately between two output files', +@@ -11,4 +10,8 @@ parser.add_argument('infile', help='Name + parser.add_argument('out_fwd', help='Name of output fasta/q file of forwards reads') + parser.add_argument('out_rev', help='Name of output fasta/q file of reverse reads') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.deinterleave(options.infile, options.out_fwd, options.out_rev, 
fasta_out=options.fasta_out) +Index: fastaq/scripts/fastaq_enumerate_names +=================================================================== +--- fastaq.orig/scripts/fastaq_enumerate_names ++++ fastaq/scripts/fastaq_enumerate_names +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Renames sequences in a file, calling them 1,2,3... etc', +@@ -12,6 +11,10 @@ parser.add_argument('--keep_suffix', act + parser.add_argument('infile', help='Name of fasta/q file to be read') + parser.add_argument('outfile', help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.enumerate_names(options.infile, + options.outfile, + start_index=options.start_index, +Index: fastaq/scripts/fastaq_expand_nucleotides +=================================================================== +--- fastaq.orig/scripts/fastaq_expand_nucleotides ++++ fastaq/scripts/fastaq_expand_nucleotides +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Makes all combinations of sequences in input file by using all possibilities of redundant bases. e.g. ART could be AAT or AGT. Assumes input is nucleotides, not amino acids', +@@ -9,6 +8,10 @@ parser = argparse.ArgumentParser( + parser.add_argument('infile', help='Name of input file. 
Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip') + parser.add_argument('outfile', help='Name of output file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.expand_nucleotides( + options.infile, + options.outfile, +Index: fastaq/scripts/fastaq_extend_gaps +=================================================================== +--- fastaq.orig/scripts/fastaq_extend_gaps ++++ fastaq/scripts/fastaq_extend_gaps +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Extends the length of all gaps (and trims the start/end of sequences) in a fasta/q file. Does this by replacing a set number of bases either side of each gap with Ns. Any sequence that ends up as all Ns is lost', +@@ -10,4 +9,8 @@ parser.add_argument('--trim_number', typ + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outfile', help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.extend_gaps(options.infile, options.outfile, options.trim_number) +Index: fastaq/scripts/fastaq_fasta_to_fastq +=================================================================== +--- fastaq.orig/scripts/fastaq_fasta_to_fastq ++++ fastaq/scripts/fastaq_fasta_to_fastq +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Given a fasta and qual file, makes a fastq file', +@@ -10,4 +9,8 @@ parser.add_argument('fasta', help='Name + parser.add_argument('qual', help='Name of input quality scores file', metavar='qual in') + parser.add_argument('outfile', help='Name of output fastq file', metavar='fastq out') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.fasta_to_fastq(options.fasta, options.qual, options.outfile) +Index: fastaq/scripts/fastaq_filter 
+=================================================================== +--- fastaq.orig/scripts/fastaq_filter ++++ fastaq/scripts/fastaq_filter +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Filters a fasta/q file by sequence length and/or by name matching a regular expression', +@@ -14,6 +13,10 @@ parser.add_argument('-v', '--invert', ac + parser.add_argument('infile', help='Name of fasta/q file to be filtered') + parser.add_argument('outfile', help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.filter(options.infile, + options.outfile, + minlength=options.min_length, +Index: fastaq/scripts/fastaq_get_ids +=================================================================== +--- fastaq.orig/scripts/fastaq_get_ids ++++ fastaq/scripts/fastaq_get_ids +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Gets IDs from each sequence in a fasta or fastq file', +@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser( + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outfile', help='Name of output file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.get_ids(options.infile, options.outfile) +Index: fastaq/scripts/fastaq_get_seq_flanking_gaps +=================================================================== +--- fastaq.orig/scripts/fastaq_get_seq_flanking_gaps ++++ fastaq/scripts/fastaq_get_seq_flanking_gaps +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Gets the sequences either side of gaps in a fasta/q file', +@@ -11,4 +10,8 @@ parser.add_argument('--right', type=int, + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outfile', 
help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.get_seqs_flanking_gaps(options.infile, options.outfile, options.left, options.right) +Index: fastaq/scripts/fastaq_insert_or_delete_bases +=================================================================== +--- fastaq.orig/scripts/fastaq_insert_or_delete_bases ++++ fastaq/scripts/fastaq_insert_or_delete_bases +@@ -1,9 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-import sys +-import random +-from fastaq import sequences, utils, intervals + + parser = argparse.ArgumentParser( + description = 'Deletes or inserts bases at given position(s) from a fasta/q file', +@@ -16,6 +13,11 @@ parser.add_argument('-i','--insert', act + parser.add_argument('--insert_range', help='Inserts random bases starting after position P in each sequence of the input file. Inserts start + (n-1)*step bases into sequence n.', metavar='P,start,step') + options = parser.parse_args() + ++ ++import sys ++import random ++from fastaq import sequences, utils, intervals ++ + test_ops = [int(x is not None) for x in [options.delete, options.insert, options.delete_range, options.insert_range]] + + if sum(test_ops) != 1: +Index: fastaq/scripts/fastaq_interleave +=================================================================== +--- fastaq.orig/scripts/fastaq_interleave ++++ fastaq/scripts/fastaq_interleave +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Interleaves two fasta/q files, so that reads are written alternately first/second in output file', +@@ -10,4 +9,8 @@ parser.add_argument('infile_1', help='Na + parser.add_argument('infile_2', help='Name of second input fasta/q file') + parser.add_argument('outfile', help='Name of output fasta/q file of interleaved reads') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.interleave(options.infile_1, options.infile_2, 
options.outfile) +Index: fastaq/scripts/fastaq_long_read_simulate +=================================================================== +--- fastaq.orig/scripts/fastaq_long_read_simulate ++++ fastaq/scripts/fastaq_long_read_simulate +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Simulates long reads from a fasta/q file. Can optionally make insertions into the reads, like pacbio does. If insertions made, coverage calculation is done before the insertions (so total read length may appear longer then expected).', +@@ -29,8 +28,11 @@ ins_group = parser.add_argument_group('o + ins_group.add_argument('--ins_skip', type=int, help='Insert a random base every --skip bases plus or minus --ins_window. If this option is used, must also use --ins_window.', metavar='INT') + ins_group.add_argument('--ins_window', type=int, help='See --ins_skip. If this option is used, must also use --ins_skip.', metavar='INT') + +- + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.make_long_reads( + options.infile, + options.outfile, +Index: fastaq/scripts/fastaq_make_random_contigs +=================================================================== +--- fastaq.orig/scripts/fastaq_make_random_contigs ++++ fastaq/scripts/fastaq_make_random_contigs +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Makes a multi-fasta file of random sequences, all of the same length. 
Each base has equal chance of being A,C,G or T', +@@ -14,6 +13,10 @@ parser.add_argument('contigs', type=int, + parser.add_argument('length', type=int, help='Length of each contig') + parser.add_argument('outfile', help='Name of output file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.make_random_contigs( + options.contigs, + options.length, +Index: fastaq/scripts/fastaq_merge +=================================================================== +--- fastaq.orig/scripts/fastaq_merge ++++ fastaq/scripts/fastaq_merge +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Converts multi fasta/q file to single sequence file, preserving original order of sequences', +@@ -10,6 +9,10 @@ parser.add_argument('infile', help='Name + parser.add_argument('outfile', help='Name of output file') + parser.add_argument('-n', '--name', help='Name of sequence in output file [%(default)s]', default='union') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.merge_to_one_seq( + options.infile, + options.outfile, +Index: fastaq/scripts/fastaq_replace_bases +=================================================================== +--- fastaq.orig/scripts/fastaq_replace_bases ++++ fastaq/scripts/fastaq_replace_bases +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Replaces all occurences of one letter with another in a fasta/q file', +@@ -11,4 +10,8 @@ parser.add_argument('outfile', help='Nam + parser.add_argument('old', help='Base to be replaced') + parser.add_argument('new', help='Replace with this letter') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.replace_bases(options.infile, options.outfile, options.old, options.new) +Index: fastaq/scripts/fastaq_reverse_complement 
+=================================================================== +--- fastaq.orig/scripts/fastaq_reverse_complement ++++ fastaq/scripts/fastaq_reverse_complement +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Reverse complements all sequences in a fasta/q file', +@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser( + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outfile', help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.reverse_complement(options.infile, options.outfile) +Index: fastaq/scripts/fastaq_scaffolds_to_contigs +=================================================================== +--- fastaq.orig/scripts/fastaq_scaffolds_to_contigs ++++ fastaq/scripts/fastaq_scaffolds_to_contigs +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Creates a file of contigs from a file of scaffolds - i.e. breaks at every gap in the input', +@@ -10,4 +9,7 @@ parser.add_argument('--number_contigs', + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outfile', help='Name of output contigs file') + options = parser.parse_args() ++ ++from fastaq import tasks ++ + tasks.scaffolds_to_contigs(options.infile, options.outfile, number_contigs=options.number_contigs) +Index: fastaq/scripts/fastaq_search_for_seq +=================================================================== +--- fastaq.orig/scripts/fastaq_search_for_seq ++++ fastaq/scripts/fastaq_search_for_seq +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Searches for an exact match on a given string and its reverese complement, in every sequences of a fasta/q file. Case insensitive. 
Guaranteed to find all hits', +@@ -10,4 +9,7 @@ parser.add_argument('infile', help='Name + parser.add_argument('outfile', help='Name of outputfile. Tab-delimited output: sequence name, position, strand') + parser.add_argument('search_string', help='String to search for in the sequences') + options = parser.parse_args() ++ ++from fastaq import tasks ++ + tasks.search_for_seq(options.infile, options.outfile, options.search_string) +Index: fastaq/scripts/fastaq_sequence_trim +=================================================================== +--- fastaq.orig/scripts/fastaq_sequence_trim ++++ fastaq/scripts/fastaq_sequence_trim +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming', +@@ -14,6 +13,10 @@ parser.add_argument('outfile_1', help='N + parser.add_argument('outfile_2', help='Name of output reverse fasta/q file', metavar='out_2') + parser.add_argument('trim_seqs', help='Name of fasta/q file of sequences to search for at the start of each input sequence', metavar='trim_seqs') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.sequence_trim( + options.infile_1, + options.infile_2, +Index: fastaq/scripts/fastaq_split_by_base_count +=================================================================== +--- fastaq.orig/scripts/fastaq_split_by_base_count ++++ fastaq/scripts/fastaq_split_by_base_count +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Splits a multi fasta/q file into separate files. Does not split sequences. Puts up to max_bases into each split file. 
The exception is that any sequence longer than max_bases is put into its own file.', +@@ -12,4 +11,8 @@ parser.add_argument('max_bases', type=in + parser.add_argument('--max_seqs', type=int, help='Max number of sequences in each output split file [no limit]', metavar='INT') + + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.split_by_base_count(options.infile, options.outprefix, options.max_bases, options.max_seqs) +Index: fastaq/scripts/fastaq_strip_illumina_suffix +=================================================================== +--- fastaq.orig/scripts/fastaq_strip_illumina_suffix ++++ fastaq/scripts/fastaq_strip_illumina_suffix +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Strips /1 or /2 off the end of every read name in a fasta/q file', +@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser( + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outfile', help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.strip_illumina_suffix(options.infile, options.outfile) +Index: fastaq/scripts/fastaq_to_fake_qual +=================================================================== +--- fastaq.orig/scripts/fastaq_to_fake_qual ++++ fastaq/scripts/fastaq_to_fake_qual +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Makes fake quality scores file from a fasta/q file', +@@ -10,6 +9,10 @@ parser.add_argument('infile', help='Name + parser.add_argument('outfile', help='Name of output file') + parser.add_argument('-q', '--qual', type=int, help='Quality score to assign to all bases [%(default)s]', default=40) + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.fastaq_to_fake_qual( + options.infile, + options.outfile, +Index: 
fastaq/scripts/fastaq_to_fasta +=================================================================== +--- fastaq.orig/scripts/fastaq_to_fasta ++++ fastaq/scripts/fastaq_to_fasta +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Converts sequence file to FASTA format', +@@ -11,6 +10,10 @@ parser.add_argument('outfile', help='Nam + parser.add_argument('-l', '--line_length', type=int, help='Number of bases on each sequence line of output file [%(default)s]', default=60) + parser.add_argument('-s', '--strip_after_whitespace', action='store_true', help='Remove everything after first whitesapce in every sequence name') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.to_fasta( + options.infile, + options.outfile, +Index: fastaq/scripts/fastaq_to_mira_xml +=================================================================== +--- fastaq.orig/scripts/fastaq_to_mira_xml ++++ fastaq/scripts/fastaq_to_mira_xml +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Creates an xml file from a fasta/q file of reads, for use with Mira assembler', +@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser( + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('xml_out', help='Name of output xml file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.fastaq_to_mira_xml(options.infile, options.xml_out) +Index: fastaq/scripts/fastaq_to_orfs_gff +=================================================================== +--- fastaq.orig/scripts/fastaq_to_orfs_gff ++++ fastaq/scripts/fastaq_to_orfs_gff +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Writes a GFF file of open reading frames from a fasta/q file', +@@ -10,4 +9,8 @@ 
parser.add_argument('--min_length', type + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('gff_out', help='Name of output gff file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.fastaq_to_orfs_gff(options.infile, options.gff_out, min_length=options.min_length) +Index: fastaq/scripts/fastaq_to_perfect_reads +=================================================================== +--- fastaq.orig/scripts/fastaq_to_perfect_reads ++++ fastaq/scripts/fastaq_to_perfect_reads +@@ -1,10 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-import random +-from math import floor, ceil +-from fastaq import sequences, utils +-import sys + + parser = argparse.ArgumentParser( + description = 'Makes perfect paired end fastq reads from a fasta/q file, with insert sizes sampled from a normal distribution. Read orientation is innies. Output is an interleaved fastq file.', +@@ -20,6 +16,12 @@ parser.add_argument('--no_n', action='st + parser.add_argument('--seed', type=int, help='Seed for random number generator. 
Default is to use python\'s default', default=None, metavar='INT') + options = parser.parse_args() + ++ ++import random ++from math import floor, ceil ++from fastaq import sequences, utils ++import sys ++ + random.seed(a=options.seed) + + seq_reader = sequences.file_reader(options.infile) +Index: fastaq/scripts/fastaq_to_random_subset +=================================================================== +--- fastaq.orig/scripts/fastaq_to_random_subset ++++ fastaq/scripts/fastaq_to_random_subset +@@ -1,9 +1,6 @@ + #!/usr/bin/env python3 + +-import sys + import argparse +-import random +-from fastaq import sequences, utils + + parser = argparse.ArgumentParser( + description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' + +@@ -15,6 +12,11 @@ parser.add_argument('outfile', help='Nam + parser.add_argument('probability', type=int, help='Probability of keeping any given read (pair) in [0,100]', metavar='INT') + options = parser.parse_args() + ++ ++import sys ++import random ++from fastaq import sequences, utils ++ + seq_reader = sequences.file_reader(options.infile) + fout = utils.open_file_write(options.outfile) + +Index: fastaq/scripts/fastaq_to_tiling_bam +=================================================================== +--- fastaq.orig/scripts/fastaq_to_tiling_bam ++++ fastaq/scripts/fastaq_to_tiling_bam +@@ -1,9 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-import sys +-import os +-from fastaq import sequences, utils + + parser = argparse.ArgumentParser( + description = 'Takes a fasta/q file. 
Makes a BAM file containing perfect (unpaired) reads tiling the whole genome', +@@ -17,6 +14,11 @@ parser.add_argument('outfile', help='Nam + parser.add_argument('--read_group', help='Add the given read group ID to all reads [%(default)s]' ,default='42') + options = parser.parse_args() + ++ ++import sys ++import os ++from fastaq import sequences, utils ++ + # make a header first - we need to add the @RG line to the default header made by samtools + tmp_empty_file = options.outfile + '.tmp.empty' + f = utils.open_file_write(tmp_empty_file) +Index: fastaq/scripts/fastaq_to_unique_by_id +=================================================================== +--- fastaq.orig/scripts/fastaq_to_unique_by_id ++++ fastaq/scripts/fastaq_to_unique_by_id +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Removes duplicate sequences from a fasta/q file, based on their names. If the same name is found more than once, then the longest sequence is kept. Order of sequences is preserved in output', +@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser( + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outfile', help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.to_unique_by_id(options.infile, options.outfile) +Index: fastaq/scripts/fastaq_translate +=================================================================== +--- fastaq.orig/scripts/fastaq_translate ++++ fastaq/scripts/fastaq_translate +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Translates all sequences in a fasta or fastq file. 
Output is always fasta format', +@@ -10,4 +9,8 @@ parser.add_argument('--frame', type=int, + parser.add_argument('infile', help='Name of fasta/q file to be translated', metavar='in.fasta/q') + parser.add_argument('outfile', help='Name of output fasta file', metavar='out.fasta') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.translate(options.infile, options.outfile, frame=options.frame) +Index: fastaq/scripts/fastaq_trim_Ns_at_end +=================================================================== +--- fastaq.orig/scripts/fastaq_trim_Ns_at_end ++++ fastaq/scripts/fastaq_trim_Ns_at_end +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Trims any Ns off each sequence in a fasta/q file. Does nothing to gaps in the middle, just trims the ends', +@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser( + parser.add_argument('infile', help='Name of input fasta/q file') + parser.add_argument('outfile', help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.trim_Ns_at_end(options.infile, options.outfile) +Index: fastaq/scripts/fastaq_trim_ends +=================================================================== +--- fastaq.orig/scripts/fastaq_trim_ends ++++ fastaq/scripts/fastaq_trim_ends +@@ -1,7 +1,6 @@ + #!/usr/bin/env python3 + + import argparse +-from fastaq import tasks + + parser = argparse.ArgumentParser( + description = 'Trims set number of bases off each sequence in a fasta/q file', +@@ -11,4 +10,8 @@ parser.add_argument('start_trim', type=i + parser.add_argument('end_trim', type=int, help='Number of bases to trim off end') + parser.add_argument('outfile', help='Name of output fasta/q file') + options = parser.parse_args() ++ ++ ++from fastaq import tasks ++ + tasks.trim(options.infile, options.outfile, options.start_trim, options.end_trim) diff --git a/debian/patches/series 
b/debian/patches/series new file mode 100644 index 0000000..dfa3826 --- /dev/null +++ b/debian/patches/series @@ -0,0 +1 @@ +delay-import-statements-for-manpage-creation.patch diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..58f2a1b --- /dev/null +++ b/debian/rules @@ -0,0 +1,25 @@ +#!/usr/bin/make -f + +export DH_VERBOSE := 1 +export PYBUILD_NAME=fastaq + +mandir := $(CURDIR)/debian/man +debfolder := $(CURDIR)/debian + +%: + dh $@ --with python3 --buildsystem=pybuild + +override_dh_auto_build: + dh_python3 + dh_auto_build + mkdir $(CURDIR)/doc + cd $(CURDIR)/doc + +override_dh_auto_clean: + rm -rf build .pybuild + rm -rf $(mandir) + +override_dh_installman: + mkdir -p $(mandir) + $(debfolder)/usage_to_man + dh_installman -- \ No newline at end of file diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 0000000..46ebe02 --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (quilt) \ No newline at end of file diff --git a/debian/upstream/metadata b/debian/upstream/metadata new file mode 100644 index 0000000..d8b5812 --- /dev/null +++ b/debian/upstream/metadata @@ -0,0 +1,12 @@ +Reference: + Author: + Title: + Journal: + Year: + Volume: + Number: + Pages: + DOI: + PMID: + URL: + eprint: diff --git a/debian/usage_to_man b/debian/usage_to_man new file mode 100755 index 0000000..ff45c2b --- /dev/null +++ b/debian/usage_to_man @@ -0,0 +1,142 @@ +#!/usr/bin/perl +use strict; +use warnings; + +#Converts Fastaq python scripts usage into man pages. 
# The man pages are written to debian/man, relative to the directory this
# script is run from.

createManPages();

# Builds one section-1 man page for every scripts/fastaq_* file.
#
# For each script, help2man is run on it and its output is post-processed:
#   * every "usage:" (any case) is removed,
#   * the .TH title line is rewritten to carry the upper-cased script name,
#     the application name and the "Fastaq executables" label,
#   * the " \- <script>" text is replaced using the short description
#     returned by shortDescription(),
#   * a ".PP" line directly followed by a "<script>:" line is blanked
#     together with that following line,
#   * everything from the "SEE ALSO" section onwards is dropped.
# The result is written to debian/man/<script>.1, after which
# writeAuthorAndCopyright() appends its text to the same file handle.
#
# Takes no arguments and returns nothing. Dies if the output directory or a
# man page file cannot be created or closed. Problems running help2man are
# only reported with warn, so the remaining pages are still generated.
sub createManPages {

  my $source       = 'scripts';
  my $destination  = 'debian/man';
  my $app_name     = 'Fastaq';
  my $descriptions = shortDescription();

  # Perl's builtin mkdir, with its result checked, instead of shelling out.
  unless ( -d $destination ) {
    mkdir($destination) or die "Cannot create directory $destination: $!\n";
  }

  # glob avoids spawning `ls` and parsing its newline-terminated output.
  my @files = grep { -f $_ } glob("$source/fastaq_*");
  return unless @files;

  print "Creating manpages\n";
  for my $file (@files) {
    ( my $filename = $file ) =~ s{\A\Q$source\E/}{};
    my $uc_filename = uc $filename;
    my $man_file    = "$destination/$filename.1";

    # A script without an entry in the descriptions hash gets an empty
    # description rather than an "uninitialized value" warning mid-regex.
    my $description = $descriptions->{$filename};
    unless ( defined $description ) {
      warn "No short description available for $filename\n";
      $description = '';
    }

    my @output = _help2manLines( $filename, $file );

    open( my $man_fh, '>', $man_file )
      or die "Cannot open $man_file for writing: $!\n";

    for my $i ( 0 .. $#output ) {
      my $output_line = $output[$i];

      if ( $output_line =~ m/^\.TH/ ) {
        $output_line =~ s/\s+/ /g;
        $output_line =~ s/(\.TH) ("\d+") ("[a-zA-Z0-9_ ]*") ("[a-zA-Z0-9_<>\[\]\/\.\(\), ]*") ("[a-zA-Z0-9_]*")/$1 $uc_filename $2 $3 "$app_name" "Fastaq executables"/;
      }

      # NOTE(review): in the replacement text "\-" collapses to a plain "-"
      # and the script name is inserted again; confirm the resulting NAME
      # line against real help2man output.
      $output_line =~ s/ \\- \Q$filename\E/$filename \- $description/;

      # Only look at the following line when there is one.
      if ( $output_line =~ m/^\.PP/
        && $i < $#output
        && $output[ $i + 1 ] =~ m/^\Q$filename\E\:/ )
      {
        $output_line = $output[ $i + 1 ] = '';
      }

      last if $output_line =~ m/^\.SH "SEE ALSO"/;

      print {$man_fh} "$output_line\n";
    }

    writeAuthorAndCopyright( $man_fh, $filename );
    close($man_fh) or die "Cannot close $man_file: $!\n";
  }
  print "Manpage creation complete\n";
  return;
}

# Runs help2man for one script and returns its output as a list of lines
# without trailing newlines and with every "usage:" (any case) removed.
# $filename is passed to help2man as both the -m and the -n text, $file is
# the path of the executable. Returns an empty list when help2man cannot be
# started; a non-zero exit status is reported with warn.
sub _help2manLines {
  my ( $filename, $file ) = @_;

  # List-form pipe open: the arguments never pass through a shell, and the
  # exit status seen at close is help2man's own.
  my @command = (
    'help2man', '-m', $filename, '-n', $filename,
    '--no-discard-stderr', $file
  );

  my $help_fh;
  unless ( open( $help_fh, '-|', @command ) ) {
    warn "Cannot run help2man for $file: $!\n";
    return;
  }

  my @lines;
  while ( my $line = <$help_fh> ) {
    chomp $line;
    $line =~ s/usage://gi;
    push @lines, $line;
  }

  close($help_fh)
    or warn "help2man failed for $file (wait status $?)\n";

  return @lines;
}

# NOTE(review): the remainder of this source line is kept verbatim below. It
# looks damaged (the heredoc in writeAuthorAndCopyright and the header of
# shortDescription appear to be missing), so it is left unmodified as a comment:
# +sub writeAuthorAndCopyright { + + my ($man_fh,$filename) = @_; + + my $author_blurb = < 'makes an interleaved file of read pairs', + fastaq_chunker => 'splits a multi fasta/q file into separate files', + fastaq_count_sequences => 'counts the number of sequences in a fasta/q file', + fastaq_deinterleave =>
'deinterleaves fasta/q file', + fastaq_enumerate_names => 'renames sequences in a file, calling them 1,2,3...', + fastaq_expand_nucleotides => 'makes all combinations of sequences in input file', + fastaq_extend_gaps => 'extends the length of all gaps in a fasta/q file', + fastaq_fasta_to_fastq => 'given a fasta and qual file, makes a fastq file', + fastaq_filter => 'filters a fasta/q file by sequence length and/or by name', + fastaq_get_ids => 'gets ids from each sequence in a fasta or fastq file', + fastaq_get_seq_flanking_gaps => 'gets the sequences either side of gaps in a fasta/q file', + fastaq_insert_or_delete_bases => 'deletes or inserts bases at given position(s)', + fastaq_interleave => 'interleaves two fasta/q files', + fastaq_long_read_simulate => 'simulates long reads from a fasta/q file', + fastaq_make_random_contigs => 'makes a multi-fasta file of random sequences', + fastaq_merge => 'converts multi fasta/q file to single sequence file', + fastaq_replace_bases => 'replaces all occurences of one letter with another', + fastaq_reverse_complement => 'reverse complements all sequences', + fastaq_scaffolds_to_contigs => 'creates a file of contigs from a file of scaffolds', + fastaq_search_for_seq => 'searches for an exact match on a given string and its reverese complement. 
guaranteed to find all hits', + fastaq_sequence_trim => 'trims sequences off the start of all sequences in a pair of fasta/q files', + fastaq_split_by_base_count => 'splits a multi fasta/q file into separate files', + fastaq_strip_illumina_suffix => 'strips /1 or /2 off the end of every read name', + fastaq_to_fake_qual => 'makes fake quality scores file', + fastaq_to_fasta => 'converts sequence file to fasta format', + fastaq_to_mira_xml => 'creates an xml file from a fasta/q file of reads, for use with mira assembler', + fastaq_to_orfs_gff => 'writes a gff file of open reading frames', + fastaq_to_perfect_reads => 'makes perfect paired end fastq reads', + fastaq_to_random_subset => 'takes a random subset of reads', + fastaq_to_tiling_bam => 'makes a bam file containing perfect (unpaired) reads tiling the whole genome', + fastaq_to_unique_by_id => 'removes duplicate sequences', + fastaq_translate => 'translates all sequences', + fastaq_trim_ends => 'trims set number of bases off each sequence', + fastaq_trim_Ns_at_end => 'trims any ns off each sequence' + ); + + return(\%descriptions); +} diff --git a/debian/watch b/debian/watch new file mode 100644 index 0000000..46c1516 --- /dev/null +++ b/debian/watch @@ -0,0 +1,3 @@ +version=3 +https://github.com/sanger-pathogens/fastaq/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz) +