diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index d2d6f36..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,35 +0,0 @@
-*.py[cod]
-
-# C extensions
-*.so
-
-# Packages
-*.egg
-*.egg-info
-dist
-build
-eggs
-parts
-bin
-var
-sdist
-develop-eggs
-.installed.cfg
-lib
-lib64
-
-# Installer logs
-pip-log.txt
-
-# Unit test / coverage reports
-.coverage
-.tox
-nosetests.xml
-
-# Translations
-*.mo
-
-# Mr Developer
-.mr.developer.cfg
-.project
-.pydevproject
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index c3ded3b..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-language: python
-python:
-  - "3.4"
-sudo: false
-script:
-  - "python setup.py test"
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..759774a
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,18 @@
+Metadata-Version: 2.1
+Name: pyfastaq
+Version: 3.17.0
+Summary: Script to manipulate FASTA and FASTQ files, plus API for developers
+Home-page: https://github.com/sanger-pathogens/Fastaq
+Author: Martin Hunt
+Author-email: path-help@sanger.ac.uk
+License: GPLv3
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
+License-File: LICENSE
+License-File: AUTHORS
+
+UNKNOWN
+
diff --git a/README.md b/README.md
index 675cb2f..af72519 100644
--- a/README.md
+++ b/README.md
@@ -1,34 +1,61 @@
-Fastaq
-======
-
+# Fastaq
+Manipulate FASTA and FASTQ files
+
+[![Build Status](https://travis-ci.org/sanger-pathogens/Fastaq.svg?branch=master)](https://travis-ci.org/sanger-pathogens/Fastaq)   
+[![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-brightgreen.svg)](https://github.com/sanger-pathogens/Fastaq/blob/master/LICENSE)   
+
+## Contents
+  * [Introduction](#introduction)
+  * [Installation](#installation)
+    * [Using pip3](#using-pip3)
+    * [From source](#from-source)
+    * [Running the tests](#running-the-tests)
+  * [Usage](#usage)
+    * [Examples](#examples)
+    * [Available commands](#available-commands)
+    * [For developers](#for-developers)
+  * [License](#license)
+  * [Feedback/Issues](#feedbackissues)
+
+## Introduction
 Python3 script to manipulate FASTA and FASTQ (and other format) files, plus API for developers
 
-Installation
-------------
+## Installation
+There are a number of ways to install Fastaq, and details are provided below. If you encounter an issue when installing Fastaq, please contact your local system administrator. If you encounter a bug, please log it [here](https://github.com/sanger-pathogens/Fastaq/issues) or email us at path-help@sanger.ac.uk.
 
-Install with pip3:
+### Using pip3
 
-    pip3 install pyfastaq
+`pip3 install pyfastaq`
 
+### From source
 
-Alternatively, you can download the latest release from this github repository,
-or clone the repository. Then run the tests:
+Download the latest release from this GitHub repository or clone the repository. Then run the tests:
 
-    python3 setup.py test
+`python3 setup.py test`
 
 If the tests all pass, install:
 
-    python3 setup.py install
+`python3 setup.py install`
+
+### Running the tests
+
+The tests can be run from the top-level directory:  
 
+`python3 setup.py test`
 
-Usage
------
+### Runtime dependencies
+
+These must be available in your path at run time:
+  * samtools 0.1.19
+  * gzip
+  * gunzip
+
+## Usage
 
 The installation will put a single script called `fastaq` in your path.
 The usage is:
 
-    fastaq <command> [options]
-
+`fastaq <command> [options]`
 
 Key points:
  * To list the available commands and brief descriptions, just run `fastaq`
@@ -40,21 +67,18 @@ Key points:
  * Input and output files can be gzipped. An input file is assumed to be gzipped if its name ends with .gz. To gzip an output file, just name it with .gz at the end.
  * You can use a minus sign for a filename to use stdin or stdout, so commands can be piped together. See the example below.
 
-
-Examples
---------
+### Examples
 
 Reverse complement all sequences in a file:
 
-    fastaq reverse_complement in.fastq out.fastq
+`fastaq reverse_complement in.fastq out.fastq`
 
 Reverse complement all sequences in a gzipped file, then translate each sequence:
 
-    fastaq reverse_complement in.fastq.gz - | fastaq translate - out.fasta
+`fastaq reverse_complement in.fastq.gz - | fastaq translate - out.fasta`
 
 
-Available commands
-------------------
+### Available commands
 
 | Command               | Description                                                          |
 |-----------------------|----------------------------------------------------------------------|
@@ -98,22 +122,21 @@ Available commands
 | version               | Print version number and exit                                        |
 
 
-For developers
---------------
+### For developers
 
 Here is a template for counting the sequences in a FASTA or FASTQ file:
-
-    from pyfastaq import sequences
-    seq_reader = sequences.file_reader(infile)
-    count = 0
-    for seq in seq_reader:
-        count += 1
-    print(count)
-
+```
+from pyfastaq import sequences
+seq_reader = sequences.file_reader(infile)
+count = 0
+for seq in seq_reader:
+    count += 1
+print(count)
+```
 Hopefully you get the idea and there are plenty of examples in tasks.py. Detection of the input file type and whether gzipped or not is automatic. See help(sequences) for the various methods already defined in the classes Fasta and Fastq.
 
----------------------------------
-
-Build status: [![Build Status](https://travis-ci.org/sanger-pathogens/Fastaq.svg?branch=master)](https://travis-ci.org/sanger-pathogens/Fastaq)
-
+## License
+Fastaq is free software, licensed under [GPLv3](https://github.com/sanger-pathogens/Fastaq/blob/master/LICENSE).
 
+## Feedback/Issues
+Please report any issues to the [issues page](https://github.com/sanger-pathogens/Fastaq/issues) or email path-help@sanger.ac.uk.
diff --git a/debian/changelog b/debian/changelog
index a68a3ea..c192df6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+fastaq (3.17.0+git20211102.1.3a993b9-1) UNRELEASED; urgency=low
+
+  * New upstream snapshot.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Wed, 30 Mar 2022 04:54:59 -0000
+
 fastaq (3.17.0-4) unstable; urgency=medium
 
   [ Steffen Moeller ]
diff --git a/pyfastaq.egg-info/PKG-INFO b/pyfastaq.egg-info/PKG-INFO
new file mode 100644
index 0000000..759774a
--- /dev/null
+++ b/pyfastaq.egg-info/PKG-INFO
@@ -0,0 +1,18 @@
+Metadata-Version: 2.1
+Name: pyfastaq
+Version: 3.17.0
+Summary: Script to manipulate FASTA and FASTQ files, plus API for developers
+Home-page: https://github.com/sanger-pathogens/Fastaq
+Author: Martin Hunt
+Author-email: path-help@sanger.ac.uk
+License: GPLv3
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
+License-File: LICENSE
+License-File: AUTHORS
+
+UNKNOWN
+
diff --git a/pyfastaq.egg-info/SOURCES.txt b/pyfastaq.egg-info/SOURCES.txt
new file mode 100644
index 0000000..0117623
--- /dev/null
+++ b/pyfastaq.egg-info/SOURCES.txt
@@ -0,0 +1,237 @@
+AUTHORS
+LICENSE
+MANIFEST.in
+README.md
+setup.py
+pyfastaq/__init__.py
+pyfastaq/caf.py
+pyfastaq/genetic_codes.py
+pyfastaq/intervals.py
+pyfastaq/sequences.py
+pyfastaq/tasks.py
+pyfastaq/utils.py
+pyfastaq.egg-info/PKG-INFO
+pyfastaq.egg-info/SOURCES.txt
+pyfastaq.egg-info/dependency_links.txt
+pyfastaq.egg-info/top_level.txt
+pyfastaq/runners/__init__.py
+pyfastaq/runners/acgtn_only.py
+pyfastaq/runners/add_indels.py
+pyfastaq/runners/caf_to_fastq.py
+pyfastaq/runners/capillary_to_pairs.py
+pyfastaq/runners/chunker.py
+pyfastaq/runners/count_sequences.py
+pyfastaq/runners/deinterleave.py
+pyfastaq/runners/enumerate_names.py
+pyfastaq/runners/expand_nucleotides.py
+pyfastaq/runners/fasta_to_fastq.py
+pyfastaq/runners/filter.py
+pyfastaq/runners/get_ids.py
+pyfastaq/runners/get_seq_flanking_gaps.py
+pyfastaq/runners/interleave.py
+pyfastaq/runners/make_random_contigs.py
+pyfastaq/runners/merge.py
+pyfastaq/runners/replace_bases.py
+pyfastaq/runners/reverse_complement.py
+pyfastaq/runners/scaffolds_to_contigs.py
+pyfastaq/runners/search_for_seq.py
+pyfastaq/runners/sequence_trim.py
+pyfastaq/runners/sort_by_name.py
+pyfastaq/runners/sort_by_size.py
+pyfastaq/runners/split_by_base_count.py
+pyfastaq/runners/strip_illumina_suffix.py
+pyfastaq/runners/to_boulderio.py
+pyfastaq/runners/to_fake_qual.py
+pyfastaq/runners/to_fasta.py
+pyfastaq/runners/to_mira_xml.py
+pyfastaq/runners/to_orfs_gff.py
+pyfastaq/runners/to_perfect_reads.py
+pyfastaq/runners/to_random_subset.py
+pyfastaq/runners/to_tiling_bam.py
+pyfastaq/runners/to_unique_by_id.py
+pyfastaq/runners/translate.py
+pyfastaq/runners/trim_Ns_at_end.py
+pyfastaq/runners/trim_contigs.py
+pyfastaq/runners/trim_ends.py
+pyfastaq/runners/version.py
+pyfastaq/tests/caf_test.py
+pyfastaq/tests/intervals_test.py
+pyfastaq/tests/sequences_test.py
+pyfastaq/tests/tasks_test.py
+pyfastaq/tests/utils_test.py
+pyfastaq/tests/data/sequences_test_3-per-line.fa
+pyfastaq/tests/data/sequences_test_cap_to_read_pairs.fa
+pyfastaq/tests/data/sequences_test_cap_to_read_pairs.fa.paired.gz
+pyfastaq/tests/data/sequences_test_cap_to_read_pairs.fa.unpaired.gz
+pyfastaq/tests/data/sequences_test_deinterleaved_1.fa
+pyfastaq/tests/data/sequences_test_deinterleaved_2.fa
+pyfastaq/tests/data/sequences_test_deinterleaved_bad2_1.fa
+pyfastaq/tests/data/sequences_test_deinterleaved_bad2_2.fa
+pyfastaq/tests/data/sequences_test_deinterleaved_bad_1.fa
+pyfastaq/tests/data/sequences_test_deinterleaved_bad_2.fa
+pyfastaq/tests/data/sequences_test_deinterleaved_no_suffixes_1.fa
+pyfastaq/tests/data/sequences_test_deinterleaved_no_suffixes_2.fa
+pyfastaq/tests/data/sequences_test_empty_file
+pyfastaq/tests/data/sequences_test_enumerate_names.fa
+pyfastaq/tests/data/sequences_test_enumerate_names.fa.out.add_suffix
+pyfastaq/tests/data/sequences_test_enumerate_names.fa.out.keep_suffix
+pyfastaq/tests/data/sequences_test_enumerate_names.fa.out.start.1
+pyfastaq/tests/data/sequences_test_enumerate_names.fa.out.start.1.rename_file
+pyfastaq/tests/data/sequences_test_enumerate_names.fa.out.start.2
+pyfastaq/tests/data/sequences_test_fai_test.fa
+pyfastaq/tests/data/sequences_test_fai_test.fa.fai
+pyfastaq/tests/data/sequences_test_fail_no_AT.fq
+pyfastaq/tests/data/sequences_test_fail_no_plus.fq
+pyfastaq/tests/data/sequences_test_fail_no_qual.fq
+pyfastaq/tests/data/sequences_test_fail_no_seq.fq
+pyfastaq/tests/data/sequences_test_fastaq_replace_bases.expected.fa
+pyfastaq/tests/data/sequences_test_fastaq_replace_bases.fa
+pyfastaq/tests/data/sequences_test_filter_by_ids_file.fa
+pyfastaq/tests/data/sequences_test_filter_by_ids_file.fa.filtered
+pyfastaq/tests/data/sequences_test_filter_by_ids_file.fa.filtered.invert
+pyfastaq/tests/data/sequences_test_filter_by_ids_file.fa.ids
+pyfastaq/tests/data/sequences_test_filter_by_regex.fa
+pyfastaq/tests/data/sequences_test_filter_by_regex.first-char-a.fa
+pyfastaq/tests/data/sequences_test_filter_by_regex.first-of-pair.fa
+pyfastaq/tests/data/sequences_test_filter_by_regex.numeric.fa
+pyfastaq/tests/data/sequences_test_get_seqs_flanking_gaps.fa
+pyfastaq/tests/data/sequences_test_get_seqs_flanking_gaps.fa.out
+pyfastaq/tests/data/sequences_test_gffv3.gff
+pyfastaq/tests/data/sequences_test_gffv3.gff.fasta
+pyfastaq/tests/data/sequences_test_gffv3.gff.to_fasta
+pyfastaq/tests/data/sequences_test_gffv3.no_FASTA_line.gff
+pyfastaq/tests/data/sequences_test_gffv3.no_FASTA_line.gff.to_fasta
+pyfastaq/tests/data/sequences_test_gffv3.no_seq.2.gff
+pyfastaq/tests/data/sequences_test_gffv3.no_seq.gff
+pyfastaq/tests/data/sequences_test_good_file.fq
+pyfastaq/tests/data/sequences_test_good_file.fq.to_fasta
+pyfastaq/tests/data/sequences_test_good_file_mira.xml
+pyfastaq/tests/data/sequences_test_interleaved.fa
+pyfastaq/tests/data/sequences_test_interleaved.fq
+pyfastaq/tests/data/sequences_test_interleaved_bad.fa
+pyfastaq/tests/data/sequences_test_interleaved_with_suffixes.fa
+pyfastaq/tests/data/sequences_test_length_filter.fa
+pyfastaq/tests/data/sequences_test_length_filter.min-0.max-1.fa
+pyfastaq/tests/data/sequences_test_length_filter.min-0.max-inf.fa
+pyfastaq/tests/data/sequences_test_length_filter.min-4.max-4.fa
+pyfastaq/tests/data/sequences_test_make_random_contigs.default.fa
+pyfastaq/tests/data/sequences_test_make_random_contigs.first-42.fa
+pyfastaq/tests/data/sequences_test_make_random_contigs.name-by-letters.fa
+pyfastaq/tests/data/sequences_test_make_random_contigs.prefix-p.fa
+pyfastaq/tests/data/sequences_test_merge_to_one_seq.fa
+pyfastaq/tests/data/sequences_test_merge_to_one_seq.fq
+pyfastaq/tests/data/sequences_test_merge_to_one_seq.merged.fa
+pyfastaq/tests/data/sequences_test_merge_to_one_seq.merged.fq
+pyfastaq/tests/data/sequences_test_not_a_fastaq_file
+pyfastaq/tests/data/sequences_test_one-per-line.fa
+pyfastaq/tests/data/sequences_test_orfs.fa
+pyfastaq/tests/data/sequences_test_orfs.gff
+pyfastaq/tests/data/sequences_test_phylip.interleaved
+pyfastaq/tests/data/sequences_test_phylip.interleaved.to_fasta
+pyfastaq/tests/data/sequences_test_phylip.interleaved2
+pyfastaq/tests/data/sequences_test_phylip.interleaved2.to_fasta
+pyfastaq/tests/data/sequences_test_phylip.made_by_seaview
+pyfastaq/tests/data/sequences_test_phylip.made_by_seaview.to_fasta
+pyfastaq/tests/data/sequences_test_phylip.sequential
+pyfastaq/tests/data/sequences_test_phylip.sequential.to_fasta
+pyfastaq/tests/data/sequences_test_revcomp.fa
+pyfastaq/tests/data/sequences_test_search_string.fa
+pyfastaq/tests/data/sequences_test_search_string.fa.hits
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.1
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.2
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.3
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.4
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.5
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.6
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.coords
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.1
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.2
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.3
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.4
+pyfastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.coords
+pyfastaq/tests/data/sequences_test_split_fixed_size_onefile.fa
+pyfastaq/tests/data/sequences_test_split_fixed_size_onefile.out.fa
+pyfastaq/tests/data/sequences_test_split_fixed_size_onefile.skip_Ns.out.fa
+pyfastaq/tests/data/sequences_test_split_test.fa
+pyfastaq/tests/data/sequences_test_split_test.fa.2.1
+pyfastaq/tests/data/sequences_test_split_test.fa.2.2
+pyfastaq/tests/data/sequences_test_split_test.fa.2.3
+pyfastaq/tests/data/sequences_test_split_test.fa.2.4
+pyfastaq/tests/data/sequences_test_split_test.fa.3.1
+pyfastaq/tests/data/sequences_test_split_test.fa.3.2
+pyfastaq/tests/data/sequences_test_split_test.fa.3.3
+pyfastaq/tests/data/sequences_test_split_test.fa.4.1
+pyfastaq/tests/data/sequences_test_split_test.fa.4.2
+pyfastaq/tests/data/sequences_test_split_test.fa.4.3
+pyfastaq/tests/data/sequences_test_split_test.fa.6.1
+pyfastaq/tests/data/sequences_test_split_test.fa.6.2
+pyfastaq/tests/data/sequences_test_split_test.fa.6.limit2.1
+pyfastaq/tests/data/sequences_test_split_test.fa.6.limit2.2
+pyfastaq/tests/data/sequences_test_split_test.fa.6.limit2.3
+pyfastaq/tests/data/sequences_test_split_test.long.fa
+pyfastaq/tests/data/sequences_test_split_test.long.fa.2.1
+pyfastaq/tests/data/sequences_test_split_test.long.fa.2.2
+pyfastaq/tests/data/sequences_test_strip_after_whitespace.fa
+pyfastaq/tests/data/sequences_test_strip_after_whitespace.fa.to_fasta
+pyfastaq/tests/data/sequences_test_strip_illumina_suffix.fq
+pyfastaq/tests/data/sequences_test_strip_illumina_suffix.fq.stripped
+pyfastaq/tests/data/sequences_test_to_fasta_union.in.fa
+pyfastaq/tests/data/sequences_test_to_fasta_union.out.fa
+pyfastaq/tests/data/sequences_test_to_unique_by_id.fa
+pyfastaq/tests/data/sequences_test_to_unique_by_id.fa.out
+pyfastaq/tests/data/sequences_test_translate.fa
+pyfastaq/tests/data/sequences_test_translate.fa.frame0
+pyfastaq/tests/data/sequences_test_translate.fa.frame1
+pyfastaq/tests/data/sequences_test_translate.fa.frame2
+pyfastaq/tests/data/sequences_test_trim_Ns_at_end.fa
+pyfastaq/tests/data/sequences_test_trim_Ns_at_end.fa.trimmed
+pyfastaq/tests/data/sequences_test_trim_contigs.fa
+pyfastaq/tests/data/sequences_test_trim_contigs.fa.out
+pyfastaq/tests/data/sequences_test_trimmed.fq
+pyfastaq/tests/data/sequences_test_untrimmed.fq
+pyfastaq/tests/data/tasks_test_expend_nucleotides.in.fa
+pyfastaq/tests/data/tasks_test_expend_nucleotides.in.fq
+pyfastaq/tests/data/tasks_test_expend_nucleotides.out.fa
+pyfastaq/tests/data/tasks_test_expend_nucleotides.out.fq
+pyfastaq/tests/data/tasks_test_fasta_to_fake_qual.in.fa
+pyfastaq/tests/data/tasks_test_fasta_to_fake_qual.out.default.qual
+pyfastaq/tests/data/tasks_test_fasta_to_fake_qual.out.q42.qual
+pyfastaq/tests/data/tasks_test_filter_paired_both_pass.in_1.fa
+pyfastaq/tests/data/tasks_test_filter_paired_both_pass.in_2.fa
+pyfastaq/tests/data/tasks_test_filter_paired_both_pass.out_1.fa
+pyfastaq/tests/data/tasks_test_filter_paired_both_pass.out_2.fa
+pyfastaq/tests/data/tasks_test_filter_paired_one_pass.in_1.fa
+pyfastaq/tests/data/tasks_test_filter_paired_one_pass.in_2.fa
+pyfastaq/tests/data/tasks_test_filter_paired_one_pass.out_1.fa
+pyfastaq/tests/data/tasks_test_filter_paired_one_pass.out_2.fa
+pyfastaq/tests/data/tasks_test_length_offsets_from_fai.fa
+pyfastaq/tests/data/tasks_test_length_offsets_from_fai.fa.fai
+pyfastaq/tests/data/tasks_test_make_long_reads.input.fa
+pyfastaq/tests/data/tasks_test_make_long_reads.output.fa
+pyfastaq/tests/data/tasks_test_mean_length.fa
+pyfastaq/tests/data/tasks_test_sequence_trim_1.fa
+pyfastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
+pyfastaq/tests/data/tasks_test_sequence_trim_2.fa
+pyfastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
+pyfastaq/tests/data/tasks_test_sequences_to_trim.fa
+pyfastaq/tests/data/tasks_test_sort_by_name.in.fa
+pyfastaq/tests/data/tasks_test_sort_by_name.out.fa
+pyfastaq/tests/data/tasks_test_sort_by_size.in.fa
+pyfastaq/tests/data/tasks_test_sort_by_size.out.fa
+pyfastaq/tests/data/tasks_test_sort_by_size.out.rev.fa
+pyfastaq/tests/data/tasks_test_stats_from_fai.in.empty.fai
+pyfastaq/tests/data/tasks_test_stats_from_fai.in.fai
+pyfastaq/tests/data/tasks_test_to_boulderio.in.fa
+pyfastaq/tests/data/tasks_test_to_boulderio.out.boulder
+pyfastaq/tests/data/tasks_test_to_fastg.fasta
+pyfastaq/tests/data/tasks_test_to_fastg.fastg
+pyfastaq/tests/data/tasks_test_to_fastg.ids_to_circularise
+pyfastaq/tests/data/utils_test_file_transpose.txt
+pyfastaq/tests/data/utils_test_file_transposed.txt
+pyfastaq/tests/data/utils_test_not_really_zipped.gz
+pyfastaq/tests/data/utils_test_scaffolds.fa
+pyfastaq/tests/data/utils_test_scaffolds.fa.to_contigs.fa
+pyfastaq/tests/data/utils_test_scaffolds.fa.to_contigs.number_contigs.fa
+pyfastaq/tests/data/utils_test_system_call.txt
+scripts/fastaq
\ No newline at end of file
diff --git a/pyfastaq.egg-info/dependency_links.txt b/pyfastaq.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/pyfastaq.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/pyfastaq.egg-info/top_level.txt b/pyfastaq.egg-info/top_level.txt
new file mode 100644
index 0000000..1bd4fb7
--- /dev/null
+++ b/pyfastaq.egg-info/top_level.txt
@@ -0,0 +1 @@
+pyfastaq
diff --git a/pyfastaq/runners/filter.py b/pyfastaq/runners/filter.py
index 5e0964f..52c39fa 100644
--- a/pyfastaq/runners/filter.py
+++ b/pyfastaq/runners/filter.py
@@ -10,6 +10,7 @@ def run(description):
     parser.add_argument('--regex', help='If given, only reads with a name matching the regular expression will be kept')
     parser.add_argument('--ids_file', help='If given, only reads whose ID is in th given file will be used. One ID per line of file.', metavar='FILENAME')
     parser.add_argument('-v', '--invert', action='store_true', help='Only keep sequences that do not match the filters')
+    parser.add_argument('--check_comments', action='store_true', help='Search the header comments also for the given regex. Can only be specified with --regex')
 
     mate_group = parser.add_argument_group('Mate file for read pairs options')
     mate_group.add_argument('--mate_in', help='Name of mates input file. If used, must also provide --mate_out', metavar='FILENAME')
@@ -29,4 +30,5 @@ def run(description):
                  mate_in=options.mate_in,
                  mate_out=options.mate_out,
                  both_mates_pass=options.both_mates_pass,
+                 check_comments=options.check_comments,
     )
diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py
index f5e1c52..8d68914 100644
--- a/pyfastaq/tasks.py
+++ b/pyfastaq/tasks.py
@@ -6,6 +6,11 @@ from pyfastaq import sequences, utils, caf
 
 class Error (Exception): pass
 
+
+class IncompatibleParametersError(Exception):
+    pass
+
+
 def acgtn_only(infile, outfile):
     '''Replace every non-acgtn (case insensitve) character with an N'''
     f = utils.open_file_write(outfile)
@@ -284,7 +289,12 @@ def filter(
       mate_in=None,
       mate_out=None,
       both_mates_pass=True,
+      check_comments=False
     ):
+    if check_comments and not regex:
+        raise IncompatibleParametersError(
+            "--check_comments can only be passed with --regex"
+        )
 
     ids_from_file = set()
     if ids_file is not None:
@@ -309,7 +319,7 @@ def filter(
     def passes(seq, name_regex):
         # remove trailing comments from FASTQ readname lines
         matches = name_regex.match(seq.id)
-        if matches is not None:
+        if matches is not None and not check_comments:
             clean_seq_id = matches.group(1)
         else:
             clean_seq_id = seq.id
diff --git a/pyfastaq/tests/data/caf_test.caf b/pyfastaq/tests/data/caf_test.caf
deleted file mode 100644
index f1ad7ff..0000000
--- a/pyfastaq/tests/data/caf_test.caf
+++ /dev/null
@@ -1,48 +0,0 @@
-
-DNA : read1.p1k
-NACG
-TAN
-
-BaseQuality : read1.p1k
-4 24 42 43 40 30 8
-
-Sequence : read1.p1k
-Is_read
-SCF_File read1.p1kSCF
-Template read1
-Insert_size 2000 4000
-Ligation_no 12345
-Primer Universal_primer
-Strand Forward
-Dye Dye_terminator
-Clone clone1
-Seq_vec SVEC 1 15 puc19
-Sequencing_vector "puc19"
-Clipping QUAL 2 6
-ProcessStatus PASS
-Asped 2006-7-5
-Unpadded
-Align_to_SCF 1 1272 1 1272
-
-DNA : read2.p1k
-CG
-ACGTT
-
-BaseQuality : read2.p1k
-9 9 40 41 42 42 4
-
-Sequence : read2.p1k
-Is_read
-SCF_File read2.p1kSCF
-Template read2
-Insert_size 2000 4000
-Ligation_no 23456
-Primer Universal_primer
-Strand Forward
-Dye Dye_terminator
-Clone clone2
-Seq_vec SVEC 1 32 puc19
-Sequencing_vector "puc19"
-ProcessStatus PASS
-Unpadded
-Align_to_SCF 1 1347 1 1347
diff --git a/pyfastaq/tests/data/caf_test.to_fastq.no_trim.min_length_0.fq b/pyfastaq/tests/data/caf_test.to_fastq.no_trim.min_length_0.fq
deleted file mode 100644
index 5519aad..0000000
--- a/pyfastaq/tests/data/caf_test.to_fastq.no_trim.min_length_0.fq
+++ /dev/null
@@ -1,8 +0,0 @@
-@read1.p1k
-NACGTAN
-+
-%9KLI?)
-@read2.p1k
-CGACGTT
-+
-**IJKK%
diff --git a/pyfastaq/tests/data/caf_test.to_fastq.trim.min_length_6.fq b/pyfastaq/tests/data/caf_test.to_fastq.trim.min_length_6.fq
deleted file mode 100644
index cc6d7c0..0000000
--- a/pyfastaq/tests/data/caf_test.to_fastq.trim.min_length_6.fq
+++ /dev/null
@@ -1,4 +0,0 @@
-@read2.p1k
-CGACGTT
-+
-**IJKK%
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq b/pyfastaq/tests/data/readnames_with_comments.fastq
deleted file mode 100644
index 8adbb17..0000000
--- a/pyfastaq/tests/data/readnames_with_comments.fastq
+++ /dev/null
@@ -1,20 +0,0 @@
-@A1234::15950:1663 stuff_to_remove
-TCGTAAGCCTGCTCGAGC
-+
->>3>>44@CFFFGG??EE
-@A1234::16080:1672 stuff_to_remove
-CCATCGTCTTCGCCCTGC
-+
-111AA1AAAAF1EAEGAG
-@A1234::12967:1677 stuff_to_remove
-CTCCAGCATCGTGCAAAT
-+
-3>>A?@CBDFAAACCBAF
-@A1234::16114:1681 stuff_to_remove
-TTGATATAGAGATACTTC
-+
-3>A3A5D55DBFFDFGGG
-@A1234::16669:1683 stuff_to_remove
-CTGCGCGACTATACGCAG
-+
-1>1>>>A1>D?FF10E0A
\ No newline at end of file
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq.filtered b/pyfastaq/tests/data/readnames_with_comments.fastq.filtered
deleted file mode 100644
index f277d15..0000000
--- a/pyfastaq/tests/data/readnames_with_comments.fastq.filtered
+++ /dev/null
@@ -1,4 +0,0 @@
-@A1234::12967:1677 stuff_to_remove
-CTCCAGCATCGTGCAAAT
-+
-3>>A?@CBDFAAACCBAF
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq.ids b/pyfastaq/tests/data/readnames_with_comments.fastq.ids
deleted file mode 100644
index 9343080..0000000
--- a/pyfastaq/tests/data/readnames_with_comments.fastq.ids
+++ /dev/null
@@ -1 +0,0 @@
-A1234::12967:1677
\ No newline at end of file
diff --git a/pyfastaq/tests/data/sequences_test.embl b/pyfastaq/tests/data/sequences_test.embl
deleted file mode 100644
index b40c185..0000000
--- a/pyfastaq/tests/data/sequences_test.embl
+++ /dev/null
@@ -1,203 +0,0 @@
-ID   seq1; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa aaaaaaaaa       1859
-//
-ID   seq2; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa ccccccccc       1859
-//
-
diff --git a/pyfastaq/tests/data/sequences_test.embl.bad b/pyfastaq/tests/data/sequences_test.embl.bad
deleted file mode 100644
index 10ca1ab..0000000
--- a/pyfastaq/tests/data/sequences_test.embl.bad
+++ /dev/null
@@ -1,202 +0,0 @@
-ID   seq1; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa aaaaaaaaa       1859
-//
-ID   seq2; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa ccccccccc       1859
-//
-
diff --git a/pyfastaq/tests/data/sequences_test.embl.bad2 b/pyfastaq/tests/data/sequences_test.embl.bad2
deleted file mode 100644
index 1dd59b1..0000000
--- a/pyfastaq/tests/data/sequences_test.embl.bad2
+++ /dev/null
@@ -1,202 +0,0 @@
-ID   seq1; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa aaaaaaaaa       1859
-ID   seq2; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa ccccccccc       1859
-//
-
diff --git a/pyfastaq/tests/data/sequences_test.embl.to_fasta b/pyfastaq/tests/data/sequences_test.embl.to_fasta
deleted file mode 100644
index 89e2230..0000000
--- a/pyfastaq/tests/data/sequences_test.embl.to_fasta
+++ /dev/null
@@ -1,64 +0,0 @@
->seq1
-aaacaaaccaaatatggattttattgtagccatatttgctctgtttgttattagctcatt
-cacaattacttccacaaatgcagttgaagcttctactcttcttgacataggtaacctgag
-tcggagcagttttcctcgtggcttcatctttggtgctggatcttcagcataccaatttga
-aggtgcagtaaacgaaggcggtagaggaccaagtatttgggataccttcacccataaata
-tccagaaaaaataagggatggaagcaatgcagacatcacggttgaccaatatcaccgcta
-caaggaagatgttgggattatgaaggatcaaaatatggattcgtatagattctcaatctc
-ttggccaagaatactcccaaagggaaagttgagcggaggcataaatcacgaaggaatcaa
-atattacaacaaccttatcaacgaactattggctaacggtatacaaccatttgtaactct
-ttttcattgggatcttccccaagtcttagaagatgagtatggtggtttcttaaactccgg
-tgtaataaatgattttcgagactatacggatctttgcttcaaggaatttggagatagagt
-gaggtattggagtactctaaatgagccatgggtgtttagcaattctggatatgcactagg
-aacaaatgcaccaggtcgatgttcggcctccaacgtggccaagcctggtgattctggaac
-aggaccttatatagttacacacaatcaaattcttgctcatgcagaagctgtacatgtgta
-taagactaaataccaggcatatcaaaagggaaagataggcataacgttggtatctaactg
-gttaatgccacttgatgataatagcataccagatataaaggctgccgagagatcacttga
-cttccaatttggattgtttatggaacaattaacaacaggagattattctaagagcatgcg
-gcgtatagttaaaaaccgattacctaagttctcaaaattcgaatcaagcctagtgaatgg
-ttcatttgattttattggtataaactattactcttctagttatattagcaatgccccttc
-acatggcaatgccaaacccagttactcaacaaatcctatgaccaatatttcatttgaaaa
-acatgggatacccttaggtccaagggctgcttcaatttggatatatgtttatccatatat
-gtttatccaagaggacttcgagatcttttgttacatattaaaaataaatataacaatcct
-gcaattttcaatcactgaaaatggtatgaatgaattcaacgatgcaacacttccagtaga
-agaagctcttttgaatacttacagaattgattactattaccgtcacttatactacattcg
-ttctgcaatcagggctggctcaaatgtgaagggtttttacgcatggtcatttttggactg
-taatgaatggtttgcaggctttactgttcgttttggattaaactttgtagattagaaaga
-tggattaaaaaggtaccctaagctttctgcccaatggtacaagaactttctcaaaagaaa
-ctagctagtattattaaaagaactttgtagtagattacagtacatcgtttgaagttgagt
-tggtgcacctaattaaataaaagaggttactcttaacatatttttaggccattcgttgtg
-aagttgttaggctgttatttctattatactatgttgtagtaataagtgcattgttgtacc
-agaagctatgatcataactataggttgatccttcatgtatcagtttgatgttgagaatac
-tttgaattaaaagtctttttttatttttttaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
->seq2
-aaacaaaccaaatatggattttattgtagccatatttgctctgtttgttattagctcatt
-cacaattacttccacaaatgcagttgaagcttctactcttcttgacataggtaacctgag
-tcggagcagttttcctcgtggcttcatctttggtgctggatcttcagcataccaatttga
-aggtgcagtaaacgaaggcggtagaggaccaagtatttgggataccttcacccataaata
-tccagaaaaaataagggatggaagcaatgcagacatcacggttgaccaatatcaccgcta
-caaggaagatgttgggattatgaaggatcaaaatatggattcgtatagattctcaatctc
-ttggccaagaatactcccaaagggaaagttgagcggaggcataaatcacgaaggaatcaa
-atattacaacaaccttatcaacgaactattggctaacggtatacaaccatttgtaactct
-ttttcattgggatcttccccaagtcttagaagatgagtatggtggtttcttaaactccgg
-tgtaataaatgattttcgagactatacggatctttgcttcaaggaatttggagatagagt
-gaggtattggagtactctaaatgagccatgggtgtttagcaattctggatatgcactagg
-aacaaatgcaccaggtcgatgttcggcctccaacgtggccaagcctggtgattctggaac
-aggaccttatatagttacacacaatcaaattcttgctcatgcagaagctgtacatgtgta
-taagactaaataccaggcatatcaaaagggaaagataggcataacgttggtatctaactg
-gttaatgccacttgatgataatagcataccagatataaaggctgccgagagatcacttga
-cttccaatttggattgtttatggaacaattaacaacaggagattattctaagagcatgcg
-gcgtatagttaaaaaccgattacctaagttctcaaaattcgaatcaagcctagtgaatgg
-ttcatttgattttattggtataaactattactcttctagttatattagcaatgccccttc
-acatggcaatgccaaacccagttactcaacaaatcctatgaccaatatttcatttgaaaa
-acatgggatacccttaggtccaagggctgcttcaatttggatatatgtttatccatatat
-gtttatccaagaggacttcgagatcttttgttacatattaaaaataaatataacaatcct
-gcaattttcaatcactgaaaatggtatgaatgaattcaacgatgcaacacttccagtaga
-agaagctcttttgaatacttacagaattgattactattaccgtcacttatactacattcg
-ttctgcaatcagggctggctcaaatgtgaagggtttttacgcatggtcatttttggactg
-taatgaatggtttgcaggctttactgttcgttttggattaaactttgtagattagaaaga
-tggattaaaaaggtaccctaagctttctgcccaatggtacaagaactttctcaaaagaaa
-ctagctagtattattaaaagaactttgtagtagattacagtacatcgtttgaagttgagt
-tggtgcacctaattaaataaaagaggttactcttaacatatttttaggccattcgttgtg
-aagttgttaggctgttatttctattatactatgttgtagtaataagtgcattgttgtacc
-agaagctatgatcataactataggttgatccttcatgtatcagtttgatgttgagaatac
-tttgaattaaaagtctttttttatttttttaaaaaaaaaaaaaaaaaaaaccccccccc
diff --git a/pyfastaq/tests/data/sequences_test.fa b/pyfastaq/tests/data/sequences_test.fa
deleted file mode 100644
index 22da7a3..0000000
--- a/pyfastaq/tests/data/sequences_test.fa
+++ /dev/null
@@ -1,19 +0,0 @@
->1
-ACGTA
->2
-A
-
-C
-GT
-
-A
-
->3
-
-
-ACGTA
->4
-ACGTA
-
-
-
diff --git a/pyfastaq/tests/data/sequences_test.fa.ids b/pyfastaq/tests/data/sequences_test.fa.ids
deleted file mode 100644
index 94ebaf9..0000000
--- a/pyfastaq/tests/data/sequences_test.fa.ids
+++ /dev/null
@@ -1,4 +0,0 @@
-1
-2
-3
-4
diff --git a/pyfastaq/tests/data/sequences_test.fa.qual b/pyfastaq/tests/data/sequences_test.fa.qual
deleted file mode 100644
index 435d562..0000000
--- a/pyfastaq/tests/data/sequences_test.fa.qual
+++ /dev/null
@@ -1,17 +0,0 @@
->1
-40 40 40
-40 40
-
->2
-40
-40
-
-40
-40 40
->3
-
-40 40 40 40 40
-
->4
-40 40 40      40 40
-
diff --git a/pyfastaq/tests/data/sequences_test.fa.qual.bad b/pyfastaq/tests/data/sequences_test.fa.qual.bad
deleted file mode 100644
index 92c8d8d..0000000
--- a/pyfastaq/tests/data/sequences_test.fa.qual.bad
+++ /dev/null
@@ -1,17 +0,0 @@
->1
-40 40 40
-40 40
-
->3
-40
-40
-
-40
-40 40
->3
-
-40 40 40 40 40
-
->4
-40 40 40      40 40
-
diff --git a/pyfastaq/tests/data/sequences_test.fasta_to_fastq.fq b/pyfastaq/tests/data/sequences_test.fasta_to_fastq.fq
deleted file mode 100644
index 48f7282..0000000
--- a/pyfastaq/tests/data/sequences_test.fasta_to_fastq.fq
+++ /dev/null
@@ -1,16 +0,0 @@
-@1
-ACGTA
-+
-IIIII
-@2
-ACGTA
-+
-IIIII
-@3
-ACGTA
-+
-IIIII
-@4
-ACGTA
-+
-IIIII
diff --git a/pyfastaq/tests/data/sequences_test.gbk b/pyfastaq/tests/data/sequences_test.gbk
deleted file mode 100644
index 40f1afb..0000000
--- a/pyfastaq/tests/data/sequences_test.gbk
+++ /dev/null
@@ -1,170 +0,0 @@
-LOCUS       NAME1         5028 bp    DNA             PLN       21-JUN-1999
-DEFINITION  Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p
-            (AXL2) and Rev7p (REV7) genes, complete cds.
-ACCESSION   U49845
-VERSION     U49845.1  GI:1293613
-KEYWORDS    .
-SOURCE      Saccharomyces cerevisiae (baker's yeast)
-  ORGANISM  Saccharomyces cerevisiae
-            Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;
-            Saccharomycetales; Saccharomycetaceae; Saccharomyces.
-REFERENCE   1  (bases 1 to 5028)
-  AUTHORS   Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.
-  TITLE     Cloning and sequence of REV7, a gene whose function is required for
-            DNA damage-induced mutagenesis in Saccharomyces cerevisiae
-  JOURNAL   Yeast 10 (11), 1503-1509 (1994)
-  PUBMED    7871890
-REFERENCE   2  (bases 1 to 5028)
-  AUTHORS   Roemer,T., Madden,K., Chang,J. and Snyder,M.
-  TITLE     Selection of axial growth sites in yeast requires Axl2p, a novel
-            plasma membrane glycoprotein
-  JOURNAL   Genes Dev. 10 (7), 777-793 (1996)
-  PUBMED    8846915
-REFERENCE   3  (bases 1 to 5028)
-  AUTHORS   Roemer,T.
-  TITLE     Direct Submission
-  JOURNAL   Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New
-            Haven, CT, USA
-FEATURES             Location/Qualifiers
-     source          1..5028
-                     /organism="Saccharomyces cerevisiae"
-                     /db_xref="taxon:4932"
-                     /chromosome="IX"
-                     /map="9"
-     CDS             <1..206
-                     /codon_start=3
-                     /product="TCP1-beta"
-                     /protein_id="AAA98665.1"
-                     /db_xref="GI:1293614"
-                     /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA
-                     AEVLLRVDNIIRARPRTANRQHM"
-     gene            687..3158
-                     /gene="AXL2"
-     CDS             687..3158
-                     /gene="AXL2"
-                     /note="plasma membrane glycoprotein"
-                     /codon_start=1
-                     /function="required for axial budding pattern of S.
-                     cerevisiae"
-                     /product="Axl2p"
-                     /protein_id="AAA98666.1"
-                     /db_xref="GI:1293615"
-                     /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF
-                     TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN
-                     VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE
-                     VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE
-                     TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV
-                     YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG
-                     DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ
-                     DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA
-                     NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA
-                     CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN
-                     NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ
-                     SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS
-                     YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK
-                     HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL
-                     VDFSNKSNVNVGQVKDIHGRIPEML"
-     gene            complement(3300..4037)
-                     /gene="REV7"
-     CDS             complement(3300..4037)
-                     /gene="REV7"
-                     /codon_start=1
-                     /product="Rev7p"
-                     /protein_id="AAA98667.1"
-                     /db_xref="GI:1293616"
-                     /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ
-                     FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD
-                     KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR
-                     RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK
-                     LISGDDKILNGVYSQYEEGESIFGSLF"
-ORIGIN
-        1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
-       61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
-      121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa
-      181 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc
-//
-LOCUS       NAME2         5028 bp    DNA             PLN       21-JUN-1999
-DEFINITION  Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p
-            (AXL2) and Rev7p (REV7) genes, complete cds.
-ACCESSION   U49845
-VERSION     U49845.1  GI:1293613
-KEYWORDS    .
-SOURCE      Saccharomyces cerevisiae (baker's yeast)
-  ORGANISM  Saccharomyces cerevisiae
-            Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;
-            Saccharomycetales; Saccharomycetaceae; Saccharomyces.
-REFERENCE   1  (bases 1 to 5028)
-  AUTHORS   Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.
-  TITLE     Cloning and sequence of REV7, a gene whose function is required for
-            DNA damage-induced mutagenesis in Saccharomyces cerevisiae
-  JOURNAL   Yeast 10 (11), 1503-1509 (1994)
-  PUBMED    7871890
-REFERENCE   2  (bases 1 to 5028)
-  AUTHORS   Roemer,T., Madden,K., Chang,J. and Snyder,M.
-  TITLE     Selection of axial growth sites in yeast requires Axl2p, a novel
-            plasma membrane glycoprotein
-  JOURNAL   Genes Dev. 10 (7), 777-793 (1996)
-  PUBMED    8846915
-REFERENCE   3  (bases 1 to 5028)
-  AUTHORS   Roemer,T.
-  TITLE     Direct Submission
-  JOURNAL   Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New
-            Haven, CT, USA
-FEATURES             Location/Qualifiers
-     source          1..5028
-                     /organism="Saccharomyces cerevisiae"
-                     /db_xref="taxon:4932"
-                     /chromosome="IX"
-                     /map="9"
-     CDS             <1..206
-                     /codon_start=3
-                     /product="TCP1-beta"
-                     /protein_id="AAA98665.1"
-                     /db_xref="GI:1293614"
-                     /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA
-                     AEVLLRVDNIIRARPRTANRQHM"
-     gene            687..3158
-                     /gene="AXL2"
-     CDS             687..3158
-                     /gene="AXL2"
-                     /note="plasma membrane glycoprotein"
-                     /codon_start=1
-                     /function="required for axial budding pattern of S.
-                     cerevisiae"
-                     /product="Axl2p"
-                     /protein_id="AAA98666.1"
-                     /db_xref="GI:1293615"
-                     /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF
-                     TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN
-                     VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE
-                     VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE
-                     TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV
-                     YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG
-                     DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ
-                     DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA
-                     NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA
-                     CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN
-                     NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ
-                     SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS
-                     YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK
-                     HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL
-                     VDFSNKSNVNVGQVKDIHGRIPEML"
-     gene            complement(3300..4037)
-                     /gene="REV7"
-     CDS             complement(3300..4037)
-                     /gene="REV7"
-                     /codon_start=1
-                     /product="Rev7p"
-                     /protein_id="AAA98667.1"
-                     /db_xref="GI:1293616"
-                     /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ
-                     FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD
-                     KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR
-                     RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK
-                     LISGDDKILNGVYSQYEEGESIFGSLF"
-ORIGIN
-        1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
-       61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
-      121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa
-      181 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgaaa
-//
diff --git a/pyfastaq/tests/data/sequences_test.gbk.to_fasta b/pyfastaq/tests/data/sequences_test.gbk.to_fasta
deleted file mode 100644
index 270d9ec..0000000
--- a/pyfastaq/tests/data/sequences_test.gbk.to_fasta
+++ /dev/null
@@ -1,10 +0,0 @@
->NAME1
-gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattg
-ccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagct
-ctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaa
-tgccatgactcagattctaattttaagctattcaatttctctttgatc
->NAME2
-gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattg
-ccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagct
-ctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaa
-tgccatgactcagattctaattttaagctattcaatttctctttgaaa
diff --git a/pyfastaq/tests/data/sequences_test.line_length3.fa b/pyfastaq/tests/data/sequences_test.line_length3.fa
deleted file mode 100644
index a77df6d..0000000
--- a/pyfastaq/tests/data/sequences_test.line_length3.fa
+++ /dev/null
@@ -1,12 +0,0 @@
->1
-ACG
-TA
->2
-ACG
-TA
->3
-ACG
-TA
->4
-ACG
-TA
diff --git a/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_non_unique.in.fa b/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_non_unique.in.fa
deleted file mode 100644
index c2e1044..0000000
--- a/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_non_unique.in.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->1 spam
-ACGT
->1 eggs
-A
->2
-GTTTG
diff --git a/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_non_unique.out.fa b/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_non_unique.out.fa
deleted file mode 100644
index d3080af..0000000
--- a/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_non_unique.out.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->1
-ACGT
->1
-A
->2
-GTTTG
diff --git a/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_unique.in.fa b/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_unique.in.fa
deleted file mode 100644
index d3000d3..0000000
--- a/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_unique.in.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->1 abcde
-ACGT
->2 abcde
-G
->3 hello
-GTACCA
diff --git a/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_unique.out.fa b/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_unique.out.fa
deleted file mode 100644
index dc8e2e1..0000000
--- a/pyfastaq/tests/data/sequences_test.to_fasta.strip_after_whitespace_unique.out.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->1
-ACGT
->2
-G
->3
-GTACCA
diff --git a/pyfastaq/tests/data/test_acgtn_only.expected.fa b/pyfastaq/tests/data/test_acgtn_only.expected.fa
deleted file mode 100644
index 9f6a22d..0000000
--- a/pyfastaq/tests/data/test_acgtn_only.expected.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-acgtACGTnN
->seq2
-aNcNgNNT
diff --git a/pyfastaq/tests/data/test_acgtn_only.in.fa b/pyfastaq/tests/data/test_acgtn_only.in.fa
deleted file mode 100644
index 9b14690..0000000
--- a/pyfastaq/tests/data/test_acgtn_only.in.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-acgtACGTnN
->seq2
-aXcRg.?T
diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py
index a8856bb..b25c1a3 100644
--- a/pyfastaq/tests/tasks_test.py
+++ b/pyfastaq/tests/tasks_test.py
@@ -3,6 +3,7 @@
 import sys
 import filecmp
 import os
+import tempfile
 import unittest
 from pyfastaq import tasks, sequences
 
@@ -172,6 +173,25 @@ class TestFilter(unittest.TestCase):
             self.assertTrue(filecmp.cmp(correct_files[i], outfile))
             os.unlink(outfile)
 
+    def test_regex_check_comments_filter(self):
+        '''With check_comments true, only reads whose comment matches the regex are kept'''
+        infile = tempfile.NamedTemporaryFile(suffix=".fa", mode="w+")
+        infile.write(
+            ">read1 foo=bar\nAGCT\n>read2 bar=foo\nGGG\n>read3\nGGGG\n>read4 foo=ba\n"
+            "GCA\n>read5foo=bar\nGCAT"
+        )
+        infile.seek(0)
+        # Raw string: '\s' in a plain literal is an invalid escape sequence
+        regex = r'\sfoo=bar'
+        outfile = tempfile.NamedTemporaryFile(suffix=".fa", mode="w+")
+        tasks.filter(infile.name, outfile.name, regex=regex, check_comments=True)
+        with open(outfile.name) as handle:
+            actual = handle.read()
+
+        # Only read1 has whitespace immediately followed by foo=bar in its header
+        expected = ">read1 foo=bar\nAGCT\n"
+        self.assertEqual(actual, expected)
+
     def test_ids_from_file_filter(self):
         '''Test that can extract reads from a file of read names'''
         infile = os.path.join(data_dir, 'sequences_test_filter_by_ids_file.fa')
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..8bfd5a1
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,4 @@
+[egg_info]
+tag_build = 
+tag_date = 0
+