Commit 47a544832c5305ba60a217e0160cf63717507bf0 - ariba

+3

-3

.travis.yml less more

7	7	- libgfortran3
8	8	- libncurses5-dev
9	9	python:
10		- "3.4"
	10	- '3.4'
11	11	sudo: false
12	12	install:
13		- "source ./install_dependencies.sh"
	13	- source ./install_dependencies.sh
14	14	script:
15		- "python setup.py test"
	15	- python setup.py test

+33

-16

Dockerfile less more

0		#
1		# This container will install ARIBA from master
2		#
3		FROM debian:testing
	0	FROM ubuntu:17.04
4	1
5		#
6		# Authorship
7		#
8		MAINTAINER ap13@sanger.ac.uk
	2	RUN apt-get update
	3	RUN apt-get install --no-install-recommends -y \
	4	build-essential \
	5	cd-hit \
	6	curl \
	7	git \
	8	libbz2-dev \
	9	liblzma-dev \
	10	mummer \
	11	python \
	12	python3-dev \
	13	python3-setuptools \
	14	python3-pip \
	15	python3-tk \
	16	python3-matplotlib \
	17	unzip \
	18	wget \
	19	zlib1g-dev
9	20
10		#
11		# Install the dependancies
12		#
13		RUN apt-get update -qq && apt-get install -y git bowtie2 cd-hit fastaq libc6 libfml0 libgcc1 libminimap0 libstdc++6 mummer python3 python3-setuptools python3-dev python3-pysam python3-pymummer python3-dendropy gcc g++ zlib1g-dev
	21	RUN wget -q http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.2.9/bowtie2-2.2.9-linux-x86_64.zip \
	22	&& unzip bowtie2-2.2.9-linux-x86_64.zip \
	23	&& rm bowtie2-2.2.9-linux-x86_64.zip
14	24
15		#
16		# Get the latest code from github and install
17		#
18		RUN git clone https://github.com/sanger-pathogens/ariba.git && cd ariba && python3 setup.py install
	25	# Need MPLBACKEND="agg" to make matplotlib work without X11, otherwise get the error
	26	# _tkinter.TclError: no display name and no $DISPLAY environment variable
	27	ENV ARIBA_BOWTIE2=$PWD/bowtie2-2.2.9/bowtie2 ARIBA_CDHIT=cdhit-est MPLBACKEND="agg"
	28
	29	RUN git clone https://github.com/sanger-pathogens/ariba.git \
	30	&& cd ariba \
	31	&& git checkout v2.10.1 \
	32	&& python3 setup.py test \
	33	&& python3 setup.py install
	34
	35	CMD ariba

+106

-38

README.md less more

0		ARIBA
1		=====
	0	# ARIBA
2	1
3	2	Antimicrobial Resistance Identification By Assembly
4	3
5		For methods and benchmarking, please see the [preprint on biorxiv][ariba biorxiv].
6
7
8	4	For how to use ARIBA, please see the [ARIBA wiki page][ARIBA wiki].
9	5
10
11
12		Installation
13		------------
14
15		ARIBA has the following dependencies, which need to be installed:
	6	[![Build Status](https://travis-ci.org/sanger-pathogens/ariba.svg?branch=master)](https://travis-ci.org/sanger-pathogens/ariba)
	7	[![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-brightgreen.svg)](https://github.com/sanger-pathogens/ariba/blob/master/LICENSE)
	8	[![status](https://img.shields.io/badge/MGEN-10.1099%2Fmgen.0.000131-brightgreen.svg)](http://mgen.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000131)
	9
	10	## Contents
	11	* [Introduction](#introduction)
	12	* [Quick Start](#quick-start)
	13	* [Installation](#installation)
	14	* [Required dependencies](#required-dependencies)
	15	* [Using pip3](#using-pip3)
	16	* [From Source](#from-source)
	17	* [Docker](#docker)
	18	* [Debian (testing)](#debian-testing)
	19	* [Ubuntu](#ubuntu)
	20	* [Dependencies and environment variables](#dependencies-and-environment-variables)
	21	* [Temporary files](#temporary-files)
	22	* [Usage](#usage)
	23	* [License](#license)
	24	* [Feedback/Issues](#feedbackissues)
	25	* [Citation](#citation)
	26
	27	## Introduction
	28	ARIBA is a tool that identifies antibiotic resistance genes by running local assemblies.
	29	It can also be used for [MLST calling](https://github.com/sanger-pathogens/ariba/wiki/MLST-calling-with-ARIBA).
	30
	31	The input is a FASTA file of reference sequences (can be a mix of genes and noncoding sequences) and paired sequencing reads. ARIBA reports which of the reference sequences were found, plus detailed information on the quality of the assemblies and any variants between the sequencing reads and the reference sequences.
	32
	33	## Quick Start
	34	Get reference data, for instance from [CARD](https://card.mcmaster.ca/). See [getref](https://github.com/sanger-pathogens/ariba/wiki/Task%3A-getref) for a full list.
	35
	36	ariba getref card out.card
	37
	38	Prepare reference data for ARIBA:
	39
	40	ariba prepareref -f out.card.fa -m out.card.tsv out.card.prepareref
	41
	42	Run local assemblies and call variants:
	43
	44	ariba run out.card.prepareref reads1.fastq reads2.fastq out.run
	45
	46	Summarise data from several runs:
	47
	48	ariba summary out.summary out.run1/report1.tsv out.run2/report2.tsv out.run3/report3.tsv
	49
	50	Please read the [ARIBA wiki page][ARIBA wiki] for full usage instructions.
	51
	52	## Installation
	53
	54	If you encounter an issue when installing ARIBA please contact your local system administrator. If you encounter a bug please log it [here](https://github.com/sanger-pathogens/ariba/issues) or email us at ariba-help@sanger.ac.uk.
	55
	56	### Required dependencies
16	57	* [Python3][python] version >= 3.3.2
17	58	* [Bowtie2][bowtie2] version >= 2.1.0
18	59	* [CD-HIT][cdhit] version >= 4.6
19	60	* [MUMmer][mummer] version >= 3.23
20	61
21
22		Once the dependencies are installed, install ARIBA using pip:
	62	ARIBA also depends on several Python packages, all of which are available
	63	via pip. Installing ARIBA with pip3 will get these automatically if they
	64	are not already installed:
	65	* dendropy >= 4.2.0
	66	* matplotlib (no minimum version required, but only tested on 2.0.0)
	67	* pyfastaq >= 3.12.0
	68	* pysam >= 0.9.1
	69	* pymummer >= 0.10.1
	70
	71	### Using pip3
	72	Install ARIBA using pip:
23	73
24	74	pip3 install ariba
25	75
26		ARIBA also depends on several Python packages, all of which are available
27		via pip, so the above command will get those automatically if they
28		are not installed. The packages are dendropy >= 4.2.0, matplotlib (no
29		minimum version required, but only tested on 2.0.0),
30		pyfastaq >= 3.12.0, pysam >= 0.9.1, and pymummer >= 0.10.1.
31
32		Alternatively, you can download the latest release from this github repository,
33		or clone the repository. Then run the tests:
	76	### From Source
	77	Download the latest release from this github repository or clone it. Run the tests:
34	78
35	79	python3 setup.py test
36	80

39	83	python3 setup.py install
40	84
41	85	### Docker
42		ARIBA can be run in a Docker container. First of all install Docker, then to install ARIBA run:
	86	ARIBA can be run in a Docker container. First install Docker, then install ARIBA:
43	87
44	88	docker pull sangerpathogens/ariba
45	89
46		To use ARIBA you would use a command such as this (substituting in your directories), where your files are assumed to be stored in /home/ubuntu/data:
	90	To use ARIBA use a command like this (substituting in your directories), where your files are assumed to be stored in /home/ubuntu/data:
47	91
48	92	docker run --rm -it -v /home/ubuntu/data:/data sangerpathogens/ariba ariba -h
49	93

53	97
54	98	sudo apt-get install ariba
55	99
56
57	100	### Ubuntu
58
59	101	You can use `apt-get` (see above), or to ensure you get the latest version of ARIBA, the following commands can be
60	102	used to install ARIBA and its dependencies. This was tested on a new instance of Ubuntu 16.04.
61	103

89	131	it would try to use
90	132
91	133	$HOME/bowtie2-2.1.0/bowtie2-build
92
93	134
94		### Temporary files
	135	## Temporary files
95	136
96
97	137	ARIBA can temporarily make a large number of files whilst running, which
98	138	are put in a temporary directory made by ARIBA. The total size of these
99	139	files is small, but there can be many of them. This can be a

127	167	directory, and temporary files are kept. It is intended for
128	168	debugging.
129	169
130
131
132		Usage
133		-----
134
135		Please read the [ARIBA wiki page][ARIBA wiki] for usage instructions.
136
137
138
139		Build status: [![Build Status](https://travis-ci.org/sanger-pathogens/ariba.svg?branch=master)](https://travis-ci.org/sanger-pathogens/ariba)
	170	## Usage
	171	usage: ariba <command> <options>
	172
	173	optional arguments:
	174	-h, --help show this help message and exit
	175
	176	Available commands:
	177
	178	aln2meta Converts multi-aln fasta and SNPs to metadata
	179	expandflag Expands flag column of report file
	180	flag Translate the meaning of a flag
	181	getref Download reference data
	182	micplot Make violin/dot plots using MIC data
	183	prepareref Prepare reference data for input to "run"
	184	pubmlstget Download species from PubMLST and make db
	185	pubmlstspecies
	186	Get list of available species from PubMLST
	187	refquery Get cluster or sequence info from prepareref output
	188	run Run the local assembly pipeline
	189	summary Summarise multiple reports made by "run"
	190	test Run small built-in test dataset
	191	version Get versions and exit
	192
	193	Please read the [ARIBA wiki page][ARIBA wiki] for full usage instructions.
	194
	195	## License
	196	ARIBA is free software, licensed under [GPLv3](https://github.com/sanger-pathogens/ariba/blob/master/LICENSE).
	197
	198	## Feedback/Issues
	199	Please report any issues to the [issues page](https://github.com/sanger-pathogens/ariba/issues) or email ariba-help@sanger.ac.uk
	200
	201	## Citation
	202	If you use this software please cite:
	203
	204	ARIBA: rapid antimicrobial resistance genotyping directly from sequencing reads
	205	Hunt M, Mather AE, Sánchez-Busó L, Page AJ, Parkhill J, Keane JA, Harris SR.
	206	Microbial Genomics 2017. doi: [10.1099/mgen.0.000131](http://mgen.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000131)
	207
140	208
141	209	[ariba biorxiv]: http://biorxiv.org/content/early/2017/04/07/118000
142	210	[bowtie2]: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml

+112

-2

ariba/assembly.py less more

4	4	import pymummer
5	5	import fermilite_ariba
6	6	from ariba import common, faidx, mapping, bam_parse, external_progs, ref_seq_chooser
	7	import shlex
7	8
8	9	class Error (Exception): pass
9	10

28	29	nucmer_breaklen=200,
29	30	extern_progs=None,
30	31	clean=True,
	32	spades_mode="wgs",
	33	spades_options=None,
	34	threads=1
31	35	):
32	36	self.reads1 = os.path.abspath(reads1)
33	37	self.reads2 = os.path.abspath(reads2)

49	53	self.nucmer_min_len = nucmer_min_len
50	54	self.nucmer_breaklen = nucmer_breaklen
51	55	self.clean = clean
	56	self.spades_mode = spades_mode
	57	self.spades_options = spades_options
	58	self.threads = threads
52	59
53	60	if extern_progs is None:
54	61	self.extern_progs = external_progs.ExternalProgs()

92	99
93	100	self.assembled_ok = (got_from_fermilite == 0)
94	101	os.chdir(cwd)
	102
	103	@staticmethod
	104	def _check_spades_log_file(logfile):
	105	'''SPAdes can fail with a strange error. Stop everything if this happens'''
	106	f = pyfastaq.utils.open_file_read(logfile)
	107
	108	for line in f:
	109	if line.startswith('== Error == system call for:') and line.rstrip().endswith('finished abnormally, err code: -7'):
	110	pyfastaq.utils.close(f)
	111	print('Error running SPAdes. Cannot continue. This is the error from the log file', logfile, '...', file=sys.stderr)
	112	print(line, file=sys.stderr)
	113	raise Error('Fatal error ("err code: -7") running spades. Cannot continue')
	114
	115	pyfastaq.utils.close(f)
	116	return True
	117
	118	def _assemble_with_spades(self):
	119	cwd = os.getcwd()
	120	self.assembled_ok = False
	121	try:
	122	try:
	123	os.chdir(self.working_dir)
	124	except:
	125	raise Error('Error chdir ' + self.working_dir)
	126	spades_exe = self.extern_progs.exe('spades')
	127	if not spades_exe:
	128	raise Error("Spades executable has not been found")
	129	spades_options = self.spades_options
	130	if spades_options is not None:
	131	spades_options = shlex.split(self.spades_options)
	132	if self.spades_mode == "rna":
	133	spades_options = ["--rna"] + (["-k","127"] if spades_options is None else spades_options)
	134	spades_out_seq_base = "transcripts.fasta"
	135	elif self.spades_mode == "sc":
	136	spades_options = ["--sc"] + (["-k", "33,55,77,99,127","--careful"] if spades_options is None else spades_options)
	137	spades_out_seq_base = "contigs.fasta"
	138	elif self.spades_mode == "wgs":
	139	spades_options = ["-k", "33,55,77,99,127","--careful"] if spades_options is None else spades_options
	140	spades_out_seq_base = "contigs.fasta"
	141	else:
	142	raise ValueError("Unknown spades_mode value: {}".format(self.spades_mode))
	143	asm_cmd = [spades_exe, "-t", str(self.threads), "--pe1-1", self.reads1, "--pe1-2", self.reads2, "-o", self.assembler_dir] + \
	144	spades_options
	145	asm_ok,err = common.syscall(asm_cmd, verbose=True, verbose_filehandle=self.log_fh, shell=False, allow_fail=True)
	146	if not asm_ok:
	147	print('Assembly finished with errors. These are the errors:', file=self.log_fh)
	148	print(err, file=self.log_fh)
	149	print('\nEnd of spades errors\n', file=self.log_fh)
	150	else:
	151
	152	spades_log = os.path.join(self.assembler_dir, 'spades.log')
	153	if os.path.exists(spades_log):
	154	self._check_spades_log_file(spades_log)
	155
	156	with open(spades_log) as f:
	157	print('\n______________ SPAdes log ___________________\n', file=self.log_fh)
	158	for line in f:
	159	print(line.rstrip(), file=self.log_fh)
	160	print('\n______________ End of SPAdes log _________________\n', file=self.log_fh)
	161
	162	spades_warnings = os.path.join(self.assembler_dir, 'warnings.log')
	163	if os.path.exists(spades_warnings):
	164	with open(spades_warnings) as f:
	165	print('\n______________ SPAdes warnings ___________________\n', file=self.log_fh)
	166	for line in f:
	167	print(line.rstrip(), file=self.log_fh)
	168	print('\n______________ End of SPAdes warnings _________________\n', file=self.log_fh)
	169
	170	## The fermilite module generates contig names that look like `cluster_1.l15.c17.ctg.1` where 'cluster_1'==self.contig_name_prefix
	171	## the whole structure of the contig name is expected in several places downstream where it is parsed into individual components.
	172	## For example, it is parsed into the l and c parts in ref_seq_chooser (although the parts are not actually used).
	173	## This is the code from the fermilite module that generates the contig ID string:
	174	## ofs << ">" << namePrefix << ".l" << overlap << ".c" << minCount << ".ctg." << i + 1 << '\n'
	175	##
	176	## We generate the same contig name structure here using dummy values for overlap and minCount, in order
	177	## to avoid disrupting the downstream code.
	178	## Note that the fermilite module generates multiple versions of the assembly on a grid of l and c values,
	179	## and ref_seq_chooser then picks a single "best" (l,c) version based on coverage/identity of the nucmer
	180	## alignment to the reference. Spades generates a single version of the assembly, so ref_seq_chooser
	181	## can only pick that one version.
	182
	183	spades_out_seq = os.path.join(self.assembler_dir,spades_out_seq_base)
	184	## No need really to use general-purpose pyfastaq.sequences.file_reader here and pay performance cost for
	185	## its multi-format line tests since we are only replacing the IDs in a pre-defined format
	186	if os.path.exists(spades_out_seq):
	187	with open(spades_out_seq,"r") as inp, open(self.all_assembly_contigs_fa,"w") as out:
	188	pref = self.contig_name_prefix
	189	i_cont = 0
	190	for line in inp:
	191	if line.startswith(">"):
	192	i_cont += 1
	193	line = ">{}.l15.c17.ctg.{}\n".format(pref,i_cont)
	194	out.write(line)
	195	if i_cont > 0:
	196	self.assembled_ok = True
	197	if self.clean:
	198	print('Deleting assembly directory', self.assembler_dir, file=self.log_fh)
	199	shutil.rmtree(self.assembler_dir,ignore_errors=True)
	200	finally:
	201	os.chdir(cwd)
95	202
96	203
97	204	@staticmethod

147	254
148	255
149	256	def run(self):
150		self._assemble_with_fermilite()
	257	if self.assembler == 'fermilite':
	258	self._assemble_with_fermilite()
	259	elif self.assembler == "spades":
	260	self._assemble_with_spades()
151	261	print('Finished running assemblies', flush=True, file=self.log_fh)
152	262	self.sequences = {}
153	263

207	317	self.reads2,
208	318	self.final_assembly_fa,
209	319	self.final_assembly_bam[:-4],
210		threads=1,
	320	threads=self.threads,
211	321	sort=True,
212	322	bowtie2=self.extern_progs.exe('bowtie2'),
213	323	bowtie2_version=self.extern_progs.version('bowtie2'),

+68

-31

ariba/cluster.py less more

42	42	max_allele_freq=0.90,
43	43	unique_threshold=0.03,
44	44	max_gene_nt_extend=30,
45		spades_other_options=None,
	45	spades_mode="rna", #["rna","wgs"]
	46	spades_options=None,
46	47	clean=True,
47	48	extern_progs=None,
48	49	random_seed=42,
	50	threads_total=1
49	51	):
50	52	self.root_dir = os.path.abspath(root_dir)
51	53	self.read_store = read_store

70	72	self.sspace_k = sspace_k
71	73	self.sspace_sd = sspace_sd
72	74	self.reads_insert = reads_insert
73		self.spades_other_options = spades_other_options
	75	self.spades_mode = spades_mode
	76	self.spades_options = spades_options
74	77
75	78	self.reads_for_assembly1 = os.path.join(self.root_dir, 'reads_for_assembly_1.fq')
76	79	self.reads_for_assembly2 = os.path.join(self.root_dir, 'reads_for_assembly_2.fq')

94	97	self.max_gene_nt_extend = max_gene_nt_extend
95	98	self.status_flag = flag.Flag()
96	99	self.clean = clean
	100
	101	self.threads_total = threads_total
	102	self.remaining_clusters = None
97	103
98	104	self.assembly_dir = os.path.join(self.root_dir, 'Assembly')
99	105	self.final_assembly_fa = os.path.join(self.root_dir, 'assembly.fa')

137	143	for s in wanted_signals:
138	144	signal.signal(s, self._receive_signal)
139	145
	146	def _update_threads(self):
	147	"""Update available thread count post-construction.
	148	To be called any number of times from run() method"""
	149	if self.remaining_clusters is not None:
	150	self.threads = max(1,self.threads_total//self.remaining_clusters.value)
	151	#otherwise just keep the current (initial) value
	152	print("{} detected {} threads available to it".format(self.name,self.threads), file = self.log_fh)
	153
	154	def _report_completion(self):
	155	"""Update shared counters to signal that we are done with this cluster.
	156	Call just before exiting run() method (in a finally clause)"""
	157	rem_clust = self.remaining_clusters
	158	if rem_clust is not None:
	159	# -= is non-atomic, need to acquire a lock
	160	with self.remaining_clusters_lock:
	161	rem_clust.value -= 1
	162	# we do not need this object anymore
	163	self.remaining_clusters = None
	164	print("{} reported completion".format(self.name), file=self.log_fh)
140	165
141	166	def _atexit(self):
142	167	if self.log_fh is not None:
143	168	pyfastaq.utils.close(self.log_fh)
144	169	self.log_fh = None
145
146	170
147	171	def _receive_signal(self, signum, stack):
148	172	print('Signal', signum, 'received in cluster', self.name + '... Stopping!', file=sys.stderr, flush=True)

189	213	def _clean_file(self, filename):
190	214	if self.clean:
191	215	print('Deleting file', filename, file=self.log_fh)
192		os.unlink(filename)
	216	try: #protect against OSError: [Errno 16] Device or resource busy: '.nfs0000000010f0f04f000003c9' and such
	217	os.unlink(filename)
	218	except:
	219	pass
193	220
194	221
195	222	def _clean(self):

268	295	return total_reads
269	296
270	297
271		def run(self):
272		self._set_up_input_files()
273
274		for fname in [self.all_reads1, self.all_reads2, self.references_fa]:
275		if not os.path.exists(fname):
276		raise Error('File ' + fname + ' not found. Cannot continue')
277
278		original_dir = os.getcwd()
279		os.chdir(self.root_dir)
280
	298	def run(self,remaining_clusters=None,remaining_clusters_lock=None):
281	299	try:
282		self._run()
283		except Error as err:
	300	self.remaining_clusters = remaining_clusters
	301	self.remaining_clusters_lock = remaining_clusters_lock
	302	self._update_threads()
	303	self._set_up_input_files()
	304
	305	for fname in [self.all_reads1, self.all_reads2, self.references_fa]:
	306	if not os.path.exists(fname):
	307	raise Error('File ' + fname + ' not found. Cannot continue')
	308
	309	original_dir = os.getcwd()
	310	os.chdir(self.root_dir)
	311
	312	try:
	313	self._run()
	314	except Error as err:
	315	os.chdir(original_dir)
	316	print('Error running cluster! Error was:', err, sep='\n', file=self.log_fh)
	317	pyfastaq.utils.close(self.log_fh)
	318	self.log_fh = None
	319	raise Error('Error running cluster ' + self.name + '!')
	320
284	321	os.chdir(original_dir)
285		print('Error running cluster! Error was:', err, sep='\n', file=self.log_fh)
	322	print('Finished', file=self.log_fh, flush=True)
	323	print('{:_^79}'.format(' LOG FILE END ' + self.name + ' '), file=self.log_fh, flush=True)
	324
	325	# This stops multiprocessing complaining with the error:
	326	# multiprocessing.pool.MaybeEncodingError: Error sending result: '[<ariba.cluster.Cluster object at 0x7ffa50f8bcd0>]'. Reason: 'TypeError("cannot serialize '_io.TextIOWrapper' object",)'
286	327	pyfastaq.utils.close(self.log_fh)
287	328	self.log_fh = None
288		raise Error('Error running cluster ' + self.name + '!')
289
290		os.chdir(original_dir)
291		print('Finished', file=self.log_fh, flush=True)
292		print('{:_^79}'.format(' LOG FILE END ' + self.name + ' '), file=self.log_fh, flush=True)
293
294		# This stops multiprocessing complaining with the error:
295		# multiprocessing.pool.MaybeEncodingError: Error sending result: '[<ariba.cluster.Cluster object at 0x7ffa50f8bcd0>]'. Reason: 'TypeError("cannot serialize '_io.TextIOWrapper' object",)'
296		pyfastaq.utils.close(self.log_fh)
297		self.log_fh = None
	329	finally:
	330	self._report_completion()
298	331
299	332
300	333	def _run(self):

309	342	print('\nUsing', made_reads, 'from a total of', self.total_reads, 'for assembly.', file=self.log_fh, flush=True)
310	343	print('Assembling reads:', file=self.log_fh, flush=True)
311	344
	345	self._update_threads()
312	346	self.assembly = assembly.Assembly(
313	347	self.reads_for_assembly1,
314	348	self.reads_for_assembly2,

322	356	contig_name_prefix=self.name,
323	357	assembler=self.assembler,
324	358	extern_progs=self.extern_progs,
325		clean=self.clean
	359	clean=self.clean,
	360	spades_mode=self.spades_mode,
	361	spades_options=self.spades_options,
	362	threads=self.threads
326	363	)
327	364
328	365	self.assembly.run()

331	368	self._clean_file(self.reads_for_assembly2)
332	369	if self.clean:
333	370	print('Deleting Assembly directory', self.assembly_dir, file=self.log_fh, flush=True)
334		shutil.rmtree(self.assembly_dir)
	371	shutil.rmtree(self.assembly_dir,ignore_errors=True)
335	372
336	373
337	374	if self.assembled_ok and self.assembly.ref_seq_name is not None:

341	378	self.is_variant_only = '1' if is_variant_only else '0'
342	379
343	380	print('\nAssembly was successful\n\nMapping reads to assembly:', file=self.log_fh, flush=True)
344
	381	self._update_threads()
345	382	mapping.run_bowtie2(
346	383	self.all_reads1,
347	384	self.all_reads2,
348	385	self.final_assembly_fa,
349	386	self.final_assembly_bam[:-4],
350		threads=1,
	387	threads=self.threads,
351	388	sort=True,
352	389	bowtie2=self.extern_progs.exe('bowtie2'),
353	390	bowtie2_preset='very-sensitive-local',

+134

-77

ariba/clusters.py less more

13	13
14	14	class Error (Exception): pass
15	15
16
17		def _run_cluster(obj, verbose, clean, fails_dir):
	16	# Passing shared objects (remaining_clusters) through here and thus making them
	17	# explicit arguments to Pool.starmap when running this function. That seems to be
	18	# a recommended safe transfer mechanism as opposed to making them attributes of a
	19	# pre-constructed 'obj' variable (although the docs are a bit hazy on that)
	20	def _run_cluster(obj, verbose, clean, fails_dir, remaining_clusters, remaining_clusters_lock):
18	21	failed_clusters = os.listdir(fails_dir)
19	22
20	23	if len(failed_clusters) > 0:

24	27	if verbose:
25	28	print('Start running cluster', obj.name, 'in directory', obj.root_dir, flush=True)
26	29	try:
27		obj.run()
	30	obj.run(remaining_clusters=remaining_clusters,remaining_clusters_lock=remaining_clusters_lock)
28	31	except:
29	32	print('Failed cluster:', obj.name, file=sys.stderr)
30	33	with open(os.path.join(fails_dir, obj.name), 'w'):

37	40	if verbose:
38	41	print('Deleting cluster dir', obj.root_dir, flush=True)
39	42	if os.path.exists(obj.root_dir):
40		shutil.rmtree(obj.root_dir)
	43	try:
	44	shutil.rmtree(obj.root_dir)
	45	except:
	46	pass
41	47
42	48	return obj
43	49

55	61	threads=1,
56	62	verbose=False,
57	63	assembler='fermilite',
58		spades_other=None,
	64	spades_mode='rna',
	65	spades_options=None,
59	66	max_insert=1000,
60	67	min_scaff_depth=10,
61	68	nucmer_min_id=90,

85	92	self.logs_dir = os.path.join(self.outdir, 'Logs')
86	93
87	94	self.assembler = assembler
88		assert self.assembler in ['fermilite']
89	95	self.assembly_kmer = assembly_kmer
90	96	self.assembly_coverage = assembly_coverage
91		self.spades_other = spades_other
	97	self.spades_mode = spades_mode
	98	self.spades_options = spades_options
92	99
93	100	self.cdhit_files_prefix = os.path.join(self.refdata_dir, 'cdhit')
94	101	self.cdhit_cluster_representatives_fa = self.cdhit_files_prefix + '.cluster_representatives.fa'

134	141	os.mkdir(d)
135	142	except:
136	143	raise Error('Error mkdir ' + d)
137
138	144	if tmp_dir is None:
139	145	if 'ARIBA_TMPDIR' in os.environ:
140	146	tmp_dir = os.path.abspath(os.environ['ARIBA_TMPDIR'])

371	377	counter = 0
372	378	cluster_list = []
373	379	self.log_files = []
	380
	381	# How the thread count within each Cluster.run is managed:
	382	# We want to handle those cases where there are more total threads allocated to the application than there are clusters
	383	# remaining to run (for example,
	384	# there are only two references, and eight threads). If we keep the default thread value of 1 in cluster.Cluster,
	385	# then we will be wasting the allocated threads. The most simple approach would be to divide all threads equally between clusters
	386	# before calling Pool.map. Multithreaded external programs like Spades and Bowtie2 are then called with multiple threads. That should
	387	# never be slower than keeping just one thread in cluster.Cluster, except maybe in the extreme cases when (if)
	388	# a multi-threaded run of the external program takes longer wall-clock time than a single-threaded one.
	389	# However, this solution would always keep
	390	# Cluster.threads=1 if the initial number of clusters > number of total threads. This can result in inefficiency at the
	391	# tail of the Pool.map execution flow - when the clusters are getting finished overall, we are waiting for the completion of
	392	# fewer and fewer remaining
	393	# single-threaded cluster tasks while more and more total threads are staying idle. We mitigate this through the following approach:
	394	# - Create a shared Value object that holds the number of remaining clusters (remaining_clusters).
	395	# - Each Cluster.run decrements the remaining_clusters when it completes
	396	# - Cluster.run sets its own thread count to max(1,threads_total//remaining_clusters). This can be done as many times
	397	# as needed at various points within Cluster.run (e.g. once before Spades is called, and again before Bowtie2 is called),
	398	# in order to catch more idle threads.
	399	# This is a simple and conservative approach to adaptively use all threads at the tail of the map flow. It
	400	# never over-subscribes the threads, and it does not require any extra blocking within Cluster.run in order to
	401	# wait for threads becoming available.
374	402
375	403	for cluster_name in sorted(self.cluster_to_dir):
376	404	counter += 1

405	433	reads_insert=self.insert_size,
406	434	sspace_k=self.min_scaff_depth,
407	435	sspace_sd=self.insert_sspace_sd,
408		threads=1, # clusters now run in parallel, so this should always be 1!
	436	threads=1, # initially set to 1, then will adaptively self-modify while running
409	437	assembled_threshold=self.assembled_threshold,
410	438	unique_threshold=self.unique_threshold,
411	439	max_gene_nt_extend=self.max_gene_nt_extend,
412		spades_other_options=self.spades_other,
	440	spades_mode=self.spades_mode,
	441	spades_options=self.spades_options,
413	442	clean=self.clean,
414	443	extern_progs=self.extern_progs,
	444	threads_total=self.threads
415	445	))
416
	446	# Here is why we use proxy objects from a Manager process below
	447	# instead of simple shared multiprocessing.Value counter:
	448	# Shared memory objects in multiprocessing use tempfile module to
	449	# create temporary directory, then create temporary file inside it,
	450	# memmap the file and unlink it. If TMPDIR envar points to a NFS
	451	# mount, the final cleanup handler from multiprocessing will often
	452	# return an exception due to a stale NFS file (.nfsxxxx) from a shutil.rmtree
	453	# call. See help on tempfile.gettempdir() for how the default location of
	454	# temporary files is selected. The exception is caught in except clause
	455	# inside multiprocessing cleanup, and only a harmless traceback is printed,
	456	# but it looks very spooky to the user and causes confusion. We use
	457	# instead shared proxies from the Manager. Those do not rely on shared
	458	# memory, and thus bypass the NFS issues. The counter is accessed infrequently
	459	# relative to computations, so the performance does not suffer.
	460	# default authkey in the manager will be some generated random-looking string
	461	manager = multiprocessing.Manager()
	462	remaining_clusters = manager.Value('l',len(cluster_list))
	463	# manager.Value does not provide access to the internal RLock that we need for
	464	# implementing atomic -=, so we need to carry around a separate RLock object.
	465	remaining_clusters_lock = manager.RLock()
417	466	try:
418	467	if self.threads > 1:
419	468	self.pool = multiprocessing.Pool(self.threads)
420		cluster_list = self.pool.starmap(_run_cluster, zip(cluster_list, itertools.repeat(self.verbose), itertools.repeat(self.clean), itertools.repeat(self.fails_dir)))
	469	cluster_list = self.pool.starmap(_run_cluster, zip(cluster_list, itertools.repeat(self.verbose), itertools.repeat(self.clean), itertools.repeat(self.fails_dir),
	470	itertools.repeat(remaining_clusters),itertools.repeat(remaining_clusters_lock)))
	471	# harvest the pool as soon as we no longer need it
	472	self.pool.close()
	473	self.pool.join()
421	474	else:
422	475	for c in cluster_list:
423		_run_cluster(c, self.verbose, self.clean, self.fails_dir)
	476	_run_cluster(c, self.verbose, self.clean, self.fails_dir, remaining_clusters, remaining_clusters_lock)
424	477	except:
425	478	self.clusters_all_ran_ok = False
	479
	480	if self.verbose:
	481	print('Final value of remaining_clusters counter:', remaining_clusters)
	482	remaining_clusters = None
	483	remaining_clusters_lock = None
	484	manager.shutdown()
426	485
427	486	if len(os.listdir(self.fails_dir)) > 0:
428	487	self.clusters_all_ran_ok = False

497	556
498	557	def _clean(self):
499	558	if self.clean:
500		shutil.rmtree(self.fails_dir)
	559	shutil.rmtree(self.fails_dir,ignore_errors=True)
501	560
502	561	try:
503	562	self.tmp_dir_obj.cleanup()

506	565
507	566	if self.verbose:
508	567	print('Deleting Logs directory', self.logs_dir)
509		try:
510		shutil.rmtree(self.logs_dir)
511		except:
512		pass
	568	shutil.rmtree(self.logs_dir,ignore_errors=True)
513	569
514	570	try:
515	571	if self.verbose:

550	606
551	607	def _run(self):
552	608	cwd = os.getcwd()
553		os.chdir(self.outdir)
554		self.write_versions_file(cwd)
555		self._map_and_cluster_reads()
556		self.log_files = None
557
558		if len(self.cluster_to_dir) > 0:
559		got_insert_data_ok = self._set_insert_size_data()
560		if not got_insert_data_ok:
561		print('WARNING: not enough proper read pairs (found ' + str(self.proper_pairs) + ') to determine insert size.', file=sys.stderr)
562		print('This probably means that very few reads were mapped at all. No local assemblies will be run', file=sys.stderr)
563		if self.verbose:
564		print('Not enough proper read pairs mapped to determine insert size. Skipping all assemblies.', flush=True)
	609	try:
	610	os.chdir(self.outdir)
	611	self.write_versions_file(cwd)
	612	self._map_and_cluster_reads()
	613	self.log_files = None
	614
	615	if len(self.cluster_to_dir) > 0:
	616	got_insert_data_ok = self._set_insert_size_data()
	617	if not got_insert_data_ok:
	618	print('WARNING: not enough proper read pairs (found ' + str(self.proper_pairs) + ') to determine insert size.', file=sys.stderr)
	619	print('This probably means that very few reads were mapped at all. No local assemblies will be run', file=sys.stderr)
	620	if self.verbose:
	621	print('Not enough proper read pairs mapped to determine insert size. Skipping all assemblies.', flush=True)
	622	else:
	623	if self.verbose:
	624	print('{:_^79}'.format(' Assembling each cluster '))
	625	print('Will run', self.threads, 'cluster(s) in parallel', flush=True)
	626	self._init_and_run_clusters()
	627	if self.verbose:
	628	print('Finished assembling clusters\n')
565	629	else:
566	630	if self.verbose:
567		print('{:_^79}'.format(' Assembling each cluster '))
568		print('Will run', self.threads, 'cluster(s) in parallel', flush=True)
569		self._init_and_run_clusters()
	631	print('No reads mapped. Skipping all assemblies', flush=True)
	632	print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr)
	633
	634	if not self.clusters_all_ran_ok:
	635	raise Error('At least one cluster failed! Stopping...')
	636
	637	if self.verbose:
	638	print('{:_^79}'.format(' Writing reports '), flush=True)
	639	print('Making', self.report_file_all_tsv)
	640	self._write_report(self.clusters, self.report_file_all_tsv)
	641
	642	if self.verbose:
	643	print('Making', self.report_file_filtered)
	644	rf = report_filter.ReportFilter(infile=self.report_file_all_tsv)
	645	rf.run(self.report_file_filtered)
	646
	647	if self.verbose:
	648	print()
	649	print('{:_^79}'.format(' Writing fasta of assembled sequences '), flush=True)
	650	print(self.catted_assembled_seqs_fasta, 'and', self.catted_genes_matching_refs_fasta, flush=True)
	651	self._write_catted_assembled_seqs_fasta(self.catted_assembled_seqs_fasta)
	652	self._write_catted_genes_matching_refs_fasta(self.catted_genes_matching_refs_fasta)
	653	self._write_catted_assemblies_fasta(self.catted_assemblies_fasta)
	654
	655	if self.log_files is not None:
	656	clusters_log_file = os.path.join(self.outdir, 'log.clusters.gz')
570	657	if self.verbose:
571		print('Finished assembling clusters\n')
572		else:
573		if self.verbose:
574		print('No reads mapped. Skipping all assemblies', flush=True)
575		print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr)
576
577		if not self.clusters_all_ran_ok:
578		raise Error('At least one cluster failed! Stopping...')
579
580		if self.verbose:
581		print('{:_^79}'.format(' Writing reports '), flush=True)
582		print('Making', self.report_file_all_tsv)
583		self._write_report(self.clusters, self.report_file_all_tsv)
584
585		if self.verbose:
586		print('Making', self.report_file_filtered)
587		rf = report_filter.ReportFilter(infile=self.report_file_all_tsv)
588		rf.run(self.report_file_filtered)
589
590		if self.verbose:
591		print()
592		print('{:_^79}'.format(' Writing fasta of assembled sequences '), flush=True)
593		print(self.catted_assembled_seqs_fasta, 'and', self.catted_genes_matching_refs_fasta, flush=True)
594		self._write_catted_assembled_seqs_fasta(self.catted_assembled_seqs_fasta)
595		self._write_catted_genes_matching_refs_fasta(self.catted_genes_matching_refs_fasta)
596		self._write_catted_assemblies_fasta(self.catted_assemblies_fasta)
597
598		if self.log_files is not None:
599		clusters_log_file = os.path.join(self.outdir, 'log.clusters.gz')
	658	print()
	659	print('{:_^79}'.format(' Catting cluster log files '), flush=True)
	660	print('Writing file', clusters_log_file, flush=True)
	661	common.cat_files(self.log_files, clusters_log_file)
	662
600	663	if self.verbose:
601	664	print()
602		print('{:_^79}'.format(' Catting cluster log files '), flush=True)
603		print('Writing file', clusters_log_file, flush=True)
604		common.cat_files(self.log_files, clusters_log_file)
605
606		if self.verbose:
607		print()
608		print('{:_^79}'.format(' Cleaning files '), flush=True)
609		self._clean()
610
611		Clusters._write_mlst_reports(self.mlst_profile_file, self.report_file_filtered, self.mlst_reports_prefix, verbose=self.verbose)
612
613		if self.clusters_all_ran_ok and self.verbose:
614		print('\nAll done!\n')
615
616		os.chdir(cwd)
	665	print('{:_^79}'.format(' Cleaning files '), flush=True)
	666	self._clean()
	667
	668	Clusters._write_mlst_reports(self.mlst_profile_file, self.report_file_filtered, self.mlst_reports_prefix, verbose=self.verbose)
	669
	670	if self.clusters_all_ran_ok and self.verbose:
	671	print('\nAll done!\n')
	672	finally:
	673	os.chdir(cwd)

+7

-3

ariba/common.py less more

8	8	class Error (Exception): pass
9	9
10	10
11		def syscall(cmd, allow_fail=False, verbose=False, verbose_filehandle=sys.stdout, print_errors=True):
	11	def syscall(cmd, allow_fail=False, verbose=False, verbose_filehandle=sys.stdout, print_errors=True, shell=True):
12	12	if verbose:
13	13	print('syscall:', cmd, flush=True, file=verbose_filehandle)
	14	if not shell:
	15	print('syscall string:', " ".join('"{}"'.format(_) for _ in cmd), flush=True, file=verbose_filehandle)
14	16	try:
15		subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
	17	subprocess.check_output(cmd, shell=shell, stderr=subprocess.STDOUT)
16	18	except subprocess.CalledProcessError as error:
17	19	errors = error.output.decode()
18	20	if print_errors:

25	27	return False, errors
26	28	else:
27	29	sys.exit(1)
28
	30	except Exception as msg:
	31	print("Unexpected exception: ", msg, file=sys.stderr)
	32	raise
29	33	return True, None
30	34
31	35

+13

-3

ariba/external_progs.py less more

12	12	'bowtie2': 'bowtie2',
13	13	'cdhit': 'cd-hit-est',
14	14	'nucmer' : 'nucmer',
	15	'spades' : 'spades.py'
15	16	}
16	17
17	18

22	23	'bowtie2': ('--version', re.compile('.bowtie2.version (.*)$')),
23	24	'cdhit': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
24	25	'nucmer': ('--version', re.compile('^NUCmer \(NUCleotide MUMmer\) version ([0-9\.]+)')),
	26	'spades': ('--version', re.compile('SPAdes\s+v([0-9\.]+)'))
25	27	}
26	28
27	29

29	31	'bowtie2': '2.1.0',
30	32	'cdhit': '4.6',
31	33	'nucmer': '3.1',
	34	'spades': '3.11.0'
32	35	}
33	36
	37	prog_optional = set([
	38	'spades'
	39	])
34	40
35	41	class ExternalProgs:
36	42	def __init__(self, verbose=False, fail_on_error=True):

46	52	warnings = []
47	53
48	54	for prog in sorted(prog_to_default):
	55	msg_sink = errors
	56	if prog in prog_optional:
	57	msg_sink = warnings
	58
49	59	prog_exe = self._get_exe(prog)
50	60	self.progs[prog] = shutil.which(prog_exe)
51	61
52	62	if self.progs[prog] is None:
53		errors.append(prog + ' not found in path. Looked for ' + prog_exe)
	63	msg_sink.append(prog + ' not found in path. Looked for ' + prog_exe)
54	64
55	65	self.version_report.append('\t'.join([prog, 'NA', 'NOT_FOUND']))
56	66	if verbose:

62	72	if got_version:
63	73	self.versions[prog] = version
64	74	if prog in min_versions and LooseVersion(version) < LooseVersion(min_versions[prog]):
65		errors.append(' '.join(['Found version', version, 'of', prog, 'which is too low! Please update to at least', min_versions[prog] + '. Found it here:', prog_exe]))
	75	msg_sink.append(' '.join(['Found version', version, 'of', prog, 'which is too low! Please update to at least', min_versions[prog] + '. Found it here:', prog_exe]))
66	76	else:
67	77	self.versions[prog] = None
68		errors.append(version)
	78	msg_sink.append(version)
69	79	version = 'ERROR'
70	80
71	81	self.version_report.append('\t'.join([prog, version, self.progs[prog]]))

+2

-1

ariba/mapping.py less more

0	0	import os
1	1	import sys
	2	from distutils.version import LooseVersion
2	3	import pysam
3	4	import pyfastaq
4	5	from ariba import common

81	82	'-2', reads_rev,
82	83	]
83	84
84		if bowtie2_version == '2.3.1':
	85	if LooseVersion(bowtie2_version) >= LooseVersion('2.3.1'):
85	86	map_cmd.append('--score-min G,1,10')
86	87
87	88	if remove_both_unmapped:

+2

-0

ariba/mic_plotter.py less more

4	4	import os
5	5	import itertools
6	6	import collections
	7	import matplotlib
	8	matplotlib.use('Agg')
7	9	import matplotlib.pyplot as plt
8	10	import matplotlib.gridspec as gridspec
9	11	import matplotlib.cm as cmx

+1

-1

ariba/mlst_reporter.py less more

46	46	depths = [int(x) for x in d['smtls_nts_depth'].split(',')]
47	47	depths.sort()
48	48	het_pc = round(100.0 * depths[-1] / sum(depths), 2)
49		if results['hetmin'] == '.' or results['hetmin'] < het_pc:
	49	if results['hetmin'] == '.' or results['hetmin'] > het_pc:
50	50	results['hetmin'] = het_pc
51	51	if len(het_data):
52	52	results['hets'] = '.'.join(het_data)

+3

-3

ariba/ref_genes_getter.py less more

40	40	def _get_card_versions(self, tmp_file):
41	41	print('Getting available CARD versions')
42	42	common.download_file('https://card.mcmaster.ca/download', tmp_file, max_attempts=self.max_download_attempts, sleep_time=self.sleep_time, verbose=True)
43		p = re.compile(r'''href="(/download/.?broad.?v([0-9]+\.[0-9]+\.[0-9]+)\.tar\.gz)"''')
	43	p = re.compile(r'''href="(/download/.?broad.?v([0-9]+\.[0-9]+\.[0-9]+)\.tar\.(gz|bz2))"''')
44	44	versions = {}
45	45
46	46	with open(tmp_file) as f:

84	84
85	85	print('Getting version', self.version)
86	86	card_tarball_url = versions[key]
87		card_tarball = 'card.tar.gz'
	87	card_tarball = 'card.tar.bz2'
88	88	print('Working in temporary directory', tmpdir)
89	89	print('Downloading data from card:', card_tarball_url, flush=True)
90	90	common.syscall('wget -O ' + card_tarball + ' ' + card_tarball_url, verbose=True)

114	114	for gene_key, gene_dict in sorted(json_data.items()):
115	115	crecord = card_record.CardRecord(gene_dict)
116	116	data = crecord.get_data()
	117	data['ARO_description'] = data['ARO_description'].encode('utf-8')
117	118	fasta_name_prefix = '.'.join([
118	119	card_record.CardRecord._ARO_name_to_fasta_name(data['ARO_name']),
119	120	data['ARO_accession'],

479	480
480	481	def run(self, outprefix):
481	482	exec('self._get_from_' + self.ref_db + '(outprefix)')
482

+1

-1

ariba/report_filter.py less more

60	60
61	61	@staticmethod
62	62	def _load_report(infile):
63		'''Loads report file into a dictionary. Key=refrence name.
	63	'''Loads report file into a dictionary. Key=reference name.
64	64	Value = list of report lines for that reference'''
65	65	report_dict = {}
66	66	f = pyfastaq.utils.open_file_read(infile)

+3

-1

ariba/tasks/run.py less more

46	46	extern_progs,
47	47	version_report_lines=version_report_lines,
48	48	assembly_coverage=options.assembly_cov,
49		assembler='fermilite',
	49	assembler=options.assembler,
50	50	threads=options.threads,
51	51	verbose=options.verbose,
52	52	min_scaff_depth=options.min_scaff_depth,

58	58	max_gene_nt_extend=options.gene_nt_extend,
59	59	clean=(not options.noclean),
60	60	tmp_dir=options.tmp_dir,
	61	spades_mode=options.spades_mode,
	62	spades_options=options.spades_options
61	63	)
62	64	c.run()
63	65

+47

-1

ariba/tests/assembly_test.py less more

4	4	import filecmp
5	5	import pyfastaq
6	6	from ariba import assembly
	7	from ariba import external_progs
7	8
8	9	modules_dir = os.path.dirname(os.path.abspath(assembly.__file__))
9	10	data_dir = os.path.join(modules_dir, 'tests', 'data')
10
	11	extern_progs = external_progs.ExternalProgs()
11	12
12	13	class TestAssembly(unittest.TestCase):
13	14	def test_run_fermilite(self):

101	102	os.unlink(bam + '.unmapped_mates')
102	103	os.unlink(bam + '.scaff')
103	104
	105	def test_check_spades_log_file(self):
	106	'''test _check_spades_log_file'''
	107	good_file = os.path.join(data_dir, 'assembly_test_check_spades_log_file.log.good')
	108	bad_file = os.path.join(data_dir, 'assembly_test_check_spades_log_file.log.bad')
	109	self.assertTrue(assembly.Assembly._check_spades_log_file(good_file))
	110	with self.assertRaises(assembly.Error):
	111	self.assertTrue(assembly.Assembly._check_spades_log_file(bad_file))
	112
	113	@unittest.skipUnless(extern_progs.exe('spades'), "Spades assembler is optional and is not configured")
	114	def test_assemble_with_spades(self):
	115	'''test _assemble_with_spades'''
	116	reads1 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_1.fq')
	117	reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_2.fq')
	118	tmp_dir = 'tmp.test_assemble_with_spades'
	119	tmp_log = 'tmp.test_assemble_with_spades.log'
	120	with open(tmp_log, 'w') as tmp_log_fh:
	121	print('First line', file=tmp_log_fh)
	122	shutil.rmtree(tmp_dir, ignore_errors=True)
	123	#using spades_options=" --only-assembler" because error correction cannot determine quality offset on this
	124	#artificial dataset
	125	a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa',
	126	'not_needed_for_this_test.bam', tmp_log_fh, 'not needed',
	127	assembler="spades", spades_options=" --only-assembler")
	128	a._assemble_with_spades()
	129	self.assertTrue(a.assembled_ok)
	130	shutil.rmtree(tmp_dir,ignore_errors=True)
	131	os.unlink(tmp_log)
	132
	133	@unittest.skipUnless(extern_progs.exe('spades'), "Spades assembler is optional and is not configured")
	134	def test_assemble_with_spades_fail(self):
	135	'''test _assemble_with_spades handles spades fail'''
	136	reads1 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_fails_reads_1.fq')
	137	reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_fails_reads_2.fq')
	138	tmp_dir = 'tmp.test_assemble_with_spades_fail'
	139	tmp_log = 'tmp.test_assemble_with_spades_fail.log'
	140	with open(tmp_log, 'w') as tmp_log_fh:
	141	print('First line', file=tmp_log_fh)
	142	shutil.rmtree(tmp_dir, ignore_errors=True)
	143	a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa',
	144	'not_needed_for_this_test.bam', tmp_log_fh, 'not needed',
	145	assembler="spades", spades_options=" --only-assembler")
	146	a._assemble_with_spades()
	147	self.assertFalse(a.assembled_ok)
	148	shutil.rmtree(tmp_dir,ignore_errors=True)
	149	os.unlink(tmp_log)

+49

-23

ariba/tests/cluster_test.py less more

42	42	dirs = [os.path.join(data_dir, d) for d in dirs]
43	43	for d in dirs:
44	44	tmpdir = 'tmp.cluster_test_init_fail_files_missing'
	45	shutil.rmtree(tmpdir,ignore_errors=True)
45	46	shutil.copytree(d, tmpdir)
46	47	with self.assertRaises(cluster.Error):
47	48	cluster.Cluster(tmpdir, 'name', refdata=refdata, total_reads=42, total_reads_bases=4242)

99	100	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_no_reads_after_filtering.in.tsv')
100	101	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
101	102	tmpdir = 'tmp.test_full_run_no_reads_after_filtering'
	103	shutil.rmtree(tmpdir, ignore_errors=True)
102	104	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_no_reads_after_filtering'), tmpdir)
103	105
104	106	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=0, total_reads_bases=0)

117	119	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_choose_ref_fail.in.tsv')
118	120	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
119	121	tmpdir = 'tmp.test_full_run_choose_ref_fail'
	122	shutil.rmtree(tmpdir, ignore_errors=True)
120	123	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_choose_ref_fail'), tmpdir)
121	124
122		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=2, total_reads_bases=108, spades_other_options='--only-assembler')
	125	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=2, total_reads_bases=108)
123	126	c.run()
124	127
125	128	expected = '\t'.join(['.', '.', '.', '.', '1024', '2', 'cluster_name'] + ['.'] * 24)

136	139	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
137	140	tmpdir = 'tmp.test_full_run_ref_not_in_cluster'
138	141	all_refs_fa = os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster.all_refs.fa')
	142	shutil.rmtree(tmpdir, ignore_errors=True)
139	143	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster'), tmpdir)
140	144
141		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=72, total_reads_bases=3600, all_ref_seqs_fasta=all_refs_fa)
	145	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=72, total_reads_bases=3600, all_ref_seqs_fasta=all_refs_fa)
142	146	c.run()
143	147
144	148	expected = '\t'.join(['.', '.', '.', '.', '1024', '72', 'cluster_name'] + ['.'] * 24)

154	158	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_assembly_fail.in.tsv')
155	159	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
156	160	tmpdir = 'tmp.test_full_run_assembly_fail'
	161	shutil.rmtree(tmpdir, ignore_errors=True)
157	162	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_assembly_fail'), tmpdir)
158	163
159	164	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=4, total_reads_bases=304)

172	177	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_non_coding.metadata.tsv')
173	178	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
174	179	tmpdir = 'tmp.test_full_run_ok_non_coding'
	180	shutil.rmtree(tmpdir, ignore_errors=True)
175	181	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_non_coding'), tmpdir)
176	182
177		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=72, total_reads_bases=3600)
	183	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=72, total_reads_bases=3600)
178	184	c.run()
179	185
180	186	self.maxDiff=None

197	203	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_presence_absence.metadata.tsv')
198	204	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
199	205	tmpdir = 'tmp.cluster_test_full_run_ok_presence_absence'
	206	shutil.rmtree(tmpdir, ignore_errors=True)
200	207	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_presence_absence'), tmpdir)
201	208
202		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=64, total_reads_bases=3200)
	209	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=64, total_reads_bases=3200)
203	210	c.run()
204	211
205	212	expected = [

219	226	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only.not_present.metadata.tsv')
220	227	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
221	228	tmpdir = 'tmp.cluster_test_full_run_ok_variants_only.not_present'
	229	shutil.rmtree(tmpdir, ignore_errors=True)
222	230	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only'), tmpdir)
223	231
224		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
	232	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=66, total_reads_bases=3300)
225	233	c.run()
226	234	expected = [
227	235	'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, so do not report\tGeneric description of variants_only1'

236	244	tsv_in = os.path.join(data_dir, 'cluster_full_run_varonly.not_present.always_report.tsv')
237	245	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
238	246	tmpdir = 'tmp.cluster_full_run_varonly.not_present.always_report'
	247	shutil.rmtree(tmpdir, ignore_errors=True)
239	248	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only'), tmpdir)
240	249
241		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
	250	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=66, total_reads_bases=3300)
242	251	c.run()
243	252	expected = [
244	253	'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, but always report anyway\tGeneric description of variants_only1'

253	262	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only.present.metadata.tsv')
254	263	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
255	264	tmpdir = 'tmp.cluster_test_full_run_ok_variants_only.present'
	265	shutil.rmtree(tmpdir, ignore_errors=True)
256	266	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only'), tmpdir)
257	267
258		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
	268	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=66, total_reads_bases=3300)
259	269	c.run()
260	270
261	271	expected = [

272	282	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_gene_start_mismatch.metadata.tsv')
273	283	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
274	284	tmpdir = 'tmp.cluster_test_full_run_ok_gene_start_mismatch'
	285	shutil.rmtree(tmpdir, ignore_errors=True)
275	286	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_gene_start_mismatch'), tmpdir)
276		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=112, total_reads_bases=1080)
	287	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=112, total_reads_bases=1080)
277	288	c.run()
278	289	expected = [
279	290	'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.l6.c30.ctg.1\t362\t27.8\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'

288	299	tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_presabs_gene.tsv')
289	300	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
290	301	tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_pres_abs_gene'
	302	shutil.rmtree(tmpdir, ignore_errors=True)
291	303	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_presabs_gene'), tmpdir)
292		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	304	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
293	305	c.run()
294	306	expected = [
295	307	'ref_gene\tref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_gene'

306	318	tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene_2.tsv')
307	319	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
308	320	tmpdir = 'tmp.cluster_full_run_smtls_snp_varonly_gene_2'
	321	shutil.rmtree(tmpdir, ignore_errors=True)
309	322	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene_2'), tmpdir)
310		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	323	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
311	324	c.run()
312	325	expected = [
313	326	'ref_gene\tref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_gene'

322	335	tsv_in = os.path.join(data_dir, 'cluster_full_run_known_smtls_snp_presabs_gene.tsv')
323	336	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
324	337	tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_pres_abs_gene'
	338	shutil.rmtree(tmpdir, ignore_errors=True)
325	339	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_known_smtls_snp_presabs_gene'), tmpdir)
326		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	340	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
327	341	c.run()
328	342
329	343	# We shouldn't get an extra 'HET' line because we already know about the snp, so

341	355	tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene_no_snp.tsv')
342	356	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
343	357	tmpdir = 'tmp.cluster_test_full_run_smtls_snp_varonly_gene_no_snp'
	358	shutil.rmtree(tmpdir, ignore_errors=True)
344	359	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene_no_snp'), tmpdir)
345		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	360	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
346	361	c.run()
347	362
348	363	# We shouldn't get an extra 'HET' line because we already know about the snp, so

360	375	tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene.tsv')
361	376	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
362	377	tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_var_only_gene_does_have_var'
	378	shutil.rmtree(tmpdir, ignore_errors=True)
363	379	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene'), tmpdir)
364		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	380	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
365	381	c.run()
366	382
367	383	# We shouldn't get an extra 'HET' line because we already know about the snp, so

379	395	tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_presabs_nonc.tsv')
380	396	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
381	397	tmpdir = 'tmp.cluster_test_full_run_smtls_snp_presabs_nonc'
	398	shutil.rmtree(tmpdir, ignore_errors=True)
382	399	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_presabs_nonc'), tmpdir)
383		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	400	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
384	401	c.run()
385	402	expected = [
386	403	'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_seq'

395	412	tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_known_snp_presabs_nonc.tsv')
396	413	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
397	414	tmpdir = 'tmp.cluster_test_full_run_smtls_known_snp_presabs_nonc'
	415	shutil.rmtree(tmpdir, ignore_errors=True)
398	416	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_known_snp_presabs_nonc'), tmpdir)
399		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	417	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
400	418	c.run()
401	419	expected = [
402	420	'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:0:G18A:.:Description of G18A\tGeneric description of ref_seq'

411	429	tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_nonc.tsv')
412	430	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
413	431	tmpdir = 'tmp.cluster_full_run_smtls_snp_varonly_nonc'
	432	shutil.rmtree(tmpdir, ignore_errors=True)
414	433	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_nonc'), tmpdir)
415		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	434	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
416	435	c.run()
417	436	expected = [
418	437	'ref_seq\tref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_seq'

427	446	tsv_in = os.path.join(data_dir, 'cluster_full_run_known_smtls_snp_presabs_nonc.tsv')
428	447	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
429	448	tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_pres_abs_noncoding'
	449	shutil.rmtree(tmpdir, ignore_errors=True)
430	450	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_known_smtls_snp_presabs_nonc'), tmpdir)
431		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	451	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
432	452	c.run()
433	453
434	454	# We shouldn't get an extra 'HET' line because we already know about the snp, so

446	466	tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_nonc_no_snp.tsv')
447	467	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
448	468	tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_var_only_noncoding'
	469	shutil.rmtree(tmpdir, ignore_errors=True)
449	470	shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_nonc_no_snp'), tmpdir)
450		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	471	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
451	472	c.run()
452	473
453	474	# We shouldn't get an extra 'HET' line because we already know about the snp, so

465	486	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_smtls_snp_varonly_nonc.tsv')
466	487	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
467	488	tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_var_only_noncoding'
	489	shutil.rmtree(tmpdir, ignore_errors=True)
468	490	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_smtls_snp_varonly_nonc'), tmpdir)
469		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
	491	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
470	492	c.run()
471	493
472	494	# We shouldn't get an extra 'HET' line because we already know about the snp, so

484	506	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_partial_asmbly.tsv')
485	507	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
486	508	tmpdir = 'tmp.cluster_test_full_run_partial_assembly'
	509	shutil.rmtree(tmpdir, ignore_errors=True)
487	510	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_partial_asmbly'), tmpdir)
488		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=278, total_reads_bases=15020)
	511	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=278, total_reads_bases=15020)
489	512	c.run()
490	513
491	514	expected = [

501	524	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_multiple_vars.tsv')
502	525	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
503	526	tmpdir = 'tmp.cluster_test_full_run_multiple_vars'
	527	shutil.rmtree(tmpdir, ignore_errors=True)
504	528	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_multiple_vars'), tmpdir)
505		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
	529	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=292, total_reads_bases=20900)
506	530	c.run()
507	531
508	532	expected = [

519	543	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_delete_codon.tsv')
520	544	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
521	545	tmpdir = 'tmp.cluster_test_full_delete_codon'
	546	shutil.rmtree(tmpdir, ignore_errors=True)
522	547	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_delete_codon'), tmpdir)
523		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
	548	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=292, total_reads_bases=20900)
524	549	c.run()
525	550
526	551	expected = [

536	561	tsv_in = os.path.join(data_dir, 'cluster_test_full_run_insert_codon.tsv')
537	562	refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
538	563	tmpdir = 'tmp.cluster_test_full_insert_codon'
	564	shutil.rmtree(tmpdir, ignore_errors=True)
539	565	shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_insert_codon'), tmpdir)
540		c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
	566	c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=292, total_reads_bases=20900)
541	567	c.run()
542	568
543	569	expected = [

+4

-0

ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_1.fq less more

	0	@read1/1
	1	CACGTTCGTCGTGATGACTGACGTCACGAGCTCTGCGTACGTCATCTAGCGTATCGTACTGACTGAT
	2	+
	3	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII

+5

-0

ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_2.fq less more

	0	@read1/2
	1	CACGTTCGTCGTGATGACTGACGTCACGAGCTCTGCGTACGTCATCTAGCGTATCGTACTGACTGAT
	2	+
	3	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	4

+232

-0

ariba/tests/data/assembly_test_assemble_with_spades_reads_1.fq less more

	0	@1:1:82:186/1
	1	GCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGG
	2	+
	3	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	4	@1:2:6:109/1
	5	GGCTTTAGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCAGAGGGCTAAAGTTTGTA
	6	+
	7	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	8	@1:3:1:106/1
	9	CTCGCGGCTTTAGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCAGAGGGCTAAAGT
	10	+
	11	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	12	@1:4:33:136/1
	13	TGGCCCTCCCTTGACTAACTCTGACGCGATCAGAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCT
	14	+
	15	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	16	@1:5:196:299/1
	17	CCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTTTAGGT
	18	+
	19	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	20	@1:6:63:168/1
	21	CAGAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTG
	22	+
	23	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	24	@1:7:10:111/1
	25	TTAGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCAGAGGGCTAAAGTTTGTAGCTC
	26	+
	27	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	28	@1:8:74:178/1
	29	AGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGA
	30	+
	31	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	32	@1:9:84:186/1
	33	TCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGA
	34	+
	35	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	36	@ref2:1:41:144/1
	37	CCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCT
	38	+
	39	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	40	@ref2:2:144:247/1
	41	ATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGTCTTTGAC
	42	+
	43	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	44	@ref2:3:225:329/1
	45	CTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCG
	46	+
	47	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	48	@ref2:4:237:340/1
	49	GCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTC
	50	+
	51	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	52	@ref2:5:45:151/1
	53	GACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATT
	54	+
	55	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	56	@ref2:6:284:386/1
	57	CGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTA
	58	+
	59	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	60	@ref2:7:305:407/1
	61	CTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTTACCCAGAGAAATATGT
	62	+
	63	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	64	@ref2:8:213:317/1
	65	CTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCC
	66	+
	67	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	68	@ref2:9:183:287/1
	69	GATTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCT
	70	+
	71	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	72	@ref2:10:289:393/1
	73	TCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTT
	74	+
	75	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	76	@ref2:11:296:399/1
	77	GTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTTACCCAGA
	78	+
	79	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	80	@ref2:12:270:373/1
	81	GTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTAC
	82	+
	83	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	84	@ref2:13:167:271/1
	85	CTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCGCCGGA
	86	+
	87	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	88	@ref2:14:43:147/1
	89	TTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAA
	90	+
	91	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	92	@ref2:15:323:424/1
	93	TCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTTACCCAGAGAAATATGTGCGCTACCTGCTTAGCCT
	94	+
	95	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	96	@ref2:16:105:207/1
	97	AGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTG
	98	+
	99	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	100	@ref2:17:237:341/1
	101	GCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTC
	102	+
	103	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	104	@ref2:18:3:107/1
	105	CGCGGCTTTAGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTT
	106	+
	107	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	108	@ref2:19:272:374/1
	109	GTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAA
	110	+
	111	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	112	@ref2:20:251:354/1
	113	GTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCG
	114	+
	115	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	116	@ref2:21:95:199/1
	117	CTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCA
	118	+
	119	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	120	@ref2:22:96:199/1
	121	TCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCAT
	122	+
	123	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	124	@ref2:23:94:197/1
	125	ACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTC
	126	+
	127	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	128	@ref2:24:185:289/1
	129	TTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAG
	130	+
	131	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	132	@ref2:25:152:256/1
	133	CGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTG
	134	+
	135	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	136	@ref2:26:285:389/1
	137	GTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTAC
	138	+
	139	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	140	@ref2:27:137:241/1
	141	TGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGT
	142	+
	143	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	144	@ref2:28:261:365/1
	145	GGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCC
	146	+
	147	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	148	@ref2:29:12:116/1
	149	AGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTA
	150	+
	151	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	152	@ref2:30:107:210/1
	153	CTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGAC
	154	+
	155	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	156	@ref2:31:162:266/1
	157	AGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCG
	158	+
	159	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	160	@ref2:32:213:317.dup.2/1
	161	CTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCC
	162	+
	163	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	164	@ref2:33:24:127/1
	165	TGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTG
	166	+
	167	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	168	@ref2:34:84:189/1
	169	TCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGA
	170	+
	171	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	172	@ref2:35:40:145/1
	173	CCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTC
	174	+
	175	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	176	@ref2:36:120:223/1
	177	TGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGA
	178	+
	179	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	180	@ref2:37:106:211/1
	181	GCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGA
	182	+
	183	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	184	@ref2:38:98:202/1
	185	TGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCG
	186	+
	187	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	188	@ref2:39:72:177/1
	189	AAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATAC
	190	+
	191	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	192	@ref2:40:16:120/1
	193	TGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTC
	194	+
	195	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	196	@ref2:41:308:410/1
	197	GAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTTACCCAGAGAAATATGTGCG
	198	+
	199	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	200	@ref2:42:26:129/1
	201	CCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGC
	202	+
	203	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	204	@ref2:43:130:234/1
	205	CAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTC
	206	+
	207	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	208	@ref2:44:52:157/1
	209	TCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCC
	210	+
	211	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	212	@ref2:45:220:323/1
	213	GCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCG
	214	+
	215	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	216	@ref2:46:125:228/1
	217	TCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTC
	218	+
	219	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	220	@ref2:47:210:314/1
	221	TGTCTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCG
	222	+
	223	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	224	@ref2:48:220:324/1
	225	GCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCG
	226	+
	227	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	228	@ref2:49:25:128/1
	229	GCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGG
	230	+
	231	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII

+232

-0

ariba/tests/data/assembly_test_assemble_with_spades_reads_2.fq less more

	0	@1:1:82:186/2
	1	CCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACCTATA
	2	+
	3	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	4	@1:2:6:109/2
	5	TCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTAGAGTTGT
	6	+
	7	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	8	@1:3:1:106/2
	9	TCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTAGAGTTGTAGC
	10	+
	11	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	12	@1:4:33:136/2
	13	CAATGGGAGTAGAAGGTCAACCTATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAG
	14	+
	15	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	16	@1:5:196:299/2
	17	TTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACG
	18	+
	19	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	20	@1:6:63:168/2
	21	CTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACCTATAATCGTCAGAGCACGATGA
	22	+
	23	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	24	@1:7:10:111/2
	25	AATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTAGAGTT
	26	+
	27	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	28	@1:8:74:178/2
	29	GACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACCTATAATCGTCAG
	30	+
	31	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	32	@1:9:84:186/2
	33	CCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACCTATA
	34	+
	35	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	36	@ref2:1:41:144/2
	37	GTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTAT
	38	+
	39	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	40	@ref2:2:144:247/2
	41	CGATATTACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCT
	42	+
	43	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	44	@ref2:3:225:329/2
	45	TGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAG
	46	+
	47	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	48	@ref2:4:237:340/2
	49	GGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGA
	50	+
	51	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	52	@ref2:5:45:151/2
	53	AGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGC
	54	+
	55	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	56	@ref2:6:284:386/2
	57	GGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGT
	58	+
	59	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	60	@ref2:7:305:407/2
	61	CTTGAACCTCAGCGCATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGC
	62	+
	63	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	64	@ref2:8:213:317/2
	65	AGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATAT
	66	+
	67	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	68	@ref2:9:183:287/2
	69	GCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGAGG
	70	+
	71	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	72	@ref2:10:289:393/2
	73	CATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTA
	74	+
	75	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	76	@ref2:11:296:399/2
	77	TCAGCGCATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGT
	78	+
	79	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	80	@ref2:12:270:373/2
	81	CCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTT
	82	+
	83	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	84	@ref2:13:167:271/2
	85	TGTAAGAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACA
	86	+
	87	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	88	@ref2:14:43:147/2
	89	AGCGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCG
	90	+
	91	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	92	@ref2:15:323:424/2
	93	CATCTAGGTTGGACAGCCTTGAACCTCAGCGCATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGG
	94	+
	95	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	96	@ref2:16:105:207/2
	97	GTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATG
	98	+
	99	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	100	@ref2:17:237:341/2
	101	AGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAG
	102	+
	103	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	104	@ref2:18:3:107/2
	105	GTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTAGAGTTGTAG
	106	+
	107	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	108	@ref2:19:272:374/2
	109	GCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATT
	110	+
	111	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	112	@ref2:20:251:354/2
	113	TTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCA
	114	+
	115	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	116	@ref2:21:95:199/2
	117	AACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGA
	118	+
	119	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	120	@ref2:22:96:199/2
	121	AACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGA
	122	+
	123	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	124	@ref2:23:94:197/2
	125	CACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAG
	126	+
	127	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	128	@ref2:24:185:289/2
	129	AAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGA
	130	+
	131	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	132	@ref2:25:152:256/2
	133	GAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGG
	134	+
	135	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	136	@ref2:26:285:389/2
	137	GTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCA
	138	+
	139	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	140	@ref2:27:137:241/2
	141	TACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTC
	142	+
	143	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	144	@ref2:28:261:365/2
	145	GCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGT
	146	+
	147	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	148	@ref2:29:12:116/2
	149	CATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTA
	150	+
	151	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	152	@ref2:30:107:210/2
	153	CGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACA
	154	+
	155	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	156	@ref2:31:162:266/2
	157	GAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTAT
	158	+
	159	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	160	@ref2:32:213:317.dup.2/2
	161	AGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATAT
	162	+
	163	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	164	@ref2:33:24:127/2
	165	TAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTG
	166	+
	167	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	168	@ref2:34:84:189/2
	169	CGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACAT
	170	+
	171	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	172	@ref2:35:40:145/2
	173	CGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTA
	174	+
	175	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	176	@ref2:36:120:223/2
	177	CACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAA
	178	+
	179	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	180	@ref2:37:106:211/2
	181	ACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGAC
	182	+
	183	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	184	@ref2:38:98:202/2
	185	CGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGT
	186	+
	187	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	188	@ref2:39:72:177/2
	189	ACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGA
	190	+
	191	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	192	@ref2:40:16:120/2
	193	TCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCA
	194	+
	195	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	196	@ref2:41:308:410/2
	197	AGCCTTGAACCTCAGCGCATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCC
	198	+
	199	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	200	@ref2:42:26:129/2
	201	AGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGT
	202	+
	203	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	204	@ref2:43:130:234/2
	205	TCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGAC
	206	+
	207	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	208	@ref2:44:52:157/2
	209	GACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCC
	210	+
	211	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	212	@ref2:45:220:323/2
	213	AGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGA
	214	+
	215	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	216	@ref2:46:125:228/2
	217	ATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACAT
	218	+
	219	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	220	@ref2:47:210:314/2
	221	AGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATATTAC
	222	+
	223	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	224	@ref2:48:220:324/2
	225	TAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGG
	226	+
	227	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
	228	@ref2:49:25:128/2
	229	GTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTT
	230	+
	231	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII

+5

-0

ariba/tests/data/assembly_test_check_spades_log_file.log.bad less more

	0	line 1
	1	line 2
	2
	3	== Error == system call for: "['/foo/bar/SPAdes-3.6.0-Linux/bin/spades', '/spam/eggs/K21/configs/config.info']" finished abnormally, err code: -7
	4

+5

-0

ariba/tests/data/assembly_test_check_spades_log_file.log.good less more

	0	This is a dummy spades log file.
	1
	2	It doesn't look like a real spades log file.
	3
	4	But it doesn't have lines matching the bad stuff that will mean ariba should stop NOW.

+1

-1

ariba/tests/data/mlst_reporter.het_snps.out.details.tsv less more

0	0	gene allele cov pc ctgs depth hetmin hets
1		gene1 1* 100.0 100.0 1 42.2 75.0 30,10.25,10,5
	1	gene1 1* 100.0 100.0 1 42.2 62.5 30,10.25,10,5
2	2	gene2 2 100.0 100.0 1 40.2 . .

+6

-0

scripts/ariba less more

221	221	nucmer_group.add_argument('--nucmer_breaklen', type=int, help='Value to use for -breaklen when running nucmer [%(default)s]', default=200, metavar='INT')
222	222
223	223	assembly_group = subparser_run.add_argument_group('Assembly options')
	224	assembly_group.add_argument('--assembler', help='Assembler to use', choices=['fermilite','spades'], default='fermilite')
224	225	assembly_group.add_argument('--assembly_cov', type=int, help='Target read coverage when sampling reads for assembly [%(default)s]', default=50, metavar='INT')
225	226	assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs [%(default)s]', default=10, metavar='INT')
	227	assembly_group.add_argument('--spades_mode', help='If using Spades assembler, either use default WGS mode, Single Cell mode (`spades.py --sc`) or RNA mode (`spades.py --rna`). '
	228	'Use SC or RNA mode if your input is from a viral sequencing with very uneven and deep coverage. '
	229	'Set `--assembly_cov` to some high value if using SC or RNA mode', choices=['wgs','sc','rna'], default='wgs')
	230	assembly_group.add_argument('--spades_options', help='Extra options to pass to Spades assembler. Sensible default options will be picked based on `--spades_mode` argument. '
	231	'Anything set here will replace the defaults completely')
226	232
227	233	other_run_group = subparser_run.add_argument_group('Other options')
228	234	other_run_group.add_argument('--threads', type=int, help='Experimental. Number of threads. Will run clusters in parallel, but not minimap (yet) [%(default)s]', default=1, metavar='INT')

+1

-1

setup.py less more

54	54	setup(
55	55	ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod],
56	56	name='ariba',
57		version='2.10.0',
	57	version='2.11.1',
58	58	description='ARIBA: Antibiotic Resistance Identification By Assembly',
59	59	packages = find_packages(),
60	60	package_data={'ariba': ['test_run_data/*']},