Codebase list roary / c6fdb3c
New upstream version 3.9.1+dfsg Sascha Steinbiss 6 years ago
10 changed file(s) with 193 addition(s) and 138 deletion(s). Raw diff Collapse all Expand all
00 name = Bio-Roary
1 version = 3.9.0
1 version = 3.9.1
22 author = Andrew J. Page <ap13@sanger.ac.uk>
33 license = GPL_3
44 copyright_holder = Wellcome Trust Sanger Institute
4646 has 'verbose_stats' => ( is => 'rw', isa => 'Bool', default => 0 );
4747 has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
4848 has 'mafft' => ( is => 'rw', isa => 'Bool', default => 0 );
49 has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
4950 has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50000 );
5051 has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
5152 has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
7071 $job_runner, $makeblastdb_exec, $mcxdeblast_exec, $mcl_exec, $blastp_exec,
7172 $apply_unknowns_filter, $cpus, $output_multifasta_files, $verbose_stats, $translation_table,
7273 $run_qc, $core_definition, $help, $kraken_db, $cmd_version,
73 $mafft, $output_directory, $check_dependancies, $inflation_value,
74 $mafft, $output_directory, $check_dependancies, $inflation_value, $allow_paralogs,
7475 );
7576
7677 GetOptionsFromArray(
9798 'cd|core_definition=f' => \$core_definition,
9899 'v|verbose' => \$verbose,
99100 'n|mafft' => \$mafft,
101 'ap|allow_paralogs' => \$allow_paralogs,
100102 'k|kraken_db=s' => \$kraken_db,
101103 'w|version' => \$cmd_version,
102104 'a|check_dependancies' => \$check_dependancies,
301303 core_definition => $self->core_definition,
302304 verbose => $self->verbose,
303305 mafft => $self->mafft,
304 inflation_value => $self->inflation_value,
306 allow_paralogs => $self->allow_paralogs,
307 inflation_value => $self->inflation_value,
305308 );
306309 $pan_genome_obj->run();
307310
342345 -r create R plots, requires R and ggplot2
343346 -s dont split paralogs
344347 -t INT translation table [11]
348 -ap allow paralogs in core alignment
345349 -z dont delete intermediate files
346350 -v verbose output to STDOUT
347351 -w print version and exit
348352 -y add gene inference information to spreadsheet, doesnt work with -e
349 -iv STR Change the MCL inflation value [1.5]
353 -iv STR Change the MCL inflation value [1.5]
350354 -h this help message
351355
352356 Example: Quickly generate a core gene alignment using 8 threads
2626 has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'core_gene_alignment.aln' );
2727 has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
2828 has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 );
29 has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
2930 has '_error_message' => ( is => 'rw', isa => 'Str' );
3031 has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
3132
3233 sub BUILD {
3334 my ($self) = @_;
3435
35 my ( $multifasta_base_directory, $spreadsheet_filename, $output_filename, $core_definition,$verbose, $help, $mafft, $dont_delete_files );
36 my ( $multifasta_base_directory, $spreadsheet_filename, $output_filename, $core_definition,$verbose, $help, $mafft, $allow_paralogs, $dont_delete_files );
3637
3738 GetOptionsFromArray(
3839 $self->args,
4142 'o|output_filename=s' => \$output_filename,
4243 'cd|core_definition=f' => \$core_definition,
4344 'z|dont_delete_files' => \$dont_delete_files,
45 'p|allow_paralogs' => \$allow_paralogs,
4446 'v|verbose' => \$verbose,
4547 'h|help' => \$help,
4648 );
5052 $self->logger->level(10000);
5153 }
5254 $self->help($help) if(defined($help));
55 $self->allow_paralogs($allow_paralogs) if(defined($allow_paralogs));
5356
5457 if ( defined($multifasta_base_directory) && ( -d $multifasta_base_directory ) ) {
5558 $self->multifasta_base_directory( abs_path($multifasta_base_directory));
9497 $self->logger->info("Extract core genes from spreadsheet");
9598 my $core_genes_obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
9699 spreadsheet => $self->spreadsheet_filename,
97 core_definition => $self->core_definition
100 core_definition => $self->core_definition,
101 allow_paralogs => $self->allow_paralogs
98102 );
99103
100104 $self->logger->info("Looking up genes in files");
129133 -cd FLOAT percentage of isolates a gene must be in to be core [99]
130134 -m STR directory containing gene multi-FASTAs [pan_genome_sequences]
131135 -s STR gene presence and absence spreadsheet [gene_presence_absence.csv]
136 -p allow paralogs
132137 -z dont delete intermediate files
133138 -v verbose output to STDOUT
134139 -h this help message
4040 has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
4141 has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
4242 has 'mafft' => ( is => 'rw', isa => 'Bool', default => 0 );
43 has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
4344
4445 sub BUILD {
4546 my ($self) = @_;
4748 my (
4849 $output_filename, $dont_create_rplots, $dont_delete_files, $dont_split_groups, $output_pan_geneome_filename,
4950 $job_runner, $output_statistics_filename, $output_multifasta_files, $clusters_filename, $core_definition,
50 $fasta_files, $input_files, $verbose_stats, $translation_table, $help, $cpus,$group_limit,$verbose,$mafft
51 $fasta_files, $input_files, $verbose_stats, $translation_table, $help, $cpus,$group_limit,$verbose,$mafft, $allow_paralogs
5152 );
5253
5354
7172 'cd|core_definition=f' => \$core_definition,
7273 'v|verbose' => \$verbose,
7374 'n|mafft' => \$mafft,
75 'q|allow_paralogs' => \$allow_paralogs,
7476 'h|help' => \$help,
7577 );
7678
9294 $self->group_limit($group_limit) if ( defined($group_limit) );
9395 $self->core_definition( $core_definition/100 ) if ( defined($core_definition) );
9496 $self->mafft($mafft) if ( defined($mafft) );
97 $self->allow_paralogs($allow_paralogs) if ( defined($allow_paralogs) );
9598 if ( defined($verbose) ) {
9699 $self->verbose($verbose);
97100 $self->logger->level(10000);
157160 cpus => $self->cpus,
158161 verbose => $self->verbose,
159162 mafft => $self->mafft,
163 allow_paralogs => $self->allow_paralogs,
160164 dont_delete_files => $self->dont_delete_files,
161165 num_input_files => $#{$input_files},
162166 );
221225 -n fast core gene alignement with MAFFT instead of PRANK
222226 -o STR clusters output filename [clustered_proteins]
223227 -p STR output pan genome filename [pan_genome.fa]
228 -q allow paralogs in core alignment
224229 -s STR output gene presence and absence filename [gene_presence_absence.csv]
225230 -t INT translation table [11]
226231 -z INT number of threads [1]
2828 has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1 );
2929 has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
3030 has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 );
31 has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
3132 has 'num_input_files' => ( is => 'ro', isa => 'Int', required => 1);
3233
3334 # Overload Role`
8485 my $core_cmd = "pan_genome_core_alignment";
8586 $core_cmd .= " -cd " . ($self->core_definition*100) if ( defined $self->core_definition );
8687 $core_cmd .= " --dont_delete_files " if ( defined $self->dont_delete_files && $self->dont_delete_files == 1 );
88 $core_cmd .= " --allow_paralogs " if ( defined $self->allow_paralogs && $self->allow_paralogs == 1 );
8789
8890 return $core_cmd;
8991 }
3636 has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
3737 has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
3838 has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
39 has 'allow_paralogs' => ( is => 'ro', isa => 'Bool', default => 0 );
3940 has '_working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
4041 has '_gff_fofn' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__gff_fofn' );
4142 has '_fasta_fofn' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__fasta_fofn' );
136137
137138 my $verbose_flag = '';
138139 $verbose_flag = '-v' if ( defined($self->verbose) && $self->verbose == 1 );
140
141 my $allow_paralogs_flag = '';
142 $allow_paralogs_flag = '--allow_paralogs' if ( defined($self->allow_paralogs) && $self->allow_paralogs == 1 );
139143
140144 return join(
141145 " ",
155159 $verbose_stats_flag,
156160 $verbose_flag,
157161 $mafft_flag,
162 $allow_paralogs_flag,
158163 '-j', $self->job_runner,
159164 '--processors', $self->cpus,
160165 '--group_limit', $self->group_limit,
1818 use Bio::Roary::GroupStatistics;
1919 use POSIX;
2020
21 has 'spreadsheet' => ( is => 'ro', isa => 'Str', required => 1 );
22 has '_csv_parser' => ( is => 'ro', isa => 'Text::CSV',lazy => 1, builder => '_build__csv_parser' );
23 has '_input_spreadsheet_fh' => ( is => 'ro', lazy => 1, builder => '_build__input_spreadsheet_fh' );
24 has 'ordered_core_genes' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_ordered_core_genes' );
25 has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1 );
26 has 'sample_names' => ( is => 'rw', isa => 'ArrayRef', default => sub {[]} );
27 has 'sample_names_to_genes' => ( is => 'rw', isa => 'HashRef', default => sub {{}} );
21 has 'spreadsheet' => ( is => 'ro', isa => 'Str', required => 1 );
22 has '_csv_parser' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__csv_parser' );
23 has '_input_spreadsheet_fh' => ( is => 'ro', lazy => 1, builder => '_build__input_spreadsheet_fh' );
24 has 'ordered_core_genes' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_ordered_core_genes' );
25 has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1 );
26 has 'sample_names' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
27 has 'sample_names_to_genes' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
28 has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
2829
29 has '_number_of_isolates' => ( is => 'rw', isa => 'Int');
30 has '_gene_column' => ( is => 'rw', isa => 'Int');
31 has '_num_isolates_column' => ( is => 'rw', isa => 'Int');
32 has '_avg_sequences_per_isolate_column' => ( is => 'rw', isa => 'Int');
33 has '_genome_fragement_column' => ( is => 'rw', isa => 'Int');
34 has '_order_within_fragement_column' => ( is => 'rw', isa => 'Int');
35 has '_min_no_isolates_for_core' => ( is => 'rw', isa => 'Num', lazy => 1, builder => '_build__min_no_isolates_for_core' );
30 has '_number_of_isolates' => ( is => 'rw', isa => 'Int' );
31 has '_gene_column' => ( is => 'rw', isa => 'Int' );
32 has '_num_isolates_column' => ( is => 'rw', isa => 'Int' );
33 has '_avg_sequences_per_isolate_column' => ( is => 'rw', isa => 'Int' );
34 has '_genome_fragement_column' => ( is => 'rw', isa => 'Int' );
35 has '_order_within_fragement_column' => ( is => 'rw', isa => 'Int' );
36 has '_min_no_isolates_for_core' => ( is => 'rw', isa => 'Num', lazy => 1, builder => '_build__min_no_isolates_for_core' );
3637
3738 sub _build__min_no_isolates_for_core {
38 my ($self) = @_;
39 my $threshold = $self->_number_of_isolates * $self->core_definition;
39 my ($self) = @_;
40 my $threshold = $self->_number_of_isolates * $self->core_definition;
4041
41 return $threshold;
42 return $threshold;
4243 }
4344
44 sub _build__csv_parser
45 {
46 my ($self) = @_;
47 return Text::CSV->new( { binary => 1, always_quote => 1} );
45 sub _build__csv_parser {
46 my ($self) = @_;
47 return Text::CSV->new( { binary => 1, always_quote => 1 } );
4848 }
4949
5050 sub _build__input_spreadsheet_fh {
5353 return $fh;
5454 }
5555
56 sub _update_number_of_isolates
57 {
58 my ($self, $header_row) = @_;
59 my $number_of_isolates = @{$header_row} - @{Bio::Roary::GroupStatistics->fixed_headers};
60 $self->_number_of_isolates($number_of_isolates);
56 sub _update_number_of_isolates {
57 my ( $self, $header_row ) = @_;
58 my $number_of_isolates = @{$header_row} - @{ Bio::Roary::GroupStatistics->fixed_headers };
59 $self->_number_of_isolates($number_of_isolates);
6160 }
6261
63 sub _setup_column_mappings
64 {
65 my ($self, $header_row) = @_;
66 # current ordering
67 my %columns_of_interest_mappings = (
68 'Gene' => 0,
69 'No. isolates' => 3,
70 'Avg sequences per isolate' => 5,
71 'Genome Fragment' => 6,
72 'Order within Fragment' => 7,
73 'QC' => 10,
62 sub _setup_column_mappings {
63 my ( $self, $header_row ) = @_;
64
65 # current ordering
66 my %columns_of_interest_mappings = (
67 'Gene' => 0,
68 'No. isolates' => 3,
69 'Avg sequences per isolate' => 5,
70 'Genome Fragment' => 6,
71 'Order within Fragment' => 7,
72 'QC' => 10,
7473 );
75
76 # Dynamically overwrite the default ordering
77 for(my $i = 0; $i < @{$header_row}; $i++)
78 {
79 for my $col_name (%columns_of_interest_mappings)
80 {
81 if($header_row->[$i] eq $col_name)
82 {
83 $columns_of_interest_mappings{$col_name} = $i;
84 last;
85 }
74
75 # Dynamically overwrite the default ordering
76 for ( my $i = 0 ; $i < @{$header_row} ; $i++ ) {
77 for my $col_name (%columns_of_interest_mappings) {
78 if ( $header_row->[$i] eq $col_name ) {
79 $columns_of_interest_mappings{$col_name} = $i;
80 last;
81 }
82 }
8683 }
87 }
88 $self->_gene_column($columns_of_interest_mappings{'Gene'});
89 $self->_num_isolates_column($columns_of_interest_mappings{'No. isolates'});
90 $self->_avg_sequences_per_isolate_column($columns_of_interest_mappings{'Avg sequences per isolate'});
91 $self->_genome_fragement_column($columns_of_interest_mappings{'Genome Fragment'});
92 $self->_order_within_fragement_column($columns_of_interest_mappings{'Order within Fragment'});
93 $self->_update_number_of_isolates($header_row);
94
95 # Get the sample_names
96 my @sample_names;
97 for(my $i = $self->_length_of_fixed_headers(); $i < @{$header_row}; $i++)
98 {
99 push(@sample_names,$header_row->[$i]);
100 }
101 $self->sample_names(\@sample_names);
84 $self->_gene_column( $columns_of_interest_mappings{'Gene'} );
85 $self->_num_isolates_column( $columns_of_interest_mappings{'No. isolates'} );
86 $self->_avg_sequences_per_isolate_column( $columns_of_interest_mappings{'Avg sequences per isolate'} );
87 $self->_genome_fragement_column( $columns_of_interest_mappings{'Genome Fragment'} );
88 $self->_order_within_fragement_column( $columns_of_interest_mappings{'Order within Fragment'} );
89 $self->_update_number_of_isolates($header_row);
90
91 # Get the sample_names
92 my @sample_names;
93 for ( my $i = $self->_length_of_fixed_headers() ; $i < @{$header_row} ; $i++ ) {
94 push( @sample_names, $header_row->[$i] );
95 }
96 $self->sample_names( \@sample_names );
10297 }
10398
104 sub _length_of_fixed_headers
105 {
106 my ($self) = @_;
107 return @{Bio::Roary::GroupStatistics->fixed_headers()};
99 sub _length_of_fixed_headers {
100 my ($self) = @_;
101 return @{ Bio::Roary::GroupStatistics->fixed_headers() };
108102 }
109103
110 sub _populate_sample_to_gene_lookup_with_row
111 {
112 my ($self, $row) = @_;
113
114 for(my $i = $self->_length_of_fixed_headers(); $i < @{$row}; $i++ )
115 {
116 if(defined($row->[$i]) && $row->[$i] ne "" )
117 {
118 my $sample_name = $self->sample_names->[$i - $self->_length_of_fixed_headers()];
119
120 $self->sample_names_to_genes->{$sample_name}->{$row->[$i]} = 1;
121 }
122 }
123 return 1;
104 sub _populate_sample_to_gene_lookup_with_row {
105 my ( $self, $row ) = @_;
106
107 for ( my $i = $self->_length_of_fixed_headers() ; $i < @{$row} ; $i++ ) {
108 if ( defined( $row->[$i] ) && $row->[$i] ne "" ) {
109 my $sample_name = $self->sample_names->[ $i - $self->_length_of_fixed_headers() ];
110
111 $self->sample_names_to_genes->{$sample_name}->{ $row->[$i] } = 1;
112 }
113 }
114 return 1;
124115 }
125116
117 sub _ordered_core_genes {
118 my ($self) = @_;
119 my %ordered_genes;
120 while ( my $row = $self->_csv_parser->getline( $self->_input_spreadsheet_fh ) ) {
121 next if ( @{$row} < 12 ); # no genes in group
122 next if ( !defined( $row->[ $self->_gene_column ] ) || $row->[ $self->_gene_column ] eq '' ); # no gene name
123 next
124 if ( !defined( $row->[ $self->_avg_sequences_per_isolate_column ] ) || $row->[ $self->_avg_sequences_per_isolate_column ] eq '' )
125 ; # no average
126 next
127 if ( !defined( $row->[ $self->_genome_fragement_column ] ) || $row->[ $self->_genome_fragement_column ] eq '' )
128 ; # fragment not defined
126129
127 sub _ordered_core_genes
128 {
129 my ($self) = @_;
130 my %ordered_genes;
131 while ( my $row = $self->_csv_parser->getline( $self->_input_spreadsheet_fh ) )
132 {
133 next if(@{$row} < 12); # no genes in group
134 next if(!defined($row->[$self->_gene_column]) || $row->[$self->_gene_column] eq '' ); # no gene name
135 next if(!defined($row->[$self->_avg_sequences_per_isolate_column]) || $row->[$self->_avg_sequences_per_isolate_column] eq '' ); # no average
136 next if(!defined($row->[$self->_genome_fragement_column]) || $row->[$self->_genome_fragement_column] eq '' ); # fragment not defined
137
138 # next if($self->_number_of_isolates != $row->[$self->_num_isolates_column]); # if gene is not in all isolates
139 next if ( $row->[$self->_num_isolates_column] < $self->_min_no_isolates_for_core );
140 next if($row->[$self->_avg_sequences_per_isolate_column] != 1);
141 $ordered_genes{$row->[$self->_genome_fragement_column]}{$row->[$self->_order_within_fragement_column]} = $row->[$self->_gene_column];
142 $self->_populate_sample_to_gene_lookup_with_row($row);
143 }
144
145 my @ordered_core_genes ;
146 for my $fragment_key(sort {$a <=> $b } keys %ordered_genes)
147 {
148 for my $order_within_fragement(sort {$a <=> $b } keys %{$ordered_genes{$fragment_key}})
149 {
150 push(@ordered_core_genes,$ordered_genes{$fragment_key}{$order_within_fragement});
130 # next if($self->_number_of_isolates != $row->[$self->_num_isolates_column]); # if gene is not in all isolates
131 next if ( $row->[ $self->_num_isolates_column ] < $self->_min_no_isolates_for_core );
132
133 if ( $self->allow_paralogs ) {
134 # should never happen
135 next if ( $row->[ $self->_avg_sequences_per_isolate_column ] < 1 );
136 }
137 else {
138 next if ( $row->[ $self->_avg_sequences_per_isolate_column ] != 1 );
139 }
140
141 $ordered_genes{ $row->[ $self->_genome_fragement_column ] }{ $row->[ $self->_order_within_fragement_column ] } =
142 $row->[ $self->_gene_column ];
143 $self->_populate_sample_to_gene_lookup_with_row($row);
151144 }
152 }
153 return \@ordered_core_genes;
145
146 my @ordered_core_genes;
147 for my $fragment_key ( sort { $a <=> $b } keys %ordered_genes ) {
148 for my $order_within_fragement ( sort { $a <=> $b } keys %{ $ordered_genes{$fragment_key} } ) {
149 push( @ordered_core_genes, $ordered_genes{$fragment_key}{$order_within_fragement} );
150 }
151 }
152 return \@ordered_core_genes;
154153 }
155154
156 sub _build_ordered_core_genes
157 {
158 my ($self) = @_;
159 my $header_row = $self->_csv_parser->getline( $self->_input_spreadsheet_fh );
160 $self->_setup_column_mappings($header_row);
155 sub _build_ordered_core_genes {
156 my ($self) = @_;
157 my $header_row = $self->_csv_parser->getline( $self->_input_spreadsheet_fh );
158 $self->_setup_column_mappings($header_row);
161159
162 return $self->_ordered_core_genes();
160 return $self->_ordered_core_genes();
163161 }
164
165162
166163 no Moose;
167164 __PACKAGE__->meta->make_immutable;
7171 my ( $self, $sample_name, $gene_file ) = @_;
7272
7373 # loop over this to get the geneIDs
74 for my $gene_id ( keys %{ $self->_gene_to_sequence->{$gene_file} } ) {
74 for my $gene_id ( sort keys %{ $self->_gene_to_sequence->{$gene_file} } ) {
7575 if ( defined( $self->sample_names_to_genes->{$sample_name}->{$gene_id} ) ) {
7676 return $self->_gene_to_sequence->{$gene_file}->{$gene_id};
7777 }
4747 has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
4848 has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
4949 has 'inflation_value' => ( is => 'rw', isa => 'Num', default => 1.5 );
50 has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
5051
5152 has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );
5253
135136 core_definition => $self->core_definition,
136137 verbose => $self->verbose,
137138 mafft => $self->mafft,
139 allow_paralogs => $self->allow_paralogs,
138140 );
139141 $post_analysis->run();
140142
1212
1313 my $obj;
1414
15 ok($obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
16 spreadsheet => 't/data/core_group_statistics.csv',
17 ),'initalise obj');
18 is_deeply($obj->ordered_core_genes, ['argF','speH','group_5'], 'Correct ordering');
19 is_deeply($obj->sample_names_to_genes, {
20 'query_2' => {
21 '2_3' => 1,
22 '2_7' => 1,
23 '2_2' => 1
24 },
25 'query_1' => {
26 '1_6' => 1,
27 '1_3' => 1,
28 '1_2' => 1
29 }
30 }, 'Correct of sample names to genes is correct');
15 ok(
16 $obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
17 spreadsheet => 't/data/core_group_statistics.csv',
18 ),
19 'initalise obj'
20 );
21 is_deeply( $obj->ordered_core_genes, [ 'argF', 'speH', 'group_5' ], 'Correct ordering' );
22 is_deeply(
23 $obj->sample_names_to_genes,
24 {
25 'query_2' => {
26 '2_3' => 1,
27 '2_7' => 1,
28 '2_2' => 1
29 },
30 'query_1' => {
31 '1_6' => 1,
32 '1_3' => 1,
33 '1_2' => 1
34 }
35 },
36 'Correct of sample names to genes is correct'
37 );
38
39 ok(
40 $obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
41 spreadsheet => 't/data/core_group_statistics.csv',
42 allow_paralogs => 1,
43 ),
44 'initalise obj where paralogs allowed'
45 );
46 is_deeply( $obj->ordered_core_genes, [ 'argF', 'hly', 'speH', 'group_5' ], 'Correct ordering where paralogs allowed' );
47
48 is_deeply(
49 $obj->sample_names_to_genes,
50 {
51 'query_2' => {
52 '2_3' => 1,
53 '2_7' => 1,
54 '2_1' => 1,
55 '2_2' => 1
56 },
57 'query_1' => {
58 '1_6' => 1,
59 '1_3' => 1,
60 '1_1' => 1,
61 '1_2' => 1
62 }
63 },
64 'Correct of sample names to genes is correct where paralogs allowed'
65 );
3166
3267 done_testing();