Codebase list barrnap / 6e8a112
Reorganized executables: made bin/ dir and binaries/$OPSYS/ dirs. Torsten Seemann 9 years ago
6 changed file(s) with 224 addition(s) and 224 deletion(s). Raw diff Collapse all Expand all
+0
-224
barrnap less more
0 #!/usr/bin/env perl
1 use strict;
2 use warnings;
3 use Time::Piece;
4 use FindBin;
5
6 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
7 # global variables
8
9 my $EXE = $FindBin::RealScript;
10 my $VERSION = "0.3";
11 my $DESC = "Bacterial/Archaeal Ribosomal RNA Predictor";
12 my $AUTHOR = 'Torsten Seemann <torsten.seemann@monash.edu>';
13 my $URL = 'http://www.vicbioinformatics.com/';
14 my $DBDIR = "$FindBin::RealBin/db";
15 my $OPSYS = $^O;
16 my $NHMMER = "$FindBin::RealBin/binaries/nhmmer.$OPSYS";
17 my %LENG = ("5S_rRNA"=>119, "16S_rRNA"=>1585, "23S_rRNA"=>3232);
18
19 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
20 # command line options
21
22 my(@Options, $quiet, $kingdom, $threads, $evalue, $lencutoff, $incseq);
23 setOptions();
24
25 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
26 # check all is well
27
28 msg("This is $EXE $VERSION");
29 msg("Written by $AUTHOR");
30 msg("Obtained from $URL");
31 msg("Detected operating system: $OPSYS");
32
33 if (! -x $NHMMER) {
34 err("No binary for your OS '$OPSYS' is included. If you have one, copy it to $NHMMER.");
35 }
36 msg("Using HMMER binary: $NHMMER");
37
38 #if (! -r "$DATABASE.h3i") {
39 # err("The pressed HMM files are missing. Go to the barnapp folder and type 'make database'");
40 #}
41 #msg("Found database files: $DATABASE.*");
42
43 $threads > 0 or err("Invalid --threads $threads");
44 msg("Will use $threads threads");
45
46 $evalue > 0 or err("Invalid --evalue $evalue");
47 msg("Setting evalue cutoff to $evalue");
48
49 $lencutoff > 0 or err("Invalid --lencutoff $lencutoff");
50 msg("Will tag genes < $lencutoff x full length as partial genes");
51
52 if ($kingdom =~ m/^b/i) {
53 $kingdom = 'bacteria';
54 }
55 elsif ($kingdom =~ m/^a/i) {
56 $kingdom = 'archaea';
57 }
58 else {
59 err("I don't recognise the --kingdom '$kingdom'. Try 'bacteria' or 'archaea'.");
60 }
61
62 my $hmmdb = "$DBDIR/$kingdom.hmm";
63 err("Can't find database: $hmmdb") unless -r $hmmdb;
64 msg("Using database: $hmmdb");
65
66 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
67 # run the external command
68
69 my $fasta = shift @ARGV;
70 $fasta && -r $fasta or err("Usage: $EXE <file.fasta>");
71
72 msg("Running $NHMMER on $fasta ... please wait");
73 my $cmd = "$NHMMER --cpu $threads -E $evalue --w_length 2000 -o /dev/null --tblout /dev/stdout \Q$hmmdb\E \Q$fasta\E";
74 my @hits = qx($cmd 2>&1);
75
76 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
77 # process the output
78
79 my @feat;
80 foreach (@hits) {
81 chomp;
82 err("nhmmer failed to run - $_") if m/fail|error|core dump|bus error/i;
83 next if m/^#/; # comment line
84 next if m/^\s*$/; # empty line
85 my @x = split ' ', $_;
86 err("bad line in nhmmer output - @x") unless defined $x[6] and $x[6] =~ m/^\d+$/;
87
88 my($begin,$end,$strand) = $x[6] < $x[7] ? ($x[6],$x[7],'+') : ($x[7],$x[6],'-');
89 my($seqid, $gene, $prod) = ($x[0], $x[2], $x[2]);
90
91 # assumes NAME field in .HMM file is of form "16s_rRNA" etc
92 $prod =~ s/_r/ ribosomal /; # convert the short ID to an English string
93
94 # check for incomplete alignments
95 my $len = $end-$begin+1;
96 if ($len < int($lencutoff * $LENG{$gene}) ) {
97 $prod .= " (partial)";
98 }
99
100 msg("Found:", $gene, $seqid, "L=$len/$LENG{$gene}", "$begin..$end", $strand, $prod);
101 push @feat, [
102 $seqid, "$EXE:$VERSION", 'rRNA', $begin, $end, '.', $strand, '.', "Name=$gene;product=$prod"
103 ];
104 }
105
106 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
107 # output a sorted GFF3
108
109 sub gff_sort {
110 # sort by seqid, then start pos
111 return ($a->[0] cmp $b->[0]) || ($a->[3] <=> $b->[3]);
112 }
113
114 msg("Found", scalar(@feat), "ribosomal RNA features.");
115 msg("Sorting features and outputting GFF3...");
116 print "##gff-version 3\n";
117 for my $row (sort { gff_sort } @feat) {
118 print join("\t", @$row),"\n";
119 }
120 if ($incseq) {
121 print "##FASTA\n";
122 open FASTA, $fasta;
123 print while (<FASTA>); # `cat $fasta`
124 }
125
126 #----------------------------------------------------------------------
127
128 sub msg {
129 return if $quiet;
130 my $t = localtime;
131 my $line = "[".$t->hms."] @_\n";
132 print STDERR $line;
133 }
134
135 #----------------------------------------------------------------------
136
137 sub err {
138 $quiet=0;
139 msg(@_);
140 exit(2);
141 }
142
143 #----------------------------------------------------------------------
144
145 sub version {
146 print STDERR "$EXE $VERSION\n";
147 exit;
148 }
149
150 #----------------------------------------------------------------------
151
152 sub show_citation {
153 print STDERR << "EOCITE";
154
155 If you use Barrnap in your work, please cite:
156
157 Seemann T (2013)
158 $EXE $VERSION : $DESC
159 $URL
160
161 Thank you.
162
163 EOCITE
164
165 exit;
166 }
167
168 #----------------------------------------------------------------------
169 # Option setting routines
170
171 sub setOptions {
172 use Getopt::Long;
173
174 @Options = (
175 'Options:',
176 {OPT=>"help", VAR=>\&usage, DESC=>"This help"},
177 {OPT=>"version", VAR=>\&version, DESC=>"Print version and exit"},
178 {OPT=>"citation",VAR=>\&show_citation, DESC=>"Print citation for referencing $EXE"},
179 {OPT=>"kingdom=s", VAR=>\$kingdom, DEFAULT=>'bacteria', DESC=>"Kingdom: [b]acteria [a]rchaea"},
180 {OPT=>"quiet!", VAR=>\$quiet, DEFAULT=>0, DESC=>"No screen output"},
181 {OPT=>"threads=i", VAR=>\$threads, DEFAULT=>8, DESC=>"Number of threads/cores/CPUs to use"},
182 {OPT=>"lencutoff=f",VAR=>\$lencutoff, DEFAULT=>0.8, DESC=>"Proportional length threshold to tag as pseudo"},
183 {OPT=>"evalue=f",VAR=>\$evalue, DEFAULT=>1E-6, DESC=>"Similarity e-value cut-off"},
184 {OPT=>"incseq!", VAR=>\$incseq, DEFAULT=>0, DESC=>"Include FASTA input sequences in GFF3 output"},
185 );
186
187 (!@ARGV) && (usage());
188
189 &GetOptions(map {$_->{OPT}, $_->{VAR}} grep { ref } @Options) || usage();
190
191 # Now setup default values.
192 foreach (@Options) {
193 if (ref $_ && defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) {
194 ${$_->{VAR}} = $_->{DEFAULT};
195 }
196 }
197 }
198
199 #----------------------------------------------------------------------
200
201 sub usage {
202 print STDERR "Synopsis:\n $EXE $VERSION - $DESC\n";
203 print STDERR "Author:\n $AUTHOR\n";
204 print STDERR "Usage:\n $EXE [options] <chromosomes.fasta>\n";
205 foreach (@Options) {
206 if (ref) {
207 my $def = defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : "";
208 $def = ($def ? ' (default OFF)' : '(default ON)') if $_->{OPT} =~ m/!$/;
209 my $opt = $_->{OPT};
210 $opt =~ s/!$//;
211 $opt =~ s/=s$/ [X]/;
212 $opt =~ s/=i$/ [N]/;
213 $opt =~ s/=f$/ [n.n]/;
214 printf STDERR " --%-15s %s%s\n", $opt, $_->{DESC}, $def;
215 }
216 else {
217 print STDERR "$_\n";
218 }
219 }
220 exit(1);
221 }
222
223 #----------------------------------------------------------------------
0 #!/usr/bin/env perl
1 use strict;
2 use warnings;
3 use Time::Piece;
4 use FindBin;
5
6 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
7 # global variables
8
9 my $EXE = $FindBin::RealScript;
10 my $VERSION = "0.4-dev";
11 my $DESC = "Bacterial/Archaeal Ribosomal RNA Predictor";
12 my $AUTHOR = 'Torsten Seemann <torsten.seemann@monash.edu>';
13 my $URL = 'http://www.vicbioinformatics.com/';
14 my $DBDIR = "$FindBin::RealBin/../db";
15 my $OPSYS = $^O;
16 my $NHMMER = "$FindBin::RealBin/../binaries/$OPSYS/nhmmer";
17 my %LENG = ("5S_rRNA"=>119, "16S_rRNA"=>1585, "23S_rRNA"=>3232);
18
19 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
20 # command line options
21
22 my(@Options, $quiet, $kingdom, $threads, $evalue, $lencutoff, $incseq);
23 setOptions();
24
25 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
26 # check all is well
27
28 msg("This is $EXE $VERSION");
29 msg("Written by $AUTHOR");
30 msg("Obtained from $URL");
31 msg("Detected operating system: $OPSYS");
32
33 if (! -x $NHMMER) {
34 err("No binary for your OS '$OPSYS' is included. If you have one, copy it to $NHMMER.");
35 }
36 msg("Using HMMER binary: $NHMMER");
37
38 #if (! -r "$DATABASE.h3i") {
39 # err("The pressed HMM files are missing. Go to the barnapp folder and type 'make database'");
40 #}
41 #msg("Found database files: $DATABASE.*");
42
43 $threads > 0 or err("Invalid --threads $threads");
44 msg("Will use $threads threads");
45
46 $evalue > 0 or err("Invalid --evalue $evalue");
47 msg("Setting evalue cutoff to $evalue");
48
49 $lencutoff > 0 or err("Invalid --lencutoff $lencutoff");
50 msg("Will tag genes < $lencutoff x full length as partial genes");
51
52 if ($kingdom =~ m/^b/i) {
53 $kingdom = 'bacteria';
54 }
55 elsif ($kingdom =~ m/^a/i) {
56 $kingdom = 'archaea';
57 }
58 else {
59 err("I don't recognise the --kingdom '$kingdom'. Try 'bacteria' or 'archaea'.");
60 }
61
62 my $hmmdb = "$DBDIR/$kingdom.hmm";
63 err("Can't find database: $hmmdb") unless -r $hmmdb;
64 msg("Using database: $hmmdb");
65
66 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
67 # run the external command
68
69 my $fasta = shift @ARGV;
70 $fasta && -r $fasta or err("Usage: $EXE <file.fasta>");
71
72 msg("Running $NHMMER on $fasta ... please wait");
73 my $cmd = "$NHMMER --cpu $threads -E $evalue --w_length 2000 -o /dev/null --tblout /dev/stdout \Q$hmmdb\E \Q$fasta\E";
74 my @hits = qx($cmd 2>&1);
75
76 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
77 # process the output
78
79 my @feat;
80 foreach (@hits) {
81 chomp;
82 err("nhmmer failed to run - $_") if m/fail|error|core dump|bus error/i;
83 next if m/^#/; # comment line
84 next if m/^\s*$/; # empty line
85 my @x = split ' ', $_;
86 err("bad line in nhmmer output - @x") unless defined $x[6] and $x[6] =~ m/^\d+$/;
87
88 my($begin,$end,$strand) = $x[6] < $x[7] ? ($x[6],$x[7],'+') : ($x[7],$x[6],'-');
89 my($seqid, $gene, $prod) = ($x[0], $x[2], $x[2]);
90
91 # assumes NAME field in .HMM file is of form "16s_rRNA" etc
92 $prod =~ s/_r/ ribosomal /; # convert the short ID to an English string
93
94 # check for incomplete alignments
95 my $len = $end-$begin+1;
96 if ($len < int($lencutoff * $LENG{$gene}) ) {
97 $prod .= " (partial)";
98 }
99
100 msg("Found:", $gene, $seqid, "L=$len/$LENG{$gene}", "$begin..$end", $strand, $prod);
101 push @feat, [
102 $seqid, "$EXE:$VERSION", 'rRNA', $begin, $end, '.', $strand, '.', "Name=$gene;product=$prod"
103 ];
104 }
105
106 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
107 # output a sorted GFF3
108
109 sub gff_sort {
110 # sort by seqid, then start pos
111 return ($a->[0] cmp $b->[0]) || ($a->[3] <=> $b->[3]);
112 }
113
114 msg("Found", scalar(@feat), "ribosomal RNA features.");
115 msg("Sorting features and outputting GFF3...");
116 print "##gff-version 3\n";
117 for my $row (sort { gff_sort } @feat) {
118 print join("\t", @$row),"\n";
119 }
120 if ($incseq) {
121 print "##FASTA\n";
122 open FASTA, $fasta;
123 print while (<FASTA>); # `cat $fasta`
124 }
125
126 #----------------------------------------------------------------------
127
128 sub msg {
129 return if $quiet;
130 my $t = localtime;
131 my $line = "[".$t->hms."] @_\n";
132 print STDERR $line;
133 }
134
135 #----------------------------------------------------------------------
136
137 sub err {
138 $quiet=0;
139 msg(@_);
140 exit(2);
141 }
142
143 #----------------------------------------------------------------------
144
145 sub version {
146 print STDERR "$EXE $VERSION\n";
147 exit;
148 }
149
150 #----------------------------------------------------------------------
151
152 sub show_citation {
153 print STDERR << "EOCITE";
154
155 If you use Barrnap in your work, please cite:
156
157 Seemann T (2013)
158 $EXE $VERSION : $DESC
159 $URL
160
161 Thank you.
162
163 EOCITE
164
165 exit;
166 }
167
168 #----------------------------------------------------------------------
169 # Option setting routines
170
171 sub setOptions {
172 use Getopt::Long;
173
174 @Options = (
175 'Options:',
176 {OPT=>"help", VAR=>\&usage, DESC=>"This help"},
177 {OPT=>"version", VAR=>\&version, DESC=>"Print version and exit"},
178 {OPT=>"citation",VAR=>\&show_citation, DESC=>"Print citation for referencing $EXE"},
179 {OPT=>"kingdom=s", VAR=>\$kingdom, DEFAULT=>'bacteria', DESC=>"Kingdom: [b]acteria [a]rchaea"},
180 {OPT=>"quiet!", VAR=>\$quiet, DEFAULT=>0, DESC=>"No screen output"},
181 {OPT=>"threads=i", VAR=>\$threads, DEFAULT=>8, DESC=>"Number of threads/cores/CPUs to use"},
182 {OPT=>"lencutoff=f",VAR=>\$lencutoff, DEFAULT=>0.8, DESC=>"Proportional length threshold to tag as pseudo"},
183 {OPT=>"evalue=f",VAR=>\$evalue, DEFAULT=>1E-6, DESC=>"Similarity e-value cut-off"},
184 {OPT=>"incseq!", VAR=>\$incseq, DEFAULT=>0, DESC=>"Include FASTA input sequences in GFF3 output"},
185 );
186
187 (!@ARGV) && (usage());
188
189 &GetOptions(map {$_->{OPT}, $_->{VAR}} grep { ref } @Options) || usage();
190
191 # Now setup default values.
192 foreach (@Options) {
193 if (ref $_ && defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) {
194 ${$_->{VAR}} = $_->{DEFAULT};
195 }
196 }
197 }
198
199 #----------------------------------------------------------------------
200
201 sub usage {
202 print STDERR "Synopsis:\n $EXE $VERSION - $DESC\n";
203 print STDERR "Author:\n $AUTHOR\n";
204 print STDERR "Usage:\n $EXE [options] <chromosomes.fasta>\n";
205 foreach (@Options) {
206 if (ref) {
207 my $def = defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : "";
208 $def = ($def ? ' (default OFF)' : '(default ON)') if $_->{OPT} =~ m/!$/;
209 my $opt = $_->{OPT};
210 $opt =~ s/!$//;
211 $opt =~ s/=s$/ [X]/;
212 $opt =~ s/=i$/ [N]/;
213 $opt =~ s/=f$/ [n.n]/;
214 printf STDERR " --%-15s %s%s\n", $opt, $_->{DESC}, $def;
215 }
216 else {
217 print STDERR "$_\n";
218 }
219 }
220 exit(1);
221 }
222
223 #----------------------------------------------------------------------
Binary diff not shown
Binary diff not shown
binaries/nhmmer.darwin less more
Binary diff not shown
binaries/nhmmer.linux less more
Binary diff not shown