Tree @lintian-fixes/main (Download .tar.gz)
- centrifuge.xcodeproj
- debian
- doc
- evaluation
- example
- indices
- third_party
- .gitignore
- aligner_bt.cpp
- aligner_bt.h
- aligner_cache.cpp
- aligner_cache.h
- aligner_metrics.h
- aligner_result.h
- aligner_seed.cpp
- aligner_seed.h
- aligner_seed_policy.cpp
- aligner_seed_policy.h
- aligner_sw.cpp
- aligner_sw.h
- aligner_sw_common.h
- aligner_sw_nuc.h
- aligner_swsse.cpp
- aligner_swsse.h
- aligner_swsse_ee_i16.cpp
- aligner_swsse_ee_u8.cpp
- aligner_swsse_loc_i16.cpp
- aligner_swsse_loc_u8.cpp
- aln_sink.h
- alphabet.cpp
- alphabet.h
- assert_helpers.h
- AUTHORS
- binary_sa_search.h
- bitpack.h
- blockwise_sa.h
- bt2_idx.cpp
- bt2_idx.h
- bt2_io.h
- bt2_util.h
- btypes.h
- ccnt_lut.cpp
- centrifuge
- centrifuge-build
- centrifuge-BuildSharedSequence.pl
- centrifuge-compress.pl
- centrifuge-download
- centrifuge-inspect
- centrifuge-kreport
- centrifuge-promote
- centrifuge-RemoveEmptySequence.pl
- centrifuge-RemoveN.pl
- centrifuge-sort-nt.pl
- centrifuge.cpp
- centrifuge_build.cpp
- centrifuge_build_main.cpp
- centrifuge_compress.cpp
- centrifuge_inspect.cpp
- centrifuge_main.cpp
- centrifuge_report.cpp
- classifier.h
- diff_sample.cpp
- diff_sample.h
- dp_framer.cpp
- dp_framer.h
- ds.cpp
- ds.h
- edit.cpp
- edit.h
- endian_swap.h
- fast_mutex.h
- filebuf.h
- formats.h
- functions.sh
- group_walk.cpp
- group_walk.h
- hi_aligner.h
- hier_idx.h
- hier_idx_common.h
- hyperloglogbias.h
- hyperloglogplus.h
- LICENSE
- limit.cpp
- limit.h
- ls.cpp
- ls.h
- Makefile
- MANUAL
- MANUAL.markdown
- mask.cpp
- mask.h
- mem_ids.h
- mm.h
- multikey_qsort.h
- NEWS
- opts.h
- outq.cpp
- outq.h
- pat.cpp
- pat.h
- pe.cpp
- pe.h
- presets.cpp
- presets.h
- processor_support.h
- qual.cpp
- qual.h
- random_source.cpp
- random_source.h
- random_util.cpp
- random_util.h
- read.h
- read_qseq.cpp
- README.md
- ref_coord.cpp
- ref_coord.h
- ref_read.cpp
- ref_read.h
- reference.cpp
- reference.h
- scoring.cpp
- scoring.h
- search_globals.h
- sequence_io.h
- shmem.cpp
- shmem.h
- simple_func.cpp
- simple_func.h
- sse_util.cpp
- sse_util.h
- sstring.cpp
- sstring.h
- str_util.h
- taxonomy.h
- threading.h
- timer.h
- tinythread.cpp
- tinythread.h
- tokenize.h
- TUTORIAL
- util.h
- VERSION
- word_io.h
- zbox.h
centrifuge-promote @lintian-fixes/main — raw · history · blame
#!/usr/bin/env perl

# centrifuge-promote
#
# Usage: centrifuge-promote centrifuge_index centrifuge_output level > output
#
# Reads the taxonomy tree of a Centrifuge index (through the centrifuge-inspect
# program located next to this script) and rewrites a Centrifuge output file so
# that each tax id (third column) is replaced by its ancestor at the requested
# level. The second column is replaced by the level name of the reported tax
# id, lines of one read that end up with the same tax id are merged, and the
# last column is set to the number of distinct tax ids left for the read.

use strict ;
use warnings ;

use File::Basename;
use Cwd;
use Cwd 'cwd' ;
use Cwd 'abs_path' ;

# All three positional arguments are required.
die "Usage: centrifuge-promote.pl centrifuge_index centrifuge_output level > output\n\n".
    "Promote the taxonomy id to specified level in Centrifuge output.\n"
    if ( @ARGV < 3 ) ;

# Directory holding this script; centrifuge-inspect is expected to live there.
my $CWD = dirname( abs_path( $0 ) ) ;

my ( $centrifuge_index, $centrifuge_output, $level ) = @ARGV ;

# Go through the index to obtain the taxonomy tree
my %taxParent ; # tax id -> parent tax id
my %taxLevel ;  # tax id -> level name

# List-form pipe open: the index name is handed to centrifuge-inspect as a
# single argument and is never interpreted by a shell.
my @inspectCmd = ( "$CWD/centrifuge-inspect", "--taxonomy-tree", $centrifuge_index ) ;
open( my $treeFh, "-|", @inspectCmd )
    or die "can't run $inspectCmd[0]: $!\n" ;
while ( my $treeLine = <$treeFh> ) {
    chomp $treeLine ;
    # Each record is "id <TAB>|<TAB> parent <TAB>|<TAB> level".
    my @cols = split /\t\|\t/, $treeLine ;
    # A node without a level can never be matched or walked through by
    # PromoteTaxId, so incomplete records are simply skipped.
    next if ( @cols < 3 ) ;
    $taxParent{ $cols[0] } = $cols[1] ;
    $taxLevel{ $cols[0] } = $cols[2] ;
}
# close() on a pipe also reports a non-zero exit status of the child; without
# the tree nothing could be promoted, so treat that as fatal.
close( $treeFh )
    or die "centrifuge-inspect --taxonomy-tree failed for $centrifuge_index"
        . ( $! ? ": $!" : " (exit status " . ( $? >> 8 ) . ")" ) . "\n" ;

# PromoteTaxId( $tid )
#
# Walks from $tid towards the root of the taxonomy tree and returns the first
# tax id whose level equals the requested $level. Returns 0 when $tid is
# undefined, not positive or unknown to the tree, or when the walk reaches a
# tax id <= 1 (or revisits a node) without finding the level.
sub PromoteTaxId
{
    my $tid = $_[0] ;
    my %visited ; # protects against cycles in a malformed tree

    while ( defined( $tid ) && $tid > 0 && defined( $taxLevel{ $tid } ) ) {
        return $tid if ( $taxLevel{ $tid } eq $level ) ;
        return 0 if ( $tid <= 1 ) ;
        return 0 if ( $visited{ $tid }++ ) ;
        $tid = $taxParent{ $tid } ;
    }
    return 0 ;
}

# OutputPromotedLines( \@lines )
#
# Takes a reference to the chomped output lines of one read, promotes the tax
# id of every line and prints the result to STDOUT. When promotion yields no
# id above 1 the original tax id is kept. Only the first line for each
# resulting tax id is printed, and the last column of every printed line is
# replaced by the number of printed lines. Does nothing for an empty list.
sub OutputPromotedLines
{
    my @lines = @{ $_[0] } ;
    return if ( scalar( @lines ) <= 0 ) ;

    my @promoted ;      # column arrays of the lines that will be printed
    my %showedUpTaxId ; # tax ids already reported for this read

    foreach my $line ( @lines ) {
        my @cols = split /\t+/, $line ;

        my $newTid = PromoteTaxId( $cols[2] ) ;
        $newTid = $cols[2] if ( $newTid <= 1 ) ;

        # Keep the original second column unless the level of the reported
        # tax id is known.
        my $newLevel = $cols[1] ;
        $newLevel = $taxLevel{ $newTid } if ( $newTid >= 1 && defined $taxLevel{ $newTid } ) ;

        next if ( defined $showedUpTaxId{ $newTid } ) ;
        $showedUpTaxId{ $newTid } = 1 ;

        $cols[2] = $newTid ;
        $cols[1] = $newLevel ;
        push @promoted, \@cols ;
    }

    # The match count is only known once all lines of the read were merged.
    my $numMatches = scalar( @promoted ) ;
    foreach my $cols ( @promoted ) {
        $cols->[-1] = $numMatches ;
        print join( "\t", @{ $cols } ), "\n" ;
    }
}

# Go through the output of centrifuge
open( my $resultFh, "<", $centrifuge_output )
    or die "can't open $centrifuge_output: $!\n" ;

# The first line is a header and is copied through unchanged; an empty file
# has none.
my $header = <$resultFh> ;
print $header if ( defined $header ) ;

# Lines are grouped by their first column (the read id); a group is flushed
# as soon as a line with a different read id shows up.
my $prevReadId = "" ;
my @lines ;
while ( my $line = <$resultFh> ) {
    chomp $line ;
    next if ( $line eq "" ) ; # blank lines carry no record

    # Explicit limit so that the first field is defined even when it is empty.
    my $readId = ( split /\t/, $line, 2 )[0] ;
    if ( $readId eq $prevReadId ) {
        push @lines, $line ;
    } else {
        OutputPromotedLines( \@lines ) ;
        $prevReadId = $readId ;
        @lines = ( $line ) ;
    }
}
OutputPromotedLines( \@lines ) ;
close( $resultFh ) ;