Codebase list barrnap / c44e0b0
Update database builder script to support new SILVA download Torsten Seemann 7 years ago
1 changed file(s) with 21 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
00 #!/bin/bash
11
22 CPUS=$(grep -c bogomips /proc/cpuinfo)
3 CURL="curl"
4 GUNZIP="gzip -d"
35
46 RFAM="Rfam.seed"
7 RFAMURL="ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/${RFAM}.gz"
58 if [ ! -r "$RFAM" ]; then
69 echo "Downloading: $RFAM"
7 wget --quiet ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/Rfam.seed.gz
8 gunzip $RFAM.gz
10 $CURL "$RFAMURL" | $GUNZIP -c > "$RFAM"
911 else
1012 echo "Using existing file: $RFAM"
1113 fi
1214
13
1415 # SILVA is used for the LSU (23S/28S) only, as the SSU (16S/18S) is in Rfam
15 SILVA="LSURef_115_tax_silva_full_align_trunc.fasta"
16 SILVA="SILVA_128_LSURef_tax_silva_full_align_trunc.fasta"
17 SILVAURL="http://www.arb-silva.de/fileadmin/silva_databases/current/Exports/${SILVA}.gz"
1618 if [ ! -r "$SILVA" ]; then
1719 echo "Downloading: $SILVA"
18 wget --quiet http://www.arb-silva.de/fileadmin/silva_databases/current/Exports/LSURef_115_tax_silva_full_align_trunc.fasta.tgz
19 tar zxf $SILVA.tgz
20 rm -f $SILVA.tgz
20 $CURL "$SILVAURL" | $GUNZIP -c > "$SILVA"
2121 else
2222 echo "Using existing file: $SILVA"
2323 fi
2424
2525 # this will write three files, one per kingdom: LSU.Bacteria.aln, LSU.Archaea.aln, LSU.Eukaryota.aln
26 ./fix-SILVA.pl --seed --type LSU $SILVA
26 echo "Fixing and splitting SILVA data"
27 ./fix-SILVA.pl --seed --type LSU "$SILVA"
2728
2829 # Prepare RFAM for fetches
2930 echo "Indexing $RFAM"
30 rm -f $RFAM.ssi
31 esl-afetch --index $RFAM
31 rm -f "$RFAM.ssi"
32 esl-afetch --index "$RFAM"
3233
3334 echo "Fetching models..."
3435
3536 # Bact
3637 echo "Bac"
37 esl-afetch $RFAM RF00001 > 5S.bac.aln
38 esl-afetch "$RFAM" RF00001 > 5S.bac.aln
3839 esl-reformat -r stockholm LSU.Bacteria.aln > 23S.bac.aln
39 esl-afetch $RFAM RF00177 > 16S.bac.aln
40 esl-afetch "$RFAM" RF00177 > 16S.bac.aln
4041
4142 # Arch
4243 echo "Arc"
43 esl-afetch $RFAM RF00001 > 5S.arc.aln
44 esl-afetch $RFAM RF00002 > 5_8S.arc.aln
44 esl-afetch "$RFAM" RF00001 > 5S.arc.aln
45 esl-afetch "$RFAM" RF00002 > 5_8S.arc.aln
4546 esl-reformat -r stockholm LSU.Archaea.aln > 23S.arc.aln
46 esl-afetch $RFAM RF01959 > 16S.arc.aln
47 esl-afetch "$RFAM" RF01959 > 16S.arc.aln
4748
4849 # Euk
4950 echo "Euk"
50 esl-afetch $RFAM RF00001 > 5S.euk.aln
51 esl-afetch $RFAM RF00002 > 5_8S.euk.aln
51 esl-afetch "$RFAM" RF00001 > 5S.euk.aln
52 esl-afetch "$RFAM" RF00002 > 5_8S.euk.aln
5253 esl-reformat -r stockholm LSU.Eukaryota.aln > 28S.euk.aln
53 esl-afetch $RFAM RF01960 > 18S.euk.aln
54 esl-afetch "$RFAM" RF01960 > 18S.euk.aln
5455
5556 # Mito
5657 FILE="12S.mito.aln"
6667
6768
6869 for K in arc bac euk mito ; do
69 for T in 5S 5_8S 16S 23S 28S ; do
70 for T in 5S 5_8S 12S 16S 23S 28S ; do
7071 ID="$T.$K"
7172 if [ -r "$ID.aln" ]; then
7273 echo "*** $ID ***"
8990 for ID in $(cat MODELS) ; do
9091
9192 echo "Extracting: $ID.aln"
92 esl-afetch $RFAM $ID > $ID.aln
93 esl-afetch "$RFAM" $ID > $ID.aln
9394
9495 echo "Building: $ID.hmm"
9596 rm -f $ID.hmm.h?? $ID.hmm