Codebase list fasta3 / fresh-snapshots/upstream
Import upstream version 36.3.8i.14-Nov-2020+git20221202.1.162df08 Debian Janitor 1 year, 4 months ago
5 changed file(s) with 86 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
+0
-5
.gitignore less more
0 bin/
1 results/
2 *.o
3 *~
4 *.annot
44 multiple high-scoring alignments to be shown, rather than just one.
55 This is the main functional difference between FASTA and BLAST -
66 BLAST could show multiple HSPs, FASTA did not.
7
8 >>Nov 21, 2022
9 [doinit.c]
10 set m_msp->quiet=3 when an expansion script is used, to suppress warning messages.
11 [scripts/ann_pfam_www.py]
12 look up clan information
13
14 >>Nov 14, 2022
15 [defs.h, initfa.c, mshowbest.c]
16
17 Add option 'r' to -m8CB to display raw optimal score in BLAST tabular
18 output, e.g. "-m8Cr" or "-m8CBlr".
719
820 >>Nov 12, 2022 [released as v36.3.8i Nov, 2022]
921 [scripts/ann_pfam*]
1426 because clan information is not yet available (or I do not know how to
1527 get it). In addition, ann_pfam_sql.py is available, which largely
1628 replaces ann_pfam_sql.pl.
17
18 >>Nov 14, 2022
19 [defs.h, initfa.c, mshowbest.c]
20
21 Add option 'r' to -m8CB to display raw optimal score in BLAST tabular
22 output, e.g "-m8Cr" or "-m8CBlr".
2329
2430 >>Nov 7, 2022
2531 [doinit.c/initenv()]
3131 #
3232 # currently, it does not provide clan information, because the
3333 # EBI/Interpro/Pfam API does not provide clan information
34 #
35 # to get clan (set) information:
36 # https://www.ebi.ac.uk/interpro/api/set/pfam/entry/pfam/PF02798
37 #
38 # which provides (among other things):
39 # "results": {"metadata": {
40 # "accession": "CL0172",
41 # "name": "Thioredoxin",
42 # "source_database": "pfam"}, ... }
3443
3544 import fileinput
3645 import sys
4251
4352 interpro_prot_url = "https://www.ebi.ac.uk/interpro/api/entry/pfam/protein/uniprot/"
4453 interpro_domain_url = "https://www.ebi.ac.uk/interpro/api/entry/pfam/"
54 interpro_clan_url = "https://www.ebi.ac.uk/interpro/api/set/pfam/entry/pfam/"
4555
4656 def get_pfam_id_www( acc):
4757
8090
8191 return(pf_dom_id)
8292
def get_clan_info_www(pf_acc):
    """Look up the Pfam clan (set) metadata for a Pfam accession.

    Queries interpro_clan_url + pf_acc and returns the 'metadata'
    dictionary of the first entry in the JSON 'results' list.

    Args:
        pf_acc: Pfam accession (e.g. 'PF02798'), or the placeholder
            'NODOM'.

    Returns:
        For 'NODOM', a synthetic {'name':'NODOM','accession':'NODOM'}
        dictionary (no request is made).  Otherwise the clan metadata
        dictionary, or None when the request fails, the response is
        empty or unparseable, or it contains no results.
    """

    if pf_acc == 'NODOM':
        return {'name': 'NODOM', 'accession': 'NODOM'}

    try:
        req = urllib.request.urlopen(interpro_clan_url + pf_acc)
    except urllib.error.URLError as e:
        # Only HTTPError (a URLError subclass) carries a response body;
        # a plain URLError (DNS failure, refused connection, ...) only
        # has .reason, so e.read() would itself raise AttributeError.
        if hasattr(e, 'read'):
            err_msg = e.read().decode('utf-8')
        else:
            err_msg = str(e.reason)
        sys.stderr.write(err_msg + '\n')
        # no clan information available -- do not fall through to the
        # JSON parsing below, where clan_info would be unbound
        return None

    clan_info = req.read().decode('utf-8')

    # NOTE(review): the API presumably answers with an empty body when a
    # family belongs to no clan -- confirm; json.loads('') would raise.
    if not clan_info.strip():
        return None

    try:
        json_info = json.loads(clan_info)
    except ValueError as e:
        # json.JSONDecodeError is a ValueError subclass
        sys.stderr.write("cannot parse clan info for %s: %s\n" % (pf_acc, e))
        return None

    ## results look like:
    ## "results": [
    ##   {
    ##     "metadata": {
    ##       "accession": "CL0172",
    ##       "name": "Thioredoxin",
    ##       "source_database": "pfam"
    ##     },
    ##     ...
    ##   }
    ## ]
    ##

    results = json_info.get('results') if isinstance(json_info, dict) else None

    # missing key or empty list: no clan for this family
    if not results:
        return None

    return results[0].get('metadata')
128
83129 def get_seq_acc(seq_id):
84130
85131 if (re.search(r'^gi\|',seq_id)):
148194
149195 return npf_domains
150196
151 def print_doms(seq_id, color_ix, args, dom_colors, dom_names):
197 def print_doms(seq_id, color_ix, args, dom_colors, dom_names, clan_info):
152198
153199 this_acc = get_seq_acc(seq_id)
154200
177223 ## check if domain_acc has short name
178224 if (pf_acc not in dom_names):
179225 pf_id = dom_names[pf_acc] = get_pfam_id_www(pf_acc)
226 pf_clan_info = clan_info[pf_acc] = get_clan_info_www(pf_acc)
180227 else:
181228 pf_id = dom_names[pf_acc]
229 pf_clan_info = clan_info[pf_acc]
230
231
232 this_clan_info = clan_info[pf_acc]
233
182234
183235 ## display id or acc?
184 pf_info = pf_id
236 if (this_clan_info):
237 pf_info = 'C.'+ this_clan_info['name']
238 else:
239 pf_info = pf_id
240
185241 if (args.pfam_acc):
186 pf_info = pf_acc
242 if (pf_info and not args.no_clans):
243 pf_info = this_clan_info['accession']
244 else:
245 pf_info = pf_acc
187246
188247 if (args.acc_comment):
189248 pf_info = "%s{%s}"%(pf_info,pf_acc)
199258
200259 dom_colors = {'NODOM':'0'}
201260 dom_names = {}
261 clan_info = {'NODOM':{'name':'NODOM','accession':'NODOM'}}
202262
203263 color_ix = 1
204264
207267 seq_id = line.split('\t')[0]
208268
209269 print(">%s"%(seq_id))
210 color_ix = print_doms(seq_id, color_ix, args, dom_colors, dom_names)
270 color_ix = print_doms(seq_id, color_ix, args, dom_colors, dom_names, clan_info)
211271
212272 def main() :
213273
217277 parser.add_argument('--bound_comment',dest='bound_comment',action='store_true',default=False)
218278 parser.add_argument('--neg_doms',dest='neg_doms',action='store_true',default=False)
219279 parser.add_argument('--neg-doms',dest='neg_doms',action='store_true',default=False)
280 parser.add_argument('--no-clans',dest='no_clans',action='store_true',default=False)
281 parser.add_argument('--no_clans',dest='no_clans',action='store_true',default=False)
220282 parser.add_argument('--min_nodom',dest='min_nodom',action='store',default=10)
221283 parser.add_argument('--no_over',dest='no_over',action='store_true',default=False)
222284 parser.add_argument('--no-over',dest='no_over',action='store_true',default=False)
240302 color_ix = 1
241303 dom_colors = {'NODOM':'0'}
242304 dom_names = {}
305 clan_info = {'NODOM':{'name':'NODOM','accession':'NODOM'}}
243306
244307 for seq_id in args.files:
245308 print(">%s"%(seq_id))
246 print_doms(seq_id, color_ix, args, dom_colors, dom_names)
309 print_doms(seq_id, color_ix, args, dom_colors, dom_names, clan_info)
247310
248311 if __name__ == '__main__':
249312 main()
15761576 int lib_db_str_len;
15771577 char *bp, *lib_bp;
15781578
1579 #ifndef UNIX
1580 return 0;
1581 #else
1582
15791583 if ((lib_db_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
15801584 fprintf(stderr,"*** ERROR [%s:%d] - [build_lib_db] Cannot allocate lib_db_file",
15811585 __FILE__, __LINE__);
16611665
16621666 no_lib:
16631667 return NULL;
1668 #endif
16641669 }
16651670
16661671 /* used to temporarily allocate annotation array in next_annot_entry()*/
379379 break;
380380 case 'e':
381381 strncpy(m_msp->link_lname, optarg, MAX_LSTR);
382 m_msp->quiet = 3; /* no warning when expansion file missing */
382383 break;
383384 case 'F':
384385 sscanf(optarg,"%lg",&m_msp->e_low);