31 | 31 |
#
|
32 | 32 |
# currently, it does not provide clan information, because the
|
33 | 33 |
# EBI/Interpro/Pfam API does not provide clan information
|
|
34 |
#
|
|
35 |
# to get clan (set) information:
|
|
36 |
# https://www.ebi.ac.uk/interpro/api/set/pfam/entry/pfam/PF02798
|
|
37 |
#
|
|
38 |
# which provides (among other things):
|
|
39 |
# "results": [{"metadata": {
|
|
40 |
# "accession": "CL0172",
|
|
41 |
# "name": "Thioredoxin",
|
|
42 |
# "source_database": "pfam"}, ... }]
|
34 | 43 |
|
35 | 44 |
import fileinput
|
36 | 45 |
import sys
|
|
42 | 51 |
|
43 | 52 |
interpro_prot_url = "https://www.ebi.ac.uk/interpro/api/entry/pfam/protein/uniprot/"
|
44 | 53 |
interpro_domain_url = "https://www.ebi.ac.uk/interpro/api/entry/pfam/"
|
|
54 |
interpro_clan_url = "https://www.ebi.ac.uk/interpro/api/set/pfam/entry/pfam/"
|
45 | 55 |
|
46 | 56 |
def get_pfam_id_www( acc):
|
47 | 57 |
|
|
80 | 90 |
|
81 | 91 |
return(pf_dom_id)
|
82 | 92 |
|
|
93 |
def get_clan_info_www(pf_acc):
    """Fetch the clan (set) metadata for a Pfam accession.

    Requests ``interpro_clan_url + pf_acc`` and returns the ``metadata``
    dictionary of the first entry in the JSON ``results`` list.

    Args:
        pf_acc: Pfam accession (e.g. 'PF02798'), or the placeholder
            'NODOM', which is answered locally without a request.

    Returns:
        A dict with at least 'name' and 'accession' for 'NODOM'; otherwise
        the first result's 'metadata' dict, or None when the request
        fails, the response is empty or not valid JSON, or it contains no
        results.
    """

    if pf_acc == 'NODOM':
        return {'name': 'NODOM', 'accession': 'NODOM'}

    try:
        # the context manager closes the connection once the body is read
        with urllib.request.urlopen(interpro_clan_url + pf_acc) as req:
            clan_info = req.read().decode('utf-8')
    except urllib.error.URLError as e:
        ## only HTTPError carries a response body; a plain URLError
        ## (e.g. a connection failure) just has a reason
        if hasattr(e, 'read'):
            err_msg = e.read().decode('utf-8')
        else:
            err_msg = str(e.reason)
        sys.stderr.write(err_msg + '\n')
        ## nothing was downloaded, so there is nothing to parse
        return None

    ## an empty body cannot be parsed as JSON; report "no clan information"
    if not clan_info.strip():
        return None

    try:
        json_info = json.loads(clan_info)
    except ValueError as e:
        sys.stderr.write("cannot parse clan info for %s: %s\n" % (pf_acc, e))
        return None

    ## results look like:
    ## "results": [
    ##   {
    ##     "metadata": {
    ##       "accession": "CL0172",
    ##       "name": "Thioredoxin",
    ##       "source_database": "pfam"
    ##     },
    ##     ...
    ##   }
    ## ]
    ##

    results = json_info.get('results') if isinstance(json_info, dict) else None

    ## a missing or empty results list means no clan was reported
    if not results:
        return None

    return results[0].get('metadata')
|
|
128 |
|
83 | 129 |
def get_seq_acc(seq_id):
|
84 | 130 |
|
85 | 131 |
if (re.search(r'^gi\|',seq_id)):
|
|
148 | 194 |
|
149 | 195 |
return npf_domains
|
150 | 196 |
|
151 | |
def print_doms(seq_id, color_ix, args, dom_colors, dom_names):
|
|
197 |
def print_doms(seq_id, color_ix, args, dom_colors, dom_names, clan_info):
|
152 | 198 |
|
153 | 199 |
this_acc = get_seq_acc(seq_id)
|
154 | 200 |
|
|
177 | 223 |
## check if domain_acc has short name
|
178 | 224 |
if (pf_acc not in dom_names):
|
179 | 225 |
pf_id = dom_names[pf_acc] = get_pfam_id_www(pf_acc)
|
|
226 |
pf_clan_info = clan_info[pf_acc] = get_clan_info_www(pf_acc)
|
180 | 227 |
else:
|
181 | 228 |
pf_id = dom_names[pf_acc]
|
|
229 |
pf_clan_info = clan_info[pf_acc]
|
|
230 |
|
|
231 |
|
|
232 |
this_clan_info = clan_info[pf_acc]
|
|
233 |
|
182 | 234 |
|
183 | 235 |
## display id or acc?
|
184 | |
pf_info = pf_id
|
|
236 |
if (this_clan_info):
|
|
237 |
pf_info = 'C.'+ this_clan_info['name']
|
|
238 |
else:
|
|
239 |
pf_info = pf_id
|
|
240 |
|
185 | 241 |
if (args.pfam_acc):
|
186 | |
pf_info = pf_acc
|
|
242 |
if (pf_info and not args.no_clans):
|
|
243 |
pf_info = this_clan_info['accession']
|
|
244 |
else:
|
|
245 |
pf_info = pf_acc
|
187 | 246 |
|
188 | 247 |
if (args.acc_comment):
|
189 | 248 |
pf_info = "%s{%s}"%(pf_info,pf_acc)
|
|
199 | 258 |
|
200 | 259 |
dom_colors = {'NODOM':'0'}
|
201 | 260 |
dom_names = {}
|
|
261 |
clan_info = {'NODOM':{'name':'NODOM','accession':'NODOM'}}
|
202 | 262 |
|
203 | 263 |
color_ix = 1
|
204 | 264 |
|
|
207 | 267 |
seq_id = line.split('\t')[0]
|
208 | 268 |
|
209 | 269 |
print(">%s"%(seq_id))
|
210 | |
color_ix = print_doms(seq_id, color_ix, args, dom_colors, dom_names)
|
|
270 |
color_ix = print_doms(seq_id, color_ix, args, dom_colors, dom_names, clan_info)
|
211 | 271 |
|
212 | 272 |
def main() :
|
213 | 273 |
|
|
217 | 277 |
parser.add_argument('--bound_comment',dest='bound_comment',action='store_true',default=False)
|
218 | 278 |
parser.add_argument('--neg_doms',dest='neg_doms',action='store_true',default=False)
|
219 | 279 |
parser.add_argument('--neg-doms',dest='neg_doms',action='store_true',default=False)
|
|
280 |
parser.add_argument('--no-clans',dest='no_clans',action='store_true',default=False)
|
|
281 |
parser.add_argument('--no_clans',dest='no_clans',action='store_true',default=False)
|
220 | 282 |
parser.add_argument('--min_nodom',dest='min_nodom',action='store',default=10)
|
221 | 283 |
parser.add_argument('--no_over',dest='no_over',action='store_true',default=False)
|
222 | 284 |
parser.add_argument('--no-over',dest='no_over',action='store_true',default=False)
|
|
240 | 302 |
color_ix = 1
|
241 | 303 |
dom_colors = {'NODOM':'0'}
|
242 | 304 |
dom_names = {}
|
|
305 |
clan_info = {'NODOM':{'name':'NODOM','accession':'NODOM'}}
|
243 | 306 |
|
244 | 307 |
for seq_id in args.files:
|
245 | 308 |
print(">%s"%(seq_id))
|
246 | |
print_doms(seq_id, color_ix, args, dom_colors, dom_names)
|
|
309 |
print_doms(seq_id, color_ix, args, dom_colors, dom_names, clan_info)
|
247 | 310 |
|
248 | 311 |
if __name__ == '__main__':
|
249 | 312 |
main()
|