1
2
3
4
5
6 """
7 This module provides code to work with html files from InterPro,
8 and code to access resources at InterPro over the WWW.
9 http://www.ebi.ac.uk/interpro/
10
11
12 Classes:
13 Record Holds interpro sequence data.
14 InterProParser Parses interpro sequence data into a Record object.
15
16 Functions:
17 get_interpro_entry
18
19 """
20
21 import warnings
22 import Bio
23 warnings.warn("Bio.InterPro is deprecated, and will be removed in a future "
24 "release of Biopython. Please get in contact via the mailing "
25 "lists if this is a problem for you.", Bio.BiopythonDeprecationWarning)
26
27 from Bio import File
28 import sgmllib
29 from Bio.SeqFeature import Reference
30
32
34 keys = self.keys()
35 keys.sort()
36 out = ''
37 for key in keys:
38 val = self[ key ]
39 if key == 'References':
40 out = out + '\n%s\n' % key
41 for reference in val:
42 out = out + '%s\n' % str( reference )
43 out = out + '\n'
44 elif key == 'Examples':
45 out = out + '\n%s\n' % key
46 for example in val:
47 out = out + '%s\n' % example
48 elif key == 'Abstract':
49 out = out + '\n%s\n' % key
50 out = out + '%s...\n' % val[ : 80 ]
51 elif type( self[ key ] ) == list:
52 out = out + '\n%s\n' % key
53 for item in val:
54 out = out + '%s\n' % item
55
56 else:
57 out = out + '%s: %s\n' % ( key, self[ key ] )
58 return out
59
61 """Parses InterPro sequence data into a Record object.
62
63 """
65 sgmllib.SGMLParser.reset( self )
66 self.text = ''
67 self.inter_pro_dict = Record()
68 self.inter_pro_dict['Database'] = ''
69 self.inter_pro_dict['Accession'] = ''
70 self.inter_pro_dict['Name'] = ''
71 self.inter_pro_dict['Dates'] = ''
72 self.inter_pro_dict['Type'] = ''
73 self.inter_pro_dict['Parent'] = ''
74 self.inter_pro_dict['Process'] = ''
75 self.inter_pro_dict['Function'] = ''
76 self.inter_pro_dict['Component'] = ''
77 self.inter_pro_dict['Signatures'] = []
78 self.inter_pro_dict['Abstract'] = ''
79 self.inter_pro_dict['Examples'] = []
80 self.inter_pro_dict['References'] = []
81 self.inter_pro_dict['Database links'] = []
82 self._state = 'title'
83 self._reference_state = ''
84 self._key_waiting = ''
85 self._current_reference = ''
86
91
def feed(self, handle):
    """Read InterPro HTML from handle and pass it to the SGML parser.

    handle is a file-like object containing InterPro data.  Unless it is
    already a File.UndoHandle it is wrapped in one before reading.

    Lines are read one at a time and stripped of surrounding whitespace.
    Reading stops at end of input, or at the first line that ends with
    '</HTML>' (that closing line itself is not passed to the parser).
    Every line kept is prefixed with a single space, and the joined text
    is handed to sgmllib.SGMLParser.feed() in one call.
    """
    if isinstance(handle, File.UndoHandle):
        uhandle = handle
    else:
        uhandle = File.UndoHandle(handle)

    # Collect the pieces and join once instead of growing a string with
    # repeated concatenation.
    pieces = []
    while True:
        line = uhandle.readline()
        if not line:
            break
        line = line.strip()
        if line.endswith('</HTML>'):
            break
        pieces.append(' ' + line)

    sgmllib.SGMLParser.feed(self, ''.join(pieces))
115
116
118 newtext = newtext.strip()
119 self.text = self.text + newtext
120
122 dictionary = dict( attrs )
123 for key in dictionary:
124 val = dictionary[key]
125
128
130 self._state = 'chugging_along'
131
133 dictionary = dict( attrs )
134 if self._state == 'chugging_along':
135 if 'class' in dictionary:
136 if dictionary['class'] == 'tag':
137 self._state = 'waiting_tag'
138 self._flush_text()
139 elif dictionary['class'] == 'inf':
140 self._state = 'waiting_inf'
141 self._flush_text()
142
144 if self._state == 'waiting_tag':
145 self._key_waiting = self._flush_text()
146 self._state = 'chugging_along'
147 elif self._state == 'waiting_inf':
148 key = self._key_waiting
149 if key in self.inter_pro_dict:
150 val = self._flush_text()
151 if key == 'Signatures':
152 pass
153 elif key == 'Database links':
154 pass
155 else:
156 self.inter_pro_dict[ key ] = val
157 self._key_waiting = ''
158 self._state = 'chugging_along'
159
160
162 if self._key_waiting == 'Examples':
163 self._state = 'examples'
164 self._flush_text()
165
167 self._key_waiting = ''
168 self._state = 'chugging_along'
169
171 if self._key_waiting == 'References':
172 self._state = 'references'
173 self._reference_state = 'pubmed_id'
174 self._flush_text()
175 self._references = []
176
178 if self._state == 'references':
179 self._references.append( self._current_reference )
180 self.inter_pro_dict['References'] = self._references
181 self._state = 'chugging_along'
182
184 if self._state == 'references':
185 self._reference_state = 'pubmed_id'
186 self._flush_text()
187 if( self._current_reference != '' ):
188 self._references.append( self._current_reference )
189 self._current_reference = Reference()
190
195
197 dictionary = dict( attrs )
198 if self._state == 'references':
199 if self._reference_state == 'pubmed_id':
200 if 'name' in dictionary:
201 self._current_reference.pubmed_id = dictionary['name']
202 self._reference_state = 'authors'
203 elif self._reference_state == 'journal':
204 self._current_reference.journal = self._flush_text()
205 self._reference_state = 'medline_id'
206
219
def do_br(self, attrs):
    """Handle a <br> tag, which separates items in several sections.

    While a reference list is being read, a <br> that arrives in the
    'authors' sub-state stores the buffered text as the current
    reference's authors and moves on to the 'title' sub-state; in any
    other sub-state the tag is ignored.

    Outside a reference list, if the key currently waiting for a value is
    'Signatures' or 'Database links', the buffered text is appended to
    that list in the record.

    attrs is accepted for the tag-handler signature but is not used.
    """
    if self._state == 'references':
        if self._reference_state == 'authors':
            self._current_reference.authors = self._flush_text()
            self._reference_state = 'title'
        return

    pending_key = self._key_waiting
    if pending_key in ('Signatures', 'Database links'):
        self.inter_pro_dict[pending_key].append(self._flush_text())
229
232
234 if self._state == 'references':
235 if self._reference_state == 'title':
236 text = self._flush_text()
237 self._current_reference.title = text
238 self._reference_state = 'journal'
239
240
242 if self._state == 'references':
243 if tag == 'li':
244 self.stack.pop()
245 elif tag == 'a':
246 if self._reference_state == 'pubmed_id':
247 self.stack.pop()
248 method(attrs)
249
250
def _flush_text(self):
    """Return the buffered text, stripped, and empty the buffer.

    Reads self.text, resets it to the empty string and returns the
    previous contents with leading and trailing whitespace removed.
    """
    text = self.text.strip()
    self.text = ''
    # Strings are immutable, so no defensive copy is needed on return.
    return text
255
257 """get specified interpro entry"""
258 import urllib
259 handle = urllib.urlopen("http://www.ebi.ac.uk/interpro/IEntry?ac=" + id )
260
261
262 return handle
263
if __name__ == '__main__':
    # Manual smoke test: fetch one InterPro entry over the network, parse
    # it and print the resulting record.
    import Bio.File
    handle = get_interpro_entry('IPR001064')
    try:
        undo_handle = Bio.File.UndoHandle(handle)
        interpro_parser = InterProParser()
        # Pass the wrapped handle that was just built; previously it was
        # created and then ignored in favour of the raw handle.
        # NOTE(review): parse() is not visible in this excerpt; feed()
        # accepts a File.UndoHandle directly -- confirm parse() forwards it.
        record = interpro_parser.parse(undo_handle)
        # Single-argument call form prints the same text under Python 2.
        print(str(record))
    finally:
        # Release the network connection even if parsing fails.
        handle.close()
271