Package Bio :: Package InterPro
[hide private]
[frames | no frames]

Source Code for Package Bio.InterPro

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with html files from InterPro, 
  8  and code to access resources at InterPro over the WWW. 
  9  http://www.ebi.ac.uk/interpro/ 
 10   
 11   
 12  Classes: 
 13  Record             Holds interpro sequence data. 
 14  InterProParser     Parses interpro sequence data into a Record object. 
 15   
 16  Functions: 
 17  get_interpro_entry 
 18   
 19  """ 
 20   
 21  import warnings 
 22  import Bio 
 23  warnings.warn("Bio.InterPro is deprecated, and will be removed in a future " 
 24                "release of Biopython. Please get in contact via the mailing " 
 25                "lists if this is a problem for you.", Bio.BiopythonDeprecationWarning) 
 26   
 27  from Bio import File 
 28  import sgmllib 
 29  from Bio.SeqFeature import Reference 
 30   
class Record(dict):
    """Dictionary of InterPro entry fields with a human-readable rendering.

    Behaves exactly like a plain ``dict``; only ``__str__`` is customised.
    """

    def __str__(self):
        """Return every field, ordered by key, as multi-line text.

        Formatting per key:

        * ``'References'`` -- a heading, one line per reference (passed
          through ``str()``), then a blank line.
        * ``'Examples'`` and any other value that is a list -- a heading
          followed by one line per item.
        * ``'Abstract'`` -- a heading followed by the first 80 characters
          of the value and ``...``.
        * anything else -- a single ``key: value`` line.
        """
        parts = []
        # sorted() works on both Python 2 and 3; dict.keys().sort() only
        # works where keys() returns a list.
        for key in sorted(self):
            val = self[key]
            if key == 'References':
                parts.append('\n%s\n' % key)
                for reference in val:
                    parts.append('%s\n' % str(reference))
                # NOTE(review): the listing this was recovered from lost its
                # indentation; the blank line is assumed to follow the whole
                # reference list rather than each reference.
                parts.append('\n')
            elif key == 'Examples' or isinstance(val, list):
                parts.append('\n%s\n' % key)
                for item in val:
                    parts.append('%s\n' % item)
            elif key == 'Abstract':
                parts.append('\n%s\n' % key)
                parts.append('%s...\n' % val[:80])
            else:
                parts.append('%s: %s\n' % (key, val))
        return ''.join(parts)
59
class InterProParser(sgmllib.SGMLParser):
    """Parses InterPro sequence data into a Record object.

    The parser is a small state machine driven by sgmllib tag callbacks.
    ``self._state`` takes the values ``'title'``, ``'chugging_along'``,
    ``'waiting_tag'``, ``'waiting_inf'``, ``'examples'`` and
    ``'references'``.  While in ``'references'``, ``self._reference_state``
    steps through ``'pubmed_id'``, ``'authors'``, ``'title'``, ``'journal'``
    and ``'medline_id'`` as the pieces of one reference are seen.

    NOTE(review): this class was recovered from a listing that had lost its
    indentation; block nesting was reconstructed from the logic and should
    be confirmed against the upstream source.
    """

    def reset(self):
        """Reset the SGML parser and start a fresh, empty Record."""
        sgmllib.SGMLParser.reset(self)
        self.text = ''
        self.inter_pro_dict = Record()
        # Single-valued fields start out as empty strings ...
        for key in ('Database', 'Accession', 'Name', 'Dates', 'Type',
                    'Parent', 'Process', 'Function', 'Component',
                    'Abstract'):
            self.inter_pro_dict[key] = ''
        # ... and multi-valued fields as empty lists.
        for key in ('Signatures', 'Examples', 'References',
                    'Database links'):
            self.inter_pro_dict[key] = []
        self._state = 'title'
        self._reference_state = ''
        self._key_waiting = ''
        # '' is the "no reference collected yet" placeholder.
        self._current_reference = ''

    def parse(self, handle):
        """Parse the InterPro page in handle and return the Record."""
        self.reset()
        self.feed(handle)
        return self.inter_pro_dict

    def feed(self, handle):
        """feed(self, handle)

        Feed in interpro data for scanning.  handle is a file-like object
        containing interpro data; it is wrapped in a File.UndoHandle unless
        it already is one.

        Lines are read until end of input or until a line that (after
        stripping) ends with '</HTML>'; that closing line itself is
        discarded.  Each stripped line is prefixed with a single space and
        the result is handed to sgmllib in one call.
        """
        if isinstance(handle, File.UndoHandle):
            uhandle = handle
        else:
            uhandle = File.UndoHandle(handle)
        pieces = []
        while True:
            line = uhandle.readline()
            if not line:
                break
            line = line.strip()
            if line.endswith('</HTML>'):
                break
            pieces.append(' ' + line)
        # Join once instead of growing a string inside the loop.
        sgmllib.SGMLParser.feed(self, ''.join(pieces))

    def handle_data(self, newtext):
        """Accumulate stripped character data until the next flush."""
        self.text = self.text + newtext.strip()

    def start_table(self, attrs):
        """Accept <table>; its attributes are not used."""
        pass

    def start_h2(self, attrs):
        """Accept <h2>; nothing to do until it closes."""
        pass

    def end_h2(self):
        """Closing </h2> moves the parser into 'chugging_along'."""
        self._state = 'chugging_along'

    def start_td(self, attrs):
        """Note whether a cell holds a field name ('tag') or value ('inf')."""
        if self._state != 'chugging_along':
            return
        css_class = dict(attrs).get('class')
        if css_class == 'tag':
            self._state = 'waiting_tag'
            self._flush_text()
        elif css_class == 'inf':
            self._state = 'waiting_inf'
            self._flush_text()

    def end_td(self):
        """Store the cell text as the pending key or as that key's value."""
        if self._state == 'waiting_tag':
            self._key_waiting = self._flush_text()
            self._state = 'chugging_along'
        elif self._state == 'waiting_inf':
            key = self._key_waiting
            if key in self.inter_pro_dict:
                val = self._flush_text()
                # 'Signatures' and 'Database links' are filled in by do_br,
                # so the remaining cell text is dropped for those keys.
                if key not in ('Signatures', 'Database links'):
                    self.inter_pro_dict[key] = val
            # NOTE(review): assumed to run whether or not the key is known,
            # otherwise an unknown key would leave the parser stuck in
            # 'waiting_inf'.
            self._key_waiting = ''
            self._state = 'chugging_along'

    def start_ul(self, attrs):
        """A <ul> under the 'Examples' key starts the examples list."""
        if self._key_waiting == 'Examples':
            self._state = 'examples'
            self._flush_text()

    def end_ul(self):
        """Closing </ul> clears the pending key and resumes scanning."""
        self._key_waiting = ''
        self._state = 'chugging_along'

    def start_ol(self, attrs):
        """An <ol> under the 'References' key starts the reference list."""
        if self._key_waiting == 'References':
            self._state = 'references'
            self._reference_state = 'pubmed_id'
            self._flush_text()
            self._references = []

    def end_ol(self):
        """Closing </ol> stores the collected references in the record."""
        if self._state == 'references':
            # Skip the '' placeholder when the list held no <li> at all,
            # mirroring the check in start_li.
            if self._current_reference != '':
                self._references.append(self._current_reference)
            self.inter_pro_dict['References'] = self._references
        # NOTE(review): assumed unconditional, like end_ul -- confirm.
        self._state = 'chugging_along'

    def start_li(self, attrs):
        """Inside the reference list, each <li> begins a new Reference."""
        if self._state == 'references':
            self._reference_state = 'pubmed_id'
            self._flush_text()
            # Bank the previous reference before starting the next one.
            if self._current_reference != '':
                self._references.append(self._current_reference)
            self._current_reference = Reference()

    def end_li(self):
        """Inside the examples list, each </li> completes one example."""
        if self._state == 'examples':
            self.inter_pro_dict['Examples'].append(self._flush_text())

    def start_a(self, attrs):
        """Pick up the PubMed id from an anchor name, or close the journal."""
        if self._state != 'references':
            return
        attributes = dict(attrs)
        if self._reference_state == 'pubmed_id':
            if 'name' in attributes:
                self._current_reference.pubmed_id = attributes['name']
                self._reference_state = 'authors'
        elif self._reference_state == 'journal':
            self._current_reference.journal = self._flush_text()
            self._reference_state = 'medline_id'

    def end_a(self):
        """Extract the Medline id from the anchor text of a reference."""
        if self._state != 'references':
            return
        if self._reference_state != 'medline_id':
            return
        cols = self._flush_text().split(':')
        try:
            medline_id = cols[1]
        except IndexError:
            # No ':' in the anchor text, so there is no id to record.
            medline_id = None
        else:
            # Drop the final character of the text after the first ':'.
            medline_id = medline_id[:-1]
        self._current_reference.medline_id = medline_id

    def do_br(self, attrs):
        """Use <br> as the separator after authors and between list items."""
        if self._state == 'references':
            if self._reference_state == 'authors':
                self._current_reference.authors = self._flush_text()
                self._reference_state = 'title'
        elif self._key_waiting == 'Signatures':
            self.inter_pro_dict['Signatures'].append(self._flush_text())
        elif self._key_waiting == 'Database links':
            self.inter_pro_dict['Database links'].append(self._flush_text())

    def start_i(self, attrs):
        """Accept <i>; the text is consumed when it closes."""
        pass

    def end_i(self):
        """Inside a reference, the text up to </i> is taken as its title."""
        if self._state == 'references':
            if self._reference_state == 'title':
                self._current_reference.title = self._flush_text()
                self._reference_state = 'journal'

    def handle_starttag(self, tag, method, attrs):
        """Dispatch a start tag, popping some tags off the open-tag stack.

        While in the reference list, <li> tags and the <a> tag seen in the
        'pubmed_id' state are popped from self.stack before the handler
        runs -- presumably because the page never closes them; confirm
        against sgmllib's stack handling.
        """
        if self._state == 'references':
            if tag == 'li':
                self.stack.pop()
            elif tag == 'a':
                if self._reference_state == 'pubmed_id':
                    self.stack.pop()
        method(attrs)

    def _flush_text(self):
        """Return the accumulated text, stripped, and clear the buffer."""
        text = self.text.strip()
        self.text = ''
        return text
255
def get_interpro_entry( id ):
    """Open the InterPro entry page for accession *id* and return the handle.

    The accession is appended verbatim to the IEntry query URL.  The
    returned object is whatever urllib.urlopen gives back; the caller is
    responsible for reading and closing it.
    """
    import urllib
    entry_url = "http://www.ebi.ac.uk/interpro/IEntry?ac=" + id

    # XXX need to check to see if the entry exists!
    return urllib.urlopen(entry_url)
if __name__ == '__main__':
    # Demo: fetch one InterPro entry over the network, parse it and print
    # the resulting record.
    import Bio.File
    handle = get_interpro_entry('IPR001064')
    try:
        # Hand the parser the UndoHandle that was built for it; previously
        # it was created and then ignored in favour of the raw handle.
        undo_handle = Bio.File.UndoHandle(handle)
        interpro_parser = InterProParser()
        record = interpro_parser.parse(undo_handle)
    finally:
        # Always release the network connection.
        handle.close()
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(str(record))