Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package.   
  5   
  6  """Parser for PDB files.""" 
  7   
  8  import warnings 
  9   
 10  import numpy 
 11   
 12  from Bio.PDB.PDBExceptions import \ 
 13          PDBConstructionException, PDBConstructionWarning 
 14  from Bio.PDB.StructureBuilder import StructureBuilder 
 15  from Bio.PDB.parse_pdb_header import _parse_pdb_header_list 
 16   
 17   
 18  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 19   
 20   
21 -class PDBParser:
22 """ 23 Parse a PDB file and return a Structure object. 24 """ 25
26 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
27 """ 28 The PDB parser call a number of standard methods in an aggregated 29 StructureBuilder object. Normally this object is instanciated by the 30 PDBParser object itself, but if the user provides his own StructureBuilder 31 object, the latter is used instead. 32 33 Arguments: 34 o PERMISSIVE - int, if this is 0 exceptions in constructing the 35 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are 36 caught, but some residues or atoms will be missing. THESE EXCEPTIONS 37 ARE DUE TO PROBLEMS IN THE PDB FILE!. 38 o structure_builder - an optional user implemented StructureBuilder class. 39 """ 40 if structure_builder!=None: 41 self.structure_builder=structure_builder 42 else: 43 self.structure_builder=StructureBuilder() 44 self.header=None 45 self.trailer=None 46 self.line_counter=0 47 self.PERMISSIVE=PERMISSIVE
48 49 # Public methods 50
51 - def get_structure(self, id, file):
52 """Return the structure. 53 54 Arguments: 55 o id - string, the id that will be used for the structure 56 o file - name of the PDB file OR an open filehandle 57 """ 58 self.header=None 59 self.trailer=None 60 # Make a StructureBuilder instance (pass id of structure as parameter) 61 self.structure_builder.init_structure(id) 62 if isinstance(file, basestring): 63 file=open(file) 64 self._parse(file.readlines()) 65 self.structure_builder.set_header(self.header) 66 # Return the Structure instance 67 return self.structure_builder.get_structure()
68
69 - def get_header(self):
70 "Return the header." 71 return self.header
72
73 - def get_trailer(self):
74 "Return the trailer." 75 return self.trailer
76 77 # Private methods 78
79 - def _parse(self, header_coords_trailer):
80 "Parse the PDB file." 81 # Extract the header; return the rest of the file 82 self.header, coords_trailer=self._get_header(header_coords_trailer) 83 # Parse the atomic data; return the PDB file trailer 84 self.trailer=self._parse_coordinates(coords_trailer)
85
86 - def _get_header(self, header_coords_trailer):
87 "Get the header of the PDB file, return the rest." 88 structure_builder=self.structure_builder 89 for i in range(0, len(header_coords_trailer)): 90 structure_builder.set_line_counter(i+1) 91 line=header_coords_trailer[i] 92 record_type=line[0:6] 93 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '): 94 break 95 header=header_coords_trailer[0:i] 96 # Return the rest of the coords+trailer for further processing 97 self.line_counter=i 98 coords_trailer=header_coords_trailer[i:] 99 header_dict=_parse_pdb_header_list(header) 100 return header_dict, coords_trailer
101
102 - def _parse_coordinates(self, coords_trailer):
103 "Parse the atomic data in the PDB file." 104 local_line_counter=0 105 structure_builder=self.structure_builder 106 current_model_id=0 107 # Flag we have an open model 108 model_open=0 109 current_chain_id=None 110 current_segid=None 111 current_residue_id=None 112 current_resname=None 113 for i in range(0, len(coords_trailer)): 114 line=coords_trailer[i] 115 record_type=line[0:6] 116 global_line_counter=self.line_counter+local_line_counter+1 117 structure_builder.set_line_counter(global_line_counter) 118 if(record_type=='ATOM ' or record_type=='HETATM'): 119 # Initialize the Model - there was no explicit MODEL record 120 if not model_open: 121 structure_builder.init_model(current_model_id) 122 current_model_id+=1 123 model_open=1 124 fullname=line[12:16] 125 # get rid of whitespace in atom names 126 split_list=fullname.split() 127 if len(split_list)!=1: 128 # atom name has internal spaces, e.g. " N B ", so 129 # we do not strip spaces 130 name=fullname 131 else: 132 # atom name is like " CA ", so we can strip spaces 133 name=split_list[0] 134 altloc=line[16:17] 135 resname=line[17:20] 136 chainid=line[21:22] 137 try: 138 serial_number=int(line[6:11]) 139 except: 140 serial_number=0 141 resseq=int(line[22:26].split()[0]) # sequence identifier 142 icode=line[26:27] # insertion code 143 if record_type=='HETATM': # hetero atom flag 144 if resname=="HOH" or resname=="WAT": 145 hetero_flag="W" 146 else: 147 hetero_flag="H" 148 else: 149 hetero_flag=" " 150 residue_id=(hetero_flag, resseq, icode) 151 # atomic coordinates 152 try: 153 x=float(line[30:38]) 154 y=float(line[38:46]) 155 z=float(line[46:54]) 156 except: 157 #Should we allow parsing to continue in permissive mode? 158 #If so what coordindates should we default to? Easier to abort! 159 raise PDBConstructionException(\ 160 "Invalid or missing coordinate(s) at line %i." \ 161 % global_line_counter) 162 coord=numpy.array((x, y, z), 'f') 163 # occupancy & B factor 164 try: 165 occupancy=float(line[54:60]) 166 except: 167 self._handle_PDB_exception("Invalid or missing occupancy", 168 global_line_counter) 169 occupancy = 0.0 #Is one or zero a good default? 170 try: 171 bfactor=float(line[60:66]) 172 except: 173 self._handle_PDB_exception("Invalid or missing B factor", 174 global_line_counter) 175 bfactor = 0.0 #The PDB use a default of zero if the data is missing 176 segid=line[72:76] 177 element=line[76:78].strip() 178 if current_segid!=segid: 179 current_segid=segid 180 structure_builder.init_seg(current_segid) 181 if current_chain_id!=chainid: 182 current_chain_id=chainid 183 structure_builder.init_chain(current_chain_id) 184 current_residue_id=residue_id 185 current_resname=resname 186 try: 187 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 188 except PDBConstructionException, message: 189 self._handle_PDB_exception(message, global_line_counter) 190 elif current_residue_id!=residue_id or current_resname!=resname: 191 current_residue_id=residue_id 192 current_resname=resname 193 try: 194 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 195 except PDBConstructionException, message: 196 self._handle_PDB_exception(message, global_line_counter) 197 # init atom 198 try: 199 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, 200 fullname, serial_number, element) 201 except PDBConstructionException, message: 202 self._handle_PDB_exception(message, global_line_counter) 203 elif(record_type=='ANISOU'): 204 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70])) 205 # U's are scaled by 10^4 206 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f') 207 structure_builder.set_anisou(anisou_array) 208 elif(record_type=='MODEL '): 209 try: 210 serial_num=int(line[10:14]) 211 except: 212 self._handle_PDB_exception("Invalid or missing model serial number", 213 global_line_counter) 214 serial_num=0 215 structure_builder.init_model(current_model_id,serial_num) 216 current_model_id+=1 217 model_open=1 218 current_chain_id=None 219 current_residue_id=None 220 elif(record_type=='END ' or record_type=='CONECT'): 221 # End of atomic data, return the trailer 222 self.line_counter=self.line_counter+local_line_counter 223 return coords_trailer[local_line_counter:] 224 elif(record_type=='ENDMDL'): 225 model_open=0 226 current_chain_id=None 227 current_residue_id=None 228 elif(record_type=='SIGUIJ'): 229 # standard deviation of anisotropic B factor 230 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70])) 231 # U sigma's are scaled by 10^4 232 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f') 233 structure_builder.set_siguij(siguij_array) 234 elif(record_type=='SIGATM'): 235 # standard deviation of atomic positions 236 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66])) 237 sigatm_array=numpy.array(sigatm, 'f') 238 structure_builder.set_sigatm(sigatm_array) 239 local_line_counter=local_line_counter+1 240 # EOF (does not end in END or CONECT) 241 self.line_counter=self.line_counter+local_line_counter 242 return []
243
244 - def _handle_PDB_exception(self, message, line_counter):
245 """ 246 This method catches an exception that occurs in the StructureBuilder 247 object (if PERMISSIVE==1), or raises it again, this time adding the 248 PDB line number to the error message. 249 """ 250 message="%s at line %i." % (message, line_counter) 251 if self.PERMISSIVE: 252 # just print a warning - some residues/atoms may be missing 253 warnings.warn("PDBConstructionException: %s\n" 254 "Exception ignored.\n" 255 "Some atoms or residues may be missing in the data structure." 256 % message, PDBConstructionWarning) 257 else: 258 # exceptions are fatal - raise again with new message (including line nr) 259 raise PDBConstructionException(message)
260 261 262 if __name__=="__main__": 263 264 import sys 265 266 p=PDBParser(PERMISSIVE=1) 267 268 filename = sys.argv[1] 269 s=p.get_structure("scr", filename) 270 271 for m in s: 272 p=m.get_parent() 273 assert(p is s) 274 for c in m: 275 p=c.get_parent() 276 assert(p is m) 277 for r in c: 278 print r 279 p=r.get_parent() 280 assert(p is c) 281 for a in r: 282 p=a.get_parent() 283 if not p is r: 284 print p, r 285