Package Bio :: Package PDB :: Module FragmentMapper'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.FragmentMapper'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Classify protein backbone structure according to Kolodny et al's fragment 
  7  libraries. 
  8   
  9  It can be regarded as a form of objective secondary structure classification. 
 10  Only fragments of length 5 or 7 are supported (ie. there is a 'central' 
 11  residue). 
 12   
 13  Full reference: 
 14   
 15  Kolodny R, Koehl P, Guibas L, Levitt M. 
 16  Small libraries of protein fragments model native protein structures accurately. 
 17  J Mol Biol. 2002 323(2):297-307. 
 18   
 19  The definition files of the fragments can be obtained from: 
 20   
 21  U{http://csb.stanford.edu/~rachel/fragments/} 
 22   
 23  You need these files to use this module. 
 24   
 25  The following example uses the library with 10 fragments of length 5. 
 26  The library files can be found in directory 'fragment_data'. 
 27   
 28      >>> model = structure[0] 
 29      >>> fm = FragmentMapper(model, lsize=10, flength=5, dir="fragment_data") 
 30      >>> fragment = fm[residue] 
 31  """ 
 32   
 33  import numpy 
 34   
 35  from Bio.SVDSuperimposer import SVDSuperimposer 
 36   
 37  from Bio.PDB import Selection 
 38  from Bio.PDB.PDBExceptions import PDBException 
 39  from Bio.PDB.PDBParser import PDBParser 
 40  from Bio.PDB.Polypeptide import PPBuilder 
 41   
 42   
 43  # fragment file (lib_SIZE_z_LENGTH.txt) 
 44  # SIZE=number of fragments 
 45  # LENGTH=length of fragment (4,5,6,7) 
 46  _FRAGMENT_FILE="lib_%s_z_%s.txt" 
 47   
 48   
49 -def _read_fragments(size, length, dir="."):
50 """ 51 Read a fragment spec file (available from 52 U{http://csb.stanford.edu/rachel/fragments/} 53 and return a list of Fragment objects. 54 55 @param size: number of fragments in the library 56 @type size: int 57 58 @param length: length of the fragments 59 @type length: int 60 61 @param dir: directory where the fragment spec files can be found 62 @type dir: string 63 """ 64 filename=(dir+"/"+_FRAGMENT_FILE) % (size, length) 65 fp=open(filename, "r") 66 flist=[] 67 # ID of fragment=rank in spec file 68 fid=0 69 for l in fp.readlines(): 70 # skip comment and blank lines 71 if l[0]=="*" or l[0]=="\n": 72 continue 73 sl=l.split() 74 if sl[1]=="------": 75 # Start of fragment definition 76 f=Fragment(length, fid) 77 flist.append(f) 78 # increase fragment id (rank) 79 fid+=1 80 continue 81 # Add CA coord to Fragment 82 coord=numpy.array(map(float, sl[0:3])) 83 # XXX= dummy residue name 84 f.add_residue("XXX", coord) 85 fp.close() 86 return flist
87 88
89 -class Fragment:
90 """ 91 Represent a polypeptide C-alpha fragment. 92 """
93 - def __init__(self, length, fid):
94 """ 95 @param length: length of the fragment 96 @type length: int 97 98 @param fid: id for the fragment 99 @type fid: int 100 """ 101 # nr of residues in fragment 102 self.length=length 103 # nr of residues added 104 self.counter=0 105 self.resname_list=[] 106 # CA coordinate matrix 107 self.coords_ca=numpy.zeros((length, 3), "d") 108 self.fid=fid
109
110 - def get_resname_list(self):
111 """ 112 @return: the residue names 113 @rtype: [string, string,...] 114 """ 115 return self.resname_list
116
117 - def get_id(self):
118 """ 119 @return: id for the fragment 120 @rtype: int 121 """ 122 return self.fid
123
124 - def get_coords(self):
125 """ 126 @return: the CA coords in the fragment 127 @rtype: Numeric (Nx3) array 128 """ 129 return self.coords_ca
130
131 - def add_residue(self, resname, ca_coord):
132 """ 133 @param resname: residue name (eg. GLY). 134 @type resname: string 135 136 @param ca_coord: the c-alpha coorinates of the residues 137 @type ca_coord: Numeric array with length 3 138 """ 139 if self.counter>=self.length: 140 raise PDBException("Fragment boundary exceeded.") 141 self.resname_list.append(resname) 142 self.coords_ca[self.counter]=ca_coord 143 self.counter=self.counter+1
144
145 - def __len__(self):
146 """ 147 @return: length of fragment 148 @rtype: int 149 """ 150 return self.length
151
152 - def __sub__(self, other):
153 """ 154 Return rmsd between two fragments. 155 156 Example: 157 >>> rmsd=fragment1-fragment2 158 159 @return: rmsd between fragments 160 @rtype: float 161 """ 162 sup=SVDSuperimposer() 163 sup.set(self.coords_ca, other.coords_ca) 164 sup.run() 165 return sup.get_rms()
166
167 - def __repr__(self):
168 """ 169 Returns <Fragment length=L id=ID> where L=length of fragment 170 and ID the identifier (rank in the library). 171 """ 172 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
173 174
175 -def _make_fragment_list(pp, length):
176 """ 177 Dice up a peptide in fragments of length "length". 178 179 @param pp: a list of residues (part of one peptide) 180 @type pp: [L{Residue}, L{Residue}, ...] 181 182 @param length: fragment length 183 @type length: int 184 """ 185 frag_list=[] 186 for i in range(0, len(pp)-length+1): 187 f=Fragment(length, -1) 188 for j in range(0, length): 189 residue=pp[i+j] 190 resname=residue.get_resname() 191 if residue.has_id("CA"): 192 ca=residue["CA"] 193 else: 194 raise PDBException("CHAINBREAK") 195 if ca.is_disordered(): 196 raise PDBException("CHAINBREAK") 197 ca_coord=ca.get_coord() 198 f.add_residue(resname, ca_coord) 199 frag_list.append(f) 200 return frag_list
201 202
203 -def _map_fragment_list(flist, reflist):
204 """ 205 Map all frgaments in flist to the closest 206 (in RMSD) fragment in reflist. 207 208 Returns a list of reflist indices. 209 210 @param flist: list of protein fragments 211 @type flist: [L{Fragment}, L{Fragment}, ...] 212 213 @param reflist: list of reference (ie. library) fragments 214 @type reflist: [L{Fragment}, L{Fragment}, ...] 215 """ 216 mapped=[] 217 for f in flist: 218 rank=[] 219 for i in range(0, len(reflist)): 220 rf=reflist[i] 221 rms=f-rf 222 rank.append((rms, rf)) 223 rank.sort() 224 fragment=rank[0][1] 225 mapped.append(fragment) 226 return mapped
227 228
229 -class FragmentMapper:
230 """ 231 Map polypeptides in a model to lists of representative fragments. 232 """
233 - def __init__(self, model, lsize=20, flength=5, fdir="."):
234 """ 235 @param model: the model that will be mapped 236 @type model: L{Model} 237 238 @param lsize: number of fragments in the library 239 @type lsize: int 240 241 @param flength: length of fragments in the library 242 @type flength: int 243 244 @param fdir: directory where the definition files are 245 found (default=".") 246 @type fdir: string 247 """ 248 if flength==5: 249 self.edge=2 250 elif flength==7: 251 self.edge=3 252 else: 253 raise PDBException("Fragment length should be 5 or 7.") 254 self.flength=flength 255 self.lsize=lsize 256 self.reflist=_read_fragments(lsize, flength, fdir) 257 self.model=model 258 self.fd=self._map(self.model)
259
260 - def _map(self, model):
261 """ 262 @param model: the model that will be mapped 263 @type model: L{Model} 264 """ 265 ppb=PPBuilder() 266 ppl=ppb.build_peptides(model) 267 fd={} 268 for pp in ppl: 269 try: 270 # make fragments 271 flist=_make_fragment_list(pp, self.flength) 272 # classify fragments 273 mflist=_map_fragment_list(flist, self.reflist) 274 for i in range(0, len(pp)): 275 res=pp[i] 276 if i<self.edge: 277 # start residues 278 continue 279 elif i>=(len(pp)-self.edge): 280 # end residues 281 continue 282 else: 283 # fragment 284 index=i-self.edge 285 assert(index>=0) 286 fd[res]=mflist[index] 287 except PDBException, why: 288 if why == 'CHAINBREAK': 289 # Funny polypeptide - skip 290 pass 291 else: 292 raise PDBException(why) 293 return fd
294
295 - def has_key(self, res):
296 """(Obsolete) 297 298 @type res: L{Residue} 299 """ 300 import warnings 301 warnings.warn("has_key is obsolete; use 'res in object' instead", PendingDeprecationWarning) 302 return (res in self)
303
304 - def __contains__(self, res):
305 """True if the given residue is in any of the mapped fragments. 306 307 @type res: L{Residue} 308 """ 309 return (res in self.fd)
310
311 - def __getitem__(self, res):
312 """ 313 @type res: L{Residue} 314 315 @return: fragment classification 316 @rtype: L{Fragment} 317 """ 318 return self.fd[res]
319 320 321 if __name__=="__main__": 322 323 import sys 324 325 p=PDBParser() 326 s=p.get_structure("X", sys.argv[1]) 327 328 m=s[0] 329 fm=FragmentMapper(m, 10, 5, "levitt_data") 330 331 332 for r in Selection.unfold_entities(m, "R"): 333 334 print r, 335 if r in fm: 336 print fm[r] 337 else: 338 print 339