Package Bio :: Package FSSP
[hide private]
[frames] | no frames]

Source Code for Package Bio.FSSP

  1  """Parser for FSSP files, used in a database of protein fold classifications. 
  2   
  3  This is a module to handle FSSP files. For now it parses only the header, 
  4  summary and alignment sections. 
  5   
  6  See: Holm and Sander (1996) The FSSP database: fold classification based on 
  7  structure-structure alignment of proteins. 
  8   
  9  functions: read_fssp(file_handle): reads an FSSP file into records. Returns a 
 10  tuple of three instances: (header, summary dictionary, alignment dictionary). 
 11  mult_align: returns a Biopython alignment object 
 12  """ 
 13  import re 
 14  import fssp_rec 
 15  from Bio.Align import Generic 
 16  from Bio import Alphabet 
 17  fff_rec = fssp_rec.fff_rec 
 18  header_records = { 
 19     'database' : re.compile('^DATABASE'), 
 20     'pdbid': re.compile('^PDBID'), 
 21     'header': re.compile('^HEADER'), 
 22     'compnd': re.compile('^COMPND'), 
 23     'author': re.compile('^AUTHOR'), 
 24     'source': re.compile('^SOURCE'), 
 25     'seqlength': re.compile('^SEQLENGTH'), 
 26     'nalign': re.compile('^NALIGN') 
 27  } 
 28   
 29  summary_title = re.compile('## +SUMMARY') 
 30  summary_rec = re.compile(' *[0-9]+: +[1-9][0-9a-z]{3,3}') 
 31  alignments_title= re.compile('## +ALIGNMENTS') 
 32  alignments_rec = re.compile(' *[0-9]+ +-{0,1}[0-9]+') 
 33  equiv_title = re.compile('## +EQUIVALENCES') 
 34   
class FSSPHeader:
    """Holds the values parsed from the header lines of an FSSP file.

    Attributes start out with placeholder values and are overwritten by
    fill_header() as the matching header lines are encountered.
    """

    def __init__(self):
        # Integer-valued fields.
        self.database = None
        self.seqlength = 0
        self.nalign = 0
        # Text fields.
        self.pdbid = ''
        self.header = ''
        self.source = ''
        # Word-list fields (compnd starts as '' but is replaced by a list).
        self.compnd = ''
        self.author = []

    def fill_header(self, inline):
        """Store the value carried by one header line, if it is recognised.

        inline -- a single line of text from the header part of the file.

        Every pattern in header_records is tried; for each one that matches,
        the attribute of the same name is set.  Lines matching no pattern
        leave the object untouched.
        """
        for field in header_records:
            if not header_records[field].match(inline):
                continue
            if field in ('database', 'seqlength', 'nalign'):
                # Second whitespace-separated token, as an integer.
                value = int(inline.split()[1])
            elif field in ('compnd', 'author'):
                # Every token after the keyword, kept as a list of words.
                value = inline.split()[1:]
            elif field in ('source', 'header'):
                # Everything after the first blank, surrounding space removed.
                value = inline[inline.find(' ') + 1:].strip()
            else:
                # Remaining fields: second token, kept as text.
                value = inline.split()[1]
            setattr(self, field, value)
57
class PosAlign:
    """One cell of an FSSP alignment: a residue plus its secondary structure.

    Attributes:
    aa  -- one-character residue code, or '-' for a gap
    ss  -- upper-cased secondary structure code, or '0' when none is given
    gap -- 1 when the cell is a gap ('..'), otherwise 0
    """

    def __init__(self, inStr):
        """Parse a one- or two-character alignment token.

        inStr -- the token; surrounding whitespace is ignored.  '..' denotes
                 a gap, otherwise the first character is the residue and the
                 optional second character is the secondary structure code.

        Raises ValueError when the stripped token is not 1 or 2 characters.
        """
        inStr = inStr.strip()
        if len(inStr) not in (1, 2):
            raise ValueError(
                'PosAlign: expected 1 or 2 characters, got %r' % (inStr,))
        if inStr == '..':
            self.aa = '-'
            self.gap = 1
            # Gaps carry no structure information; use the same placeholder
            # as a residue without a structure code so that `ss` always
            # exists on every instance.
            self.ss = '0'
        else:
            self.gap = 0
            self.aa = inStr[0]
            # A character equal to its own lower-case form (this includes
            # every lower-case letter) is stored as 'C'.
            if self.aa == self.aa.lower():
                self.aa = 'C'
            if len(inStr) == 2:
                self.ss = inStr[1].upper()
            else:
                self.ss = '0'

    def __repr__(self):
        """Return '..' for a gap, else residue + lower-cased structure code."""
        if self.gap:
            return '..'
        return self.aa + self.ss.lower()

    __str__ = __repr__
84 85 86 87
class FSSPSumRec:
    """Parsed form of one record from the SUMMARY section of an FSSP file.

    The unparsed line is kept in `raw` and is what repr()/str() return.
    """

    def __init__(self, in_str):
        """Split a summary line on whitespace and store each column.

        Raises ValueError when either structure identifier is not 4 or 5
        characters long, or when a numeric column cannot be converted.
        """
        self.raw = in_str
        fields = in_str.strip().split()
        # First column is the record number followed by one trailing
        # character (the ':' required by the summary record pattern).
        self.nr = int(fields[0][:-1])
        self.pdb1, self.chain1 = self._split_pdb_id(fields[1], 'Bad PDB ID 1')
        self.pdb2, self.chain2 = self._split_pdb_id(fields[2], 'Bad PDB ID 2')
        # Columns 3-7 are stored as floats, columns 8-10 as integers.
        for offset, name in enumerate(('zscore', 'rmsd', 'lali', 'lseq2', 'pID')):
            setattr(self, name, float(fields[3 + offset]))
        for offset, name in enumerate(('revers', 'permut', 'nfrag')):
            setattr(self, name, int(fields[8 + offset]))
        self.topo = fields[11]
        # Whatever remains is free text, re-joined with single blanks.
        self.doc = ' '.join(fields[12:]) + '\n'

    def _split_pdb_id(self, token, error_message):
        """Return (four-character code, chain) for a 4- or 5-character id.

        A 4-character id has no chain letter and yields chain '0'.
        """
        code = token[:4]
        if len(token) == 4:
            return code, '0'
        if len(token) == 5:
            return code, token[4]
        raise ValueError(error_message)

    def __repr__(self):
        return self.raw

    __str__ = __repr__
126
class FSSPAlignRec:
    """One residue row of the ALIGNMENTS section.

    The fixed columns are taken from an already split record; the aligned
    positions are accumulated afterwards with add_align_list() and converted
    to a dictionary with pos_align_list2dict().
    """

    def __init__(self, in_fff_rec):
        """Copy the fixed columns out of in_fff_rec.

        in_fff_rec -- record indexed with the keys defined on fssp_rec.align
                      (presumably column slices; confirm against fssp_rec).
        """
        layout = fssp_rec.align
        self.abs_res_num = int(in_fff_rec[layout.abs_res_num])
        self.pdb_res_num = in_fff_rec[layout.pdb_res_num].strip()

        # A blank chain column is stored as '0'.
        chain = in_fff_rec[layout.chain_id]
        if chain == ' ':
            chain = '0'
        self.chain_id = chain

        # A residue equal to its own lower-case form is stored as 'C'.
        residue = in_fff_rec[layout.res_name]
        if residue == residue.lower():
            residue = 'C'
        self.res_name = residue

        self.ss1 = in_fff_rec[layout.ss1]
        self.turn3 = in_fff_rec[layout.turn3]
        self.turn4 = in_fff_rec[layout.turn4]
        self.turn5 = in_fff_rec[layout.turn5]

        self.pos_align_dict = {}
        self.PosAlignList = []

    def add_align_list(self, align_list):
        """Append a PosAlign for every token in align_list."""
        collected = self.PosAlignList
        for token in align_list:
            collected.append(PosAlign(token))

    def pos_align_list2dict(self):
        """Fill pos_align_dict from PosAlignList, numbering entries from 1."""
        for index, position in enumerate(self.PosAlignList):
            self.pos_align_dict[index + 1] = position
152 153
class FSSPAlignDict(dict):
    """Dictionary of alignment records with lookups by residue number.

    Values are records exposing `abs_res_num`, `pdb_res_num` and
    `pos_align_dict`.  Two index dictionaries map residue numbers back to
    the keys of this dictionary; they are only valid after
    build_resnum_list() has been called.
    """

    def __init__(self):
        dict.__init__(self)
        # pdb_res_dict: PDB residue number -> key in self
        # abs_res_dict: absolute residue number -> key in self
        self.pdb_res_dict = {}
        self.abs_res_dict = {}
        # Not used by this class; kept so existing attribute access works.
        self.data = {}

    def build_resnum_list(self):
        """(Re)populate both residue-number indexes from the stored records."""
        for key in self:
            record = self[key]
            self.abs_res_dict[record.abs_res_num] = key
            self.pdb_res_dict[record.pdb_res_num] = key

    def abs(self, num):
        """Return the record whose absolute residue number is num.

        Raises KeyError when num is not in the index.
        """
        return self[self.abs_res_dict[num]]

    def pdb(self, num):
        """Return the record whose PDB residue number is num.

        Raises KeyError when num is not in the index.
        """
        return self[self.pdb_res_dict[num]]

    def sequence(self, num):
        """Return aligned sequence number num as a string.

        Residues are concatenated in order of absolute residue number.
        """
        # sorted() works on both Python 2 and 3, unlike keys() + .sort().
        return ''.join([self.abs(position).pos_align_dict[num].aa
                        for position in sorted(self.abs_res_dict)])

    def fasta_mult_align(self):
        """Return every aligned sequence as FASTA text.

        Each sequence gets a '> N' title line and is wrapped at 72
        characters.  Residues are emitted in order of absolute residue
        number, so the result does not depend on dictionary ordering and
        agrees with sequence().  An empty dictionary yields ''.
        """
        # Collect the residues of each sequence column by column, walking
        # the records in residue order rather than hash order.
        residues = {}
        for record in sorted(self.values(), key=lambda rec: rec.abs_res_num):
            for seq_num in record.pos_align_dict:
                residues.setdefault(seq_num, []).append(
                    record.pos_align_dict[seq_num].aa)

        width = 72
        pieces = []
        for seq_num in sorted(residues):
            pieces.append('> %d\n' % seq_num)
            text = ''.join(residues[seq_num])
            # Full-width lines: break after every 72nd character.
            for start in range(0, len(text), width):
                pieces.append(text[start:start + width] + '\n')
        return ''.join(pieces)
204
class FSSPSumDict(dict):
    """Plain dictionary of summary records; adds nothing to dict."""
def read_fssp(fssp_handle):
    """Parse an FSSP file into its header, summary and alignment records.

    fssp_handle -- object with a readline() method that returns an empty
                   string at end of input (e.g. an open text file).

    Returns a 3-tuple (header, sum_dict, align_dict):
    header     -- FSSPHeader filled from the lines before the SUMMARY title
    sum_dict   -- FSSPSumDict mapping record number to FSSPSumRec
    align_dict -- FSSPAlignDict mapping chain id + residue name + PDB
                  residue number to FSSPAlignRec

    Raises:
    ValueError -- the input ends before a SUMMARY title is found, or before
                  any ALIGNMENTS or EQUIVALENCES title is found.
    EOFError   -- the input ends after an ALIGNMENTS section was started but
                  before the EQUIVALENCES title.
    """
    header = FSSPHeader()
    sum_dict = FSSPSumDict()
    align_dict = FSSPAlignDict()

    # Header: everything up to the SUMMARY title.  readline() returns '' at
    # end of input, so test for that explicitly instead of looping forever.
    curline = fssp_handle.readline()
    while curline and not summary_title.match(curline):
        header.fill_header(curline)
        curline = fssp_handle.readline()
    if not curline:
        raise ValueError('Bad FSSP file: no summary record found')

    # Summary: skip the line following the title, then read the records.
    fssp_handle.readline()
    curline = fssp_handle.readline()
    while summary_rec.match(curline):
        cur_sum_rec = FSSPSumRec(curline)
        sum_dict[cur_sum_rec.nr] = cur_sum_rec
        curline = fssp_handle.readline()

    # Alignments: process every ALIGNMENTS section up to the EQUIVALENCES
    # title.
    seen_alignments = False
    while True:
        # Advance to the next section title.
        while (curline and
               not alignments_title.match(curline) and
               not equiv_title.match(curline)):
            curline = fssp_handle.readline()
        if not curline:
            if seen_alignments:
                raise EOFError('Bad FSSP file: end of file reached before '
                               'the equivalences title record')
            raise ValueError('Bad FSSP file: no alignments title record found')
        if equiv_title.match(curline):
            break

        # An ALIGNMENTS title: skip the line following it, then read the
        # records of this section.
        seen_alignments = True
        fssp_handle.readline()
        curline = fssp_handle.readline()
        while alignments_rec.match(curline):
            align_rec = FSSPAlignRec(fff_rec(curline))
            key = (align_rec.chain_id + align_rec.res_name +
                   str(align_rec.pdb_res_num))
            align_list = curline[fssp_rec.align.start_aa_list:].strip().split()
            # The same residue shows up once per ALIGNMENTS section; keep
            # the first record and append the further aligned positions.
            if key not in align_dict:
                align_dict[key] = align_rec
            align_dict[key].add_align_list(align_list)
            curline = fssp_handle.readline()
            if not curline:
                raise EOFError('Bad FSSP file: end of file reached inside '
                               'an alignments section')

    # Turn each record's accumulated list into its position dictionary and
    # drop the list, then index the records by residue number.
    for record in align_dict.values():
        record.pos_align_list2dict()
        del record.PosAlignList
    align_dict.build_resnum_list()
    return (header, sum_dict, align_dict)
267