Trees | Indices | Help |
---|
|
#!/usr/bin/env python
#
#      Restriction Analysis Libraries.
#      Copyright (C) 2004. Frederic Sohm.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#
"""Print restriction analysis results as a list, by site count or as a map."""

import re
import itertools
from Bio.Restriction import RanaConfig as RanaConf

"""
Usage:

    PrintFormat allows to print the results from restriction analysis in 3
    different formats.
    List, column or map.

    The easiest way to use it is:

    >>> from Bio.Restriction.PrintFormat import PrintFormat
    >>> from Bio.Restriction.Restriction import AllEnzymes
    >>> from Bio import Entrez
    >>> from Bio import SeqIO
    >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322")
    >>> pBR322 = SeqIO.read(handle, "fasta")
    >>> handle.close()
    >>> dct = AllEnzymes.search(pBR322.seq)
    >>> new = PrintFormat()
    >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n')

     my pBR322 analysis

    AasI : 2169, 2582.
    AatII : 4289.
    ...
    More enzymes.
    ...
    ZraI : 4287.
    ZrmI : 3847.

     no site:

    AarI AatI Acc65I AcsI AcvI AdeI AflII AgeI
    ...
    More enzymes.
    ...
    Vha464I XapI XbaI XcmI XhoI XmaCI XmaI XmaJI
    Zsp2I

    >>> new.sequence = pBR322.seq
    >>> new.print_as("map")
    >>> new.print_that(dct)
    ...

    Some of the methods of PrintFormat are meant to be overridden by derived
    classes.
"""


class PrintFormat(object):
    """PrintFormat allows the printing of results of restriction analysis.

    The methods read two instance attributes that this class does not set
    itself: ``results`` (used by print_that when no dictionary is given) and
    ``sequence`` (used by the map format).
    """

    # NOTE(review): the reviewed copy of this file had lost the ``class`` line
    # and every ``def`` line (and whatever stood on lines 74-78, possibly an
    # ``__init__``).  The signatures below were restored from the docstrings
    # and the call sites -- confirm against the upstream file before merging.

    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth = RanaConf.NameWidth
    MaxSize = RanaConf.MaxSize
    # Part of the console width that does not fill a whole name column.
    Cmodulo = ConsoleWidth % NameWidth
    # Console width rounded down to a multiple of NameWidth.
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    # Row length after which _make_nocut_only starts a new line.
    linesize = PrefWidth - NameWidth

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> print the results as specified.

        Valid format are:
            'list'      -> alphabetical order
            'number'    -> number of sites in the sequence
            'map'       -> a map representation of the sequence with the sites.

        Any other value selects the 'list' format.

        If you want more flexibility over-ride the virtual method make_format.
        """
        # Rebinding make_format on the instance changes what print_that emits.
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list
        return

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.

        dct is a dictionary as returned by a RestrictionBatch.search()
        If dct is empty or None, self.results is printed instead.

        title is the title of the map.
        It must be a formatted string, i.e. you must include the line break.

        s1 is the title separating the list of enzymes that have sites from
        those without sites.
        s1 must be a formatted string as well.

        The format of print_that is a list, unless print_as selected
        another one."""
        if not dct:
            dct = self.results
        ls, nc = [], []
        # Split the enzymes into those with at least one site and those without.
        for k, v in dct.items():
            if v:
                ls.append((k, v))
            else:
                nc.append(k)
        # Single parenthesised argument: prints the same on Python 2 and 3.
        print(self.make_format(ls, title, nc, s1))
        return

    def make_format(self, cut=None, title='', nc=None, s1=''):
        """PF.make_format(cut, title, nc, s1) -> string

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.

        cut is a list of (enzyme, sites) tuples.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_list(cut, title, nc, s1)

    ###### _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls,title, nc,s1) -> string.

        return a string of form:

        title.

        enzyme1     :   position1, position2.
        enzyme2     :   position1, position2, position3.

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls,title, nc,s1) -> string.

        return a string of form:

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls,title, nc,s1) -> string.

        title.

        enzyme which cut 1 time:

        enzyme1     :   position1.

        enzyme which cut 2 times:

        enzyme2     :   position1, position2.
        ...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls,title, nc,s1) -> string.

        return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=None, title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        nc is a list of non cutting enzymes.  It is not modified.
        s1 is the sentence before the non cutting enzymes.  When it is empty
        a default sentence is used; when nc is empty s1 is returned as is.
        ls and title are ignored.
        """
        if not nc:
            return s1
        parts = [s1 or '\n Enzymes which do not cut the sequence.\n\n']
        row = ''
        # sorted() rather than nc.sort(): leave the caller's list untouched.
        for enzyme in sorted(nc):
            row += str(enzyme).ljust(self.NameWidth)
            # The row is flushed once it has grown past linesize.
            if len(row) > self.linesize:
                parts.append(row + '\n')
                row = ''
        parts.append(row + '\n')
        return ''.join(parts)

    def _make_list_only(self, ls, title, nc=None, s1=''):
        """PF._make_list_only(ls, title) -> string.

        return a string of form:

        title.

        enzyme1     :   position1, position2.
        enzyme2     :   position1, position2, position3.
        ...

        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are ignored)."""
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=None, s1=''):
        """PF._make_number_only(ls, title) -> string.

        return a string of form:

        title.

        enzyme which cut 1 time:

        enzyme1     :   position1.

        enzyme which cut 2 times:

        enzyme2     :   position1, position2.
        ...


        ls is a list of results.  It is not modified.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are ignored).
        A heading is only written for site counts that actually occur."""
        if not ls:
            return title

        def site_count(item):
            return len(item[1])

        # groupby needs its input ordered by the grouping key.
        by_count = sorted(ls, key=site_count)
        for count, group in itertools.groupby(by_count, key=site_count):
            title += "\n\nenzymes which cut %i times :\n\n" % count
            title = self.__next_section(list(group), title)
        return title

    def _make_map_only(self, ls, title, nc=None, s1=''):
        """PF._make_map_only(ls, title) -> string.

        return a string of form:

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...


        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are ignored).

        The map is built from self.sequence, 60 bases per row.  Every site
        of a row gets two lines above the sequence: its position followed by
        the names of the enzymes, then a line with a '|' for each site
        placed so far in that row.
        """
        if not ls:
            return title
        width = 60
        tick = '|'
        # Cut position -> names of all the enzymes cutting there.
        enzymemap = {}
        for enzyme, cut in ls:
            for c in cut:
                enzymemap.setdefault(c, []).append(str(enzyme))
        length = len(self.sequence)
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())

        def marker_lines(keys):
            """Return the position/name and tick lines for one row of sites."""
            row = ' ' * width
            out = []
            for key in keys:
                names = ''.join([' ' + n for n in enzymemap[key]])
                # Position p sits in column (p - 1) of its row; the modulo
                # also sends a site on the row boundary to the last column.
                col = (key - 1) % width
                row = row[:col] + tick + row[col + 1:]
                out.append(row[:col] + str(key) + names + '\n')
                out.append(row + '\n')
            return ''.join(out)

        chunks = [title or '']
        remaining = sorted(enzymemap)
        base = 0
        for base in range(width, length, width):
            start = base - width
            # remaining is sorted, so the sites of this row are a prefix of it.
            here = [p for p in remaining if p <= base]
            remaining = remaining[len(here):]
            chunks.append(marker_lines(here))
            ruler = ''.join((str(start + 1).ljust(15), ' ' * 30,
                             str(base).rjust(15), '\n\n'))
            chunks.append('\n'.join((sequence[start:base], tick * width,
                                     revsequence[start:base], ruler)))
        # Last (possibly shorter) row: it takes every site still unplaced.
        chunks.append(marker_lines(remaining))
        chunks.append(sequence[base:length] + '\n')
        chunks.append(tick * (length - base) + '\n')
        chunks.append(revsequence[base:length] + '\n')
        # A negative repeat count yields '' when the row is shorter than 30.
        chunks.append(''.join((str(base + 1).ljust(15),
                               ' ' * (length - base - 30),
                               str(length).rjust(15), '\n\n')))
        return ''.join(chunks)

    ###### private method to do lists:

    def __next_section(self, ls, into):
        """FP.__next_section(ls, into) -> string.

        ls is a list of tuple (string, [int, int]).  It is not modified.
        into is a string to which the formatted ls will be added.

        Format ls as a string of lines:
        The form is:

        enzyme1     :   position1.
        enzyme2     :   position2, position3.

        then add the formatted ls to into and return the result."""
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Matches at most linesize characters followed by a ',' or a '.', so
        # that an over-long list of positions is only broken after a separator.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        pieces = [into]
        for name, sites in sorted(ls):
            positions = ', '.join([str(site) for site in sites]) + '.'
            if len(positions) > linesize:
                #
                #   cut where appropriate and add the indentation
                #
                positions = indentation.join(
                    [m.group() for m in pat.finditer(positions)])
            pieces.extend((str(name).ljust(self.NameWidth), ' : ',
                           positions, '\n'))
        return ''.join(pieces)
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Fri Nov 26 15:46:02 2010 | http://epydoc.sourceforge.net |