Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """ Notes about the various classes of the restriction enzyme implementation. 
  12   
  13          RestrictionType is the type of all restriction enzymes. 
  14      ---------------------------------------------------------------------------- 
  15          AbstractCut implements some methods that are common to all enzymes. 
  16      ---------------------------------------------------------------------------- 
  17          NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  18                                  produced by the enzyme. 
  19                                  they correspond to the 4th field of the rebase 
  20                                  record emboss_e.NNN. 
  21                  0->NoCut    : the enzyme is not characterised. 
  22                  2->OneCut   : the enzyme produces one double strand cut. 
  23                  4->TwoCuts  : two double strand cuts. 
  24      ---------------------------------------------------------------------------- 
  25          Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  26                                  the enzyme. 
  27                                  Not implemented yet. 
  28      ---------------------------------------------------------------------------- 
  29          Palindromic,            if the site is palindromic or not. 
  30          NotPalindromic          allow some optimisations of the code. 
  31                                  No need to check the reverse strand 
  32                                  with palindromic sites. 
  33      ----------------------------------------------------------------------------                                     
  34          Unknown, Blunt,         represent the overhang. 
  35          Ov5, Ov3                Unknown is here for symmetry reasons and 
  36                                  corresponds to enzymes that are not characterised 
  37                                  in rebase. 
  38      ---------------------------------------------------------------------------- 
  39          Defined, Ambiguous,     represent the sequence of the overhang. 
  40          NotDefined              
  41                                  NotDefined is for enzymes not characterised in 
  42                                  rebase. 
  43                                   
  44                                  Defined correspond to enzymes that display a 
  45                                  constant overhang whatever the sequence. 
  46                                  ex : EcoRI. G^AATTC -> overhang :AATT 
  47                                              CTTAA^G 
  48   
  49                                  Ambiguous : the overhang varies with the 
  50                                  sequence restricted. 
  51                                  Typically enzymes which cut outside their 
  52                                  restriction site or (but not always) 
  53                                  inside an ambiguous site. 
  54                                  ex: 
  55                                  AcuI CTGAAG(22/20)  -> overhang : NN 
  56                                  AasI GACNNN^NNNGTC  -> overhang : NN 
  57                                       CTGN^NNNNNCAG 
  58   
  59              note : these 3 classes refer to the overhang, not the site. 
  60                 So the enzyme ApoI (RAATTY) is defined even if its restriction 
  61                 site is ambiguous. 
  62                                   
  63                      ApoI R^AATTY -> overhang : AATT -> Defined 
  64                           YTTAA^R 
  65                 Accordingly, blunt enzymes are always Defined even 
  66                 when they cut outside their restriction site. 
  67      ---------------------------------------------------------------------------- 
  68          Not_available,          as found in rebase file emboss_r.NNN files. 
  69          Commercially_available 
  70                                  allow the selection of the enzymes according to 
  71                                  their suppliers to reduce the quantity 
  72                                  of results. 
  73                                  Also will allow the implementation of buffer 
  74                                  compatibility tables. Not implemented yet. 
  75   
  76                                  the list of suppliers is extracted from 
  77                                  emboss_s.NNN 
  78      ---------------------------------------------------------------------------- 
  79          """ 
  80   
  81  import re 
  82  import itertools 
  83   
  84  from Bio.Seq import Seq, MutableSeq 
  85  from Bio.Alphabet import IUPAC 
  86   
  87  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  88  from Bio.Restriction.Restriction_Dictionary import typedict 
  89  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
  90  from Bio.Restriction.RanaConfig import * 
  91  from Bio.Restriction.PrintFormat import PrintFormat 
  92   
  93  #Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 
  94  #namespace), but have deprecated that module. 
95 -def _check_bases(seq_string):
96 """Check characters in a string (PRIVATE). 97 98 Remove digits and white space present in string. Allows any valid ambiguous 99 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 100 101 Other characters (e.g. symbols) trigger a TypeError. 102 103 Returns the string WITH A LEADING SPACE (!). This is for backwards 104 compatibility, and may in part be explained by the fact that 105 Bio.Restriction doesn't use zero based counting. 106 """ 107 #Remove white space and make upper case: 108 seq_string = "".join(seq_string.split()).upper() 109 #Remove digits 110 for c in "0123456789" : seq_string = seq_string.replace(c,"") 111 #Check only allowed IUPAC letters 112 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")) : 113 raise TypeError("Invalid character found in %s" % repr(seq_string)) 114 return " " + seq_string
115
def check_bases(seq_string):
    """Check characters in a string (DEPRECATED).

    Issues a Bio.BiopythonDeprecationWarning and then returns the result of
    _check_bases(seq_string), i.e. the cleaned, upper-cased sequence string
    with a leading space.  A TypeError raised by _check_bases for invalid
    characters is propagated unchanged.
    """
    # Imported here rather than at module level, as in the original code.
    import warnings
    import Bio
    # The first literal ends with a space: adjacent string literals are
    # concatenated verbatim, and without it the message read "will beremoved".
    warnings.warn("The check_bases function has been deprecated, and will be "
                  "removed in a future release of Biopython.",
                  Bio.BiopythonDeprecationWarning)
    return _check_bases(seq_string)

# Lookup table keyed by IUPAC DNA single letter code.  Each value is a string
# of IUPAC codes associated with the key.
# NOTE(review): the values look like "every code whose set of bases overlaps
# the key's" (e.g. 'A' -> A plus all ambiguity codes that include A); the
# code that consumes this table is not in this part of the file -- confirm.
matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN',
            'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY',
            'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY',
            'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY',
            'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY',
            'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'}

# Module-level alias: DNA is another name for the Seq class from Bio.Seq.
DNA = Seq

class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

    Translate a Bio.Seq into a formatted sequence to be used with Restriction.

    The sequence text is passed through _check_bases, which strips white
    space and digits, upper-cases it, rejects non IUPAC letters and puts a
    single space in front.  Thanks to that leading space the first base has
    index 1 (biological index) instead of 0 (python index).

    The object also records the shape of the molecule, linear (default) or
    circular.  For circular sequences finditer() also finds the sites which
    span the origin.

    Attributes set by __init__:
        data     -- the cleaned, upper case string, with its leading space.
        lower    -- True if the original sequence string was all lower case;
                    slices are then returned in lower case.
        linear   -- True for a linear molecule, False for a circular one.
        klass    -- class of the original sequence, used to build slices.
        alphabet -- alphabet of the original sequence, passed on to slices.
    """

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        if seq is a FormattedSeq, linear will have no effect on the
        shape of the sequence (every attribute is copied from seq).

        Raises TypeError for any other type, and lets the TypeError of
        _check_bases through when the sequence holds invalid characters."""
        if isinstance(seq, FormattedSeq):
            # Copy constructor: the shape is inherited, 'linear' is ignored.
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        elif isinstance(seq, (Seq, MutableSeq)):
            stringy = seq.tostring()
            self.lower = stringy.islower()
            #Note this adds a leading space to the sequence (!)
            self.data = _check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Number of bases; the leading space of data is not counted."""
        return len(self.data) - 1

    def __repr__(self):
        """FormattedSeq(<repr of the sequence from base 1>, linear=<bool>)."""
        return 'FormattedSeq(%s, linear=%s)' %(repr(self[1:]), repr(self.linear))

    def _comparison_key(self):
        """Return the tuple of values which decide equality (PRIVATE).

        It holds what __repr__ displays (sequence text, case, sequence
        class, alphabet and shape) but uses the complete sequence text.
        The alphabet is compared through its repr, as the former repr based
        __eq__ did."""
        return (self.data, self.lower, self.klass, repr(self.alphabet),
                self.linear)

    def __eq__(self, other):
        """FS == other -> bool.

        True if other is a FormattedSeq with the same sequence, case,
        sequence class, alphabet and shape.  Always False for other types.

        The comparison used to be done on repr(); the repr of a long Bio.Seq
        is abbreviated, so long sequences differing only in their middle
        part were wrongly reported as equal."""
        if not isinstance(other, FormattedSeq):
            return False
        return self._comparison_key() == other._comparison_key()

    def __ne__(self, other):
        """FS != other -> bool.  Strict inverse of ==.

        Python 2 does not derive != from __eq__; without this method
        'a != b' compared identities and could contradict 'a == b'."""
        return not self == other

    def circularise(self):
        """FS.circularise() -> circularise FS (in place, returns None)"""
        self.linear = False
        return

    def linearise(self):
        """FS.linearise() -> linearise FS (in place, returns None)"""
        self.linear = True
        return

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance"""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance"""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence."""
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        return the list of the matches of pattern in the sequence.
        the list is made of tuples (location, group) where location is the
        start of the match and group is the bound 'group' method of the
        match object; the latter is used with non palindromic sites.

        pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        size is the size of the restriction enzyme recognition-site."""
        if self.is_linear():
            data = self.data
        else:
            # Circular: repeat the first size-1 bases after the end so that
            # a site lying across the origin can be matched.
            data = self.data + self.data[1:size]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """FS[i] -> instance of the original sequence class.

        i is applied directly to data, so index 0 is the leading space and
        the bases start at 1.  The case of the original sequence is
        restored."""
        if self.lower:
            return self.klass((self.data[i]).lower(), self.alphabet)
        return self.klass(self.data[i], self.alphabet)
231 232
class RestrictionType(type):
    """RestrictionType. Type from which derives all enzyme classes.

    This is a metaclass: its instances are the enzyme classes themselves,
    which is why the first argument of the methods is named cls.

    Implement the operator methods."""

    def __init__(cls, name='', bases=(), dct={}):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        The class attribute compsite (a regular expression source) is
        replaced by its compiled form.

        Raises ValueError if name contains a hyphen or if compsite is
        missing or cannot be compiled.

        (dct keeps its historical {} default; it is never modified here.)"""
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name" \
                             % repr(name))
        # The method obtained through super() is already bound to cls.
        # cls used to be passed a second time, which gives type.__init__
        # four arguments; it raises TypeError where it checks their number.
        super(RestrictionType, cls).__init__(name, bases, dct)
        try:
            cls.compsite = re.compile(cls.compsite)
        except Exception:
            raise ValueError("Problem with regular expression, re.compiled(%s)" \
                             % repr(cls.compsite))

    def __add__(cls, other):
        """RE.__add__(other) -> RestrictionBatch().

        if other is an enzyme returns a batch of the two enzymes.
        if other is already a RestrictionBatch add enzyme to it.
        Raises TypeError for anything else."""
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        elif isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        else:
            raise TypeError

    def __div__(cls, other):
        """RE.__div__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rdiv__(cls, other):
        """RE.__rdiv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    def __truediv__(cls, other):
        """RE.__truediv__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rtruediv__(cls, other):
        """RE.__rtruediv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    def __floordiv__(cls, other):
        """RE.__floordiv__(other) -> list.

        RE//other
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """RE.__rfloordiv__(other) -> list.

        other//RE
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __str__(cls):
        """RE.__str__() -> str.

        return the name of the enzyme."""
        return cls.__name__

    def __repr__(cls):
        """RE.__repr__() -> str.

        used with eval or exec will instantiate the enzyme."""
        return "%s" % cls.__name__

    def __len__(cls):
        """RE.__len__() -> int.

        length of the recognition site."""
        return cls.size

    def __hash__(cls):
        #Python default is to use id(...)
        #This is consistent with the __eq__ implementation
        return id(cls)

    def __eq__(cls, other):
        """RE == other -> bool

        True if RE and other are the same enzyme.

        Specifically this checks they are the same Python object.
        """
        #assert (id(cls)==id(other)) == (other is cls) == (cls is other)
        return id(cls)==id(other)

    def __ne__(cls, other):
        """RE != other -> bool.
        isoschizomer strict, same recognition site, same restriction -> False
        all the other-> True

        WARNING - This is not the inverse of the __eq__ method.
        """
        if not isinstance(other, RestrictionType):
            return True
        return cls.charac != other.charac

    def __rshift__(cls, other):
        """RE >> other -> bool.

        neoschizomer : same recognition site, different restriction. -> True
        all the others :                                      -> False"""
        if not isinstance(other, RestrictionType):
            return False
        return cls.site == other.site and cls.charac != other.charac

    def __mod__(cls, other):
        """a % b -> bool.

        Test compatibility of the overhang of a and b.
        True if a and b have compatible overhang.
        Raises TypeError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise TypeError( \
                  'expected RestrictionType, got %s instead' % type(other))
        return cls._mod1(other)

    # The four ordering operators below share one sort key: first the size
    # of the recognition site, then the name of the enzyme.  Comparing the
    # (size, name) tuples gives exactly that order.  As before, comparing
    # with something which is not an enzyme raises NotImplementedError.

    def __ge__(cls, other):
        """a >= b -> bool.

        a is greater or equal than b if the a site is longer than b site.
        if their site have the same length sort by alphabetical order of their
        names."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) >= (len(other), other.__name__)

    def __gt__(cls, other):
        """a > b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) > (len(other), other.__name__)

    def __le__(cls, other):
        """a <= b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) <= (len(other), other.__name__)

    def __lt__(cls, other):
        """a < b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) < (len(other), other.__name__)
437 438
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiate."""

    @classmethod
    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        return a list of all the site of RE in dna. Compensate for circular
        sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance
        (a FormattedSeq is also accepted and is then used as it is, linear
        being ignored).

        if linear is False, the restriction sites than span over the boundaries
        will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.

        The formatted sequence is stored in the class attribute dna before
        the search itself is delegated to _search()."""
        #
        #   Separating search from _search allow a (very limited) optimisation
        #   of the search when using a batch of restriction enzymes.
        #   in this case the DNA is tested once by the class which implements
        #   the batch instead of being tested by each enzyme single.
        #   see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        cls.dna = dna
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """RE.all_suppliers -> print all the suppliers of R

        The first item of every value of the suppliers dictionary is
        printed, sorted, one per line.  Returns None."""
        supply = sorted([x[0] for x in suppliers_dict.values()])
        # A single parenthesised string prints the same thing with the
        # print statement and with the print function.
        print(",\n".join(supply))
        return

    @classmethod
    def is_equischizomer(cls, other):
        """RE.is_equischizomers(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction."""
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """RE.is_neoschizomers(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction."""
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """RE.is_isoschizomers(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site."""
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """RE.equischizomers([batch]) -> list.

        return a sorted list of all the equischizomers of RE, RE itself
        being left out.
        if batch is supplied it is used instead of the default AllEnzymes
        (an empty batch counts as not supplied).

        equischizomer <=> same site, same position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not cls != x]
        # RE is only in the list if the batch contains it; the unconditional
        # r.index(RE) used before raised ValueError for any other batch.
        if cls in r:
            r.remove(cls)
        r.sort()
        return r

    @classmethod
    def neoschizomers(cls, batch=None):
        """RE.neoschizomers([batch]) -> list.

        return a sorted list of all the neoschizomers of RE.
        if batch is supplied it is used instead of the default AllEnzymes
        (an empty batch counts as not supplied).

        neoschizomer <=> same site, different position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if cls >> x]
        r.sort()
        return r

    @classmethod
    def isoschizomers(cls, batch=None):
        """RE.isoschizomers([batch]) -> list.

        return a sorted list of all the equischizomers and neoschizomers of
        RE, RE itself being left out.
        if batch is supplied it is used instead of the default AllEnzymes
        (an empty batch counts as not supplied)."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (cls >> x) or (not cls != x)]
        # Same guard as in equischizomers(): the batch may not contain RE.
        if cls in r:
            r.remove(cls)
        r.sort()
        return r

    @classmethod
    def frequency(cls):
        """RE.frequency() -> int.

        frequency of the site (the class attribute freq)."""
        return cls.freq
559 560
class NoCut(AbstractCut):
    """Behaviour of the enzymes for which no cut position is recorded.

    These are in general enzymes which are only partially characterised,
    so that the way they cut the DNA is unknown, or enzymes whose cutting
    pattern is too complex to be recorded in Rebase (ncuts value of 0 in
    emboss_e.###).

    The positions reported for these enzymes are the start of the
    recognition site: _modify and _rev_modify hand the location back
    unchanged.

    Unknown and NotDefined are also part of the base classes of these
    enzymes; the catalyse() method provided by Unknown raises
    NotImplementedError.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        Tell if the enzyme cuts each strand exactly once: always False."""
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        Tell if the enzyme cuts each strand twice: always False."""
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the integer position at which the pattern of the enzyme
        matched the sequence.  The enzyme classes which do cut shift it to
        the position of the cut; as no cut is known here the generator
        yields location itself, once."""
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        Counterpart of _modify for a site found on the antiparallel strand
        of a non palindromic enzyme.  Yields location itself, once."""
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple holds, in this order:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The four cut positions are always None for this class."""
        return None, None, None, None, cls.site
641
class OneCut(AbstractCut):
    """Behaviour of the enzymes which cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        Tell if the enzyme cuts each strand exactly once: always True."""
        return True

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        Tell if the enzyme cuts each strand twice: always False."""
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the integer position at which the pattern of the enzyme
        matched the sequence.  The generator yields one value, the position
        of the cut, computed as location + fst5.

        example:
            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            In GAATACACGGAATTCGA the site starts with the G in position 10,
            so dna.finditer(GAATTC, 6) reports 10 and, the cut being after
            the G:
            EcoRI._modify(10) -> 11."""
        yield location + cls.fst5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        Counterpart of _modify for a site found on the antiparallel strand
        of a non palindromic enzyme.  Yields location - fst3."""
        yield location - cls.fst3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple holds, in this order:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The two second cut positions are always None for this class."""
        return cls.fst5, cls.fst3, None, None, cls.site
712 713
class TwoCuts(AbstractCut):
    """Behaviour of the enzymes which cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        Tell if the enzyme cuts each strand exactly once: always False."""
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        Tell if the enzyme cuts each strand twice: always True."""
        return True

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the integer position at which the pattern of the enzyme
        matched the sequence.  The generator yields the positions of the
        two cuts: location + fst5, then location + scd5."""
        yield location + cls.fst5
        yield location + cls.scd5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        Counterpart of _modify for a site found on the antiparallel strand
        of a non palindromic enzyme.  Yields location - fst3, then
        location - scd3."""
        yield location - cls.fst3
        yield location - cls.scd3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple holds, in this order:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site."""
        return cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site
786 787
class Meth_Dep(AbstractCut):
    """Methylation information: enzymes whose site is methylable."""

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        Tell if the recognition site is methylable: always True here."""
        return True
799
class Meth_Undep(AbstractCut):
    """Methylation information: enzymes which are not sensitive to
    methylation."""

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        Tell if the recognition site is methylable: always False here."""
        return False
811
class Palindromic(AbstractCut):
    """Behaviour of the enzymes whose recognition site is palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        Search the sequence stored in the class attribute dna for the
        compiled site, turn every match start into its cut position(s) with
        _modify and store the list in the class attribute results.  When
        there is at least one result _drop() is called before results is
        returned."""
        cuts = []
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            # _drop is defined outside this class; presumably it filters
            # results in place or rebinds it -- results is re-read below.
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        Tell if the recognition site is a palindrome: always True here."""
        return True
842 843
class NonPalindromic(AbstractCut):
    """Behaviour of the enzymes whose recognition site is not palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        Search the sequence stored in the class attribute dna for the
        compiled site.  For every match the regular expression group named
        after the enzyme is looked up: when it is non empty the match start
        goes through _modify, otherwise through _rev_modify.  The positions
        obtained with _rev_modify are kept in the class attribute on_minus
        and are also appended to the class attribute results, which is then
        sorted.  When there is at least one result _drop() is called before
        results is returned."""
        matches = cls.dna.finditer(cls.compsite, cls.size)
        plus_cuts = cls.results = []
        minus_cuts = cls.on_minus = []
        enzyme_name = str(cls)
        for start, group in matches:
            if group(enzyme_name):
                plus_cuts.extend(cls._modify(start))
            else:
                minus_cuts.extend(cls._rev_modify(start))
        # plus_cuts is the very list bound to cls.results.
        plus_cuts.extend(minus_cuts)
        if plus_cuts:
            plus_cuts.sort()
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        Tell if the recognition site is a palindrome: always False here."""
        return False
883
class Unknown(AbstractCut):
    """Behaviour of the enzymes for which the overhang is unknown.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Same signature as the catalyse method of the other overhang
        classes, but the restriction cannot be performed for an enzyme of
        this class: NotImplementedError is always raised and the arguments
        are not used."""
        raise NotImplementedError('%s restriction is unknown.' \
                                  % cls.__name__)
    # American spelling, bound to the same classmethod.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        Tell if the enzyme produces blunt ends: always False here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        Tell if the enzyme produces 5' overhang sticky ends: always False
        here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        Tell if the enzyme produces 3' overhang sticky ends: always False
        here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown";
        always "unknown" for this class."""
        return 'unknown'

    @classmethod
    def compatible_end(cls):
        """RE.compatible_end() -> list.

        list of all the enzymes that share compatible end with RE;
        always empty for this class."""
        return []

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other;
        always False for this class."""
        return False
965
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        cuts = cls.search(dna, linear)
        # NOTE(review): cls.dna is read right after search(); presumably
        # search() stores the formatted sequence there -- confirm.
        seq = cls.dna
        if not cuts:
            # no site at all : the whole sequence is the only fragment.
            return seq[1:],
        if seq.is_linear():
            # START of the sequence to FIRST site.
            fragments = [seq[1:cuts[0]]]
            # one fragment between each pair of consecutive sites.
            fragments.extend(seq[a:b] for a, b in zip(cuts, cuts[1:]))
            # LAST site to END of the sequence.
            fragments.append(seq[cuts[-1]:])
        else:
            # circular : bridge LAST site to FIRST site.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]]
            # then the fragments between consecutive sites, if any.
            fragments.extend(seq[a:b] for a, b in zip(cuts, cuts[1:]))
        return tuple(fragments)

    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end(batch=None) -> list.

        sorted list of the enzymes of batch that share compatible end with
        RE, i.e. every blunt cutter of batch.

        batch is any iterable of enzymes; when it is omitted (or empty)
        AllEnzymes is searched instead."""
        if not batch:
            batch = AllEnzymes
        # The search must run over batch: iterating AllEnzymes here would
        # silently ignore the argument given by the caller.
        return sorted(x for x in iter(batch) if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Two ends are compatible here when other is a Blunt enzyme too."""
        return issubclass(other, Blunt)
1084
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        cuts = cls.search(dna, linear)
        # NOTE(review): cls.dna is read right after search(); presumably
        # search() stores the formatted sequence there -- confirm.
        seq = cls.dna
        if not cuts:
            # no site at all : the whole sequence is the only fragment.
            return seq[1:],
        if seq.is_linear():
            # START of the sequence to FIRST site.
            fragments = [seq[1:cuts[0]]]
            # one fragment between each pair of consecutive sites.
            fragments.extend(seq[a:b] for a, b in zip(cuts, cuts[1:]))
            # LAST site to END of the sequence.
            fragments.append(seq[cuts[-1]:])
        else:
            # circular : bridge LAST site to FIRST site.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]]
            # then the fragments between consecutive sites, if any.
            fragments.extend(seq[a:b] for a, b in zip(cuts, cuts[1:]))
        return tuple(fragments)

    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end(batch=None) -> list.

        sorted list of the enzymes of batch that share compatible end with
        RE, i.e. the 5' overhang cutters x of batch for which x % RE is
        true.

        batch is any iterable of enzymes; when it is omitted (or empty)
        AllEnzymes is searched instead."""
        if not batch:
            batch = AllEnzymes
        # The search must run over batch: iterating AllEnzymes here would
        # silently ignore the argument given by the caller.
        return sorted(x for x in iter(batch)
                      if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov5 enzyme can be compatible; the overhang sequences
        are then compared by RE._mod2(other)."""
        if not issubclass(other, Ov5):
            return False
        return cls._mod2(other)
1202 1203
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        cuts = cls.search(dna, linear)
        # NOTE(review): cls.dna is read right after search(); presumably
        # search() stores the formatted sequence there -- confirm.
        seq = cls.dna
        if not cuts:
            # no site at all : the whole sequence is the only fragment.
            return seq[1:],
        if seq.is_linear():
            # START of the sequence to FIRST site.
            fragments = [seq[1:cuts[0]]]
            # one fragment between each pair of consecutive sites.
            fragments.extend(seq[a:b] for a, b in zip(cuts, cuts[1:]))
            # LAST site to END of the sequence.
            fragments.append(seq[cuts[-1]:])
        else:
            # circular : bridge LAST site to FIRST site.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]]
            # then the fragments between consecutive sites, if any.
            fragments.extend(seq[a:b] for a, b in zip(cuts, cuts[1:]))
        return tuple(fragments)

    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end(batch=None) -> list.

        sorted list of the enzymes of batch that share compatible end with
        RE, i.e. the 3' overhang cutters x of batch for which x % RE is
        true.

        batch is any iterable of enzymes; when it is omitted (or empty)
        AllEnzymes is searched instead."""
        if not batch:
            batch = AllEnzymes
        # The search must run over batch: iterating AllEnzymes here would
        # silently ignore the argument given by the caller.
        return sorted(x for x in iter(batch)
                      if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov3 enzyme can be compatible; the overhang sequences
        are then compared by RE._mod2(other)."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if not issubclass(other, Ov3):
            return False
        return cls._mod2(other)
1324 1325
class Defined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    and the cut are not variable.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        Rewrite cls.results in place of the raw cut positions:
        linear sequence   : keep only the leading run of positions that
                            are >= 1 and < len(cls.dna).
        circular sequence : shift the leading positions < 1 up by the
                            sequence length and the trailing positions
                            greater than the length down by it."""
        #
        #   The cut positions are obtained by adding the site-to-cut
        #   distance to the site positions (see _modify and _rev_modify),
        #   so some of them may fall outside the sequence.
        #   For a linear sequence they are discarded; for a circular one
        #   the site is in the sequence, so the position is wrapped.
        #
        seq_len = len(cls.dna)
        if cls.dna.is_linear():
            inside = itertools.dropwhile(lambda pos: pos < 1, cls.results)
            cls.results = list(
                itertools.takewhile(lambda pos: pos < seq_len, inside))
            return
        hits = cls.results
        # wrap the positions that fell before the start of the sequence.
        for rank, pos in enumerate(hits):
            if pos >= 1:
                break
            hits[rank] += seq_len
        # wrap the positions that fell after the end, walking backward.
        for back, pos in enumerate(hits[::-1]):
            if pos <= seq_len:
                break
            hits[-(back + 1)] -= seq_len
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always True for this class.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always False for this class.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return False

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        plus_cut = cls.fst5
        minus_cut = cls.fst3
        site = cls.site
        if cls.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang():
            if plus_cut == minus_cut == 0:
                return 'N^' + site + '_N'
            if minus_cut == 0:
                return site[:plus_cut] + '^' + site[plus_cut:] + '_N'
            return (site[:plus_cut] + '^' + site[plus_cut:minus_cut]
                    + '_' + site[minus_cut:])
        if cls.is_blunt():
            return site[:plus_cut] + '^_' + site[plus_cut:]
        # remaining case : the (-) strand cut is drawn before the (+) one.
        if plus_cut == minus_cut == 0:
            return 'N_' + site + '^N'
        return (site[:minus_cut] + '_' + site[minus_cut:plus_cut]
                + '^' + site[plus_cut:])

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other:
        identical overhang sequences are compatible; otherwise an Ambiguous
        other is asked to decide through other._mod2(RE)."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(cls)
        return False
1459 1460
class Ambiguous(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        Rewrite cls.results in place of the raw cut positions:
        linear sequence   : keep only the leading run of positions that
                            are >= 1 and < len(cls.dna).
        circular sequence : shift the leading positions < 1 up by the
                            sequence length and the trailing positions
                            greater than the length down by it."""
        length = len(cls.dna)
        if cls.dna.is_linear():
            inside = itertools.dropwhile(lambda x: x < 1, cls.results)
            cls.results = list(
                itertools.takewhile(lambda x: x < length, inside))
            return
        results = cls.results
        # wrap the positions that fell before the start of the sequence.
        for index, location in enumerate(results):
            if location >= 1:
                break
            results[index] += length
        # wrap the positions that fell after the end, walking backward.
        for index, location in enumerate(results[::-1]):
            if location <= length:
                break
            results[-(index + 1)] -= length
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always False for this class.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always True for this class.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return False

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.

        The overhang of RE is turned into a regular expression ('N' becomes
        '.', each of the codes RYWMSKHDBV becomes the character class
        '[' + matching[code] + ']', anything else is kept as is) which is
        then matched against other.ovhgseq.  Overhangs of different
        lengths are never compatible."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        # Translate base by base.  Substituting over the whole, already
        # partly expanded, pattern would rewrite any code letter that
        # appears inside a previously inserted character class.
        pattern = []
        for base in cls.ovhgseq:
            if base == 'N':
                pattern.append('.')
            elif base in 'RYWMSKHDBV':
                pattern.append('[' + matching[base] + ']')
            else:
                pattern.append(base)
        return re.match(''.join(pattern), other.ovhgseq) is not None

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        Positions outside the recognition site are drawn as 'N'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise ValueError for a blunt enzyme whose cut lies inside the
        site (0 <= fst5 <= len(RE))."""
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # The result is called rep, not re, so that the re module used by
        # _mod2 is never shadowed.
        if cls.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                # 'N^' followed by abs(f5) N; multiplying the three
                # operands together would raise a TypeError (str * str).
                rep = ('N^' + abs(f5) * 'N' + '_'
                       + abs(length + f3) * 'N' + site)
            elif f5 > length:
                rep = (site + (f5 - length) * 'N' + '^'
                       + (length + f3 - f5) * 'N' + '_N')
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif cls.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^_N'
            else:
                # NOTE(review): the message reads cls.name and mentions
                # easyrepr(); both are kept as found -- confirm that the
                # enzyme classes really expose a name attribute.
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (cls.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                rep = (site[:f3] + '_' + site[f3:]
                       + (f5 - length) * 'N' + '^N')
            elif 0 <= f5 <= length:
                rep = ('N_' + 'N' * (f3 + length) + site[:f5]
                       + '^' + site[f5:])
            elif f3 > 0:
                rep = (site + f3 * 'N' + '_'
                       + (f5 - f3 - length) * 'N' + '^N')
            elif f5 < 0:
                rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^'
                       + abs(f5) * 'N' + site)
            else:
                rep = ('N_' + abs(f3 + length) * 'N' + site
                       + (f5 - length) * 'N' + '^N')
        return rep
1628 1629
class NotDefined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        linear sequence   : cls.results is left untouched.
        circular sequence : cls.results is modified in place; the leading
                            positions < 1 are shifted up by the sequence
                            length and the trailing positions greater than
                            the length are shifted down by it."""
        if cls.dna.is_linear():
            return
        length = len(cls.dna)
        results = cls.results
        # wrap the positions that fell before the start of the sequence.
        for index, location in enumerate(results):
            if location >= 1:
                break
            results[index] += length
        # wrap the positions that fell after the end.  The list has to be
        # walked from its last element ([::-1], as in the Defined and
        # Ambiguous classes) for results[-(index+1)] to designate the
        # element being tested; a [:-1] slice walks it forward instead.
        for index, location in enumerate(results[::-1]):
            if location <= length:
                break
            results[-(index + 1)] -= length
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always False for this class.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always False for this class.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always True for this class.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return True

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> never returns, always raises.

        for internal use only

        test for the compatibility of restriction ending of RE and other.

        raise ValueError : the overhang is not defined, so no
        compatibility can be established."""
        #
        #   Normally we should not arrive here. But well better safe than sorry.
        #   the overhang is not defined we are compatible with nobody.
        #   could raise an Error may be rather than return quietly.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(cls), str(other), str(cls)))

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        For this class the cut is unknown, so the site is simply returned
        between two question marks.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        return '? %s ?' % cls.site
1735 1736
class Commercially_available(AbstractCut):
    #
    #   Recent addition to Rebase make this naming convention uncertain.
    #   May be better to says enzymes which have a supplier.
    #
    """Implement the methods specific to the enzymes which are commercially
    available.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def suppliers(cls):
        """RE.suppliers() -> print the suppliers of RE.

        One line is printed, with a trailing comma, for every entry of
        suppliers_dict whose code is found in RE.suppl.  return None."""
        for code, details in suppliers_dict.items():
            if code in cls.suppl:
                # single parenthesised argument: prints the same text
                # whether print is a statement or a function.
                print(details[0] + ',')
        return

    @classmethod
    def supplier_list(cls):
        """RE.supplier_list() -> list.

        list of the supplier names for RE, i.e. the first item of every
        suppliers_dict entry whose code is found in RE.suppl."""
        names = []
        for code, details in suppliers_dict.items():
            if code in cls.suppl:
                names.append(details[0])
        return names

    @classmethod
    def buffers(cls, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet. supplier is ignored and None is returned."""
        return

    @classmethod
    def is_comm(cls):
        """RE.iscomm() -> bool.

        True if RE has suppliers. Always True for this class."""
        return True
1776 1777
class Not_available(AbstractCut):
    """Implement the methods specific to the enzymes which are not commercially
    available.

    Internal use only. Not meant to be instantiated."""

    @staticmethod
    def suppliers():
        """RE.suppliers() -> None.

        There is no supplier to print for this kind of enzyme."""
        return None

    @classmethod
    def supplier_list(cls):
        """RE.supplier_list() -> list.

        list of the supplier names for RE. Always empty for this class."""
        return []

    @classmethod
    def buffers(cls, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet.

        raise TypeError : the enzyme has no supplier, supplier is ignored."""
        raise TypeError("Enzyme not commercially available.")

    @classmethod
    def is_comm(cls):
        """RE.iscomm() -> bool.

        True if RE has suppliers. Always False for this class."""
        return False
1809 1810 1811 ############################################################################### 1812 # # 1813 # Restriction Batch # 1814 # # 1815 ############################################################################### 1816 1817
class RestrictionBatch(set):
    """A set of restriction enzymes that can be searched in one go.

    Attributes set on each instance:
        mapping        -- result of the last search(), {enzyme: positions}.
        already_mapped -- (sequence string, linear) of the last search(),
                          or None; used to reuse the cached mapping.
        suppliers      -- list of the supplier codes used to fill the batch.
    """

    def __init__(self, first=(), suppliers=()):
        """RestrictionBatch([sequence]) -> new RestrictionBatch.

        first     -- iterable of enzymes (RestrictionType or anything that
                     format() accepts).
        suppliers -- iterable of supplier codes (keys of suppliers_dict);
                     all the enzymes of these suppliers are added.

        raise a ValueError if an item of first is not an enzyme.
        raise a KeyError if a code is not a key of suppliers_dict."""
        suppliers = list(suppliers)
        first = [self.format(x) for x in first]
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        # remembered for add_supplier() and current_suppliers().
        self.suppliers = suppliers

    def __str__(self):
        """Names joined by '+'; from 5 enzymes on, only the first two and
        the last two (alphabetical order) are shown around '...'."""
        names = self.elements()
        if len(self) < 5:
            return '+'.join(names)
        return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self):
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        """True if other, once formatted to an enzyme, is in the batch."""
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """batch / dna -> search(dna)."""
        return self.search(other)

    def __rdiv__(self, other):
        """dna / batch -> search(dna)."""
        return self.search(other)

    # Python 3 spells the division hooks differently.
    __truediv__ = __div__
    __rtruediv__ = __rdiv__

    def get(self, enzyme, add=False):
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        raise a ValueError too if enzyme is not in B and add is False."""
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % e.__name__)

    def lambdasplit(self, func):
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True."""
        new = RestrictionBatch()
        # Fill the set itself: a builtin set has no _data dictionary, so
        # assigning one would leave the new batch empty.
        set.update(new, filter(func, self))
        return new

    def add_supplier(self, letter):
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code."""
        supplier = suppliers_dict[letter]
        if not hasattr(self, 'suppliers'):
            # instance built without going through __init__.
            self.suppliers = []
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """B.current_suppliers() -> list.

        return a sorted list of the suppliers which have been used to
        create the batch."""
        codes = getattr(self, 'suppliers', [])
        return sorted(suppliers_dict[x][0] for x in codes)

    def __iadd__(self, other):
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other):
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """B.remove(other) -> remove other from B if other is a RestrictionType.

        Safe set.remove method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B."""
        return set.remove(self, self.format(other))

    def add(self, other):
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return set.add(self, other)

    def format(self, y):
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if str(y) can be evaluated to a RestrictionType return that enzyme
        raise a Value Error in all other case."""
        # NOTE(review): str(y) goes through eval(); never feed this method
        # with text coming from an untrusted source.
        try:
            if isinstance(y, RestrictionType):
                return y
            # evaluated once; the enzyme found is the one returned.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType.
        False in all other cases, including when y can not be evaluated."""
        try:
            self.format(y)
        except ValueError:
            return False
        return True

    def split(self, *classes, **bool):
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        Keep the enzymes that are subclasses of every class given, or that
        are NOT a subclass of it when class.__name__=False is passed.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions."""
        def splittest(element):
            for klass in classes:
                wanted = bool.get(klass.__name__, True)
                # the enzyme must be a subclass exactly when it is wanted.
                if issubclass(element, klass) != wanted:
                    return False
            return True

        new = RestrictionBatch()
        # Fill the set itself: a builtin set has no _data dictionary, so
        # assigning one would leave the new batch empty.
        set.update(new, [k for k in self if splittest(k)])
        return new

    def elements(self):
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically."""
        return sorted(str(e) for e in self)

    def as_string(self):
        """B.as_string() -> list.

        return a list of the name of the elements of B."""
        return [str(e) for e in self]

    @classmethod
    def suppl_codes(cls):
        """B.suppl_codes() -> dict

        letter code for the suppliers"""
        return dict((k, v[0]) for k, v in suppliers_dict.items())

    @classmethod
    def show_codes(cls):
        """B.show_codes() -> letter codes for the suppliers"""
        supply = [' = '.join(i) for i in cls.suppl_codes().items()]
        print('\n'.join(supply))
        return

    def search(self, dna, linear=True):
        """B.search(dna) -> dict.

        dna is a DNA (Seq) instance or a FormattedSeq; for a FormattedSeq
        its own linear attribute is used and the linear argument ignored.
        The result, {enzyme: enzyme.search(...)}, is kept in B.mapping and
        returned as is when the same sequence is searched again.

        raise a TypeError for any other type of dna."""
        #
        #   here we replace the search method of the individual enzymes
        #   with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            # TODO - Why does this happen!
            # Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            # At the time of writing, Seq == method isn't implemented,
            # and therefore does object identity which is stricter.
            key = str(dna), linear
            if key == self.already_mapped:
                return self.mapping
            fseq = FormattedSeq(dna, linear)
        elif isinstance(dna, FormattedSeq):
            key = str(dna), dna.linear
            if key == self.already_mapped:
                return self.mapping
            fseq = dna
        else:
            raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                            % type(dna))
        self.mapping = dict((x, x.search(fseq)) for x in self)
        # the key is recorded only once the mapping really matches it.
        self.already_mapped = key
        return self.mapping
2043 2044 ############################################################################### 2045 # # 2046 # Restriction Analysis # 2047 # # 2048 ############################################################################### 2049
class Analysis(RestrictionBatch, PrintFormat):
    """Search one sequence with a batch of enzymes and filter the results.

    The results live in self.mapping, a dict whose keys are the enzymes of
    the batch and whose values are whatever each enzyme's search returned
    (used below as a sequence of integer cut positions).

    Most methods accept an optional dct of the same shape and filter it.
    When dct is omitted (None) the full mapping is filtered instead, so the
    filters can be chained: a.blunt(a.with_N_sites(2)).
    """

    def __init__(self, restrictionbatch=None, sequence=DNA(''),
                 linear=True):
        """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

        For most of the method of this class if a dictionary is given it will
        be used as the base to calculate the results.
        If no dictionary is given a new analysis using the Restriction Batch
        which has been given when the Analysis class has been instantiated.

        restrictionbatch -- enzymes to use; a new empty RestrictionBatch
                            when omitted.
        sequence         -- the sequence to analyse; it is searched at once
                            unless it is empty.
        linear           -- topology flag handed to search().
        """
        # Build the default batch per call: a RestrictionBatch() written in
        # the signature would be a single object shared by every Analysis.
        if restrictionbatch is None:
            restrictionbatch = RestrictionBatch()
        RestrictionBatch.__init__(self, restrictionbatch)
        self.rb = restrictionbatch
        self.sequence = sequence
        self.linear = linear
        if self.sequence:
            self.search(self.sequence, self.linear)

    def __repr__(self):
        """Return 'Analysis(<batch>,<sequence>,<linear>)'."""
        return 'Analysis(%s,%s,%s)' % \
               (repr(self.rb), repr(self.sequence), self.linear)

    def _select(self, dct, keep):
        """A._select(dct, keep) -> dict.

        Internal use only.

        Return the entries of dct (self.mapping when dct is None) for which
        keep(enzyme, sites) is true.
        """
        if dct is None:
            dct = self.mapping
        return dict((enz, sites) for enz, sites in dct.iteritems()
                    if keep(enz, sites))

    def _sub_set(self, wanted):
        """A._sub_set(other_set) -> dict.

        Internal use only.

        screen the results through wanted set.
        Keep only the results for which the enzymes is in wanted set.
        """
        return dict((k, v) for k, v in self.mapping.iteritems()
                    if k in wanted)

    def _boundaries(self, start, end):
        """A._boundaries(start, end) -> tuple.

        Format the boundaries for use with the methods that limit the
        search to only part of the sequence given to analyse.

        Values below 1 are counted from the end of the sequence (the
        sequence length is added to them).  Returns (start, end, test)
        where test(start, end, site) tells whether site is in the region:
        _test_normal when start < end, _test_reverse (region wrapping
        around the end of the sequence) otherwise.

        Raises TypeError if start or end is not an int.
        """
        if not isinstance(start, int):
            raise TypeError('expected int, got %s instead' % type(start))
        if not isinstance(end, int):
            raise TypeError('expected int, got %s instead' % type(end))
        length = len(self.sequence)
        if start < 1:
            start += length
        if end < 1:
            end += length
        # Still before the sequence after the shift: clamp to position 1.
        if start < 1:
            start = 1
        # start >= end is deliberately NOT swapped: that ordering is what
        # selects the wrap-around test below.
        if start < end:
            return start, end, self._test_normal
        return start, end, self._test_reverse

    def _test_normal(self, start, end, site):
        """A._test_normal(start, end, site) -> bool.

        Internal use only
        Test if site is in between start and end.
        """
        return start <= site < end

    def _test_reverse(self, start, end, site):
        """A._test_reverse(start, end, site) -> bool.

        Internal use only
        Test if site is in between end and start (for circular sequences).
        """
        return start <= site <= len(self.sequence) or 1 <= site < end

    def print_that(self, dct=None, title='', s1=''):
        """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

        If dct is not given the full dictionary is used.
        An empty dct is treated as not given.
        """
        if not dct:
            dct = self.mapping
        # Blank line before the report.
        print("")
        return PrintFormat.print_that(self, dct, title, s1)

    def change(self, **what):
        """A.change(**attribute_name) -> Change attribute of Analysis.

        It is possible to change the width of the shell by setting
        self.ConsoleWidth to what you want.
        self.NameWidth refer to the maximal length of the enzyme name.

        Changing one of these parameters here might not give the results
        you expect. In which case, you can settle back to a 80 columns shell
        or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
        you get it right.

        Accepted keywords: NameWidth, ConsoleWidth, Indent, Maxsize,
        sequence, rb and linear.  Changing sequence or linear runs a new
        search; changing rb re-initialises the analysis with the new batch.
        Raises AttributeError for Cmodulo, PrefWidth (derived values) and
        for any unknown keyword.
        """
        for k, v in what.iteritems():
            if k in ('NameWidth', 'ConsoleWidth'):
                setattr(self, k, v)
                self.Cmodulo = self.ConsoleWidth % self.NameWidth
                self.PrefWidth = self.ConsoleWidth - self.Cmodulo
            elif k == 'sequence':
                self.sequence = v
                self.search(self.sequence, self.linear)
            elif k == 'rb':
                # __init__ returns None: do not rebind self to its result,
                # the remaining keywords still need the instance.
                Analysis.__init__(self, v, self.sequence, self.linear)
            elif k == 'linear':
                self.linear = v
                self.search(self.sequence, v)
            elif k in ('Indent', 'Maxsize'):
                setattr(self, k, v)
            elif k in ('Cmodulo', 'PrefWidth'):
                raise AttributeError(
                    'To change %s, change NameWidth and/or ConsoleWidth'
                    % k)
            else:
                raise AttributeError(
                    'Analysis has no attribute %s' % k)
        return

    def full(self, linear=True):
        """A.full() -> dict.

        Full Restriction Map of the sequence.
        The linear argument is accepted but not used."""
        return self.mapping

    def blunt(self, dct=None):
        """A.blunt([dct]) -> dict.

        Only the enzymes which have a blunt restriction site."""
        return self._select(dct, lambda enz, sites: enz.is_blunt())

    def overhang5(self, dct=None):
        """A.overhang5([dct]) -> dict.

        Only the enzymes which have a 5' overhang restriction site."""
        return self._select(dct, lambda enz, sites: enz.is_5overhang())

    def overhang3(self, dct=None):
        """A.overhang3([dct]) -> dict.

        Only the enzymes which have a 3'overhang restriction site."""
        return self._select(dct, lambda enz, sites: enz.is_3overhang())

    def defined(self, dct=None):
        """A.defined([dct]) -> dict.

        Only the enzymes that have a defined restriction site in Rebase."""
        return self._select(dct, lambda enz, sites: enz.is_defined())

    def with_sites(self, dct=None):
        """A.with_sites([dct]) -> dict.

        Enzymes which have at least one site in the sequence."""
        return self._select(dct, lambda enz, sites: sites)

    def without_site(self, dct=None):
        """A.without_site([dct]) -> dict.

        Enzymes which have no site in the sequence."""
        return self._select(dct, lambda enz, sites: not sites)

    def with_N_sites(self, N, dct=None):
        """A.with_N_sites(N [, dct]) -> dict.

        Enzymes which cut N times the sequence."""
        return self._select(dct, lambda enz, sites: len(sites) == N)

    def with_number_list(self, list, dct=None):
        """A.with_number_list(list [, dct]) -> dict.

        Enzymes whose number of sites is one of the values in list."""
        return self._select(dct, lambda enz, sites: len(sites) in list)

    def with_name(self, names, dct=None):
        """A.with_name(list_of_names [, dct]) ->

        Limit the search to the enzymes named in list_of_names.

        Entries that are not in AllEnzymes are reported on stdout and
        ignored; list_of_names itself is left untouched.  Without dct a new
        search of the sequence is run with the remaining enzymes, otherwise
        the matching entries of dct are returned."""
        known = []
        for enzyme in names:
            if enzyme in AllEnzymes:
                known.append(enzyme)
            else:
                print("no datas for the enzyme: %s" % str(enzyme))
        if dct is None:
            return RestrictionBatch(known).search(self.sequence, self.linear)
        return dict((enz, dct[enz]) for enz in known if enz in dct)

    def with_site_size(self, site_size, dct=None):
        """A.with_site_size(site_size [, dct]) ->

        Limit the search to the enzymes whose site is of size <site_size>.

        Only the enzymes of this batch are considered.  Without dct a new
        search of the sequence is run with them, otherwise the matching
        entries of dct are returned."""
        sites = [enz for enz in self if enz.size == site_size]
        if dct is None:
            return RestrictionBatch(sites).search(self.sequence, self.linear)
        return dict((k, v) for k, v in dct.iteritems() if k in sites)

    def only_between(self, start, end, dct=None):
        """A.only_between(start, end[, dct]) -> dict.

        Enzymes that cut the sequence only in between start and end."""
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.iteritems():
            if not sites:
                continue
            for site in sites:
                if not test(start, end, site):
                    break
            else:
                # No site fell outside the region.
                d[key] = sites
        return d

    def between(self, start, end, dct=None):
        """A.between(start, end [, dct]) -> dict.

        Enzymes that cut the sequence at least in between start and end.
        They may cut outside as well."""
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.iteritems():
            for site in sites:
                if test(start, end, site):
                    d[key] = sites
                    break
        return d

    def show_only_between(self, start, end, dct=None):
        """A.show_only_between(start, end [, dct]) -> dict.

        Enzymes that cut the sequence in between start and end (see
        between), each with only those of its sites that lie in the region.
        The sites are compared with start and end as given, both included;
        start > end denotes a region wrapping around the end of the
        sequence."""
        selected = self.between(start, end, dct)
        if start <= end:
            d = [(k, [vv for vv in v if start <= vv <= end])
                 for k, v in selected.iteritems()]
        else:
            d = [(k, [vv for vv in v if start <= vv or vv <= end])
                 for k, v in selected.iteritems()]
        return dict(d)

    def only_outside(self, start, end, dct=None):
        """A.only_outside(start, end [, dct]) -> dict.

        Enzymes that cut the sequence outside of the region
        in between start and end but do not cut inside."""
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.iteritems():
            if not sites:
                continue
            for site in sites:
                if test(start, end, site):
                    break
            else:
                # No site fell inside the region.
                d[key] = sites
        return d

    def outside(self, start, end, dct=None):
        """A.outside((start, end [, dct]) -> dict.

        Enzymes that cut outside the region in between start and end.
        No test is made to know if they cut or not inside this region."""
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.iteritems():
            for site in sites:
                if not test(start, end, site):
                    d[key] = sites
                    break
        return d

    def do_not_cut(self, start, end, dct=None):
        """A.do_not_cut(start, end [, dct]) -> dict.

        Enzymes that do not cut the region in between start and end:
        those without any site plus those cutting only outside."""
        if dct is None:
            dct = self.mapping
        d = self.without_site(dct)
        d.update(self.only_outside(start, end, dct))
        return d

#
#   Dynamic creation of the restriction enzyme classes, done once when the
#   module is imported.
#
#   Restriction_Dictionary holds two dictionaries: one for the types (called
#   pseudo-types, as they really are the values that instances of
#   RestrictionType can take) and one for the enzymes.  They are kept apart
#   for efficiency: the bases are evaluated once per pseudo-type.
#
#   Restriction is still a very inefficient module at import, as around 660
#   classes (more or less the size of Rebase) have to be created.  This is
#   largely compensated by the use of a metaclass, which gives the classes an
#   efficient layout and mostly removes the need for if/else tests in their
#   methods.
#
#   It is essential to run Restriction with doc string optimisation
#   (-OO switch) as the doc string of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.iteritems():
    #
    #   TYPE is the key of the pseudo-type (stored as type1, type2...).
    #   The key only differentiates the entries of the dict: every
    #   pseudo-type is in fact a RestrictionType.  The keys are not used
    #   afterwards and the pseudo-types are not kept in the module namespace,
    #   so they cannot be imported.
    #   Types without corresponding enzymes are absent from the dictionary:
    #   Dictionary_Builder() removed them.
    #
    #   Each value is a tuple: first the bases (as strings), then the names
    #   of the enzymes.
    #
    #   Turn the base names into the classes they designate.
    #   NOTE(review): eval() is applied to strings taken from typedict;
    #   this is only safe as long as that dictionary is trusted.
    #
    bases = tuple(eval(base_name) for base_name in bases)
    #
    #   The particular value of RestrictionType shared by the classes
    #   listed in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        #   enzymedict[k] contains the values of the attributes for this
        #   particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        #   File the new enzyme in the corresponding batch.
        #   No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
#   AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = CommOnly | NonComm
#
#   Publish every enzyme under its own name so that it can be imported.
#
names = [str(x) for x in AllEnzymes]
try:
    del x
except NameError:
    # Scoping changed in Python 3, the variable isn't leaked
    pass
# At module level the local namespace is the global one.
globals().update(zip(names, AllEnzymes))
__all__ = ['FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm'] + names
del k, enzymes, TYPE, bases, names
