1
2
3
4
5
6
7
8
9
10
11 """ Notes about the various classes of the restriction enzyme implementation.
12
13 RestrictionType is the type of all restriction enzymes.
14 ----------------------------------------------------------------------------
15 AbstractCut implements some methods that are common to all enzymes.
16 ----------------------------------------------------------------------------
17 NoCut, OneCut,TwoCuts represent the number of double strand cuts
18 produced by the enzyme.
19 they correspond to the 4th field of the rebase
20 record emboss_e.NNN.
21 0->NoCut : the enzyme is not characterised.
22 2->OneCut : the enzyme produces one double strand cut.
23 4->TwoCuts : two double strand cuts.
24 ----------------------------------------------------------------------------
25 Meth_Dep, Meth_Undep represent the methylation susceptibility to
26 the enzyme.
27 Not implemented yet.
28 ----------------------------------------------------------------------------
29 Palindromic, if the site is palindromic or not.
30 NotPalindromic allow some optimisations of the code.
31 No need to check the reverse strand
32 with palindromic sites.
33 ----------------------------------------------------------------------------
34 Unknown, Blunt, represent the overhang.
35 Ov5, Ov3 Unknown is here for symmetry reasons and
36 corresponds to enzymes that are not characterised
37 in rebase.
38 ----------------------------------------------------------------------------
39 Defined, Ambiguous, represent the sequence of the overhang.
40 NotDefined
41 NotDefined is for enzymes not characterised in
42 rebase.
43
44 Defined correspond to enzymes that display a
45 constant overhang whatever the sequence.
46 ex : EcoRI. G^AATTC -> overhang :AATT
47 CTTAA^G
48
49 Ambiguous : the overhang varies with the
50 sequence restricted.
51 Typically enzymes which cut outside their
52 restriction site or (but not always)
53 inside an ambiguous site.
54 ex:
55 AcuI CTGAAG(22/20) -> overhang : NN
56 AasI GACNNN^NNNGTC -> overhang : NN
57 CTGN^NNNNNCAG
58
59 note : these 3 classes refer to the overhang, not the site.
60 So the enzyme ApoI (RAATTY) is defined even if its restriction
61 site is ambiguous.
62
63 ApoI R^AATTY -> overhang : AATT -> Defined
64 YTTAA^R
65 Accordingly, blunt enzymes are always Defined even
66 when they cut outside their restriction site.
67 ----------------------------------------------------------------------------
68 Not_available, as found in rebase file emboss_r.NNN files.
69 Commercially_available
70 allow the selection of the enzymes according to
71 their suppliers to reduce the quantity
72 of results.
73 Also will allow the implementation of buffer
74 compatibility tables. Not implemented yet.
75
76 the list of suppliers is extracted from
77 emboss_s.NNN
78 ----------------------------------------------------------------------------
79 """
80
81 import re
82 import itertools
83
84 from Bio.Seq import Seq, MutableSeq
85 from Bio.Alphabet import IUPAC
86
87 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
88 from Bio.Restriction.Restriction_Dictionary import typedict
89 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
90 from Bio.Restriction.RanaConfig import *
91 from Bio.Restriction.PrintFormat import PrintFormat
92
93
94
def _check_bases(seq_string):
    """Validate and normalise a recognition-site string (PRIVATE).

    All whitespace and the ASCII digits 0-9 are stripped and the remaining
    characters are upper-cased.  Afterwards only the ambiguous IUPAC DNA
    single letter codes (ABCDGHKMNRSTVWY) are accepted; any other character
    (e.g. a symbol) triggers a TypeError.

    Returns the cleaned string WITH A LEADING SPACE (!).  This is for
    backwards compatibility, and may in part be explained by the fact that
    Bio.Restriction doesn't use zero based counting.
    """
    allowed = set("ABCDGHKMNRSTVWY")
    digits = "0123456789"

    # Drop every run of whitespace first, then normalise the case.
    compact = "".join(seq_string.split()).upper()
    # Position digits may be embedded in the site; they carry no base.
    bases = "".join([ch for ch in compact if ch not in digits])

    if set(bases) - allowed:
        raise TypeError("Invalid character found in %s" % repr(bases))
    return " " + bases
115
def check_bases(seq_string):
    """Check characters in a string (DEPRECATED).

    Emits a Bio.BiopythonDeprecationWarning and then delegates to
    _check_bases(seq_string), returning whatever that function returns
    (and raising whatever it raises).
    """
    # Imported locally so the cost is only paid by callers of the
    # deprecated entry point.
    import warnings
    import Bio
    # The two literals are concatenated by the parser: the first one must
    # end with a space, otherwise the message reads "will beremoved".
    warnings.warn("The check_bases function has been deprecated, and will be "
                  "removed in a future release of Biopython.",
                  Bio.BiopythonDeprecationWarning)
    return _check_bases(seq_string)
123
124 matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN',
125 'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY',
126 'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY',
127 'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY',
128 'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY',
129 'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'}
130
131 DNA = Seq
132
231
232
234 """RestrictionType. Type from which derives all enzyme classes.
235
236 Implement the operator methods."""
237
def __init__(cls, name='', bases=(), dct={}):
    """RE(name, bases, dct) -> RestrictionType instance.

    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    name  -- name of the enzyme class being created; must not contain a
             hyphen.
    bases -- tuple of base classes.
    dct   -- class namespace (only forwarded, never modified here).

    Replaces the class attribute `compsite` (a pattern string) by the
    corresponding compiled regular expression.

    Raises ValueError if the name contains a hyphen or if `compsite`
    cannot be compiled.

    see below."""
    if "-" in name:
        raise ValueError("Problem with hyphen in %s as enzyme name"
                         % repr(name))
    # NOTE(review): cls is passed again as the first explicit argument of
    # the already bound super().__init__ -- kept exactly as it was; confirm
    # this is intended.
    super(RestrictionType, cls).__init__(cls, name, bases, dct)
    try:
        cls.compsite = re.compile(cls.compsite)
    except Exception:
        # The exception object itself is not needed, so it is not bound;
        # this spelling is valid on both Python 2 and Python 3.
        raise ValueError("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
254
266
268 """RE.__div__(other) -> list.
269
270 RE/other
271 returns RE.search(other)."""
272 return cls.search(other)
273
275 """RE.__rdiv__(other) -> list.
276
277 other/RE
278 returns RE.search(other)."""
279 return cls.search(other)
280
282 """RE.__truediv__(other) -> list.
283
284 RE/other
285 returns RE.search(other)."""
286 return cls.search(other)
287
289 """RE.__rtruediv__(other) -> list.
290
291 other/RE
292 returns RE.search(other)."""
293 return cls.search(other)
294
296 """RE.__floordiv__(other) -> list.
297
298 RE//other
299 returns RE.catalyse(other)."""
300 return cls.catalyse(other)
301
303 """RE.__rfloordiv__(other) -> list.
304
305 other//RE
306 returns RE.catalyse(other)."""
307 return cls.catalyse(other)
308
310 """RE.__str__() -> str.
311
312 return the name of the enzyme."""
313 return cls.__name__
314
316 """RE.__repr__() -> str.
317
318 used with eval or exec will instantiate the enzyme."""
319 return "%s" % cls.__name__
320
322 """RE.__len__() -> int.
323
324 length of the recognition site."""
325 return cls.size
326
328
329
330 return id(cls)
331
333 """RE == other -> bool
334
335 True if RE and other are the same enzyme.
336
337 Specifically this checks they are the same Python object.
338 """
339
340 return id(cls)==id(other)
341
343 """RE != other -> bool.
344 isoschizomer strict, same recognition site, same restriction -> False
345 all the other-> True
346
347 WARNING - This is not the inverse of the __eq__ method.
348 """
349 if not isinstance(other, RestrictionType):
350 return True
351 elif cls.charac == other.charac:
352 return False
353 else:
354 return True
355
357 """RE >> other -> bool.
358
359 neoschizomer : same recognition site, different restriction. -> True
360 all the others : -> False"""
361 if not isinstance(other, RestrictionType):
362 return False
363 elif cls.site == other.site and cls.charac != other.charac:
364 return True
365 else:
366 return False
367
369 """a % b -> bool.
370
371 Test compatibility of the overhang of a and b.
372 True if a and b have compatible overhang."""
373 if not isinstance(other, RestrictionType):
374 raise TypeError( \
375 'expected RestrictionType, got %s instead' % type(other))
376 return cls._mod1(other)
377
379 """a >= b -> bool.
380
381 a is greater or equal than b if the a site is longer than b site.
382 if their site have the same length sort by alphabetical order of their
383 names."""
384 if not isinstance(other, RestrictionType):
385 raise NotImplementedError
386 if len(cls) > len(other):
387 return True
388 elif cls.size == len(other) and cls.__name__ >= other.__name__:
389 return True
390 else:
391 return False
392
394 """a > b -> bool.
395
396 sorting order:
397 1. size of the recognition site.
398 2. if equal size, alphabetical order of the names."""
399 if not isinstance(other, RestrictionType):
400 raise NotImplementedError
401 if len(cls) > len(other):
402 return True
403 elif cls.size == len(other) and cls.__name__ > other.__name__:
404 return True
405 else:
406 return False
407
409 """a <= b -> bool.
410
411 sorting order:
412 1. size of the recognition site.
413 2. if equal size, alphabetical order of the names."""
414 if not isinstance(other, RestrictionType):
415 raise NotImplementedError
416 elif len(cls) < len(other):
417 return True
418 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
419 return True
420 else:
421 return False
422
424 """a < b -> bool.
425
426 sorting order:
427 1. size of the recognition site.
428 2. if equal size, alphabetical order of the names."""
429 if not isinstance(other, RestrictionType):
430 raise NotImplementedError
431 elif len(cls) < len(other):
432 return True
433 elif len(cls) == len(other) and cls.__name__ < other.__name__:
434 return True
435 else:
436 return False
437
438
440 """Implement the methods that are common to all restriction enzymes.
441
442 All the methods are classmethod.
443
444 For internal use only. Not meant to be instantiate."""
445
def search(cls, dna, linear=True):
    """RE.search(dna, linear=True) -> list.

    Return the list of all the sites of RE in dna, as computed by
    cls._search(). Compensate for circular sequences and so on.

    dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance;
    a FormattedSeq is also accepted and is then used as it is (linear is
    not consulted in that case).

    if linear is False, the restriction sites that span over the
    boundaries will be included.

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut.

    The sequence searched is kept on the class as cls.dna."""
    # Only wrap what is not already a FormattedSeq.
    formatted = dna if isinstance(dna, FormattedSeq) \
        else FormattedSeq(dna, linear)
    cls.dna = formatted
    return cls._search()
search = classmethod(search)
473
475 """RE.all_suppliers -> print all the suppliers of R"""
476 supply = [x[0] for x in suppliers_dict.itervalues()]
477 supply.sort()
478 print ",\n".join(supply)
479 return
480 all_suppliers = classmethod(all_suppliers)
481
483 """RE.is_equischizomers(other) -> bool.
484
485 True if other is an isoschizomer of RE.
486 False else.
487
488 equischizomer <=> same site, same position of restriction."""
489 return not self != other
490 is_equischizomer = classmethod(is_equischizomer)
491
493 """RE.is_neoschizomers(other) -> bool.
494
495 True if other is an isoschizomer of RE.
496 False else.
497
498 neoschizomer <=> same site, different position of restriction."""
499 return self >> other
500 is_neoschizomer = classmethod(is_neoschizomer)
501
503 """RE.is_isoschizomers(other) -> bool.
504
505 True if other is an isoschizomer of RE.
506 False else.
507
508 isoschizomer <=> same site."""
509 return (not self != other) or self >> other
510 is_isoschizomer = classmethod(is_isoschizomer)
511
def equischizomers(self, batch=None):
    """RE.equischizomers([batch]) -> list.

    return a sorted list of all the equischizomers of RE, RE itself
    excluded.
    if batch is supplied (and not empty) it is used instead of the default
    AllEnzymes. RE does not need to be a member of batch.

    equischizomer <=> same site, same position of restriction."""
    if not batch:
        batch = AllEnzymes
    # `not self != x` is the equischizomer test.  RE passes that test
    # against itself, so it is filtered out by identity; looking it up with
    # list.index() raised ValueError whenever batch did not contain RE.
    return sorted(x for x in batch if x is not self and not self != x)
equischizomers = classmethod(equischizomers)
526
def neoschizomers(self, batch=None):
    """RE.neoschizomers([batch]) -> list.

    return a sorted list of all the neoschizomers of RE.
    if batch is supplied (and not empty) it is used instead of the default
    AllEnzymes.

    neoschizomer <=> same site, different position of restriction."""
    candidates = batch if batch else AllEnzymes
    # `self >> x` is the neoschizomer test.
    return sorted(x for x in candidates if self >> x)
neoschizomers = classmethod(neoschizomers)
539
def isoschizomers(self, batch=None):
    """RE.isoschizomers([batch]) -> list.

    return a sorted list of all the equischizomers and neoschizomers of
    RE, RE itself excluded.
    if batch is supplied (and not empty) it is used instead of the default
    AllEnzymes. RE does not need to be a member of batch."""
    if not batch:
        batch = AllEnzymes
    # `self >> x` selects neoschizomers, `not self != x` equischizomers.
    # RE is its own equischizomer, so it is filtered out by identity;
    # looking it up with list.index() raised ValueError whenever batch did
    # not contain RE.
    return sorted(x for x in batch
                  if x is not self and ((self >> x) or (not self != x)))
isoschizomers = classmethod(isoschizomers)
552
554 """RE.frequency() -> int.
555
556 frequency of the site."""
557 return self.freq
558 frequency = classmethod(frequency)
559
560
class NoCut(AbstractCut):
    """Methods for the enzymes recorded as producing no cut.

    These are generally enzymes that have been only partially
    characterised, so the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site: _modify() and _rev_modify() hand the
    match location back unchanged.

    No catalyse() method is defined in this class.

    Per the original notes, Unknown and NotDefined are also part of the
    base classes of these enzymes.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand;
        always False here."""
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand;
        always False here."""
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        For the cutting enzymes _modify translates that location into the
        real place(s) where the enzyme will cut, e.g.:

            EcoRI pattern : GAATTC, EcoRI cuts after the G.
            if the match is found at base 10, EcoRI._modify(10) -> 11.

        Nothing is known about the cut here, so the location itself is
        the single value produced.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify, for sites situated on the antiparallel strand when the
        enzyme is not palindromic. The location is produced unchanged.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The four cut positions are always None for this class."""
        return (None, None, None, None, cls.site)
641
643 """Implement the methods specific to the enzymes that cut the DNA only once
644
645 Correspond to ncuts values of 2 in emboss_e.###
646
647 Internal use only. Not meant to be instantiated."""
648
650 """RE.cut_once() -> bool.
651
652 True if the enzyme cut the sequence one time on each strand."""
653 return True
654 cut_once = classmethod(cut_once)
655
657 """RE.cut_twice() -> bool.
658
659 True if the enzyme cut the sequence twice on each strand."""
660 return False
661 cut_twice = classmethod(cut_twice)
662
664 """RE._modify(location) -> int.
665
666 for internal use only.
667
668 location is an integer corresponding to the location of the match for
669 the enzyme pattern in the sequence.
670 _modify returns the real place where the enzyme will cut.
671
672 example:
673 EcoRI pattern : GAATTC
674 EcoRI will cut after the G.
675 so in the sequence:
676 ______
677 GAATACACGGAATTCGA
678 |
679 10
680 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
681 EcoRI cut after the G so:
682 EcoRI._modify(10) -> 11.
683
684 if the enzyme cut twice _modify will returns two integer corresponding
685 to each cutting site.
686 """
687 yield location + self.fst5
688 _modify = classmethod(_modify)
689
691 """RE._rev_modify(location) -> generator of int.
692
693 for internal use only.
694
695 as _modify for site situated on the antiparallel strand when the
696 enzyme is not palindromic
697 """
698 yield location - self.fst3
699 _rev_modify = classmethod(_rev_modify)
700
702 """RE.characteristic() -> tuple.
703
704 the tuple contains the attributes:
705 fst5 -> first 5' cut ((current strand) or None
706 fst3 -> first 3' cut (complementary strand) or None
707 scd5 -> second 5' cut (current strand) or None
708 scd5 -> second 3' cut (complementary strand) or None
709 site -> recognition site."""
710 return self.fst5, self.fst3, None, None, self.site
711 characteristic = classmethod(characteristic)
712
713
715 """Implement the methods specific to the enzymes that cut the DNA twice
716
717 Correspond to ncuts values of 4 in emboss_e.###
718
719 Internal use only. Not meant to be instantiated."""
720
722 """RE.cut_once() -> bool.
723
724 True if the enzyme cut the sequence one time on each strand."""
725 return False
726 cut_once = classmethod(cut_once)
727
729 """RE.cut_twice() -> bool.
730
731 True if the enzyme cut the sequence twice on each strand."""
732 return True
733 cut_twice = classmethod(cut_twice)
734
736 """RE._modify(location) -> int.
737
738 for internal use only.
739
740 location is an integer corresponding to the location of the match for
741 the enzyme pattern in the sequence.
742 _modify returns the real place where the enzyme will cut.
743
744 example:
745 EcoRI pattern : GAATTC
746 EcoRI will cut after the G.
747 so in the sequence:
748 ______
749 GAATACACGGAATTCGA
750 |
751 10
752 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
753 EcoRI cut after the G so:
754 EcoRI._modify(10) -> 11.
755
756 if the enzyme cut twice _modify will returns two integer corresponding
757 to each cutting site.
758 """
759 yield location + self.fst5
760 yield location + self.scd5
761 _modify = classmethod(_modify)
762
764 """RE._rev_modify(location) -> generator of int.
765
766 for internal use only.
767
768 as _modify for site situated on the antiparallel strand when the
769 enzyme is not palindromic
770 """
771 yield location - self.fst3
772 yield location - self.scd3
773 _rev_modify = classmethod(_rev_modify)
774
776 """RE.characteristic() -> tuple.
777
778 the tuple contains the attributes:
779 fst5 -> first 5' cut ((current strand) or None
780 fst3 -> first 3' cut (complementary strand) or None
781 scd5 -> second 5' cut (current strand) or None
782 scd5 -> second 3' cut (complementary strand) or None
783 site -> recognition site."""
784 return self.fst5, self.fst3, self.scd5, self.scd3, self.site
785 characteristic = classmethod(characteristic)
786
787
789 """Implement the information about methylation.
790
791 Enzymes of this class possess a site which is methylable."""
792
794 """RE.is_methylable() -> bool.
795
796 True if the recognition site is a methylable."""
797 return True
798 is_methylable = classmethod(is_methylable)
799
801 """Implement informations about methylation sensitibility.
802
803 Enzymes of this class are not sensible to methylation."""
804
806 """RE.is_methylable() -> bool.
807
808 True if the recognition site is a methylable."""
809 return False
810 is_methylable = classmethod(is_methylable)
811
813 """Implement the methods specific to the enzymes which are palindromic
814
815 palindromic means : the recognition site and its reverse complement are
816 identical.
817 Remarks : an enzyme with a site CGNNCG is palindromic even if some
818 of the sites that it will recognise are not.
819 for example here : CGAACG
820
821 Internal use only. Not meant to be instantiated."""
822
824 """RE._search() -> list.
825
826 for internal use only.
827
828 implement the search method for palindromic and non palindromic enzyme.
829 """
830 siteloc = self.dna.finditer(self.compsite,self.size)
831 self.results = [r for s,g in siteloc for r in self._modify(s)]
832 if self.results : self._drop()
833 return self.results
834 _search = classmethod(_search)
835
837 """RE.is_palindromic() -> bool.
838
839 True if the recognition site is a palindrom."""
840 return True
841 is_palindromic = classmethod(is_palindromic)
842
843
845 """Implement the methods specific to the enzymes which are not palindromic
846
847 palindromic means : the recognition site and its reverse complement are
848 identical.
849
850 Internal use only. Not meant to be instantiated."""
851
853 """RE._search() -> list.
854
855 for internal use only.
856
857 implement the search method for palindromic and non palindromic enzyme.
858 """
859 iterator = self.dna.finditer(self.compsite, self.size)
860 self.results = []
861 modif = self._modify
862 revmodif = self._rev_modify
863 s = str(self)
864 self.on_minus = []
865 for start, group in iterator:
866 if group(s):
867 self.results += [r for r in modif(start)]
868 else:
869 self.on_minus += [r for r in revmodif(start)]
870 self.results += self.on_minus
871 if self.results:
872 self.results.sort()
873 self._drop()
874 return self.results
875 _search = classmethod(_search)
876
878 """RE.is_palindromic() -> bool.
879
880 True if the recognition site is a palindrom."""
881 return False
882 is_palindromic = classmethod(is_palindromic)
883
885 """Implement the methods specific to the enzymes for which the overhang
886 is unknown.
887
888 These enzymes are also NotDefined and NoCut.
889
890 Internal use only. Not meant to be instantiated."""
891
893 """RE.catalyse(dna, linear=True) -> tuple of DNA.
894 RE.catalyze(dna, linear=True) -> tuple of DNA.
895
896 return a tuple of dna as will be produced by using RE to restrict the
897 dna.
898
899 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
900
901 if linear is False, the sequence is considered to be circular and the
902 output will be modified accordingly."""
903 raise NotImplementedError('%s restriction is unknown.' \
904 % self.__name__)
905 catalyze = catalyse = classmethod(catalyse)
906
908 """RE.is_blunt() -> bool.
909
910 True if the enzyme produces blunt end.
911
912 see also:
913 RE.is_3overhang()
914 RE.is_5overhang()
915 RE.is_unknown()"""
916 return False
917 is_blunt = classmethod(is_blunt)
918
920 """RE.is_5overhang() -> bool.
921
922 True if the enzyme produces 5' overhang sticky end.
923
924 see also:
925 RE.is_3overhang()
926 RE.is_blunt()
927 RE.is_unknown()"""
928 return False
929 is_5overhang = classmethod(is_5overhang)
930
932 """RE.is_3overhang() -> bool.
933
934 True if the enzyme produces 3' overhang sticky end.
935
936 see also:
937 RE.is_5overhang()
938 RE.is_blunt()
939 RE.is_unknown()"""
940 return False
941 is_3overhang = classmethod(is_3overhang)
942
944 """RE.overhang() -> str. type of overhang of the enzyme.,
945
946 can be "3' overhang", "5' overhang", "blunt", "unknown" """
947 return 'unknown'
948 overhang = classmethod(overhang)
949
951 """RE.compatible_end() -> list.
952
953 list of all the enzymes that share compatible end with RE."""
954 return []
955 compatible_end = classmethod(compatible_end)
956
958 """RE._mod1(other) -> bool.
959
960 for internal use only
961
962 test for the compatibility of restriction ending of RE and other."""
963 return False
964 _mod1 = classmethod(_mod1)
965
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna, a 1-tuple holding the whole sequence is
        returned."""
        r = self.search(dna, linear)
        # search() leaves the sequence it worked on in self.dna.
        d = self.dna
        if not r:
            return d[1:],
        # Fragments delimited by two consecutive cut positions (empty when
        # there is a single site).
        inner = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the inner fragments,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + inner + [d[r[-1]:]]
        else:
            # circular: the first fragment bridges LAST site to FIRST site.
            fragments = [d[r[-1]:] + d[1:r[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True
    is_blunt = classmethod(is_blunt)

    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return 'blunt'
    overhang = classmethod(overhang)

    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes that share compatible end with RE,
        i.e. all the blunt enzymes.
        if batch is supplied (and not empty) the enzymes are taken from it
        instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # Iterate over batch: a supplied batch used to be ignored in favour
        # of AllEnzymes.
        return sorted(x for x in batch if x.is_blunt())
    compatible_end = classmethod(compatible_end)

    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other:
        True if other is a subclass of Blunt."""
        return issubclass(other, Blunt)
    _mod1 = staticmethod(_mod1)
1084
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna, a 1-tuple holding the whole sequence is
        returned."""
        r = self.search(dna, linear)
        # search() leaves the sequence it worked on in self.dna.
        d = self.dna
        if not r:
            return d[1:],
        # Fragments delimited by two consecutive cut positions (empty when
        # there is a single site).
        inner = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the inner fragments,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + inner + [d[r[-1]:]]
        else:
            # circular: the first fragment bridges LAST site to FIRST site.
            fragments = [d[r[-1]:] + d[1:r[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "5' overhang"
    overhang = classmethod(overhang)

    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes that share compatible end with RE,
        i.e. the 5' overhang enzymes x for which `x % self` is true.
        if batch is supplied (and not empty) the enzymes are taken from it
        instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # Iterate over batch: a supplied batch used to be ignored in favour
        # of AllEnzymes.
        return sorted(x for x in batch if x.is_5overhang() and x % self)
    compatible_end = classmethod(compatible_end)

    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        False unless other is a subclass of Ov5, in which case the answer
        is delegated to self._mod2(other)."""
        return issubclass(other, Ov5) and self._mod2(other)
    _mod1 = classmethod(_mod1)
1202
1203
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna, a 1-tuple holding the whole sequence is
        returned."""
        r = self.search(dna, linear)
        # search() leaves the sequence it worked on in self.dna.
        d = self.dna
        if not r:
            return d[1:],
        # Fragments delimited by two consecutive cut positions (empty when
        # there is a single site).
        inner = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the inner fragments,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + inner + [d[r[-1]:]]
        else:
            # circular: the first fragment bridges LAST site to FIRST site.
            fragments = [d[r[-1]:] + d[1:r[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_3overhang = classmethod(is_3overhang)

    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "3' overhang"
    overhang = classmethod(overhang)

    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes that share compatible end with RE,
        i.e. the 3' overhang enzymes x for which `x % self` is true.
        if batch is supplied (and not empty) the enzymes are taken from it
        instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # Iterate over batch: a supplied batch used to be ignored in favour
        # of AllEnzymes.
        return sorted(x for x in batch if x.is_3overhang() and x % self)
    compatible_end = classmethod(compatible_end)

    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        False unless other is a subclass of Ov3, in which case the answer
        is delegated to self._mod2(other)."""
        return issubclass(other, Ov3) and self._mod2(other)
    _mod1 = classmethod(_mod1)
1324
1325
1327 """Implement the methods specific to the enzymes for which the overhang
1328 and the cut are not variable.
1329
1330 Typical example : EcoRI -> G^AATT_C
1331 The overhang will always be AATT
1332 Notes:
1333 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1334 There overhang is always the same : blunt!
1335
1336 Internal use only. Not meant to be instantiated."""
1337
1371 _drop = classmethod(_drop)
1372
1374 """RE.is_defined() -> bool.
1375
1376 True if the sequence recognised and cut is constant,
1377 i.e. the recognition site is not degenerated AND the enzyme cut inside
1378 the site.
1379
1380 see also:
1381 RE.is_ambiguous()
1382 RE.is_unknown()"""
1383 return True
1384 is_defined = classmethod(is_defined)
1385
1387 """RE.is_ambiguous() -> bool.
1388
1389 True if the sequence recognised and cut is ambiguous,
1390 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1391 the site.
1392
1393 see also:
1394 RE.is_defined()
1395 RE.is_unknown()"""
1396 return False
1397 is_ambiguous = classmethod(is_ambiguous)
1398
1400 """RE.is_unknown() -> bool.
1401
1402 True if the sequence is unknown,
1403 i.e. the recognition site has not been characterised yet.
1404
1405 see also:
1406 RE.is_defined()
1407 RE.is_ambiguous()"""
1408 return False
1409 is_unknown = classmethod(is_unknown)
1410
1412 """RE.elucidate() -> str
1413
1414 return a representation of the site with the cut on the (+) strand
1415 represented as '^' and the cut on the (-) strand as '_'.
1416 ie:
1417 >>> EcoRI.elucidate() # 5' overhang
1418 'G^AATT_C'
1419 >>> KpnI.elucidate() # 3' overhang
1420 'G_GTAC^C'
1421 >>> EcoRV.elucidate() # blunt
1422 'GAT^_ATC'
1423 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1424 '? GTATAC ?'
1425 >>>
1426 """
1427 f5 = self.fst5
1428 f3 = self.fst3
1429 site = self.site
1430 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1431 elif self.is_5overhang():
1432 if f5 == f3 == 0 : re = 'N^'+ self.site + '_N'
1433 elif f3 == 0 : re = site[:f5] + '^' + site[f5:] + '_N'
1434 else : re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1435 elif self.is_blunt():
1436 re = site[:f5] + '^_' + site[f5:]
1437 else:
1438 if f5 == f3 == 0 : re = 'N_'+ site + '^N'
1439 else : re = site[:f3] + '_' + site[f3:f5] +'^'+ site[f5:]
1440 return re
1441 elucidate = classmethod(elucidate)
1442
1443 - def _mod2(self, other):
1444 """RE._mod2(other) -> bool.
1445
1446 for internal use only
1447
1448 test for the compatibility of restriction ending of RE and other."""
1449
1450
1451
1452 if other.ovhgseq == self.ovhgseq:
1453 return True
1454 elif issubclass(other, Ambiguous):
1455 return other._mod2(self)
1456 else:
1457 return False
1458 _mod2 = classmethod(_mod2)
1459
1460
1462 """Implement the methods specific to the enzymes for which the overhang
1463 is variable.
1464
1465 Typical example : BstXI -> CCAN_NNNN^NTGG
1466 The overhang can be any sequence of 4 bases.
1467 Notes:
1468 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1469 There overhang is always the same : blunt!
1470
1471 Internal use only. Not meant to be instantiated."""
1472
1498 _drop = classmethod(_drop)
1499
1501 """RE.is_defined() -> bool.
1502
1503 True if the sequence recognised and cut is constant,
1504 i.e. the recognition site is not degenerated AND the enzyme cut inside
1505 the site.
1506
1507 see also:
1508 RE.is_ambiguous()
1509 RE.is_unknown()"""
1510 return False
1511 is_defined = classmethod(is_defined)
1512
1514 """RE.is_ambiguous() -> bool.
1515
1516 True if the sequence recognised and cut is ambiguous,
1517 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1518 the site.
1519
1520
1521 see also:
1522 RE.is_defined()
1523 RE.is_unknown()"""
1524 return True
1525 is_ambiguous = classmethod(is_ambiguous)
1526
1528 """RE.is_unknown() -> bool.
1529
1530 True if the sequence is unknown,
1531 i.e. the recognition site has not been characterised yet.
1532
1533 see also:
1534 RE.is_defined()
1535 RE.is_ambiguous()"""
1536 return False
1537 is_unknown = classmethod(is_unknown)
1538
1539 - def _mod2(self, other):
1540 """RE._mod2(other) -> bool.
1541
1542 for internal use only
1543
1544 test for the compatibility of restriction ending of RE and other."""
1545
1546
1547
1548 if len(self.ovhgseq) != len(other.ovhgseq):
1549 return False
1550 else:
1551 se = self.ovhgseq
1552 for base in se:
1553 if base in 'ATCG':
1554 pass
1555 if base in 'N':
1556 se = '.'.join(se.split('N'))
1557 if base in 'RYWMSKHDBV':
1558 expand = '['+ matching[base] + ']'
1559 se = expand.join(se.split(base))
1560 if re.match(se, other.ovhgseq):
1561 return True
1562 else:
1563 return False
1564 _mod2 = classmethod(_mod2)
1565
1567 """RE.elucidate() -> str
1568
1569 return a representation of the site with the cut on the (+) strand
1570 represented as '^' and the cut on the (-) strand as '_'.
1571 ie:
1572 >>> EcoRI.elucidate() # 5' overhang
1573 'G^AATT_C'
1574 >>> KpnI.elucidate() # 3' overhang
1575 'G_GTAC^C'
1576 >>> EcoRV.elucidate() # blunt
1577 'GAT^_ATC'
1578 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1579 '? GTATAC ?'
1580 >>>
1581 """
1582 f5 = self.fst5
1583 f3 = self.fst3
1584 length = len(self)
1585 site = self.site
1586 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1587 elif self.is_5overhang():
1588 if f3 == f5 == 0:
1589 re = 'N^' + site +'_N'
1590 elif 0 <= f5 <= length and 0 <= f3+length <= length:
1591 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1592 elif 0 <= f5 <= length:
1593 re = site[:f5] + '^' + site[f5:] + f3*'N' + '_N'
1594 elif 0 <= f3+length <= length:
1595 re = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
1596 elif f3+length < 0:
1597 re = 'N^'*abs(f5)*'N' + '_' + abs(length+f3)*'N' + site
1598 elif f5 > length:
1599 re = site + (f5-length)*'N'+'^'+(length+f3-f5)*'N'+'_N'
1600 else:
1601 re = 'N^' + abs(f5) * 'N' + site + f3*'N' + '_N'
1602 elif self.is_blunt():
1603 if f5 < 0:
1604 re = 'N^_' + abs(f5)*'N' + site
1605 elif f5 > length:
1606 re = site + (f5-length)*'N' + '^_N'
1607 else:
1608 raise ValueError('%s.easyrepr() : error f5=%i' \
1609 % (self.name,f5))
1610 else:
1611 if f3 == 0:
1612 if f5 == 0 : re = 'N_' + site + '^N'
1613 else : re = site + '_' + (f5-length)*'N' + '^N'
1614 elif 0 < f3+length <= length and 0 <= f5 <= length:
1615 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1616 elif 0 < f3+length <= length:
1617 re = site[:f3] + '_' + site[f3:] + (f5-length)*'N' + '^N'
1618 elif 0 <= f5 <= length:
1619 re = 'N_' +'N'*(f3+length) + site[:f5] + '^' + site[f5:]
1620 elif f3 > 0:
1621 re = site + f3*'N' + '_' + (f5-f3-length)*'N' + '^N'
1622 elif f5 < 0:
1623 re = 'N_' + abs(f3-f5+length)*'N' + '^' + abs(f5)*'N' + site
1624 else:
1625 re = 'N_' + abs(f3+length)*'N' + site + (f5-length)*'N' + '^N'
1626 return re
1627 elucidate = classmethod(elucidate)
1628
1629
1631 """Implement the methods specific to the enzymes for which the overhang
1632 is not characterised.
1633
1634 Correspond to NoCut and Unknown.
1635
1636 Internal use only. Not meant to be instantiated."""
1637
1660 _drop = classmethod(_drop)
1661
1663 """RE.is_defined() -> bool.
1664
1665 True if the sequence recognised and cut is constant,
1666 i.e. the recognition site is not degenerated AND the enzyme cut inside
1667 the site.
1668
1669 see also:
1670 RE.is_ambiguous()
1671 RE.is_unknown()"""
1672 return False
1673 is_defined = classmethod(is_defined)
1674
1676 """RE.is_ambiguous() -> bool.
1677
1678 True if the sequence recognised and cut is ambiguous,
1679 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1680 the site.
1681
1682
1683 see also:
1684 RE.is_defined()
1685 RE.is_unknown()"""
1686 return False
1687 is_ambiguous = classmethod(is_ambiguous)
1688
1690 """RE.is_unknown() -> bool.
1691
1692 True if the sequence is unknown,
1693 i.e. the recognition site has not been characterised yet.
1694
1695 see also:
1696 RE.is_defined()
1697 RE.is_ambiguous()"""
1698 return True
1699 is_unknown = classmethod(is_unknown)
1700
1701 - def _mod2(self, other):
1702 """RE._mod2(other) -> bool.
1703
1704 for internal use only
1705
1706 test for the compatibility of restriction ending of RE and other."""
1707
1708
1709
1710
1711
1712
1713 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!" \
1714 % (str(self), str(other), str(self)))
1715 _mod2 = classmethod(_mod2)
1716
1718 """RE.elucidate() -> str
1719
1720 return a representation of the site with the cut on the (+) strand
1721 represented as '^' and the cut on the (-) strand as '_'.
1722 ie:
1723 >>> EcoRI.elucidate() # 5' overhang
1724 'G^AATT_C'
1725 >>> KpnI.elucidate() # 3' overhang
1726 'G_GTAC^C'
1727 >>> EcoRV.elucidate() # blunt
1728 'GAT^_ATC'
1729 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1730 '? GTATAC ?'
1731 >>>
1732 """
1733 return '? %s ?' % self.site
1734 elucidate = classmethod(elucidate)
1735
1736
1738
1739
1740
1741
1742 """Implement the methods specific to the enzymes which are commercially
1743 available.
1744
1745 Internal use only. Not meant to be instantiated."""
1746
1748 """RE.suppliers() -> print the suppliers of RE."""
1749 supply = suppliers_dict.items()
1750 for k,v in supply:
1751 if k in self.suppl:
1752 print v[0]+','
1753 return
1754 suppliers = classmethod(suppliers)
1755
1757 """RE.supplier_list() -> list.
1758
1759 list of the supplier names for RE."""
1760 return [v[0] for k,v in suppliers_dict.items() if k in self.suppl]
1761 supplier_list = classmethod(supplier_list)
1762
1764 """RE.buffers(supplier) -> string.
1765
1766 not implemented yet."""
1767 return
1768 buffers = classmethod(buffers)
1769
1771 """RE.iscomm() -> bool.
1772
1773 True if RE has suppliers."""
1774 return True
1775 is_comm = classmethod(is_comm)
1776
1777
1779 """Implement the methods specific to the enzymes which are not commercially
1780 available.
1781
1782 Internal use only. Not meant to be instantiated."""
1783
1785 """RE.suppliers() -> print the suppliers of RE."""
1786 return None
1787 suppliers = staticmethod(suppliers)
1788
1790 """RE.supplier_list() -> list.
1791
1792 list of the supplier names for RE."""
1793 return []
1794 supplier_list = classmethod(supplier_list)
1795
1797 """RE.buffers(supplier) -> string.
1798
1799 not implemented yet."""
1800 raise TypeError("Enzyme not commercially available.")
1801 buffers = classmethod(buffers)
1802
1804 """RE.iscomm() -> bool.
1805
1806 True if RE has suppliers."""
1807 return False
1808 is_comm = classmethod(is_comm)
1809
1810
1811
1812
1813
1814
1815
1816
1817
1819
1820 - def __init__(self, first=[], suppliers=[]):
1827
1829 if len(self) < 5:
1830 return '+'.join(self.elements())
1831 else:
1832 return '...'.join(('+'.join(self.elements()[:2]),\
1833 '+'.join(self.elements()[-2:])))
1834
1836 return 'RestrictionBatch(%s)' % self.elements()
1837
1844
1847
1850
def get(self, enzyme, add=False):
    """B.get(enzyme[, add]) -> enzyme class.

    enzyme is first normalised with B.format().
    If the result is already in B it is returned.
    If it is missing and add is True, it is added to B and returned.
    If it is missing and add is False (the default), a ValueError is
    raised."""
    e = self.format(enzyme)
    if e not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch' \
                             % e.__name__)
        self.add(e)
    return e
1867
1869 """B.lambdasplit(func) -> RestrictionBatch .
1870
1871 the new batch will contains only the enzymes for which
1872 func return True."""
1873 d = [x for x in itertools.ifilter(func, self)]
1874 new = RestrictionBatch()
1875 new._data = dict(zip(d, [True]*len(d)))
1876 return new
1877
1879 """B.add_supplier(letter) -> add a new set of enzyme to B.
1880
1881 letter represents the suppliers as defined in the dictionary
1882 RestrictionDictionary.suppliers
1883 return None.
1884 raise a KeyError if letter is not a supplier code."""
1885 supplier = suppliers_dict[letter]
1886 self.suppliers.append(letter)
1887 for x in supplier[1]:
1888 self.add_nocheck(eval(x))
1889 return
1890
1892 """B.current_suppliers() -> add a new set of enzyme to B.
1893
1894 return a sorted list of the suppliers which have been used to
1895 create the batch."""
1896 suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
1897 suppl_list.sort()
1898 return suppl_list
1899
1901 """ b += other -> add other to b, check the type of other."""
1902 self.add(other)
1903 return self
1904
1906 """ b + other -> new RestrictionBatch."""
1907 new = self.__class__(self)
1908 new.add(other)
1909 return new
1910
1912 """B.remove(other) -> remove other from B if other is a RestrictionType.
1913
1914 Safe set.remove method. Verify that other is a RestrictionType or can be
1915 evaluated to a RestrictionType.
1916 raise a ValueError if other can not be evaluated to a RestrictionType.
1917 raise a KeyError if other is not in B."""
1918 return set.remove(self, self.format(other))
1919
def add(self, other):
    """B.add(other) -> add other to B after normalising it.

    Checked counterpart of set.add: other is passed through B.format()
    first and the value returned by format() is what gets stored.
    Whatever exception format() raises is propagated to the caller.
    """
    enzyme = self.format(other)
    return set.add(self, enzyme)
1928
1930 """B.add_nocheck(other) -> add other to B. don't check type of other.
1931 """
1932 return set.add(self, other)
1933
1951
1952
1954 """B.is_restriction(y) -> bool.
1955
1956 True is y or eval(y) is a RestrictionType."""
1957 return isinstance(y, RestrictionType) or \
1958 isinstance(eval(str(y)), RestrictionType)
1959
def split(self, *classes, **bool):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Return a new batch holding the enzymes of B that satisfy every
    condition given:

    classes -- classes to test each enzyme against with issubclass().
    bool    -- optional keyword flags keyed by class name.  A true flag
               (the default) keeps the enzymes that ARE subclasses of
               that class; a false flag keeps the enzymes that are NOT.

    it works but it is slow, so it has really an interest when splitting
    over multiple conditions."""
    def splittest(element):
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            is_member = issubclass(element, klass)
            if (is_member and not wanted) or (wanted and not is_member):
                return False
        return True

    selected = [enz for enz in self if splittest(enz)]
    new = RestrictionBatch()
    # The batch stores its members through the built-in set API (see the
    # set.add / set.remove calls used by add() and remove()), so the
    # selection has to go in through that API.  Assigning `_data` alone
    # leaves the new batch empty.
    set.update(new, selected)
    # NOTE(review): `_data` looks like a leftover from an older set
    # implementation; it is still assigned so that any code reading it
    # keeps working -- confirm and drop if unused.
    new._data = dict(zip(selected, [True] * len(selected)))
    return new
1982
1984 """B.elements() -> tuple.
1985
1986 give all the names of the enzymes in B sorted alphabetically."""
1987 l = [str(e) for e in self]
1988 l.sort()
1989 return l
1990
1992 """B.as_string() -> list.
1993
1994 return a list of the name of the elements of B."""
1995 return [str(e) for e in self]
1996
1998 """B.suppl_codes() -> dict
1999
2000 letter code for the suppliers"""
2001 supply = dict([(k,v[0]) for k,v in suppliers_dict.iteritems()])
2002 return supply
2003 suppl_codes = classmethod(suppl_codes)
2004
2006 "B.show_codes() -> letter codes for the suppliers"""
2007 supply = [' = '.join(i) for i in self.suppl_codes().iteritems()]
2008 print '\n'.join(supply)
2009 return
2010 show_codes = classmethod(show_codes)
2011
def search(self, dna, linear=True):
    """B.search(dna) -> dict.

    Run the search of every enzyme of B against dna and return a
    dictionary {enzyme: result of enzyme.search()}.

    dna may be a DNA instance, which is wrapped in a FormattedSeq using
    the linear argument, or a FormattedSeq, whose own linear attribute is
    used.  Any other type raises a TypeError.

    The last (sequence string, linear) pair searched is remembered in
    B.already_mapped; asking again for the same pair returns the stored
    B.mapping without searching."""
    if not hasattr(self, "already_mapped"):
        # First call on this batch: nothing has been mapped yet.
        self.already_mapped = None
    if isinstance(dna, DNA):
        key = str(dna), linear
        if key == self.already_mapped:
            return self.mapping
        self.already_mapped = key
        fseq = FormattedSeq(dna, linear)
        self.mapping = dict([(x, x.search(fseq)) for x in self])
        return self.mapping
    if isinstance(dna, FormattedSeq):
        key = str(dna), dna.linear
        if key == self.already_mapped:
            return self.mapping
        self.already_mapped = key
        self.mapping = dict([(x, x.search(dna)) for x in self])
        return self.mapping
    raise TypeError("Expected Seq or MutableSeq instance, got %s instead"\
                    %type(dna))
2043
2044
2045
2046
2047
2048
2049
2050 -class Analysis(RestrictionBatch, PrintFormat):
2051
2054 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
2055
2056 For most of the method of this class if a dictionary is given it will
2057 be used as the base to calculate the results.
2058 If no dictionary is given a new analysis using the Restriction Batch
2059 which has been given when the Analysis class has been instantiated."""
2060 RestrictionBatch.__init__(self, restrictionbatch)
2061 self.rb = restrictionbatch
2062 self.sequence = sequence
2063 self.linear = linear
2064 if self.sequence:
2065 self.search(self.sequence, self.linear)
2066
2068 return 'Analysis(%s,%s,%s)'%\
2069 (repr(self.rb),repr(self.sequence),self.linear)
2070
2072 """A._sub_set(other_set) -> dict.
2073
2074 Internal use only.
2075
2076 screen the results through wanted set.
2077 Keep only the results for which the enzymes is in wanted set.
2078 """
2079 return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
2080
2082 """A._boundaries(start, end) -> tuple.
2083
2084 Format the boundaries for use with the methods that limit the
2085 search to only part of the sequence given to analyse.
2086 """
2087 if not isinstance(start, int):
2088 raise TypeError('expected int, got %s instead' % type(start))
2089 if not isinstance(end, int):
2090 raise TypeError('expected int, got %s instead' % type(end))
2091 if start < 1:
2092 start += len(self.sequence)
2093 if end < 1:
2094 end += len(self.sequence)
2095 if start < end:
2096 pass
2097 else:
2098 start, end == end, start
2099 if start < 1:
2100 start == 1
2101 if start < end:
2102 return start, end, self._test_normal
2103 else:
2104 return start, end, self._test_reverse
2105
2107 """A._test_normal(start, end, site) -> bool.
2108
2109 Internal use only
2110 Test if site is in between start and end.
2111 """
2112 return start <= site < end
2113
2115 """A._test_reverse(start, end, site) -> bool.
2116
2117 Internal use only
2118 Test if site is in between end and start (for circular sequences).
2119 """
2120 return start <= site <= len(self.sequence) or 1 <= site < end
2121
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    When dct is missing or empty the full mapping of the analysis is
    printed instead.  The formatting itself is done by
    PrintFormat.print_that, whose return value is handed back.
    """
    results = dct or self.mapping
    # bare print statement: the original emits an empty line first
    print
    return PrintFormat.print_that(self, results, title, s1)
2131
2133 """A.change(**attribute_name) -> Change attribute of Analysis.
2134
2135 It is possible to change the width of the shell by setting
2136 self.ConsoleWidth to what you want.
2137 self.NameWidth refer to the maximal length of the enzyme name.
2138
2139 Changing one of these parameters here might not give the results
2140 you expect. In which case, you can settle back to a 80 columns shell
2141 or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
2142 you get it right."""
2143 for k,v in what.iteritems():
2144 if k in ('NameWidth', 'ConsoleWidth'):
2145 setattr(self, k, v)
2146 self.Cmodulo = self.ConsoleWidth % self.NameWidth
2147 self.PrefWidth = self.ConsoleWidth - self.Cmodulo
2148 elif k is 'sequence':
2149 setattr(self, 'sequence', v)
2150 self.search(self.sequence, self.linear)
2151 elif k is 'rb':
2152 self = Analysis.__init__(self, v, self.sequence, self.linear)
2153 elif k is 'linear':
2154 setattr(self, 'linear', v)
2155 self.search(self.sequence, v)
2156 elif k in ('Indent', 'Maxsize'):
2157 setattr(self, k, v)
2158 elif k in ('Cmodulo', 'PrefWidth'):
2159 raise AttributeError( \
2160 'To change %s, change NameWidth and/or ConsoleWidth' \
2161 % name)
2162 else:
2163 raise AttributeError( \
2164 'Analysis has no attribute %s' % name)
2165 return
2166
def full(self, linear=True):
    """A.full() -> dict.

    Return the complete restriction map currently stored in A.mapping.
    The linear argument is accepted but not used by this method."""
    mapping = self.mapping
    return mapping
2172
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes which produce blunt ends (enzyme.is_blunt() is true).

    dct -- results to filter, {enzyme: sites}.  When omitted (None) the
           full mapping of the analysis is used.  An empty dictionary is
           a valid input and gives an empty result, so that filters can
           be chained safely."""
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_blunt())
2180
2182 """A.overhang5([dct]) -> dict.
2183
2184 Only the enzymes which have a 5' overhang restriction site."""
2185 if not dct:
2186 dct = self.mapping
2187 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2188
2189
2191 """A.Overhang3([dct]) -> dict.
2192
2193 Only the enzymes which have a 3'overhang restriction site."""
2194 if not dct:
2195 dct = self.mapping
2196 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2197
2198
2200 """A.defined([dct]) -> dict.
2201
2202 Only the enzymes that have a defined restriction site in Rebase."""
2203 if not dct:
2204 dct = self.mapping
2205 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2206
2208 """A.with_sites([dct]) -> dict.
2209
2210 Enzymes which have at least one site in the sequence."""
2211 if not dct:
2212 dct = self.mapping
2213 return dict([(k,v) for k,v in dct.iteritems() if v])
2214
2216 """A.without_site([dct]) -> dict.
2217
2218 Enzymes which have no site in the sequence."""
2219 if not dct:
2220 dct = self.mapping
2221 return dict([(k,v) for k,v in dct.iteritems() if not v])
2222
2224 """A.With_N_Sites(N [, dct]) -> dict.
2225
2226 Enzymes which cut N times the sequence."""
2227 if not dct:
2228 dct = self.mapping
2229 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2230
2232 if not dct:
2233 dct = self.mapping
2234 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2235
2237 """A.with_name(list_of_names [, dct]) ->
2238
2239 Limit the search to the enzymes named in list_of_names."""
2240 for i, enzyme in enumerate(names):
2241 if not enzyme in AllEnzymes:
2242 print "no datas for the enzyme:", str(name)
2243 del names[i]
2244 if not dct:
2245 return RestrictionBatch(names).search(self.sequence)
2246 return dict([(n, dct[n]) for n in names if n in dct])
2247
2249 """A.with_site_size(site_size [, dct]) ->
2250
2251 Limit the search to the enzymes whose site is of size <site_size>."""
2252 sites = [name for name in self if name.size == site_size]
2253 if not dct:
2254 return RestrictionBatch(sites).search(self.sequence)
2255 return dict([(k,v) for k,v in dct.iteritems() if k in site_size])
2256
2258 """A.only_between(start, end[, dct]) -> dict.
2259
2260 Enzymes that cut the sequence only in between start and end."""
2261 start, end, test = self._boundaries(start, end)
2262 if not dct:
2263 dct = self.mapping
2264 d = dict(dct)
2265 for key, sites in dct.iteritems():
2266 if not sites:
2267 del d[key]
2268 continue
2269 for site in sites:
2270 if test(start, end, site):
2271 continue
2272 else:
2273 del d[key]
2274 break
2275 return d
2276
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.

    start, end -- boundaries, normalised by A._boundaries(), which also
                  supplies the test applied to each site.
    dct        -- results to filter, {enzyme: sites}.  When omitted
                  (None) the full mapping of the analysis is used.  An
                  empty dictionary is a valid input and gives an empty
                  result.

    The value kept for each selected enzyme is its complete list of
    sites, not only the sites inside the region."""
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        for site in sites:
            if test(start, end, site):
                # one site inside the region is enough
                d[key] = sites
                break
    return d
2293
2295 """A.show_only_between(start, end [, dct]) -> dict.
2296
2297 Enzymes that cut the sequence outside of the region
2298 in between start and end but do not cut inside."""
2299 d = []
2300 if start <= end:
2301 d = [(k, [vv for vv in v if start<=vv<=end])
2302 for v in self.between(start, end, dct)]
2303 else:
2304 d = [(k, [vv for vv in v if start<=vv or vv <= end])
2305 for v in self.between(start, end, dct)]
2306 return dict(d)
2307
2309 """A.only_outside(start, end [, dct]) -> dict.
2310
2311 Enzymes that cut the sequence outside of the region
2312 in between start and end but do not cut inside."""
2313 start, end, test = self._boundaries(start, end)
2314 if not dct : dct = self.mapping
2315 d = dict(dct)
2316 for key, sites in dct.iteritems():
2317 if not sites:
2318 del d[key]
2319 continue
2320 for site in sites:
2321 if test(start, end, site):
2322 del d[key]
2323 break
2324 else:
2325 continue
2326 return d
2327
def outside(self, start, end, dct=None):
    """A.outside(start, end [, dct]) -> dict.

    Enzymes that cut outside the region in between start and end.
    No test is made to know if they cut or not inside this region.

    start, end -- boundaries, normalised by A._boundaries(), which also
                  supplies the test applied to each site.
    dct        -- results to filter, {enzyme: sites}.  When omitted
                  (None) the full mapping of the analysis is used.  An
                  empty dictionary is a valid input and gives an empty
                  result.

    Enzymes without any site are never part of the result."""
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        for site in sites:
            if not test(start, end, site):
                # one site outside the region is enough
                d[key] = sites
                break
    return d
2345
2346
2348 """A.do_not_cut(start, end [, dct]) -> dict.
2349
2350 Enzymes that do not cut the region in between start and end."""
2351 if not dct:
2352 dct = self.mapping
2353 d = self.without_site()
2354 d.update(self.only_outside(start, end, dct))
2355 return d
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
# Build every enzyme class from the data tables and sort them into two
# batches according to the answer of their is_comm() method.
CommOnly = RestrictionBatch()
NonComm = RestrictionBatch()
for TYPE, (bases, enzymes) in typedict.iteritems():
    # Each entry of typedict unpacks to (bases, enzymes).  `bases` arrives
    # as a sequence of strings and each one is evaluated to the object of
    # that name (presumably the classes defined earlier in this module --
    # TODO confirm).
    # NOTE(review): eval() is only acceptable here because typedict is
    # expected to be trusted data shipped with the package; never feed it
    # external input.
    bases = tuple([eval(x) for x in bases])
    # One type object is created per entry of typedict, through
    # type.__new__ with RestrictionType as first argument and the
    # evaluated bases as its bases.
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        # T is called with the enzyme name, the same bases and the entry
        # of enzymedict for that name; the result is the enzyme itself.
        newenz = T(k, bases, enzymedict[k])
        # Dispatch on is_comm(): true goes to CommOnly, false to NonComm.
        # add_nocheck stores the new enzyme without any type check.
        if newenz.is_comm() : CommOnly.add_nocheck(newenz)
        else : NonComm.add_nocheck(newenz)

# AllEnzymes is the union of the two batches.
AllEnzymes = CommOnly | NonComm

# Publish each enzyme under its own name.
names = [str(x) for x in AllEnzymes]
try:
    # On Python 2 the list comprehension above leaks its loop variable x
    # into the enclosing namespace; remove it when it is there.
    del x
except NameError:
    # x was not leaked: nothing to clean up.
    pass
# Assumes this code runs at module level, where locals() is the module
# namespace, so that every enzyme becomes a module attribute.
locals().update(dict(zip(names, AllEnzymes)))
__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
# Remove the loop temporaries from the namespace.
# NOTE(review): this del raises NameError if typedict is empty (or has no
# enzymes at all), because k, enzymes, TYPE and bases are then unbound.
del k, enzymes, TYPE, bases, names
2435