1
2
3
4
5
6
7
8
9
10
11 """ Notes about the diverses class of the restriction enzyme implementation.
12
13 RestrictionType is the type of all restriction enzymes.
14 ----------------------------------------------------------------------------
15 AbstractCut implements some methods that are common to all enzymes.
16 ----------------------------------------------------------------------------
17 NoCut, OneCut,TwoCuts represent the number of double strand cuts
18 produced by the enzyme.
19 they correspond to the 4th field of the rebase
20 record emboss_e.NNN.
21 0->NoCut : the enzyme is not characterised.
                        2->OneCut : the enzyme produces one double strand cut.
23 4->TwoCuts : two double strand cuts.
24 ----------------------------------------------------------------------------
25 Meth_Dep, Meth_Undep represent the methylation susceptibility to
26 the enzyme.
27 Not implemented yet.
28 ----------------------------------------------------------------------------
29 Palindromic, if the site is palindromic or not.
30 NotPalindromic allow some optimisations of the code.
31 No need to check the reverse strand
32 with palindromic sites.
33 ----------------------------------------------------------------------------
34 Unknown, Blunt, represent the overhang.
    Ov5, Ov3                Unknown is here for symmetry reasons and
                            corresponds to enzymes that are not characterised
                            in rebase.
38 ----------------------------------------------------------------------------
39 Defined, Ambiguous, represent the sequence of the overhang.
40 NotDefined
41 NotDefined is for enzymes not characterised in
42 rebase.
43
44 Defined correspond to enzymes that display a
45 constant overhang whatever the sequence.
46 ex : EcoRI. G^AATTC -> overhang :AATT
47 CTTAA^G
48
49 Ambiguous : the overhang varies with the
50 sequence restricted.
51 Typically enzymes which cut outside their
52 restriction site or (but not always)
53 inside an ambiguous site.
54 ex:
55 AcuI CTGAAG(22/20) -> overhang : NN
56 AasI GACNNN^NNNGTC -> overhang : NN
57 CTGN^NNNNNCAG
58
                note : these 3 classes refer to the overhang, not the site.
60 So the enzyme ApoI (RAATTY) is defined even if its restriction
61 site is ambiguous.
62
63 ApoI R^AATTY -> overhang : AATT -> Defined
64 YTTAA^R
65 Accordingly, blunt enzymes are always Defined even
66 when they cut outside their restriction site.
67 ----------------------------------------------------------------------------
68 Not_available, as found in rebase file emboss_r.NNN files.
69 Commercially_available
70 allow the selection of the enzymes according to
71 their suppliers to reduce the quantity
72 of results.
73 Also will allow the implementation of buffer
74 compatibility tables. Not implemented yet.
75
76 the list of suppliers is extracted from
77 emboss_s.NNN
78 ----------------------------------------------------------------------------
79 """
80
81 import re
82 import itertools
83
84 from Bio.Seq import Seq, MutableSeq
85 from Bio.Alphabet import IUPAC
86
87 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
88 from Bio.Restriction.Restriction_Dictionary import typedict
89 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
90 from Bio.Restriction.RanaConfig import *
91 from Bio.Restriction.PrintFormat import PrintFormat
92
93
94
96 """Check characters in a string (PRIVATE).
97
98 Remove digits and white space present in string. Allows any valid ambiguous
99 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
100
101 Other characters (e.g. symbols) trigger a TypeError.
102
103 Returns the string WITH A LEADING SPACE (!). This is for backwards
104 compatibility, and may in part be explained by the fact that
105 Bio.Restriction doesn't use zero based counting.
106 """
107
108 seq_string = "".join(seq_string.split()).upper()
109
110 for c in "0123456789" : seq_string = seq_string.replace(c,"")
111
112 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")) :
113 raise TypeError("Invalid character found in %s" % repr(seq_string))
114 return " " + seq_string
115
117 """Check characters in a string (DEPRECATED)."""
118 import warnings
119 warnings.warn("The check_bases function has been deprecated, and will be"
120 "removed in a future release of Biopython.", DeprecationWarning)
121 return _check_bases(seq_string)
122
# Lookup table keyed by IUPAC DNA letter: each value is the string of IUPAC
# letters (ambiguity codes included) that the key is listed as matching.
# NOTE(review): the code consuming this table is outside this excerpt.
matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN',
            'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY',
            'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY',
            'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY',
            'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY',
            'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'}

# DNA is an alias for Bio.Seq.Seq.
DNA = Seq
131
230
231
233 """RestrictionType. Type from which derives all enzyme classes.
234
235 Implement the operator methods."""
236
def __init__(cls, name='', bases=(), dct={}):
    """RE(name, bases, dct) -> RestrictionType instance.

    Initialise a newly created enzyme class.

    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    Arguments (the usual metaclass triple):
        name  -- name of the enzyme class being created.
        bases -- tuple of its base classes.
        dct   -- its class namespace (never modified here).

    Raises ValueError if name contains a hyphen, or if cls.compsite
    cannot be compiled into a regular expression.

    see below."""
    if "-" in name:
        raise ValueError("Problem with hyphen in %s as enzyme name"
                         % repr(name))
    # super() is already bound to cls, so cls is not passed a second time:
    # type.__init__ expects the (name, bases, dict) triple and Python 3
    # raises TypeError for the four-argument form.
    super(RestrictionType, cls).__init__(name, bases, dct)
    try:
        # Replace the pattern string by its compiled form, once per enzyme.
        cls.compsite = re.compile(cls.compsite)
    except Exception:
        # cls.compsite still holds the uncompiled pattern at this point.
        raise ValueError("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
253
265
267 """RE.__div__(other) -> list.
268
269 RE/other
270 returns RE.search(other)."""
271 return cls.search(other)
272
274 """RE.__rdiv__(other) -> list.
275
276 other/RE
277 returns RE.search(other)."""
278 return cls.search(other)
279
281 """RE.__truediv__(other) -> list.
282
283 RE/other
284 returns RE.search(other)."""
285 return cls.search(other)
286
288 """RE.__rtruediv__(other) -> list.
289
290 other/RE
291 returns RE.search(other)."""
292 return cls.search(other)
293
295 """RE.__floordiv__(other) -> list.
296
297 RE//other
298 returns RE.catalyse(other)."""
299 return cls.catalyse(other)
300
302 """RE.__rfloordiv__(other) -> list.
303
304 other//RE
305 returns RE.catalyse(other)."""
306 return cls.catalyse(other)
307
309 """RE.__str__() -> str.
310
311 return the name of the enzyme."""
312 return cls.__name__
313
315 """RE.__repr__() -> str.
316
317 used with eval or exec will instantiate the enzyme."""
318 return "%s" % cls.__name__
319
321 """RE.__len__() -> int.
322
323 length of the recognition site."""
324 return cls.size
325
327 """RE == other -> bool
328
329 True if RE and other are the same enzyme."""
330 return other is cls
331
333 """RE != other -> bool.
334 isoschizomer strict, same recognition site, same restriction -> False
335 all the other-> True"""
336 if not isinstance(other, RestrictionType):
337 return True
338 elif cls.charac == other.charac:
339 return False
340 else:
341 return True
342
344 """RE >> other -> bool.
345
346 neoschizomer : same recognition site, different restriction. -> True
347 all the others : -> False"""
348 if not isinstance(other, RestrictionType):
349 return False
350 elif cls.site == other.site and cls.charac != other.charac:
351 return True
352 else:
353 return False
354
356 """a % b -> bool.
357
358 Test compatibility of the overhang of a and b.
359 True if a and b have compatible overhang."""
360 if not isinstance(other, RestrictionType):
361 raise TypeError( \
362 'expected RestrictionType, got %s instead' % type(other))
363 return cls._mod1(other)
364
366 """a >= b -> bool.
367
368 a is greater or equal than b if the a site is longer than b site.
369 if their site have the same length sort by alphabetical order of their
370 names."""
371 if not isinstance(other, RestrictionType):
372 raise NotImplementedError
373 if len(cls) > len(other):
374 return True
375 elif cls.size == len(other) and cls.__name__ >= other.__name__:
376 return True
377 else:
378 return False
379
381 """a > b -> bool.
382
383 sorting order:
384 1. size of the recognition site.
385 2. if equal size, alphabetical order of the names."""
386 if not isinstance(other, RestrictionType):
387 raise NotImplementedError
388 if len(cls) > len(other):
389 return True
390 elif cls.size == len(other) and cls.__name__ > other.__name__:
391 return True
392 else:
393 return False
394
396 """a <= b -> bool.
397
398 sorting order:
399 1. size of the recognition site.
400 2. if equal size, alphabetical order of the names."""
401 if not isinstance(other, RestrictionType):
402 raise NotImplementedError
403 elif len(cls) < len(other):
404 return True
405 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
406 return True
407 else:
408 return False
409
411 """a < b -> bool.
412
413 sorting order:
414 1. size of the recognition site.
415 2. if equal size, alphabetical order of the names."""
416 if not isinstance(other, RestrictionType):
417 raise NotImplementedError
418 elif len(cls) < len(other):
419 return True
420 elif len(cls) == len(other) and cls.__name__ < other.__name__:
421 return True
422 else:
423 return False
424
425
427 """Implement the methods that are common to all restriction enzymes.
428
429 All the methods are classmethod.
430
431 For internal use only. Not meant to be instantiate."""
432
def search(cls, dna, linear=True):
    """RE.search(dna, linear=True) -> list.

    Return the list of all the sites of RE found in dna.

    dna is either a FormattedSeq, which is used as it is (linear is then
    ignored), or an object accepted by FormattedSeq(dna, linear), normally
    a Bio.Seq.Seq or a Bio.Seq.MutableSeq instance.

    If linear is False, the restriction sites that span over the
    boundaries will be included.

    The sequence searched is stored in cls.dna and the work is delegated
    to cls._search().

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut. """
    if not isinstance(dna, FormattedSeq):
        # Wrap raw sequences; a ready-made FormattedSeq is reused as is.
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
search = classmethod(search)
460
462 """RE.all_suppliers -> print all the suppliers of R"""
463 supply = [x[0] for x in suppliers_dict.itervalues()]
464 supply.sort()
465 print ",\n".join(supply)
466 return
467 all_suppliers = classmethod(all_suppliers)
468
470 """RE.is_equischizomers(other) -> bool.
471
472 True if other is an isoschizomer of RE.
473 False else.
474
475 equischizomer <=> same site, same position of restriction."""
476 return not self != other
477 is_equischizomer = classmethod(is_equischizomer)
478
480 """RE.is_neoschizomers(other) -> bool.
481
482 True if other is an isoschizomer of RE.
483 False else.
484
485 neoschizomer <=> same site, different position of restriction."""
486 return self >> other
487 is_neoschizomer = classmethod(is_neoschizomer)
488
490 """RE.is_isoschizomers(other) -> bool.
491
492 True if other is an isoschizomer of RE.
493 False else.
494
495 isoschizomer <=> same site."""
496 return (not self != other) or self >> other
497 is_isoschizomer = classmethod(is_isoschizomer)
498
500 """RE.equischizomers([batch]) -> list.
501
502 return a tuple of all the isoschizomers of RE.
503 if batch is supplied it is used instead of the default AllEnzymes.
504
505 equischizomer <=> same site, same position of restriction."""
506 if not batch : batch = AllEnzymes
507 r = [x for x in batch if not self != x]
508 i = r.index(self)
509 del r[i]
510 r.sort()
511 return r
512 equischizomers = classmethod(equischizomers)
513
515 """RE.neoschizomers([batch]) -> list.
516
517 return a tuple of all the neoschizomers of RE.
518 if batch is supplied it is used instead of the default AllEnzymes.
519
520 neoschizomer <=> same site, different position of restriction."""
521 if not batch : batch = AllEnzymes
522 r = [x for x in batch if self >> x]
523 r.sort()
524 return r
525 neoschizomers = classmethod(neoschizomers)
526
528 """RE.isoschizomers([batch]) -> list.
529
530 return a tuple of all the equischizomers and neoschizomers of RE.
531 if batch is supplied it is used instead of the default AllEnzymes."""
532 if not batch : batch = AllEnzymes
533 r = [x for x in batch if (self >> x) or (not self != x)]
534 i = r.index(self)
535 del r[i]
536 r.sort()
537 return r
538 isoschizomers = classmethod(isoschizomers)
539
541 """RE.frequency() -> int.
542
543 frequency of the site."""
544 return self.freq
545 frequency = classmethod(frequency)
546
547
548 -class NoCut(AbstractCut):
549 """Implement the methods specific to the enzymes that do not cut.
550
551 These enzymes are generally enzymes that have been only partially
552 characterised and the way they cut the DNA is unknow or enzymes for
553 which the pattern of cut is to complex to be recorded in Rebase
554 (ncuts values of 0 in emboss_e.###).
555
556 When using search() with these enzymes the values returned are at the start of
557 the restriction site.
558
559 Their catalyse() method returns a TypeError.
560
561 Unknown and NotDefined are also part of the base classes of these enzymes.
562
563 Internal use only. Not meant to be instantiated."""
564
566 """RE.cut_once() -> bool.
567
568 True if the enzyme cut the sequence one time on each strand."""
569 return False
570 cut_once = classmethod(cut_once)
571
573 """RE.cut_twice() -> bool.
574
575 True if the enzyme cut the sequence twice on each strand."""
576 return False
577 cut_twice = classmethod(cut_twice)
578
580 """RE._modify(location) -> int.
581
582 for internal use only.
583
584 location is an integer corresponding to the location of the match for
585 the enzyme pattern in the sequence.
586 _modify returns the real place where the enzyme will cut.
587
588 example:
589 EcoRI pattern : GAATTC
590 EcoRI will cut after the G.
591 so in the sequence:
592 ______
593 GAATACACGGAATTCGA
594 |
595 10
596 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
597 EcoRI cut after the G so:
598 EcoRI._modify(10) -> 11.
599
600 if the enzyme cut twice _modify will returns two integer corresponding
601 to each cutting site.
602 """
603 yield location
604 _modify = classmethod(_modify)
605
607 """RE._rev_modify(location) -> generator of int.
608
609 for internal use only.
610
611 as _modify for site situated on the antiparallel strand when the
612 enzyme is not palindromic
613 """
614 yield location
615 _rev_modify = classmethod(_rev_modify)
616
618 """RE.characteristic() -> tuple.
619
620 the tuple contains the attributes:
621 fst5 -> first 5' cut ((current strand) or None
622 fst3 -> first 3' cut (complementary strand) or None
623 scd5 -> second 5' cut (current strand) or None
624 scd5 -> second 3' cut (complementary strand) or None
625 site -> recognition site."""
626 return None, None, None, None, self.site
627 characteristic = classmethod(characteristic)
628
630 """Implement the methods specific to the enzymes that cut the DNA only once
631
632 Correspond to ncuts values of 2 in emboss_e.###
633
634 Internal use only. Not meant to be instantiated."""
635
637 """RE.cut_once() -> bool.
638
639 True if the enzyme cut the sequence one time on each strand."""
640 return True
641 cut_once = classmethod(cut_once)
642
644 """RE.cut_twice() -> bool.
645
646 True if the enzyme cut the sequence twice on each strand."""
647 return False
648 cut_twice = classmethod(cut_twice)
649
651 """RE._modify(location) -> int.
652
653 for internal use only.
654
655 location is an integer corresponding to the location of the match for
656 the enzyme pattern in the sequence.
657 _modify returns the real place where the enzyme will cut.
658
659 example:
660 EcoRI pattern : GAATTC
661 EcoRI will cut after the G.
662 so in the sequence:
663 ______
664 GAATACACGGAATTCGA
665 |
666 10
667 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
668 EcoRI cut after the G so:
669 EcoRI._modify(10) -> 11.
670
671 if the enzyme cut twice _modify will returns two integer corresponding
672 to each cutting site.
673 """
674 yield location + self.fst5
675 _modify = classmethod(_modify)
676
678 """RE._rev_modify(location) -> generator of int.
679
680 for internal use only.
681
682 as _modify for site situated on the antiparallel strand when the
683 enzyme is not palindromic
684 """
685 yield location - self.fst3
686 _rev_modify = classmethod(_rev_modify)
687
689 """RE.characteristic() -> tuple.
690
691 the tuple contains the attributes:
692 fst5 -> first 5' cut ((current strand) or None
693 fst3 -> first 3' cut (complementary strand) or None
694 scd5 -> second 5' cut (current strand) or None
695 scd5 -> second 3' cut (complementary strand) or None
696 site -> recognition site."""
697 return self.fst5, self.fst3, None, None, self.site
698 characteristic = classmethod(characteristic)
699
700
702 """Implement the methods specific to the enzymes that cut the DNA twice
703
704 Correspond to ncuts values of 4 in emboss_e.###
705
706 Internal use only. Not meant to be instantiated."""
707
709 """RE.cut_once() -> bool.
710
711 True if the enzyme cut the sequence one time on each strand."""
712 return False
713 cut_once = classmethod(cut_once)
714
716 """RE.cut_twice() -> bool.
717
718 True if the enzyme cut the sequence twice on each strand."""
719 return True
720 cut_twice = classmethod(cut_twice)
721
723 """RE._modify(location) -> int.
724
725 for internal use only.
726
727 location is an integer corresponding to the location of the match for
728 the enzyme pattern in the sequence.
729 _modify returns the real place where the enzyme will cut.
730
731 example:
732 EcoRI pattern : GAATTC
733 EcoRI will cut after the G.
734 so in the sequence:
735 ______
736 GAATACACGGAATTCGA
737 |
738 10
739 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
740 EcoRI cut after the G so:
741 EcoRI._modify(10) -> 11.
742
743 if the enzyme cut twice _modify will returns two integer corresponding
744 to each cutting site.
745 """
746 yield location + self.fst5
747 yield location + self.scd5
748 _modify = classmethod(_modify)
749
751 """RE._rev_modify(location) -> generator of int.
752
753 for internal use only.
754
755 as _modify for site situated on the antiparallel strand when the
756 enzyme is not palindromic
757 """
758 yield location - self.fst3
759 yield location - self.scd3
760 _rev_modify = classmethod(_rev_modify)
761
763 """RE.characteristic() -> tuple.
764
765 the tuple contains the attributes:
766 fst5 -> first 5' cut ((current strand) or None
767 fst3 -> first 3' cut (complementary strand) or None
768 scd5 -> second 5' cut (current strand) or None
769 scd5 -> second 3' cut (complementary strand) or None
770 site -> recognition site."""
771 return self.fst5, self.fst3, self.scd5, self.scd3, self.site
772 characteristic = classmethod(characteristic)
773
774
776 """Implement the information about methylation.
777
778 Enzymes of this class possess a site which is methylable."""
779
781 """RE.is_methylable() -> bool.
782
783 True if the recognition site is a methylable."""
784 return True
785 is_methylable = classmethod(is_methylable)
786
788 """Implement informations about methylation sensitibility.
789
790 Enzymes of this class are not sensible to methylation."""
791
793 """RE.is_methylable() -> bool.
794
795 True if the recognition site is a methylable."""
796 return False
797 is_methylable = classmethod(is_methylable)
798
800 """Implement the methods specific to the enzymes which are palindromic
801
802 palindromic means : the recognition site and its reverse complement are
803 identical.
804 Remarks : an enzyme with a site CGNNCG is palindromic even if some
805 of the sites that it will recognise are not.
806 for example here : CGAACG
807
808 Internal use only. Not meant to be instantiated."""
809
811 """RE._search() -> list.
812
813 for internal use only.
814
815 implement the search method for palindromic and non palindromic enzyme.
816 """
817 siteloc = self.dna.finditer(self.compsite,self.size)
818 self.results = [r for s,g in siteloc for r in self._modify(s)]
819 if self.results : self._drop()
820 return self.results
821 _search = classmethod(_search)
822
824 """RE.is_palindromic() -> bool.
825
826 True if the recognition site is a palindrom."""
827 return True
828 is_palindromic = classmethod(is_palindromic)
829
830
832 """Implement the methods specific to the enzymes which are not palindromic
833
834 palindromic means : the recognition site and its reverse complement are
835 identical.
836
837 Internal use only. Not meant to be instantiated."""
838
840 """RE._search() -> list.
841
842 for internal use only.
843
844 implement the search method for palindromic and non palindromic enzyme.
845 """
846 iterator = self.dna.finditer(self.compsite, self.size)
847 self.results = []
848 modif = self._modify
849 revmodif = self._rev_modify
850 s = str(self)
851 self.on_minus = []
852 for start, group in iterator:
853 if group(s):
854 self.results += [r for r in modif(start)]
855 else:
856 self.on_minus += [r for r in revmodif(start)]
857 self.results += self.on_minus
858 if self.results:
859 self.results.sort()
860 self._drop()
861 return self.results
862 _search = classmethod(_search)
863
865 """RE.is_palindromic() -> bool.
866
867 True if the recognition site is a palindrom."""
868 return False
869 is_palindromic = classmethod(is_palindromic)
870
872 """Implement the methods specific to the enzymes for which the overhang
873 is unknown.
874
875 These enzymes are also NotDefined and NoCut.
876
877 Internal use only. Not meant to be instantiated."""
878
880 """RE.catalyse(dna, linear=True) -> tuple of DNA.
881 RE.catalyze(dna, linear=True) -> tuple of DNA.
882
883 return a tuple of dna as will be produced by using RE to restrict the
884 dna.
885
886 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
887
888 if linear is False, the sequence is considered to be circular and the
889 output will be modified accordingly."""
890 raise NotImplementedError('%s restriction is unknown.' \
891 % self.__name__)
892 catalyze = catalyse = classmethod(catalyse)
893
895 """RE.is_blunt() -> bool.
896
897 True if the enzyme produces blunt end.
898
899 see also:
900 RE.is_3overhang()
901 RE.is_5overhang()
902 RE.is_unknown()"""
903 return False
904 is_blunt = classmethod(is_blunt)
905
907 """RE.is_5overhang() -> bool.
908
909 True if the enzyme produces 5' overhang sticky end.
910
911 see also:
912 RE.is_3overhang()
913 RE.is_blunt()
914 RE.is_unknown()"""
915 return False
916 is_5overhang = classmethod(is_5overhang)
917
919 """RE.is_3overhang() -> bool.
920
921 True if the enzyme produces 3' overhang sticky end.
922
923 see also:
924 RE.is_5overhang()
925 RE.is_blunt()
926 RE.is_unknown()"""
927 return False
928 is_3overhang = classmethod(is_3overhang)
929
931 """RE.overhang() -> str. type of overhang of the enzyme.,
932
933 can be "3' overhang", "5' overhang", "blunt", "unknown" """
934 return 'unknown'
935 overhang = classmethod(overhang)
936
938 """RE.compatible_end() -> list.
939
940 list of all the enzymes that share compatible end with RE."""
941 return []
942 compatible_end = classmethod(compatible_end)
943
945 """RE._mod1(other) -> bool.
946
947 for internal use only
948
949 test for the compatibility of restriction ending of RE and other."""
950 return False
951 _mod1 = classmethod(_mod1)
952
953 -class Blunt(AbstractCut):
954 """Implement the methods specific to the enzymes for which the overhang
955 is blunt.
956
957 The enzyme cuts the + strand and the - strand of the DNA at the same
958 place.
959
960 Internal use only. Not meant to be instantiated."""
961
963 """RE.catalyse(dna, linear=True) -> tuple of DNA.
964 RE.catalyze(dna, linear=True) -> tuple of DNA.
965
966 return a tuple of dna as will be produced by using RE to restrict the
967 dna.
968
969 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
970
971 if linear is False, the sequence is considered to be circular and the
972 output will be modified accordingly."""
973 r = self.search(dna, linear)
974 d = self.dna
975 if not r : return d[1:],
976 fragments = []
977 length = len(r)-1
978 if d.is_linear():
979
980
981
982 fragments.append(d[1:r[0]])
983 if length:
984
985
986
987 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
988
989
990
991 fragments.append(d[r[-1]:])
992 else:
993
994
995
996 fragments.append(d[r[-1]:]+d[1:r[0]])
997 if not length:
998
999
1000
1001 return tuple(fragments)
1002
1003
1004
1005 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1006 return tuple(fragments)
1007 catalyze = catalyse = classmethod(catalyse)
1008
1010 """RE.is_blunt() -> bool.
1011
1012 True if the enzyme produces blunt end.
1013
1014 see also:
1015 RE.is_3overhang()
1016 RE.is_5overhang()
1017 RE.is_unknown()"""
1018 return True
1019 is_blunt = classmethod(is_blunt)
1020
1022 """RE.is_5overhang() -> bool.
1023
1024 True if the enzyme produces 5' overhang sticky end.
1025
1026 see also:
1027 RE.is_3overhang()
1028 RE.is_blunt()
1029 RE.is_unknown()"""
1030 return False
1031 is_5overhang = classmethod(is_5overhang)
1032
1034 """RE.is_3overhang() -> bool.
1035
1036 True if the enzyme produces 3' overhang sticky end.
1037
1038 see also:
1039 RE.is_5overhang()
1040 RE.is_blunt()
1041 RE.is_unknown()"""
1042 return False
1043 is_3overhang = classmethod(is_3overhang)
1044
1046 """RE.overhang() -> str. type of overhang of the enzyme.,
1047
1048 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1049 return 'blunt'
1050 overhang = classmethod(overhang)
1051
1053 """RE.compatible_end() -> list.
1054
1055 list of all the enzymes that share compatible end with RE."""
1056 if not batch : batch = AllEnzymes
1057 r = [x for x in iter(AllEnzymes) if x.is_blunt()]
1058 r.sort()
1059 return r
1060 compatible_end = classmethod(compatible_end)
1061
1063 """RE._mod1(other) -> bool.
1064
1065 for internal use only
1066
1067 test for the compatibility of restriction ending of RE and other."""
1068 if issubclass(other, Blunt) : return True
1069 else : return False
1070 _mod1 = staticmethod(_mod1)
1071
1072 -class Ov5(AbstractCut):
1073 """Implement the methods specific to the enzymes for which the overhang
1074 is recessed in 3'.
1075
1076 The enzyme cuts the + strand after the - strand of the DNA.
1077
1078 Internal use only. Not meant to be instantiated."""
1079
1081 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1082 RE.catalyze(dna, linear=True) -> tuple of DNA.
1083
1084 return a tuple of dna as will be produced by using RE to restrict the
1085 dna.
1086
1087 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1088
1089 if linear is False, the sequence is considered to be circular and the
1090 output will be modified accordingly."""
1091 r = self.search(dna, linear)
1092 d = self.dna
1093 if not r : return d[1:],
1094 length = len(r)-1
1095 fragments = []
1096 if d.is_linear():
1097
1098
1099
1100 fragments.append(d[1:r[0]])
1101 if length:
1102
1103
1104
1105 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1106
1107
1108
1109 fragments.append(d[r[-1]:])
1110 else:
1111
1112
1113
1114 fragments.append(d[r[-1]:]+d[1:r[0]])
1115 if not length:
1116
1117
1118
1119 return tuple(fragments)
1120
1121
1122
1123 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1124 return tuple(fragments)
1125 catalyze = catalyse = classmethod(catalyse)
1126
1128 """RE.is_blunt() -> bool.
1129
1130 True if the enzyme produces blunt end.
1131
1132 see also:
1133 RE.is_3overhang()
1134 RE.is_5overhang()
1135 RE.is_unknown()"""
1136 return False
1137 is_blunt = classmethod(is_blunt)
1138
1140 """RE.is_5overhang() -> bool.
1141
1142 True if the enzyme produces 5' overhang sticky end.
1143
1144 see also:
1145 RE.is_3overhang()
1146 RE.is_blunt()
1147 RE.is_unknown()"""
1148 return True
1149 is_5overhang = classmethod(is_5overhang)
1150
1152 """RE.is_3overhang() -> bool.
1153
1154 True if the enzyme produces 3' overhang sticky end.
1155
1156 see also:
1157 RE.is_5overhang()
1158 RE.is_blunt()
1159 RE.is_unknown()"""
1160 return False
1161 is_3overhang = classmethod(is_3overhang)
1162
1164 """RE.overhang() -> str. type of overhang of the enzyme.,
1165
1166 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1167 return "5' overhang"
1168 overhang = classmethod(overhang)
1169
1171 """RE.compatible_end() -> list.
1172
1173 list of all the enzymes that share compatible end with RE."""
1174 if not batch : batch = AllEnzymes
1175 r = [x for x in iter(AllEnzymes) if x.is_5overhang() and x % self]
1176 r.sort()
1177 return r
1178 compatible_end = classmethod(compatible_end)
1179
def _mod1(self, other):
    """RE._mod1(other) -> bool.

    for internal use only

    Compatibility test between the ends produced by RE and by other:
    False unless other is a subclass of Ov5, in which case the answer
    is the one given by RE._mod2(other)."""
    if not issubclass(other, Ov5):
        return False
    return self._mod2(other)
_mod1 = classmethod(_mod1)
1189
1190
1191 -class Ov3(AbstractCut):
1192 """Implement the methods specific to the enzymes for which the overhang
1193 is recessed in 5'.
1194
1195 The enzyme cuts the - strand after the + strand of the DNA.
1196
1197 Internal use only. Not meant to be instantiated."""
1198
1200 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1201 RE.catalyze(dna, linear=True) -> tuple of DNA.
1202
1203 return a tuple of dna as will be produced by using RE to restrict the
1204 dna.
1205
1206 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1207
1208 if linear is False, the sequence is considered to be circular and the
1209 output will be modified accordingly."""
1210 r = self.search(dna, linear)
1211 d = self.dna
1212 if not r : return d[1:],
1213 fragments = []
1214 length = len(r)-1
1215 if d.is_linear():
1216
1217
1218
1219 fragments.append(d[1:r[0]])
1220 if length:
1221
1222
1223
1224 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1225
1226
1227
1228 fragments.append(d[r[-1]:])
1229 else:
1230
1231
1232
1233 fragments.append(d[r[-1]:]+d[1:r[0]])
1234 if not length:
1235
1236
1237
1238 return tuple(fragments)
1239
1240
1241
1242 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1243 return tuple(fragments)
1244 catalyze = catalyse = classmethod(catalyse)
1245
1247 """RE.is_blunt() -> bool.
1248
1249 True if the enzyme produces blunt end.
1250
1251 see also:
1252 RE.is_3overhang()
1253 RE.is_5overhang()
1254 RE.is_unknown()"""
1255 return False
1256 is_blunt = classmethod(is_blunt)
1257
1259 """RE.is_5overhang() -> bool.
1260
1261 True if the enzyme produces 5' overhang sticky end.
1262
1263 see also:
1264 RE.is_3overhang()
1265 RE.is_blunt()
1266 RE.is_unknown()"""
1267 return False
1268 is_5overhang = classmethod(is_5overhang)
1269
1271 """RE.is_3overhang() -> bool.
1272
1273 True if the enzyme produces 3' overhang sticky end.
1274
1275 see also:
1276 RE.is_5overhang()
1277 RE.is_blunt()
1278 RE.is_unknown()"""
1279 return True
1280 is_3overhang = classmethod(is_3overhang)
1281
1283 """RE.overhang() -> str. type of overhang of the enzyme.,
1284
1285 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1286 return "3' overhang"
1287 overhang = classmethod(overhang)
1288
1290 """RE.compatible_end() -> list.
1291
1292 list of all the enzymes that share compatible end with RE."""
1293 if not batch : batch = AllEnzymes
1294 r = [x for x in iter(AllEnzymes) if x.is_3overhang() and x % self]
1295 r.sort()
1296 return r
1297 compatible_end = classmethod(compatible_end)
1298
def _mod1(self, other):
    """RE._mod1(other) -> bool.

    for internal use only

    Compatibility test between the ends produced by RE and by other:
    False unless other is a subclass of Ov3, in which case the answer
    is the one given by RE._mod2(other)."""
    if not issubclass(other, Ov3):
        return False
    return self._mod2(other)
_mod1 = classmethod(_mod1)
1311
1312
1314 """Implement the methods specific to the enzymes for which the overhang
1315 and the cut are not variable.
1316
1317 Typical example : EcoRI -> G^AATT_C
1318 The overhang will always be AATT
1319 Notes:
1320 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1321 There overhang is always the same : blunt!
1322
1323 Internal use only. Not meant to be instantiated."""
1324
1358 _drop = classmethod(_drop)
1359
1361 """RE.is_defined() -> bool.
1362
1363 True if the sequence recognised and cut is constant,
1364 i.e. the recognition site is not degenerated AND the enzyme cut inside
1365 the site.
1366
1367 see also:
1368 RE.is_ambiguous()
1369 RE.is_unknown()"""
1370 return True
1371 is_defined = classmethod(is_defined)
1372
1374 """RE.is_ambiguous() -> bool.
1375
1376 True if the sequence recognised and cut is ambiguous,
1377 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1378 the site.
1379
1380 see also:
1381 RE.is_defined()
1382 RE.is_unknown()"""
1383 return False
1384 is_ambiguous = classmethod(is_ambiguous)
1385
1387 """RE.is_unknown() -> bool.
1388
1389 True if the sequence is unknown,
1390 i.e. the recognition site has not been characterised yet.
1391
1392 see also:
1393 RE.is_defined()
1394 RE.is_ambiguous()"""
1395 return False
1396 is_unknown = classmethod(is_unknown)
1397
1399 """RE.elucidate() -> str
1400
1401 return a representation of the site with the cut on the (+) strand
1402 represented as '^' and the cut on the (-) strand as '_'.
1403 ie:
1404 >>> EcoRI.elucidate() # 5' overhang
1405 'G^AATT_C'
1406 >>> KpnI.elucidate() # 3' overhang
1407 'G_GTAC^C'
1408 >>> EcoRV.elucidate() # blunt
1409 'GAT^_ATC'
1410 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1411 '? GTATAC ?'
1412 >>>
1413 """
1414 f5 = self.fst5
1415 f3 = self.fst3
1416 site = self.site
1417 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1418 elif self.is_5overhang():
1419 if f5 == f3 == 0 : re = 'N^'+ self.site + '_N'
1420 elif f3 == 0 : re = site[:f5] + '^' + site[f5:] + '_N'
1421 else : re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1422 elif self.is_blunt():
1423 re = site[:f5] + '^_' + site[f5:]
1424 else:
1425 if f5 == f3 == 0 : re = 'N_'+ site + '^N'
1426 else : re = site[:f3] + '_' + site[f3:f5] +'^'+ site[f5:]
1427 return re
1428 elucidate = classmethod(elucidate)
1429
1430 - def _mod2(self, other):
1431 """RE._mod2(other) -> bool.
1432
1433 for internal use only
1434
1435 test for the compatibility of restriction ending of RE and other."""
1436
1437
1438
1439 if other.ovhgseq == self.ovhgseq:
1440 return True
1441 elif issubclass(other, Ambiguous):
1442 return other._mod2(self)
1443 else:
1444 return False
1445 _mod2 = classmethod(_mod2)
1446
1447
1449 """Implement the methods specific to the enzymes for which the overhang
1450 is variable.
1451
1452 Typical example : BstXI -> CCAN_NNNN^NTGG
1453 The overhang can be any sequence of 4 bases.
1454 Notes:
1455 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1456 There overhang is always the same : blunt!
1457
1458 Internal use only. Not meant to be instantiated."""
1459
1485 _drop = classmethod(_drop)
1486
1488 """RE.is_defined() -> bool.
1489
1490 True if the sequence recognised and cut is constant,
1491 i.e. the recognition site is not degenerated AND the enzyme cut inside
1492 the site.
1493
1494 see also:
1495 RE.is_ambiguous()
1496 RE.is_unknown()"""
1497 return False
1498 is_defined = classmethod(is_defined)
1499
1501 """RE.is_ambiguous() -> bool.
1502
1503 True if the sequence recognised and cut is ambiguous,
1504 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1505 the site.
1506
1507
1508 see also:
1509 RE.is_defined()
1510 RE.is_unknown()"""
1511 return True
1512 is_ambiguous = classmethod(is_ambiguous)
1513
1515 """RE.is_unknown() -> bool.
1516
1517 True if the sequence is unknown,
1518 i.e. the recognition site has not been characterised yet.
1519
1520 see also:
1521 RE.is_defined()
1522 RE.is_ambiguous()"""
1523 return False
1524 is_unknown = classmethod(is_unknown)
1525
1526 - def _mod2(self, other):
1527 """RE._mod2(other) -> bool.
1528
1529 for internal use only
1530
1531 test for the compatibility of restriction ending of RE and other."""
1532
1533
1534
1535 if len(self.ovhgseq) != len(other.ovhgseq):
1536 return False
1537 else:
1538 se = self.ovhgseq
1539 for base in se:
1540 if base in 'ATCG':
1541 pass
1542 if base in 'N':
1543 se = '.'.join(se.split('N'))
1544 if base in 'RYWMSKHDBV':
1545 expand = '['+ matching[base] + ']'
1546 se = expand.join(se.split(base))
1547 if re.match(se, other.ovhgseq):
1548 return True
1549 else:
1550 return False
1551 _mod2 = classmethod(_mod2)
1552
def elucidate(self):
    """RE.elucidate() -> str

    return a representation of the site with the cut on the (+) strand
    represented as '^' and the cut on the (-) strand as '_'.
    ie:
    >>> EcoRI.elucidate()   # 5' overhang
    'G^AATT_C'
    >>> KpnI.elucidate()    # 3' overhang
    'G_GTAC^C'
    >>> EcoRV.elucidate()   # blunt
    'GAT^_ATC'
    >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
    '? GTATAC ?'
    >>>

    Cuts falling outside of the site are padded with 'N'.
    Enzymes cutting twice are not drawn: a fixed message is returned.
    raise a ValueError for a blunt enzyme whose cut lies inside the site.
    """
    f5 = self.fst5
    f3 = self.fst3
    length = len(self)
    site = self.site
    # The result is kept in 'rep' so that the re module is not shadowed.
    if self.cut_twice():
        rep = 'cut twice, not yet implemented sorry.'
    elif self.is_5overhang():
        if f3 == f5 == 0:
            rep = 'N^' + site + '_N'
        elif 0 <= f5 <= length and 0 <= f3 + length <= length:
            rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        elif 0 <= f5 <= length:
            rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
        elif 0 <= f3 + length <= length:
            rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
        elif f3 + length < 0:
            # Was "'N^'*abs(f5)*'N'", i.e. a str multiplied by a str, which
            # raises a TypeError. Built like the branch above instead.
            rep = 'N^' + abs(f5) * 'N' + '_' + abs(length + f3) * 'N' + site
        elif f5 > length:
            rep = (site + (f5 - length) * 'N' + '^'
                   + (length + f3 - f5) * 'N' + '_N')
        else:
            rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
    elif self.is_blunt():
        if f5 < 0:
            rep = 'N^_' + abs(f5) * 'N' + site
        elif f5 > length:
            rep = site + (f5 - length) * 'N' + '^_N'
        else:
            raise ValueError('%s.easyrepr() : error f5=%i'
                             % (self.name, f5))
    else:
        # Remaining case: neither 5' overhang nor blunt.
        if f3 == 0:
            if f5 == 0:
                rep = 'N_' + site + '^N'
            else:
                rep = site + '_' + (f5 - length) * 'N' + '^N'
        elif 0 < f3 + length <= length and 0 <= f5 <= length:
            rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
        elif 0 < f3 + length <= length:
            rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
        elif 0 <= f5 <= length:
            rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
        elif f3 > 0:
            rep = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
        elif f5 < 0:
            rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^'
                   + abs(f5) * 'N' + site)
        else:
            rep = ('N_' + abs(f3 + length) * 'N' + site
                   + (f5 - length) * 'N' + '^N')
    return rep
1614 elucidate = classmethod(elucidate)
1615
1616
1618 """Implement the methods specific to the enzymes for which the overhang
1619 is not characterised.
1620
1621 Correspond to NoCut and Unknown.
1622
1623 Internal use only. Not meant to be instantiated."""
1624
1647 _drop = classmethod(_drop)
1648
1650 """RE.is_defined() -> bool.
1651
1652 True if the sequence recognised and cut is constant,
1653 i.e. the recognition site is not degenerated AND the enzyme cut inside
1654 the site.
1655
1656 see also:
1657 RE.is_ambiguous()
1658 RE.is_unknown()"""
1659 return False
1660 is_defined = classmethod(is_defined)
1661
1663 """RE.is_ambiguous() -> bool.
1664
1665 True if the sequence recognised and cut is ambiguous,
1666 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1667 the site.
1668
1669
1670 see also:
1671 RE.is_defined()
1672 RE.is_unknown()"""
1673 return False
1674 is_ambiguous = classmethod(is_ambiguous)
1675
1677 """RE.is_unknown() -> bool.
1678
1679 True if the sequence is unknown,
1680 i.e. the recognition site has not been characterised yet.
1681
1682 see also:
1683 RE.is_defined()
1684 RE.is_ambiguous()"""
1685 return True
1686 is_unknown = classmethod(is_unknown)
1687
1688 - def _mod2(self, other):
1689 """RE._mod2(other) -> bool.
1690
1691 for internal use only
1692
1693 test for the compatibility of restriction ending of RE and other."""
1694
1695
1696
1697
1698
1699
1700 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!" \
1701 % (str(self), str(other), str(self)))
1702 _mod2 = classmethod(_mod2)
1703
1705 """RE.elucidate() -> str
1706
1707 return a representation of the site with the cut on the (+) strand
1708 represented as '^' and the cut on the (-) strand as '_'.
1709 ie:
1710 >>> EcoRI.elucidate() # 5' overhang
1711 'G^AATT_C'
1712 >>> KpnI.elucidate() # 3' overhang
1713 'G_GTAC^C'
1714 >>> EcoRV.elucidate() # blunt
1715 'GAT^_ATC'
1716 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1717 '? GTATAC ?'
1718 >>>
1719 """
1720 return '? %s ?' % self.site
1721 elucidate = classmethod(elucidate)
1722
1723
1725
1726
1727
1728
1729 """Implement the methods specific to the enzymes which are commercially
1730 available.
1731
1732 Internal use only. Not meant to be instantiated."""
1733
1735 """RE.suppliers() -> print the suppliers of RE."""
1736 supply = suppliers_dict.items()
1737 for k,v in supply:
1738 if k in self.suppl:
1739 print v[0]+','
1740 return
1741 suppliers = classmethod(suppliers)
1742
1744 """RE.supplier_list() -> list.
1745
1746 list of the supplier names for RE."""
1747 return [v[0] for k,v in suppliers_dict.items() if k in self.suppl]
1748 supplier_list = classmethod(supplier_list)
1749
1751 """RE.buffers(supplier) -> string.
1752
1753 not implemented yet."""
1754 return
1755 buffers = classmethod(buffers)
1756
1758 """RE.iscomm() -> bool.
1759
1760 True if RE has suppliers."""
1761 return True
1762 is_comm = classmethod(is_comm)
1763
1764
1766 """Implement the methods specific to the enzymes which are not commercially
1767 available.
1768
1769 Internal use only. Not meant to be instantiated."""
1770
1772 """RE.suppliers() -> print the suppliers of RE."""
1773 return None
1774 suppliers = staticmethod(suppliers)
1775
1777 """RE.supplier_list() -> list.
1778
1779 list of the supplier names for RE."""
1780 return []
1781 supplier_list = classmethod(supplier_list)
1782
1784 """RE.buffers(supplier) -> string.
1785
1786 not implemented yet."""
1787 raise TypeError("Enzyme not commercially available.")
1788 buffers = classmethod(buffers)
1789
1791 """RE.iscomm() -> bool.
1792
1793 True if RE has suppliers."""
1794 return False
1795 is_comm = classmethod(is_comm)
1796
1797
1798
1799
1800
1801
1802
1803
1804
1806
1807 - def __init__(self, first=[], suppliers=[]):
1814
1816 if len(self) < 5:
1817 return '+'.join(self.elements())
1818 else:
1819 return '...'.join(('+'.join(self.elements()[:2]),\
1820 '+'.join(self.elements()[-2:])))
1821
1823 return 'RestrictionBatch(%s)' % self.elements()
1824
1831
1834
1837
def get(self, enzyme, add=False):
    """B.get(enzyme[, add]) -> enzyme class.

    enzyme is first passed through B.format().
    if the result is in B it is returned.
    if it is not in B and add is True, it is added to B and returned.
    if it is not in B and add is False (the default), raise a ValueError."""
    candidate = self.format(enzyme)
    if candidate not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % candidate.__name__)
        self.add(candidate)
    return candidate
1854
def lambdasplit(self, func):
    """B.lambdasplit(func) -> RestrictionBatch .

    the new batch will contains only the enzymes for which
    func return True.

    func -- callable taking one enzyme. None keeps the enzymes which are
            themselves true, as itertools.ifilter(None, ...) did."""
    if func is None:
        keep = bool
    else:
        keep = func
    new = RestrictionBatch()
    # Fill the new batch through add_nocheck (set.add). Storing the result
    # in new._data has no effect on a set based batch, which left the
    # returned batch empty.
    for enzyme in self:
        if keep(enzyme):
            new.add_nocheck(enzyme)
    return new
1864
1866 """B.add_supplier(letter) -> add a new set of enzyme to B.
1867
1868 letter represents the suppliers as defined in the dictionary
1869 RestrictionDictionary.suppliers
1870 return None.
1871 raise a KeyError if letter is not a supplier code."""
1872 supplier = suppliers_dict[letter]
1873 self.suppliers.append(letter)
1874 for x in supplier[1]:
1875 self.add_nocheck(eval(x))
1876 return
1877
1879 """B.current_suppliers() -> add a new set of enzyme to B.
1880
1881 return a sorted list of the suppliers which have been used to
1882 create the batch."""
1883 suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
1884 suppl_list.sort()
1885 return suppl_list
1886
1888 """ b += other -> add other to b, check the type of other."""
1889 self.add(other)
1890 return self
1891
1893 """ b + other -> new RestrictionBatch."""
1894 new = self.__class__(self)
1895 new.add(other)
1896 return new
1897
1899 """B.remove(other) -> remove other from B if other is a RestrictionType.
1900
1901 Safe set.remove method. Verify that other is a RestrictionType or can be
1902 evaluated to a RestrictionType.
1903 raise a ValueError if other can not be evaluated to a RestrictionType.
1904 raise a KeyError if other is not in B."""
1905 return set.remove(self, self.format(other))
1906
def add(self, other):
    """B.add(other) -> add other to B if other is a RestrictionType.

    Safe set.add method: other goes through B.format() first and it is
    the value returned by format() which is stored with set.add.
    Whatever format() raises for an unsuitable other is propagated.
    """
    checked = self.format(other)
    return set.add(self, checked)
1915
1917 """B.add_nocheck(other) -> add other to B. don't check type of other.
1918 """
1919 return set.add(self, other)
1920
1938
1939
1941 """B.is_restriction(y) -> bool.
1942
1943 True is y or eval(y) is a RestrictionType."""
1944 return isinstance(y, RestrictionType) or \
1945 isinstance(eval(str(y)), RestrictionType)
1946
def split(self, *classes, **bool):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Keep the enzymes of B which satisfy every class given:
    by default an enzyme must be a subclass of the class; with the
    keyword <class.__name__>=False it must NOT be a subclass of it.

    it works but it is slow, so it has really an interest when splitting
    over multiple conditions."""
    def splittest(element):
        # True only if element agrees with the wanted state of each class.
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            if issubclass(element, klass):
                if not wanted:
                    return False
            elif wanted:
                return False
        return True
    new = RestrictionBatch()
    # Fill the new batch through add_nocheck (set.add). Storing the result
    # in new._data has no effect on a set based batch, which left the
    # returned batch empty.
    for enzyme in self:
        if splittest(enzyme):
            new.add_nocheck(enzyme)
    return new
1969
1971 """B.elements() -> tuple.
1972
1973 give all the names of the enzymes in B sorted alphabetically."""
1974 l = [str(e) for e in self]
1975 l.sort()
1976 return l
1977
1979 """B.as_string() -> list.
1980
1981 return a list of the name of the elements of B."""
1982 return [str(e) for e in self]
1983
1985 """B.suppl_codes() -> dict
1986
1987 letter code for the suppliers"""
1988 supply = dict([(k,v[0]) for k,v in suppliers_dict.iteritems()])
1989 return supply
1990 suppl_codes = classmethod(suppl_codes)
1991
1993 "B.show_codes() -> letter codes for the suppliers"""
1994 supply = [' = '.join(i) for i in self.suppl_codes().iteritems()]
1995 print '\n'.join(supply)
1996 return
1997 show_codes = classmethod(show_codes)
1998
def search(self, dna, linear=True):
    """B.search(dna) -> dict.

    Map every enzyme of B to the result of its own search() on dna.

    dna    -- a DNA instance (formatted here with FormattedSeq(dna, linear))
              or a FormattedSeq (its own linear attribute is used then).
    linear -- only used when dna is a DNA instance.

    The last result is kept in B.mapping and returned as is when the same
    (str(dna), linear) pair is searched again.
    raise a TypeError for any other type of dna."""
    if not hasattr(self, "already_mapped"):
        self.already_mapped = None
    # NOTE(review): the cache key does not include the content of the
    # batch itself.
    if isinstance(dna, DNA):
        key = (str(dna), linear)
        if key != self.already_mapped:
            self.already_mapped = key
            formatted = FormattedSeq(dna, linear)
            results = {}
            for enzyme in self:
                results[enzyme] = enzyme.search(formatted)
            self.mapping = results
        return self.mapping
    if isinstance(dna, FormattedSeq):
        key = (str(dna), dna.linear)
        if key != self.already_mapped:
            self.already_mapped = key
            results = {}
            for enzyme in self:
                results[enzyme] = enzyme.search(dna)
            self.mapping = results
        return self.mapping
    raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                    % type(dna))
2030
2031
2032
2033
2034
2035
2036
2037 -class Analysis(RestrictionBatch, PrintFormat):
2038
2041 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
2042
2043 For most of the method of this class if a dictionary is given it will
2044 be used as the base to calculate the results.
2045 If no dictionary is given a new analysis using the Restriction Batch
2046 which has been given when the Analysis class has been instantiated."""
2047 RestrictionBatch.__init__(self, restrictionbatch)
2048 self.rb = restrictionbatch
2049 self.sequence = sequence
2050 self.linear = linear
2051 if self.sequence:
2052 self.search(self.sequence, self.linear)
2053
2055 return 'Analysis(%s,%s,%s)'%\
2056 (repr(self.rb),repr(self.sequence),self.linear)
2057
2059 """A._sub_set(other_set) -> dict.
2060
2061 Internal use only.
2062
2063 screen the results through wanted set.
2064 Keep only the results for which the enzymes is in wanted set.
2065 """
2066 return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
2067
2069 """A._boundaries(start, end) -> tuple.
2070
2071 Format the boundaries for use with the methods that limit the
2072 search to only part of the sequence given to analyse.
2073 """
2074 if not isinstance(start, int):
2075 raise TypeError('expected int, got %s instead' % type(start))
2076 if not isinstance(end, int):
2077 raise TypeError('expected int, got %s instead' % type(end))
2078 if start < 1:
2079 start += len(self.sequence)
2080 if end < 1:
2081 end += len(self.sequence)
2082 if start < end:
2083 pass
2084 else:
2085 start, end == end, start
2086 if start < 1:
2087 start == 1
2088 if start < end:
2089 return start, end, self._test_normal
2090 else:
2091 return start, end, self._test_reverse
2092
2094 """A._test_normal(start, end, site) -> bool.
2095
2096 Internal use only
2097 Test if site is in between start and end.
2098 """
2099 return start <= site < end
2100
2102 """A._test_reverse(start, end, site) -> bool.
2103
2104 Internal use only
2105 Test if site is in between end and start (for circular sequences).
2106 """
2107 return start <= site <= len(self.sequence) or 1 <= site < end
2108
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    If dct is not given (or is empty) the full dictionary A.mapping is
    used. The printing itself is done by PrintFormat.print_that, whose
    result is returned.
    """
    results = dct or self.mapping
    # Python 2 print statement: emits an empty line before the results.
    print
    return PrintFormat.print_that(self, results, title, s1)
2118
def change(self, **what):
    """A.change(**attribute_name) -> Change attribute of Analysis.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.

    Keywords accepted:
    NameWidth, ConsoleWidth -- set, then Cmodulo and PrefWidth are
                               recomputed from them.
    sequence, linear        -- set, then the search is run again.
    rb                      -- the Analysis is initialised again with the
                               new batch, same sequence and linear.
    Indent, Maxsize         -- simply set.
    raise an AttributeError for Cmodulo, PrefWidth and any other name."""
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        # Names are compared with ==, 'is' only worked by accident of
        # string interning.
        elif k == 'sequence':
            self.sequence = v
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None: do not rebind self with its result or
            # the keywords still to be processed would be set on None.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            self.linear = v
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            # The messages are formatted with k, the undefined 'name' used
            # before turned both errors into a NameError.
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth'
                % k)
        else:
            raise AttributeError(
                'Analysis has no attribute %s' % k)
    return
2153
def full(self, linear=True):
    """A.full() -> dict.

    Full Restriction Map of the sequence, i.e. A.mapping as it stands.
    linear is accepted but is not used here."""
    whole_map = self.mapping
    return whole_map
2159
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes which have a blunt restriction site,
    i.e. the entries of dct whose enzyme answers True to is_blunt().

    If dct is not given the full dictionary A.mapping is used. An empty
    dct is a valid (empty) set of results and gives an empty dict."""
    # Test against None: with 'not dct' an empty intermediate result was
    # silently replaced by the full map.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if k.is_blunt()])
2167
2169 """A.overhang5([dct]) -> dict.
2170
2171 Only the enzymes which have a 5' overhang restriction site."""
2172 if not dct:
2173 dct = self.mapping
2174 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2175
2176
2178 """A.Overhang3([dct]) -> dict.
2179
2180 Only the enzymes which have a 3'overhang restriction site."""
2181 if not dct:
2182 dct = self.mapping
2183 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2184
2185
2187 """A.defined([dct]) -> dict.
2188
2189 Only the enzymes that have a defined restriction site in Rebase."""
2190 if not dct:
2191 dct = self.mapping
2192 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2193
2195 """A.with_sites([dct]) -> dict.
2196
2197 Enzymes which have at least one site in the sequence."""
2198 if not dct:
2199 dct = self.mapping
2200 return dict([(k,v) for k,v in dct.iteritems() if v])
2201
2203 """A.without_site([dct]) -> dict.
2204
2205 Enzymes which have no site in the sequence."""
2206 if not dct:
2207 dct = self.mapping
2208 return dict([(k,v) for k,v in dct.iteritems() if not v])
2209
2211 """A.With_N_Sites(N [, dct]) -> dict.
2212
2213 Enzymes which cut N times the sequence."""
2214 if not dct:
2215 dct = self.mapping
2216 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2217
2219 if not dct:
2220 dct = self.mapping
2221 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2222
def with_name(self, names, dct=None):
    """A.with_name(list_of_names [, dct]) -> dict.

    Limit the search to the enzymes named in list_of_names.

    The names which are not in AllEnzymes are reported and skipped.
    Without dct a new search of A.sequence is done with the remaining
    enzymes, otherwise their entries are extracted from dct.
    list_of_names itself is left untouched."""
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            # Report the enzyme of this iteration. The 'name' used before
            # was undefined, and deleting from names while enumerating it
            # skipped the element following each removal.
            print("no datas for the enzyme: %s" % str(enzyme))
    if not dct:
        return RestrictionBatch(known).search(self.sequence)
    return dict([(n, dct[n]) for n in known if n in dct])
2234
def with_site_size(self, site_size, dct=None):
    """A.with_site_size(site_size [, dct]) -> dict.

    Limit the search to the enzymes whose site is of size <site_size>.

    The enzymes are picked in A on their size attribute. Without dct a
    new search of A.sequence is done with them, otherwise their entries
    are extracted from dct."""
    sites = [name for name in self if name.size == site_size]
    if not dct:
        return RestrictionBatch(sites).search(self.sequence)
    # Filter on the selected enzymes: 'k in site_size' tested membership
    # in an integer and raised a TypeError.
    return dict([(k, v) for k, v in dct.items() if k in sites])
2243
2245 """A.only_between(start, end[, dct]) -> dict.
2246
2247 Enzymes that cut the sequence only in between start and end."""
2248 start, end, test = self._boundaries(start, end)
2249 if not dct:
2250 dct = self.mapping
2251 d = dict(dct)
2252 for key, sites in dct.iteritems():
2253 if not sites:
2254 del d[key]
2255 continue
2256 for site in sites:
2257 if test(start, end, site):
2258 continue
2259 else:
2260 del d[key]
2261 break
2262 return d
2263
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.

    start and end are formatted by A._boundaries, which also provides the
    test applied to each site. The enzymes kept are returned with all of
    their sites.
    If dct is not given the full dictionary A.mapping is used. An empty
    dct gives an empty dict."""
    start, end, test = self._boundaries(start, end)
    # Test against None: with 'not dct' an empty intermediate result was
    # silently replaced by the full map.
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        if any(test(start, end, site) for site in sites):
            d[key] = sites
    return d
2280
def show_only_between(self, start, end, dct=None):
    """A.show_only_between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence in between start and end (as given by
    A.between), listed with only their sites located in that region.

    When start <= end a site is kept if start <= site <= end, otherwise
    (start > end) if it is at or after start, or at or before end."""
    hits = self.between(start, end, dct)
    # NOTE(review): start and end are compared as given here, they do not
    # go through _boundaries as they do inside between() - confirm.
    if start <= end:
        def inside(site):
            return start <= site <= end
    else:
        def inside(site):
            return start <= site or site <= end
    # Iterate over the (enzyme, sites) pairs: looping over the dict itself
    # only gave its keys and left 'k' undefined.
    return dict([(k, [vv for vv in v if inside(vv)])
                 for k, v in hits.items()])
2294
2296 """A.only_outside(start, end [, dct]) -> dict.
2297
2298 Enzymes that cut the sequence outside of the region
2299 in between start and end but do not cut inside."""
2300 start, end, test = self._boundaries(start, end)
2301 if not dct : dct = self.mapping
2302 d = dict(dct)
2303 for key, sites in dct.iteritems():
2304 if not sites:
2305 del d[key]
2306 continue
2307 for site in sites:
2308 if test(start, end, site):
2309 del d[key]
2310 break
2311 else:
2312 continue
2313 return d
2314
def outside(self, start, end, dct=None):
    """A.outside((start, end [, dct]) -> dict.

    Enzymes that cut outside the region in between start and end.
    No test is made to know if they cut or not inside this region.

    start and end are formatted by A._boundaries, which also provides the
    test applied to each site. The enzymes kept are returned with all of
    their sites.
    If dct is not given the full dictionary A.mapping is used. An empty
    dct gives an empty dict."""
    start, end, test = self._boundaries(start, end)
    # Test against None: with 'not dct' an empty intermediate result was
    # silently replaced by the full map.
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        if any(not test(start, end, site) for site in sites):
            d[key] = sites
    return d
2332
2333
def do_not_cut(self, start, end, dct=None):
    """A.do_not_cut(start, end [, dct]) -> dict.

    Enzymes that do not cut the region in between start and end:
    the enzymes of dct without any site (A.without_site) together with
    those cutting only outside of the region (A.only_outside).

    If dct is not given the full dictionary A.mapping is used."""
    if not dct:
        dct = self.mapping
    # Screen the same dictionary in both steps: without_site() was called
    # with no argument and always reported on the full map.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
# Build every enzyme class at import time and sort them in two batches:
# CommOnly receives the enzymes whose is_comm() is true, NonComm the others.
CommOnly = RestrictionBatch()
NonComm = RestrictionBatch()
for TYPE, (bases, enzymes) in typedict.iteritems():
    # Each entry of typedict pairs a sequence of bases with the enzymes
    # built on them. The bases are evaluated into objects here -
    # presumably they are the names of the classes defined above, confirm
    # against typedict.
    bases = tuple([eval(x) for x in bases])
    # Intermediate type created through RestrictionType with these bases;
    # it is the one called below to produce each enzyme.
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        # k is the key of the enzyme in enzymedict; enzymedict[k] is handed
        # over as third argument - presumably the attributes of the new
        # enzyme, TODO confirm against RestrictionType.
        newenz = T(k, bases, enzymedict[k])
        if newenz.is_comm() : CommOnly.add_nocheck(newenz)
        else : NonComm.add_nocheck(newenz)
# Union of the two batches.
AllEnzymes = CommOnly | NonComm
# Bind every enzyme to a module level name equal to str(enzyme) (locals()
# is the module namespace at this level) and export these names.
names = [str(x) for x in AllEnzymes]
locals().update(dict(map(None, names, AllEnzymes)))
__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
# Remove the construction leftovers from the module namespace. x is the
# variable of the list comprehensions above, which stays bound after them
# under Python 2.
del k, x, enzymes, TYPE, bases, names
2417