Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """ Notes about the various classes of the restriction enzyme implementation. 
  12   
  13          RestrictionType is the type of all restriction enzymes. 
  14      ---------------------------------------------------------------------------- 
  15          AbstractCut implements some methods that are common to all enzymes. 
  16      ---------------------------------------------------------------------------- 
  17          NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  18                                  produced by the enzyme. 
  19                                  they correspond to the 4th field of the rebase 
  20                                  record emboss_e.NNN. 
  21                  0->NoCut    : the enzyme is not characterised. 
  22                  2->OneCut   : the enzyme produces one double strand cut. 
  23                  4->TwoCuts  : two double strand cuts. 
  24      ---------------------------------------------------------------------------- 
  25          Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  26                                  the enzyme. 
  27                                  Not implemented yet. 
  28      ---------------------------------------------------------------------------- 
  29          Palindromic,            if the site is palindromic or not. 
  30          NotPalindromic          allow some optimisations of the code. 
  31                                  No need to check the reverse strand 
  32                                  with palindromic sites. 
  33      ----------------------------------------------------------------------------                                     
  34          Unknown, Blunt,         represent the overhang. 
  35          Ov5, Ov3                Unknown is here for symmetry reasons and 
  36                                  corresponds to enzymes that are not characterised 
  37                                  in rebase. 
  38      ---------------------------------------------------------------------------- 
  39          Defined, Ambiguous,     represent the sequence of the overhang. 
  40          NotDefined              
  41                                  NotDefined is for enzymes not characterised in 
  42                                  rebase. 
  43                                   
  44                                  Defined correspond to enzymes that display a 
  45                                  constant overhang whatever the sequence. 
  46                                  ex : EcoRI. G^AATTC -> overhang :AATT 
  47                                              CTTAA^G 
  48   
  49                                  Ambiguous : the overhang varies with the 
  50                                  sequence restricted. 
  51                                  Typically enzymes which cut outside their 
  52                                  restriction site or (but not always) 
  53                                  inside an ambiguous site. 
  54                                  ex: 
  55                                  AcuI CTGAAG(22/20)  -> overhang : NN 
  56                                  AasI GACNNN^NNNGTC  -> overhang : NN 
  57                                       CTGN^NNNNNCAG 
  58   
  59              note : these 3 classes refer to the overhang not the site. 
  60                 So the enzyme ApoI (RAATTY) is defined even if its restriction 
  61                 site is ambiguous. 
  62                                   
  63                      ApoI R^AATTY -> overhang : AATT -> Defined 
  64                           YTTAA^R 
  65                 Accordingly, blunt enzymes are always Defined even 
  66                 when they cut outside their restriction site. 
  67      ---------------------------------------------------------------------------- 
  68          Not_available,          as found in rebase file emboss_r.NNN files. 
  69          Commercially_available 
  70                                  allow the selection of the enzymes according to 
  71                                  their suppliers to reduce the quantity 
  72                                  of results. 
  73                                  Also will allow the implementation of buffer 
  74                                  compatibility tables. Not implemented yet. 
  75   
  76                                  the list of suppliers is extracted from 
  77                                  emboss_s.NNN 
  78      ---------------------------------------------------------------------------- 
  79          """ 
  80   
  81  import re 
  82  import itertools 
  83   
  84  from Bio.Seq import Seq, MutableSeq 
  85  from Bio.Alphabet import IUPAC 
  86   
  87  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  88  from Bio.Restriction.Restriction_Dictionary import typedict 
  89  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
  90  from Bio.Restriction.RanaConfig import * 
  91  from Bio.Restriction.PrintFormat import PrintFormat 
  92   
  93  #Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 
  94  #namespace), but have deprecated that module. 
95 -def _check_bases(seq_string):
96 """Check characters in a string (PRIVATE). 97 98 Remove digits and white space present in string. Allows any valid ambiguous 99 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 100 101 Other characters (e.g. symbols) trigger a TypeError. 102 103 Returns the string WITH A LEADING SPACE (!). This is for backwards 104 compatibility, and may in part be explained by the fact that 105 Bio.Restriction doesn't use zero based counting. 106 """ 107 #Remove white space and make upper case: 108 seq_string = "".join(seq_string.split()).upper() 109 #Remove digits 110 for c in "0123456789" : seq_string = seq_string.replace(c,"") 111 #Check only allowed IUPAC letters 112 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")) : 113 raise TypeError("Invalid character found in %s" % repr(seq_string)) 114 return " " + seq_string
115
def check_bases(seq_string):
    """Check characters in a string (DEPRECATED).

    Public wrapper kept for backwards compatibility: it emits a
    DeprecationWarning and then returns _check_bases(seq_string).
    """
    import warnings
    # Adjacent string literals are concatenated as-is, so the first one has
    # to end with a space to keep "be" and "removed" apart in the message.
    # stacklevel=2 attributes the warning to the caller of check_bases.
    warnings.warn("The check_bases function has been deprecated, and will be "
                  "removed in a future release of Biopython.",
                  DeprecationWarning, stacklevel=2)
    return _check_bases(seq_string)
# Lookup table keyed by IUPAC DNA letter; each value is a string of IUPAC
# letters associated with that key.
# NOTE(review): the entries look like "every code that shares at least one
# base with the key" -- confirm before relying on that reading.
matching = {
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    'N': 'ACBDGHKMNSRTWVY',
}

# DNA is simply another name for Bio.Seq.Seq.
DNA = Seq

class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

    Translate a Bio.Seq into a formatted sequence to be used with Restriction.

    Roughly:
    the sequence string is cleaned by _check_bases (white space and digits
    removed, upper-cased, validated) and a space is added in front of it to
    get a biological index instead of a python index (i.e. index of the
    first base is 1 not 0).

    Retains information about the shape of the molecule: linear (default)
    or circular.  Restriction sites are searched over the edges of a
    circular sequence.

    Attributes set by __init__:
        lower    -> True if the original sequence string was all lower case.
        data     -> the cleaned string, with its leading space.
        linear   -> True for a linear molecule, False for a circular one.
        klass    -> class of the original sequence object.
        alphabet -> alphabet of the original sequence object.
    """

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        if seq is a FormattedSeq, linear will have no effect on the
        shape of the sequence.

        Raises TypeError for any other type of seq."""
        if isinstance(seq, FormattedSeq):
            # Copy constructor: the shape of the source wins over `linear`.
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        elif isinstance(seq, (Seq, MutableSeq)):
            stringy = seq.tostring()
            self.lower = stringy.islower()
            # Note this adds a leading space to the sequence (!)
            self.data = _check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Number of bases; the leading space of data is not counted."""
        return len(self.data) - 1

    def __repr__(self):
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                 repr(self.linear))

    def __eq__(self, other):
        """FS == other -> bool.

        True only if other is a FormattedSeq with the same repr()."""
        return isinstance(other, FormattedSeq) and repr(self) == repr(other)

    def __ne__(self, other):
        """FS != other -> bool.

        Defined explicitly as the negation of ==.  Python 2 does not derive
        != from __eq__, so without it two equal FormattedSeq would also
        compare as different."""
        return not self == other

    def circularise(self):
        """FS.circularise() -> circularise FS"""
        self.linear = False
        return

    def linearise(self):
        """FS.linearise() -> linearise FS"""
        self.linear = True
        return

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance"""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance"""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence."""
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        return a list of the matches of pattern in the sequence.
        the list is made of tuples (location, match.group), where
        match.group is the bound group() method of the match object.
        the latter is used with non palindromic sites.
        pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        size is the size of the restriction enzyme recognition-site."""
        if self.is_linear():
            data = self.data
        else:
            # Circular: append the first size-1 bases so that sites spanning
            # the origin can be matched.
            data = self.data + self.data[1:size]
        return [(hit.start(), hit.group) for hit in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Return data[i] wrapped in the original sequence class.

        The text is lower-cased again when the original sequence string was
        all lower case."""
        text = self.data[i]
        if self.lower:
            text = text.lower()
        return self.klass(text, self.alphabet)

230 231
class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes derive.

    Implements the operator methods (+, /, //, %, >>, comparisons, len,
    str, repr) that are available on the enzyme classes themselves."""

    def __init__(cls, name='', bases=(), dct={}):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        Raises ValueError if name contains a hyphen or if cls.compsite
        cannot be compiled as a regular expression."""
        # NOTE: dct={} is a shared default; it is only passed on, never
        # mutated here.
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name"
                             % repr(name))
        # NOTE(review): cls is passed explicitly in addition to going
        # through super(); kept exactly as in the original -- check how the
        # enzyme classes are built before touching this call.
        super(RestrictionType, cls).__init__(cls, name, bases, dct)
        try:
            # Replace the pattern string by its compiled form.
            cls.compsite = re.compile(cls.compsite)
        except Exception:
            raise ValueError("Problem with regular expression, re.compiled(%s)"
                             % repr(cls.compsite))

    def __add__(cls, other):
        """RE.__add__(other) -> RestrictionBatch().

        if other is an enzyme returns a batch of the two enzymes.
        if other is already a RestrictionBatch add enzyme to it.
        anything else raises TypeError."""
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        if isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        raise TypeError

    def __div__(cls, other):
        """RE.__div__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rdiv__(cls, other):
        """RE.__rdiv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    def __truediv__(cls, other):
        """RE.__truediv__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rtruediv__(cls, other):
        """RE.__rtruediv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    def __floordiv__(cls, other):
        """RE.__floordiv__(other) -> list.

        RE//other
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """RE.__rfloordiv__(other) -> list.

        other//RE
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __str__(cls):
        """RE.__str__() -> str.

        return the name of the enzyme."""
        return cls.__name__

    def __repr__(cls):
        """RE.__repr__() -> str.

        return the name of the enzyme, so that eval(repr(RE)) gives back
        the enzyme when its name is in scope."""
        return "%s" % (cls.__name__,)

    def __len__(cls):
        """RE.__len__() -> int.

        length of the recognition site (cls.size)."""
        return cls.size

    def __eq__(cls, other):
        """RE == other -> bool

        True if RE and other are the same enzyme (identity test)."""
        return other is cls

    def __ne__(cls, other):
        """RE != other -> bool.

        strict isoschizomer (equal charac attribute) -> False
        everything else, non-enzymes included -> True

        Note that this is deliberately NOT the negation of ==."""
        if not isinstance(other, RestrictionType):
            return True
        return not (cls.charac == other.charac)

    def __rshift__(cls, other):
        """RE >> other -> bool.

        neoschizomer : same recognition site, different restriction. -> True
        all the others : -> False"""
        if not isinstance(other, RestrictionType):
            return False
        if cls.site == other.site and cls.charac != other.charac:
            return True
        return False

    def __mod__(cls, other):
        """a % b -> bool.

        Test compatibility of the overhang of a and b.
        True if a and b have compatible overhang.
        Raises TypeError if b is not an enzyme."""
        if isinstance(other, RestrictionType):
            return cls._mod1(other)
        raise TypeError(
            'expected RestrictionType, got %s instead' % type(other))

    def __ge__(cls, other):
        """a >= b -> bool.

        a is greater than or equal to b if the site of a is longer than the
        site of b.  if their sites have the same length, sort by
        alphabetical order of their names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) > len(other):
            return True
        return cls.size == len(other) and cls.__name__ >= other.__name__

    def __gt__(cls, other):
        """a > b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) > len(other):
            return True
        return cls.size == len(other) and cls.__name__ > other.__name__

    def __le__(cls, other):
        """a <= b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) < len(other):
            return True
        return len(cls) == len(other) and cls.__name__ <= other.__name__

    def __lt__(cls, other):
        """a < b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) < len(other):
            return True
        return len(cls) == len(other) and cls.__name__ < other.__name__

424 425
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated."""

    @classmethod
    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        return a list of all the site of RE in dna. Compensate for circular
        sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance
        (or an already built FormattedSeq, in which case linear is ignored).

        if linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.

        Side effect: the (formatted) sequence is stored in cls.dna."""
        #
        #   Separating search from _search allow a (very limited) optimisation
        #   of the search when using a batch of restriction enzymes.
        #   in this case the DNA is tested once by the class which implements
        #   the batch instead of being tested by each enzyme single.
        #   see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        cls.dna = dna
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """RE.all_suppliers -> print all the suppliers of R.

        The first item of every value of the suppliers dictionary is
        printed, sorted, one per line."""
        supply = sorted([x[0] for x in suppliers_dict.values()])
        print(",\n".join(supply))
        return

    @classmethod
    def is_equischizomer(cls, other):
        """RE.is_equischizomers(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction."""
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """RE.is_neoschizomers(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction."""
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """RE.is_isoschizomers(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site."""
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """RE.equischizomers([batch]) -> list.

        return a sorted list of all the equischizomers of RE, RE excluded.
        if batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).

        equischizomer <=> same site, same position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not cls != x]
        # RE is its own equischizomer: drop it when it is part of the batch.
        # A user supplied batch may not contain RE at all, which must not
        # be an error.
        try:
            r.remove(cls)
        except ValueError:
            pass
        r.sort()
        return r

    @classmethod
    def neoschizomers(cls, batch=None):
        """RE.neoschizomers([batch]) -> list.

        return a sorted list of all the neoschizomers of RE.
        if batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).

        neoschizomer <=> same site, different position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if cls >> x]
        r.sort()
        return r

    @classmethod
    def isoschizomers(cls, batch=None):
        """RE.isoschizomers([batch]) -> list.

        return a sorted list of all the equischizomers and neoschizomers of
        RE, RE excluded.
        if batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes)."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (cls >> x) or (not cls != x)]
        # Same as in equischizomers(): RE may be missing from the batch.
        try:
            r.remove(cls)
        except ValueError:
            pass
        r.sort()
        return r

    @classmethod
    def frequency(cls):
        """RE.frequency() -> int.

        frequency of the site (the freq attribute of the enzyme)."""
        return cls.freq

546 547
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Unknown and NotDefined are also part of the base classes of these
    enzymes.
    NOTE(review): the original text said their catalyse() method "returns a
    TypeError"; Unknown.catalyse in this module raises NotImplementedError.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always False here."""
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False here."""
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        As the cut position is not known, the location itself is yielded
        unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic: the location is yielded unchanged.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        Here the four cut positions are always None."""
        return (None, None, None, None, cls.site)

628
class OneCut(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA only once

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always True here."""
        return True

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False here."""
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        _modify yields the real place where the enzyme will cut, that is
        location + fst5.

        example:
            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        yield location + cls.fst5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic: yields location - fst3.
        """
        yield location - cls.fst3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        Here the two second-cut positions are always None."""
        return (cls.fst5, cls.fst3, None, None, cls.site)

699 700
class TwoCuts(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA twice

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always False here."""
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always True here."""
        return True

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        As the enzyme cuts twice, two integers are yielded, one for each
        cutting site: location + fst5, then location + scd5.
        """
        yield location + cls.fst5
        yield location + cls.scd5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic: yields location - fst3, then
        location - scd3.
        """
        yield location - cls.fst3
        yield location - cls.scd3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site."""
        return (cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site)

773 774
class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable."""

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.  Always True here."""
        return True

786
class Meth_Undep(AbstractCut):
    """Implement the information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation."""

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.  Always False here."""
        return False

798
class Palindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are palindromic

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        implement the search method for palindromic enzymes: every match
        of the compiled site in cls.dna is turned into its cut position(s)
        by _modify.  The list is stored in cls.results and, when it is not
        empty, cls._drop() is called before cls.results is returned.
        """
        cuts = []
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            # _drop() is defined outside this class.
            # NOTE(review): presumably it filters cls.results -- confirm.
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.  Always True here."""
        return True

829 830
class NonPalindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are not palindromic

    palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        implement the search method for non palindromic enzymes.

        Each match is asked for the group named after the enzyme: when that
        group matched, the cut positions come from _modify, otherwise from
        _rev_modify (these are also kept in cls.on_minus).  Both sets are
        merged in cls.results; when the merged list is not empty it is
        sorted and cls._drop() is called before cls.results is returned.
        """
        hits = cls.dna.finditer(cls.compsite, cls.size)
        cls.results = plus_cuts = []
        forward = cls._modify
        backward = cls._rev_modify
        name = str(cls)
        cls.on_minus = minus_cuts = []
        for start, group in hits:
            # NOTE(review): presumably the named group only matches the site
            # as written (plus strand) -- confirm against the patterns.
            if group(name):
                plus_cuts.extend(forward(start))
            else:
                minus_cuts.extend(backward(start))
        plus_cuts.extend(minus_cuts)
        if plus_cuts:
            plus_cuts.sort()
            # _drop() is defined outside this class.
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.  Always False here."""
        return False

870
class Unknown(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is unknown.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Always raises NotImplementedError: the way the enzyme cuts is not
        known, so no fragment can be produced.  The arguments are accepted
        only to keep the same signature as the other enzyme classes."""
        raise NotImplementedError('%s restriction is unknown.'
                                  % cls.__name__)

    # American spelling, same classmethod object.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.  Always False here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        Always 'unknown' here."""
        return 'unknown'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        list of all the enzymes that share compatible end with RE.
        Always empty here, whatever the batch: the optional batch argument
        is accepted (and ignored) so that the method can be called the same
        way as Blunt.compatible_end(batch)."""
        return []

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Always False here."""
        return False

952
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna, a 1-tuple holding the whole sequence is
        returned."""
        r = cls.search(dna, linear)
        # search() leaves the formatted sequence in cls.dna.
        d = cls.dna
        if not r:
            return d[1:],
        # Fragments lying between two consecutive sites (none for one site).
        between = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            #
            #   START of the sequence to FIRST site, the inner fragments,
            #   then LAST site to END of the sequence.
            #
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            #
            #   circular : bridge LAST site to FIRST site, then the inner
            #   fragments.
            #
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)

    # American spelling, same classmethod object.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.  Always True here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        Always 'blunt' here."""
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes that share compatible end with RE,
        i.e. all the blunt enzymes of batch.
        if batch is not supplied (or is empty) AllEnzymes is used."""
        if not batch:
            batch = AllEnzymes
        # Iterate over the requested batch, not unconditionally AllEnzymes.
        r = [x for x in iter(batch) if x.is_blunt()]
        r.sort()
        return r

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other:
        True if other is a Blunt enzyme as well."""
        return issubclass(other, Blunt)

1071
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        r = self.search(dna, linear)
        # NOTE(review): self.dna is presumably set by self.search() and
        # indexed from 1 -- confirm against search()/FormattedSeq.
        d = self.dna
        if not r:
            # no cut position: the whole sequence is the only fragment.
            return (d[1:],)
        # fragments lying between two consecutive cut positions
        # (empty when there is a single site).
        inner = [d[a:b] for a, b in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the sites in between,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + inner + [d[r[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1]:] + d[1:r[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "5' overhang"
    overhang = classmethod(overhang)

    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE: the 5' overhang cutters x of batch for which x % RE holds.
        If batch is not given (or is empty) AllEnzymes is searched."""
        if not batch:
            batch = AllEnzymes
        # iterate the requested batch: the argument used to be ignored and
        # AllEnzymes was always searched.
        r = [x for x in iter(batch) if x.is_5overhang() and x % self]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov5 enzyme can be compatible; the decision is then
        delegated to self._mod2(other)."""
        if issubclass(other, Ov5):
            return self._mod2(other)
        return False
    _mod1 = classmethod(_mod1)
1189 1190
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        r = self.search(dna, linear)
        # NOTE(review): self.dna is presumably set by self.search() and
        # indexed from 1 -- confirm against search()/FormattedSeq.
        d = self.dna
        if not r:
            # no cut position: the whole sequence is the only fragment.
            return (d[1:],)
        # fragments lying between two consecutive cut positions
        # (empty when there is a single site).
        inner = [d[a:b] for a, b in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the sites in between,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + inner + [d[r[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1]:] + d[1:r[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_3overhang = classmethod(is_3overhang)

    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "3' overhang"
    overhang = classmethod(overhang)

    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE: the 3' overhang cutters x of batch for which x % RE holds.
        If batch is not given (or is empty) AllEnzymes is searched."""
        if not batch:
            batch = AllEnzymes
        # iterate the requested batch: the argument used to be ignored and
        # AllEnzymes was always searched.
        r = [x for x in iter(batch) if x.is_3overhang() and x % self]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov3 enzyme can be compatible; the decision is then
        delegated to self._mod2(other)."""
        if issubclass(other, Ov3):
            return self._mod2(other)
        return False
    _mod1 = classmethod(_mod1)
1311 1312
class Defined(AbstractCut):
    """Methods shared by the enzymes whose overhang and cut never vary.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        Linear sequence : discard the positions stored in self.results that
        fall outside the sequence.
        Circular sequence : shift those positions back into the sequence
        instead of discarding them."""
        #
        #   The positions in self.results are cut positions: the distance
        #   from the site to the cut has been added after the site was
        #   found, so they may lie before the start or after the end of
        #   the sequence.
        #
        seq_len = len(self.dna)
        if self.dna.is_linear():
            # skip the leading positions located before the sequence ...
            inside = itertools.dropwhile(lambda pos: pos < 1, self.results)
            # ... and stop at the first position not smaller than its length.
            inside = itertools.takewhile(lambda pos: pos < seq_len, inside)
            self.results = list(inside)
            return
        sites = self.results
        # leading positions located before the start wrap to the end.
        for rank, pos in enumerate(sites):
            if pos >= 1:
                break
            sites[rank] = pos + seq_len
        # trailing positions located after the end wrap to the start.
        for back in range(1, len(sites) + 1):
            if sites[-back] <= seq_len:
                break
            sites[-back] -= seq_len
        return
    _drop = classmethod(_drop)

    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always True for this class.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return True
    is_defined = classmethod(is_defined)

    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always False for this class.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return False
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return False
    is_unknown = classmethod(is_unknown)

    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        f5 = self.fst5
        f3 = self.fst3
        site = self.site
        if self.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if self.is_5overhang():
            if f5 == 0 and f3 == 0:
                return 'N^' + self.site + '_N'
            if f3 == 0:
                return site[:f5] + '^' + site[f5:] + '_N'
            return site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        if self.is_blunt():
            return site[:f5] + '^_' + site[f5:]
        # remaining case: the (-) strand mark comes first.
        if f5 == 0 and f3 == 0:
            return 'N_' + site + '^N'
        return site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
    elucidate = classmethod(elucidate)

    def _mod2(self, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.

        Identical overhang sequences are compatible. Otherwise, when other
        is an Ambiguous enzyme, the answer is other._mod2(RE)."""
        if other.ovhgseq == self.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(self)
        return False
    _mod2 = classmethod(_mod2)
1446 1447
class Ambiguous(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        drop the site that are situated outside the sequence in linear sequence.
        modify the index for site in circular sequences."""
        length = len(self.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if self.dna.is_linear():
            self.results = [x for x in drop(lambda x: x < 1, self.results)]
            self.results = [x for x in take(lambda x: x < length,
                                            self.results)]
        else:
            # leading positions located before the start wrap to the end.
            for index, location in enumerate(self.results):
                if location < 1:
                    self.results[index] += length
                else:
                    break
            # trailing positions located after the end wrap to the start.
            for index, location in enumerate(self.results[::-1]):
                if location > length:
                    self.results[-(index+1)] -= length
                else:
                    break
        return
    _drop = classmethod(_drop)

    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False
    is_defined = classmethod(is_defined)

    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return True
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return False
    is_unknown = classmethod(is_unknown)

    def _mod2(self, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.

        The overhang of RE is turned into a regular expression ('N' becomes
        '.', the other ambiguity codes become a character class taken from
        the matching table) which is matched against the overhang of other.
        Overhangs of different length are never compatible."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(self.ovhgseq) != len(other.ovhgseq):
            return False
        se = self.ovhgseq
        # the loop walks the original overhang while se is rewritten.
        for base in se:
            if base in 'N':
                se = '.'.join(se.split('N'))
            if base in 'RYWMSKHDBV':
                expand = '[' + matching[base] + ']'
                se = expand.join(se.split(base))
        if re.match(se, other.ovhgseq):
            return True
        return False
    _mod2 = classmethod(_mod2)

    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise a ValueError for a blunt enzyme whose fst5 lies between 0 and
        the length of the site.
        """
        f5 = self.fst5
        f3 = self.fst3
        length = len(self)
        site = self.site
        # the result is named rep: the former local name re hid the re
        # module inside this method.
        if self.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif self.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3+length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3*'N' + '_N'
            elif 0 <= f3+length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3+length < 0:
                # was 'N^'*abs(f5)*'N' : a str multiplied by a str, which
                # raises a TypeError.
                rep = 'N^' + abs(f5)*'N' + '_' + abs(length+f3)*'N' + site
            elif f5 > length:
                rep = site + (f5-length)*'N' + '^' + (length+f3-f5)*'N' + '_N'
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3*'N' + '_N'
        elif self.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5)*'N' + site
            elif f5 > length:
                rep = site + (f5-length)*'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (self.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5-length)*'N' + '^N'
            elif 0 < f3+length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3+length <= length:
                rep = site[:f3] + '_' + site[f3:] + (f5-length)*'N' + '^N'
            elif 0 <= f5 <= length:
                rep = 'N_' + 'N'*(f3+length) + site[:f5] + '^' + site[f5:]
            elif f3 > 0:
                rep = site + f3*'N' + '_' + (f5-f3-length)*'N' + '^N'
            elif f5 < 0:
                rep = 'N_' + abs(f3-f5+length)*'N' + '^' + abs(f5)*'N' + site
            else:
                rep = ('N_' + abs(f3+length)*'N' + site
                       + (f5-length)*'N' + '^N')
        return rep
    elucidate = classmethod(elucidate)
1615 1616
class NotDefined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated."""

    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        Linear sequence : self.results is left untouched.
        Circular sequence : the positions located before the start or after
        the end of the sequence are shifted back into the sequence."""
        if self.dna.is_linear():
            return
        length = len(self.dna)
        # leading positions located before the start wrap to the end.
        for index, location in enumerate(self.results):
            if location < 1:
                self.results[index] += length
            else:
                break
        # trailing positions located after the end wrap to the start.
        # The results must be walked backwards ([::-1], as in Defined and
        # Ambiguous) so that location and -(index+1) designate the same
        # element; the former [:-1] slice walked them forwards.
        for index, location in enumerate(self.results[::-1]):
            if location > length:
                self.results[-(index+1)] -= length
            else:
                break
        return
    _drop = classmethod(_drop)

    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False
    is_defined = classmethod(is_defined)

    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return False
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return True
    is_unknown = classmethod(is_unknown)

    def _mod2(self, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.

        Always raise a ValueError: the overhang is not defined, so no
        compatibility can be computed."""
        #
        #   Normally we should not arrive here. But well better safe than
        #   sorry: raise rather than return False quietly.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(self), str(other), str(self)))
    _mod2 = classmethod(_mod2)

    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        return '? %s ?' % self.site
    elucidate = classmethod(elucidate)
1722 1723
class Commercially_available(AbstractCut):
    #
    #   Recent additions to Rebase make this naming convention uncertain.
    #   It may be better to speak of enzymes which have a supplier.
    #
    """Methods shared by the enzymes which are commercially available.

    Internal use only. Not meant to be instantiated."""

    def suppliers(self):
        """RE.suppliers() -> print the suppliers of RE.

        One line is printed per entry of suppliers_dict whose key is found
        in RE.suppl: the first item of the entry followed by a comma."""
        for code, entry in suppliers_dict.items():
            if code in self.suppl:
                print(entry[0] + ',')
        return
    suppliers = classmethod(suppliers)

    def supplier_list(self):
        """RE.supplier_list() -> list.

        first item of every suppliers_dict entry whose key is in RE.suppl."""
        names = []
        for code, entry in suppliers_dict.items():
            if code in self.suppl:
                names.append(entry[0])
        return names
    supplier_list = classmethod(supplier_list)

    def buffers(self, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet: returns None whatever the supplier."""
        return None
    buffers = classmethod(buffers)

    def is_comm(self):
        """RE.iscomm() -> bool.

        True if RE has suppliers, which is always the case here."""
        return True
    is_comm = classmethod(is_comm)
1763 1764
class Not_available(AbstractCut):
    """Methods shared by the enzymes which are not commercially available.

    Internal use only. Not meant to be instantiated."""

    def suppliers():
        """RE.suppliers() -> print the suppliers of RE.

        There is no supplier: nothing is printed and None is returned."""
        return None
    suppliers = staticmethod(suppliers)

    def supplier_list(self):
        """RE.supplier_list() -> list.

        list of the supplier names for RE: always a new empty list."""
        nobody = []
        return nobody
    supplier_list = classmethod(supplier_list)

    def buffers(self, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet.
        Always raise a TypeError as the enzyme has no supplier."""
        raise TypeError("Enzyme not commercially available.")
    buffers = classmethod(buffers)

    def is_comm(self):
        """RE.iscomm() -> bool.

        True if RE has suppliers, which is never the case here."""
        return False
    is_comm = classmethod(is_comm)
1796 1797 1798 ############################################################################### 1799 # # 1800 # Restriction Batch # 1801 # # 1802 ############################################################################### 1803 1804
class RestrictionBatch(set):
    """A set of restriction enzymes (RestrictionType classes).

    Attributes set on every instance:
        mapping         -- dict enzyme -> result of the last search.
        already_mapped  -- (sequence as str, linear) of the last search,
                           or None; used to reuse the cached mapping.
        suppliers       -- list of the supplier codes used to fill the batch.
    """

    def __init__(self, first=(), suppliers=()):
        """RestrictionBatch([sequence[, suppliers]]) -> new RestrictionBatch.

        first is an iterable of enzymes (or of anything B.format accepts).
        suppliers is an iterable of supplier codes (keys of suppliers_dict);
        the enzymes of these suppliers are added to the batch.
        raise a ValueError if an element of first is not an enzyme,
        a KeyError if a code is not a supplier code."""
        first = [self.format(x) for x in first]
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        # add_supplier() and current_suppliers() rely on this attribute,
        # which was never created.
        self.suppliers = list(suppliers)

    def __str__(self):
        names = self.elements()
        if len(self) < 5:
            return '+'.join(names)
        # long batch: only the two first and the two last names are shown.
        return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self):
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        try:
            other = self.format(other)
        except ValueError:
            # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """B / other -> B.search(other)."""
        return self.search(other)

    def __rdiv__(self, other):
        """other / B -> B.search(other)."""
        return self.search(other)

    # the / operator is looked up under these names when true division
    # is in effect.
    __truediv__ = __div__
    __rtruediv__ = __rdiv__

    def get(self, enzyme, add=False):
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        raise a ValueError as well if enzyme is not in B and add is False."""
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % e.__name__)

    def lambdasplit(self, func):
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True."""
        new = RestrictionBatch()
        # fill the set itself: the former code stored the selection in a
        # _data attribute that a builtin set never reads, so the returned
        # batch was always empty.
        for enzyme in self:
            if func(enzyme):
                new.add_nocheck(enzyme)
        return new

    def add_supplier(self, letter):
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code."""
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """B.current_suppliers() -> list.

        return a sorted list of the suppliers which have been used to
        create the batch."""
        suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
        suppl_list.sort()
        return suppl_list

    def __iadd__(self, other):
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other):
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """B.remove(other) -> remove other from B if other is a RestrictionType.

        Safe set.remove method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B."""
        return set.remove(self, self.format(other))

    def add(self, other):
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return set.add(self, other)

    def format(self, y):
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if y can be evaluated to a RestrictionType return eval(y)
        raise a Value Error in all other case."""
        try:
            if isinstance(y, RestrictionType):
                return y
            # NOTE: str(y) is passed to eval(); never call this with text
            # coming from an untrusted source.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                # reuse the evaluated object: evaluating y a second time
                # failed when y was not itself a string.
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType."""
        return isinstance(y, RestrictionType) or \
               isinstance(eval(str(y)), RestrictionType)

    def split(self, *classes, **bool):
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        keep the enzymes which are a subclass of every class given, or, for
        a class given with class.__name__ = False, which are not a subclass
        of it.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions."""
        # NOTE: the keyword dictionary is named bool in the signature and
        # hides the builtin inside this method.
        def splittest(element):
            for klass in classes:
                wanted = bool.get(klass.__name__, True)
                if issubclass(element, klass) != wanted:
                    return False
            return True
        new = RestrictionBatch()
        # fill the set itself (see lambdasplit): the _data attribute
        # formerly used is ignored by a builtin set.
        for enzyme in self:
            if splittest(enzyme):
                new.add_nocheck(enzyme)
        return new

    def elements(self):
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically."""
        l = [str(e) for e in self]
        l.sort()
        return l

    def as_string(self):
        """B.as_string() -> list.

        return a list of the name of the elements of B."""
        return [str(e) for e in self]

    def suppl_codes(self):
        """B.suppl_codes() -> dict

        letter code for the suppliers"""
        supply = dict([(k, v[0]) for k, v in suppliers_dict.items()])
        return supply
    suppl_codes = classmethod(suppl_codes)

    def show_codes(self):
        """B.show_codes() -> letter codes for the suppliers"""
        supply = [' = '.join(i) for i in self.suppl_codes().items()]
        print('\n'.join(supply))
        return
    show_codes = classmethod(show_codes)

    def search(self, dna, linear=True):
        """B.search(dna) -> dict.

        search dna with every enzyme of B and return the dictionary
        enzyme -> result of enzyme.search. The dictionary is kept in
        B.mapping and returned as is when the same sequence (compared as a
        string) is searched again with the same topology.

        dna is a DNA (its topology is given by linear) or a FormattedSeq
        (its own linear attribute is used, the linear argument is ignored).
        raise a TypeError for anything else."""
        #
        #   here we replace the search method of the individual enzymes
        #   with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            #TODO - Why does this happen!
            #Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            # At the time of writing, Seq == method isn't implemented,
            # and therefore does object identity which is stricter.
            if (str(dna), linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), linear
                fseq = FormattedSeq(dna, linear)
                self.mapping = dict([(x, x.search(fseq)) for x in self])
                return self.mapping
        elif isinstance(dna, FormattedSeq):
            if (str(dna), dna.linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), dna.linear
                self.mapping = dict([(x, x.search(dna)) for x in self])
                return self.mapping
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
2030 2031 ############################################################################### 2032 # # 2033 # Restriction Analysis # 2034 # # 2035 ############################################################################### 2036
2037 -class Analysis(RestrictionBatch, PrintFormat):
2038
def __init__(self, restrictionbatch=RestrictionBatch(),sequence=DNA(''),
             linear=True):
    """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

    The enzymes of restrictionbatch become the elements of the analysis and
    the batch itself is remembered as self.rb. A non-empty sequence is
    searched at once with the given topology.

    Most methods of this class use a dictionary of results when one is
    given; otherwise they work on the results of the batch given here."""
    # NOTE: both default objects are created once, when the function is
    # defined, and are therefore shared by every call relying on them.
    RestrictionBatch.__init__(self, restrictionbatch)
    self.rb, self.sequence, self.linear = restrictionbatch, sequence, linear
    if self.sequence:
        self.search(self.sequence, self.linear)
2053
2054 - def __repr__(self):
2055 return 'Analysis(%s,%s,%s)'%\ 2056 (repr(self.rb),repr(self.sequence),self.linear)
2057
2058 - def _sub_set(self, wanted):
2059 """A._sub_set(other_set) -> dict. 2060 2061 Internal use only. 2062 2063 screen the results through wanted set. 2064 Keep only the results for which the enzymes is in wanted set. 2065 """ 2066 return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
2067
2068 - def _boundaries(self, start, end):
2069 """A._boundaries(start, end) -> tuple. 2070 2071 Format the boundaries for use with the methods that limit the 2072 search to only part of the sequence given to analyse. 2073 """ 2074 if not isinstance(start, int): 2075 raise TypeError('expected int, got %s instead' % type(start)) 2076 if not isinstance(end, int): 2077 raise TypeError('expected int, got %s instead' % type(end)) 2078 if start < 1: 2079 start += len(self.sequence) 2080 if end < 1: 2081 end += len(self.sequence) 2082 if start < end: 2083 pass 2084 else: 2085 start, end == end, start 2086 if start < 1: 2087 start == 1 2088 if start < end: 2089 return start, end, self._test_normal 2090 else: 2091 return start, end, self._test_reverse
2092
2093 - def _test_normal(self, start, end, site):
2094 """A._test_normal(start, end, site) -> bool. 2095 2096 Internal use only 2097 Test if site is in between start and end. 2098 """ 2099 return start <= site < end
2100
2101 - def _test_reverse(self, start, end, site):
2102 """A._test_reverse(start, end, site) -> bool. 2103 2104 Internal use only 2105 Test if site is in between end and start (for circular sequences). 2106 """ 2107 return start <= site <= len(self.sequence) or 1 <= site < end
2108
2109 - def print_that(self, dct=None, title='', s1=''):
2110 """A.print_that([dct[, title[, s1]]]) -> print the results from dct. 2111 2112 If dct is not given the full dictionary is used. 2113 """ 2114 if not dct: 2115 dct = self.mapping 2116 print 2117 return PrintFormat.print_that(self, dct, title, s1)
2118
2119 - def change(self, **what):
2120 """A.change(**attribute_name) -> Change attribute of Analysis. 2121 2122 It is possible to change the width of the shell by setting 2123 self.ConsoleWidth to what you want. 2124 self.NameWidth refer to the maximal length of the enzyme name. 2125 2126 Changing one of these parameters here might not give the results 2127 you expect. In which case, you can settle back to a 80 columns shell 2128 or try to change self.Cmodulo and self.PrefWidth in PrintFormat until 2129 you get it right.""" 2130 for k,v in what.iteritems(): 2131 if k in ('NameWidth', 'ConsoleWidth'): 2132 setattr(self, k, v) 2133 self.Cmodulo = self.ConsoleWidth % self.NameWidth 2134 self.PrefWidth = self.ConsoleWidth - self.Cmodulo 2135 elif k is 'sequence': 2136 setattr(self, 'sequence', v) 2137 self.search(self.sequence, self.linear) 2138 elif k is 'rb': 2139 self = Analysis.__init__(self, v, self.sequence, self.linear) 2140 elif k is 'linear': 2141 setattr(self, 'linear', v) 2142 self.search(self.sequence, v) 2143 elif k in ('Indent', 'Maxsize'): 2144 setattr(self, k, v) 2145 elif k in ('Cmodulo', 'PrefWidth'): 2146 raise AttributeError( \ 2147 'To change %s, change NameWidth and/or ConsoleWidth' \ 2148 % name) 2149 else: 2150 raise AttributeError( \ 2151 'Analysis has no attribute %s' % name) 2152 return
2153
2154 - def full(self, linear=True):
2155 """A.full() -> dict. 2156 2157 Full Restriction Map of the sequence.""" 2158 return self.mapping
2159
2160 - def blunt(self, dct = None):
2161 """A.blunt([dct]) -> dict. 2162 2163 Only the enzymes which have a 3'overhang restriction site.""" 2164 if not dct: 2165 dct = self.mapping 2166 return dict([(k,v) for k,v in dct.iteritems() if k.is_blunt()])
2167
2168 - def overhang5(self, dct=None):
2169 """A.overhang5([dct]) -> dict. 2170 2171 Only the enzymes which have a 5' overhang restriction site.""" 2172 if not dct: 2173 dct = self.mapping 2174 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2175 2176
2177 - def overhang3(self, dct=None):
2178 """A.Overhang3([dct]) -> dict. 2179 2180 Only the enzymes which have a 3'overhang restriction site.""" 2181 if not dct: 2182 dct = self.mapping 2183 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2184 2185
2186 - def defined(self, dct=None):
2187 """A.defined([dct]) -> dict. 2188 2189 Only the enzymes that have a defined restriction site in Rebase.""" 2190 if not dct: 2191 dct = self.mapping 2192 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2193
2194 - def with_sites(self, dct=None):
2195 """A.with_sites([dct]) -> dict. 2196 2197 Enzymes which have at least one site in the sequence.""" 2198 if not dct: 2199 dct = self.mapping 2200 return dict([(k,v) for k,v in dct.iteritems() if v])
2201
2202 - def without_site(self, dct=None):
2203 """A.without_site([dct]) -> dict. 2204 2205 Enzymes which have no site in the sequence.""" 2206 if not dct: 2207 dct = self.mapping 2208 return dict([(k,v) for k,v in dct.iteritems() if not v])
2209
2210 - def with_N_sites(self, N, dct=None):
2211 """A.With_N_Sites(N [, dct]) -> dict. 2212 2213 Enzymes which cut N times the sequence.""" 2214 if not dct: 2215 dct = self.mapping 2216 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2217
2218 - def with_number_list(self, list, dct= None):
2219 if not dct: 2220 dct = self.mapping 2221 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2222
2223 - def with_name(self, names, dct=None):
2224 """A.with_name(list_of_names [, dct]) -> 2225 2226 Limit the search to the enzymes named in list_of_names.""" 2227 for i, enzyme in enumerate(names): 2228 if not enzyme in AllEnzymes: 2229 print "no datas for the enzyme:", str(name) 2230 del names[i] 2231 if not dct: 2232 return RestrictionBatch(names).search(self.sequence) 2233 return dict([(n, dct[n]) for n in names if n in dct])
2234
2235 - def with_site_size(self, site_size, dct=None):
2236 """A.with_site_size(site_size [, dct]) -> 2237 2238 Limit the search to the enzymes whose site is of size <site_size>.""" 2239 sites = [name for name in self if name.size == site_size] 2240 if not dct: 2241 return RestrictionBatch(sites).search(self.sequence) 2242 return dict([(k,v) for k,v in dct.iteritems() if k in site_size])
2243
2244 - def only_between(self, start, end, dct=None):
2245 """A.only_between(start, end[, dct]) -> dict. 2246 2247 Enzymes that cut the sequence only in between start and end.""" 2248 start, end, test = self._boundaries(start, end) 2249 if not dct: 2250 dct = self.mapping 2251 d = dict(dct) 2252 for key, sites in dct.iteritems(): 2253 if not sites: 2254 del d[key] 2255 continue 2256 for site in sites: 2257 if test(start, end, site): 2258 continue 2259 else: 2260 del d[key] 2261 break 2262 return d
2263
2264 - def between(self, start, end, dct=None):
2265 """A.between(start, end [, dct]) -> dict. 2266 2267 Enzymes that cut the sequence at least in between start and end. 2268 They may cut outside as well.""" 2269 start, end, test = self._boundaries(start, end) 2270 d = {} 2271 if not dct: 2272 dct = self.mapping 2273 for key, sites in dct.iteritems(): 2274 for site in sites: 2275 if test(start, end, site): 2276 d[key] = sites 2277 break 2278 continue 2279 return d
2280
2281 - def show_only_between(self, start, end, dct=None):
2282 """A.show_only_between(start, end [, dct]) -> dict. 2283 2284 Enzymes that cut the sequence outside of the region 2285 in between start and end but do not cut inside.""" 2286 d = [] 2287 if start <= end: 2288 d = [(k, [vv for vv in v if start<=vv<=end]) 2289 for v in self.between(start, end, dct)] 2290 else: 2291 d = [(k, [vv for vv in v if start<=vv or vv <= end]) 2292 for v in self.between(start, end, dct)] 2293 return dict(d)
2294
2295 - def only_outside(self, start, end, dct = None):
2296 """A.only_outside(start, end [, dct]) -> dict. 2297 2298 Enzymes that cut the sequence outside of the region 2299 in between start and end but do not cut inside.""" 2300 start, end, test = self._boundaries(start, end) 2301 if not dct : dct = self.mapping 2302 d = dict(dct) 2303 for key, sites in dct.iteritems(): 2304 if not sites: 2305 del d[key] 2306 continue 2307 for site in sites: 2308 if test(start, end, site): 2309 del d[key] 2310 break 2311 else: 2312 continue 2313 return d
2314
2315 - def outside(self, start, end, dct=None):
2316 """A.outside((start, end [, dct]) -> dict. 2317 2318 Enzymes that cut outside the region in between start and end. 2319 No test is made to know if they cut or not inside this region.""" 2320 start, end, test = self._boundaries(start, end) 2321 if not dct: 2322 dct = self.mapping 2323 d = {} 2324 for key, sites in dct.iteritems(): 2325 for site in sites: 2326 if test(start, end, site): 2327 continue 2328 else: 2329 d[key] = sites 2330 break 2331 return d
2332 2333
2334 - def do_not_cut(self, start, end, dct = None):
2335 """A.do_not_cut(start, end [, dct]) -> dict. 2336 2337 Enzymes that do not cut the region in between start and end.""" 2338 if not dct: 2339 dct = self.mapping 2340 d = self.without_site() 2341 d.update(self.only_outside(start, end, dct)) 2342 return d

#
#   The restriction enzyme classes are created dynamically when the module is
#   imported. Here is the magic which allows the creation of the
#   restriction-enzyme classes.
#
#   There are two dictionaries in Restriction_Dictionary: one for the types
#   (called pseudo-types, as they really correspond to the values that
#   instances of RestrictionType can take) and one for the enzymes.
#   The reason is efficiency: the bases are evaluated once per pseudo-type.
#
#   However Restriction is still a very inefficient module at import. But
#   remember that around 660 classes (which is more or less the size of Rebase)
#   have to be created dynamically. However, this processing takes place only
#   once.
#   This inefficiency is however largely compensated by the use of metaclasses,
#   which provide a very efficient layout for the classes themselves, mostly
#   alleviating the need for if/else tests in the class methods.
#
#   It is essential to run Restriction with doc string optimisation (-OO switch)
#   as the doc strings of 660 classes take a lot of processing.
#
CommOnly    = RestrictionBatch()    # commercial enzymes
NonComm     = RestrictionBatch()    # not available commercially
for TYPE, (bases, enzymes) in typedict.iteritems():
    #
    #   The keys are the pseudo-types TYPE (stored as type1, type2...).
    #   The names are not important and are only present to differentiate
    #   the keys in the dict. All the pseudo-types are in fact RestrictionType.
    #   These names will not be used afterwards and the pseudo-types are not
    #   kept in the locals() dictionary. It is therefore impossible to
    #   import them.
    #   Now, if you have a look at the dictionary, you will see that not all
    #   the types are present, as those without corresponding enzymes have
    #   been removed by Dictionary_Builder().
    #
    #   The values are tuples which contain
    #   as first element a tuple of bases (as strings) and
    #   as second element the names of the enzymes.
    #
    #   First eval the bases: each string is turned into the object of that
    #   name visible in this module.
    #
    bases = tuple([eval(x) for x in bases])
    #
    #   Now create the particular value of RestrictionType for the classes
    #   in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        #   Now, we go through all the enzymes and assign them their type.
        #   enzymedict[k] contains the values of the attributes for this
        #   particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        #   We add the enzymes to the corresponding batch.
        #
        #   No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm() : CommOnly.add_nocheck(newenz)
        else : NonComm.add_nocheck(newenz)
#
#   AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = CommOnly | NonComm
#
#   Now, place the enzymes in locals so they can be imported.
#   map(None, a, b) pairs the items of both sequences (Python 2 only), so
#   every enzyme is bound to the module-level name given by str(enzyme).
#
names = [str(x) for x in AllEnzymes]
locals().update(dict(map(None, names, AllEnzymes)))
__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
#
#   Remove the helper names from the module namespace. `x` is only defined
#   here because Python 2 list comprehensions leak their loop variable, and
#   `k` only if at least one enzyme was processed.
#   T and newenz are not deleted and remain module-level names.
#
del k, x, enzymes, TYPE, bases, names