Package Bio :: Package Restriction :: Module PrintFormat
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.PrintFormat

  1  #!/usr/bin/env python 
  2  # 
  3  #      Restriction Analysis Libraries. 
  4  #      Copyright (C) 2004. Frederic Sohm. 
  5  # 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10   
 11  import re 
 12  import itertools 
 13  from Bio.Restriction import RanaConfig as RanaConf 
 14   
 15  """ 
 16  Usage: 
 17   
 18      PrintFormat allows the results of a restriction analysis to be printed 
 19      in 3 different formats: 
 20      list, number or map. 
 21   
 22      The easiest way to use it is: 
 23       
 24      >>> from Bio.Restriction.PrintFormat import PrintFormat 
 25      >>> from Bio.Restriction.Restriction import AllEnzymes 
 26      >>> from Bio import Entrez 
 27      >>> from Bio import SeqIO 
 28      >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322") 
 29      >>> pBR322 = SeqIO.read(handle, "fasta") 
 30      >>> handle.close() 
 31      >>> dct = AllEnzymes.search(pBR322.seq) 
 32      >>> new = PrintFormat() 
 33      >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n') 
 34   
 35       my pBR322 analysis 
 36        
 37      AasI       :  2169, 2582. 
 38      AatII      :  4289. 
 39      ... 
 40              More enzymes. 
 41      ... 
 42      ZraI       :  4287. 
 43      ZrmI       :  3847. 
 44       
 45       no site: 
 46        
 47      AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI     
 48      ... 
 49              More enzymes. 
 50      ... 
 51      Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI      
 52      Zsp2I  
 53   
 54      >>> new.sequence = pBR322.seq 
 55      >>> new.print_as("map") 
 56      >>> new.print_that(dct) 
 57      ... 
 58       
 59      Some of the methods of PrintFormat are meant to be overridden by derived 
 60      classes. 
 61  """ 
 62   
class PrintFormat(object):
    """Format the results of a restriction analysis as text.

    Three layouts are available and are selected with print_as():
    'list' (one line per enzyme), 'number' (enzymes grouped by how many
    times they cut) and 'map' (cut sites drawn above the sequence).

    The 'map' layout reads the sequence from self.sequence, which this class
    never sets itself; assign it before printing a map.
    """

    # Layout settings come from the RanaConfig module.
    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth = RanaConf.NameWidth
    # Subtracted from linesize when wrapping lists of positions.
    MaxSize = RanaConf.MaxSize
    # Columns left over once the console is filled with whole name columns.
    Cmodulo = ConsoleWidth % NameWidth
    # Widest multiple of NameWidth that fits in the console.
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    # A row of enzyme names is flushed once it grows past this width.
    linesize = PrefWidth - NameWidth

    def __init__(self):
        """PrintFormat() -> new PrintFormat instance."""
        pass

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> select the output format.

        Valid formats are:
            'list'   -> alphabetical order
            'number' -> number of sites in the sequence
            'map'    -> a map representation of the sequence with the sites.

        Any other value selects 'list'.

        The choice is stored by rebinding make_format on this instance.
        If you want more flexibility, override make_format.
        """
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> print dct nicely formatted.

        dct is a dictionary mapping each enzyme to its list of cut positions.
        If dct is empty or None, self.results is used when it exists.

        title is the title of the output.
        It must be a formatted string, i.e. you must include the line breaks.

        s1 is the title separating the enzymes that have sites from those
        without sites. It must be a formatted string as well.

        The layout is the one selected with print_as() (default: list).
        """
        if not dct:
            # Fall back to results stored on the instance, if any; this class
            # itself never sets self.results.
            dct = getattr(self, 'results', None) or {}
        ls, nc = [], []
        for enzyme, sites in dct.items():
            if sites:
                ls.append((enzyme, sites))
            else:
                nc.append(enzyme)
        print(self.make_format(ls, title, nc, s1))

    def make_format(self, cut=None, title='', nc=None, s1=''):
        """PF.make_format(cut, title, nc, s1) -> string.

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.

        cut is a list of (enzyme, positions) tuples.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes.
        """
        if cut is None:
            cut = []
        if nc is None:
            nc = []
        return self._make_list(cut, title, nc, s1)

    # _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls, title, nc, s1) -> string.

        Return a string of the form:

            title.

            enzyme1 : position1, position2.
            enzyme2 : position1, position2, position3.

        followed by the non cutting enzymes.

        ls is a list of (enzyme, positions) tuples.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes.
        """
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls, title, nc, s1) -> string.

        Return a string of the form:

            title.

                enzyme1, position
                |
            AAAAAAAAAAAAAAAAAAAAA...
            |||||||||||||||||||||
            TTTTTTTTTTTTTTTTTTTTT...

        followed by the non cutting enzymes.

        ls is a list of (enzyme, positions) tuples.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes.
        """
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls, title, nc, s1) -> string.

        Return a string of the form:

            title.

            enzymes which cut 1 times :

            enzyme1 : position1.

            enzymes which cut 2 times :

            enzyme2 : position1, position2.
            ...

        followed by the non cutting enzymes.

        ls is a list of (enzyme, positions) tuples.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes.
        """
        return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls, title, nc, s1) -> string.

        Return the title followed by the formatted non cutting enzymes.

        ls is not used; it is only here so the signature matches make_format.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes.
        """
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=None, title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        Return a formatted string of the non cutting enzymes: s1 (or a
        default sentence when s1 is empty) followed by the sorted enzyme
        names laid out in columns of NameWidth characters.

        nc is a list of non cutting enzymes; it is not modified.
        s1 is the sentence before the non cutting enzymes.
        ls and title are not used.

        When nc is empty, s1 is returned unchanged.
        """
        if not nc:
            return s1
        pieces = [s1 or '\n Enzymes which do not cut the sequence.\n\n']
        row = ''
        for enzyme in sorted(nc):
            row += str(enzyme).ljust(self.NameWidth)
            if len(row) > self.linesize:
                pieces.append(row + '\n')
                row = ''
        # The last (possibly empty) row is always emitted.
        pieces.append(row + '\n')
        return ''.join(pieces)

    def _make_list_only(self, ls, title, nc=None, s1=''):
        """PF._make_list_only(ls, title) -> string.

        Return a string of the form:

            title.

            enzyme1 : position1, position2.
            enzyme2 : position1, position2, position3.
            ...

        ls is a list of (enzyme, positions) tuples.
        title is a string.
        nc and s1 are not used: non cutting enzymes are not included.
        """
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=None, s1=''):
        """PF._make_number_only(ls, title) -> string.

        Return a string of the form:

            title.

            enzymes which cut 1 times :

            enzyme1 : position1.

            enzymes which cut 2 times :

            enzyme2 : position1, position2.
            ...

        A section is only written for site counts that actually occur.

        ls is a list of (enzyme, positions) tuples; it is not modified.
        title is a string.
        nc and s1 are not used: non cutting enzymes are not included.
        """
        if not ls:
            return title

        def site_count(item):
            return len(item[1])

        # groupby only merges adjacent items, so order by site count first.
        by_count = sorted(ls, key=site_count)
        for count, group in itertools.groupby(by_count, key=site_count):
            title += "\n\nenzymes which cut %i times :\n\n" % count
            title = self.__next_section(list(group), title)
        return title

    def _make_map_only(self, ls, title, nc=None, s1=''):
        """PF._make_map_only(ls, title) -> string.

        Return a string of the form:

            title.

                enzyme1, position
                |
            AAAAAAAAAAAAAAAAAAAAA...
            |||||||||||||||||||||
            TTTTTTTTTTTTTTTTTTTTT...

        The sequence (self.sequence) is drawn in rows of 60 bases. Above
        each row every cut position in that row gets a label line
        (position and enzyme names) and a marker line.

        ls is a list of (enzyme, positions) tuples.
        title is a string.
        nc and s1 are not used: non cutting enzymes are not included.
        """
        if not ls:
            return title
        width = 60  # bases drawn per row
        pipe = '|'

        # cut position -> names of the enzymes cutting there
        enzymemap = {}
        for enzyme, cuts in ls:
            for cut in cuts:
                enzymemap.setdefault(cut, []).append(str(enzyme))

        length = len(self.sequence)
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())

        # Partition the sorted cut positions by row. cutloc[start] holds the
        # positions drawn above the row beginning at 0-based offset start.
        # Every row gets an entry, even when it has no cut.
        remaining = sorted(enzymemap)
        cutloc = {}
        base = 0
        for base in range(width, length, width):
            cutloc[base - width] = [key for key in remaining if key <= base]
            remaining = [key for key in remaining if key > base]
        # base is now the offset of the last (possibly partial) row.
        cutloc[base] = remaining

        parts = [title or '']

        # Full rows.
        for start in range(0, base, width):
            end = start + width
            parts.append(self._map_markers(cutloc[start], enzymemap, width))
            ruler = ''.join((str(start + 1).ljust(15), ' ' * 30,
                             str(end).rjust(15), '\n\n'))
            parts.append('\n'.join((sequence[start:end], pipe * width,
                                    revsequence[start:end], ruler)))

        # Last row, which may be shorter than width.
        tail = length - base
        parts.append(self._map_markers(cutloc[base], enzymemap, width,
                                       last=length))
        parts.append(''.join((sequence[base:length], '\n',
                              pipe * tail, '\n',
                              revsequence[base:length], '\n',
                              str(base + 1).ljust(15),
                              ' ' * (tail - 30),
                              str(length).rjust(15), '\n\n')))
        return ''.join(parts)

    def _map_markers(self, keys, enzymemap, width, last=None):
        """Return the label and marker lines for the cuts above one map row.

        keys is the sorted list of cut positions drawn above the row.
        enzymemap maps each position to the names of the enzymes cutting it.
        width is the number of bases per row.
        last, when given, is a position at which drawing stops: its marker
        line is cut right after the marker and later positions are ignored.
        """
        line = ' ' * width
        out = []
        for key in keys:
            names = ''.join(' ' + name for name in enzymemap[key])
            # Positions are 1-based; a multiple of width falls in the last
            # column of its row.
            col = (key - 1) % width
            # The label starts at the cut column; markers already placed to
            # its left stay visible.
            out.append(''.join((line[:col], str(key), names, '\n')))
            if key == last:
                out.append(''.join((line[:col], '|', '\n')))
                break
            line = ''.join((line[:col], '|', line[col + 1:]))
            out.append(line + '\n')
        return ''.join(out)

    # private method to do lists:

    def __next_section(self, ls, into):
        """PF.__next_section(ls, into) -> string.

        ls is a list of tuples (enzyme, [int, int, ...]); it is not modified.
        into is a string to which the formatted ls will be appended.

        Format ls, sorted, as lines of the form:

            enzyme1 : position1.
            enzyme2 : position2, position3.

        Position lists longer than linesize - MaxSize characters are wrapped
        after a comma and continued on an indented line.

        Return into followed by the formatted lines.
        """
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Longest run of at most linesize characters ending in ',' or '.'.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        pieces = [into]
        for name, sites in sorted(ls):
            text = ', '.join(str(site) for site in sites) + '.'
            if len(text) > linesize:
                # cut where appropriate and add the indentation
                text = indentation.join(m.group() for m in pat.finditer(text))
            pieces.extend((str(name).ljust(self.NameWidth), ' : ', text, '\n'))
        return ''.join(pieces)