1
2
3
4
5
6 from Bio.Alphabet import IUPAC
7 from Bio import File
8 from Bio.ParserSupport import *
9 from Bio import Seq
10 from Bio.MEME import Motif
11 import re
12
14 """A class for holding the results of a MEME run (OBSOLETE).
15
16 A MEMERecord is an object that holds the results from running
17 MEME. It implements no methods of its own.
18
19 This class is OBSOLETE; its functionality is now available through
20 Bio.Motif.Parsers.MEME.
21 """
23 """__init__ (self)"""
24 self.motifs = []
25 self.version = ""
26 self.datafile = ""
27 self.command = ""
28 self.alphabet = None
29 self.sequence_names = []
30
32 for m in self.motifs:
33 if m.name == name:
34 return m
35
37 """A parser for the text output of the MEME program (OBSOLETE).
38 Parses the output into an object of the MEMERecord class.
39
40 Methods:
41 parse (handle): parses the contents of the file handle passed to it.
42
43 Example:
44
45 f = open("meme.output.txt")
46 parser = MEMEParser()
47 meme_record = parser.parse(f)
48 for motif in meme_record.motifs:
49 for instance in motif.instances:
50 print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
51
52 This class is OBSOLETE; its functionality is now available through
53 Bio.Motif.Parsers.MEME.
54 """
59
def parse(self, handle):
    """Parse MEME text output read from ``handle``.

    ``handle`` is forwarded unchanged to ``self._scanner.feed`` together
    with ``self._consumer``; the scanner reports what it reads to the
    consumer.

    Arguments:
    handle -- the source of MEME output to parse.

    Returns whatever the consumer has stored in its ``data`` attribute
    once scanning is finished (per the class documentation, a MEMERecord).
    """
    self._scanner.feed(handle, self._consumer)
    return self._consumer.data
64
65
66
68 """Scanner for MEME output (OBSOLETE).
69
70 Methods:
71 feed
72
73 This class is OBSOLETE; its functionality is now available through
74 Bio.Motif.Parsers.MEME.
75 """
76
def feed(self, handle, consumer):
    """Feed in MEME output for scanning.

    Arguments:
    handle -- input to scan; it should implement the readline method.
        It is used as-is when it is already a File.UndoHandle and is
        wrapped in one otherwise.
    consumer -- a Consumer object that can receive the salient events.

    The header is scanned first (``_scan_header``), followed by the
    motif sections (``_scan_motifs``); both receive the same undo
    handle and consumer.
    """
    if isinstance(handle, File.UndoHandle):
        uhandle = handle
    else:
        uhandle = File.UndoHandle(handle)

    self._scan_header(uhandle, consumer)
    self._scan_motifs(uhandle, consumer)
90
92 try:
93 read_and_call_until(uhandle, consumer.noevent, contains = 'MEME version')
94 except ValueError:
95 raise ValueError("Improper input file. File should contain a line starting MEME version.")
96 read_and_call(uhandle, consumer._version, start = 'MEME version')
97 read_and_call_until(uhandle, consumer.noevent, start = 'TRAINING SET')
98 read_and_call(uhandle, consumer.noevent, start = 'TRAINING SET')
99 read_and_call(uhandle, consumer.noevent, start = '****')
100 read_and_call(uhandle, consumer._datafile, start = 'DATAFILE')
101 read_and_call(uhandle, consumer._alphabet, start = 'ALPHABET')
102 read_and_call(uhandle, consumer.noevent, start = 'Sequence name')
103 read_and_call(uhandle, consumer.noevent, start = '----')
104 read_and_call_until(uhandle, consumer._sequence_name, start = '***')
105 read_and_call_until(uhandle, consumer.noevent, start = 'command:')
106 read_and_call(uhandle, consumer._commandline, start = 'command:')
107 read_and_call_until(uhandle, consumer.noevent, start = 'MOTIF 1')
108
110 while 1:
111 read_and_call(uhandle, consumer._add_motif_with_info, start = 'MOTIF')
112 read_and_call_until(uhandle, consumer.noevent, contains = 'sorted by position p-value')
113 read_and_call(uhandle, consumer.motif_name, contains = 'sorted by position p-value')
114 read_and_call(uhandle, consumer.noevent, start = '---')
115 read_and_call(uhandle, consumer.noevent, start = 'Sequence name')
116 read_and_call(uhandle, consumer.noevent, start = '---')
117 read_and_call_until(uhandle, consumer.add_instance, start = '---')
118 read_and_call_until(uhandle, consumer.noevent, start = 'log-odds matrix')
119 read_and_call(uhandle, consumer.noevent)
120 read_and_call_until(uhandle, consumer.add_to_logodds, start = '---')
121 read_and_call_until(uhandle, consumer.noevent, start = 'letter-probability matrix')
122 read_and_call(uhandle, consumer.noevent, start = 'letter-probability matrix')
123 read_and_call_until(uhandle, consumer.add_to_pssm, start = '---')
124 read_and_call_until(uhandle, consumer.noevent, start = 'Time')
125 read_and_call(uhandle, consumer.noevent, start = 'Time')
126 read_and_call(uhandle, consumer.noevent, blank = 1)
127 read_and_call(uhandle, consumer.noevent, start = '***')
128 read_and_call_while(uhandle, consumer.noevent, blank = 1)
129 read_and_call(uhandle, consumer.noevent, start = '***')
130 line = safe_peekline(uhandle)
131 if line.startswith("SUMMARY OF MOTIFS"):
132 break
133
134
135
137 """
138 Consumer that can receive events from MEME Scanner (OBSOLETE).
139
140 This is the Consumer object that should be passed to the
141 MEME Scanner.
142
143 This class is OBSOLETE; its functionality is now available through
144 Bio.Motif.Parsers.MEME.
145 """
146
148 self.current_motif = None
149 self.sequence_names = []
150 self.data = MEMERecord()
151
156
158 line = line.strip()
159 line = line.replace('DATAFILE= ','')
160 self.data.datafile = line
161
170
172 line = line.strip()
173 ls = line.split()
174 self.data.sequence_names.append(ls[0])
175 if len(ls) == 6:
176 self.data.sequence_names.append(ls[3])
177
179 line = line.strip()
180 line = line.replace('command: ','')
181 self.data.command = line
182
193
199
209
211 line = line.strip()
212 sl = line.split()
213 thisposition = tuple([float(i) for i in sl])
214 self.current_motif.add_to_pssm(thisposition)
215
217 line = line.strip()
218 sl = line.split()
219 thisposition = tuple([float(i) for i in sl])
220 self.current_motif.add_to_logodds(thisposition)
221
224
225
226
228 """
229 Consumer that can receive events from _MASTScanner (OBSOLETE).
230
231 A _MASTConsumer parses lines from a mast text output file.
232 The motif match diagrams are parsed using line buffering.
233 Each of the buffering functions has a dummy variable that is
234 required for testing using the Bio.ParserSupport.TaggingConsumer.
235 If this variable isn't there, the TaggingConsumer barfs. In
236 the _MASTScanner, None is passed in the place of this variable.
237
238 This class is OBSOLETE; its functionality is now available through
239 Bio.Motif.Parsers.MAST.
240 """
242 self.data = MASTRecord()
243 self._current_seq = ""
244 self._line_buffer = []
245 self._buffer_size = 0
246 self._buffered_seq_start = 0
247
252
264
275
299
324
350
356
358 line = line.strip()
359 if not line.startswith('*****'):
360 self._line_buffer.append(line)
361 else:
362 return -1
363
365 """Parses the line buffer to get e-values for each instance of a motif.
366 This buffer parser is the most likely point of failure for the
367 MASTParser.
368 """
369 insts = self.data.get_motif_matches_for_sequence(self._current_seq)
370 if len(insts) > 0:
371
372 fullSeq = self._line_buffer[self._buffer_size-1]
373 pvals = self._line_buffer[1].split()
374 p = 0
375 lpval = len(pvals)
376 while p < lpval:
377 if pvals[p].count('e') > 1:
378
379
380 pvs = []
381 spe = pvals[p].split('e')
382 spe.reverse()
383 dotind = spe[1].find('.')
384 if dotind == -1:
385 thispval = spe[1][-1] + 'e' + spe[0]
386 else:
387 thispval = spe[1][dotind-1:] + 'e' + spe[0]
388 pvs.append(thispval)
389 for spi in range(2,len(spe)):
390 dotind = spe[spi].find('.')
391 prevdotind = spe[spi-1].find('.')
392 if dotind != -1:
393 if prevdotind == -1:
394 thispval = spe[spi][dotind-1:] + 'e' + spe[spi-1][:-1]
395 else:
396 thispval = spe[spi][dotind-1:] + 'e' + spe[spi-1][0:prevdotind-1]
397 else:
398 if prevdotind == -1:
399 thispval = spe[spi][-1] + 'e' + spe[spi-1][:-1]
400 else:
401 thispval = spe[spi][-1] + 'e' + spe[spi-1][0:prevdotind-1]
402 pvs.append(thispval)
403 pvs.reverse()
404 if p > 0:
405 pvals = pvals[0:p] + pvs + pvals[p+1:]
406 else:
407 pvals = pvs + pvals[p+1:]
408 lpval = len(pvals)
409 p += 1
410 i = 0
411 if len(pvals) != len(insts):
412 sys.stderr.write("Failure to parse p-values for " + self._current_seq + ": " + self._line_buffer[1] + " to: " + str(pvals) + "\n")
413 pvals = []
414
415
416 for i in range(0,len(insts)):
417 inst = insts[i]
418 start = inst.start - self._buffered_seq_start + 1
419 thisSeq = fullSeq[start:start+inst.length]
420 thisSeq = Seq.Seq(thisSeq, self.data.alphabet)
421 inst._sequence(thisSeq)
422 if pvals:
423 inst._pvalue(float(pvals[i]))
424
426 self._line_buffer = []
427 self._buffer_size = 0
428
430 if self._buffer_size == 0:
431 if len(self._line_buffer) > 0:
432 self._buffer_size = len(self._line_buffer)
433 ll = self._line_buffer[self._buffer_size-1].split()
434 self._line_buffer[self._buffer_size-1] = ll[1]
435 self._buffered_seq_start = int(ll[0])
436 else:
437 i = 0
438 for i in range(self._buffer_size, len(self._line_buffer)-1):
439 self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size] + self._line_buffer[i].strip()
440 ll = self._line_buffer[len(self._line_buffer)-1].split()
441 if int(ll[0]) == self._buffered_seq_start + len(self._line_buffer[self._buffer_size-1]):
442 self._line_buffer[self._buffer_size-1] += ll[1]
443 else:
444 differ = int(ll[0]) - (self._buffered_seq_start + len(self._line_buffer[self._buffer_size-1]))
445 self._line_buffer[self._buffer_size-1] += "N"*differ
446 self._line_buffer[self._buffer_size-1] += ll[1]
447 self._line_buffer = self._line_buffer[0:self._buffer_size]
448
450 line = line.strip()
451 if line.find('[') != -1 or line.find('<') != -1:
452 pass
453 elif line.find('e') != -1:
454 pass
455 elif line.find('+') != -1:
456 pass
457
460
461
462
464 """
465 Parser for MAST text output (OBSOLETE).
466 HTML output cannot be parsed yet. Returns a MASTRecord.
467
468 A MASTParser takes a file handle for a MAST text output file and
469 returns a MASTRecord, containing the hits between motifs and
470 sequences. The parser does some unusual line buffering to parse out
471 match diagrams. Really complex diagrams often lead to an error message
472 and p-values not being parsed for a given line.
473
474 Methods:
475 parse (handle): parses the data from the file handle passed to it.
476
477 Example:
478
479 f = open("mast_file.txt")
480 parser = MASTParser()
481 mast_record = parser.parse(f)
482 for motif in mast_record.motifs:
483 for instance in motif.instances:
484 print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
485
486 This class is OBSOLETE; its functionality is now available through
487 Bio.Motif.Parsers.MAST.
488 """
492
def parse(self, handle):
    """Parse MAST text output read from ``handle``.

    ``handle`` is forwarded unchanged to ``self._scanner.feed`` together
    with ``self._consumer``.

    Arguments:
    handle -- the source of MAST text output to parse.

    Returns whatever the consumer has stored in its ``data`` attribute
    once scanning is finished (per the class documentation, a MASTRecord).
    """
    self._scanner.feed(handle, self._consumer)
    return self._consumer.data
496
497
498
500 """
501 Scanner for MAST text output (OBSOLETE).
502
503 This class is OBSOLETE; its functionality is now available through
504 Bio.Motif.Parsers.MAST.
505 """
506 - def feed (self, handle, consumer):
515
517 try:
518 read_and_call_until(uhandle, consumer.noevent, contains = "MAST version")
519 except ValueError:
520 raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
521 read_and_call(uhandle, consumer._version, contains = 'MAST version')
522 read_and_call_until(uhandle, consumer.noevent, start = 'DATABASE AND MOTIFS')
523 read_and_call(uhandle, consumer.noevent, start = 'DATABASE')
524 read_and_call(uhandle, consumer.noevent, start = '****')
525 read_and_call(uhandle, consumer._database, contains = 'DATABASE')
526 read_and_call_until(uhandle, consumer.noevent, contains = 'MOTIF WIDTH')
527 read_and_call(uhandle, consumer.noevent, contains = 'MOTIF')
528 read_and_call(uhandle, consumer.noevent, contains = '----')
529 read_and_call_until(uhandle, consumer._add_motif, blank = 1)
530 read_and_call_until(uhandle, consumer.noevent, start = 'SECTION II:')
531
533 read_and_call_until(uhandle, consumer.noevent, start = 'SEQUENCE NAME')
534 read_and_call(uhandle, consumer.noevent, start = 'SEQUENCE NAME')
535 read_and_call(uhandle, consumer.noevent, start = '---')
536
537 read_and_call_until(uhandle, consumer.noevent, blank = 1)
538 read_and_call(uhandle, consumer.noevent, blank = 1)
539
541 read_and_call_until(uhandle, consumer.noevent, start = 'SECTION III:')
542 read_and_call(uhandle, consumer.noevent, start = 'SECTION III:')
543 read_and_call_until(uhandle, consumer.noevent, start = '****')
544 read_and_call(uhandle, consumer.noevent, start = '****')
545 read_and_call_until(uhandle, consumer.noevent, start = '*****')
546 read_and_call(uhandle, consumer.noevent)
547 read_and_call_while(uhandle, consumer.noevent, blank = 1)
548 readMatches = 1
549 while readMatches == 1:
550 if consumer._current_seq:
551 if consumer._buffer_size != 0:
552 consumer._parse_buffer(None)
553 consumer._blank_buffer(None)
554 read_and_call(uhandle, consumer._set_current_seq)
555 read_and_call_until(uhandle, consumer.noevent, start = ' DIAGRAM')
556 read_and_call_until(uhandle, consumer._add_line_to_buffer, blank = 1)
557 consumer._add_diagram_from_buffer(None)
558 consumer._blank_buffer(None)
559 read_and_call(uhandle, consumer.noevent, blank = 1)
560 while 1:
561 line = safe_peekline(uhandle)
562 if line.startswith('****'):
563 consumer._parse_buffer(None)
564 readMatches = 0
565 break
566 read_and_call_until(uhandle, consumer._add_line_to_buffer, blank = 1)
567 read_and_call(uhandle, consumer.noevent, blank = 1)
568 consumer._collapse_buffer(None)
569 if attempt_read_and_call(uhandle, consumer.noevent, blank = 1):
570 break
571 elif attempt_read_and_call(uhandle, consumer.noevent, start = '*****'):
572 consumer._parse_buffer(None)
573 consumer._blank_buffer(None)
574 readMatches = 0
575 break
576
577
578
580 """The class for holding the results from a MAST run (OBSOLETE).
581
582 A MASTRecord holds data about matches between motifs and sequences.
583 The motifs held by the MASTRecord are objects of the class MEMEMotif.
584
585 Methods:
586 get_motif_matches_for_sequence(sequence_name): returns all of the
587 motif matches within a given sequence. The matches are objects of
588 the class MEME.Motif.Instance
589 get_motif_matches (motif_name): returns all of the matches for a motif
590 in the sequences searched. The matches returned are of class
591 MEME.Motif.Instance
592 get_motif_by_name (motif_name): returns a MEMEMotif with the given
593 name.
594
595 This class is OBSOLETE; its functionality is now available through
596 Bio.Motif.Parsers.MAST.
597 """
606
609
615
618
620 insts = []
621 for m in self.motifs:
622 for i in m.instances:
623 if i.sequence_name == seq:
624 insts.append(i)
625 insts.sort(lambda x,y: cmp(x.start, y.start))
626 return insts
627
631
633 self.diagrams[seq] = diagram
634
637
640
643
645 for m in self.motifs:
646 if m.name == name:
647 return m
648