1
2
3
4
5
6
7
8 """Code for calling standalone BLAST and parsing plain text output (OBSOLETE).
9
10 Rather than parsing the human readable plain text BLAST output (which seems to
11 change with every update to BLAST), we and the NBCI recommend you parse the
12 XML output instead. The plain text parser in this module still works at the
13 time of writing, but is considered obsolete and updating it to cope with the
14 latest versions of BLAST is not a priority for us.
15
16 This module also provides code to work with the "legacy" standalone version of
17 NCBI BLAST, tools blastall, rpsblast and blastpgp via three helper functions of
18 the same name. These functions are very limited for dealing with the output as
19 files rather than handles, for which the wrappers in Bio.Blast.Applications are
20 prefered. Furthermore, the NCBI themselves regard these command line tools as
21 "legacy", and encourage using the new BLAST+ tools instead. Biopython has
22 wrappers for these under Bio.Blast.Applications (see the tutorial).
23
24 Classes:
25 LowQualityBlastError Except that indicates low quality query sequences.
26 BlastParser Parses output from blast.
27 BlastErrorParser Parses output and tries to diagnose possible errors.
28 PSIBlastParser Parses output from psi-blast.
29 Iterator Iterates over a file of blast results.
30
31 _Scanner Scans output from standalone BLAST.
32 _BlastConsumer Consumes output from blast.
33 _PSIBlastConsumer Consumes output from psi-blast.
34 _HeaderConsumer Consumes header information.
35 _DescriptionConsumer Consumes description information.
36 _AlignmentConsumer Consumes alignment information.
37 _HSPConsumer Consumes hsp information.
38 _DatabaseReportConsumer Consumes database report information.
39 _ParametersConsumer Consumes parameters information.
40
41 Functions:
42 blastall Execute blastall (OBSOLETE).
43 blastpgp Execute blastpgp (OBSOLETE).
44 rpsblast Execute rpsblast (OBSOLETE).
45
46 For calling the BLAST command line tools, we encourage you to use the
47 command line wrappers in Bio.Blast.Applications - the three functions
48 blastall, blastpgp and rpsblast are considered to be obsolete now, and
49 are likely to be deprecated and then removed in future releases.
50 """
51
52 import os
53 import re
54
55 from Bio import File
56 from Bio.ParserSupport import *
57 from Bio.Blast import Record
58 from Bio.Application import _escape_filename
59
61 """Error caused by running a low quality sequence through BLAST.
62
63 When low quality sequences (like GenBank entries containing only
64 stretches of a single nucleotide) are BLASTed, they will result in
65 BLAST generating an error and not being able to perform the BLAST.
66 search. This error should be raised for the BLAST reports produced
67 in this case.
68 """
69 pass
70
72 """Error caused by running a short query sequence through BLAST.
73
74 If the query sequence is too short, BLAST outputs warnings and errors:
75 Searching[blastall] WARNING: [000.000] AT1G08320: SetUpBlastSearch failed.
76 [blastall] ERROR: [000.000] AT1G08320: Blast:
77 [blastall] ERROR: [000.000] AT1G08320: Blast: Query must be at least wordsize
78 done
79
80 This exception is raised when that condition is detected.
81
82 """
83 pass
84
85
87 """Scan BLAST output from blastall or blastpgp.
88
89 Tested with blastall and blastpgp v2.0.10, v2.0.11
90
91 Methods:
92 feed Feed data into the scanner.
93
94 """
95 - def feed(self, handle, consumer):
115
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160 consumer.start_header()
161
162 read_and_call(uhandle, consumer.version, contains='BLAST')
163 read_and_call_while(uhandle, consumer.noevent, blank=1)
164
165
166 attempt_read_and_call(uhandle, consumer.noevent, start="<pre>")
167
168
169 while attempt_read_and_call(uhandle,
170 consumer.reference, start='Reference'):
171
172
173 while 1:
174 line = uhandle.readline()
175 if is_blank_line(line):
176 consumer.noevent(line)
177 break
178 elif line.startswith("RID"):
179 break
180 else:
181
182 consumer.reference(line)
183
184
185 read_and_call_while(uhandle, consumer.noevent, blank=1)
186 attempt_read_and_call(uhandle, consumer.reference, start="RID:")
187 read_and_call_while(uhandle, consumer.noevent, blank=1)
188
189
190
191 if attempt_read_and_call(
192 uhandle, consumer.reference, start="Reference"):
193 read_and_call_until(uhandle, consumer.reference, blank=1)
194 read_and_call_while(uhandle, consumer.noevent, blank=1)
195
196
197 if attempt_read_and_call(
198 uhandle, consumer.reference, start="Reference"):
199 read_and_call_until(uhandle, consumer.reference, blank=1)
200 read_and_call_while(uhandle, consumer.noevent, blank=1)
201
202 line = uhandle.peekline()
203 assert line.strip() != ""
204 assert not line.startswith("RID:")
205 if line.startswith("Query="):
206
207
208
209 read_and_call(uhandle, consumer.query_info, start='Query=')
210 read_and_call_until(uhandle, consumer.query_info, blank=1)
211 read_and_call_while(uhandle, consumer.noevent, blank=1)
212
213
214 read_and_call_until(uhandle, consumer.database_info, end='total letters')
215 read_and_call(uhandle, consumer.database_info, contains='sequences')
216 read_and_call_while(uhandle, consumer.noevent, blank=1)
217 elif line.startswith("Database:"):
218
219 read_and_call_until(uhandle, consumer.database_info, end='total letters')
220 read_and_call(uhandle, consumer.database_info, contains='sequences')
221 read_and_call_while(uhandle, consumer.noevent, blank=1)
222
223
224
225
226 read_and_call(uhandle, consumer.query_info, start='Query=')
227
228 while True:
229 line = uhandle.peekline()
230 if not line.strip() : break
231 if "Score E" in line : break
232
233 read_and_call(uhandle, consumer.query_info)
234 read_and_call_while(uhandle, consumer.noevent, blank=1)
235 else:
236 raise ValueError("Invalid header?")
237
238 consumer.end_header()
239
257
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281 consumer.start_descriptions()
282
283
284
285 attempt_read_and_call(uhandle, consumer.noevent, start='Searching')
286
287
288
289 if not uhandle.peekline():
290 raise ValueError("Unexpected end of blast report. " + \
291 "Looks suspiciously like a PSI-BLAST crash.")
292
293
294
295
296
297
298
299
300
301 line = uhandle.peekline()
302 if line.find("ERROR:") != -1 or line.startswith("done"):
303 read_and_call_while(uhandle, consumer.noevent, contains="ERROR:")
304 read_and_call(uhandle, consumer.noevent, start="done")
305
306
307
308
309
310
311
312
313
314
315
316
317
318 read_and_call_while(uhandle, consumer.noevent, blank=1)
319
320 if attempt_read_and_call(uhandle, consumer.round, start='Results'):
321 read_and_call_while(uhandle, consumer.noevent, blank=1)
322
323
324
325
326
327
328
329
330 if not attempt_read_and_call(
331 uhandle, consumer.description_header,
332 has_re=re.compile(r'Score +E')):
333
334 attempt_read_and_call(uhandle, consumer.no_hits,
335 contains='No hits found')
336 try:
337 read_and_call_while(uhandle, consumer.noevent, blank=1)
338 except ValueError, err:
339 if str(err) != "Unexpected end of stream." : raise err
340
341 consumer.end_descriptions()
342
343 return
344
345
346 read_and_call(uhandle, consumer.description_header,
347 start='Sequences producing')
348
349
350 attempt_read_and_call(uhandle, consumer.model_sequences,
351 start='Sequences used in model')
352 read_and_call_while(uhandle, consumer.noevent, blank=1)
353
354
355
356
357 if safe_peekline(uhandle).startswith(" Database:"):
358 consumer.end_descriptions()
359
360 return
361
362
363
364 if not uhandle.peekline().startswith('Sequences not found'):
365 read_and_call_until(uhandle, consumer.description, blank=1)
366 read_and_call_while(uhandle, consumer.noevent, blank=1)
367
368
369
370
371
372 if attempt_read_and_call(uhandle, consumer.nonmodel_sequences,
373 start='Sequences not found'):
374
375 read_and_call_while(uhandle, consumer.noevent, blank=1)
376 l = safe_peekline(uhandle)
377
378
379
380
381 if not l.startswith('CONVERGED') and l[0] != '>' \
382 and not l.startswith('QUERY'):
383 read_and_call_until(uhandle, consumer.description, blank=1)
384 read_and_call_while(uhandle, consumer.noevent, blank=1)
385
386 attempt_read_and_call(uhandle, consumer.converged, start='CONVERGED')
387 read_and_call_while(uhandle, consumer.noevent, blank=1)
388
389 consumer.end_descriptions()
390
410
417
434
463
469
471
472
473
474
475
476
477 read_and_call(uhandle, consumer.score, start=' Score')
478 read_and_call(uhandle, consumer.identities, start=' Identities')
479
480 attempt_read_and_call(uhandle, consumer.strand, start = ' Strand')
481
482 attempt_read_and_call(uhandle, consumer.frame, start = ' Frame')
483 read_and_call(uhandle, consumer.noevent, blank=1)
484
486
487
488
489
490
491
492
493
494
495 while 1:
496
497 attempt_read_and_call(uhandle, consumer.noevent, start=' ')
498 read_and_call(uhandle, consumer.query, start='Query')
499 read_and_call(uhandle, consumer.align, start=' ')
500 read_and_call(uhandle, consumer.sbjct, start='Sbjct')
501 try:
502 read_and_call_while(uhandle, consumer.noevent, blank=1)
503 except ValueError, err:
504 if str(err) != "Unexpected end of stream." : raise err
505
506
507
508 break
509 line = safe_peekline(uhandle)
510
511 if not (line.startswith('Query') or line.startswith(' ')):
512 break
513
536
537 - def _eof(self, uhandle):
538 try:
539 line = safe_peekline(uhandle)
540 except ValueError, err:
541 if str(err) != "Unexpected end of stream." : raise err
542 line = ""
543 return not line
544
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572 if self._eof(uhandle) : return
573
574 consumer.start_database_report()
575
576
577
578
579 if attempt_read_and_call(uhandle, consumer.noevent, start=" Subset"):
580 read_and_call(uhandle, consumer.noevent, contains="letters")
581 read_and_call(uhandle, consumer.noevent, contains="sequences")
582 read_and_call(uhandle, consumer.noevent, start=" ")
583
584
585
586 while attempt_read_and_call(uhandle, consumer.database,
587 start=' Database'):
588
589
590
591 if not uhandle.peekline().strip() \
592 or uhandle.peekline().startswith("BLAST"):
593 consumer.end_database_report()
594 return
595
596
597 read_and_call_until(uhandle, consumer.database, start=' Posted')
598 read_and_call(uhandle, consumer.posted_date, start=' Posted')
599 read_and_call(uhandle, consumer.num_letters_in_database,
600 start=' Number of letters')
601 read_and_call(uhandle, consumer.num_sequences_in_database,
602 start=' Number of sequences')
603
604 attempt_read_and_call(uhandle, consumer.noevent, start=' ')
605
606 line = safe_readline(uhandle)
607 uhandle.saveline(line)
608 if line.find('Lambda') != -1:
609 break
610
611 read_and_call(uhandle, consumer.noevent, start='Lambda')
612 read_and_call(uhandle, consumer.ka_params)
613
614
615 attempt_read_and_call(uhandle, consumer.noevent, blank=1)
616
617
618 attempt_read_and_call(uhandle, consumer.gapped, start='Gapped')
619
620 if attempt_read_and_call(uhandle, consumer.noevent, start='Lambda'):
621 read_and_call(uhandle, consumer.ka_params_gap)
622
623
624
625
626 try:
627 read_and_call_while(uhandle, consumer.noevent, blank=1)
628 except ValueError, x:
629 if str(x) != "Unexpected end of stream.":
630 raise
631 consumer.end_database_report()
632
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691 if not uhandle.peekline().strip():
692 return
693
694
695
696 consumer.start_parameters()
697
698
699 attempt_read_and_call(uhandle, consumer.matrix, start='Matrix')
700
701 attempt_read_and_call(uhandle, consumer.gap_penalties, start='Gap')
702
703 attempt_read_and_call(uhandle, consumer.num_sequences,
704 start='Number of Sequences')
705 attempt_read_and_call(uhandle, consumer.num_hits,
706 start='Number of Hits')
707 attempt_read_and_call(uhandle, consumer.num_sequences,
708 start='Number of Sequences')
709 attempt_read_and_call(uhandle, consumer.num_extends,
710 start='Number of extensions')
711 attempt_read_and_call(uhandle, consumer.num_good_extends,
712 start='Number of successful')
713
714 attempt_read_and_call(uhandle, consumer.num_seqs_better_e,
715 start='Number of sequences')
716
717
718 if attempt_read_and_call(uhandle, consumer.hsps_no_gap,
719 start="Number of HSP's better"):
720
721 if attempt_read_and_call(uhandle, consumer.noevent,
722 start="Number of HSP's gapped:"):
723 read_and_call(uhandle, consumer.noevent,
724 start="Number of HSP's successfully")
725
726 attempt_read_and_call(uhandle, consumer.noevent,
727 start="Number of extra gapped extensions")
728 else:
729 read_and_call(uhandle, consumer.hsps_prelim_gapped,
730 start="Number of HSP's successfully")
731 read_and_call(uhandle, consumer.hsps_prelim_gap_attempted,
732 start="Number of HSP's that")
733 read_and_call(uhandle, consumer.hsps_gapped,
734 start="Number of HSP's gapped")
735
736 elif attempt_read_and_call(uhandle, consumer.noevent,
737 start="Number of HSP's gapped"):
738 read_and_call(uhandle, consumer.noevent,
739 start="Number of HSP's successfully")
740
741
742 attempt_read_and_call(uhandle, consumer.query_length,
743 has_re=re.compile(r"[Ll]ength of query"))
744
745 attempt_read_and_call(uhandle, consumer.database_length,
746 has_re=re.compile(r"[Ll]ength of \s*[Dd]atabase"))
747
748
749 attempt_read_and_call(uhandle, consumer.noevent,
750 start="Length adjustment")
751 attempt_read_and_call(uhandle, consumer.effective_hsp_length,
752 start='effective HSP')
753
754 attempt_read_and_call(
755 uhandle, consumer.effective_query_length,
756 has_re=re.compile(r'[Ee]ffective length of query'))
757
758
759 attempt_read_and_call(
760 uhandle, consumer.effective_database_length,
761 has_re=re.compile(r'[Ee]ffective length of \s*[Dd]atabase'))
762
763
764 attempt_read_and_call(
765 uhandle, consumer.effective_search_space,
766 has_re=re.compile(r'[Ee]ffective search space:'))
767
768 attempt_read_and_call(
769 uhandle, consumer.effective_search_space_used,
770 has_re=re.compile(r'[Ee]ffective search space used'))
771
772
773 attempt_read_and_call(uhandle, consumer.frameshift, start='frameshift')
774
775
776 attempt_read_and_call(uhandle, consumer.threshold, start='T')
777
778 attempt_read_and_call(uhandle, consumer.threshold, start='Neighboring words threshold')
779
780
781 attempt_read_and_call(uhandle, consumer.window_size, start='A')
782
783 attempt_read_and_call(uhandle, consumer.window_size, start='Window for multiple hits')
784
785
786 attempt_read_and_call(uhandle, consumer.dropoff_1st_pass, start='X1')
787
788 attempt_read_and_call(uhandle, consumer.gap_x_dropoff, start='X2')
789
790
791 attempt_read_and_call(uhandle, consumer.gap_x_dropoff_final,
792 start='X3')
793
794
795 attempt_read_and_call(uhandle, consumer.gap_trigger, start='S1')
796
797
798
799 if not is_blank_line(uhandle.peekline(), allow_spaces=1):
800 read_and_call(uhandle, consumer.blast_cutoff, start='S2')
801
802 consumer.end_parameters()
803
805 """Parses BLAST data into a Record.Blast object.
806
807 """
812
813 - def parse(self, handle):
814 """parse(self, handle)"""
815 self._scanner.feed(handle, self._consumer)
816 return self._consumer.data
817
819 """Parses BLAST data into a Record.PSIBlast object.
820
821 """
826
827 - def parse(self, handle):
828 """parse(self, handle)"""
829 self._scanner.feed(handle, self._consumer)
830 return self._consumer.data
831
835
837 c = line.split()
838 self._header.application = c[0]
839 self._header.version = c[1]
840 if len(c) > 2:
841
842
843 self._header.date = c[2][1:-1]
844
850
865
882
887
890 self._descriptions = []
891 self._model_sequences = []
892 self._nonmodel_sequences = []
893 self._converged = 0
894 self._type = None
895 self._roundnum = None
896
897 self.__has_n = 0
898
900 if line.startswith('Sequences producing'):
901 cols = line.split()
902 if cols[-1] == 'N':
903 self.__has_n = 1
904
906 dh = self._parse(line)
907 if self._type == 'model':
908 self._model_sequences.append(dh)
909 elif self._type == 'nonmodel':
910 self._nonmodel_sequences.append(dh)
911 else:
912 self._descriptions.append(dh)
913
916
918 self._type = 'nonmodel'
919
922
925
927 if not line.startswith('Results from round'):
928 raise ValueError("I didn't understand the round line\n%s" % line)
929 self._roundnum = _safe_int(line[18:].strip())
930
933
934 - def _parse(self, description_line):
935 line = description_line
936 dh = Record.Description()
937
938
939
940
941
942
943
944
945 cols = line.split()
946 if len(cols) < 3:
947 raise ValueError( \
948 "Line does not appear to contain description:\n%s" % line)
949 if self.__has_n:
950 i = line.rfind(cols[-1])
951 i = line.rfind(cols[-2], 0, i)
952 i = line.rfind(cols[-3], 0, i)
953 else:
954 i = line.rfind(cols[-1])
955 i = line.rfind(cols[-2], 0, i)
956 if self.__has_n:
957 dh.title, dh.score, dh.e, dh.num_alignments = \
958 line[:i].rstrip(), cols[-3], cols[-2], cols[-1]
959 else:
960 dh.title, dh.score, dh.e, dh.num_alignments = \
961 line[:i].rstrip(), cols[-2], cols[-1], 1
962 dh.num_alignments = _safe_int(dh.num_alignments)
963 dh.score = _safe_int(dh.score)
964 dh.e = _safe_float(dh.e)
965 return dh
966
968
969
970
971
975
977 if self._alignment.title:
978 self._alignment.title += " "
979 self._alignment.title += line.strip()
980
982
983 parts = line.replace(" ","").split("=")
984 assert len(parts)==2, "Unrecognised format length line"
985 self._alignment.length = parts[1]
986 self._alignment.length = _safe_int(self._alignment.length)
987
989
990 if line.startswith('QUERY') or line.startswith('blast_tmp'):
991
992
993
994
995
996 try:
997 name, start, seq, end = line.split()
998 except ValueError:
999 raise ValueError("I do not understand the line\n%s" % line)
1000 self._start_index = line.index(start, len(name))
1001 self._seq_index = line.index(seq,
1002 self._start_index+len(start))
1003
1004 self._name_length = self._start_index - 1
1005 self._start_length = self._seq_index - self._start_index - 1
1006 self._seq_length = line.rfind(end) - self._seq_index - 1
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016 name = line[:self._name_length]
1017 name = name.rstrip()
1018 start = line[self._start_index:self._start_index+self._start_length]
1019 start = start.rstrip()
1020 if start:
1021 start = _safe_int(start)
1022 end = line[self._seq_index+self._seq_length:].rstrip()
1023 if end:
1024 end = _safe_int(end)
1025 seq = line[self._seq_index:self._seq_index+self._seq_length].rstrip()
1026
1027 if len(seq) < self._seq_length:
1028 seq = seq + ' '*(self._seq_length-len(seq))
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047 align = self._multiple_alignment.alignment
1048 align.append((name, start, seq, end))
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1098
1099 if self._alignment:
1100 self._alignment.title = self._alignment.title.rstrip()
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122 try:
1123 del self._seq_index
1124 del self._seq_length
1125 del self._start_index
1126 del self._start_length
1127 del self._name_length
1128 except AttributeError:
1129 pass
1130
1134
1136 self._hsp.bits, self._hsp.score = _re_search(
1137 r"Score =\s*([0-9.e+]+) bits \(([0-9]+)\)", line,
1138 "I could not find the score in line\n%s" % line)
1139 self._hsp.score = _safe_float(self._hsp.score)
1140 self._hsp.bits = _safe_float(self._hsp.bits)
1141
1142 x, y = _re_search(
1143 r"Expect\(?(\d*)\)? = +([0-9.e\-|\+]+)", line,
1144 "I could not find the expect in line\n%s" % line)
1145 if x:
1146 self._hsp.num_alignments = _safe_int(x)
1147 else:
1148 self._hsp.num_alignments = 1
1149 self._hsp.expect = _safe_float(y)
1150
1152 x, y = _re_search(
1153 r"Identities = (\d+)\/(\d+)", line,
1154 "I could not find the identities in line\n%s" % line)
1155 self._hsp.identities = _safe_int(x), _safe_int(y)
1156 self._hsp.align_length = _safe_int(y)
1157
1158 if line.find('Positives') != -1:
1159 x, y = _re_search(
1160 r"Positives = (\d+)\/(\d+)", line,
1161 "I could not find the positives in line\n%s" % line)
1162 self._hsp.positives = _safe_int(x), _safe_int(y)
1163 assert self._hsp.align_length == _safe_int(y)
1164
1165 if line.find('Gaps') != -1:
1166 x, y = _re_search(
1167 r"Gaps = (\d+)\/(\d+)", line,
1168 "I could not find the gaps in line\n%s" % line)
1169 self._hsp.gaps = _safe_int(x), _safe_int(y)
1170 assert self._hsp.align_length == _safe_int(y)
1171
1172
1174 self._hsp.strand = _re_search(
1175 r"Strand = (\w+) / (\w+)", line,
1176 "I could not find the strand in line\n%s" % line)
1177
1179
1180
1181
1182 if line.find('/') != -1:
1183 self._hsp.frame = _re_search(
1184 r"Frame = ([-+][123]) / ([-+][123])", line,
1185 "I could not find the frame in line\n%s" % line)
1186 else:
1187 self._hsp.frame = _re_search(
1188 r"Frame = ([-+][123])", line,
1189 "I could not find the frame in line\n%s" % line)
1190
1191
1192
1193
1194
1195
1196 _query_re = re.compile(r"Query(:?) \s*(\d+)\s*(.+) (\d+)")
1198 m = self._query_re.search(line)
1199 if m is None:
1200 raise ValueError("I could not find the query in line\n%s" % line)
1201
1202
1203
1204 colon, start, seq, end = m.groups()
1205 self._hsp.query = self._hsp.query + seq
1206 if self._hsp.query_start is None:
1207 self._hsp.query_start = _safe_int(start)
1208
1209
1210
1211 self._hsp.query_end = _safe_int(end)
1212
1213
1214 self._query_start_index = m.start(3)
1215 self._query_len = len(seq)
1216
1218 seq = line[self._query_start_index:].rstrip()
1219 if len(seq) < self._query_len:
1220
1221 seq = seq + ' ' * (self._query_len-len(seq))
1222 elif len(seq) < self._query_len:
1223 raise ValueError("Match is longer than the query in line\n%s" \
1224 % line)
1225 self._hsp.match = self._hsp.match + seq
1226
1227
1228
1229 _sbjct_re = re.compile(r"Sbjct(:?) \s*(\d+)\s*(.+) (\d+)")
1231 m = self._sbjct_re.search(line)
1232 if m is None:
1233 raise ValueError("I could not find the sbjct in line\n%s" % line)
1234 colon, start, seq, end = m.groups()
1235
1236
1237
1238
1239 if not seq.strip():
1240 seq = ' ' * self._query_len
1241 self._hsp.sbjct = self._hsp.sbjct + seq
1242 if self._hsp.sbjct_start is None:
1243 self._hsp.sbjct_start = _safe_int(start)
1244
1245 self._hsp.sbjct_end = _safe_int(end)
1246 if len(seq) != self._query_len:
1247 raise ValueError( \
1248 "QUERY and SBJCT sequence lengths don't match in line\n%s" \
1249 % line)
1250
1251 del self._query_start_index
1252 del self._query_len
1253
1256
1258
1261
1263 m = re.search(r"Database: (.+)$", line)
1264 if m:
1265 self._dr.database_name.append(m.group(1))
1266 elif self._dr.database_name:
1267
1268 self._dr.database_name[-1] = "%s%s" % (self._dr.database_name[-1],
1269 line.strip())
1270
1271 - def posted_date(self, line):
1272 self._dr.posted_date.append(_re_search(
1273 r"Posted date:\s*(.+)$", line,
1274 "I could not find the posted date in line\n%s" % line))
1275
1280
1285
1289
1292
1296
1299
1303
1306
1311
1313 if line.find('1st pass') != -1:
1314 x, = _get_cols(line, (-4,), ncols=11, expected={2:"Hits"})
1315 self._params.num_hits = _safe_int(x)
1316 else:
1317 x, = _get_cols(line, (-1,), ncols=6, expected={2:"Hits"})
1318 self._params.num_hits = _safe_int(x)
1319
1321 if line.find('1st pass') != -1:
1322 x, = _get_cols(line, (-4,), ncols=9, expected={2:"Sequences:"})
1323 self._params.num_sequences = _safe_int(x)
1324 else:
1325 x, = _get_cols(line, (-1,), ncols=4, expected={2:"Sequences:"})
1326 self._params.num_sequences = _safe_int(x)
1327
1329 if line.find('1st pass') != -1:
1330 x, = _get_cols(line, (-4,), ncols=9, expected={2:"extensions:"})
1331 self._params.num_extends = _safe_int(x)
1332 else:
1333 x, = _get_cols(line, (-1,), ncols=4, expected={2:"extensions:"})
1334 self._params.num_extends = _safe_int(x)
1335
1337 if line.find('1st pass') != -1:
1338 x, = _get_cols(line, (-4,), ncols=10, expected={3:"extensions:"})
1339 self._params.num_good_extends = _safe_int(x)
1340 else:
1341 x, = _get_cols(line, (-1,), ncols=5, expected={3:"extensions:"})
1342 self._params.num_good_extends = _safe_int(x)
1343
1349
1354
1360
1366
1371
1376
1381
1387
1393
1399
1405
1411
1415
1417 if line[:2] == "T:":
1418
1419 self._params.threshold, = _get_cols(
1420 line, (1,), ncols=2, expected={0:"T:"})
1421 elif line[:28] == "Neighboring words threshold:":
1422 self._params.threshold, = _get_cols(
1423 line, (3,), ncols=4, expected={0:"Neighboring", 1:"words", 2:"threshold:"})
1424 else:
1425 raise ValueError("Unrecognised threshold line:\n%s" % line)
1426 self._params.threshold = _safe_int(self._params.threshold)
1427
1429 if line[:2] == "A:":
1430 self._params.window_size, = _get_cols(
1431 line, (1,), ncols=2, expected={0:"A:"})
1432 elif line[:25] == "Window for multiple hits:":
1433 self._params.window_size, = _get_cols(
1434 line, (4,), ncols=5, expected={0:"Window", 2:"multiple", 3:"hits:"})
1435 else:
1436 raise ValueError("Unrecognised window size line:\n%s" % line)
1437 self._params.window_size = _safe_int(self._params.window_size)
1438
1444
1450
1456
1462
1468
1471
1472
1473 -class _BlastConsumer(AbstractConsumer,
1474 _HeaderConsumer,
1475 _DescriptionConsumer,
1476 _AlignmentConsumer,
1477 _HSPConsumer,
1478 _DatabaseReportConsumer,
1479 _ParametersConsumer
1480 ):
1481
1482
1483
1484
1485
1486
1487
1488
1489
1492
1494
1495 raise ValueError("This consumer doesn't handle PSI-BLAST data")
1496
1500
1504
1506 self.data.descriptions = self._descriptions
1507
1509 _AlignmentConsumer.end_alignment(self)
1510 if self._alignment.hsps:
1511 self.data.alignments.append(self._alignment)
1512 if self._multiple_alignment.alignment:
1513 self.data.multiple_alignment = self._multiple_alignment
1514
1516 _HSPConsumer.end_hsp(self)
1517 try:
1518 self._alignment.hsps.append(self._hsp)
1519 except AttributeError:
1520 raise ValueError("Found an HSP before an alignment")
1521
1525
1529
1530 -class _PSIBlastConsumer(AbstractConsumer,
1531 _HeaderConsumer,
1532 _DescriptionConsumer,
1533 _AlignmentConsumer,
1534 _HSPConsumer,
1535 _DatabaseReportConsumer,
1536 _ParametersConsumer
1537 ):
1540
1544
1548
1553
1555 _DescriptionConsumer.end_descriptions(self)
1556 self._round.number = self._roundnum
1557 if self._descriptions:
1558 self._round.new_seqs.extend(self._descriptions)
1559 self._round.reused_seqs.extend(self._model_sequences)
1560 self._round.new_seqs.extend(self._nonmodel_sequences)
1561 if self._converged:
1562 self.data.converged = 1
1563
1565 _AlignmentConsumer.end_alignment(self)
1566 if self._alignment.hsps:
1567 self._round.alignments.append(self._alignment)
1568 if self._multiple_alignment:
1569 self._round.multiple_alignment = self._multiple_alignment
1570
1572 _HSPConsumer.end_hsp(self)
1573 try:
1574 self._alignment.hsps.append(self._hsp)
1575 except AttributeError:
1576 raise ValueError("Found an HSP before an alignment")
1577
1581
1585
1587 """Iterates over a file of multiple BLAST results.
1588
1589 Methods:
1590 next Return the next record from the stream, or None.
1591
1592 """
1593 - def __init__(self, handle, parser=None):
1594 """__init__(self, handle, parser=None)
1595
1596 Create a new iterator. handle is a file-like object. parser
1597 is an optional Parser object to change the results into another form.
1598 If set to None, then the raw contents of the file will be returned.
1599
1600 """
1601 try:
1602 handle.readline
1603 except AttributeError:
1604 raise ValueError(
1605 "I expected a file handle or file-like object, got %s"
1606 % type(handle))
1607 self._uhandle = File.UndoHandle(handle)
1608 self._parser = parser
1609 self._header = []
1610
1612 """next(self) -> object
1613
1614 Return the next Blast record from the file. If no more records,
1615 return None.
1616
1617 """
1618 lines = []
1619 query = False
1620 while 1:
1621 line = self._uhandle.readline()
1622 if not line:
1623 break
1624
1625 if lines and (line.startswith('BLAST')
1626 or line.startswith('BLAST', 1)
1627 or line.startswith('<?xml ')):
1628 self._uhandle.saveline(line)
1629 break
1630
1631 if line.startswith("Query="):
1632 if not query:
1633 if not self._header:
1634 self._header = lines[:]
1635 query = True
1636 else:
1637
1638 self._uhandle.saveline(line)
1639 break
1640 lines.append(line)
1641
1642 if query and "BLAST" not in lines[0]:
1643
1644
1645
1646
1647
1648
1649 lines = self._header + lines
1650
1651 if not lines:
1652 return None
1653
1654 data = ''.join(lines)
1655 if self._parser is not None:
1656 return self._parser.parse(File.StringHandle(data))
1657 return data
1658
1660 return iter(self.next, None)
1661
1662 -def blastall(blastcmd, program, database, infile, align_view='7', **keywds):
1663 """Execute and retrieve data from standalone BLASTPALL as handles (OBSOLETE).
1664
1665 NOTE - This function is obsolete, you are encouraged to the command
1666 line wrapper Bio.Blast.Applications.BlastallCommandline instead.
1667
1668 Execute and retrieve data from blastall. blastcmd is the command
1669 used to launch the 'blastall' executable. program is the blast program
1670 to use, e.g. 'blastp', 'blastn', etc. database is the path to the database
1671 to search against. infile is the path to the file containing
1672 the sequence to search with.
1673
1674 The return values are two handles, for standard output and standard error.
1675
1676 You may pass more parameters to **keywds to change the behavior of
1677 the search. Otherwise, optional values will be chosen by blastall.
1678 The Blast output is by default in XML format. Use the align_view keyword
1679 for output in a different format.
1680
1681 Scoring
1682 matrix Matrix to use.
1683 gap_open Gap open penalty.
1684 gap_extend Gap extension penalty.
1685 nuc_match Nucleotide match reward. (BLASTN)
1686 nuc_mismatch Nucleotide mismatch penalty. (BLASTN)
1687 query_genetic_code Genetic code for Query.
1688 db_genetic_code Genetic code for database. (TBLAST[NX])
1689
1690 Algorithm
1691 gapped Whether to do a gapped alignment. T/F (not for TBLASTX)
1692 expectation Expectation value cutoff.
1693 wordsize Word size.
1694 strands Query strands to search against database.([T]BLAST[NX])
1695 keep_hits Number of best hits from a region to keep.
1696 xdrop Dropoff value (bits) for gapped alignments.
1697 hit_extend Threshold for extending hits.
1698 region_length Length of region used to judge hits.
1699 db_length Effective database length.
1700 search_length Effective length of search space.
1701
1702 Processing
1703 filter Filter query sequence for low complexity (with SEG)? T/F
1704 believe_query Believe the query defline. T/F
1705 restrict_gi Restrict search to these GI's.
1706 nprocessors Number of processors to use.
1707 oldengine Force use of old engine T/F
1708
1709 Formatting
1710 html Produce HTML output? T/F
1711 descriptions Number of one-line descriptions.
1712 alignments Number of alignments.
1713 align_view Alignment view. Integer 0-11,
1714 passed as a string or integer.
1715 show_gi Show GI's in deflines? T/F
1716 seqalign_file seqalign file to output.
1717 outfile Output file for report. Filename to write to, if
1718 ommitted standard output is used (which you can access
1719 from the returned handles).
1720 """
1721
1722 _security_check_parameters(keywds)
1723
1724 att2param = {
1725 'matrix' : '-M',
1726 'gap_open' : '-G',
1727 'gap_extend' : '-E',
1728 'nuc_match' : '-r',
1729 'nuc_mismatch' : '-q',
1730 'query_genetic_code' : '-Q',
1731 'db_genetic_code' : '-D',
1732
1733 'gapped' : '-g',
1734 'expectation' : '-e',
1735 'wordsize' : '-W',
1736 'strands' : '-S',
1737 'keep_hits' : '-K',
1738 'xdrop' : '-X',
1739 'hit_extend' : '-f',
1740 'region_length' : '-L',
1741 'db_length' : '-z',
1742 'search_length' : '-Y',
1743
1744 'program' : '-p',
1745 'database' : '-d',
1746 'infile' : '-i',
1747 'filter' : '-F',
1748 'believe_query' : '-J',
1749 'restrict_gi' : '-l',
1750 'nprocessors' : '-a',
1751 'oldengine' : '-V',
1752
1753 'html' : '-T',
1754 'descriptions' : '-v',
1755 'alignments' : '-b',
1756 'align_view' : '-m',
1757 'show_gi' : '-I',
1758 'seqalign_file' : '-O',
1759 'outfile' : '-o',
1760 }
1761 from Applications import BlastallCommandline
1762 cline = BlastallCommandline(blastcmd)
1763 cline.set_parameter(att2param['program'], program)
1764 cline.set_parameter(att2param['database'], database)
1765 cline.set_parameter(att2param['infile'], infile)
1766 cline.set_parameter(att2param['align_view'], str(align_view))
1767 for key, value in keywds.iteritems():
1768 cline.set_parameter(att2param[key], str(value))
1769 return _invoke_blast(cline)
1770
1771
1772 -def blastpgp(blastcmd, database, infile, align_view='7', **keywds):
1773 """Execute and retrieve data from standalone BLASTPGP as handles (OBSOLETE).
1774
1775 NOTE - This function is obsolete, you are encouraged to the command
1776 line wrapper Bio.Blast.Applications.BlastpgpCommandline instead.
1777
1778 Execute and retrieve data from blastpgp. blastcmd is the command
1779 used to launch the 'blastpgp' executable. database is the path to the
1780 database to search against. infile is the path to the file containing
1781 the sequence to search with.
1782
1783 The return values are two handles, for standard output and standard error.
1784
1785 You may pass more parameters to **keywds to change the behavior of
1786 the search. Otherwise, optional values will be chosen by blastpgp.
1787 The Blast output is by default in XML format. Use the align_view keyword
1788 for output in a different format.
1789
1790 Scoring
1791 matrix Matrix to use.
1792 gap_open Gap open penalty.
1793 gap_extend Gap extension penalty.
1794 window_size Multiple hits window size.
1795 npasses Number of passes.
1796 passes Hits/passes. Integer 0-2.
1797
1798 Algorithm
1799 gapped Whether to do a gapped alignment. T/F
1800 expectation Expectation value cutoff.
1801 wordsize Word size.
1802 keep_hits Number of beset hits from a region to keep.
1803 xdrop Dropoff value (bits) for gapped alignments.
1804 hit_extend Threshold for extending hits.
1805 region_length Length of region used to judge hits.
1806 db_length Effective database length.
1807 search_length Effective length of search space.
1808 nbits_gapping Number of bits to trigger gapping.
1809 pseudocounts Pseudocounts constants for multiple passes.
1810 xdrop_final X dropoff for final gapped alignment.
1811 xdrop_extension Dropoff for blast extensions.
1812 model_threshold E-value threshold to include in multipass model.
1813 required_start Start of required region in query.
1814 required_end End of required region in query.
1815
1816 Processing
1817 XXX should document default values
1818 program The blast program to use. (PHI-BLAST)
1819 filter Filter query sequence for low complexity (with SEG)? T/F
1820 believe_query Believe the query defline? T/F
1821 nprocessors Number of processors to use.
1822
1823 Formatting
1824 html Produce HTML output? T/F
1825 descriptions Number of one-line descriptions.
1826 alignments Number of alignments.
1827 align_view Alignment view. Integer 0-11,
1828 passed as a string or integer.
1829 show_gi Show GI's in deflines? T/F
1830 seqalign_file seqalign file to output.
1831 align_outfile Output file for alignment.
1832 checkpoint_outfile Output file for PSI-BLAST checkpointing.
1833 restart_infile Input file for PSI-BLAST restart.
1834 hit_infile Hit file for PHI-BLAST.
1835 matrix_outfile Output file for PSI-BLAST matrix in ASCII.
1836 align_outfile Output file for alignment. Filename to write to, if
1837 ommitted standard output is used (which you can access
1838 from the returned handles).
1839
1840 align_infile Input alignment file for PSI-BLAST restart.
1841
1842 """
1843
1844 _security_check_parameters(keywds)
1845
1846 att2param = {
1847 'matrix' : '-M',
1848 'gap_open' : '-G',
1849 'gap_extend' : '-E',
1850 'window_size' : '-A',
1851 'npasses' : '-j',
1852 'passes' : '-P',
1853
1854 'gapped' : '-g',
1855 'expectation' : '-e',
1856 'wordsize' : '-W',
1857 'keep_hits' : '-K',
1858 'xdrop' : '-X',
1859 'hit_extend' : '-f',
1860 'region_length' : '-L',
1861 'db_length' : '-Z',
1862 'search_length' : '-Y',
1863 'nbits_gapping' : '-N',
1864 'pseudocounts' : '-c',
1865 'xdrop_final' : '-Z',
1866 'xdrop_extension' : '-y',
1867 'model_threshold' : '-h',
1868 'required_start' : '-S',
1869 'required_end' : '-H',
1870
1871 'program' : '-p',
1872 'database' : '-d',
1873 'infile' : '-i',
1874 'filter' : '-F',
1875 'believe_query' : '-J',
1876 'nprocessors' : '-a',
1877
1878 'html' : '-T',
1879 'descriptions' : '-v',
1880 'alignments' : '-b',
1881 'align_view' : '-m',
1882 'show_gi' : '-I',
1883 'seqalign_file' : '-O',
1884 'align_outfile' : '-o',
1885 'checkpoint_outfile' : '-C',
1886 'restart_infile' : '-R',
1887 'hit_infile' : '-k',
1888 'matrix_outfile' : '-Q',
1889 'align_infile' : '-B',
1890 }
1891 from Applications import BlastpgpCommandline
1892 cline = BlastpgpCommandline(blastcmd)
1893 cline.set_parameter(att2param['database'], database)
1894 cline.set_parameter(att2param['infile'], infile)
1895 cline.set_parameter(att2param['align_view'], str(align_view))
1896 for key, value in keywds.iteritems():
1897 cline.set_parameter(att2param[key], str(value))
1898 return _invoke_blast(cline)
1899
1900
1901 -def rpsblast(blastcmd, database, infile, align_view="7", **keywds):
1902 """Execute and retrieve data from standalone RPS-BLAST as handles (OBSOLETE).
1903
1904 NOTE - This function is obsolete, you are encouraged to the command
1905 line wrapper Bio.Blast.Applications.RpsBlastCommandline instead.
1906
1907 Execute and retrieve data from standalone RPS-BLAST. blastcmd is the
1908 command used to launch the 'rpsblast' executable. database is the path
1909 to the database to search against. infile is the path to the file
1910 containing the sequence to search with.
1911
1912 The return values are two handles, for standard output and standard error.
1913
1914 You may pass more parameters to **keywds to change the behavior of
1915 the search. Otherwise, optional values will be chosen by rpsblast.
1916
1917 Please note that this function will give XML output by default, by
1918 setting align_view to seven (i.e. command line option -m 7).
1919 You should use the NCBIXML.parse() function to read the resulting output.
1920 This is because NCBIStandalone.BlastParser() does not understand the
1921 plain text output format from rpsblast.
1922
1923 WARNING - The following text and associated parameter handling has not
1924 received extensive testing. Please report any errors we might have made...
1925
1926 Algorithm/Scoring
1927 gapped Whether to do a gapped alignment. T/F
1928 multihit 0 for multiple hit (default), 1 for single hit
1929 expectation Expectation value cutoff.
1930 range_restriction Range restriction on query sequence (Format: start,stop) blastp only
1931 0 in 'start' refers to the beginning of the sequence
1932 0 in 'stop' refers to the end of the sequence
1933 Default = 0,0
1934 xdrop Dropoff value (bits) for gapped alignments.
1935 xdrop_final X dropoff for final gapped alignment (in bits).
1936 xdrop_extension Dropoff for blast extensions (in bits).
1937 search_length Effective length of search space.
1938 nbits_gapping Number of bits to trigger gapping.
1939 protein Query sequence is protein. T/F
1940 db_length Effective database length.
1941
1942 Processing
1943 filter Filter query sequence for low complexity? T/F
1944 case_filter Use lower case filtering of FASTA sequence T/F, default F
1945 believe_query Believe the query defline. T/F
1946 nprocessors Number of processors to use.
1947 logfile Name of log file to use, default rpsblast.log
1948
1949 Formatting
1950 html Produce HTML output? T/F
1951 descriptions Number of one-line descriptions.
1952 alignments Number of alignments.
1953 align_view Alignment view. Integer 0-11,
1954 passed as a string or integer.
1955 show_gi Show GI's in deflines? T/F
1956 seqalign_file seqalign file to output.
1957 align_outfile Output file for alignment. Filename to write to, if
1958 ommitted standard output is used (which you can access
1959 from the returned handles).
1960 """
1961
1962 _security_check_parameters(keywds)
1963
1964 att2param = {
1965 'multihit' : '-P',
1966 'gapped' : '-g',
1967 'expectation' : '-e',
1968 'range_restriction' : '-L',
1969 'xdrop' : '-X',
1970 'xdrop_final' : '-Z',
1971 'xdrop_extension' : '-y',
1972 'search_length' : '-Y',
1973 'nbits_gapping' : '-N',
1974 'protein' : '-p',
1975 'db_length' : '-z',
1976
1977 'database' : '-d',
1978 'infile' : '-i',
1979 'filter' : '-F',
1980 'case_filter' : '-U',
1981 'believe_query' : '-J',
1982 'nprocessors' : '-a',
1983 'logfile' : '-l',
1984
1985 'html' : '-T',
1986 'descriptions' : '-v',
1987 'alignments' : '-b',
1988 'align_view' : '-m',
1989 'show_gi' : '-I',
1990 'seqalign_file' : '-O',
1991 'align_outfile' : '-o',
1992 }
1993
1994 from Applications import RpsBlastCommandline
1995 cline = RpsBlastCommandline(blastcmd)
1996 cline.set_parameter(att2param['database'], database)
1997 cline.set_parameter(att2param['infile'], infile)
1998 cline.set_parameter(att2param['align_view'], str(align_view))
1999 for key, value in keywds.iteritems():
2000 cline.set_parameter(att2param[key], str(value))
2001 return _invoke_blast(cline)
2002
2003
2005 m = re.search(regex, line)
2006 if not m:
2007 raise ValueError(error_msg)
2008 return m.groups()
2009
2010 -def _get_cols(line, cols_to_get, ncols=None, expected={}):
2011 cols = line.split()
2012
2013
2014 if ncols is not None and len(cols) != ncols:
2015 raise ValueError("I expected %d columns (got %d) in line\n%s" \
2016 % (ncols, len(cols), line))
2017
2018
2019 for k in expected.keys():
2020 if cols[k] != expected[k]:
2021 raise ValueError("I expected '%s' in column %d in line\n%s" \
2022 % (expected[k], k, line))
2023
2024
2025 results = []
2026 for c in cols_to_get:
2027 results.append(cols[c])
2028 return tuple(results)
2029
2031 try:
2032 return int(str)
2033 except ValueError:
2034
2035
2036 str = str.replace(',', '')
2037 try:
2038
2039 return int(str)
2040 except ValueError:
2041 pass
2042
2043
2044 return long(float(str))
2045
2047
2048
2049
2050
2051
2052 if str and str[0] in ['E', 'e']:
2053 str = '1' + str
2054 try:
2055 return float(str)
2056 except ValueError:
2057
2058 str = str.replace(',', '')
2059
2060 return float(str)
2061
2062
2064 """Start BLAST and returns handles for stdout and stderr (PRIVATE).
2065
2066 Expects a command line wrapper object from Bio.Blast.Applications
2067 """
2068 import subprocess, sys
2069 blast_cmd = cline.program_name
2070 if not os.path.exists(blast_cmd):
2071 raise ValueError("BLAST executable does not exist at %s" % blast_cmd)
2072
2073
2074
2075
2076 blast_process = subprocess.Popen(str(cline),
2077 stdin=subprocess.PIPE,
2078 stdout=subprocess.PIPE,
2079 stderr=subprocess.PIPE,
2080 shell=(sys.platform!="win32"))
2081 blast_process.stdin.close()
2082 return blast_process.stdout, blast_process.stderr
2083
2084
2086 """Look for any attempt to insert a command into a parameter.
2087
2088 e.g. blastall(..., matrix='IDENTITY -F 0; rm -rf /etc/passwd')
2089
2090 Looks for ";" or "&&" in the strings (Unix and Windows syntax
2091 for appending a command line), or ">", "<" or "|" (redirection)
2092 and if any are found raises an exception.
2093 """
2094 for key, value in param_dict.iteritems():
2095 str_value = str(value)
2096 for bad_str in [";", "&&", ">", "<", "|"]:
2097 if bad_str in str_value:
2098 raise ValueError("Rejecting suspicious argument for %s" % key)
2099
2110
2112 """Attempt to catch and diagnose BLAST errors while parsing.
2113
2114 This utilizes the BlastParser module but adds an additional layer
2115 of complexity on top of it by attempting to diagnose ValueErrors
2116 that may actually indicate problems during BLAST parsing.
2117
2118 Current BLAST problems this detects are:
2119 o LowQualityBlastError - When BLASTing really low quality sequences
2120 (ie. some GenBank entries which are just short streches of a single
2121 nucleotide), BLAST will report an error with the sequence and be
2122 unable to search with this. This will lead to a badly formatted
2123 BLAST report that the parsers choke on. The parser will convert the
2124 ValueError to a LowQualityBlastError and attempt to provide useful
2125 information.
2126
2127 """
2128 - def __init__(self, bad_report_handle = None):
2129 """Initialize a parser that tries to catch BlastErrors.
2130
2131 Arguments:
2132 o bad_report_handle - An optional argument specifying a handle
2133 where bad reports should be sent. This would allow you to save
2134 all of the bad reports to a file, for instance. If no handle
2135 is specified, the bad reports will not be saved.
2136 """
2137 self._bad_report_handle = bad_report_handle
2138
2139
2140 self._scanner = _Scanner()
2141 self._consumer = _BlastErrorConsumer()
2142
2143 - def parse(self, handle):
2144 """Parse a handle, attempting to diagnose errors.
2145 """
2146 results = handle.read()
2147
2148 try:
2149 self._scanner.feed(File.StringHandle(results), self._consumer)
2150 except ValueError, msg:
2151
2152 if self._bad_report_handle:
2153
2154 self._bad_report_handle.write(results)
2155
2156
2157 self._diagnose_error(
2158 File.StringHandle(results), self._consumer.data)
2159
2160
2161
2162 raise
2163 return self._consumer.data
2164
2166 """Attempt to diagnose an error in the passed handle.
2167
2168 Arguments:
2169 o handle - The handle potentially containing the error
2170 o data_record - The data record partially created by the consumer.
2171 """
2172 line = handle.readline()
2173
2174 while line:
2175
2176
2177
2178 if line.startswith('Searchingdone'):
2179 raise LowQualityBlastError("Blast failure occured on query: ",
2180 data_record.query)
2181 line = handle.readline()
2182