1
2
3
4
5
6
7
8
9 """
10 This module provides code to work with the WWW version of BLAST
11 provided by the NCBI.
12 http://blast.ncbi.nlm.nih.gov/
13
14 Functions:
15 qblast Do a BLAST search using the QBLAST API.
16 """
17
18 try:
19 import cStringIO as StringIO
20 except ImportError:
21 import StringIO
22
23
24
25 -def qblast(program, database, sequence,
26 auto_format=None,composition_based_statistics=None,
27 db_genetic_code=None,endpoints=None,entrez_query='(none)',
28 expect=10.0,filter=None,gapcosts=None,genetic_code=None,
29 hitlist_size=50,i_thresh=None,layout=None,lcase_mask=None,
30 matrix_name=None,nucl_penalty=None,nucl_reward=None,
31 other_advanced=None,perc_ident=None,phi_pattern=None,
32 query_file=None,query_believe_defline=None,query_from=None,
33 query_to=None,searchsp_eff=None,service=None,threshold=None,
34 ungapped_alignment=None,word_size=None,
35 alignments=500,alignment_view=None,descriptions=500,
36 entrez_links_new_window=None,expect_low=None,expect_high=None,
37 format_entrez_query=None,format_object=None,format_type='XML',
38 ncbi_gi=None,results_file=None,show_overview=None
39 ):
40 """Do a BLAST search using the QBLAST server at NCBI.
41
42 Supports all parameters of the qblast API for Put and Get.
43 Some useful parameters:
44 program blastn, blastp, blastx, tblastn, or tblastx (lower case)
45 database Which database to search against (e.g. "nr").
46 sequence The sequence to search.
47 ncbi_gi TRUE/FALSE whether to give 'gi' identifier.
48 descriptions Number of descriptions to show. Def 500.
49 alignments Number of alignments to show. Def 500.
50 expect An expect value cutoff. Def 10.0.
51 matrix_name Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
52 filter "none" turns off filtering. Default no filtering
53 format_type "HTML", "Text", "ASN.1", or "XML". Def. "XML".
54 entrez_query Entrez query to limit Blast search
55 hitlist_size Number of hits to return. Default 50
56
57 This function does no checking of the validity of the parameters
58 and passes the values to the server as is. More help is available at:
59 http://www.ncbi.nlm.nih.gov/BLAST/blast_overview.html
60
61 """
62 import urllib, urllib2
63 import time
64
65 assert program in ['blastn', 'blastp', 'blastx', 'tblastn', 'tblastx']
66
67
68
69 parameters = [
70 ('AUTO_FORMAT',auto_format),
71 ('COMPOSITION_BASED_STATISTICS',composition_based_statistics),
72 ('DATABASE',database),
73 ('DB_GENETIC_CODE',db_genetic_code),
74 ('ENDPOINTS',endpoints),
75 ('ENTREZ_QUERY',entrez_query),
76 ('EXPECT',expect),
77 ('FILTER',filter),
78 ('GAPCOSTS',gapcosts),
79 ('GENETIC_CODE',genetic_code),
80 ('HITLIST_SIZE',hitlist_size),
81 ('I_THRESH',i_thresh),
82 ('LAYOUT',layout),
83 ('LCASE_MASK',lcase_mask),
84 ('MATRIX_NAME',matrix_name),
85 ('NUCL_PENALTY',nucl_penalty),
86 ('NUCL_REWARD',nucl_reward),
87 ('OTHER_ADVANCED',other_advanced),
88 ('PERC_IDENT',perc_ident),
89 ('PHI_PATTERN',phi_pattern),
90 ('PROGRAM',program),
91 ('QUERY',sequence),
92 ('QUERY_FILE',query_file),
93 ('QUERY_BELIEVE_DEFLINE',query_believe_defline),
94 ('QUERY_FROM',query_from),
95 ('QUERY_TO',query_to),
96 ('SEARCHSP_EFF',searchsp_eff),
97 ('SERVICE',service),
98 ('THRESHOLD',threshold),
99 ('UNGAPPED_ALIGNMENT',ungapped_alignment),
100 ('WORD_SIZE',word_size),
101 ('CMD', 'Put'),
102 ]
103 query = [x for x in parameters if x[1] is not None]
104 message = urllib.urlencode(query)
105
106
107
108
109
110 request = urllib2.Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
111 message,
112 {"User-Agent":"BiopythonClient"})
113 handle = urllib2.urlopen(request)
114
115
116
117 rid, rtoe = _parse_qblast_ref_page(handle)
118 parameters = [
119 ('ALIGNMENTS',alignments),
120 ('ALIGNMENT_VIEW',alignment_view),
121 ('DESCRIPTIONS',descriptions),
122 ('ENTREZ_LINKS_NEW_WINDOW',entrez_links_new_window),
123 ('EXPECT_LOW',expect_low),
124 ('EXPECT_HIGH',expect_high),
125 ('FORMAT_ENTREZ_QUERY',format_entrez_query),
126 ('FORMAT_OBJECT',format_object),
127 ('FORMAT_TYPE',format_type),
128 ('NCBI_GI',ncbi_gi),
129 ('RID',rid),
130 ('RESULTS_FILE',results_file),
131 ('SERVICE',service),
132 ('SHOW_OVERVIEW',show_overview),
133 ('CMD', 'Get'),
134 ]
135 query = [x for x in parameters if x[1] is not None]
136 message = urllib.urlencode(query)
137
138
139 delay = 3.0
140 previous = time.time()
141 while True:
142 current = time.time()
143 wait = previous + delay - current
144 if wait > 0:
145 time.sleep(wait)
146 previous = current + wait
147 else:
148 previous = current
149
150 request = urllib2.Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
151 message,
152 {"User-Agent":"BiopythonClient"})
153 handle = urllib2.urlopen(request)
154 results = handle.read()
155
156
157 if results=="\n\n":
158 continue
159
160 if results.find("Status=") < 0:
161 break
162 i = results.index("Status=")
163 j = results.index("\n", i)
164 status = results[i+len("Status="):j].strip()
165 if status.upper() == "READY":
166 break
167
168 return StringIO.StringIO(results)
169
171 """Extract a tuple of RID, RTOE from the 'please wait' page (PRIVATE).
172
173 The NCBI FAQ pages use TOE for 'Time of Execution', so RTOE is proably
174 'Request Time of Execution' and RID would be 'Request Identifier'.
175 """
176 s = handle.read()
177 i = s.find("RID =")
178 if i == -1:
179 rid = None
180 else:
181 j = s.find("\n", i)
182 rid = s[i+len("RID ="):j].strip()
183
184 i = s.find("RTOE =")
185 if i == -1:
186 rtoe = None
187 else:
188 j = s.find("\n", i)
189 rtoe = s[i+len("RTOE ="):j].strip()
190
191 if not rid and not rtoe:
192
193
194
195
196
197
198
199 i = s.find('<div class="error msInf">')
200 if i != -1:
201 msg = s[i+len('<div class="error msInf">'):].strip()
202 msg = msg.split("</div>",1)[0].split("\n",1)[0].strip()
203 if msg:
204 raise ValueError("Error message from NCBI: %s" % msg)
205
206 i = s.find('<p class="error">')
207 if i != -1:
208 msg = s[i+len('<p class="error">'):].strip()
209 msg = msg.split("</p>",1)[0].split("\n",1)[0].strip()
210 if msg:
211 raise ValueError("Error message from NCBI: %s" % msg)
212
213 i = s.find('Message ID#')
214 if i != -1:
215
216 msg = s[i:].split("<",1)[0].split("\n",1)[0].strip()
217 raise ValueError("Error message from NCBI: %s" % msg)
218
219
220 raise ValueError("No RID and no RTOE found in the 'please wait' page, "
221 "there was probably an error in your request but we "
222 "could not extract a helpful error message.")
223 elif not rid:
224
225 raise ValueError("No RID found in the 'please wait' page."
226 " (although RTOE = %s)" % repr(rtoe))
227 elif not rtoe:
228
229 raise ValueError("No RTOE found in the 'please wait' page."
230 " (although RID = %s)" % repr(rid))
231
232 try:
233 return rid, int(rtoe)
234 except ValueError:
235 raise ValueError("A non-integer RTOE found in " \
236 +"the 'please wait' page, %s" % repr(rtoe))
237