1
2
3
4
5
6 """
7 This module is OBSOLETE.
8 Most of the functionality in this module has moved to Bio.ExPASy.Prodoc;
9 please see
10
11 Bio.ExPASy.Prodoc.read To read a Prodoc file containing one entry.
12 Bio.ExPASy.Prodoc.parse Iterates over entries in a Prodoc file.
13 Bio.ExPASy.Prodoc.Record Holds Prodoc data.
14 Bio.ExPASy.Prodoc.Reference Holds data from a Prodoc reference.
15
16 The other functions and classes in Bio.Prosite.Prodoc (including
17 Bio.Prosite.Prodoc.index_file and Bio.Prosite.Prodoc.Dictionary) are
18 considered deprecated, and were not moved to Bio.ExPASy.Prodoc. If you use
19 this functionality, please contact the Biopython developers at
20 biopython-dev@biopython.org to avoid permanent removal of this module from
21 Biopython.
22
23
24
25
26 This module provides code to work with the prosite.doc file from
27 Prosite, available at http://www.expasy.ch/prosite/.
28
29 Tested with:
30 Release 15.0, July 1998
31 Release 16.0, July 1999
32 Release 20.22, 13 November 2007
33
34
35 Functions:
36 parse Iterates over entries in a Prodoc file.
37 index_file Index a Prodoc file for a Dictionary.
38 _extract_record Extract Prodoc data from a web page.
39
40
41 Classes:
42 Record Holds Prodoc data.
43 Reference Holds data from a Prodoc reference.
44 Dictionary Accesses a Prodoc file using a dictionary interface.
45 RecordParser Parses a Prodoc record into a Record object.
46
47 _Scanner Scans Prodoc-formatted data.
48 _RecordConsumer Consumes Prodoc data to a Record object.
49 """
50
51 from types import *
52 import os
53 import sgmllib
54 from Bio import File
55 from Bio import Index
56 from Bio.ParserSupport import *
57
69
78
79
80
81
82
84 """Holds information from a Prodoc record.
85
86 Members:
87 accession Accession number of the record.
88 prosite_refs List of tuples (prosite accession, prosite name).
89 text Free format text.
90 references List of reference objects.
91
92 """
94 self.accession = ''
95 self.prosite_refs = []
96 self.text = ''
97 self.references = []
98
100 """Holds information from a Prodoc citation.
101
102 Members:
103 number Number of the reference. (string)
104 authors Names of the authors.
105 citation Describes the citation.
106
107 """
109 self.number = ''
110 self.authors = ''
111 self.citation = ''
112
114 """Accesses a Prodoc file using a dictionary interface.
115
116 """
117 __filename_key = '__filename'
118
def __init__(self, indexname, parser=None):
    """Open a Prodoc Dictionary backed by an existing index.

    indexname is the name of the index for the dictionary; the index
    should have been created using the index_file function.  parser is
    an optional Parser object used to change the results into another
    form.  If it is None, the raw contents of the file will be returned.

    """
    self._index = Index.Index(indexname)
    # The index keeps the path of the indexed data file under a
    # reserved key; look it up and open that file.
    data_path = self._index[Dictionary.__filename_key]
    self._handle = open(data_path)
    self._parser = parser
132
135
143
146
148 """Parses Prodoc data into a Record object.
149
150 """
154
def parse(self, handle):
    """Scan handle with the parser's scanner and return the consumer's data.

    handle is passed straight to the scanner's feed method together with
    the parser's consumer; whatever the consumer holds in its data
    attribute afterwards is returned.

    """
    consumer = self._consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
158
160 """Scans Prodoc-formatted data.
161
162 Tested with:
163 Release 15.0, July 1998
164
165 """
def feed(self, handle, consumer):
    """Scan Prodoc data from handle, reporting events to consumer.

    handle is a file-like object that contains prosite data.  consumer
    is a Consumer object that will receive events as the report is
    scanned.

    """
    # Work on an UndoHandle so lines can be peeked at without consuming
    # them; wrap the handle only if it is not one already.
    uhandle = handle
    if not isinstance(uhandle, File.UndoHandle):
        uhandle = File.UndoHandle(uhandle)

    line = uhandle.peekline()
    while line:
        if is_blank_line(line):
            # Blank lines between records are consumed and ignored.
            uhandle.readline()
        else:
            self._scan_record(uhandle, consumer)
        line = uhandle.peekline()
189
202
205
210
def _scan_text(self, uhandle, consumer):
    """Pass free-text lines to consumer.text until the text section ends.

    Lines are read from uhandle one at a time.  Reading stops at the
    first line that either has '[' in column 0, ']' in column 3 and a
    space in column 4, or starts with '{END}'.  That line is pushed back
    onto uhandle with saveline so that the caller can process it.

    """
    while 1:
        line = safe_readline(uhandle)
        # Compare slices rather than single characters: a text line that
        # starts with '[' but is shorter than five characters must be
        # treated as text instead of raising IndexError.
        looks_like_reference = line[:1] == '[' and line[3:5] == '] '
        if looks_like_reference or line[:5] == '{END}':
            uhandle.saveline(line)
            break
        consumer.text(line)
219
227
229
230
231 read_and_call_while(uhandle, consumer.noevent, blank=1)
232 if attempt_read_and_call(uhandle, consumer.noevent, start='+----'):
233 read_and_call_until(uhandle, consumer.noevent, start='+----')
234 read_and_call(uhandle, consumer.noevent, start='+----')
235 read_and_call_while(uhandle, consumer.noevent, blank=1)
236
238 """Consumer that converts a Prodoc record to a Record object.
239
240 Members:
241 data Record with Prodoc data.
242
243 """
246
249
252
254 line = line.rstrip()
255 if line[0] != '{' or line[-1] != '}':
256 raise ValueError("I don't understand accession line\n%s" % line)
257 acc = line[1:-1]
258 if acc[:4] != 'PDOC':
259 raise ValueError("Invalid accession in line\n%s" % line)
260 self.data.accession = acc
261
263 line = line.rstrip()
264 if line[0] != '{' or line[-1] != '}':
265 raise ValueError("I don't understand accession line\n%s" % line)
266 acc, name = line[1:-1].split('; ')
267 self.data.prosite_refs.append((acc, name))
268
def text(self, line):
    """Append line to the free-format text of the record being built."""
    self.data.text += line
271
273 if line[0] == '[' and line[3] == ']':
274 self._ref = Reference()
275 self._ref.number = line[1:3].strip()
276 if line[1] == 'E':
277
278
279 self._ref.citation = line[4:].strip()
280 else:
281 self._ref.authors = line[4:].strip()
282 self.data.references.append(self._ref)
283 elif line[:4] == ' ':
284 if not self._ref:
285 raise ValueError("Unnumbered reference lines\n%s" % line)
286 self._ref.citation = self._ref.citation + line[5:]
287 else:
288 raise Exception("I don't understand the reference line\n%s" % line)
289
295
def index_file(filename, indexname, rec2key=None):
    """Index a Prodoc file.

    filename is the name of the file.  indexname is the name of the
    dictionary.  rec2key is an optional callback that takes a Record and
    generates a unique key (e.g. the accession number) for the record.
    If not specified, the record's accession will be used.

    Raises ValueError if filename does not exist, and KeyError if a
    record produces an empty key or a key that is already in the index.

    """
    import os
    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    # Store the data file's path under the private key that
    # Dictionary.__init__ looks up when it opens the index.
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    try:
        # A plain integer is enough here; no explicit long is needed.
        end = 0
        for record in parse(handle):
            # Each record spans from where the previous one ended to the
            # handle's current position.
            start = end
            end = handle.tell()
            length = end - start

            if rec2key is not None:
                key = rec2key(record)
            else:
                key = record.accession

            if not key:
                raise KeyError("empty key was produced")
            elif key in index:
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        # Close the data file whether indexing succeeded or failed.
        handle.close()
332