1
2
3
4
5
6
7 import warnings
8 import numpy
9
10
11 from StructureBuilder import StructureBuilder
12 from PDBExceptions import PDBConstructionException, PDBConstructionWarning
13 from parse_pdb_header import _parse_pdb_header_list
14
15 __doc__="Parser for PDB files."
16
17
18
19
20
22 """
23 Parse a PDB file and return a Structure object.
24 """
25
26 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
27 """
28 The PDB parser call a number of standard methods in an aggregated
29 StructureBuilder object. Normally this object is instanciated by the
30 PDBParser object itself, but if the user provides his own StructureBuilder
31 object, the latter is used instead.
32
33 Arguments:
34 o PERMISSIVE - int, if this is 0 exceptions in constructing the
35 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are
36 caught, but some residues or atoms will be missing. THESE EXCEPTIONS
37 ARE DUE TO PROBLEMS IN THE PDB FILE!.
38 o structure_builder - an optional user implemented StructureBuilder class.
39 """
40 if structure_builder!=None:
41 self.structure_builder=structure_builder
42 else:
43 self.structure_builder=StructureBuilder()
44 self.header=None
45 self.trailer=None
46 self.line_counter=0
47 self.PERMISSIVE=PERMISSIVE
48
49
50
52 """Return the structure.
53
54 Arguments:
55 o id - string, the id that will be used for the structure
56 o file - name of the PDB file OR an open filehandle
57 """
58 self.header=None
59 self.trailer=None
60
61 self.structure_builder.init_structure(id)
62 if isinstance(file, basestring):
63 file=open(file)
64 self._parse(file.readlines())
65 self.structure_builder.set_header(self.header)
66
67 return self.structure_builder.get_structure()
68
70 "Return the header."
71 return self.header
72
74 "Return the trailer."
75 return self.trailer
76
77
78
79 - def _parse(self, header_coords_trailer):
85
87 "Get the header of the PDB file, return the rest."
88 structure_builder=self.structure_builder
89 for i in range(0, len(header_coords_trailer)):
90 structure_builder.set_line_counter(i+1)
91 line=header_coords_trailer[i]
92 record_type=line[0:6]
93 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '):
94 break
95 header=header_coords_trailer[0:i]
96
97 self.line_counter=i
98 coords_trailer=header_coords_trailer[i:]
99 header_dict=_parse_pdb_header_list(header)
100 return header_dict, coords_trailer
101
103 "Parse the atomic data in the PDB file."
104 local_line_counter=0
105 structure_builder=self.structure_builder
106 current_model_id=0
107
108 model_open=0
109 current_chain_id=None
110 current_segid=None
111 current_residue_id=None
112 current_resname=None
113 for i in range(0, len(coords_trailer)):
114 line=coords_trailer[i]
115 record_type=line[0:6]
116 global_line_counter=self.line_counter+local_line_counter+1
117 structure_builder.set_line_counter(global_line_counter)
118 if(record_type=='ATOM ' or record_type=='HETATM'):
119
120 if not model_open:
121 structure_builder.init_model(current_model_id)
122 current_model_id+=1
123 model_open=1
124 fullname=line[12:16]
125
126 split_list=fullname.split()
127 if len(split_list)!=1:
128
129
130 name=fullname
131 else:
132
133 name=split_list[0]
134 altloc=line[16:17]
135 resname=line[17:20]
136 chainid=line[21:22]
137 try:
138 serial_number=int(line[6:11])
139 except:
140 serial_number=0
141 resseq=int(line[22:26].split()[0])
142 icode=line[26:27]
143 if record_type=='HETATM':
144 if resname=="HOH" or resname=="WAT":
145 hetero_flag="W"
146 else:
147 hetero_flag="H"
148 else:
149 hetero_flag=" "
150 residue_id=(hetero_flag, resseq, icode)
151
152 try:
153 x=float(line[30:38])
154 y=float(line[38:46])
155 z=float(line[46:54])
156 except:
157
158
159 raise PDBConstructionException(\
160 "Invalid or missing coordinate(s) at line %i." \
161 % global_line_counter)
162 coord=numpy.array((x, y, z), 'f')
163
164 try:
165 occupancy=float(line[54:60])
166 except:
167 self._handle_PDB_exception("Invalid or missing occupancy",
168 global_line_counter)
169 occupancy = 0.0
170 try:
171 bfactor=float(line[60:66])
172 except:
173 self._handle_PDB_exception("Invalid or missing B factor",
174 global_line_counter)
175 bfactor = 0.0
176 segid=line[72:76]
177 element=line[76:78].strip()
178 if current_segid!=segid:
179 current_segid=segid
180 structure_builder.init_seg(current_segid)
181 if current_chain_id!=chainid:
182 current_chain_id=chainid
183 structure_builder.init_chain(current_chain_id)
184 current_residue_id=residue_id
185 current_resname=resname
186 try:
187 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
188 except PDBConstructionException, message:
189 self._handle_PDB_exception(message, global_line_counter)
190 elif current_residue_id!=residue_id or current_resname!=resname:
191 current_residue_id=residue_id
192 current_resname=resname
193 try:
194 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
195 except PDBConstructionException, message:
196 self._handle_PDB_exception(message, global_line_counter)
197
198 try:
199 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc,
200 fullname, serial_number, element)
201 except PDBConstructionException, message:
202 self._handle_PDB_exception(message, global_line_counter)
203 elif(record_type=='ANISOU'):
204 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70]))
205
206 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f')
207 structure_builder.set_anisou(anisou_array)
208 elif(record_type=='MODEL '):
209 structure_builder.init_model(current_model_id)
210 current_model_id+=1
211 model_open=1
212 current_chain_id=None
213 current_residue_id=None
214 elif(record_type=='END ' or record_type=='CONECT'):
215
216 self.line_counter=self.line_counter+local_line_counter
217 return coords_trailer[local_line_counter:]
218 elif(record_type=='ENDMDL'):
219 model_open=0
220 current_chain_id=None
221 current_residue_id=None
222 elif(record_type=='SIGUIJ'):
223
224 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70]))
225
226 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f')
227 structure_builder.set_siguij(siguij_array)
228 elif(record_type=='SIGATM'):
229
230 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
231 sigatm_array=numpy.array(sigatm, 'f')
232 structure_builder.set_sigatm(sigatm_array)
233 local_line_counter=local_line_counter+1
234
235 self.line_counter=self.line_counter+local_line_counter
236 return []
237
239 """
240 This method catches an exception that occurs in the StructureBuilder
241 object (if PERMISSIVE==1), or raises it again, this time adding the
242 PDB line number to the error message.
243 """
244 message="%s at line %i." % (message, line_counter)
245 if self.PERMISSIVE:
246
247 warnings.warn("PDBConstructionException: %s\n"
248 "Exception ignored.\n"
249 "Some atoms or residues may be missing in the data structure."
250 % message, PDBConstructionWarning)
251 else:
252
253 raise PDBConstructionException(message)
254
255
256 if __name__=="__main__":
257
258 import sys
259
260 p=PDBParser(PERMISSIVE=1)
261
262 filename = sys.argv[1]
263 s=p.get_structure("scr", filename)
264
265 for m in s:
266 p=m.get_parent()
267 assert(p is s)
268 for c in m:
269 p=c.get_parent()
270 assert(p is m)
271 for r in c:
272 print r
273 p=r.get_parent()
274 assert(p is c)
275 for a in r:
276 p=a.get_parent()
277 if not p is r:
278 print p, r
279