Package Bio :: Package KEGG :: Package Enzyme
[hide private]
[frames | no frames]

Source Code for Package Bio.KEGG.Enzyme

  1  # Copyright 2001 by Tarjei Mikkelsen.  All rights reserved. 
  2  # Copyright 2007 by Michiel de Hoon.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """ 
  8  This module provides code to work with the KEGG Enzyme database. 
  9   
 10  Functions: 
 11  parse - Returns an iterator giving Record objects. 
 12   
 13  Classes: 
 14  Record               -- Holds the information from a KEGG Enzyme record. 
 15  """ 
 16   
 17  from Bio.KEGG import _write_kegg 
 18  from Bio.KEGG import _wrap_kegg 
 19   
 20   
 21  # Set up line wrapping rules (see Bio.KEGG._wrap_kegg) 
 22  rxn_wrap = [0, "", 
 23              (" + ","",1,1), 
 24              (" = ","",1,1), 
 25              (" ","$",1,1), 
 26              ("-","$",1,1)] 
 27  name_wrap = [0, "", 
 28               (" ","$",1,1), 
 29               ("-","$",1,1)] 
 30  id_wrap = lambda indent : [indent, "", 
 31                             (" ","",1,0)] 
 32  struct_wrap = lambda indent : [indent, "", 
 33                                 ("  ","",1,1)] 
 34   
class Record:
    """Holds info from a KEGG Enzyme record.

    Members:
    entry       The EC number (without the 'EC ').
    name        A list of the enzyme names.
    classname   A list of the classification terms.
    sysname     A list of the systematic name strings of the enzyme.
    reaction    A list of the reaction description strings.
    substrate   A list of the substrates.
    product     A list of the products.
    inhibitor   A list of the inhibitors.
    cofactor    A list of the cofactors.
    effector    A list of the effectors.
    comment     A list of the comment strings.
    pathway     A list of 3-tuples: (database, id, pathway)
    genes       A list of 2-tuples: (organism, list of gene ids)
    disease     A list of 3-tuples: (database, id, disease)
    structures  A list of 2-tuples: (database, list of struct ids)
    dblinks     A list of 2-tuples: (database, list of db ids)
    """

    def __init__(self):
        """Create a new, empty Record.

        Every list member gets its own fresh list, so records never
        share state.
        """
        self.entry = ""
        self.name = []
        self.classname = []
        self.sysname = []
        self.reaction = []
        self.substrate = []
        self.product = []
        self.inhibitor = []
        self.cofactor = []
        self.effector = []
        self.comment = []
        self.pathway = []
        self.genes = []
        self.disease = []
        self.structures = []
        self.dblinks = []

    def __str__(self):
        """Return a string representation of this Record.

        The formatted fields are concatenated in a fixed order and the
        result is terminated by the "///" end-of-record marker.
        """
        return (
            self._entry()
            + self._name()
            + self._classname()
            + self._sysname()
            + self._reaction()
            + self._substrate()
            + self._product()
            + self._inhibitor()
            + self._cofactor()
            + self._effector()
            + self._comment()
            + self._pathway()
            + self._genes()
            + self._disease()
            + self._structures()
            + self._dblinks()
            + "///"
        )

    def _entry(self):
        """Format the ENTRY field; the 'EC ' prefix is added back here."""
        return _write_kegg("ENTRY", ["EC " + self.entry])

    def _name(self):
        """Format the NAME field, wrapping each name with name_wrap."""
        # A list comprehension (rather than map + lambda) keeps this in
        # step with the sibling formatters and always hands _write_kegg
        # a real list.
        return _write_kegg(
            "NAME",
            [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.name])

    def _classname(self):
        """Format the CLASS field; the terms are written unwrapped."""
        return _write_kegg("CLASS", self.classname)

    def _sysname(self):
        """Format the SYSNAME field, wrapping each line with name_wrap."""
        return _write_kegg(
            "SYSNAME",
            [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.sysname])

    def _reaction(self):
        """Format the REACTION field, wrapping each line with rxn_wrap."""
        return _write_kegg(
            "REACTION",
            [_wrap_kegg(l, wrap_rule=rxn_wrap) for l in self.reaction])

    def _substrate(self):
        """Format the SUBSTRATE field, wrapping each line with name_wrap."""
        return _write_kegg(
            "SUBSTRATE",
            [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.substrate])

    def _product(self):
        """Format the PRODUCT field, wrapping each line with name_wrap."""
        return _write_kegg(
            "PRODUCT",
            [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.product])

    def _inhibitor(self):
        """Format the INHIBITOR field, wrapping each line with name_wrap."""
        return _write_kegg(
            "INHIBITOR",
            [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.inhibitor])

    def _cofactor(self):
        """Format the COFACTOR field, wrapping each line with name_wrap."""
        return _write_kegg(
            "COFACTOR",
            [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.cofactor])

    def _effector(self):
        """Format the EFFECTOR field, wrapping each line with name_wrap."""
        return _write_kegg(
            "EFFECTOR",
            [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.effector])

    def _comment(self):
        """Format the COMMENT field, wrapping each line with id_wrap(0)."""
        return _write_kegg(
            "COMMENT",
            [_wrap_kegg(l, wrap_rule=id_wrap(0)) for l in self.comment])

    def _pathway(self):
        """Format the PATHWAY field, one "database: id pathway" line per entry."""
        s = [entry[0] + ": " + entry[1] + " " + entry[2]
             for entry in self.pathway]
        return _write_kegg(
            "PATHWAY",
            [_wrap_kegg(l, wrap_rule=id_wrap(16)) for l in s])

    def _genes(self):
        """Format the GENES field, one "organism: gene gene ..." line per entry."""
        s = [entry[0] + ": " + " ".join(entry[1]) for entry in self.genes]
        return _write_kegg(
            "GENES",
            [_wrap_kegg(l, wrap_rule=id_wrap(5)) for l in s])

    def _disease(self):
        """Format the DISEASE field, one "database: id disease" line per entry."""
        s = [entry[0] + ": " + entry[1] + " " + entry[2]
             for entry in self.disease]
        return _write_kegg(
            "DISEASE",
            [_wrap_kegg(l, wrap_rule=id_wrap(13)) for l in s])

    def _structures(self):
        """Format the STRUCTURES field, one "database: id id ... " line per entry."""
        # NOTE(review): struct_wrap breaks on a two-space separator while
        # the ids here are joined with a single space -- confirm the
        # separator widths against the upstream source.
        s = [entry[0] + ": " + " ".join(entry[1]) + " "
             for entry in self.structures]
        return _write_kegg(
            "STRUCTURES",
            [_wrap_kegg(l, wrap_rule=struct_wrap(5)) for l in s])

    def _dblinks(self):
        """Format the DBLINKS field, one "database: id id ..." line per entry."""
        # NOTE(review): __str__ calls this method, but no definition of it
        # was present in the class as received, so str(record) raised
        # AttributeError. It is reconstructed here on the pattern of
        # _genes, without line wrapping -- confirm against the upstream
        # implementation.
        s = [entry[0] + ": " + " ".join(entry[1]) for entry in self.dblinks]
        return _write_kegg("DBLINKS", s)
179 180 181
def parse(handle):
    """Parse a KEGG Enzyme file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/enzyme.sample")
    >>> for record in parse(handle):
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    1.1.1.1 Alcohol dehydrogenase
    1.1.1.62 Estradiol 17beta-dehydrogenase
    1.1.1.68 Transferred to EC 1.7.99.5
    1.6.5.3 NADH dehydrogenase (ubiquinone)
    1.14.13.28 3,9-Dihydroxypterocarpan 6a-monooxygenase
    2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase
    3.1.1.6 Acetylesterase
    2.7.2.1 Acetate kinase

    Each line is split into a 12-character keyword column and a data
    column.  A line whose keyword column is all spaces continues the
    field opened by the most recent keyword.  A Record is yielded each
    time a line starting with "///" is read; lines after the last "///"
    are parsed but never yielded.  Unrecognised keywords are ignored.
    """
    keyword_width = 12
    # A keyword column made only of spaces marks a continuation line.
    continuation = " " * keyword_width

    # Keywords whose data is appended verbatim to a Record list attribute.
    plain_fields = {
        "CLASS": "classname",
        "COFACTOR": "cofactor",
        "COMMENT": "comment",
    }
    # Keywords whose data is appended after stripping ";" from both ends.
    stripped_fields = {
        "EFFECTOR": "effector",
        "INHIBITOR": "inhibitor",
        "NAME": "name",
        "PRODUCT": "product",
        "REACTION": "reaction",
        "SUBSTRATE": "substrate",
        "SYSNAME": "sysname",
    }

    # Start with an empty keyword so a stray leading continuation line is
    # ignored instead of raising on an unbound local.
    keyword = ""
    record = Record()
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            continue
        if line[:keyword_width] != continuation:
            # Drop the column padding so the keyword can be compared with
            # its bare name regardless of how it was padded.
            keyword = line[:keyword_width].rstrip()
        data = line[keyword_width:].strip()

        if keyword == "ENTRY":
            # data looks like "EC <number> ..."; keep only the number.
            words = data.split()
            record.entry = words[1]
        elif keyword in plain_fields:
            getattr(record, plain_fields[keyword]).append(data)
        elif keyword in stripped_fields:
            getattr(record, stripped_fields[keyword]).append(data.strip(";"))
        elif keyword == "DBLINKS":
            if ":" in data:
                # Split on the first colon only, so ids that themselves
                # contain ":" do not break the unpacking.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # Continuation: more ids for the last database (the id
                # list inside the tuple is extended in place).
                record.dblinks[-1][1].extend(data.split())
        elif keyword == "DISEASE":
            if ":" in data:
                database, rest = data.split(":", 1)
                number, name = rest.split(None, 1)
                record.disease.append((database, number, name))
            else:
                # Continuation: the disease name ran onto another line.
                database, number, name = record.disease[-1]
                record.disease[-1] = (database, number, name + " " + data)
        elif keyword == "GENES":
            if data[3:5] == ": ":
                key, values = data.split(":", 1)
                # Keep only the part of each gene token before any "(".
                values = [value.split("(")[0] for value in values.split()]
                record.genes.append((key, values))
            else:
                # Continuation: more genes for the last organism.
                record.genes[-1][1].extend(
                    [value.split("(")[0] for value in data.split()])
        elif keyword == "PATHWAY":
            if data[:5] == "PATH:":
                path, map_id, name = data.split(None, 2)
                # path is "PATH:"; drop the trailing colon.
                record.pathway.append((path[:-1], map_id, name))
            else:
                # Continuation: the pathway name ran onto another line.
                path, map_id, name = record.pathway[-1]
                record.pathway[-1] = (path, map_id, name + " " + data)
        elif keyword == "STRUCTURES":
            if data[:4] == "PDB:":
                record.structures.append((data[:3], data[4:].split()))
            else:
                # Continuation: more accessions for the last database.
                record.structures[-1][1].extend(data.split())
295
def _test():
    """Run the Bio.KEGG.Enzyme module's doctests.

    This will try and locate the unit tests directory, and run the doctests
    from there in order that the relative paths used in the examples work.
    If the directory cannot be found, nothing is run.
    """
    import doctest
    import os

    tests_dir = os.path.join("..", "..", "..", "Tests")
    if os.path.isdir(tests_dir):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Running doctests...")
        cur_dir = os.path.abspath(os.curdir)
        os.chdir(tests_dir)
        try:
            doctest.testmod()
        finally:
            # Always restore the caller's working directory, even if the
            # doctest run raises.
            os.chdir(cur_dir)
        print("Done")
# Run the module's doctests when this file is executed as a script.
if __name__ == "__main__":
    _test()