Package Bio :: Package Phylo :: Module PhyloXMLIO
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.PhyloXMLIO

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """PhyloXML reader/parser, writer, and associated functions. 
  7   
  8  Instantiates tree elements from a parsed PhyloXML file, and constructs an XML 
  9  file from a Bio.Phylo.PhyloXML object. 
 10   
 11  About capitalization: 
 12   
 13      - phyloXML means the file format specification 
 14      - PhyloXML means the Biopython module Bio.Phylo.PhyloXML and its classes 
 15      - Phyloxml means the top-level class used by PhyloXMLIO.read (but not 
 16        Bio.Phylo.read!), containing a list of Phylogenies (Tree-derived objects) 
 17  """ 
 18  __docformat__ = "epytext en" 
 19   
 20  import warnings 
 21   
 22  from Bio.Phylo import PhyloXML as PX 
 23   
 24  try: 
 25      from xml.etree import cElementTree as ElementTree 
 26  except ImportError: 
 27      try: 
 28          from xml.etree import ElementTree as ElementTree 
 29      except ImportError: 
 30          # Python 2.4 -- check for 3rd-party implementations 
 31          try: 
 32              from lxml import etree as ElementTree 
 33          except ImportError: 
 34              try: 
 35                  import cElementTree as ElementTree 
 36              except ImportError: 
 37                  try: 
 38                      from elementtree import ElementTree 
 39                  except ImportError: 
 40                      from Bio import MissingExternalDependencyError 
 41                      raise MissingExternalDependencyError( 
 42                              "No ElementTree module was found. " 
 43                              "Use Python 2.5+, lxml or elementtree if you " 
 44                              "want to use Bio.PhyloXML.") 
 45   
 46  # Keep the standard namespace prefixes when writing 
 47  # See http://effbot.org/zone/element-namespaces.htm 
 48  NAMESPACES = { 
 49          'phy':  'http://www.phyloxml.org', 
 50          'xs':   'http://www.w3.org/2001/XMLSchema', 
 51          } 
 52   
 53  try: 
 54      register_namespace = ElementTree.register_namespace 
 55  except AttributeError: 
 56      if not hasattr(ElementTree, '_namespace_map'): 
 57          # cElementTree needs the pure-Python xml.etree.ElementTree 
 58          # Py2.4 support: the exception handler can go away when Py2.4 does 
 59          try: 
 60              from xml.etree import ElementTree as ET_py 
 61              ElementTree._namespace_map = ET_py._namespace_map 
 62          except ImportError: 
 63              warnings.warn("Couldn't import xml.etree.ElementTree; " 
 64                      "phyloXML namespaces may have unexpected abbreviations " 
 65                      "in the output.", 
 66                      # NB: ImportWarning was introduced in Py2.5 
 67                      Warning, stacklevel=2) 
 68              ElementTree._namespace_map = {} 
 69   
70 - def register_namespace(prefix, uri):
71 ElementTree._namespace_map[uri] = prefix
72 73 for prefix, uri in NAMESPACES.iteritems(): 74 register_namespace(prefix, uri) 75 76
class PhyloXMLError(Exception):
    """Signal well-formed XML that violates the phyloXML specification.

    XML syntax errors are detected and raised by the underlying ElementTree
    module. This exception is for documents that are valid XML but from which
    PhyloXML objects cannot be constructed.
    """
85 86 87 # --------------------------------------------------------- 88 # Public API 89
def read(file):
    """Parse a phyloXML file or stream into a tree of Biopython objects.

    The children of the root node are phylogenies and possibly other
    arbitrary (non-phyloXML) objects.

    @param file: handed unchanged to the Parser constructor.
    @return: a single Bio.Phylo.PhyloXML.Phyloxml object.
    """
    parser = Parser(file)
    return parser.read()
99
def parse(file):
    """Iterate over the phylogenetic trees in a phyloXML file.

    Any additional data stored at the top level is ignored, but this may be
    more memory-efficient than the read() function.

    @param file: handed unchanged to the Parser constructor.
    @return: a generator of Bio.Phylo.PhyloXML.Phylogeny objects.
    """
    parser = Parser(file)
    return parser.parse()
109
def write(obj, file, encoding=None, indent=False):
    """Write a phyloXML file.

    The first argument is an instance of Phyloxml, Phylogeny or BaseTree.Tree,
    or an iterable of either of the latter two. The object will be converted
    to a Phyloxml object before serialization.

    The file argument can be either an open handle or a file name.

    @return: the value returned by Writer.write.
    @raise ValueError: if obj, or an item of an iterable obj, is not one of
        the supported tree or clade types.
    """
    def fix_single(tree):
        """Coerce one tree or clade object to a PX.Phylogeny."""
        # The PhyloXML types are tested before the BaseTree ones, as in the
        # original chain -- presumably because they are subclasses (confirm)
        if isinstance(tree, PX.Phylogeny):
            return tree
        elif isinstance(tree, PX.Clade):
            return tree.to_phylogeny()
        elif isinstance(tree, PX.BaseTree.Tree):
            return PX.Phylogeny.from_tree(tree)
        elif isinstance(tree, PX.BaseTree.Clade):
            wrapper = PX.BaseTree.Tree(root=tree)
            return PX.Phylogeny.from_tree(wrapper)
        raise ValueError("iterable must contain Tree or Clade types")

    if not isinstance(obj, PX.Phyloxml):
        if isinstance(obj, (PX.BaseTree.Tree, PX.BaseTree.Clade)):
            obj = fix_single(obj).to_phyloxml()
        elif hasattr(obj, '__iter__'):
            obj = PX.Phyloxml({}, phylogenies=(fix_single(t) for t in obj))
        else:
            raise ValueError("First argument must be a Phyloxml, Phylogeny, "
                    "Tree, or iterable of Trees or Phylogenies.")
    writer = Writer(obj)
    return writer.write(file, encoding=encoding, indent=indent)


# ---------------------------------------------------------
# Functions I wish ElementTree had

147 -def _local(tag):
148 """Extract the local tag from a namespaced tag name.""" 149 if tag[0] == '{': 150 return tag[tag.index('}')+1:] 151 return tag
152
153 -def _split_namespace(tag):
154 """Split a tag into namespace and local tag strings.""" 155 try: 156 return tag[1:].split('}', 1) 157 except: 158 return ('', tag)
159
def _ns(tag, namespace=NAMESPACES['phy']):
    """Format an XML tag with the given namespace.

    @param namespace: namespace URI; defaults to the phyloXML namespace.
    @return: the tag in '{namespace}tag' form.
    """
    return '{%(uri)s}%(local)s' % {'uri': namespace, 'local': tag}
163
def _get_child_as(parent, tag, construct):
    """Find a child node by tag, and pass it through a constructor.

    The tag is looked up in the phyloXML namespace.

    @return: construct(child), or None if no matching child is found.
    """
    node = parent.find(_ns(tag))
    if node is None:
        return None
    return construct(node)
172
def _get_child_text(parent, tag, construct=unicode):
    """Find a child node by tag; pass its text through a constructor.

    The tag is looked up in the phyloXML namespace.

    @return: construct(child.text), or None if no matching child is found
        or the child has no text.
    """
    node = parent.find(_ns(tag))
    if node is None or not node.text:
        return None
    return construct(node.text)
181
def _get_children_as(parent, tag, construct):
    """Find child nodes by tag; pass each through a constructor.

    The tag is looked up in the phyloXML namespace.

    @return: a list of constructed objects, empty if no child matches.
    """
    built = []
    for node in parent.findall(_ns(tag)):
        built.append(construct(node))
    return built
189
def _get_children_text(parent, tag, construct=unicode):
    """Find child nodes by tag; pass each node's text through a constructor.

    The tag is looked up in the phyloXML namespace. Children without text
    are skipped.

    @return: a list of constructed values, empty if no child matches.
    """
    values = []
    for node in parent.findall(_ns(tag)):
        if node.text:
            values.append(construct(node.text))
    return values
198
199 -def _indent(elem, level=0):
200 """Add line breaks and indentation to ElementTree in-place. 201 202 Sources: 203 - U{ http://effbot.org/zone/element-lib.htm#prettyprint } 204 - U{ http://infix.se/2007/02/06/gentlemen-indent-your-xml } 205 """ 206 i = "\n" + level*" " 207 if len(elem): 208 if not elem.text or not elem.text.strip(): 209 elem.text = i + " " 210 for e in elem: 211 _indent(e, level+1) 212 if not e.tail or not e.tail.strip(): 213 e.tail = i + " " 214 if not e.tail or not e.tail.strip(): 215 e.tail = i 216 else: 217 if level and (not elem.tail or not elem.tail.strip()): 218 elem.tail = i
219 220 # --------------------------------------------------------- 221 # INPUT 222 # --------------------------------------------------------- 223
224 -def _str2bool(text):
225 if text == 'true': 226 return True 227 if text == 'false': 228 return False 229 raise ValueError('String could not be converted to boolean: ' + text)
230
231 -def _dict_str2bool(dct, keys):
232 out = dct.copy() 233 for key in keys: 234 if key in out: 235 out[key] = _str2bool(out[key]) 236 return out
237
238 -def _int(text):
239 if text is not None: 240 try: 241 return int(text) 242 except Exception: 243 return None
244
245 -def _float(text):
246 if text is not None: 247 try: 248 return float(text) 249 except Exception: 250 return None
251
252 -def _collapse_wspace(text):
253 """Replace all spans of whitespace with a single space character. 254 255 Also remove leading and trailing whitespace. See "Collapse Whitespace 256 Policy" in the U{ phyloXML spec glossary 257 <http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary> 258 }. 259 """ 260 if text is not None: 261 return ' '.join(text.split())
262 263 # NB: Not currently used
264 -def _replace_wspace(text):
265 """Replace tab, LF and CR characters with spaces, but don't collapse. 266 267 See "Replace Whitespace Policy" in the U{ phyloXML spec glossary 268 <http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary> 269 }. 270 """ 271 for char in ('\t', '\n', '\r'): 272 if char in text: 273 text = text.replace(char, ' ') 274 return text
275 276
277 -class Parser(object):
278 """Methods for parsing all phyloXML nodes from an XML stream. 279 280 To minimize memory use, the tree of ElementTree parsing events is cleared 281 after completing each phylogeny, clade, and top-level 'other' element. 282 Elements below the clade level are kept in memory until parsing of the 283 current clade is finished -- this shouldn't be a problem because clade is 284 the only recursive element, and non-clade nodes below this level are of 285 bounded size. 286 """ 287
288 - def __init__(self, file):
289 # Get an iterable context for XML parsing events 290 context = iter(ElementTree.iterparse(file, events=('start', 'end'))) 291 event, root = context.next() 292 self.root = root 293 self.context = context
294
295 - def read(self):
296 """Parse the phyloXML file and create a single Phyloxml object.""" 297 phyloxml = PX.Phyloxml(dict((_local(key), val) 298 for key, val in self.root.items())) 299 other_depth = 0 300 for event, elem in self.context: 301 namespace, localtag = _split_namespace(elem.tag) 302 if event == 'start': 303 if namespace != NAMESPACES['phy']: 304 other_depth += 1 305 continue 306 if localtag == 'phylogeny': 307 phylogeny = self._parse_phylogeny(elem) 308 phyloxml.phylogenies.append(phylogeny) 309 if event == 'end' and namespace != NAMESPACES['phy']: 310 # Deal with items not specified by phyloXML 311 other_depth -= 1 312 if other_depth == 0: 313 # We're directly under the root node -- evaluate 314 otr = self.other(elem, namespace, localtag) 315 phyloxml.other.append(otr) 316 self.root.clear() 317 return phyloxml
318
319 - def parse(self):
320 """Parse the phyloXML file incrementally and return each phylogeny.""" 321 phytag = _ns('phylogeny') 322 for event, elem in self.context: 323 if event == 'start' and elem.tag == phytag: 324 yield self._parse_phylogeny(elem)
325 326 # Special parsing cases -- incremental, using self.context 327
328 - def _parse_phylogeny(self, parent):
329 """Parse a single phylogeny within the phyloXML tree. 330 331 Recursively builds a phylogenetic tree with help from parse_clade, then 332 clears the XML event history for the phylogeny element and returns 333 control to the top-level parsing function. 334 """ 335 phylogeny = PX.Phylogeny(**_dict_str2bool(parent.attrib, 336 ['rooted', 'rerootable'])) 337 list_types = { 338 # XML tag, plural attribute 339 'confidence': 'confidences', 340 'property': 'properties', 341 'clade_relation': 'clade_relations', 342 'sequence_relation': 'sequence_relations', 343 } 344 for event, elem in self.context: 345 namespace, tag = _split_namespace(elem.tag) 346 if event == 'start' and tag == 'clade': 347 assert phylogeny.root is None, \ 348 "Phylogeny object should only have 1 clade" 349 phylogeny.root = self._parse_clade(elem) 350 continue 351 if event == 'end': 352 if tag == 'phylogeny': 353 parent.clear() 354 break 355 # Handle the other non-recursive children 356 if tag in list_types: 357 getattr(phylogeny, list_types[tag]).append( 358 getattr(self, tag)(elem)) 359 # Complex types 360 elif tag in ('date', 'id'): 361 setattr(phylogeny, tag, getattr(self, tag)(elem)) 362 # Simple types 363 elif tag in ('name', 'description'): 364 setattr(phylogeny, tag, _collapse_wspace(elem.text)) 365 # Unknown tags 366 elif namespace != NAMESPACES['phy']: 367 phylogeny.other.append(self.other(elem, namespace, tag)) 368 parent.clear() 369 else: 370 # NB: This shouldn't happen in valid files 371 raise PhyloXMLError('Misidentified tag: ' + tag) 372 return phylogeny
373 374 _clade_complex_types = ['color', 'events', 'binary_characters', 'date'] 375 _clade_list_types = { 376 'confidence': 'confidences', 377 'distribution': 'distributions', 378 'reference': 'references', 379 'property': 'properties', 380 } 381 _clade_tracked_tags = set(_clade_complex_types + _clade_list_types.keys() 382 + ['branch_length', 'name', 'node_id', 'width']) 383
384 - def _parse_clade(self, parent):
385 """Parse a Clade node and its children, recursively.""" 386 clade = PX.Clade(**parent.attrib) 387 if clade.branch_length is not None: 388 clade.branch_length = float(clade.branch_length) 389 # NB: Only evaluate nodes at the current level 390 tag_stack = [] 391 for event, elem in self.context: 392 namespace, tag = _split_namespace(elem.tag) 393 if event == 'start': 394 if tag == 'clade': 395 clade.clades.append(self._parse_clade(elem)) 396 continue 397 if tag == 'taxonomy': 398 clade.taxonomies.append(self._parse_taxonomy(elem)) 399 continue 400 if tag == 'sequence': 401 clade.sequences.append(self._parse_sequence(elem)) 402 continue 403 if tag in self._clade_tracked_tags: 404 tag_stack.append(tag) 405 if event == 'end': 406 if tag == 'clade': 407 elem.clear() 408 break 409 if tag != tag_stack[-1]: 410 continue 411 tag_stack.pop() 412 # Handle the other non-recursive children 413 if tag in self._clade_list_types: 414 getattr(clade, self._clade_list_types[tag]).append( 415 getattr(self, tag)(elem)) 416 elif tag in self._clade_complex_types: 417 setattr(clade, tag, getattr(self, tag)(elem)) 418 elif tag == 'branch_length': 419 # NB: possible collision with the attribute 420 if clade.branch_length is not None: 421 raise PhyloXMLError( 422 'Attribute branch_length was already set ' 423 'for this Clade.') 424 clade.branch_length = _float(elem.text) 425 elif tag == 'width': 426 clade.width = _float(elem.text) 427 elif tag == 'name': 428 clade.name = _collapse_wspace(elem.text) 429 elif tag == 'node_id': 430 clade.node_id = PX.Id(elem.text.strip(), 431 elem.attrib.get('provider')) 432 elif namespace != NAMESPACES['phy']: 433 clade.other.append(self.other(elem, namespace, tag)) 434 elem.clear() 435 else: 436 raise PhyloXMLError('Misidentified tag: ' + tag) 437 return clade
438
439 - def _parse_sequence(self, parent):
440 sequence = PX.Sequence(**parent.attrib) 441 for event, elem in self.context: 442 namespace, tag = _split_namespace(elem.tag) 443 if event == 'end': 444 if tag == 'sequence': 445 parent.clear() 446 break 447 if tag in ('accession', 'mol_seq', 'uri', 448 'domain_architecture'): 449 setattr(sequence, tag, getattr(self, tag)(elem)) 450 elif tag == 'annotation': 451 sequence.annotations.append(self.annotation(elem)) 452 elif tag == 'name': 453 sequence.name = _collapse_wspace(elem.text) 454 elif tag in ('symbol', 'location'): 455 setattr(sequence, tag, elem.text) 456 elif namespace != NAMESPACES['phy']: 457 sequence.other.append(self.other(elem, namespace, tag)) 458 parent.clear() 459 return sequence
460
461 - def _parse_taxonomy(self, parent):
462 taxonomy = PX.Taxonomy(**parent.attrib) 463 for event, elem in self.context: 464 namespace, tag = _split_namespace(elem.tag) 465 if event == 'end': 466 if tag == 'taxonomy': 467 parent.clear() 468 break 469 if tag in ('id', 'uri'): 470 setattr(taxonomy, tag, getattr(self, tag)(elem)) 471 elif tag == 'common_name': 472 taxonomy.common_names.append(_collapse_wspace(elem.text)) 473 elif tag == 'synonym': 474 taxonomy.synonyms.append(elem.text) 475 elif tag in ('code', 'scientific_name', 'authority', 'rank'): 476 # ENH: check_str on rank 477 setattr(taxonomy, tag, elem.text) 478 elif namespace != NAMESPACES['phy']: 479 taxonomy.other.append(self.other(elem, namespace, tag)) 480 parent.clear() 481 return taxonomy
482
483 - def other(self, elem, namespace, localtag):
484 return PX.Other(localtag, namespace, elem.attrib, 485 value=elem.text and elem.text.strip() or None, 486 children=[self.other(child, *_split_namespace(child.tag)) 487 for child in elem])
488 489 # Complex types 490
491 - def accession(self, elem):
492 return PX.Accession(elem.text.strip(), elem.get('source'))
493
494 - def annotation(self, elem):
495 return PX.Annotation( 496 desc=_collapse_wspace(_get_child_text(elem, 'desc')), 497 confidence=_get_child_as(elem, 'confidence', self.confidence), 498 properties=_get_children_as(elem, 'property', self.property), 499 uri=_get_child_as(elem, 'uri', self.uri), 500 **elem.attrib)
501
502 - def binary_characters(self, elem):
503 def bc_getter(elem): 504 return _get_children_text(elem, 'bc')
505 return PX.BinaryCharacters( 506 type=elem.get('type'), 507 gained_count=_int(elem.get('gained_count')), 508 lost_count=_int(elem.get('lost_count')), 509 present_count=_int(elem.get('present_count')), 510 absent_count=_int(elem.get('absent_count')), 511 # Flatten BinaryCharacterList sub-nodes into lists of strings 512 gained=_get_child_as(elem, 'gained', bc_getter), 513 lost=_get_child_as(elem, 'lost', bc_getter), 514 present=_get_child_as(elem, 'present', bc_getter), 515 absent=_get_child_as(elem, 'absent', bc_getter))
516
517 - def clade_relation(self, elem):
518 return PX.CladeRelation( 519 elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'), 520 distance=elem.get('distance'), 521 confidence=_get_child_as(elem, 'confidence', self.confidence))
522
523 - def color(self, elem):
524 red, green, blue = (_get_child_text(elem, color, int) for color in 525 ('red', 'green', 'blue')) 526 return PX.BranchColor(red, green, blue)
527
528 - def confidence(self, elem):
529 return PX.Confidence( 530 _float(elem.text), 531 elem.get('type'))
532
533 - def date(self, elem):
534 return PX.Date( 535 unit=elem.get('unit'), 536 desc=_collapse_wspace(_get_child_text(elem, 'desc')), 537 value=_get_child_text(elem, 'value', float), 538 minimum=_get_child_text(elem, 'minimum', float), 539 maximum=_get_child_text(elem, 'maximum', float), 540 )
541
542 - def distribution(self, elem):
543 return PX.Distribution( 544 desc=_collapse_wspace(_get_child_text(elem, 'desc')), 545 points=_get_children_as(elem, 'point', self.point), 546 polygons=_get_children_as(elem, 'polygon', self.polygon))
547
548 - def domain(self, elem):
549 return PX.ProteinDomain(elem.text.strip(), 550 int(elem.get('from')) - 1, 551 int(elem.get('to')), 552 confidence=_float(elem.get('confidence')), 553 id=elem.get('id'))
554
555 - def domain_architecture(self, elem):
556 return PX.DomainArchitecture( 557 length=int(elem.get('length')), 558 domains=_get_children_as(elem, 'domain', self.domain))
559
560 - def events(self, elem):
561 return PX.Events( 562 type=_get_child_text(elem, 'type'), 563 duplications=_get_child_text(elem, 'duplications', int), 564 speciations=_get_child_text(elem, 'speciations', int), 565 losses=_get_child_text(elem, 'losses', int), 566 confidence=_get_child_as(elem, 'confidence', self.confidence))
567
568 - def id(self, elem):
569 provider = elem.get('provider') or elem.get('type') 570 return PX.Id(elem.text.strip(), provider)
571
572 - def mol_seq(self, elem):
573 is_aligned = elem.get('is_aligned') 574 if is_aligned is not None: 575 is_aligned = _str2bool(is_aligned) 576 return PX.MolSeq(elem.text.strip(), is_aligned=is_aligned)
577
578 - def point(self, elem):
579 return PX.Point( 580 elem.get('geodetic_datum'), 581 _get_child_text(elem, 'lat', float), 582 _get_child_text(elem, 'long', float), 583 alt=_get_child_text(elem, 'alt', float), 584 alt_unit=elem.get('alt_unit'))
585
586 - def polygon(self, elem):
587 return PX.Polygon( 588 points=_get_children_as(elem, 'point', self.point))
589
590 - def property(self, elem):
591 return PX.Property(elem.text.strip(), 592 elem.get('ref'), elem.get('applies_to'), elem.get('datatype'), 593 unit=elem.get('unit'), 594 id_ref=elem.get('id_ref'))
595
596 - def reference(self, elem):
597 return PX.Reference( 598 doi=elem.get('doi'), 599 desc=_get_child_text(elem, 'desc'))
600
601 - def sequence_relation(self, elem):
602 return PX.SequenceRelation( 603 elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'), 604 distance=_float(elem.get('distance')), 605 confidence=_get_child_as(elem, 'confidence', self.confidence))
606
607 - def uri(self, elem):
608 return PX.Uri(elem.text.strip(), 609 desc=_collapse_wspace(elem.get('desc')), 610 type=elem.get('type'))
611 612 613 614 # --------------------------------------------------------- 615 # OUTPUT 616 # --------------------------------------------------------- 617
def _serialize(value):
    """Convert a Python primitive to a phyloXML-compatible Unicode string.

    Floats are upper-cased and booleans lower-cased; anything else is
    converted with unicode() as is.
    """
    text = unicode(value)
    if isinstance(value, float):
        return text.upper()
    if isinstance(value, bool):
        return text.lower()
    return text
625 626
def _clean_attrib(obj, attrs):
    """Create a dictionary from an object's specified, non-None attributes.

    Each value is converted with _serialize.
    """
    pairs = ((key, getattr(obj, key)) for key in attrs)
    return dict((key, _serialize(val))
                for key, val in pairs
                if val is not None)
635 636
def _handle_complex(tag, attribs, subnodes, has_text=False):
    """Build a Writer method that serializes an object with sub-nodes.

    @param tag: tag of the element to create.
    @param attribs: attribute names copied from the object to the element
        (None values are left out).
    @param subnodes: ordered child specifications. A string names both the
        object attribute and the Writer method serializing it; a
        (method, plural) pair names a Writer method and the list attribute
        whose items it serializes one by one.
    @param has_text: if true, the object's value becomes the element text.
    """
    def wrapped(self, obj):
        elem = ElementTree.Element(tag, _clean_attrib(obj, attribs))
        for subn in subnodes:
            if not isinstance(subn, basestring):
                # list: singular method, pluralized attribute name
                method, plural = subn
                for item in getattr(obj, plural):
                    elem.append(getattr(self, method)(item))
                continue
            # singular object: method and attribute names are the same
            if getattr(obj, subn) is not None:
                elem.append(getattr(self, subn)(getattr(obj, subn)))
        if has_text:
            elem.text = _serialize(obj.value)
        return elem

    wrapped.__doc__ = "Serialize a %s and its subnodes, in order." % tag
    return wrapped


def _handle_simple(tag):
    """Build a Writer method that serializes a primitive as a text-only node.

    @param tag: tag of the element to create.
    """
    def wrapped(self, obj):
        node = ElementTree.Element(tag)
        node.text = _serialize(obj)
        return node

    wrapped.__doc__ = "Serialize a simple %s node." % tag
    return wrapped


666 -class Writer(object):
667 """Methods for serializing a PhyloXML object to XML.""" 668
669 - def __init__(self, phyloxml):
670 """Build an ElementTree from a PhyloXML object.""" 671 assert isinstance(phyloxml, PX.Phyloxml), "Not a Phyloxml object" 672 self._tree = ElementTree.ElementTree(self.phyloxml(phyloxml))
673
674 - def write(self, file, encoding=None, indent=False):
675 if indent: 676 _indent(self._tree.getroot()) 677 if encoding is not None: 678 self._tree.write(file, encoding) 679 else: 680 self._tree.write(file) 681 return len(self._tree.getroot())
682 683 # Convert classes to ETree elements 684
685 - def phyloxml(self, obj):
686 elem = ElementTree.Element(_ns('phyloxml'), 687 # NB: This is for XSD validation, which we don't do 688 # {_ns('schemaLocation', NAMESPACES['xsi']): 689 # obj.attributes['schemaLocation'], 690 # } 691 ) 692 for tree in obj.phylogenies: 693 elem.append(self.phylogeny(tree)) 694 for otr in obj.other: 695 elem.append(self.other(otr)) 696 return elem
697
698 - def other(self, obj):
699 elem = ElementTree.Element(_ns(obj.tag, obj.namespace), obj.attributes) 700 elem.text = obj.value 701 for child in obj.children: 702 elem.append(self.other(child)) 703 return elem

    # The handlers below are generated by
    # _handle_complex(tag, attribs, subnodes, has_text). In the subnodes
    # tuple, a plain string names a single-valued attribute serialized by the
    # Writer method of the same name, and a (method, plural) pair names a list
    # attribute whose items are serialized one by one. Sub-nodes are emitted
    # in the order listed.
    phylogeny = _handle_complex(_ns('phylogeny'),
            ('rooted', 'rerootable', 'branch_length_unit', 'type'),
            (   'name',
                'id',
                'description',
                'date',
                ('confidence',      'confidences'),
                'clade',
                ('clade_relation',  'clade_relations'),
                ('sequence_relation', 'sequence_relations'),
                ('property',        'properties'),
                ('other',           'other'),
                ))

    clade = _handle_complex(_ns('clade'), ('id_source',),
            (   'name',
                'branch_length',
                ('confidence',  'confidences'),
                'width',
                'color',
                'node_id',
                ('taxonomy',    'taxonomies'),
                ('sequence',    'sequences'),
                'events',
                'binary_characters',
                ('distribution', 'distributions'),
                'date',
                ('reference',   'references'),
                ('property',    'properties'),
                ('clade',       'clades'),
                ('other',       'other'),
                ))

    # has_text=True: the object's value becomes the element text
    accession = _handle_complex(_ns('accession'), ('source',),
            (), has_text=True)

    annotation = _handle_complex(_ns('annotation'),
            ('ref', 'source', 'evidence', 'type'),
            (   'desc',
                'confidence',
                ('property',   'properties'),
                'uri',
                ))

749 - def binary_characters(self, obj):
750 """Serialize a binary_characters node and its subnodes.""" 751 elem = ElementTree.Element(_ns('binary_characters'), 752 _clean_attrib(obj, 753 ('type', 'gained_count', 'lost_count', 754 'present_count', 'absent_count'))) 755 for subn in ('gained', 'lost', 'present', 'absent'): 756 subelem = ElementTree.Element(_ns(subn)) 757 for token in getattr(obj, subn): 758 subelem.append(self.bc(token)) 759 elem.append(subelem) 760 return elem

    # Generated handlers; arguments are (tag, attribs, subnodes, has_text).
    # With has_text=True the object's value becomes the element text.
    clade_relation = _handle_complex(_ns('clade_relation'),
            ('id_ref_0', 'id_ref_1', 'distance', 'type'),
            ('confidence',))

    color = _handle_complex(_ns('color'), (), ('red', 'green', 'blue'))

    confidence = _handle_complex(_ns('confidence'), ('type',),
            (), has_text=True)

    date = _handle_complex(_ns('date'), ('unit',),
            ('desc', 'value', 'minimum', 'maximum'))

    distribution = _handle_complex(_ns('distribution'), (),
            (   'desc',
                ('point',   'points'),
                ('polygon', 'polygons'),
                ))

780 - def domain(self, obj):
781 """Serialize a domain node.""" 782 elem = ElementTree.Element(_ns('domain'), 783 {'from': str(obj.start + 1), 'to': str(obj.end)}) 784 if obj.confidence is not None: 785 elem.set('confidence', _serialize(obj.confidence)) 786 if obj.id is not None: 787 elem.set('id', obj.id) 788 elem.text = _serialize(obj.value) 789 return elem
790 791 domain_architecture = _handle_complex(_ns('domain_architecture'), 792 ('length',), 793 (('domain', 'domains'),)) 794 795 events = _handle_complex(_ns('events'), (), 796 ( 'type', 797 'duplications', 798 'speciations', 799 'losses', 800 'confidence', 801 )) 802 803 id = _handle_complex(_ns('id'), ('provider',), (), has_text=True) 804 805 mol_seq = _handle_complex(_ns('mol_seq'), ('is_aligned',), 806 (), has_text=True) 807 808 node_id = _handle_complex(_ns('node_id'), ('provider',), (), has_text=True) 809 810 point = _handle_complex(_ns('point'), ('geodetic_datum', 'alt_unit'), 811 ('lat', 'long', 'alt')) 812 813 polygon = _handle_complex(_ns('polygon'), (), (('point', 'points'),)) 814 815 property = _handle_complex(_ns('property'), 816 ('ref', 'unit', 'datatype', 'applies_to', 'id_ref'), 817 (), has_text=True) 818 819 reference = _handle_complex(_ns('reference'), ('doi',), ('desc',)) 820 821 sequence = _handle_complex(_ns('sequence'), 822 ('type', 'id_ref', 'id_source'), 823 ( 'symbol', 824 'accession', 825 'name', 826 'location', 827 'mol_seq', 828 'uri', 829 ('annotation', 'annotations'), 830 'domain_architecture', 831 ('other', 'other'), 832 )) 833 834 sequence_relation = _handle_complex(_ns('sequence_relation'), 835 ('id_ref_0', 'id_ref_1', 'distance', 'type'), 836 ('confidence',)) 837 838 taxonomy = _handle_complex(_ns('taxonomy'), 839 ('id_source',), 840 ( 'id', 841 'code', 842 'scientific_name', 843 'authority', 844 ('common_name', 'common_names'), 845 ('synonym', 'synonyms'), 846 'rank', 847 'uri', 848 ('other', 'other'), 849 )) 850 851 uri = _handle_complex(_ns('uri'), ('desc', 'type'), (), has_text=True) 852 853 # Primitive types 854 855 # Floating point 856 alt = _handle_simple(_ns('alt')) 857 branch_length = _handle_simple(_ns('branch_length')) 858 lat = _handle_simple(_ns('lat')) 859 long = _handle_simple(_ns('long')) 860 maximum = _handle_simple(_ns('maximum')) 861 minimum = _handle_simple(_ns('minimum')) 862 value = 
_handle_simple(_ns('value')) 863 width = _handle_simple(_ns('width')) 864 865 # Integers 866 blue = _handle_simple(_ns('blue')) 867 duplications = _handle_simple(_ns('duplications')) 868 green = _handle_simple(_ns('green')) 869 losses = _handle_simple(_ns('losses')) 870 red = _handle_simple(_ns('red')) 871 speciations = _handle_simple(_ns('speciations')) 872 873 # Strings 874 bc = _handle_simple(_ns('bc')) 875 code = _handle_simple(_ns('code')) 876 common_name = _handle_simple(_ns('common_name')) 877 desc = _handle_simple(_ns('desc')) 878 description = _handle_simple(_ns('description')) 879 location = _handle_simple(_ns('location')) 880 mol_seq = _handle_simple(_ns('mol_seq')) 881 name = _handle_simple(_ns('name')) 882 rank = _handle_simple(_ns('rank')) 883 scientific_name = _handle_simple(_ns('scientific_name')) 884 symbol = _handle_simple(_ns('symbol')) 885 synonym = _handle_simple(_ns('synonym')) 886 type = _handle_simple(_ns('type'))
887