Package Bio :: Package Phylo :: Module PhyloXMLIO
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.PhyloXMLIO

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """PhyloXML reader/parser, writer, and associated functions. 
  7   
  8  Instantiates tree elements from a parsed PhyloXML file, and constructs an XML 
  9  file from a Bio.Phylo.PhyloXML object. 
 10   
 11  About capitalization: 
 12   
 13      - phyloXML means the file format specification 
 14      - PhyloXML means the Biopython module Bio.Phylo.PhyloXML and its classes 
 15      - Phyloxml means the top-level class used by PhyloXMLIO.read (but not 
 16        Bio.Phylo.read!), containing a list of Phylogenies (Tree-derived objects) 
 17  """ 
 18  __docformat__ = "epytext en" 
 19   
 20  import sys 
 21  import warnings 
 22   
 23  from Bio.Phylo import PhyloXML as PX 
 24   
 25  if (3, 0, 0) <= sys.version_info[:3] <= (3, 1, 2): 
 26      # Workaround for cElementTree regression in python 3.0--3.1.2 
 27      # See http://bugs.python.org/issue9257 
 28      from xml.etree import ElementTree 
 29  else: 
 30      try: 
 31          from xml.etree import cElementTree as ElementTree 
 32      except ImportError: 
 33          # Alternative Python implementation, perhaps? 
 34          try: 
 35              from xml.etree import ElementTree as ElementTree 
 36          except ImportError: 
 37              # Python 2.4 -- check for 3rd-party implementations 
 38              try: 
 39                  from lxml import etree as ElementTree 
 40              except ImportError: 
 41                  try: 
 42                      import cElementTree as ElementTree 
 43                  except ImportError: 
 44                      try: 
 45                          from elementtree import ElementTree 
 46                      except ImportError: 
 47                          from Bio import MissingPythonDependencyError 
 48                          raise MissingPythonDependencyError( 
 49                                  "No ElementTree module was found. " 
 50                                  "Use Python 2.5+, lxml or elementtree if you " 
 51                                  "want to use Bio.PhyloXML.") 
 52   
 53  # Keep the standard namespace prefixes when writing 
 54  # See http://effbot.org/zone/element-namespaces.htm 
 55  NAMESPACES = { 
 56          'phy':  'http://www.phyloxml.org', 
 57          'xs':   'http://www.w3.org/2001/XMLSchema', 
 58          } 
 59   
 60  try: 
 61      register_namespace = ElementTree.register_namespace 
 62  except AttributeError: 
 63      if not hasattr(ElementTree, '_namespace_map'): 
 64          # cElementTree needs the pure-Python xml.etree.ElementTree 
 65          # Py2.4 support: the exception handler can go away when Py2.4 does 
 66          try: 
 67              from xml.etree import ElementTree as ET_py 
 68              ElementTree._namespace_map = ET_py._namespace_map 
 69          except ImportError: 
 70              warnings.warn("Couldn't import xml.etree.ElementTree; " 
 71                      "phyloXML namespaces may have unexpected abbreviations " 
 72                      "in the output.", 
 73                      # NB: ImportWarning was introduced in Py2.5 
 74                      Warning, stacklevel=2) 
 75              ElementTree._namespace_map = {} 
 76   
77 - def register_namespace(prefix, uri):
78 ElementTree._namespace_map[uri] = prefix
79 80 for prefix, uri in NAMESPACES.iteritems(): 81 register_namespace(prefix, uri) 82 83
class PhyloXMLError(Exception):
    """Signal that a PhyloXML object could not be constructed.

    Malformed XML is detected and reported by the underlying ElementTree
    module. This exception covers the other case: XML that is well-formed
    but does not follow the phyloXML specification.
    """
92 93 94 # --------------------------------------------------------- 95 # Public API 96
def read(file):
    """Parse a phyloXML file or stream into a tree of Biopython objects.

    The root node's children are phylogenies, possibly accompanied by
    arbitrary (non-phyloXML) objects.

    @return: a single Bio.Phylo.PhyloXML.Phyloxml object.
    """
    parser = Parser(file)
    return parser.read()
106
def parse(file):
    """Iterate over the phylogenetic trees in a phyloXML file.

    Any additional data stored at the top level is ignored. This may be
    more memory-efficient than the read() function.

    @return: a generator of Bio.Phylo.PhyloXML.Phylogeny objects.
    """
    parser = Parser(file)
    return parser.parse()
116
def write(obj, file, encoding='utf-8', indent=True):
    """Write a phyloXML file.

    C{obj} is an instance of Phyloxml, Phylogeny or BaseTree.Tree, or an
    iterable of either of the latter two. Whatever is given is converted to
    a Phyloxml object before serialization.

    C{file} can be either an open handle or a file name.

    @return: whatever Writer.write returns for the converted object.
    @raise ValueError: if C{obj}, or an item of an iterable C{obj}, is not
        one of the supported types.
    """
    def as_phylogeny(tree):
        # Most specific types first: the PhyloXML classes are tested before
        # the generic BaseTree ones.
        if isinstance(tree, PX.Phylogeny):
            return tree
        if isinstance(tree, PX.Clade):
            return tree.to_phylogeny()
        if isinstance(tree, PX.BaseTree.Tree):
            return PX.Phylogeny.from_tree(tree)
        if isinstance(tree, PX.BaseTree.Clade):
            return PX.Phylogeny.from_tree(PX.BaseTree.Tree(root=tree))
        raise ValueError("iterable must contain Tree or Clade types")

    if not isinstance(obj, PX.Phyloxml):
        if isinstance(obj, (PX.BaseTree.Tree, PX.BaseTree.Clade)):
            obj = as_phylogeny(obj).to_phyloxml()
        elif hasattr(obj, '__iter__'):
            obj = PX.Phyloxml({},
                    phylogenies=(as_phylogeny(t) for t in obj))
        else:
            raise ValueError("First argument must be a Phyloxml, Phylogeny, "
                    "Tree, or iterable of Trees or Phylogenies.")
    writer = Writer(obj)
    return writer.write(file, encoding=encoding, indent=indent)


# ---------------------------------------------------------
# Functions I wish ElementTree had
154 -def _local(tag):
155 """Extract the local tag from a namespaced tag name.""" 156 if tag[0] == '{': 157 return tag[tag.index('}')+1:] 158 return tag
159
160 -def _split_namespace(tag):
161 """Split a tag into namespace and local tag strings.""" 162 try: 163 return tag[1:].split('}', 1) 164 except: 165 return ('', tag)
166
def _ns(tag, namespace=NAMESPACES['phy']):
    """Qualify an XML tag with a namespace (the phyloXML one by default)."""
    qualified = '{%s}%s' % (namespace, tag)
    return qualified
170
def _get_child_as(parent, tag, construct):
    """Look up one child node by tag and hand it to a constructor.

    Returns None if no matching child is found.
    """
    node = parent.find(_ns(tag))
    if node is None:
        return None
    return construct(node)
179
def _get_child_text(parent, tag, construct=unicode):
    """Look up one child node by tag and hand its text to a constructor.

    Returns None if no matching child is found, or if the child has no text.
    """
    node = parent.find(_ns(tag))
    if node is None or not node.text:
        return None
    return construct(node.text)
188
def _get_children_as(parent, tag, construct):
    """Look up all child nodes with a tag; hand each to a constructor.

    Returns an empty list if no matching child is found.
    """
    built = []
    for node in parent.findall(_ns(tag)):
        built.append(construct(node))
    return built
196
def _get_children_text(parent, tag, construct=unicode):
    """Look up all child nodes with a tag; hand each node's text to a constructor.

    Children without text are skipped. Returns an empty list if no matching
    child is found.
    """
    texts = []
    for node in parent.findall(_ns(tag)):
        if node.text:
            texts.append(construct(node.text))
    return texts
205
206 -def _indent(elem, level=0):
207 """Add line breaks and indentation to ElementTree in-place. 208 209 Sources: 210 - U{ http://effbot.org/zone/element-lib.htm#prettyprint } 211 - U{ http://infix.se/2007/02/06/gentlemen-indent-your-xml } 212 """ 213 i = "\n" + level*" " 214 if len(elem): 215 if not elem.text or not elem.text.strip(): 216 elem.text = i + " " 217 for e in elem: 218 _indent(e, level+1) 219 if not e.tail or not e.tail.strip(): 220 e.tail = i + " " 221 if not e.tail or not e.tail.strip(): 222 e.tail = i 223 else: 224 if level and (not elem.tail or not elem.tail.strip()): 225 elem.tail = i
226 227 # --------------------------------------------------------- 228 # INPUT 229 # --------------------------------------------------------- 230
231 -def _str2bool(text):
232 if text == 'true': 233 return True 234 if text == 'false': 235 return False 236 raise ValueError('String could not be converted to boolean: ' + text)
237
238 -def _dict_str2bool(dct, keys):
239 out = dct.copy() 240 for key in keys: 241 if key in out: 242 out[key] = _str2bool(out[key]) 243 return out
244
245 -def _int(text):
246 if text is not None: 247 try: 248 return int(text) 249 except Exception: 250 return None
251
252 -def _float(text):
253 if text is not None: 254 try: 255 return float(text) 256 except Exception: 257 return None
258
259 -def _collapse_wspace(text):
260 """Replace all spans of whitespace with a single space character. 261 262 Also remove leading and trailing whitespace. See "Collapse Whitespace 263 Policy" in the U{ phyloXML spec glossary 264 <http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary> 265 }. 266 """ 267 if text is not None: 268 return ' '.join(text.split())
269 270 # NB: Not currently used
271 -def _replace_wspace(text):
272 """Replace tab, LF and CR characters with spaces, but don't collapse. 273 274 See "Replace Whitespace Policy" in the U{ phyloXML spec glossary 275 <http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary> 276 }. 277 """ 278 for char in ('\t', '\n', '\r'): 279 if char in text: 280 text = text.replace(char, ' ') 281 return text
282 283
class Parser(object):
    """Methods for parsing all phyloXML nodes from an XML stream.

    To minimize memory use, the tree of ElementTree parsing events is cleared
    after completing each phylogeny, clade, and top-level 'other' element.
    Elements below the clade level are kept in memory until parsing of the
    current clade is finished -- this shouldn't be a problem because clade is
    the only recursive element, and non-clade nodes below this level are of
    bounded size.
    """

    def __init__(self, file):
        """Start an incremental parse of C{file} and capture its root element."""
        # Get an iterable context for XML parsing events
        events = iter(ElementTree.iterparse(file, events=('start', 'end')))
        # The first event delivers the document's root element
        _first_event, root_elem = events.next()
        self.root = root_elem
        self.context = events

    def read(self):
        """Parse the phyloXML file and create a single Phyloxml object."""
        root_attrs = dict((_local(key), val)
                          for key, val in self.root.items())
        phyloxml = PX.Phyloxml(root_attrs)
        phy_ns = NAMESPACES['phy']
        # Nesting depth inside elements that are not part of phyloXML
        other_depth = 0
        for event, elem in self.context:
            namespace, localtag = _split_namespace(elem.tag)
            is_foreign = namespace != phy_ns
            if event == 'start':
                if is_foreign:
                    other_depth += 1
                    continue
                if localtag == 'phylogeny':
                    phyloxml.phylogenies.append(self._parse_phylogeny(elem))
            if event == 'end' and is_foreign:
                # Deal with items not specified by phyloXML
                other_depth -= 1
                if other_depth == 0:
                    # We're directly under the root node -- evaluate
                    otr = self.other(elem, namespace, localtag)
                    phyloxml.other.append(otr)
                    self.root.clear()
        return phyloxml

    def parse(self):
        """Parse the phyloXML file incrementally and return each phylogeny."""
        phytag = _ns('phylogeny')
        for event, elem in self.context:
            if event != 'start' or elem.tag != phytag:
                continue
            yield self._parse_phylogeny(elem)

    # Special parsing cases -- incremental, using self.context

    def _parse_phylogeny(self, parent):
        """Parse a single phylogeny within the phyloXML tree.

        Recursively builds a phylogenetic tree with help from parse_clade,
        then clears the XML event history for the phylogeny element and
        returns control to the top-level parsing function.
        """
        init_attrs = _dict_str2bool(parent.attrib, ['rooted', 'rerootable'])
        phylogeny = PX.Phylogeny(**init_attrs)
        list_types = {
                # XML tag, plural attribute
                'confidence':           'confidences',
                'property':             'properties',
                'clade_relation':       'clade_relations',
                'sequence_relation':    'sequence_relations',
                }
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start' and tag == 'clade':
                assert phylogeny.root is None, \
                        "Phylogeny object should only have 1 clade"
                phylogeny.root = self._parse_clade(elem)
                continue
            if event == 'end':
                if tag == 'phylogeny':
                    parent.clear()
                    break
                # Handle the other non-recursive children
                if tag in list_types:
                    target = getattr(phylogeny, list_types[tag])
                    target.append(getattr(self, tag)(elem))
                # Complex types
                elif tag in ('date', 'id'):
                    setattr(phylogeny, tag, getattr(self, tag)(elem))
                # Simple types
                elif tag in ('name', 'description'):
                    setattr(phylogeny, tag, _collapse_wspace(elem.text))
                # Unknown tags
                elif namespace != NAMESPACES['phy']:
                    phylogeny.other.append(self.other(elem, namespace, tag))
                    parent.clear()
                else:
                    # NB: This shouldn't happen in valid files
                    raise PhyloXMLError('Misidentified tag: ' + tag)
        return phylogeny

    _clade_complex_types = ['color', 'events', 'binary_characters', 'date']
    _clade_list_types = {
            'confidence':   'confidences',
            'distribution': 'distributions',
            'reference':    'references',
            'property':     'properties',
            }
    _clade_tracked_tags = set(_clade_complex_types
                              + list(_clade_list_types)
                              + ['branch_length', 'name', 'node_id', 'width'])

    def _parse_clade(self, parent):
        """Parse a Clade node and its children, recursively."""
        clade = PX.Clade(**parent.attrib)
        if clade.branch_length is not None:
            clade.branch_length = float(clade.branch_length)
        # NB: Only evaluate nodes at the current level
        tag_stack = []
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                # Recursive children consume their own events up to and
                # including their closing tag.
                if tag == 'clade':
                    clade.clades.append(self._parse_clade(elem))
                elif tag == 'taxonomy':
                    clade.taxonomies.append(self._parse_taxonomy(elem))
                elif tag == 'sequence':
                    clade.sequences.append(self._parse_sequence(elem))
                elif tag in self._clade_tracked_tags:
                    tag_stack.append(tag)
                continue
            if event == 'end':
                if tag == 'clade':
                    elem.clear()
                    break
                # NOTE(review): tag_stack is empty here when an untracked
                # element closes directly under the clade, in which case this
                # lookup raises IndexError -- confirm whether that is intended.
                if tag != tag_stack[-1]:
                    continue
                tag_stack.pop()
                # Handle the other non-recursive children
                if tag in self._clade_list_types:
                    target = getattr(clade, self._clade_list_types[tag])
                    target.append(getattr(self, tag)(elem))
                elif tag in self._clade_complex_types:
                    setattr(clade, tag, getattr(self, tag)(elem))
                elif tag == 'branch_length':
                    # NB: possible collision with the attribute
                    if clade.branch_length is not None:
                        raise PhyloXMLError(
                                'Attribute branch_length was already set '
                                'for this Clade.')
                    clade.branch_length = _float(elem.text)
                elif tag == 'width':
                    clade.width = _float(elem.text)
                elif tag == 'name':
                    clade.name = _collapse_wspace(elem.text)
                elif tag == 'node_id':
                    clade.node_id = PX.Id(elem.text.strip(),
                                          elem.attrib.get('provider'))
                elif namespace != NAMESPACES['phy']:
                    clade.other.append(self.other(elem, namespace, tag))
                    elem.clear()
                else:
                    raise PhyloXMLError('Misidentified tag: ' + tag)
        return clade

    def _parse_sequence(self, parent):
        """Parse a sequence node, consuming events up to its closing tag."""
        sequence = PX.Sequence(**parent.attrib)
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event != 'end':
                continue
            if tag == 'sequence':
                parent.clear()
                break
            if tag in ('accession', 'mol_seq', 'uri',
                       'domain_architecture'):
                setattr(sequence, tag, getattr(self, tag)(elem))
            elif tag == 'annotation':
                sequence.annotations.append(self.annotation(elem))
            elif tag == 'name':
                sequence.name = _collapse_wspace(elem.text)
            elif tag in ('symbol', 'location'):
                setattr(sequence, tag, elem.text)
            elif namespace != NAMESPACES['phy']:
                sequence.other.append(self.other(elem, namespace, tag))
                parent.clear()
        return sequence

    def _parse_taxonomy(self, parent):
        """Parse a taxonomy node, consuming events up to its closing tag."""
        taxonomy = PX.Taxonomy(**parent.attrib)
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event != 'end':
                continue
            if tag == 'taxonomy':
                parent.clear()
                break
            if tag in ('id', 'uri'):
                setattr(taxonomy, tag, getattr(self, tag)(elem))
            elif tag == 'common_name':
                taxonomy.common_names.append(_collapse_wspace(elem.text))
            elif tag == 'synonym':
                taxonomy.synonyms.append(elem.text)
            elif tag in ('code', 'scientific_name', 'authority', 'rank'):
                # ENH: check_str on rank
                setattr(taxonomy, tag, elem.text)
            elif namespace != NAMESPACES['phy']:
                taxonomy.other.append(self.other(elem, namespace, tag))
                parent.clear()
        return taxonomy

    def other(self, elem, namespace, localtag):
        """Wrap a non-phyloXML element, and its children recursively, in PX.Other."""
        stripped = elem.text and elem.text.strip()
        nested = [self.other(child, *_split_namespace(child.tag))
                  for child in elem]
        return PX.Other(localtag, namespace, elem.attrib,
                        value=stripped or None,
                        children=nested)

    # Complex types

    def accession(self, elem):
        """Build a PX.Accession from the element's text and 'source' attribute."""
        return PX.Accession(elem.text.strip(), elem.get('source'))

    def annotation(self, elem):
        """Build a PX.Annotation from the element's attributes and child nodes."""
        return PX.Annotation(
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                confidence=_get_child_as(elem, 'confidence', self.confidence),
                properties=_get_children_as(elem, 'property', self.property),
                uri=_get_child_as(elem, 'uri', self.uri),
                **elem.attrib)

    def binary_characters(self, elem):
        """Build a PX.BinaryCharacters from count attributes and 'bc' lists."""
        def bc_getter(elem):
            return _get_children_text(elem, 'bc')

        return PX.BinaryCharacters(
                type=elem.get('type'),
                gained_count=_int(elem.get('gained_count')),
                lost_count=_int(elem.get('lost_count')),
                present_count=_int(elem.get('present_count')),
                absent_count=_int(elem.get('absent_count')),
                # Flatten BinaryCharacterList sub-nodes into lists of strings
                gained=_get_child_as(elem, 'gained', bc_getter),
                lost=_get_child_as(elem, 'lost', bc_getter),
                present=_get_child_as(elem, 'present', bc_getter),
                absent=_get_child_as(elem, 'absent', bc_getter))

    def clade_relation(self, elem):
        """Build a PX.CladeRelation from the element's attributes."""
        return PX.CladeRelation(
                elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
                distance=elem.get('distance'),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def color(self, elem):
        """Build a PX.BranchColor from the red, green and blue child nodes."""
        channels = [_get_child_text(elem, name, int)
                    for name in ('red', 'green', 'blue')]
        red, green, blue = channels
        return PX.BranchColor(red, green, blue)

    def confidence(self, elem):
        """Build a PX.Confidence from the element's text and 'type' attribute."""
        return PX.Confidence(_float(elem.text), elem.get('type'))

    def date(self, elem):
        """Build a PX.Date from the 'unit' attribute and child nodes."""
        return PX.Date(
                unit=elem.get('unit'),
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                value=_get_child_text(elem, 'value', float),
                minimum=_get_child_text(elem, 'minimum', float),
                maximum=_get_child_text(elem, 'maximum', float),
                )

    def distribution(self, elem):
        """Build a PX.Distribution from desc, point and polygon child nodes."""
        return PX.Distribution(
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                points=_get_children_as(elem, 'point', self.point),
                polygons=_get_children_as(elem, 'polygon', self.polygon))

    def domain(self, elem):
        """Build a PX.ProteinDomain; the 'from' attribute is lowered by one."""
        start = int(elem.get('from')) - 1
        end = int(elem.get('to'))
        return PX.ProteinDomain(elem.text.strip(), start, end,
                confidence=_float(elem.get('confidence')),
                id=elem.get('id'))

    def domain_architecture(self, elem):
        """Build a PX.DomainArchitecture from 'length' and domain children."""
        return PX.DomainArchitecture(
                length=int(elem.get('length')),
                domains=_get_children_as(elem, 'domain', self.domain))

    def events(self, elem):
        """Build a PX.Events from the element's child nodes."""
        return PX.Events(
                type=_get_child_text(elem, 'type'),
                duplications=_get_child_text(elem, 'duplications', int),
                speciations=_get_child_text(elem, 'speciations', int),
                losses=_get_child_text(elem, 'losses', int),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def id(self, elem):
        """Build a PX.Id; the provider falls back to the 'type' attribute."""
        provider = elem.get('provider') or elem.get('type')
        return PX.Id(elem.text.strip(), provider)

    def mol_seq(self, elem):
        """Build a PX.MolSeq, converting 'is_aligned' to a boolean if present."""
        is_aligned = elem.get('is_aligned')
        if is_aligned is not None:
            is_aligned = _str2bool(is_aligned)
        return PX.MolSeq(elem.text.strip(), is_aligned=is_aligned)

    def point(self, elem):
        """Build a PX.Point from lat/long/alt children and datum attributes."""
        return PX.Point(
                elem.get('geodetic_datum'),
                _get_child_text(elem, 'lat', float),
                _get_child_text(elem, 'long', float),
                alt=_get_child_text(elem, 'alt', float),
                alt_unit=elem.get('alt_unit'))

    def polygon(self, elem):
        """Build a PX.Polygon from the element's point children."""
        return PX.Polygon(
                points=_get_children_as(elem, 'point', self.point))

    def property(self, elem):
        """Build a PX.Property from the element's text and attributes."""
        return PX.Property(elem.text.strip(),
                elem.get('ref'), elem.get('applies_to'), elem.get('datatype'),
                unit=elem.get('unit'),
                id_ref=elem.get('id_ref'))

    def reference(self, elem):
        """Build a PX.Reference from the 'doi' attribute and desc child."""
        return PX.Reference(
                doi=elem.get('doi'),
                desc=_get_child_text(elem, 'desc'))

    def sequence_relation(self, elem):
        """Build a PX.SequenceRelation; 'distance' is converted with _float."""
        return PX.SequenceRelation(
                elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
                distance=_float(elem.get('distance')),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def uri(self, elem):
        """Build a PX.Uri from the element's text, 'desc' and 'type'."""
        return PX.Uri(elem.text.strip(),
                desc=_collapse_wspace(elem.get('desc')),
                type=elem.get('type'))
618 619 620 621 # --------------------------------------------------------- 622 # OUTPUT 623 # --------------------------------------------------------- 624
625 -def _serialize(value):
626 """Convert a Python primitive to a phyloXML-compatible Unicode string.""" 627 if isinstance(value, float): 628 return unicode(value).upper() 629 elif isinstance(value, bool): 630 return unicode(value).lower() 631 return unicode(value)
632 633
634 -def _clean_attrib(obj, attrs):
635 """Create a dictionary from an object's specified, non-None attributes.""" 636 out = {} 637 for key in attrs: 638 val = getattr(obj, key) 639 if val is not None: 640 out[key] = _serialize(val) 641 return out
642 643
644 -def _handle_complex(tag, attribs, subnodes, has_text=False):
645 def wrapped(self, obj): 646 elem = ElementTree.Element(tag, _clean_attrib(obj, attribs)) 647 for subn in subnodes: 648 if isinstance(subn, basestring): 649 # singular object: method and attribute names are the same 650 if getattr(obj, subn) is not None: 651 elem.append(getattr(self, subn)(getattr(obj, subn))) 652 else: 653 # list: singular method, pluralized attribute name 654 method, plural = subn 655 for item in getattr(obj, plural): 656 elem.append(getattr(self, method)(item)) 657 if has_text: 658 elem.text = _serialize(obj.value) 659 return elem
660 wrapped.__doc__ = "Serialize a %s and its subnodes, in order." % tag 661 return wrapped 662 663
664 -def _handle_simple(tag):
665 def wrapped(self, obj): 666 elem = ElementTree.Element(tag) 667 elem.text = _serialize(obj) 668 return elem
669 wrapped.__doc__ = "Serialize a simple %s node." % tag 670 return wrapped 671 672
class Writer(object):
    """Methods for serializing a PhyloXML object to XML.

    Most element serializers are generated by _handle_complex (elements
    with attributes and/or child nodes) and _handle_simple (text-only
    elements); the remainder are written out by hand.
    """

    def __init__(self, phyloxml):
        """Build an ElementTree from a PhyloXML object."""
        assert isinstance(phyloxml, PX.Phyloxml), "Not a Phyloxml object"
        self._tree = ElementTree.ElementTree(self.phyloxml(phyloxml))

    def write(self, file, encoding='utf-8', indent=True):
        """Write the tree to C{file}, optionally indenting it first.

        @return: the number of child elements of the root node.
        """
        if indent:
            _indent(self._tree.getroot())
        self._tree.write(file, encoding)
        return len(self._tree.getroot())

    # Convert classes to ETree elements

    def phyloxml(self, obj):
        """Serialize the top-level object: its phylogenies, then 'other' items."""
        elem = ElementTree.Element(_ns('phyloxml'),
                # NB: This is for XSD validation, which we don't do
                # {_ns('schemaLocation', NAMESPACES['xsi']):
                #      obj.attributes['schemaLocation'],
                #      }
                )
        for tree in obj.phylogenies:
            elem.append(self.phylogeny(tree))
        for otr in obj.other:
            elem.append(self.other(otr))
        return elem

    def other(self, obj):
        """Serialize a non-phyloXML element and its children, recursively."""
        elem = ElementTree.Element(_ns(obj.tag, obj.namespace), obj.attributes)
        elem.text = obj.value
        for child in obj.children:
            elem.append(self.other(child))
        return elem

    phylogeny = _handle_complex(_ns('phylogeny'),
            ('rooted', 'rerootable', 'branch_length_unit', 'type'),
            ( 'name',
              'id',
              'description',
              'date',
              ('confidence',        'confidences'),
              'clade',
              ('clade_relation',    'clade_relations'),
              ('sequence_relation', 'sequence_relations'),
              ('property',          'properties'),
              ('other',             'other'),
              ))

    clade = _handle_complex(_ns('clade'), ('id_source',),
            ( 'name',
              'branch_length',
              ('confidence',    'confidences'),
              'width',
              'color',
              'node_id',
              ('taxonomy',      'taxonomies'),
              ('sequence',      'sequences'),
              'events',
              'binary_characters',
              ('distribution',  'distributions'),
              'date',
              ('reference',     'references'),
              ('property',      'properties'),
              ('clade',         'clades'),
              ('other',         'other'),
              ))

    accession = _handle_complex(_ns('accession'), ('source',),
            (), has_text=True)

    annotation = _handle_complex(_ns('annotation'),
            ('ref', 'source', 'evidence', 'type'),
            ( 'desc',
              'confidence',
              ('property',   'properties'),
              'uri',
              ))

    def binary_characters(self, obj):
        """Serialize a binary_characters node and its subnodes."""
        elem = ElementTree.Element(_ns('binary_characters'),
                _clean_attrib(obj,
                    ('type', 'gained_count', 'lost_count',
                        'present_count', 'absent_count')))
        for subn in ('gained', 'lost', 'present', 'absent'):
            subelem = ElementTree.Element(_ns(subn))
            for token in getattr(obj, subn):
                subelem.append(self.bc(token))
            elem.append(subelem)
        return elem

    clade_relation = _handle_complex(_ns('clade_relation'),
            ('id_ref_0', 'id_ref_1', 'distance', 'type'),
            ('confidence',))

    color = _handle_complex(_ns('color'), (), ('red', 'green', 'blue'))

    confidence = _handle_complex(_ns('confidence'), ('type',),
            (), has_text=True)

    date = _handle_complex(_ns('date'), ('unit',),
            ('desc', 'value', 'minimum', 'maximum'))

    distribution = _handle_complex(_ns('distribution'), (),
            ( 'desc',
              ('point',     'points'),
              ('polygon',   'polygons'),
              ))

    def domain(self, obj):
        """Serialize a domain node."""
        # 'from' is written as start + 1, mirroring the parser's 'from' - 1
        elem = ElementTree.Element(_ns('domain'),
                {'from': str(obj.start + 1), 'to': str(obj.end)})
        if obj.confidence is not None:
            elem.set('confidence', _serialize(obj.confidence))
        if obj.id is not None:
            elem.set('id', obj.id)
        elem.text = _serialize(obj.value)
        return elem

    domain_architecture = _handle_complex(_ns('domain_architecture'),
            ('length',),
            (('domain', 'domains'),))

    events = _handle_complex(_ns('events'), (),
            ( 'type',
              'duplications',
              'speciations',
              'losses',
              'confidence',
              ))

    id = _handle_complex(_ns('id'), ('provider',), (), has_text=True)

    # mol_seq carries the is_aligned attribute, so it needs the complex
    # handler; it must not be redefined among the simple string types below.
    mol_seq = _handle_complex(_ns('mol_seq'), ('is_aligned',),
            (), has_text=True)

    node_id = _handle_complex(_ns('node_id'), ('provider',), (), has_text=True)

    point = _handle_complex(_ns('point'), ('geodetic_datum', 'alt_unit'),
            ('lat', 'long', 'alt'))

    polygon = _handle_complex(_ns('polygon'), (), (('point', 'points'),))

    property = _handle_complex(_ns('property'),
            ('ref', 'unit', 'datatype', 'applies_to', 'id_ref'),
            (), has_text=True)

    reference = _handle_complex(_ns('reference'), ('doi',), ('desc',))

    sequence = _handle_complex(_ns('sequence'),
            ('type', 'id_ref', 'id_source'),
            ( 'symbol',
              'accession',
              'name',
              'location',
              'mol_seq',
              'uri',
              ('annotation', 'annotations'),
              'domain_architecture',
              ('other', 'other'),
              ))

    sequence_relation = _handle_complex(_ns('sequence_relation'),
            ('id_ref_0', 'id_ref_1', 'distance', 'type'),
            ('confidence',))

    taxonomy = _handle_complex(_ns('taxonomy'),
            ('id_source',),
            ( 'id',
              'code',
              'scientific_name',
              'authority',
              ('common_name',   'common_names'),
              ('synonym',   'synonyms'),
              'rank',
              'uri',
              ('other',         'other'),
              ))

    uri = _handle_complex(_ns('uri'), ('desc', 'type'), (), has_text=True)

    # Primitive types

    # Floating point
    alt = _handle_simple(_ns('alt'))
    branch_length = _handle_simple(_ns('branch_length'))
    lat = _handle_simple(_ns('lat'))
    long = _handle_simple(_ns('long'))
    maximum = _handle_simple(_ns('maximum'))
    minimum = _handle_simple(_ns('minimum'))
    value = _handle_simple(_ns('value'))
    width = _handle_simple(_ns('width'))

    # Integers
    blue = _handle_simple(_ns('blue'))
    duplications = _handle_simple(_ns('duplications'))
    green = _handle_simple(_ns('green'))
    losses = _handle_simple(_ns('losses'))
    red = _handle_simple(_ns('red'))
    speciations = _handle_simple(_ns('speciations'))

    # Strings
    # NB: no mol_seq entry here -- a simple handler would replace the
    # complex one defined earlier and drop the is_aligned attribute.
    bc = _handle_simple(_ns('bc'))
    code = _handle_simple(_ns('code'))
    common_name = _handle_simple(_ns('common_name'))
    desc = _handle_simple(_ns('desc'))
    description = _handle_simple(_ns('description'))
    location = _handle_simple(_ns('location'))
    name = _handle_simple(_ns('name'))
    rank = _handle_simple(_ns('rank'))
    scientific_name = _handle_simple(_ns('scientific_name'))
    symbol = _handle_simple(_ns('symbol'))
    synonym = _handle_simple(_ns('synonym'))
    type = _handle_simple(_ns('type'))
891