Package Bio :: Package Phylo :: Module BaseTree
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.BaseTree

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Base classes for Bio.Phylo objects. 
  7   
  8  All object representations for phylogenetic trees should derive from these base 
  9  classes in order to use the common methods defined on them. 
 10  """ 
 11  __docformat__ = "epytext en" 
 12   
 13  import collections 
 14  import copy 
 15  import itertools 
 16  import random 
 17  import re 
 18  import warnings 
 19  import Bio 
 20   
 21  from Bio.Phylo import _sugar 
22 23 # General tree-traversal algorithms 24 25 -def _level_traverse(root, get_children):
26 """Traverse a tree in breadth-first (level) order.""" 27 Q = collections.deque([root]) 28 while Q: 29 v = Q.popleft() 30 yield v 31 Q.extend(get_children(v))
32
33 -def _preorder_traverse(root, get_children):
34 """Traverse a tree in depth-first pre-order (parent before children).""" 35 def dfs(elem): 36 yield elem 37 for v in get_children(elem): 38 for u in dfs(v): 39 yield u
40 for elem in dfs(root): 41 yield elem 42
43 -def _postorder_traverse(root, get_children):
44 """Traverse a tree in depth-first post-order (children before parent).""" 45 def dfs(elem): 46 for v in get_children(elem): 47 for u in dfs(v): 48 yield u 49 yield elem
50 for elem in dfs(root): 51 yield elem 52
53 54 -def _sorted_attrs(elem):
55 """Get a flat list of elem's attributes, sorted for consistency.""" 56 singles = [] 57 lists = [] 58 # Sort attributes for consistent results 59 for attrname, child in sorted(elem.__dict__.iteritems(), 60 key=lambda kv: kv[0]): 61 if child is None: 62 continue 63 if isinstance(child, list): 64 lists.extend(child) 65 else: 66 singles.append(child) 67 return (x for x in singles + lists 68 if isinstance(x, TreeElement))
69
70 71 # Factory functions to generalize searching for clades/nodes 72 73 -def _identity_matcher(target):
74 """Match a node to the target object by identity.""" 75 def match(node): 76 return (node is target)
77 return match 78
79 -def _class_matcher(target_cls):
80 """Match a node if it's an instance of the given class.""" 81 def match(node): 82 return isinstance(node, target_cls)
83 return match 84
85 -def _string_matcher(target):
86 def match(node): 87 return unicode(node) == target
88 return match 89
90 -def _attribute_matcher(kwargs):
91 """Match a node by specified attribute values. 92 93 'terminal' is a special case: True restricts the search to external (leaf) 94 nodes, False restricts to internal nodes, and None allows all tree elements 95 to be searched, including phyloXML annotations. 96 97 Otherwise, for a tree element to match the specification (i.e. for the 98 function produced by _attribute_matcher to return True when given a tree 99 element), it must have each of the attributes specified by the keys and 100 match each of the corresponding values -- think 'and', not 'or', for 101 multiple keys. 102 """ 103 def match(node): 104 if 'terminal' in kwargs: 105 # Special case: restrict to internal/external/any nodes 106 kwa_copy = kwargs.copy() 107 pattern = kwa_copy.pop('terminal') 108 if (pattern is not None and 109 (not hasattr(node, 'is_terminal') or 110 node.is_terminal() != pattern)): 111 return False 112 else: 113 kwa_copy = kwargs 114 for key, pattern in kwa_copy.iteritems(): 115 # Nodes must match all other specified attributes 116 if not hasattr(node, key): 117 return False 118 target = getattr(node, key) 119 if isinstance(pattern, basestring): 120 return (isinstance(target, basestring) and 121 re.match(pattern+'$', target)) 122 if isinstance(pattern, bool): 123 return (pattern == bool(target)) 124 if isinstance(pattern, int): 125 return (pattern == target) 126 if pattern is None: 127 return (target is None) 128 raise TypeError('invalid query type: %s' % type(pattern)) 129 return True
130 return match 131
132 -def _function_matcher(matcher_func):
133 """Safer attribute lookup -- returns False instead of raising an error.""" 134 def match(node): 135 try: 136 return matcher_func(node) 137 except (LookupError, AttributeError, ValueError): 138 return False
139 return match 140
141 -def _object_matcher(obj):
142 """Retrieve a matcher function by passing an arbitrary object. 143 144 i.e. passing a TreeElement such as a Node or Tree instance returns an 145 identity matcher, passing a type such as the PhyloXML.Taxonomy class returns 146 a class matcher, and passing a dictionary returns an attribute matcher. 147 148 The resulting 'match' function returns True when given an object matching 149 the specification (identity, type or attribute values), otherwise False. 150 This is useful for writing functions that search the tree, and probably 151 shouldn't be used directly by the end user. 152 """ 153 if isinstance(obj, TreeElement): 154 return _identity_matcher(obj) 155 if isinstance(obj, type): 156 return _class_matcher(obj) 157 if isinstance(obj, basestring): 158 return _string_matcher(obj) 159 if isinstance(obj, dict): 160 return _attribute_matcher(obj) 161 if callable(obj): 162 return _function_matcher(obj) 163 raise ValueError("%s (type %s) is not a valid type for comparison." 164 % (obj, type(obj)))
165
166 167 -def _combine_matchers(target, kwargs, require_spec):
168 """Merge target specifications with keyword arguments. 169 170 Dispatch the components to the various matcher functions, then merge into a 171 single boolean function. 172 """ 173 if not target: 174 if not kwargs: 175 if require_spec: 176 raise ValueError("you must specify a target object or keyword " 177 "arguments.") 178 return lambda x: True 179 return _attribute_matcher(kwargs) 180 match_obj = _object_matcher(target) 181 if not kwargs: 182 return match_obj 183 match_kwargs = _attribute_matcher(kwargs) 184 return (lambda x: match_obj(x) and match_kwargs(x))
185
186 187 -def _combine_args(first, *rest):
188 """Convert [targets] or *targets arguments to a single iterable. 189 190 This helps other functions work like the built-in functions `max` and 191 `min`. 192 """ 193 # Background: is_monophyletic takes a single list or iterable (like the 194 # same method in Bio.Nexus.Trees); root_with_outgroup and common_ancestor 195 # take separate arguments. This mismatch was in the initial release and I 196 # didn't notice the inconsistency until after Biopython 1.55. I can think 197 # of cases where either style is more convenient, so let's support both 198 # (for backward compatibility and consistency between methods). 199 if hasattr(first, '__iter__') and not (isinstance(first, TreeElement) or 200 isinstance(first, type) or isinstance(first, basestring) or 201 isinstance(first, dict)): 202 # `terminals` is an iterable of targets 203 if rest: 204 raise ValueError("Arguments must be either a single list of " 205 "targets, or separately specified targets " 206 "(e.g. foo(t1, t2, t3)), but not both.") 207 return first 208 # `terminals` is a single target -- wrap in a container 209 return itertools.chain([first], rest)
210
211 212 # Class definitions 213 214 -class TreeElement(object):
215 """Base class for all Bio.Phylo classes.""" 216
217 - def __repr__(self):
218 """Show this object's constructor with its primitive arguments.""" 219 def pair_as_kwarg_string(key, val): 220 if isinstance(val, basestring): 221 return "%s='%s'" % (key, _sugar.trim_str(unicode(val))) 222 return "%s=%s" % (key, val)
223 return u'%s(%s)' % (self.__class__.__name__, 224 ', '.join(pair_as_kwarg_string(key, val) 225 for key, val in self.__dict__.iteritems() 226 if val is not None and 227 type(val) in (str, int, float, bool, unicode) 228 ))
229 230 __str__ = __repr__ 231
232 233 -class TreeMixin(object):
234 """Methods for Tree- and Clade-based classes. 235 236 This lets Tree and Clade support the same traversal and searching 237 operations without requiring Clade to inherit from Tree, so Clade isn't 238 required to have all of Tree's attributes -- just 'root' (a Clade 239 instance) and 'is_terminal()'. 240 """ 241 # Traversal methods 242
243 - def _filter_search(self, filter_func, order, follow_attrs):
244 """Perform a BFS or DFS traversal through all elements in this tree. 245 246 @return: generator of all elements for which 'filter_func' is True. 247 """ 248 order_opts = {'preorder': _preorder_traverse, 249 'postorder': _postorder_traverse, 250 'level': _level_traverse} 251 try: 252 order_func = order_opts[order] 253 except KeyError: 254 raise ValueError("Invalid order '%s'; must be one of: %s" 255 % (order, tuple(order_opts.keys()))) 256 if follow_attrs: 257 get_children = _sorted_attrs 258 root = self 259 else: 260 get_children = lambda elem: elem.clades 261 root = self.root 262 return itertools.ifilter(filter_func, order_func(root, get_children))
263
264 - def find_any(self, *args, **kwargs):
265 """Return the first element found by find_elements(), or None. 266 267 This is also useful for checking whether any matching element exists in 268 the tree, and can be used in a conditional expression. 269 """ 270 hits = self.find_elements(*args, **kwargs) 271 try: 272 return hits.next() 273 except StopIteration: 274 return None
275
276 - def find_elements(self, target=None, terminal=None, order='preorder', 277 **kwargs):
278 """Find all tree elements matching the given attributes. 279 280 The arbitrary keyword arguments indicate the attribute name of the 281 sub-element and the value to match: string, integer or boolean. Strings 282 are evaluated as regular expression matches; integers are compared 283 directly for equality, and booleans evaluate the attribute's truth value 284 (True or False) before comparing. To handle nonzero floats, search with 285 a boolean argument, then filter the result manually. 286 287 If no keyword arguments are given, then just the class type is used for 288 matching. 289 290 The result is an iterable through all matching objects, by depth-first 291 search. (Not necessarily the same order as the elements appear in the 292 source file!) 293 294 Example: 295 296 >>> from Bio.Phylo.IO import PhyloXMIO 297 >>> phx = PhyloXMLIO.read('phyloxml_examples.xml') 298 >>> matches = phx.phylogenies[5].find_elements(code='OCTVU') 299 >>> matches.next() 300 Taxonomy(code='OCTVU', scientific_name='Octopus vulgaris') 301 302 @param target: 303 Specifies the characteristics to search for. (The default, 304 TreeElement, matches any standard Bio.Phylo type.) 305 @type target: TreeElement instance, type, dict, or callable 306 307 @param terminal: 308 A boolean value to select for or against terminal nodes (a.k.a. leaf 309 nodes). True searches for only terminal nodes, False excludes 310 terminal nodes, and the default, None, searches both terminal and 311 non-terminal nodes, as well as any tree elements lacking the 312 'is_terminal' method. 313 @type terminal: bool 314 315 @param order: 316 Tree traversal order: 'preorder' (default) is depth-first search, 317 'postorder' is DFS with child nodes preceding parents, and 'level' 318 is breadth-first search. 319 @type order: string ('preorder'|'postorder'|'level') 320 """ 321 if terminal is not None: 322 kwargs['terminal'] = terminal 323 is_matching_elem = _combine_matchers(target, kwargs, False) 324 return self._filter_search(is_matching_elem, order, True)
325
326 - def find_clades(self, target=None, terminal=None, order='preorder', 327 **kwargs):
328 """Find each clade containing a matching element. 329 330 That is, find each element as with find_elements(), but return the 331 corresponding clade object. (This is usually what you want.) 332 333 The result is an iterable through all matching objects, searching 334 depth-first (preorder) by default. 335 """ 336 def match_attrs(elem): 337 orig_clades = elem.__dict__.pop('clades') 338 found = elem.find_any(target, **kwargs) 339 elem.clades = orig_clades 340 return (found is not None)
341 if terminal is None: 342 is_matching_elem = match_attrs 343 else: 344 def is_matching_elem(elem): 345 return ((elem.is_terminal() == terminal) and 346 match_attrs(elem))
347 return self._filter_search(is_matching_elem, order, False) 348
349 - def get_path(self, target=None, **kwargs):
350 """List the clades directly between this root and the given target. 351 352 Returns a list of all clade objects along this path, ending with 353 the given target, but excluding the root clade. 354 """ 355 # Only one path will work -- ignore weights and visits 356 path = [] 357 match = _combine_matchers(target, kwargs, True) 358 def check_in_path(v): 359 if match(v): 360 path.append(v) 361 return True 362 elif v.is_terminal(): 363 return False 364 for child in v: 365 if check_in_path(child): 366 path.append(v) 367 return True 368 return False
369 if not check_in_path(self.root): 370 return None 371 return path[-2::-1] 372
373 - def get_nonterminals(self, order='preorder'):
374 """Get a list of all of this tree's nonterminal (internal) nodes.""" 375 return list(self.find_clades(terminal=False, order=order))
376
377 - def get_terminals(self, order='preorder'):
378 """Get a list of all of this tree's terminal (leaf) nodes.""" 379 return list(self.find_clades(terminal=True, order=order))
380
381 - def trace(self, start, finish):
382 """List of all clade object between two targets in this tree. 383 384 Excluding start, including finish. 385 """ 386 mrca = self.common_ancestor(start, finish) 387 fromstart = mrca.get_path(start)[-2::-1] 388 to = mrca.get_path(finish) 389 return fromstart + [mrca] + to
390 391 # Information methods 392
393 - def common_ancestor(self, targets, *more_targets):
394 """Most recent common ancestor (clade) of all the given targets. 395 396 Edge cases: 397 398 - If no target is given, returns self.root 399 - If 1 target is given, returns the target 400 - If any target is not found in this tree, raises a ValueError 401 """ 402 paths = [self.get_path(t) 403 for t in _combine_args(targets, *more_targets)] 404 # Validation -- otherwise izip throws a spooky error below 405 for p, t in zip(paths, targets): 406 if p is None: 407 raise ValueError("target %s is not in this tree" % repr(t)) 408 mrca = self.root 409 for level in itertools.izip(*paths): 410 ref = level[0] 411 for other in level[1:]: 412 if ref is not other: 413 break 414 else: 415 mrca = ref 416 if ref is not mrca: 417 break 418 return mrca
419
420 - def count_terminals(self):
421 """Counts the number of terminal (leaf) nodes within this tree.""" 422 return _sugar.iterlen(self.find_clades(terminal=True))
423
424 - def depths(self, unit_branch_lengths=False):
425 """Create a mapping of tree clades to depths (by branch length). 426 427 The result is a dictionary where the keys are all of the Clade instances 428 in the tree, and the values are the distance from the root to each clade 429 (including terminals). 430 431 By default the distance is the cumulative branch length leading to the 432 clade. With the unit_branch_lengths=True option, only the number of 433 branches (levels in the tree) is counted. 434 435 @return: dict of {clade: depth} 436 """ 437 if unit_branch_lengths: 438 depth_of = lambda c: 1 439 else: 440 depth_of = lambda c: c.branch_length or 0 441 depths = {} 442 def update_depths(node, curr_depth): 443 depths[node] = curr_depth 444 for child in node.clades: 445 new_depth = curr_depth + depth_of(child) 446 update_depths(child, new_depth)
447 update_depths(self.root, 0) 448 return depths 449
450 - def distance(self, target1, target2=None):
451 """Calculate the sum of the branch lengths between two targets. 452 453 If only one target is specified, the other is the root of this tree. 454 """ 455 if target2 is None: 456 return sum(n.branch_length for n in self.get_path(target1) 457 if n.branch_length is not None) 458 mrca = self.common_ancestor(target1, target2) 459 return mrca.distance(target1) + mrca.distance(target2)
460
461 - def is_bifurcating(self):
462 """Return True if tree downstream of node is strictly bifurcating. 463 464 I.e., all nodes have either 2 or 0 children (internal or external, 465 respectively). The root may have 3 descendents and still be considered 466 part of a bifurcating tree, because it has no ancestor. 467 """ 468 # Root can be trifurcating 469 if isinstance(self, Tree) and len(self.root) == 3: 470 return (self.root.clades[0].is_bifurcating() and 471 self.root.clades[1].is_bifurcating() and 472 self.root.clades[2].is_bifurcating()) 473 if len(self.root) == 2: 474 return (self.root.clades[0].is_bifurcating() and 475 self.root.clades[1].is_bifurcating()) 476 if len(self.root) == 0: 477 return True 478 return False
479
480 - def is_monophyletic(self, terminals, *more_terminals):
481 """MRCA of terminals if they comprise a complete subclade, or False. 482 483 I.e., there exists a clade such that its terminals are the same set as 484 the given targets. 485 486 The given targets must be terminals of the tree. 487 488 To match both Bio.Nexus.Trees and the other multi-target methods in 489 Bio.Phylo, arguments to this method can be specified either of two ways: 490 (i) as a single list of targets, or (ii) separately specified targets, 491 e.g. is_monophyletic(t1, t2, t3) -- but not both. 492 493 For convenience, this method returns the common ancestor (MCRA) of the 494 targets if they are monophyletic (instead of the value True), and False 495 otherwise. 496 497 @return: common ancestor if terminals are monophyletic, otherwise False. 498 """ 499 target_set = set(_combine_args(terminals, *more_terminals)) 500 current = self.root 501 while True: 502 if set(current.get_terminals()) == target_set: 503 return current 504 # Try a narrower subclade 505 for subclade in current.clades: 506 if set(subclade.get_terminals()).issuperset(target_set): 507 current = subclade 508 break 509 else: 510 return False
511
512 - def is_parent_of(self, target=None, **kwargs):
513 """True if target is a descendent of this tree. 514 515 Not required to be a direct descendent. 516 517 To check only direct descendents of a clade, simply use list membership 518 testing: "if subclade in clade: ..." 519 """ 520 return self.get_path(target, **kwargs) is not None
521
522 - def is_preterminal(self):
523 """True if all direct descendents are terminal.""" 524 if self.root.is_terminal(): 525 return False 526 for clade in self.root.clades: 527 if not clade.is_terminal(): 528 return False 529 return True
530
531 - def total_branch_length(self):
532 """Calculate the sum of all the branch lengths in this tree.""" 533 return sum(node.branch_length 534 for node in self.find_clades(branch_length=True))
535 536 # Tree manipulation methods 537
538 - def collapse(self, target=None, **kwargs):
539 """Deletes target from the tree, relinking its children to its parent. 540 541 @return: the parent clade. 542 """ 543 path = self.get_path(target, **kwargs) 544 if not path: 545 raise ValueError("couldn't collapse %s in this tree" 546 % (target or kwargs)) 547 if len(path) == 1: 548 parent = self.root 549 else: 550 parent = path[-2] 551 popped = parent.clades.pop(parent.clades.index(path[-1])) 552 extra_length = popped.branch_length or 0 553 for child in popped: 554 child.branch_length += extra_length 555 parent.clades.extend(popped.clades) 556 return parent
557
558 - def collapse_all(self):
559 """Collapse all the descendents of this tree, leaving only terminals. 560 561 Branch lengths are preserved, i.e. the distance to each terminal stays 562 the same. 563 564 To collapse only certain elements, use the collapse method directly in a 565 loop with find_clades: 566 567 >>> for clade in tree.find_clades(branch_length=True, order='level'): 568 ... if (clade.branch_length < .5 and 569 ... not clade.is_terminal() and 570 ... clade is not self.root): 571 ... tree.collapse(clade) 572 573 Note that level-order traversal helps avoid strange side-effects when 574 modifying the tree while iterating over its clades. 575 """ 576 internals = self.find_clades(terminal=False, order='level') 577 # Skip the root node -- it can't be collapsed 578 internals.next() 579 for clade in internals: 580 self.collapse(clade)
581
582 - def ladderize(self, reverse=False):
583 """Sort clades in-place according to the number of terminal nodes. 584 585 Deepest clades are last by default. Use reverse=True to sort clades 586 deepest-to-shallowest. 587 """ 588 self.root.clades.sort(key=lambda c: c.count_terminals(), 589 reverse=reverse) 590 for subclade in self.root.clades: 591 subclade.ladderize(reverse=reverse)
592
593 - def prune(self, target=None, **kwargs):
594 """Prunes a terminal clade from the tree. 595 596 If taxon is from a bifurcation, the connecting node will be collapsed 597 and its branch length added to remaining terminal node. This might be no 598 longer be a meaningful value. 599 600 @return: parent clade of the pruned target 601 """ 602 if 'terminal' in kwargs and kwargs['terminal']: 603 raise ValueError("target must be terminal") 604 path = self.get_path(target, terminal=True, **kwargs) 605 if not path: 606 raise ValueError("can't find a matching target below this root") 607 if len(path) == 1: 608 parent = self.root 609 else: 610 parent = path[-2] 611 parent.clades.remove(path[-1]) 612 if len(parent) == 1: 613 # We deleted a branch from a bifurcation 614 if parent == self.root: 615 # If we're at the root, move the root upwards 616 # NB: This loses the length of the original branch 617 newroot = parent.clades[0] 618 newroot.branch_length = None 619 parent = self.root = newroot 620 else: 621 # If we're not at the root, collapse this parent 622 child = parent.clades[0] 623 if child.branch_length is not None: 624 child.branch_length += (parent.branch_length or 0.0) 625 if len(path) < 3: 626 grandparent = self.root 627 else: 628 grandparent = path[-3] 629 # Replace parent with child at the same place in grandparent 630 index = grandparent.clades.index(parent) 631 grandparent.clades.pop(index) 632 grandparent.clades.insert(index, child) 633 parent = grandparent 634 return parent
635
636 - def split(self, n=2, branch_length=1.0):
637 """Generate n (default 2) new descendants. 638 639 In a species tree, this is a speciation event. 640 641 New clades have the given branch_length and the same name as this 642 clade's root plus an integer suffix (counting from 0). For example, 643 splitting a clade named "A" produces sub-clades named "A0" and "A1". 644 """ 645 clade_cls = type(self.root) 646 base_name = self.root.name or '' 647 for i in range(n): 648 clade = clade_cls(name=base_name+str(i), 649 branch_length=branch_length) 650 self.root.clades.append(clade)
651
652 653 -class Tree(TreeElement, TreeMixin):
654 """A phylogenetic tree, containing global info for the phylogeny. 655 656 The structure and node-specific data is accessible through the 'root' 657 clade attached to the Tree instance. 658 659 @param root: 660 The starting node of the tree. If the tree is rooted, this will usually 661 be the root node. 662 @type root: Clade 663 664 @param rooted: 665 Whether or not the tree is rooted. By default, a tree is assumed to be 666 rooted. 667 @type rooted: bool 668 669 @param id: The identifier of the tree, if there is one. 670 @type id: str 671 672 @param name: The name of the tree, in essence a label. 673 @type name: str 674 """
675 - def __init__(self, root=None, rooted=True, id=None, name=None):
676 self.root = root or Clade() 677 self.rooted = rooted 678 self.id = id 679 self.name = name
680 681 @classmethod
682 - def from_clade(cls, clade, **kwargs):
683 """Create a new Tree object given a clade. 684 685 Keyword arguments are the usual Tree constructor parameters. 686 """ 687 root = copy.deepcopy(clade) 688 return cls(root, **kwargs)
689 690 @classmethod
691 - def randomized(cls, taxa, branch_length=1.0, branch_stdev=None):
692 """Create a randomized bifurcating tree given a list of taxa. 693 694 @param taxa: Either an integer specifying the number of taxa to create 695 (automatically named taxon#), or an iterable of taxon names, as 696 strings. 697 698 @return: a tree of the same type as this class. 699 """ 700 if isinstance(taxa, int): 701 taxa = ['taxon%s' % (i+1) for i in range(taxa)] 702 elif hasattr(taxa, '__iter__'): 703 taxa = list(taxa) 704 else: 705 raise TypeError("taxa argument must be integer (# taxa) or " 706 "iterable of taxon names.") 707 rtree = cls() 708 terminals = [rtree.root] 709 while len(terminals) < len(taxa): 710 newsplit = random.choice(terminals) 711 newterms = newsplit.split(branch_length=branch_length) 712 if branch_stdev: 713 # Add some noise to the branch lengths 714 for nt in newterms: 715 nt.branch_length = max(0, 716 random.gauss(branch_length, branch_stdev)) 717 terminals.remove(newsplit) 718 terminals.extend(newterms) 719 # Distribute taxon labels randomly 720 random.shuffle(taxa) 721 for node, name in zip(terminals, taxa): 722 node.name = name 723 return rtree
724 725 @property
726 - def clade(self):
727 """The first clade in this tree (not itself).""" 728 return self.root
729
730 - def as_phyloxml(self, **kwargs):
731 """Convert this tree to a PhyloXML-compatible Phylogeny. 732 733 This lets you use the additional annotation types PhyloXML defines, and 734 save this information when you write this tree as 'phyloxml'. 735 """ 736 from Bio.Phylo.PhyloXML import Phylogeny 737 return Phylogeny.from_tree(self, **kwargs)
738
739 - def root_with_outgroup(self, outgroup_targets, *more_targets):
740 """Reroot this tree with the outgroup clade containing outgroup_targets. 741 742 Operates in-place. 743 744 Edge cases: 745 746 - If outgroup == self.root, no change 747 - If outgroup is terminal, create new bifurcating root node with a 748 0-length branch to the outgroup 749 - If outgroup is internal, use the given outgroup node as the new 750 trifurcating root, keeping branches the same 751 - If the original root was bifurcating, drop it from the tree, 752 preserving total branch lengths 753 """ 754 # This raises a ValueError if any target is not in this tree 755 # Otherwise, common_ancestor guarantees outgroup is in this tree 756 outgroup = self.common_ancestor(outgroup_targets, *more_targets) 757 outgroup_path = self.get_path(outgroup) 758 if len(outgroup_path) == 0: 759 # Outgroup is the current root -- no change 760 return 761 762 prev_blen = outgroup.branch_length 763 if outgroup.is_terminal(): 764 # Create a new root with a 0-length branch to the outgroup 765 outgroup.branch_length = 0.0 766 new_root = self.root.__class__(branch_length=None, clades=[outgroup]) 767 else: 768 # Use the given outgroup node as the new (trifurcating) root 769 new_root = outgroup 770 new_root.branch_length = None 771 772 # Tracing the outgroup lineage backwards, reattach the subclades under a 773 # new root clade. Reverse the branches directly above the outgroup in 774 # the tree, but keep the descendants of those clades as they are. 775 new_parent = new_root 776 for parent in outgroup_path[-2::-1]: 777 parent.clades.pop(parent.clades.index(new_parent)) 778 prev_blen, parent.branch_length = parent.branch_length, prev_blen 779 new_parent.clades.insert(0, parent) 780 new_parent = parent 781 # Finally, handle the original root according to number of descendents 782 old_root = self.root 783 old_root.clades.pop(old_root.clades.index(new_parent)) 784 if len(old_root) == 1: 785 # Delete the old bifurcating root & add branch lengths 786 ingroup = old_root.clades[0] 787 if ingroup.branch_length: 788 ingroup.branch_length += prev_blen 789 else: 790 ingroup.branch_length = prev_blen 791 new_parent.clades.insert(0, ingroup) 792 # ENH: If annotations are attached to old_root, do... something. 793 else: 794 # Keep the old trifurcating/polytomous root as an internal node 795 old_root.branch_length = prev_blen 796 new_parent.clades.insert(0, old_root) 797 798 self.root = new_root 799 self.rooted = True 800 return
801 802 # Method assumed by TreeMixin 803
804 - def is_terminal(self):
805 """True if the root of this tree is terminal.""" 806 return (not self.root.clades)
807 808 # Convention from SeqRecord and Alignment classes 809
810 - def __format__(self, format_spec):
811 """Serialize the tree as a string in the specified file format. 812 813 This method supports the format() built-in function added in Python 814 2.6/3.0. The format_spec should be a lower case string supported by 815 Bio.Phylo.write as an output file format. 816 """ 817 if format_spec: 818 from StringIO import StringIO 819 from Bio.Phylo import _io 820 handle = StringIO() 821 _io.write([self], handle, format_spec) 822 return handle.getvalue() 823 else: 824 # Follow python convention and default to using __str__ 825 return str(self)
826
827 - def format(self, format):
828 """Serialize the tree as a string in the specified file format. 829 830 This duplicates the __format__ magic method for pre-2.6 Pythons. 831 """ 832 return self.__format__(format)
833 834 # Pretty-printer for the entire tree hierarchy 835
836 - def __str__(self):
837 """String representation of the entire tree. 838 839 Serializes each sub-clade recursively using repr() to create a summary 840 of the object structure. 841 """ 842 TAB = ' ' 843 textlines = [] 844 def print_tree(obj, indent): 845 """Recursively serialize sub-elements. 846 847 This closes over textlines and modifies it in-place. 848 """ 849 textlines.append(TAB*indent + repr(obj)) 850 indent += 1 851 for attr in obj.__dict__: 852 child = getattr(obj, attr) 853 if isinstance(child, TreeElement): 854 print_tree(child, indent) 855 elif isinstance(child, list): 856 for elem in child: 857 if isinstance(elem, TreeElement): 858 print_tree(elem, indent)
859 print_tree(self, 0) 860 return '\n'.join(textlines)
861
862 863 -class Clade(TreeElement, TreeMixin):
864 """A recursively defined sub-tree. 865 866 @param branch_length: 867 The length of the branch leading to the root node of this clade. 868 @type branch_length: str 869 870 @param name: The clade's name (a label). 871 @type name: str 872 873 @param clades: Sub-trees rooted directly under this tree's root. 874 @type clades: list 875 """
876 - def __init__(self, branch_length=None, name=None, clades=None, 877 confidence=None):
878 self.branch_length = branch_length 879 self.name = name 880 self.clades = clades or [] 881 self.confidence = confidence
882 883 @property
884 - def root(self):
885 """Allow TreeMixin methods to traverse clades properly.""" 886 return self
887
888 - def is_terminal(self):
889 """True if this is a terminal (leaf) node.""" 890 return (not self.clades)
891 892 # Sequence-type behavior methods 893
894 - def __getitem__(self, index):
895 """Get clades by index (integer or slice).""" 896 if isinstance(index, int) or isinstance(index, slice): 897 return self.clades[index] 898 ref = self 899 for idx in index: 900 ref = ref[idx] 901 return ref
902
903 - def __iter__(self):
904 """Iterate through this tree's direct descendent clades (sub-trees).""" 905 return iter(self.clades)
906
907 - def __len__(self):
908 """Number of clades directy under the root.""" 909 return len(self.clades)
910
911 - def __nonzero__(self):
912 """Boolean value of an instance of this class. 913 914 NB: If this method is not defined, but __len__ is, then the object is 915 considered true if the result of __len__() is nonzero. We want Clade 916 instances to always be considered true. 917 """ 918 return True
919
920 - def __str__(self):
921 if self.name: 922 return _sugar.trim_str(self.name, maxlen=40) 923 return self.__class__.__name__
924