Package Bio :: Package Phylo :: Module _utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo._utils

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Utilities for handling, displaying and exporting Phylo trees. 
  7   
  8  Third-party libraries are loaded when the corresponding function is called. 
  9  """ 
 10  __docformat__ = "epytext en" 
 11   
 12  import math 
 13  import sys 
 14   
 15   
def to_networkx(tree):
    """Build a NetworkX graph that mirrors the topology of a Tree.

    Every clade reached from C{tree.root} becomes a node (the clade object
    itself is used as the node), and every parent/child pair becomes an edge.
    A rooted tree produces a C{networkx.DiGraph}; otherwise a plain
    C{networkx.Graph} is returned.

    The graph is handy for graph-oriented analysis and for interactive
    plotting with pylab, matplotlib or pygraphviz, although the layout it
    yields is usually not a good way to display a phylogeny.

    Requires NetworkX version 0.99 or 1.0.

    Side effect: with NetworkX 1.0 or later, a child clade that has no
    C{color} (or C{width}) of its own inherits the parent's value, and that
    value is written back onto the child clade so it keeps cascading down.

    @param tree: The tree to convert; must provide C{rooted} and C{root}.
    @return: The populated NetworkX graph.
    @raise MissingPythonDependencyError: If NetworkX cannot be imported.
    """
    try:
        import networkx
    except ImportError:
        from Bio import MissingPythonDependencyError
        raise MissingPythonDependencyError(
                "Install NetworkX if you want to use to_networkx.")

    def add_edge(graph, n1, n2):
        """Connect parent n1 to child n2 using whatever the API supports."""
        # NB (1/2010): the networkx API congealed recently
        # Ubuntu Lucid uses v0.99, newest is v1.0.1, let's support both
        if networkx.__version__ >= '1.0':
            graph.add_edge(n1, n2, weight=str(n2.branch_length or 1.0))

            # Branch color is stored as hex; the child's own value wins,
            # otherwise the parent's value cascades down to the child.
            child_color = getattr(n2, 'color', None)
            if child_color is None:
                parent_color = getattr(n1, 'color', None)
                if parent_color is not None:
                    graph[n1][n2]['color'] = parent_color.to_hex()
                    n2.color = parent_color
            else:
                graph[n1][n2]['color'] = child_color.to_hex()

            # Branch width (float) follows the same cascading rule.
            child_width = getattr(n2, 'width', None)
            if child_width is None:
                parent_width = getattr(n1, 'width', None)
                if parent_width is not None:
                    graph[n1][n2]['width'] = parent_width
                    n2.width = parent_width
            else:
                graph[n1][n2]['width'] = child_width
        elif networkx.__version__ >= '0.99':
            # This API takes the edge data as a third positional argument.
            graph.add_edge(n1, n2, (n2.branch_length or 1.0))
        else:
            # Oldest API: edges carry no extra data at all.
            graph.add_edge(n1, n2)

    def build_subgraph(graph, top):
        """Walk down the Tree, building graphs, edges and nodes."""
        parent = top.root
        for clade in top:
            child = clade.root
            graph.add_node(child)
            add_edge(graph, parent, child)
            build_subgraph(graph, clade)

    # Directed edges only make sense when the tree has a defined root.
    graph_class = networkx.DiGraph if tree.rooted else networkx.Graph
    result = graph_class()
    result.add_node(tree.root)
    build_subgraph(result, tree.root)
    return result
def draw_graphviz(tree, label_func=str, prog='twopi', args='',
        node_color='#c0deff', **kwargs):
    """Display a tree or clade as a graph, using the graphviz engine.

    Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot.

    Example:

        >>> import pylab
        >>> from Bio import Phylo
        >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml')
        >>> Phylo.draw_graphviz(tree)
        >>> pylab.show()
        >>> pylab.savefig('apaf.png')

    The third and fourth parameters apply to Graphviz, and the remaining
    arbitrary keyword arguments are passed directly to networkx.draw(), which
    in turn mostly wraps matplotlib/pylab.  See the documentation for Graphviz
    and networkx for detailed explanations.

    The NetworkX/matplotlib parameters are described in the docstrings for
    networkx.draw() and pylab.scatter(), but the most reasonable options to try
    are: I{ alpha, node_color, node_size, node_shape, edge_color, style,
    font_size, font_color, font_weight, font_family }

    If C{nodelist}, C{edge_color} or C{width} are not given, they are filled
    in here: C{nodelist} with the nodes that received a label, and
    C{edge_color}/C{width} from the per-edge values stored by to_networkx
    (falling back to 'k' and 1.0).

    @param tree: The tree or clade to draw; it is converted with to_networkx.

    @param label_func: A function to extract a label from a node. By default
        this is str(), but you can use a different function to select another
        string associated with each node. If this function returns None for a
        node, no label will be shown for that node.

        The label will also be silently skipped if the function throws an
        exception related to ordinary attribute access (LookupError,
        AttributeError, ValueError); all other exception types will still be
        raised. This means you can use a lambda expression that simply
        attempts to look up the desired value without checking if the
        intermediate attributes are available:

            >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code)

    @param prog: The Graphviz program to use when rendering the graph. 'twopi'
        behaves the best for large graphs, reliably avoiding crossing edges, but
        for moderate graphs 'neato' looks a bit nicer.  For small directed
        graphs, 'dot' may produce the most normal-looking phylogram, but will
        cross and distort edges in larger graphs. (The programs 'circo' and
        'fdp' are not recommended.)

    @param args: String of options passed to the external graphviz program.
        Normally not needed, but offered here for completeness.  Only the
        PyGraphviz layout receives it; the pydot fallback is called without.

    @param node_color: Color handed to networkx.draw() for the nodes.

    @raise MissingPythonDependencyError: If NetworkX is missing, or if neither
        PyGraphviz nor pydot can be imported for the layout step.
    """
    try:
        import networkx
    except ImportError:
        from Bio import MissingPythonDependencyError
        raise MissingPythonDependencyError(
                "Install NetworkX if you want to use draw_graphviz.")

    G = to_networkx(tree)
    Gi = networkx.convert_node_labels_to_integers(G, discard_old_labels=False)
    try:
        posi = networkx.pygraphviz_layout(Gi, prog, args=args)
    except ImportError:
        try:
            posi = networkx.pydot_layout(Gi, prog)
        except ImportError:
            # The exception class must be imported on this path as well;
            # otherwise it is only bound when NetworkX itself is missing and
            # the raise below would fail with a NameError.
            from Bio import MissingPythonDependencyError
            raise MissingPythonDependencyError(
                    "Install PyGraphviz or Pydot if you want to use "
                    "draw_graphviz.")
    # Map the layout, which is keyed by integer label, back to the clades.
    posn = dict((n, posi[Gi.node_labels[n]]) for n in G)

    def get_label_mapping(G, selection):
        """Yield (node, label) for each selected node that has a label."""
        for node in G.nodes():
            if (selection is None) or (node in selection):
                try:
                    label = label_func(node)
                except (LookupError, AttributeError, ValueError):
                    # Ordinary failed attribute access: leave it unlabeled.
                    continue
                # Skip missing labels and bare class-name placeholders.
                if label not in (None, node.__class__.__name__):
                    yield (node, label)

    if 'nodelist' in kwargs:
        labels = dict(get_label_mapping(G, set(kwargs['nodelist'])))
    else:
        labels = dict(get_label_mapping(G, None))
        # A real list, so the value is a sequence on every Python version.
        kwargs['nodelist'] = list(labels)
    if 'edge_color' not in kwargs:
        kwargs['edge_color'] = [
                (e[2].get('color') or 'k') if isinstance(e[2], dict) else 'k'
                for e in G.edges(data=True)]
    if 'width' not in kwargs:
        kwargs['width'] = [
                (e[2].get('width') or 1.0) if isinstance(e[2], dict) else 1.0
                for e in G.edges(data=True)]
    networkx.draw(G, posn, labels=labels, node_color=node_color, **kwargs)
def draw_ascii(tree, file=None, column_width=80):
    """Draw an ascii-art phylogram of the given tree.

    The printed result looks like::

                                            _________ Orange
                             ______________|
                            |              |______________ Tangerine
              ______________|
             |              |          _________________________ Grapefruit
            _|              |_________|
             |                        |______________ Pummelo
             |
             |__________________________________ Apple

    Each terminal taxon occupies one text row, with a spacer row between
    consecutive taxa, and its label is appended in the right margin.  Branch
    lengths are scaled to the available width; if the tree carries no branch
    lengths, unit branch lengths are assumed.

    @param tree: The tree to draw.  It must provide C{get_terminals()},
        C{depths()} and C{root}; its clades must be iterable and expose
        C{clades}.
    @param file: File handle opened for writing the output drawing.  Defaults
        to the current C{sys.stdout}, looked up when the function is called.
    @param column_width: Total number of text columns used by the drawing.
    """
    if file is None:
        # Resolved per call so that a redirected sys.stdout is honoured.
        file = sys.stdout

    taxa = tree.get_terminals()
    # Some constants for the drawing calculations
    max_label_width = max(len(str(taxon)) for taxon in taxa)
    drawing_width = column_width - max_label_width - 1
    drawing_height = 2 * len(taxa) - 1

    def get_col_positions(tree):
        """Create a mapping of each clade to its column position."""
        depths = tree.depths()
        # If there are no branch lengths, assume unit branch lengths
        if not max(depths.values()):
            depths = tree.depths(unit_branch_lengths=True)
        max_depth = max(depths.values())
        # Potential drawing overflow due to rounding -- 1 char per tree layer
        fudge_margin = int(math.ceil(math.log(len(taxa), 2)))
        if max_depth:
            cols_per_branch_unit = ((drawing_width - fudge_margin)
                                    / float(max_depth))
        else:
            # A tree consisting of a single node has no depth to scale.
            cols_per_branch_unit = 0.0
        # int(x + 1.0) gives the same column as int(round(x + 0.5)) did with
        # round-half-away-from-zero for non-negative depths, but does not
        # depend on the interpreter's rounding mode.
        return dict((clade, int(blen * cols_per_branch_unit + 1.0))
                    for clade, blen in depths.items())

    def get_row_positions(tree):
        """Create a mapping of each clade to its row position."""
        # Terminals sit on the even rows, in traversal order.
        positions = dict((taxon, 2 * idx) for idx, taxon in enumerate(taxa))

        def calc_row(clade):
            for subclade in clade:
                if subclade not in positions:
                    calc_row(subclade)
            if clade.clades:
                # Midway between the first and last child, as a whole row.
                positions[clade] = (positions[clade.clades[0]] +
                                    positions[clade.clades[-1]]) // 2

        calc_row(tree.root)
        return positions

    col_positions = get_col_positions(tree)
    row_positions = get_row_positions(tree)
    char_matrix = [[' ' for x in range(drawing_width)]
                   for y in range(drawing_height)]

    def draw_clade(clade, startcol):
        """Draw the branch leading to clade, then recurse into its children."""
        thiscol = col_positions[clade]
        thisrow = row_positions[clade]
        # Draw a horizontal line
        for col in range(startcol, thiscol):
            char_matrix[thisrow][col] = '_'
        if clade.clades:
            # Draw a vertical line
            toprow = row_positions[clade.clades[0]]
            botrow = row_positions[clade.clades[-1]]
            for row in range(toprow + 1, botrow + 1):
                char_matrix[row][thiscol] = '|'
            # NB: Short terminal branches need something to stop rstrip()
            if (col_positions[clade.clades[0]] - thiscol) < 2:
                char_matrix[toprow][thiscol] = ','
            # Draw descendents
            for child in clade:
                draw_clade(child, thiscol + 1)

    draw_clade(tree.root, 0)
    # Print the complete drawing
    for idx, row in enumerate(char_matrix):
        line = ''.join(row).rstrip()
        # Add labels for terminal taxa in the right margin
        if idx % 2 == 0:
            line += ' ' + str(taxa[idx // 2])
        file.write(line + '\n')
    file.write('\n')