Package Bio :: Package Phylo :: Module NewickIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.NewickIO

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license. Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """I/O function wrappers for the Newick file format. 
  9   
 10  See: U{ http://evolution.genetics.washington.edu/phylip/newick_doc.html } 
 11  """ 
 12  __docformat__ = "epytext en" 
 13   
 14  from cStringIO import StringIO 
 15   
 16  from Bio.Phylo import Newick 
 17   
 18  # Definitions retrieved from Bio.Nexus.Trees 
 19  NODECOMMENT_START = '[&' 
 20  NODECOMMENT_END = ']' 
class NewickError(Exception):
    """Signal that a Newick object could not be constructed.

    Raised by the parser in this module when the tree text is malformed
    and construction cannot continue.
    """
26
# ---------------------------------------------------------
# Public API

def parse(handle):
    """Iterate over the trees in a Newick file handle.

    Builds a Parser around the handle and delegates to its parse() method
    using that method's default options.

    @param handle: source of Newick text, passed unchanged to Parser.
    @return: a generator of Bio.Phylo.Newick.Tree objects.
    """
    parser = Parser(handle)
    return parser.parse()
37
def write(trees, handle, plain=False, **kwargs):
    """Write trees in Newick format to the given file handle.

    @param trees: the trees to serialize; handed to Writer as-is.
    @param handle: object whose write() method receives one line per tree.
    @param plain: forwarded to Writer.write() together with any additional
        keyword options in C{kwargs}.
    @return: number of trees written.
    """
    writer = Writer(trees)
    return writer.write(handle, plain=plain, **kwargs)
44
# ---------------------------------------------------------
# Input

class Parser(object):
    """Parse a Newick tree given a file handle.

    Based on the parser in Bio.Nexus.Trees.
    """

    def __init__(self, handle):
        """Remember the handle to read Newick text from.

        @param handle: an iterable yielding lines of text (e.g. an open
            file). It is only consumed when the generator returned by
            parse() is iterated.
        """
        self.handle = handle
        # parse() normally sets these options; give them defaults here so
        # the private helpers also work before parse() has started running.
        self.values_are_support = False
        self.rooted = False

    @classmethod
    def from_string(cls, treetext):
        """Alternate constructor: wrap a string of Newick text in a handle.

        @return: a new parser reading from an in-memory StringIO.
        """
        return cls(StringIO(treetext))

    def parse(self, values_are_support=False, rooted=False):
        """Parse the text stream this object was initialized with.

        Lines are accumulated, with trailing whitespace stripped, until one
        of them ends with ';', which terminates a tree.

        @param values_are_support: if true, a lone numeric value on a node
            is stored as its confidence instead of its branch length.
        @param rooted: stored on the parser as C{self.rooted}.
        @return: a generator yielding one tree per ';'-terminated chunk of
            text, plus a final tree if unterminated text is left over.
        """
        self.values_are_support = values_are_support
        self.rooted = rooted
        pieces = []
        for line in self.handle:
            chunk = line.rstrip()
            if not chunk:
                # Blank line: contributes nothing to the current tree
                continue
            pieces.append(chunk)
            if chunk.endswith(';'):
                yield self._parse_tree(''.join(pieces))
                pieces = []
        if pieces:
            # Last tree is missing a terminal ';' character -- that's OK
            yield self._parse_tree(''.join(pieces))

    def _parse_tree(self, text):
        """Parse the text representation into a Tree object."""
        # XXX what global info do we have here? Any? Use **kwargs?
        return Newick.Tree(root=self._parse_subtree(text))

    def _parse_subtree(self, text):
        """Parse (a,b,c...)[[[xx]:]yy] into subcomponents, recursively.

        @raise NewickError: if the numbers of opening and closing
            parentheses in the text differ.
        @return: the clade for this (sub)tree with its child clades attached.
        """
        text = text.strip().rstrip(';')
        if text.count('(') != text.count(')'):
            raise NewickError("Parentheses do not match in (sub)tree: " + text)
        # Text is now "(...)..." (balanced parens) or "..." (leaf node)
        if '(' not in text:
            # Leaf/terminal node -- recursion stops here
            return self._parse_tag(text)
        # Handle one layer of the nested subtree
        # XXX what if there's a paren in a comment or other string?
        close_posn = text.rfind(')')
        subtrees = []
        # Locate subtrees by counting nesting levels of parens; only commas
        # at nesting level 0 separate the direct children of this node.
        plevel = 0
        prev = 1
        for posn in range(1, close_posn):
            char = text[posn]
            if char == '(':
                plevel += 1
            elif char == ')':
                plevel -= 1
            elif char == ',' and plevel == 0:
                subtrees.append(text[prev:posn])
                prev = posn + 1
        subtrees.append(text[prev:close_posn])
        # Construct a new clade from trailing text, then attach subclades
        clade = self._parse_tag(text[close_posn + 1:])
        clade.clades = [self._parse_subtree(st) for st in subtrees]
        return clade

    def _parse_tag(self, text):
        """Extract the data for a node from text.

        @raise NewickError: if a node comment is opened but never closed,
            if more than one non-numeric (name) field is present, or if
            more than two numeric fields are present.
        @return: Clade instance containing any available data
        """
        # Extract the comment
        comment = None
        comment_start = text.find(NODECOMMENT_START)
        if comment_start != -1:
            body_start = comment_start + len(NODECOMMENT_START)
            # Search for the terminator only after the opener, so a ']'
            # occurring earlier in the tag is not taken as the comment's end.
            comment_end = text.find(NODECOMMENT_END, body_start)
            if comment_end == -1:
                raise NewickError('Error in tree description: '
                                  'Found %s without matching %s'
                                  % (NODECOMMENT_START, NODECOMMENT_END))
            comment = text[body_start:comment_end]
            text = (text[:comment_start]
                    + text[comment_end + len(NODECOMMENT_END):])
        clade = Newick.Clade(comment=comment)
        # Extract name (taxon), and optionally support, branch length
        # Float values are support and branch length, the string is name/taxon
        values = []
        for part in text.split(':'):
            part = part.strip()
            if not part:
                continue
            try:
                number = float(part)
            except ValueError:
                number = None
            if number is not None:
                values.append(number)
                continue
            # Non-numeric field: this is the node's name. Raise rather than
            # assert so the check still runs under "python -O".
            if clade.name is not None:
                raise NewickError("Two string taxonomies in tag: " + text)
            clade.name = part
        if len(values) == 1:
            # Real branch length, or support as branch length
            if self.values_are_support:
                clade.confidence = values[0]
            else:
                clade.branch_length = values[0]
        elif len(values) == 2:
            # Two non-taxon values: support comes first. (Is that always so?)
            clade.confidence, clade.branch_length = values
        elif len(values) > 2:
            raise NewickError("Too many colons in tag: " + text)
        return clade
152
# ---------------------------------------------------------
# Output

class Writer(object):
    """Based on the writer in Bio.Nexus.Trees (str, to_string)."""

    def __init__(self, trees):
        """Store the iterable of trees to be serialized."""
        self.trees = trees

    def write(self, handle, **kwargs):
        """Write this instance's trees to a file handle.

        Each tree string produced by to_strings() is written followed by a
        newline. Keyword arguments are forwarded to to_strings().

        @return: number of trees written.
        """
        count = 0
        for treestr in self.to_strings(**kwargs):
            handle.write(treestr + '\n')
            count += 1
        return count

    def to_strings(self, support_as_branchlengths=False,
                   branchlengths_only=False, plain=False,
                   plain_newick=True, ladderize=None,
                   max_support=1.0):
        """Return an iterable of PAUP-compatible tree lines.

        @param support_as_branchlengths: write each node's confidence in the
            branch length position, ignoring the actual branch lengths.
        @param branchlengths_only: write branch lengths and ignore support.
        @param plain: write no node information at all; overridden when
            either of the two options above is set.
        @param plain_newick: if true, yield only the raw Newick string;
            otherwise prefix it with Nexus-style "tree <name> = ..." text.
        @param ladderize: 'left'/'LEFT' or 'right'/'RIGHT' to call
            tree.ladderize() on each tree before writing; other values
            leave the tree as it is.
        @param max_support: value written for terminal nodes when
            support_as_branchlengths is set.
        @return: a generator of one string per tree.
        """
        # If there's a conflict in the arguments, we override plain=True
        if support_as_branchlengths or branchlengths_only:
            plain = False
        make_info_string = self._info_factory(plain, support_as_branchlengths,
                                              branchlengths_only, max_support)

        def newickize(clade):
            """Convert a node tree to a Newick tree string, recursively."""
            if clade.is_terminal():
                return ((clade.name or '')
                        + make_info_string(clade, terminal=True))
            subtrees = (newickize(sub) for sub in clade)
            return '(%s)%s' % (','.join(subtrees),
                               make_info_string(clade))

        # Convert each tree to a string
        for tree in self.trees:
            if ladderize in ('left', 'LEFT', 'right', 'RIGHT'):
                # Nexus compatibility shim, kind of
                tree.ladderize(reverse=(ladderize in ('right', 'RIGHT')))
            rawtree = newickize(tree.root) + ';'
            if plain_newick:
                yield rawtree
                continue
            # Nexus-style (?) notation before the raw Newick tree
            treeline = ['tree', (tree.name or 'a_tree'), '=']
            if tree.weight != 1:
                treeline.append('[&W%s]' % round(float(tree.weight), 3))
            if tree.rooted:
                treeline.append('[&R]')
            treeline.append(rawtree)
            yield ' '.join(treeline)

    def _info_factory(self, plain, support_as_branchlengths,
                      branchlengths_only, max_support):
        """Return a function that creates a nicely formatted node tag.

        The returned function takes a clade and a C{terminal} flag and
        returns the text that follows the node's name (or its closing
        parenthesis) in the Newick string. Which variant is returned is
        decided by the first true option, checked in the order: plain,
        support_as_branchlengths, branchlengths_only, otherwise both.
        """
        if plain:
            # Plain tree only. That's easy.
            def make_info_string(clade, terminal=False):
                return ''

        elif support_as_branchlengths:
            # Support as branchlengths (eg. PAUP), ignore actual branchlengths
            def make_info_string(clade, terminal=False):
                if terminal:
                    # terminal branches have 100% support
                    return ':%1.2f' % max_support
                return ':%1.2f' % (clade.confidence)

        elif branchlengths_only:
            # write only branchlengths, ignore support
            def make_info_string(clade, terminal=False):
                return ':%1.5f' % (clade.branch_length)

        else:
            # write support and branchlengths (e.g. .con tree of mrbayes)
            def make_info_string(clade, terminal=False):
                if terminal:
                    blen = clade.branch_length
                    if blen is None:
                        # Only a missing length falls back to 1.0; a real
                        # zero-length branch must be written as 0.00000.
                        blen = 1.0
                    return ':%1.5f' % blen
                has_support = (hasattr(clade, 'confidence') and
                               clade.confidence is not None)
                if clade.branch_length is not None and has_support:
                    # we have blen and support
                    return '%1.2f:%1.5f' % (clade.confidence,
                                            clade.branch_length)
                if clade.branch_length is not None:
                    # we have only blen
                    return '0.00000:%1.5f' % clade.branch_length
                if has_support:
                    # we have only support
                    return '%1.2f:0.00000' % clade.confidence
                return '0.00:0.00000'

        return make_info_string
216 217 elif support_as_branchlengths: 218 # Support as branchlengths (eg. PAUP), ignore actual branchlengths 219 def make_info_string(clade, terminal=False): 220 if terminal: 221 # terminal branches have 100% support 222 return ':%1.2f' % max_support 223 else: 224 return ':%1.2f' % (clade.confidence) 225 226 elif branchlengths_only: 227 # write only branchlengths, ignore support 228 def make_info_string(clade, terminal=False): 229 return ':%1.5f' % (clade.branch_length) 230 231 else: 232 # write support and branchlengths (e.g. .con tree of mrbayes) 233 def make_info_string(clade, terminal=False): 234 if terminal: 235 return ':%1.5f' % (clade.branch_length or 1.0) 236 else: 237 if (clade.branch_length is not None and 238 hasattr(clade, 'confidence') and 239 clade.confidence is not None): 240 # we have blen and suppport 241 return '%1.2f:%1.5f' % (clade.confidence, 242 clade.branch_length) 243 elif clade.branch_length is not None: 244 # we have only blen 245 return '0.00000:%1.5f' % clade.branch_length 246 elif (hasattr(clade, 'confidence') and 247 clade.confidence is not None): 248 # we have only support 249 return '%1.2f:0.00000' % clade.confidence 250 else: 251 return '0.00:0.00000' 252 253 return make_info_string 254