Package Bio :: Package Align :: Package Applications :: Module _Clustalw
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Clustalw

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program Clustal W. 
  6   
  7  http://www.clustal.org/ 
  8   
  9  Citation: 
 10   
 11  Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA, McWilliam H,  
 12  Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD, Gibson TJ, Higgins DG. 
 13  (2007). Clustal W and Clustal X version 2.0. Bioinformatics, 23, 2947-2948.  
 14   
 15  Last checked against versions: 1.83 and 2.0.10 
 16  """ 
 17   
 18  import os 
 19  from Bio.Application import _Option, _Switch, AbstractCommandline 
 20   
def _names(name):
    """Return the four spellings under which a Clustal W argument is listed.

    For example ``"infile"`` gives
    ``["-infile", "-INFILE", "INFILE", "infile"]``.
    """
    upper = name.upper()
    return ["-" + name, "-" + upper, upper, name]


def _is_int(value):
    """Checker: accept integer values only."""
    return isinstance(value, int)


def _is_number(value):
    """Checker: accept integer or floating point values."""
    return isinstance(value, (int, float))


def _one_of(*keywords):
    """Build a checker accepting the keywords in all-upper or all-lower case.

    ``keywords`` must be given in upper case.  Mixed-case spellings such as
    ``"Dna"`` are rejected.
    """
    accepted = tuple(keywords) + tuple(word.lower() for word in keywords)
    return lambda value: value in accepted


def _one_of_or_file(*keywords):
    """Build a checker accepting a keyword (see _one_of) or an existing path."""
    is_keyword = _one_of(*keywords)
    return lambda value: is_keyword(value) or os.path.exists(value)


class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    Every Clustal W argument is declared below as either an ``_Option``
    (takes a value) or a ``_Switch`` (present or absent), in the same order
    as the sections of the Clustal W help text: verbs first, then general,
    pairwise, multiple, profile, structure and tree parameters.

    The positional arguments given to ``_Option`` are, in order: names,
    types, checker function (or None for no validation), is_required,
    description and equate.  All valued options use ``equate=True`` so that
    they are rendered in the ``-name=value`` form Clustal W documents.
    """
    # TODO - Should we default to cmd="clustalw2" now?

    def __init__(self, cmd="clustalw", **kwargs):
        """Declare the supported arguments, then initialise the base class.

        Arguments:
         - cmd - name or path of the Clustal W executable.
         - kwargs - initial argument values, passed through unchanged to
           ``AbstractCommandline.__init__``.
        """
        self.parameters = [
            _Option(_names("infile"), ["input", "file"], None, False,
                    "Input sequences.", True),
            _Option(_names("profile1"), ["input", "file"], None, False,
                    "Profiles (old alignment).", True),
            _Option(_names("profile2"), ["input", "file"], None, False,
                    "Profiles (old alignment).", True),
            # ---------------- VERBS (do things) ----------------
            _Switch(_names("options"), ["input"],
                    "List the command line parameters"),
            _Switch(_names("help"), ["input"],
                    "Outline the command line params."),
            _Switch(_names("check"), ["input"],
                    "Outline the command line params."),
            _Switch(_names("fullhelp"), ["input"],
                    "Output full help content."),
            _Switch(_names("align"), ["input"],
                    "Do full multiple alignment."),
            _Switch(_names("tree"), ["input"],
                    "Calculate NJ tree."),
            _Option(_names("bootstrap"), ["input"], _is_int, False,
                    "Bootstrap a NJ tree (n= number of bootstraps; def. "
                    "= 1000).", True),
            _Switch(_names("convert"), ["input"],
                    "Output the input sequences in a different file format."),
            # ---------------- PARAMETERS (set things) ----------------
            # ***General settings:***
            # NOTE: -interactive is deliberately not wrapped; entering the
            # interactive menus makes no sense when driven from Biopython.
            _Switch(_names("quicktree"), ["input"],
                    "Use FAST algorithm for the alignment guide tree"),
            _Option(_names("type"), ["input"],
                    _one_of("PROTEIN", "DNA"), False,
                    "PROTEIN or DNA sequences", True),
            _Switch(_names("negative"), ["input"],
                    "Protein alignment with negative values in matrix"),
            _Option(_names("outfile"), ["input", "file"], None, False,
                    "Output sequence alignment file name", True),
            _Option(_names("output"), ["input"],
                    _one_of("GCG", "GDE", "PHYLIP", "PIR", "NEXUS"), False,
                    "Output format: GCG, GDE, PHYLIP, PIR or NEXUS", True),
            _Option(_names("outorder"), ["input"],
                    _one_of("INPUT", "ALIGNED"), False,
                    "Output taxon order: INPUT or ALIGNED", True),
            _Option(_names("case"), ["input"],
                    _one_of("UPPER", "LOWER"), False,
                    "LOWER or UPPER (for GDE output only)", True),
            _Option(_names("seqnos"), ["input"],
                    _one_of("ON", "OFF"), False,
                    "OFF or ON (for Clustal output only)", True),
            _Option(_names("seqno_range"), ["input"],
                    _one_of("ON", "OFF"), False,
                    "OFF or ON (NEW- for all output formats)", True),
            _Option(_names("range"), ["input"], None, False,
                    "Sequence range to write starting m to m+n. "
                    "Input as string eg. '24,200'", True),
            _Option(_names("maxseqlen"), ["input"], _is_int, False,
                    "Maximum allowed input sequence length", True),
            _Switch(_names("quiet"), ["input"],
                    "Reduce console output to minimum"),
            _Switch(_names("stats"), ["input"],
                    "Log some alignment statistics to file"),
            # ***Fast Pairwise Alignments:***
            _Option(_names("ktuple"), ["input"], _is_number, False,
                    "Word size", True),
            _Option(_names("topdiags"), ["input"], _is_number, False,
                    "Number of best diags.", True),
            _Option(_names("window"), ["input"], _is_number, False,
                    "Window around best diags.", True),
            _Option(_names("pairgap"), ["input"], _is_number, False,
                    "Gap penalty", True),
            _Option(_names("score"), ["input"],
                    _one_of("PERCENT", "ABSOLUTE"), False,
                    "Either: PERCENT or ABSOLUTE", True),
            # ***Slow Pairwise Alignments:***
            _Option(_names("pwmatrix"), ["input"],
                    _one_of_or_file("BLOSUM", "PAM", "GONNET", "ID"), False,
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or "
                    "filename", True),
            _Option(_names("pwdnamatrix"), ["input"],
                    _one_of_or_file("IUB", "CLUSTALW"), False,
                    "DNA weight matrix=IUB, CLUSTALW or filename", True),
            _Option(_names("pwgapopen"), ["input"], _is_number, False,
                    "Gap opening penalty", True),
            _Option(_names("pwgapext"), ["input"], _is_number, False,
                    "Gap extension penalty", True),
            # ***Multiple Alignments:***
            _Option(_names("newtree"), ["output", "file"], None, False,
                    "Output file name for newly created guide tree", True),
            # The guide tree is read by Clustal W, so the file must already
            # exist when the value is set.
            _Option(_names("usetree"), ["input", "file"],
                    os.path.exists, False,
                    "File name of guide tree", True),
            _Option(_names("matrix"), ["input"],
                    _one_of_or_file("BLOSUM", "PAM", "GONNET", "ID"), False,
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or "
                    "filename", True),
            _Option(_names("dnamatrix"), ["input"],
                    _one_of_or_file("IUB", "CLUSTALW"), False,
                    "DNA weight matrix=IUB, CLUSTALW or filename", True),
            _Option(_names("gapopen"), ["input"], _is_number, False,
                    "Gap opening penalty", True),
            _Option(_names("gapext"), ["input"], _is_number, False,
                    "Gap extension penalty", True),
            _Switch(_names("endgaps"), ["input"],
                    "No end gap separation pen."),
            # equate=True: Clustal W documents this as -GAPDIST=n.
            _Option(_names("gapdist"), ["input"], _is_number, False,
                    "Gap separation pen. range", True),
            _Switch(_names("nopgap"), ["input"],
                    "Residue-specific gaps off"),
            _Switch(_names("nohgap"), ["input"],
                    "Hydrophilic gaps off"),
            _Switch(_names("hgapresidues"), ["input"],
                    "List hydrophilic res."),
            _Option(_names("maxdiv"), ["input"], _is_number, False,
                    "% ident. for delay", True),
            _Option(_names("transweight"), ["input"], _is_number, False,
                    "Transitions weighting", True),
            _Option(_names("iteration"), ["input"],
                    _one_of("NONE", "TREE", "ALIGNMENT"), False,
                    "NONE or TREE or ALIGNMENT", True),
            # equate=True: Clustal W documents this as -NUMITER=n.
            _Option(_names("numiter"), ["input"], _is_int, False,
                    "maximum number of iterations to perform", True),
            _Switch(_names("noweights"), ["input"],
                    "Disable sequence weighting"),
            # ***Profile Alignments:***
            _Switch(_names("profile"), ["input"],
                    "Merge two alignments by profile alignment"),
            _Option(_names("newtree1"), ["output", "file"], None, False,
                    "Output file name for new guide tree of profile1", True),
            _Option(_names("newtree2"), ["output", "file"], None, False,
                    "Output file for new guide tree of profile2", True),
            _Option(_names("usetree1"), ["input", "file"],
                    os.path.exists, False,
                    "File name of guide tree for profile1", True),
            _Option(_names("usetree2"), ["input", "file"],
                    os.path.exists, False,
                    "File name of guide tree for profile2", True),
            # ***Sequence to Profile Alignments:***
            _Switch(_names("sequences"), ["input"],
                    "Sequentially add profile2 sequences to profile1 "
                    "alignment"),
            _Switch(_names("nosecstr1"), ["input"],
                    "Do not use secondary structure-gap penalty mask for "
                    "profile 1"),
            _Switch(_names("nosecstr2"), ["input"],
                    "Do not use secondary structure-gap penalty mask for "
                    "profile 2"),
            # ***Structure Alignments:***
            _Option(_names("secstrout"), ["input"],
                    _one_of("STRUCTURE", "MASK", "BOTH", "NONE"), False,
                    "STRUCTURE or MASK or BOTH or NONE output in alignment "
                    "file", True),
            _Option(_names("helixgap"), ["input"], _is_number, False,
                    "Gap penalty for helix core residues", True),
            _Option(_names("strandgap"), ["input"], _is_number, False,
                    "gap penalty for strand core residues", True),
            _Option(_names("loopgap"), ["input"], _is_number, False,
                    "Gap penalty for loop regions", True),
            _Option(_names("terminalgap"), ["input"], _is_number, False,
                    "Gap penalty for structure termini", True),
            _Option(_names("helixendin"), ["input"], _is_int, False,
                    "Number of residues inside helix to be treated as "
                    "terminal", True),
            _Option(_names("helixendout"), ["input"], _is_int, False,
                    "Number of residues outside helix to be treated as "
                    "terminal", True),
            _Option(_names("strandendin"), ["input"], _is_int, False,
                    "Number of residues inside strand to be treated as "
                    "terminal", True),
            _Option(_names("strandendout"), ["input"], _is_int, False,
                    "number of residues outside strand to be treated as "
                    "terminal", True),
            # ***Trees:***
            _Option(_names("outputtree"), ["input"],
                    _one_of("NJ", "PHYLIP", "DIST", "NEXUS"), False,
                    "nj OR phylip OR dist OR nexus", True),
            _Option(_names("seed"), ["input"], _is_int, False,
                    "Seed number for bootstraps.", True),
            _Switch(_names("kimura"), ["input"],
                    "Use Kimura's correction."),
            _Switch(_names("tossgaps"), ["input"],
                    "Ignore positions with gaps."),
            _Option(_names("bootlabels"), ["input"],
                    _one_of("NODE", "BRANCH"), False,
                    "Node OR branch position of bootstrap values in tree "
                    "display", True),
            _Option(_names("clustering"), ["input"],
                    _one_of("NJ", "UPGMA"), False,
                    "NJ or UPGMA", True),
        ]
        # self.parameters must be in place before the base class runs,
        # because kwargs are passed through to it.
        AbstractCommandline.__init__(self, cmd, **kwargs)