1
2
3
4
5 """Command line wrapper for the multiple alignment programme MAFFT.
6
7 http://align.bmr.kyushu-u.ac.jp/mafft/software/
8
9 Citations:
10
11 Katoh, Toh (BMC Bioinformatics 9:212, 2008) Improved accuracy of
12 multiple ncRNA alignment by incorporating structural information into a
13 MAFFT-based framework (describes RNA structural alignment methods)
14
15 Katoh, Toh (Briefings in Bioinformatics 9:286-298, 2008) Recent developments in
16 the MAFFT multiple sequence alignment program (outlines version 6)
17 Katoh, Toh (Bioinformatics 23:372-374, 2007) Errata PartTree: an algorithm to
18 build an approximate tree from a large number of unaligned sequences (describes
19 the PartTree algorithm)
20
21 Katoh, Kuma, Toh, Miyata (Nucleic Acids Res. 33:511-518, 2005) MAFFT version 5:
22 improvement in accuracy of multiple sequence alignment (describes [ancestral
23 versions of] the G-INS-i, L-INS-i and E-INS-i strategies) Katoh, Misawa, Kuma,
24 Miyata (Nucleic Acids Res. 30:3059-3066, 2002)
25
26 Last checked against version: 6.626b (2009/03/16)
27 """
28
29 import os
30 from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline
31
33 """Command line wrapper for the multiple alignment program MAFFT."""
def __init__(self, cmd="mafft", **kwargs):
    """Declare the MAFFT command line parameters and initialise the wrapper.

    Arguments:
     - cmd - name or path of the MAFFT executable (defaults to "mafft").
     - kwargs - forwarded unchanged to AbstractCommandline.__init__
       (presumably interpreted there as initial parameter values; confirm
       against Bio.Application).

    The parameter list is assigned to self.parameters before the base
    class initialiser is called, mirroring the original statement order.
    """
    BLOSUM_MATRICES = ["30", "45", "62", "80"]

    def is_int(value):
        """Accept integer option values only."""
        return isinstance(value, int)

    def is_number(value):
        """Accept floats, and also ints so that e.g. op=2 is not rejected."""
        return isinstance(value, (int, float))

    def is_positive_int(value):
        """Accept strictly positive integers (PAM numbers)."""
        return isinstance(value, int) and value > 0

    def is_blosum_number(value):
        """Accept a supported BLOSUM number given as "62" or as 62."""
        return str(value) in BLOSUM_MATRICES

    # Positional argument order used below (as in the original calls):
    #   _Switch(names, types, description)
    #   _Option(names, types, checker, is_required, description, equate)
    #   _Argument(names, types, checker, is_required, description)
    self.parameters = [
        # ---- Algorithm ----
        _Switch(["--auto", "auto"], ["input"],
                "Automatically select strategy. Default off."),
        _Switch(["--6merpair", "6merpair", "sixmerpair"], ["input"],
                "Distance is calculated based on the number of shared "
                "6mers. Default: on"),
        _Switch(["--globalpair", "globalpair"], ["input"],
                "All pairwise alignments are computed with the "
                "Needleman-Wunsch algorithm. Default: off"),
        _Switch(["--localpair", "localpair"], ["input"],
                "All pairwise alignments are computed with the "
                "Smith-Waterman algorithm. Default: off"),
        _Switch(["--genafpair", "genafpair"], ["input"],
                "All pairwise alignments are computed with a local "
                "algorithm with the generalized affine gap cost "
                "(Altschul 1998). Default: off"),
        _Switch(["--fastapair", "fastapair"], ["input"],
                "All pairwise alignments are computed with FASTA "
                "(Pearson and Lipman 1988). Default: off"),
        _Option(["--weighti", "weighti"], ["input"],
                is_number, 0,
                "Weighting factor for the consistency term calculated "
                "from pairwise alignments. Default: 2.7",
                0),
        _Option(["--retree", "retree"], ["input"],
                is_int, 0,
                "Guide tree is built number times in the progressive "
                "stage. Valid with 6mer distance. Default: 2",
                0),
        _Option(["--maxiterate", "maxiterate"], ["input"],
                is_int, 0,
                "Number cycles of iterative refinement are performed. "
                "Default: 0",
                0),
        _Switch(["--fft", "fft"], ["input"],
                "Use FFT approximation in group-to-group alignment. "
                "Default: on"),
        _Switch(["--nofft", "nofft"], ["input"],
                "Do not use FFT approximation in group-to-group "
                "alignment. Default: off"),
        _Switch(["--noscore", "noscore"], ["input"],
                "Alignment score is not checked in the iterative "
                "refinement stage. Default: off (score is checked)"),
        _Switch(["--memsave", "memsave"], ["input"],
                "Use the Myers-Miller (1988) algorithm. Default: "
                "automatically turned on when the alignment length "
                "exceeds 10,000 (aa/nt)."),
        _Switch(["--parttree", "parttree"], ["input"],
                "Use a fast tree-building method with the 6mer "
                "distance. Default: off"),
        _Switch(["--dpparttree", "dpparttree"], ["input"],
                "The PartTree algorithm is used with distances "
                "based on DP. Default: off"),
        _Switch(["--fastaparttree", "fastaparttree"], ["input"],
                "The PartTree algorithm is used with distances based "
                "on FASTA. Default: off"),
        _Option(["--partsize", "partsize"], ["input"],
                is_int, 0,
                "The number of partitions in the PartTree algorithm. "
                "Default: 50",
                0),
        # --groupsize takes a number, so it must be an option with a
        # value rather than a bare switch.
        _Option(["--groupsize", "groupsize"], ["input"],
                is_int, 0,
                "Do not make alignment larger than number sequences. "
                "Default: the number of input sequences",
                0),

        # ---- Parameters ----
        _Option(["--op", "op"], ["input"],
                is_number, 0,
                "Gap opening penalty at group-to-group alignment. "
                "Default: 1.53",
                0),
        _Option(["--ep", "ep"], ["input"],
                is_number, 0,
                "Offset value, which works like gap extension penalty, "
                "for group-to-group alignment. Default: 0.123",
                0),
        _Option(["--lop", "lop"], ["input"],
                is_number, 0,
                "Gap opening penalty at local pairwise alignment. "
                "Default: -2.00",
                0),
        _Option(["--lep", "lep"], ["input"],
                is_number, 0,
                "Offset value at local pairwise alignment. "
                "Default: 0.1",
                0),
        _Option(["--lexp", "lexp"], ["input"],
                is_number, 0,
                "Gap extension penalty at local pairwise alignment. "
                "Default: -0.1",
                0),
        _Option(["--LOP", "LOP"], ["input"],
                is_number, 0,
                "Gap opening penalty to skip the alignment. "
                "Default: -6.00",
                0),
        _Option(["--LEXP", "LEXP"], ["input"],
                is_number, 0,
                "Gap extension penalty to skip the alignment. "
                "Default: 0.00",
                0),
        _Option(["--bl", "bl"], ["input"],
                is_blosum_number, 0,
                "BLOSUM number matrix is used. Default: 62",
                0),
        _Option(["--jtt", "jtt"], ["input"], None, 0,
                "JTT PAM number (Jones et al. 1992) matrix is used. "
                "number>0. Default: BLOSUM62",
                0),
        # --tm expects a PAM number, not a file name, so validate it as
        # a positive integer instead of checking for a path on disk.
        _Option(["--tm", "tm"], ["input"],
                is_positive_int, 0,
                "Transmembrane PAM number (Jones et al. 1994) "
                "matrix is used. number>0. Default: BLOSUM62",
                0),
        _Option(["--aamatrix", "aamatrix"], ["input"],
                os.path.exists, 0,
                "Use a user-defined AA scoring matrix. "
                "Default: BLOSUM62",
                0),
        _Switch(["--fmodel", "fmodel"], ["input"],
                "Incorporate the AA/nuc composition information "
                "into the scoring matrix. Default: off"),

        # ---- Output ----
        _Switch(["--clustalout", "clustalout"], ["input"],
                "Output format: clustal format. Default: off (fasta "
                "format)"),
        _Switch(["--inputorder", "inputorder"], ["input"],
                "Output order: same as input. Default: on"),
        _Switch(["--reorder", "reorder"], ["input"],
                "Output order: aligned. Default: off (inputorder)"),
        _Switch(["--treeout", "treeout"], ["input"],
                "Guide tree is output to the input.tree file. Default: off"),
        _Switch(["--quiet", "quiet"], ["input"],
                "Do not report progress. Default: off"),

        # ---- Input ----
        _Switch(["--nuc", "nuc"], ["input"],
                "Assume the sequences are nucleotide. Default: auto"),
        _Switch(["--amino", "amino"], ["input"],
                "Assume the sequences are amino acid. Default: auto"),
        _Option(["--seed", "seed"], ["input", "file"], os.path.exists, 0,
                "Seed alignments given in alignment_n (fasta format) "
                "are aligned with sequences in input.",
                0),

        # ---- Positional arguments ----
        # The first input file is the only required parameter.
        _Argument(["input"], ["input"], os.path.exists, 1,
                  "Input file name"),
        _Argument(["input1"], ["input"], os.path.exists, 0,
                  "Second input file name for the mafft-profile command"),
    ]
    AbstractCommandline.__init__(self, cmd, **kwargs)
284