# Startup: record the script path and version, branch on the platform name
# held in $os, and check that the helper binaries exist under $prefix.
# Only part of this section is visible here; branch bodies are not shown.

# Path used to re-invoke this script: directory of $0 joined with its base
# name. Exported so child processes can read it from the environment.
3 myself=`dirname $0`/`basename $0`; export myself
# Version banner text, exported for the same reason.
4 version="version 6.00 (2005/05/01)"; export version
# Each test is true when grep prints a match, i.e. when $os contains the
# platform name (case-insensitive).
# NOTE(review): the backtick output is unquoted, so a value of $os containing
# whitespace would hand `[` several words and make the test fail with an error.
8 if [ `echo $os | grep -i cygwin` ]; then
10 elif [ `echo $os | grep -i darwin` ]; then
12 elif [ `echo $os | grep -i sunos` ]; then
# A non-empty MAFFT_BINARIES overrides the binary directory.
19 if [ $MAFFT_BINARIES ]; then
20 prefix=$MAFFT_BINARIES
# Installation check: tbfast must be executable inside $prefix.
25 if [ ! -x $prefix/tbfast ]; then
# NOTE(review): `2>&1` only points echo's stderr at stdout; the message itself
# is still written to stdout. `1>&2` would be needed to send it to stderr.
27 echo "correctly installed?" 2>&1
28 echo "mafft binaries have to be installed in" $prefix.
# Defaults that depend on the name the script was invoked under ($progname).
# Only the assignments visible here are described; other per-program settings
# fall in lines that are not shown.

# Default scoring-model option string passed to the alignment binaries.
36 defaultmodel=" -b 62 "
# Default pairwise-distance method; overridden per program name below.
41 defaultdistance="sixtuples"
# NOTE(review): $progname is unquoted; if it were empty, `[ = "linsi" ]` would
# be a malformed test expression.
42 if [ $progname = "linsi" ]; then
46 defaultdistance="local"
47 elif [ $progname = "ginsi" ]; then
51 defaultdistance="global"
52 elif [ $progname = "einsi" ]; then
56 defaultdistance="genaf"
# The remaining program names do not change defaultdistance in the visible
# lines; their branch bodies are not shown.
57 elif [ $progname = "fftns" ]; then
60 elif [ $progname = "fftnsi" ]; then
64 elif [ $progname = "nwns" ]; then
67 elif [ $progname = "nwnsi" ]; then
# Working values start from the defaults and may be replaced by options.
76 iterate=$defaultiterate
79 distance=$defaultdistance
# Command-line option parser: one long if/elif chain comparing $1 against each
# recognised option. Most branch bodies are not visible here.
# NOTE(review): $1 is unquoted in every test, so an empty argument or one
# containing whitespace makes `[` fail with a syntax error instead of falling
# through to the "Unknown option" branch.
100 if [ $1 = "--clustalout" ]; then
101 outputformat="clustal"
102 elif [ $1 = "--reorder" ]; then
104 elif [ $1 = "--rough" ]; then
106 elif [ $1 = "--noscore" ]; then
# Pairwise-alignment method selectors.
108 elif [ $1 = "--fastswpair" ]; then
111 elif [ $1 = "--fastapair" ]; then
114 elif [ $1 = "--suboptpair" ]; then
116 elif [ $1 = "--blastpair" ]; then
118 elif [ $1 = "--globalpair" ]; then
120 elif [ $1 = "--localpair" ]; then
122 elif [ $1 = "--genafpair" ]; then
# Sequence type and algorithm switches.
124 elif [ $1 = "--nuc" ]; then
126 elif [ $1 = "--amino" ]; then
128 elif [ $1 = "--fft" ]; then
130 elif [ $1 = "--nofft" ]; then
132 elif [ $1 = "--quiet" ]; then
134 elif [ $1 = "--nj" ]; then
136 elif [ $1 = "--coreext" ]; then
138 elif [ $1 = "--core" ]; then
# Options that take a numeric value. `expr $1 - 0` is used to coerce/validate
# the value as an integer; presumably a `shift` in a line not shown has moved
# the value into $1 by then - TODO confirm.
140 elif [ $1 = "--maxiterate" ]; then
# NOTE(review): unlike --retree below, a failing expr here does not set er.
142 iterate=`expr $1 - 0`
143 elif [ $1 = "--retree" ]; then
# er=1 flags a usage error when expr fails.
# NOTE(review): expr also exits with status 1 when the result is 0, so a value
# of 0 would set er as well.
145 cycle=`expr $1 - 0` || er=1
# Scoring-matrix and penalty options; their bodies are not visible here.
146 elif [ $1 = "--jtt" ]; then
149 elif [ $1 = "--tm" ]; then
152 elif [ $1 = "--bl" ]; then
155 elif [ $1 = "--weighti" ]; then
158 elif [ $1 = "--op" ]; then
161 elif [ $1 = "--ep" ]; then
164 elif [ $1 = "--lop" ]; then
167 elif [ $1 = "--LOP" ]; then
170 elif [ $1 = "--lep" ]; then
173 elif [ $1 = "--lexp" ]; then
176 elif [ $1 = "--LEXP" ]; then
179 elif [ $1 = "--corethr" ]; then
182 elif [ $1 = "--corewin" ]; then
# A bare positive integer is accepted as an argument only when the script runs
# as fftns or nwns (-a binds tighter than -o, so this is two AND pairs OR-ed).
# NOTE(review): a non-numeric $1 makes `-gt` report an error here.
185 elif [ $1 -gt 0 -a $progname = "fftns" -o $1 -gt 0 -a $progname = "nwns" ]; then
# NOTE(review): `2>&1` leaves this message on stdout; `1>&2` would be needed
# to send it to stderr.
188 echo "Unknown option: " $1 2>&1
# Post-processing of parsed options: derive helper parameters, locate the
# external FASTA program when it is needed, and build the strategy name.
# Several branch bodies are not visible here.
193 if [ $fft -eq 1 ]; then
198 if [ $sw -eq 1 ]; then
# The FASTA-based distance needs an external fasta34 binary.
204 if [ $distance = "fasta" ]; then
# When FASTA_4_MAFFT is empty, fall back to a PATH lookup.
205 if [ ! $FASTA_4_MAFFT ]; then
206 FASTA_4_MAFFT=`which fasta34`
# NOTE(review): if FASTA_4_MAFFT is still empty here, the unquoted test becomes
# `[ ! -x ]`, which is false, so the install hint below is skipped exactly when
# the program was not found. Quoting the variable would avoid this.
209 if [ ! -x $FASTA_4_MAFFT ]; then
211 echo "== Install FASTA ========================================================"
212 echo "This option requires the fasta34 program (FASTA version x.xx or higher)"
213 echo "installed in your PATH. If you have the fasta34 program but have renamed"
214 echo "(like /usr/local/bin/myfasta), set the FASTA_4_MAFFT environment variable"
215 echo "to point your fasta34 (like setenv FASTA_4_MAFFT /usr/local/bin/myfasta)."
216 echo "========================================================================="
221 if [ $distance = "sixtuples" ]; then
# Option string " -l <weighti>" later passed to tbfast and dvtditr.
225 localparam=" -l "$weighti
# Chain keyed on the distance method (and on sw / fft); the bodies are not
# visible here - presumably each sets the leading part of $strategy.
228 if [ $distance = "fasta" -a $sw -eq 0 ]; then
230 elif [ $distance = "fasta" -a $sw -eq 1 ]; then
232 elif [ $distance = "subopt" ]; then
234 elif [ $distance = "blast" ]; then
236 elif [ $distance = "global" ]; then
238 elif [ $distance = "local" ]; then
240 elif [ $distance = "genaf" ]; then
242 elif [ $fft -eq 1 ]; then
# Strategy name suffix: "NS-" followed by "i" (iterative refinement), "ROUGH",
# or the retree count (presumably in an else branch that is not shown).
247 strategy=$strategy"NS-"
248 if [ $iterate -gt 0 ]; then
249 strategy=$strategy"i"
250 elif [ $rough -eq 1 ]; then
251 strategy=$strategy"ROUGH"
253 strategy=$strategy$cycle
# Human-readable description of the chosen strategy. Sets $explanation and
# $performance, which are printed to the terminal after the run.
# $performance keeps this fallback for strategies that do not set it below.
257 performance='Not tested.'
258 if [ $strategy = "F-INS-i" ]; then
259 explanation='Iterative refinement method incorporating LOCAL pairwise alignment information'
260 performance='Most accurate, but very slow'
261 elif [ $strategy = "L-INS-i" ]; then
262 explanation='Iterative refinement method incorporating LOCAL pairwise alignment information'
263 performance='Probably most accurate, very slow'
264 elif [ $strategy = "E-INS-i" ]; then
# NOTE(review): "Altshul" in the message below is spelled differently from
# "Altschul" in the BLAST citation printed elsewhere in this script.
265 explanation='Iterative refinement method incorporating LOCAL pairwise alignment with generalized affine gap costs (Altshul 1998)'
266 performance='Suitable for sequences with long unalignable regions, very slow'
267 elif [ $strategy = "G-INS-i" ]; then
268 explanation='Iterative refinement method incorporating GLOBAL pairwise alignment information'
269 performance='Suitable for sequences of similar lengths, very slow'
# The "-1" progressive variants set only the explanation.
270 elif [ $strategy = "F-INS-1" ]; then
271 explanation='Progressive method incorporating LOCAL pairwise alignment information'
272 elif [ $strategy = "L-INS-1" ]; then
273 explanation='Progressive method incorporating LOCAL pairwise alignment information'
274 elif [ $strategy = "G-INS-1" ]; then
275 explanation='Progressive method incorporating GLOBAL pairwise alignment information'
276 elif [ $strategy = "FFT-NS-i" -o $strategy = "NW-NS-i" ]; then
277 explanation='Iterative refinement method (max. '$iterate' iterations)'
# More than two iterations is rated 'Accurate but slow'; 'Standard' is
# presumably the else branch (the else line is not shown).
278 if [ $iterate -gt 2 ]; then
279 performance='Accurate but slow'
281 performance='Standard'
283 elif [ $strategy = "FFT-NS-2" -o $strategy = "NW-NS-2" ]; then
284 explanation='Progressive method (guide trees were built '$cycle' times.)'
285 performance='Fast but rough'
286 elif [ $strategy = "FFT-NS-1" -o $strategy = "NW-NS-1" ]; then
287 explanation='Progressive method (rough guide tree was used.)'
288 performance='Very fast but very rough'
# Choose the filter that turns the final alignment into the requested output.
# $outputcommand is later run with the alignment file on its stdin.
# clustal + aligned order: f2cl with the strategy name and "-r order".
291 if [ $outputformat = "clustal" -a $outorder = "aligned" ]; then
292 outputcommand="$prefix/f2cl -c $strategy -r order "
# clustal + input order: f2cl without reordering.
293 elif [ $outputformat = "clustal" -a $outorder = "input" ]; then
294 outputcommand="$prefix/f2cl -c $strategy "
# pir + aligned order: f2cl with -f and reordering.
295 elif [ $outputformat = "pir" -a $outorder = "aligned" ]; then
296 outputcommand="$prefix/f2cl -f -r order "
# Pass-through via cat; presumably the else branch of the chain above (the
# else line is not shown).
298 outputcommand=" cat "
# Create the per-run working directory and stage the input file in it.
# NOTE(review): the first TMPFILE value is overwritten immediately by the next
# line, so only /tmp/$progname.$$ is ever used. That name is predictable;
# mktemp -d would be the safer choice.
301 TMPFILE=/tmp/`basename $0`.`whoami`.$$.`date +%y%m%d%H%M%S`
302 TMPFILE=/tmp/$progname.$$
# A failed mkdir is recorded in er rather than aborting here.
304 mkdir $TMPFILE || er=1
# Remove the working directory when the shell exits (signal 0). $TMPFILE is
# expanded now, when the trap is defined, because of the double quotes.
# NOTE(review): the trap is installed even if mkdir failed, so `rm -r` may run
# on a path this process did not create.
305 trap "rm -r $TMPFILE " 0
# Exactly one remaining argument is expected: the input file, or "-" for stdin.
306 if [ $# -eq 1 ]; then
# Accept a non-empty file or "-".
307 if [ -s "$1" -o "$1" = - ]; then
# Copy the input, turning carriage returns into newlines.
308 cat "$1" | tr "\r" "\n" > $TMPFILE/infile
# NOTE(review): `2>&1` leaves this message on stdout; `1>&2` would be needed
# to send it to stderr.
310 echo "Cannot open $1." 2>&1
# Usage/help text, printed when er was set to 1 during argument handling.
# NOTE(review): every echo below uses `2>&1`, which redirects echo's (unused)
# stderr to stdout. The help text is therefore written to stdout; `1>&2` would
# be needed to send it to stderr.
319 if [ $er -eq 1 ]; then
321 echo "MAFFT" $version 2>&1
323 echo " References: " 2>&1
324 echo " Katoh et al., 2002, NAR 30: 3059-3066" 2>&1
325 echo " Katoh et al., 2005, NAR 33: 511-518" 2>&1
326 echo " http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft" 2>&1
328 # echo "Usage: `basename $0` [options] inputfile > outputfile" 2>&1
# Option summary; default values are interpolated from the default* variables.
329 echo " Options: " 2>&1
330 echo " --globalpair : All pairwise global alignment information is included." 2>&1
331 echo " default: off" 2>&1
332 echo " --localpair : All pairwise local alignment information is included." 2>&1
333 echo " default: off" 2>&1
334 echo " --op # : Gap opening penalty (>0). default: $defaultgop " 2>&1
335 echo " --ep # : Offset (>0, works like gap extension penalty). " 2>&1
336 echo " default: $defaultaof " 2>&1
337 echo " --bl #, --jtt # : Scoring matrix. default: BLOSUM62" 2>&1
338 echo " Alternatives are BLOSUM (--bl) 30, 45, 62, 80, " 2>&1
339 echo " or JTT (--jtt) # PAM. " 2>&1
340 echo " --nuc or --amino : Sequence type. default: auto" 2>&1
341 echo " --retree # : The number of tree building in progressive method " 2>&1
342 echo " (see the paper for detail). default: $defaultcycle " 2>&1
343 echo " --maxiterate # : Maximum number of iterative refinement. default: $defaultiterate " 2>&1
344 echo " --nj : Tree dependent iterative refinement is performed" 2>&1
345 echo " on an NJ tree. default: UPG tree" 2>&1
# The FFT line reports whichever default applies to the invoked program name;
# the second echo is presumably the else branch (the else line is not shown).
346 if [ $defaultfft -eq 1 ]; then
347 echo " --fft or --nofft: FFT is enabled or disabled. default: enabled" 2>&1
349 echo " --fft or --nofft: FFT is enabled or disabled. default: disabled" 2>&1
# Help text for --rough is commented out in the original.
351 # if [ $defaultrough -eq 1 ]; then
352 # echo " --rough : Rough alignment, for >1,000 sequences. default: enabled" 2>&1
354 # echo " --rough : Rough alignment, for >1,000 sequences. default: disabled" 2>&1
356 echo " --clustalout: Output: clustal format (not tested). default: fasta" 2>&1
357 echo " --reorder: Outorder: aligned (not tested). default: input order" 2>&1
358 echo " --quiet : Do not report progress." 2>&1
360 echo " Input format: fasta format" 2>&1
# Worked examples, one per strategy.
362 echo " Typical usages:" 2>&1
363 echo " % mafft --maxiterate 1000 --localpair input > output" 2>&1
364 echo " L-INS-i (most accurate and very slow, iterative refinement" 2>&1
365 echo " method incorporating LOCAL alignment information)" 2>&1
367 echo " % mafft --maxiterate 1000 --globalpair input > output" 2>&1
368 echo " G-INS-i (slightly less accurate than L-INS-i and very slow," 2>&1
369 echo " iterative refinement method incorporating GLOBAL alignment" 2>&1
370 echo " information, suitable for sequences of similar lengths) " 2>&1
372 echo " % mafft --maxiterate 1000 input > output" 2>&1
373 echo " FFT-NS-i (accurate and slow, iterative refinement method)" 2>&1
375 echo " % mafft --retree 2 input > output (DEFAULT; same as mafft input > output)" 2>&1
376 echo " FFT-NS-2 (rough and fast, default, progressive method)" 2>&1
378 echo " % mafft --retree 1 input > output" 2>&1
379 echo " FFT-NS-1 (very rough and very fast, progressive method with" 2>&1
380 echo " a rough guide tree)" 2>&1
# Rough mode: split the input into chunks, align each chunk separately, then
# combine the per-chunk alignments. Loop keywords and some bodies are not
# visible here. File names are relative, so this presumably runs inside the
# working directory - TODO confirm.
387 if [ $rough -gt 0 ]; then
# Split infile into sp-* files; 500 is passed as the second argument
# (presumably the chunk size - TODO confirm against splitseq).
388 $prefix/splitseq infile 500
389 for spfile in sp-[0-9]*[0-9]
# Progress output goes straight to the controlling terminal.
391 echo $spfile > /dev/tty
# Count lines that start with '>', '|' or '='.
392 nseq=`grep -c '^[>|=]' $spfile`
393 echo $nseq > /dev/tty
# A chunk with a single sequence is copied unchanged.
394 if [ $nseq -eq 1 ]; then
395 cp $spfile $spfile.aln
# Otherwise run sextet5 and then tbfast on the chunk; both are silenced and
# abort the script on failure.
397 $prefix/sextet5 $seqtype < $spfile > /dev/null 2>&1 || exit 1
398 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft -Ax < $spfile > /dev/null 2>&1 || exit 1
# The file "pre" is saved as this chunk's alignment.
399 cat pre > $spfile.aln
# Second pass over the per-chunk alignments, counting them in ngroup.
403 for alfile in sp-[0-9]*[0-9].aln
405 ngroup=`expr $ngroup + 1`
406 echo $alfile > /dev/tty
# The first group is handled separately (body not shown).
407 if [ $ngroup -eq 1 ]; then
# galn is run on the accumulated alignment "alall" and the current chunk,
# writing "altmp".
410 $prefix/galn -A alall $alfile > altmp
# Quiet pipeline: same steps as the non-quiet pipeline, but stdout and stderr
# of every tool are discarded. Each step aborts the script with status 1 on
# failure. "-"$gop and "-"$aof pass the penalties with a leading minus sign.
416 if [ $quiet -gt 0 ]; then
# Step 1: compute pairwise information with the tool matching $distance, then
# run tbfast on infile.
417 if [ $distance = "subopt" ]; then
418 $prefix/pairlocalalign $seqtype -b 62 -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -S < infile > /dev/null 2>&1 || exit 1
419 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null 2>&1 || exit 1
420 elif [ $distance = "fasta" ]; then
421 $prefix/dndfast7 $swopt < infile > /dev/null 2>&1 || exit 1
422 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null 2>&1 || exit 1
423 elif [ $distance = "blast" ]; then
424 $prefix/dndblast < infile > /dev/null 2>&1 || exit 1
425 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null 2>&1 || exit 1
426 elif [ $distance = "global" ]; then
427 $prefix/pairlocalalign $seqtype $model -f "-"$gop -h "-"$aof -F < infile > /dev/null 2>&1 || exit 1
428 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null 2>&1 || exit 1
429 elif [ $distance = "local" ]; then
430 $prefix/pairlocalalign $seqtype -b 62 -g $lexp -f $lgop -h $laof -L < infile > /dev/null 2>&1 || exit 1
431 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null 2>&1 || exit 1
432 elif [ $distance = "genaf" ]; then
433 $prefix/pairlocalalign $seqtype -b 62 -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -N < infile > /dev/null 2>&1 || exit 1
434 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null 2>&1 || exit 1
# Presumably the else branch (the else line is not shown): disttbfast writes
# its result to "pre".
436 $prefix/disttbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft -Ax < infile > pre 2>/dev/null || exit 1
# Step 2: rerun tbfast on "pre" until cycle has counted down to 1.
438 while [ $cycle -gt 1 ]
440 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft -JAx < pre > /dev/null 2>&1 || exit 1
441 cycle=`expr $cycle - 1`
# Step 3: optional iterative refinement.
443 if [ $iterate -gt 0 ]; then
# NOTE(review): `==` inside `[ ]` is a bash extension, not POSIX; the matching
# test in the non-quiet pipeline uses `=`.
444 if [ $distance == "sixtuples" ]; then
445 $prefix/dndpre < pre > /dev/null 2>&1 || exit 1
447 $prefix/dvtditr $scorecalcopt $localparam -F -z 50 $seqtype $model -f "-"$gop -h "-"$aof -A -c -I $iterate $param_it < pre > /dev/null 2>&1 || exit 1
# Non-quiet pipeline followed by final output. The steps match the quiet
# pipeline, but only stdout of each tool is discarded, so stderr (progress)
# stays visible. Each step aborts the script with status 1 on failure.
# Step 1: pairwise information for the chosen $distance, then tbfast.
450 if [ $distance = "subopt" ]; then
451 $prefix/pairlocalalign $seqtype -b 62 -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -S < infile > /dev/null || exit 1
452 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null || exit 1
453 elif [ $distance = "fasta" ]; then
454 $prefix/dndfast7 $swopt < infile > /dev/null || exit 1
455 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null || exit 1
456 elif [ $distance = "blast" ]; then
457 $prefix/dndblast < infile > /dev/null || exit 1
458 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null || exit 1
459 elif [ $distance = "global" ]; then
460 $prefix/pairlocalalign $seqtype $model -f "-"$gop -h "-"$aof -F < infile > /dev/null || exit 1
461 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null || exit 1
462 elif [ $distance = "local" ]; then
463 $prefix/pairlocalalign $seqtype -b 62 -g $lexp -f $lgop -h $laof -L < infile > /dev/null || exit 1
464 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null || exit 1
465 elif [ $distance = "genaf" ]; then
466 $prefix/pairlocalalign $seqtype -b 62 -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -N < infile > /dev/null || exit 1
467 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam -Ax < infile > /dev/null || exit 1
# Presumably the else branch (the else line is not shown): disttbfast writes
# its result to "pre".
469 $prefix/disttbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft -Ax < infile > pre || exit 1
# Step 2: rerun tbfast on "pre" until cycle has counted down to 1.
471 while [ $cycle -gt 1 ]
473 $prefix/tbfast $seqtype $model -f "-"$gop -h "-"$aof $param_fft -JAx < pre > /dev/null || exit 1
474 cycle=`expr $cycle - 1`
# Step 3: optional iterative refinement.
476 if [ $iterate -gt 0 ]; then
477 if [ $distance = "sixtuples" ]; then
# dndpre is silenced completely even in this non-quiet pipeline.
478 $prefix/dndpre < pre > /dev/null 2>&1 || exit 1
480 $prefix/dvtditr $scorecalcopt $localparam -F -z 50 $seqtype $model -f "-"$gop -h "-"$aof -A -c -I $iterate $param_it < pre > /dev/null || exit 1
# Final output: run "pre" through $outputcommand. When coreout is non-zero,
# setcore filters "pre" into "pre2" first (presumably the else branch).
483 # cat < pre || exit 1
484 if [ $coreout -eq 0 ]; then
485 $outputcommand < pre || exit 1
# NOTE(review): setcore's exit status is not checked, unlike the other steps.
487 $prefix/setcore -w $corewin -i $corethr $coreext < pre > pre2
488 $outputcommand < pre2 || exit 1
# Closing summary written directly to the controlling terminal (/dev/tty), so
# it never mixes with the alignment on stdout.
# NOTE(review): writing to /dev/tty fails when no terminal is attached.
# Citation for the external pairwise tool, when one was used.
493 if [ $distance = "fasta" ]; then
494 echo "Pairwise alignments were computed by FASTA" >/dev/tty
495 echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" >/dev/tty
497 if [ $distance = "blast" ]; then
498 echo "Pairwise alignments were computed by BLAST">/dev/tty
499 echo "(Altschul et al., 1997, NAR 25:3389-3402)">/dev/tty
501 printf "\n" >/dev/tty
502 echo 'Strategy:' >/dev/tty
# NOTE(review): $strategy is used as the printf format string; a '%' or
# backslash in it would be interpreted. printf ' %s' "$strategy" is safer.
503 printf ' '$strategy >/dev/tty
# $performance and $explanation are unquoted, so runs of whitespace in them
# are collapsed by word splitting before echo prints them.
504 echo ' ('$performance')' >/dev/tty
505 echo ' '$explanation >/dev/tty
507 echo "Type '$0 --help' for other options." >/dev/tty
# Look for an awk implementation (nawk, then gawk) on platforms other than
# darwin. Branch bodies are not visible here.
514 if [ $os != "darwin" ]; then
# First word of the path printed by `which nawk`; empty when nothing is printed.
516 tmpawk=`which nawk 2>/dev/null | awk '{print $1}'`
# NOTE(review): with an empty $tmpawk the unquoted test collapses to `[ -x ]`,
# which is true, so a missing nawk is treated as found. Quoting the variable
# would avoid this.
517 if [ -x $tmpawk ]; then
# Same lookup for gawk; the same empty-value caveat applies.
522 tmpawk=`which gawk 2>/dev/null | awk '{print $1}'`
523 if [ -x $tmpawk ]; then
# Exported so the awk program can read it through ENVIRON.
535 export defaultiterate
# Interactive mode (awk): copy the settings exported by the shell part out of
# the environment into awk variables.
539 prefix = ENVIRON["prefix"];
540 version = ENVIRON["version"];
# Path of the driver script; used later to build the command to run.
541 myself = ENVIRON["myself"];
# Default values offered in the prompts that follow.
542 defaultgop = ENVIRON["defaultgop"]
543 defaultaof = ENVIRON["defaultaof"]
544 defaultfft = ENVIRON["defaultfft"]
545 defaultcycle = ENVIRON["defaultcycle"]
546 defaultiterate = ENVIRON["defaultiterate"]
# Print the program banner (version and citation) to the terminal device.
550 printf( "\n" ) > "/dev/tty";
551 printf( "---------------------------------------------------------------------\n" ) > "/dev/tty";
552 printf( "\n" ) > "/dev/tty";
# The version string comes from the environment.
553 printf( " MAFFT %s\n", version ) > "/dev/tty";
554 printf( "\n" ) > "/dev/tty";
555 printf( " K. Katoh, K. Misawa, K. Kuma and T. Miyata (2002)\n" ) > "/dev/tty";
556 printf( " Nucleic Acids Research 30: 3059-3066.\n" ) > "/dev/tty";
557 printf( " http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft\n" ) > "/dev/tty";
558 printf( "---------------------------------------------------------------------\n" ) > "/dev/tty";
# Ask for the input file name and check that it can be read. The surrounding
# loop and branch structure is not visible here.
563 printf( "\n" ) > "/dev/tty";
564 printf( "Input file? (fasta format)\n@ " ) > "/dev/tty";
# Read one line of user input from the terminal; res is the getline result.
565 res = getline < "/dev/tty";
# End of input or a blank answer (branch body not shown).
567 if( res == 0 || NF == 0 )
# The first whitespace-separated word of the answer is taken as the file name.
569 infile0 = sprintf( "%s", $1 );
570 infile = sprintf( "%s", $1 );
# Probe the file by reading one record from it. This replaces the current
# record with the first line of the file.
572 res = getline < infile;
# The messages below presumably correspond to getline failing, returning end
# of file, and succeeding - TODO confirm against the hidden conditions.
# NOTE(review): these messages go to stdout, not to the terminal device used
# for the prompts.
575 printf( "%s: No such file.\n\n", infile );
577 printf( "%s: Empty.\n", infile );
580 printf( "OK. infile = %s\n\n", infile );
# Ask for the output file name.
588 printf( "\n" ) > "/dev/tty";
589 printf( "Output file?\n" ) > "/dev/tty";
590 printf( "@ " ) > "/dev/tty";
# Read the answer from the terminal.
591 res = getline < "/dev/tty";
# End of input or a blank answer (branch body not shown).
593 if( res == 0 || NF == 0 )
# The first word of the answer becomes the output file name.
597 outfile = sprintf( "%s", $1 );
# Confirmation is written to stdout.
598 printf( "OK. outfile = %s\n\n", outfile );
# Ask for the number of tree rebuilds and the maximum number of refinement
# iterations. Each prompt shows the current value in brackets as the default.
# The code that parses the answers is not visible here.
606 retree = defaultcycle
608 printf( "Number of tree-rebuilding?\n" ) > "/dev/tty";
609 printf( "@ [%d] ", retree ) > "/dev/tty";
610 res = getline < "/dev/tty";
# Values outside 1..10 are rejected (handling not shown).
618 if( retree < 1 || 10 < retree )
622 printf( "OK. %d\n\n", retree );
# Same pattern for the iteration limit.
629 niterate = defaultiterate;
631 printf( "Maximum number of iterations?\n" ) > "/dev/tty";
632 printf( "@ [%d] ", niterate ) > "/dev/tty";
633 res = getline < "/dev/tty";
# Values outside 0..1000 are rejected (handling not shown).
641 if( niterate < 0 || 1000 < niterate )
645 printf( "OK. %d\n\n", niterate );
# Ask whether FFT should be used and build the matching command-line flag.
654 printf( "Use fft?\n" ) > "/dev/tty";
# The bracketed default reflects the current value of fft.
655 printf( "@ [%s] ", fft?"Yes":"No" ) > "/dev/tty";
656 res = getline < "/dev/tty";
# A blank answer or one starting with Y/y (branch body not shown).
664 else if( NF == 0 || $0 ~ /^[Yy]/ )
# An answer starting with N/n.
# NOTE(review): if this is part of the same else-if chain as the test above,
# the NF == 0 alternative here can never be reached - confirm.
669 else if( NF == 0 || $0 ~ /^[Nn]/ )
# Flag passed on to the driver script, depending on the choice.
677 printf( "OK. FFT is enabled.\n\n" );
678 fftparam = " --fft ";
682 printf( "OK. FFT is disabled.\n\n" );
683 fftparam = " --nofft ";
# Ask for the scoring matrix via a numbered menu and translate the choice
# into the corresponding command-line option.
690 printf( "Scoring matrix? (ignored when DNA sequence is input.)\n" ) > "/dev/tty";
691 printf( " 1. BLOSUM 30\n" ) > "/dev/tty";
692 printf( " 2. BLOSUM 45\n" ) > "/dev/tty";
693 printf( " 3. BLOSUM 62\n" ) > "/dev/tty";
694 printf( " 4. BLOSUM 80\n" ) > "/dev/tty";
695 printf( " 5. JTT 200\n" ) > "/dev/tty";
696 printf( " 6. JTT 100\n" ) > "/dev/tty";
# The current value of scoringmatrix is shown as the default.
697 printf( "@ [%d] ", scoringmatrix ) > "/dev/tty";
698 res = getline < "/dev/tty";
# Adding 0 forces a numeric interpretation of the first word of the answer.
705 scoringmatrix = 0 + $1;
# Menu numbers outside 1..6 are rejected (handling not shown).
706 if( scoringmatrix < 1 || 6 < scoringmatrix )
# Menu number to option string.
713 if( scoringmatrix == 1 )
714 scoringparam = " --bl 30 ";
715 else if( scoringmatrix == 2 )
716 scoringparam = " --bl 45 ";
717 else if( scoringmatrix == 3 )
718 scoringparam = " --bl 62 ";
719 else if( scoringmatrix == 4 )
720 scoringparam = " --bl 80 ";
721 else if( scoringmatrix == 5 )
722 scoringparam = " --jtt 200 ";
723 else if( scoringmatrix == 6 )
724 scoringparam = " --jtt 100 ";
725 printf( "OK. %s\n\n",scoringparam );
# Ask for the gap opening penalty and the offset. Adding 0.0 forces the
# defaults taken from the environment to be treated as numbers.
729 penalty = 0.0 + defaultgop;
730 offset = 0.0 + defaultaof;
# NOTE(review): the format string below has no conversion specifiers, so the
# penalty and offset arguments are unused on this line.
732 printf( "Parameters (gap opening penalty, offset)?\n", penalty, offset ) > "/dev/tty";
733 printf( "@ [%5.3f, %5.3f] ", penalty, offset ) > "/dev/tty";
734 res = getline < "/dev/tty";
# Both values must be greater than 0 and at most 10 (handling not shown).
747 if( penalty <= 0.0 || 10.0 < penalty )
749 else if( offset <= 0.0 || 10.0 < offset )
753 printf( "OK. %5.3f %5.3f\n\n", penalty, offset );
# Assemble the non-interactive command line from the collected answers, show
# it, and ask for confirmation.
# NOTE(review): infile and outfile are inserted unquoted, so names containing
# spaces or shell metacharacters would change the command if it is handed to
# a shell - confirm how command is executed.
758 command = sprintf( "%s %s --retree %d --maxiterate %d %s --op %f --ep %f %s > %s", myself, fftparam, retree, niterate, scoringparam, penalty, offset, infile, outfile );
759 printf( "%s\n\n", command );
# Confirmation prompt; Y is shown as the default.
767 printf( "@ [Y] " ) > "/dev/tty";
768 res = getline < "/dev/tty";
# A blank answer or one starting with Y/y (branch body not shown).
772 else if( NF == 0 || $0 ~ /^[Yy]/ )