3 # FORESTER -- software libraries and applications
4 # for evolutionary biology research and applications.
6 # Copyright (C) 2017 Christian M. Zmasek
9 # This library is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU Lesser General Public
11 # License as published by the Free Software Foundation; either
12 # version 2.1 of the License, or (at your option) any later version.
14 # This library is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # Lesser General Public License for more details.
19 # You should have received a copy of the GNU Lesser General Public
20 # License along with this library; if not, write to the Free Software
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 # Contact: cmzmasek at yahoo dot com
24 # WWW: https://sites.google.com/site/cmzmasek/home/software/forester
28 # Requirements phylo_pl is part of the FORESTER libraries.
29 # ------------ Many of its global variables are set via forester.pm.
32 # Note. Use xt.pl (for Pfam alignments) or mt.pl (for other alignments)
33 # to run phylo_pl.pl on whole directories of alignments files.
38 # =========================
40 # METHOD ORDER (IMPORTANT!)
52 #==========================
57 use lib $FindBin::Bin;
60 my $VERSION = "1.0.1";
61 my $LAST_MODIFIED = "2017/04/26";
63 my $RAXML_MODEL_BASE = "PROTGAMMA";
64 my $RAXML_ALGORITHM = "a";
66 my $TEMP_DIR_DEFAULT = "/tmp/phylo_pl_"; # Where all the infiles, outfiles, etc will be created.
68 my $bootstraps = 100; # 0,1: do not bootstrap. Default: 100
69 my $matrix = 5; # 0 = JTT
76 # 8 = DCMut in PHYML, VT in TREE-PUZZLE
81 my $rate_heterogeneity = 0; # 0 = Uniform rate (default)
82 # 1 = 8 Gamma distributed rates
83 # 2 = Two rates (1 invariable + 1 variable)
84 # 3 = Mixed (1 invariable + 8 Gamma rates)
85 my $seed = 9; # Seed for random number generators. Default: 9
86 my $keep_multiple_trees = 0; # 0: delete multiple tree file
87 # 1: do not delete multiple tree file
88 my $exact_parameter_est = 0; # 0: no; 1: yes
90 my $phyml_rel_substitution_rate_cat = 4;
93 my $use_fastme = 0; # 0: no; 1: yes
94 my $use_phylip_nj = 0; # 0: no; 1: yes
95 my $use_phylip_fitch_fm = 0; # 0: no; 1: yes
96 my $use_phylip_fitch_me = 0; # 0: no; 1: yes
97 my $use_bionj = 0; # 0: no; 1: yes
98 my $use_weighbor = 0; # 0: no; 1: yes
99 my $use_raxml = 0; # 0: no; 1: yes
100 my $use_phyml = 0; # 0: no; 1: yes
101 my $use_proml = 0; # 0: no; 1: yes
102 my $use_protpars = 0; # 0: no; 1: yes
103 my $use_global_rearr = 0; # 0: no; 1: yes
104 my $estimate_invar_sites = 0; # 0: no; 1: yes
106 my $fastme_init_tree_opt = "NJ";
108 my %seqnames = (); # number => seqname
109 my %numbers = (); # seqname => number
115 my $multipwdfile = "";
116 my $distancefile = "";
120 my $current_dir = "";
122 my $number_of_seqs = 0;
123 my $number_of_aa = 0;
125 my $use_pwd_based_methods = 0;
128 print( "phylo_pl $VERSION ($LAST_MODIFIED)\n" );
129 print( "__________________________________\n");
134 unless ( @ARGV == 2 || @ARGV == 3 || @ARGV == 4 || @ARGV == 5 ) {
141 # Analyzes the options:
142 # ---------------------
144 if ( $ARGV[ 0 ] =~ /^-.+/ ) {
146 unless ( @ARGV > 2 ) {
150 $options = $ARGV[ 0 ];
152 if ( @ARGV != 3 && @ARGV != 4 ) {
156 $infile = $ARGV[ 1 ];
157 $outfile = $ARGV[ 2 ];
159 $temp_dir = $ARGV[ 3 ];
161 if ( $options =~ /B(\d+)/ ) {
163 if ( $bootstraps <= 1 ) {
166 elsif ( $bootstraps <= 9 ) {
168 print "\n\nphylo_pl: WARNING: Bootstrap number must be devisable by 10,\nno bootstrapping.\n\n";
170 elsif ( $bootstraps % 10 != 0 ) {
171 $bootstraps = $bootstraps - $bootstraps % 10; # to ensure $bootstraps % 10 == 0
172 print "\n\nphylo_pl: WARNING: Bootstrap number must be devisable by 10,\nset to $bootstraps.\n\n";
175 if ( $options =~ /n/ ) {
178 if ( $options =~ /q@(\d)/ ) {
182 $fastme_init_tree_opt = "GME";
184 elsif ( $opt == 2 ) {
185 $fastme_init_tree_opt = "BME";
187 elsif ( $opt == 3 ) {
188 $fastme_init_tree_opt = "NJ";
195 if ( $options =~ /f/ ) {
196 $use_phylip_fitch_fm = 1;
198 if ( $options =~ /e/ ) {
199 $use_phylip_fitch_me = 1;
201 if ( $options =~ /b/ ) {
204 if ( $options =~ /w/ ) {
207 if ( $options =~ /x/ ) {
210 if ( $options =~ /y/ ) {
213 if ( $options =~ /o/ ) {
216 if ( $options =~ /p/ ) {
219 if ( $options =~ /G/ ) {
220 $use_global_rearr = 1;
222 if ( $options =~ /I/ ) {
223 $estimate_invar_sites = 1;
225 if ( $options =~ /j(\d+)/ ) {
227 if ( $jumbles < 1 ) {
231 if ( $options =~ /r(\d+)/ ) {
232 $phyml_rel_substitution_rate_cat = $1;
233 if ( $phyml_rel_substitution_rate_cat < 1 ) {
234 $phyml_rel_substitution_rate_cat = 1;
237 if ( $options =~ /J/ ) {
240 if ( $options =~ /P/ ) {
243 if ( $options =~ /L/ ) {
244 $matrix = 2; # Blosum 62
246 if ( $options =~ /M/ ) {
247 $matrix = 3; # mtREV24
249 if ( $options =~ /W/ ) {
252 if ( $options =~ /A/ ) {
255 if ( $options =~ /D/ ) {
256 $matrix = 8; # DCMut in PHYML and RAXML, VT in PUZZLE
258 if ( $options =~ /H/ ) {
261 if ( $options =~ /T/ ) {
264 if ( $options =~ /Z/ ) {
267 if ( $options =~ /C/ ) {
270 if ( $options =~ /S(\d+)/ ) {
273 if ( $options =~ /X/ ) {
274 $keep_multiple_trees = 1;
276 if ( $options =~ /E/ ) {
277 $exact_parameter_est = 1;
279 if ( $options =~ /g/ ) {
280 $rate_heterogeneity = 1;
282 if ( $options =~ /t/ ) {
283 $rate_heterogeneity = 2;
285 if ( $options =~ /m/ ) {
286 $rate_heterogeneity = 3;
290 unless ( @ARGV == 2 || @ARGV == 3 ) {
294 $infile = $ARGV[ 0 ];
295 $outfile = $ARGV[ 1 ];
297 $temp_dir = $ARGV[ 2 ];
301 if ( $use_fastme != 1 &&
302 $use_phylip_nj != 1 &&
303 $use_phylip_fitch_fm != 1 &&
304 $use_phylip_fitch_me != 1 &&
306 $use_weighbor != 1 &&
310 $use_protpars != 1 ) {
314 $use_phylip_fitch_fm = 1;
315 $use_phylip_fitch_me = 1;
325 if ( $use_fastme == 1 ||
326 $use_phylip_nj == 1 ||
327 $use_phylip_fitch_fm == 1 ||
328 $use_phylip_fitch_me == 1 ||
330 $use_weighbor == 1 ) {
331 $use_pwd_based_methods = 1;
334 $use_pwd_based_methods = 0;
337 $current_dir = `pwd`;
338 $current_dir =~ s/\s//;
340 if ( $outfile !~ /^\// ) {
341 # outfile is not absolute path.
342 $outfile = $current_dir."/".$outfile;
347 # TREE-PUZZLE sets the option in this way:
348 # If two rates or mixed, exact parameter estimates are used.
349 if ( $rate_heterogeneity == 2
350 || $rate_heterogeneity == 3 ) {
351 $exact_parameter_est = 1
355 if ( $outfile =~ /\.xml$/i ) {
356 $outfile =~ s/\.xml//i;
358 elsif ( $outfile =~ /\.aln$/i ) {
359 $outfile =~ s/\.aln//i;
361 elsif ( $outfile =~ /\.fasta$/i ) {
362 $outfile =~ s/\.fasta//i;
364 elsif ( $outfile =~ /\.fas$/i ) {
365 $outfile =~ s/\.fas//i;
367 elsif ( $outfile =~ /\.seqs$/i ) {
368 $outfile =~ s/\.seqs//i;
372 $logfile = $outfile.$LOG_FILE_SUFFIX;
373 $multipwdfile = $outfile.$MULTIPLE_PWD_FILE_SUFFIX;
374 $distancefile = $outfile.$SUFFIX_PWD_NOT_BOOTS;
376 &dieIfFileExists( $logfile );
377 &dieIfFileExists( $multipwdfile );
378 &dieIfFileExists( $distancefile );
381 my $fastme_outtree = $outfile."_fme.xml";
382 my $phylip_nj_outtree = $outfile."_pnj.xml";
383 my $phylip_fm_outtree = $outfile."_pfm.xml";
384 my $phylip_me_outtree = $outfile."_pme.xml";
385 my $bionj_outtree = $outfile."_bionj.xml";
386 my $weighbor_outtree = $outfile."_weigh.xml";
387 my $raxml_outtree = $outfile."_raxml.xml";
388 my $phyml_outtree = $outfile."_phyml.xml";
389 my $proml_outtree = $outfile."_proml.xml";
390 my $protpars_outtree = $outfile."_ppp.xml";
391 my $all_outtree = $outfile."_comb.xml";
393 my $multitreefile_fastme = $outfile."_fme".$MULTIPLE_TREES_FILE_SUFFIX;
394 my $multitreefile_phylip_nj = $outfile."_pnj".$MULTIPLE_TREES_FILE_SUFFIX;
395 my $multitreefile_phylip_fm = $outfile."_pfm".$MULTIPLE_TREES_FILE_SUFFIX;
396 my $multitreefile_phylip_me = $outfile."_pme".$MULTIPLE_TREES_FILE_SUFFIX;
397 my $multitreefile_bionj = $outfile."_bionj".$MULTIPLE_TREES_FILE_SUFFIX;
398 my $multitreefile_weighbor = $outfile."_weigh".$MULTIPLE_TREES_FILE_SUFFIX;
399 my $multitreefile_raxml = $outfile."_raxml".$MULTIPLE_TREES_FILE_SUFFIX;
400 my $multitreefile_phyml = $outfile."_phyml".$MULTIPLE_TREES_FILE_SUFFIX;
401 my $multitreefile_proml = $outfile."_proml".$MULTIPLE_TREES_FILE_SUFFIX;
402 my $multitreefile_protpars = $outfile."_ppp".$MULTIPLE_TREES_FILE_SUFFIX;
404 if ( $use_fastme == 1 ) {
405 &dieIfFileExists( $fastme_outtree );
406 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
407 &dieIfFileExists( $multitreefile_fastme );
410 if( $use_phylip_nj == 1 ) {
411 &dieIfFileExists( $phylip_nj_outtree );
412 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
413 &dieIfFileExists( $multitreefile_phylip_nj );
416 if( $use_phylip_fitch_fm == 1 ) {
417 &dieIfFileExists( $phylip_fm_outtree );
418 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
419 &dieIfFileExists( $multitreefile_phylip_fm );
422 if( $use_phylip_fitch_me == 1 ) {
423 &dieIfFileExists( $phylip_me_outtree );
424 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
425 &dieIfFileExists( $multitreefile_phylip_me );
428 if( $use_bionj == 1 ) {
429 &dieIfFileExists( $bionj_outtree );
430 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
431 &dieIfFileExists( $multitreefile_bionj );
434 if( $use_weighbor == 1 ) {
435 &dieIfFileExists( $weighbor_outtree );
436 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
437 &dieIfFileExists( $multitreefile_weighbor );
440 if( $use_raxml == 1 ) {
441 &dieIfFileExists( $raxml_outtree );
442 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
443 &dieIfFileExists( $multitreefile_raxml );
446 if( $use_phyml == 1 ) {
447 &dieIfFileExists( $phyml_outtree );
448 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
449 &dieIfFileExists( $multitreefile_phyml );
452 if( $use_proml == 1 ) {
453 &dieIfFileExists( $proml_outtree );
454 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
455 &dieIfFileExists( $multitreefile_proml );
458 if( $use_protpars == 1 ) {
459 &dieIfFileExists( $protpars_outtree );
460 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
461 &dieIfFileExists( $multitreefile_protpars );
464 if ( $bootstraps > 1 ) {
465 &dieIfFileExists( $all_outtree );
467 if ( $infile ne "" ) {
468 unless ( ( -s $infile ) && ( -f $infile ) && ( -T $infile ) ) {
469 die "\n\nphylo_pl: Input alignment file \"$infile\" does not exist, is empty, or is not a plain textfile.\n\n";
476 # Prints out the options:
477 # -----------------------
480 $log = "\n$0 logfile:\n";
481 $log = $log."Version: $VERSION\n\n";
485 if ( $infile ne "" ) {
486 $log = $log."Input : $infile\n";
489 if ( $keep_multiple_trees == 1 && $bootstraps >= 2 ) {
490 $log = $log."Multiple distance matrices : $multipwdfile\n";
494 $log = $log."Bootstraps : $bootstraps\n";
496 if ( $use_pwd_based_methods == 1 ) {
497 $log = $log."Prgrm to calculate pairwise dist. : TREE-PUZZLE (version: $PUZZLE_VERSION)\n";
501 if ( $use_fastme == 1 ) {
502 $log = $log."Program to calculate tree : FastME (version: $FASTME_VERSION)\n";
503 $log = $log."Method for intial tree in FastME : $fastme_init_tree_opt\n";
504 $log = $log."Tree swapping (NNI) in FastME : balanced (default)\n";
506 if ( $use_phylip_nj == 1 ) {
507 $log = $log."Program to calculate tree : PHYLIP NEIGHBOR NJ (version: $PHYLIP_VERSION)\n";
509 if ( $use_phylip_fitch_fm == 1 ) {
510 $log = $log."Program to calculate tree : PHYLIP FITCH Fitch-Margoliash (version: $PHYLIP_VERSION)\n";
512 if ( $use_phylip_fitch_me == 1 ) {
513 $log = $log."Program to calculate tree : PHYLIP FITCH Minimal Evolution (version: $PHYLIP_VERSION)\n";
515 if ( $use_bionj == 1 ) {
516 $log = $log."Program to calculate tree : BIONJ (version: $BIONJ_VERSION)\n";
518 if ( $use_weighbor == 1 ) {
519 $log = $log."Program to calculate tree : Weighbor [no invariable sites, b=14] (version: $WEIGHBOR_VERSION)\n";
521 if ( $use_raxml == 1 ) {
522 $log = $log."Program to calculate tree : RAxML [$RAXML_MODEL_BASE] (uses its own bootstraps, if bootstrapped: -f $RAXML_ALGORITHM) (version: $RAXML_VERSION)\n";
524 if ( $use_phyml == 1 ) {
525 $log = $log."Program to calculate tree : PHYML (MLE for gamma distr param and proportion of inv sites) (version: $PHYML_VERSION)\n";
526 $log = $log."# of rel subst rate categories : $phyml_rel_substitution_rate_cat\n";
528 if ( $use_proml == 1 ) {
529 $log = $log."Program to calculate tree : PHYLIP PROML (uses PAM unless JTT selected) (version: $PHYLIP_VERSION)\n";
531 if ( $use_protpars == 1 ) {
532 $log = $log."Program to calculate tree : PHYLIP PROTPARS (with global rearrangements) (version: $PHYLIP_VERSION)\n";
534 if ( $use_phylip_fitch_fm == 1 || $use_phylip_fitch_me == 1 || $use_protpars == 1 || $use_proml ) {
535 $log = $log."Number of jumbles (input order rand): $jumbles\n";
538 if ( $use_phylip_fitch_fm == 1 || $use_phylip_fitch_me == 1 || $use_proml ) {
539 if ( $use_global_rearr == 1 ) {
540 $log = $log."Global rearrangements : true\n";
543 $log = $log."Global rearrangements : false\n";
548 if ( $bootstraps > 0 ) {
549 $log = $log."Prgrm to calculate ML branch lenghts: TREE-PUZZLE (version: $PUZZLE_VERSION)\n";
552 $log = $log."Model : ";
553 if ( $matrix == 0 ) {
554 $log = $log."JTT (Jones et al. 1992)\n";
556 elsif ( $matrix == 1 ) {
557 $log = $log."PAM (Dayhoff et al. 1978)\n";
559 elsif ( $matrix == 2 ) {
560 $log = $log."BLOSUM 62 (Henikoff-Henikoff 92)\n";
562 elsif ( $matrix == 3 ) {
563 $log = $log."mtREV24 (Adachi-Hasegawa 1996)\n";
565 elsif ( $matrix == 5 ) {
566 $log = $log."VT (Mueller-Vingron 2000)\n";
568 elsif ( $matrix == 6 ) {
569 $log = $log."WAG (Whelan-Goldman 2000)\n";
571 elsif ( $matrix == 7 ) {
572 $log = $log."auto in TREE-PUZZLE\n";
574 elsif ( $matrix == 8 ) {
575 $log = $log."DCMut (Kosial and Goldman, 2005) in PHYML and RAxML, VT in TREE-PUZZLE\n";
578 elsif ( $matrix == 9 ) {
579 $log = $log."HKY (Hasegawa et al. 1985) in TREE-PUZZLE\n";
581 elsif ( $matrix == 10 ) {
582 $log = $log."TN (Tamura-Nei 1993) in TREE-PUZZLE\n";
584 elsif ( $matrix == 11 ) {
585 $log = $log."GTR (e.g. Lanave et al. 1980)in TREE-PUZZLE\n";
587 elsif ( $matrix == 12 ) {
588 $log = $log."SH (Schoeniger-von Haeseler 1994) in TREE-PUZZLE\n";
593 &dieWithUnexpectedError( "Unknown model: matrix=$matrix" );
595 if ( $use_raxml == 1 || $use_phyml == 1 ) {
596 if ( $estimate_invar_sites == 1 ) {
597 $log = $log."Estimate proportion of invariable sites in RAXML and/or PHYML: true\n";
600 $log = $log."Estimate proportion of invariable sites in RAXML and/or PHYML: false (proportion \"0.0\" is used in PHYML)\n";
604 $log = $log."Model of rate heterogeneity (PUZZLE): ";
605 if ( $rate_heterogeneity == 1 ) {
606 $log = $log."8 Gamma distributed rates\n";
608 elsif ( $rate_heterogeneity == 2 ) {
609 $log = $log."Two rates (1 invariable + 1 variable)\n";
611 elsif ( $rate_heterogeneity == 3 ) {
612 $log = $log."Mixed (1 invariable + 8 Gamma rates)\n";
615 $log = $log."Uniform rate\n";
617 $log = $log."Seed for random number generators : $seed\n";
618 if ( $exact_parameter_est == 1 ) {
619 $log = $log."Exact parameter estimates in TREE-PUZZLE\n";
622 $log = $log."Start time/date : ".`date`;
627 # That's where the mischief starts....
628 # ------------------------------------
634 my $random_number = int( rand( $range ) );
636 if ( $temp_dir eq "" ) {
637 $temp_dir = $TEMP_DIR_DEFAULT;
640 $temp_dir = $temp_dir.$random_number.$ii;
642 while ( -e $temp_dir ) {
644 $temp_dir = $temp_dir.$random_number.$ii;
647 mkdir( $temp_dir, 0700 )
648 || die "\n\n$0: Could not create <<$temp_dir>>: $!\n\n";
650 unless ( ( -e $temp_dir ) && ( -d $temp_dir ) ) {
651 die "\n\n$0: <<$temp_dir>> does not exist, or is not a directory.\n\n";
655 &cp( $infile, $temp_dir."/INFILE" );
656 unless ( chmod ( 0600, $temp_dir."/INFILE" ) ) {
657 warn "\n\n$0: Could not chmod. $!\n\n";
662 || die "\n\n$0: Could not chdir to <<$temp_dir>>: $!\n\n";
664 &cp( $infile, "infile" );
665 @out = &getNumberOfSeqsAndAas( $infile );
666 $number_of_seqs = $out[ 0 ];
667 $number_of_aa = $out[ 1 ];
669 my $SEQBOOT_OUTFILE = "seqboot_outfile";
671 if ( $bootstraps > 1 && ( $use_pwd_based_methods == 1
674 || $use_protpars == 1 ) ) {
675 &executeSeqboot( $seed, $bootstraps );
676 &mv( "outfile", $SEQBOOT_OUTFILE );
680 &cp( $infile, "align" );
682 if ( $use_pwd_based_methods == 1 ) {
683 # Calculating the pairwise distances (saved in file "infile"): "puzzle"
684 if ( $bootstraps > 1 ) {
685 &executePuzzleBootstrapped( $SEQBOOT_OUTFILE,
688 $exact_parameter_est,
689 $rate_heterogeneity );
691 $pwdfile = $SEQBOOT_OUTFILE.".dist";
694 &executePuzzle( "infile",
697 $exact_parameter_est,
698 $rate_heterogeneity );
699 $pwdfile = "infile.dist";
705 # Methods based on alignment
706 # --------------------------
707 my $OUTTREE_RAXML = "outtree_rax";
708 my $OUTTREE_PHYML = "outtree_phyml";
709 my $OUTTREE_PROML = "outtree_proml";
710 my $OUTTREE_PROTPARS = "outtree_protpars";
712 my $CONSENSUS_RAXML = "consensus_raxml";
713 my $CONSENSUS_PHYML = "consensus_phyml";
714 my $CONSENSUS_PROML = "consensus_proml";
715 my $CONSENSUS_PROTPARS = "consensus_protpars";
717 my $OUTTREES_ALL = "outtrees_all";
720 if ( $use_raxml == 1 ) {
723 if ( $matrix == 0 ) {
726 elsif ( $matrix == 1 ) {
729 elsif ( $matrix == 2 ) {
732 elsif ( $matrix == 3 ) {
735 elsif ( $matrix == 5 ) {
738 elsif ( $matrix == 6 ) {
741 elsif ( $matrix == 7 ) {
744 elsif ( $matrix == 8 ) {
748 &dieWithUnexpectedError( "Unknown model: matrix=$matrix" );
751 print( "\n========== RAxML begin =========\n\n" );
753 # 1. DNA or Amino-Acids sequence filename (PHYLIP format)
754 # 2. Model, eg. PROTGAMMAIVT
755 # 3. Replicates (bootstrap)
756 # 4. Seed for bootstrap
758 # 6. Algorithm (only for bootstrap, default otherwise)
760 if ( $estimate_invar_sites == 1 ) {
764 # NOTE. RaxML does its own bootstrapping.
765 &executeRaxml( "align", $RAXML_MODEL_BASE.$invar.$model."X", $bootstraps, $seed, "xxx", $RAXML_ALGORITHM );
766 print( "\n========== RAxML end =========\n\n" );
768 &rm( "RAxML_log.xxx" );
769 &rm( "RAxML_parsimonyTree.xxx" );
770 &mv( "RAxML_info.xxx", $outfile."_raxml_info" );
771 # if ( $bootstraps > 1 ) {
772 &rm( "RAxML_bestTree.xxx" );
773 &mv( "RAxML_bipartitions.xxx", $CONSENSUS_RAXML );
774 &append( "RAxML_bootstrap.xxx", $OUTTREES_ALL );
775 if ( $keep_multiple_trees == 1 ) {
776 &mv( "RAxML_bootstrap.xxx", $multitreefile_raxml );
779 &rm( "RAxML_bootstrap.xxx" );
784 # &mv( "RAxML_result.xxx", $OUTTREE_RAXML );
789 if ( $use_phyml == 1 ) {
792 if ( $matrix == 0 ) {
795 elsif ( $matrix == 1 ) {
798 elsif ( $matrix == 2 ) {
801 elsif ( $matrix == 3 ) {
804 elsif ( $matrix == 5 ) {
807 elsif ( $matrix == 6 ) {
810 elsif ( $matrix == 7 ) {
813 elsif ( $matrix == 8 ) {
817 &dieWithUnexpectedError( "Unknown model: matrix=$matrix" );
821 if ( $bootstraps > 1 ) {
822 $input = $SEQBOOT_OUTFILE;
827 print( "\n========== PHYML begin =========\n\n" );
829 # 1. DNA or Amino-Acids sequence filename (PHYLIP format)
830 # 2. number of data sets to analyse (ex:3)
831 # 3. Model: JTT | MtREV | Dayhoff | WAG | VT | DCMut | Blosum62 (Amino-Acids)
832 # 4. number of relative substitution rate categories (ex:4), positive integer
833 # 5. starting tree filename (Newick format), your tree filename | BIONJ for a distance-based tree
834 # 6. 1 to estimate proportion of invariable sites, otherwise, fixed proportion "0.0" is used
835 # PHYML produces several results files :
836 # <sequence file name>_phyml_lk.txt : likelihood value(s)
837 # <sequence file name>_phyml_tree.txt : inferred tree(s)
838 # <sequence file name>_phyml_stat.txt : detailed execution stats
839 &executePhyml( $input, $bootstraps, $model, $phyml_rel_substitution_rate_cat, "BIONJ", $estimate_invar_sites );
840 print( "\n========== PHYML end =========\n\n" );
842 &rm( $input."_phyml_lk.txt" );
843 &mv( $input."_phyml_tree.txt", $OUTTREE_PHYML );
844 if ( -e $outfile."_phyml_stat" ) {
845 &rm( $outfile."_phyml_stat" );
847 &mv( $input."_phyml_stat.txt", $outfile."_phyml_stat" );
848 if ( $bootstraps > 1 ) {
849 &append( $OUTTREE_PHYML, $OUTTREES_ALL );
855 if ( $use_proml == 1 ) {
857 if ( $bootstraps > 1 ) {
858 $input = $SEQBOOT_OUTFILE;
863 print( "\n========== PHYLIP PROML begin =========\n\n" );
865 # 1. name of alignment file (in correct format!)
866 # 2. number of bootstraps
867 # 3. jumbles: 0: do not jumble; >=1 number of jumbles
868 # 4. seed for random number generator
869 # 5. 1 for PAM instead of JTT
871 if ( $matrix == 0 ) {
874 &executeProml( $input, $bootstraps, $jumbles, $seed, $use_pam, $use_global_rearr );
875 print( "\n========== PHYLIP PROML end =========\n\n" );
876 &mv( "outtree", $OUTTREE_PROML );
878 if ( $bootstraps > 1 ) {
879 &append( $OUTTREE_PROML, $OUTTREES_ALL );
885 if ( $use_protpars == 1 ) {
887 if ( $bootstraps > 1 ) {
888 $input = $SEQBOOT_OUTFILE;
893 print( "\n========== PHYLIP PROTPARS begin =========\n\n" );
894 &executeProtpars( $input, $bootstraps, $jumbles, $seed );
895 print( "\n========== PHYLIP PROTPARS end =========\n\n" );
896 &mv( "outtree", $OUTTREE_PROTPARS );
897 &rm( $outfile."_protpars_outfile" );
898 &mv( "outfile", $outfile."_protpars_outfile" );
899 if ( $bootstraps > 1 ) {
900 &append( $OUTTREE_PROTPARS, $OUTTREES_ALL );
907 # Methods based on PWD
908 # --------------------
909 my $OUTTREE_FASTME = "outtree_fastme";
910 my $OUTTREE_PHYLIP_NJ = "outtree_phylip_nj";
911 my $OUTTREE_PHYLIP_FM = "outtree_phylip_fm";
912 my $OUTTREE_PHYLIP_ME = "outtree_phylip_me";
913 my $OUTTREE_BIONJ = "outtree_bionj";
914 my $OUTTREE_WEIGHBOR = "outtree_weighbor";
916 my $CONSENSUS_FASTME = "consensus_fastme";
917 my $CONSENSUS_PHYLIP_NJ = "consensus_phylip_nj";
918 my $CONSENSUS_PHYLIP_FM = "consensus_phylip_fm";
919 my $CONSENSUS_PHYLIP_ME = "consensus_phylip_me";
920 my $CONSENSUS_BIONJ = "consensus_bionj";
921 my $CONSENSUS_WEIGHBOR = "consensus_weighbor";
922 my $CONSENSUS_ALL = "consensus_all";
925 if ( $use_fastme == 1 ) {
926 print( "\n========== FASTME begin =========\n\n" );
927 &executeFastme( $pwdfile, $bootstraps, $fastme_init_tree_opt );
928 print( "\n========== FASTME end ===========\n\n" );
930 &mv( "output.tre", $OUTTREE_FASTME );
931 if ( $bootstraps > 1 ) {
932 &append( $OUTTREE_FASTME, $OUTTREES_ALL );
936 if ( $use_phylip_nj ) {
937 print( "\n========== PHYLIP NEIGHBOR begin =========\n\n" );
938 &executeNeighbor( $pwdfile, $bootstraps, $seed, 0 );
939 print( "\n========== PHYLIP NEIGHBOR end =========\n\n" );
940 &mv( "outtree", $OUTTREE_PHYLIP_NJ );
942 if ( $bootstraps > 1 ) {
943 &append( $OUTTREE_PHYLIP_NJ, $OUTTREES_ALL );
947 if ( $use_phylip_fitch_fm ) {
948 print( "\n========== PHYLIP FITCH FM begin =========\n\n" );
949 &executeFitch( $pwdfile, $bootstraps, $seed, $jumbles, 0, "FM", $use_global_rearr );
950 print( "\n========== PHYLIP FITCH FM end =========\n\n" );
951 &mv( "outtree", $OUTTREE_PHYLIP_FM );
953 if ( $bootstraps > 1 ) {
954 &append( $OUTTREE_PHYLIP_FM, $OUTTREES_ALL );
958 if ( $use_phylip_fitch_me ) {
959 print( "\n========== PHYLIP FITCH ME begin =========\n\n" );
960 &executeFitch( $pwdfile, $bootstraps, $seed, $jumbles, 0, "ME", $use_global_rearr );
961 print( "\n========== PHYLIP FITCH ME end =========\n\n" );
962 &mv( "outtree", $OUTTREE_PHYLIP_ME );
964 if ( $bootstraps > 1 ) {
965 &append( $OUTTREE_PHYLIP_ME, $OUTTREES_ALL );
970 print( "\n========== BIONJ begin =========\n\n" );
971 &executeBionj( $pwdfile, $OUTTREE_BIONJ );
972 print( "\n========== BIONJ end =========\n\n" );
973 if ( $bootstraps > 1 ) {
974 &append( $OUTTREE_BIONJ, $OUTTREES_ALL );
978 if ( $use_weighbor ) {
979 print( "\n========== WEIGHBOR begin =========\n\n" );
980 &executeWeighbor( $number_of_aa, 14, $pwdfile, $OUTTREE_WEIGHBOR );
981 print( "\n========== WEIGHBOR end =========\n\n" );
982 if ( $bootstraps > 1 ) {
983 &append( $OUTTREE_WEIGHBOR, $OUTTREES_ALL );
990 if ( $bootstraps > 1 ) {
992 if ( $use_fastme == 1 ) {
993 &consense( $OUTTREE_FASTME, $CONSENSUS_FASTME );
995 if ( $use_phylip_nj == 1 ) {
996 &consense( $OUTTREE_PHYLIP_NJ, $CONSENSUS_PHYLIP_NJ );
998 if ( $use_phylip_fitch_fm == 1 ) {
999 &consense( $OUTTREE_PHYLIP_FM, $CONSENSUS_PHYLIP_FM );
1001 if ( $use_phylip_fitch_me == 1 ) {
1002 &consense( $OUTTREE_PHYLIP_ME, $CONSENSUS_PHYLIP_ME );
1004 if ( $use_bionj == 1 ) {
1005 &consense( $OUTTREE_BIONJ, $CONSENSUS_BIONJ );
1007 if ( $use_weighbor == 1 ) {
1008 &consense( $OUTTREE_WEIGHBOR, $CONSENSUS_WEIGHBOR );
1010 if ( $use_phyml == 1 ) {
1011 &consense( $OUTTREE_PHYML, $CONSENSUS_PHYML );
1013 if ( $use_proml == 1 ) {
1014 &consense( $OUTTREE_PROML, $CONSENSUS_PROML );
1016 if ( $use_protpars == 1 ) {
1017 &consense( $OUTTREE_PROTPARS, $CONSENSUS_PROTPARS );
1019 if ( $all_count > 1 ) {
1020 &consense( $OUTTREES_ALL, $CONSENSUS_ALL );
1023 &rm( $OUTTREES_ALL );
1026 my $INTREE_FOR_PUZZLE = "intree"; #why so serious?
1027 &rm( $INTREE_FOR_PUZZLE );
1028 system( "touch", $INTREE_FOR_PUZZLE )
1029 && die("\n\n$0: could not \"touch $INTREE_FOR_PUZZLE\": $!\n\n");
1031 if ( $use_fastme == 1 ) {
1032 &append( $CONSENSUS_FASTME, $INTREE_FOR_PUZZLE );
1034 if ( $use_phylip_nj == 1 ) {
1035 &append( $CONSENSUS_PHYLIP_NJ, $INTREE_FOR_PUZZLE );
1037 if ( $use_phylip_fitch_fm == 1 ) {
1038 &append( $CONSENSUS_PHYLIP_FM, $INTREE_FOR_PUZZLE );
1040 if ( $use_phylip_fitch_me == 1 ) {
1041 &append( $CONSENSUS_PHYLIP_ME, $INTREE_FOR_PUZZLE );
1043 if ( $use_bionj == 1 ) {
1044 &append( $CONSENSUS_BIONJ, $INTREE_FOR_PUZZLE );
1046 if ( $use_weighbor == 1 ) {
1047 &append( $CONSENSUS_WEIGHBOR, $INTREE_FOR_PUZZLE );
1049 if ( $use_raxml == 1 ) {
1050 # Needed, because TREE-PUZZLE adds internal labels for all subsequent trees
1051 # when evaluating given trees (this seems a strange behaviour).
1052 removeSupportValues( $CONSENSUS_RAXML, $CONSENSUS_RAXML."_support_removed" );
1053 &append( $CONSENSUS_RAXML."_support_removed", $INTREE_FOR_PUZZLE );
1054 &rm( $CONSENSUS_RAXML."_support_removed" );
1056 if ( $use_phyml == 1 ) {
1057 &append( $CONSENSUS_PHYML, $INTREE_FOR_PUZZLE );
1059 if ( $use_proml == 1 ) {
1060 &append( $CONSENSUS_PROML, $INTREE_FOR_PUZZLE );
1062 if ( $use_protpars == 1 ) {
1063 &append( $CONSENSUS_PROTPARS, $INTREE_FOR_PUZZLE );
1065 if ( $all_count > 1 ) {
1066 &append( $CONSENSUS_ALL, $INTREE_FOR_PUZZLE );
1070 # Puzzle for ML branch lenghts:
1071 # The alignment is read from infile by default.
1072 # The tree is read from intree by default.
1074 &mv( "align", "infile" ); # align = original alignment in phylip interleaved.
1076 &executePuzzleToCalculateBranchLenghts( $matrix,
1077 $exact_parameter_est,
1078 $rate_heterogeneity );
1080 my $OUTTREE_PUZZLE = "outtree_puzzle";
1082 &rm( $outfile."_puzzle_outfile" );
1084 &mv( "outfile", $outfile."_puzzle_outfile" );
1085 &mv( "outtree", $OUTTREE_PUZZLE );
1093 if ( $use_fastme == 1 ) {
1094 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_FASTME, $fastme_outtree, $counter++ );
1095 &rm( $CONSENSUS_FASTME );
1097 if ( $use_phylip_nj == 1 ) {
1098 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PHYLIP_NJ, $phylip_nj_outtree, $counter++ );
1099 &rm( $CONSENSUS_PHYLIP_NJ );
1101 if ( $use_phylip_fitch_fm == 1 ) {
1102 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PHYLIP_FM, $phylip_fm_outtree, $counter++ );
1103 &rm( $CONSENSUS_PHYLIP_FM );
1105 if ( $use_phylip_fitch_me == 1 ) {
1106 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PHYLIP_ME, $phylip_me_outtree, $counter++ );
1107 &rm( $CONSENSUS_PHYLIP_ME );
1109 if ( $use_bionj == 1 ) {
1110 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_BIONJ, $bionj_outtree, $counter++ );
1111 &rm( $CONSENSUS_BIONJ );
1113 if ( $use_weighbor == 1 ) {
1114 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_WEIGHBOR, $weighbor_outtree, $counter++ );
1115 &rm( $CONSENSUS_WEIGHBOR );
1117 if ( $use_raxml == 1 ) {
1118 &to_phyloxml( $CONSENSUS_RAXML, $raxml_outtree, 1, 1 );
1121 if ( $use_phyml == 1 ) {
1122 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PHYML, $phyml_outtree, $counter++ );
1123 &rm( $CONSENSUS_PHYML );
1125 if ( $use_proml == 1 ) {
1126 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PROML, $proml_outtree, $counter++ );
1127 &rm( $CONSENSUS_PROML );
1129 if ( $use_protpars == 1 ) {
1130 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PROTPARS, $protpars_outtree, $counter++ );
1131 &rm( $CONSENSUS_PROTPARS );
1133 if ( $all_count > 1 ) {
1134 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_ALL, $all_outtree, $counter++ );
1135 &rm( $CONSENSUS_ALL );
1140 &rm( $OUTTREE_PUZZLE );
1141 &rm( $SEQBOOT_OUTFILE );
1142 if ( $keep_multiple_trees == 1 ) {
1143 if ( $use_fastme == 1 ) {
1144 &mv( $OUTTREE_FASTME, $multitreefile_fastme );
1146 if ( $use_phylip_nj == 1 ) {
1147 &mv( $OUTTREE_PHYLIP_NJ, $multitreefile_phylip_nj );
1149 if ( $use_phylip_fitch_fm == 1 ) {
1150 &mv( $OUTTREE_PHYLIP_FM, $multitreefile_phylip_fm );
1152 if ( $use_phylip_fitch_me == 1 ) {
1153 &mv( $OUTTREE_PHYLIP_ME, $multitreefile_phylip_me );
1155 if ( $use_bionj == 1 ) {
1156 &mv( $OUTTREE_BIONJ, $multitreefile_bionj );
1158 if ( $use_weighbor == 1 ) {
1159 &mv( $OUTTREE_WEIGHBOR, $multitreefile_weighbor );
1161 if ( $use_phyml == 1 ) {
1162 &mv( $OUTTREE_PHYML, $multitreefile_phyml );
1164 if ( $use_proml == 1 ) {
1165 &mv( $OUTTREE_PROML, $multitreefile_proml );
1167 if ( $use_protpars == 1 ) {
1168 &mv( $OUTTREE_PROTPARS, $multitreefile_protpars );
1170 &mv( $pwdfile, $multipwdfile );
1173 if ( $use_fastme == 1 ) {
1174 &rm( $OUTTREE_FASTME );
1176 if ( $use_phylip_nj == 1 ) {
1177 &rm( $OUTTREE_PHYLIP_NJ );
1179 if ( $use_phylip_fitch_fm == 1 ) {
1180 &rm( $OUTTREE_PHYLIP_FM );
1182 if ( $use_phylip_fitch_me == 1 ) {
1183 &rm( $OUTTREE_PHYLIP_ME );
1185 if ( $use_bionj == 1 ) {
1186 &rm( $OUTTREE_BIONJ );
1188 if ( $use_weighbor == 1 ) {
1189 &rm( $OUTTREE_WEIGHBOR );
1191 if ( $use_phyml == 1 ) {
1192 &rm( $OUTTREE_PHYML );
1194 if ( $use_proml == 1 ) {
1195 &rm( $OUTTREE_PROML );
1197 if ( $use_protpars == 1 ) {
1198 &rm( $OUTTREE_PROTPARS );
1202 if ( $all_count > 1 ) {
1203 &rm( $OUTTREES_ALL );
1205 } # if ( $bootstraps > 1 )
1207 &rm( "infile.dist" );
1209 &rm( "infile.puzzle" );
1210 if ( $use_fastme == 1 ) {
1211 &to_phyloxml( $OUTTREE_FASTME, $fastme_outtree, 0, 1 );
1213 if ( $use_phylip_nj == 1 ) {
1214 &to_phyloxml( $OUTTREE_PHYLIP_NJ, $phylip_nj_outtree, 0, 1);
1216 if ( $use_phylip_fitch_fm == 1 ) {
1217 &to_phyloxml( $OUTTREE_PHYLIP_FM, $phylip_fm_outtree, 0, 1 );
1219 if ( $use_phylip_fitch_me == 1 ) {
1220 &to_phyloxml( $OUTTREE_PHYLIP_ME, $phylip_me_outtree, 0, 1 );
1222 if ( $use_bionj == 1 ) {
1223 &to_phyloxml( $OUTTREE_BIONJ, $bionj_outtree, 0, 1 );
1225 if ( $use_weighbor == 1 ) {
1226 &to_phyloxml( $OUTTREE_WEIGHBOR, $weighbor_outtree, 0, 1 );
1228 if ( $use_raxml == 1 ) {
1229 # &to_phyloxml( $OUTTREE_RAXML, $raxml_outtree, 0, 1 );
1230 &to_phyloxml( $CONSENSUS_RAXML, $raxml_outtree, 1, 1 );
1232 if ( $use_phyml == 1 ) {
1233 &to_phyloxml( $OUTTREE_PHYML, $phyml_outtree, 0, 1 );
1235 if ( $use_proml == 1 ) {
1236 &to_phyloxml( $OUTTREE_PROML, $proml_outtree, 0, 1 );
1238 if ( $use_protpars == 1 ) {
1239 &to_phyloxml( $OUTTREE_PROTPARS, $protpars_outtree, 0, 1 );
1241 } # if ( $bootstraps > 1 )
1246 &rm( "align.reduced" );
1249 $log = $log."Finish time/date : ".`date`;
1251 if ( $bootstraps > 1 ) {
1252 $log = $log."Puzzle output file : ".$outfile."_puzzle_outfile\n";
1254 $log = $log."Columns in alignment : $number_of_aa\n";
1255 $log = $log."Number of sequences in alignment : $number_of_seqs\n";
1256 if ( $all_count > 1 ) {
1257 $log = $log."Combined consensus : $all_outtree\n";
1261 if ( $bootstraps > 1 ) {
1263 $log = $log."Simple support value statistics (trees are numbered the same as for TREE PUZZLE output)\n";
1264 $log = $log."------------------------------- \n";
1268 open( OUT, ">$logfile" ) || die "\n$0: Cannot create file <<$logfile>>: $!\n";
1272 if ( $bootstraps > 1 ) {
1273 # Simple support statistics
1274 # -------------------------
1275 my $SS_OUT = $temp_dir."/ss_out";
1278 if ( $use_fastme == 1 ) {
1279 $phylos[ $ounter++ ] = $fastme_outtree;
1281 if ( $use_phylip_nj == 1 ) {
1282 $phylos[ $ounter++ ] = $phylip_nj_outtree;
1284 if ( $use_phylip_fitch_fm == 1 ) {
1285 $phylos[ $ounter++ ] = $phylip_fm_outtree;
1287 if ( $use_phylip_fitch_me == 1 ) {
1288 $phylos[ $ounter++ ] = $phylip_me_outtree;
1290 if ( $use_bionj == 1 ) {
1291 $phylos[ $ounter++ ] = $bionj_outtree;
1293 if ( $use_weighbor == 1 ) {
1294 $phylos[ $ounter++ ] = $weighbor_outtree;
1296 if ( $use_raxml == 1 ) {
1297 $phylos[ $ounter++ ] = $raxml_outtree;
1299 if ( $use_phyml == 1 ) {
1300 $phylos[ $ounter++ ] = $phyml_outtree;
1302 if ( $use_proml == 1 ) {
1303 $phylos[ $ounter++ ] = $proml_outtree;
1305 if ( $use_protpars == 1 ) {
1306 $phylos[ $ounter++ ] = $protpars_outtree;
1308 if ( $all_count > 1) {
1309 $phylos[ $ounter++ ] = $all_outtree;
1311 &executeSupportStatistics( $SS_OUT, @phylos );
1312 &append( $SS_OUT, $logfile );
1315 # Append parts of puzzle output file
1316 # ----------------------------------
1317 if ( $all_count > 1 ) {
1318 &parsePuzzleOutfile( $outfile."_puzzle_outfile", $logfile );
1322 chdir( $current_dir )
1323 || die "\n\n$0: Could not chdir to <<$current_dir>>: $!\n\n";
1326 || print "\n\n$0: Warning: Could not remove <<$temp_dir>>: $!\n\n";
1328 print "\n\n\n$0 successfully completed.\n\n";
1339 # 1. DNA or Amino-Acids sequence filename (PHYLIP format)
1340 # 2. Model, eg. PROTGAMMAIVT
1341 # 3. Replicates (bootstrap)
1342 # 4. Seed for bootstrap
1344 # 6. Algorithm (only for bootstrap, default otherwise)
1345 # NOTE. RaxML does its own bootstrapping.
1348 my $model = $_[ 1 ];
1349 my $replicates = $_[ 2 ];
1351 my $outfile_suffix = $_[ 4 ];
1356 &testForTextFilePresence( $msa );
1357 my $command = "$RAXML -p 27 -m $model -s $msa -n $outfile_suffix";
1359 if ( $replicates > 1 ) {
1360 $command = $command . " -x $seed -N $replicates";
1362 $command = $command . " -f $algo";
1366 print( "\n$command\n");
1369 && &dieWithUnexpectedError( $command );
1377 my $internal_names_are_boots = $_[ 2 ];
1378 my $extract_taxonomy = $_[ 3 ];
1379 &dieIfFileExists( $to );
1380 &dieIfFileNotExists( $from );
1381 my $command = "$NEWICK_TO_PHYLOXML -f=nn $from $to";
1382 if ( $internal_names_are_boots == 1 ) {
1383 $command = $command . " -i";
1385 if ( $extract_taxonomy == 1 ) {
1386 $command = $command . " -xt";
1389 && die "$0: Could not execute \"$command \"";
1397 &dieIfFileExists( $to );
1398 &dieIfFileNotExists( $from );
1399 system( "mv", $from, $to )
1400 && die "\n\n$0: could not move \"$from\" to \"$to\": $!\n\n";
1406 &dieIfFileExists( $to );
1407 &dieIfFileNotExists( $from );
1409 system( "cp", $from, $to )
1410 && die "\n\n$0: could not copy \"$from\" to \"$to\": $!\n\n";
1419 my $multi_in = $_[ 0 ];
1420 my $consense_out = $_[ 1 ];
1421 &executeConsense( $multi_in );
1422 &mv( "outtree", $consense_out );
1429 # 1. file to be appended
1430 # 2. file to append to
1432 my $to_be_appended = $_[ 0 ];
1433 my $append_to = $_[ 1 ];
1434 &dieIfFileNotExists( $to_be_appended );
1435 system( "cat $to_be_appended >> $append_to" )
1436 && die "\n\n$0: could not execute \"cat $to_be_appended >> $append_to\": $!\n\n";
1440 sub dieIfFileExists {
1443 die "\n\n$0: \"$file\" already exists\n\n";
1447 sub dieIfFileNotExists {
1449 unless ( ( -s $file ) && ( -f $file ) ) {
1450 die( "\n\n$0: \"$file\" does not exist or is empty" );
1458 # 1. seed for random number generator
1459 # 2. number of bootstraps
1460 # Reads in "infile" by default.
1461 sub executeSeqboot {
1467 &testForTextFilePresence( $infile );
1473 system( "$SEQBOOT << !
1479 && die "$0: Could not execute \"$SEQBOOT\"";
1485 # One/two/three argument(s):
1486 # Reads in tree from "intree" by default. (Presence of "intree" automatically
1487 # switches into "User defined trees" mode.)
1488 # 1. matrix option: 0 = JTT; 2 = BLOSUM 62; 3 = mtREV24;
1489 # 5 = VT; 6 = WAG; 7 = auto; PAM otherwise
1490 # 2. Parameter estimates: 1 for "Exact (slow)"; "Approximate (faster)" otherwise
1491 # 3. Model of rate heterogeneity:
1492 # 1 for "8 Gamma distributed rates"
1493 # 2 for "Two rates (1 invariable + 1 variable)"
1494 # 3 for "Mixed (1 invariable + 8 Gamma rates)"
1495 # otherwise: Uniform rate
1496 # Last modified: 09/08/03 (added 2nd and 3rd parameter)
1497 sub executePuzzleToCalculateBranchLenghts {
1498 my $matrix_option = $_[ 0 ];
1499 my $parameter_estimates_option = $_[ 1 ];
1500 my $rate_heterogeneity_option = $_[ 2 ];
1506 unless ( ( -s "infile" ) && ( -f "infile" ) && ( -T "infile" ) ) {
1507 die "\n$0: executePuzzleToCalculateBranchLenghts: <<infile>> does not exist, is empty, or is not a plain textfile.\n";
1509 unless ( ( -s "intree" ) && ( -f "intree" ) && ( -T "intree" ) ) {
1510 die "\n$0: executePuzzleToCalculateBranchLenghts: <<intree>> does not exist, is empty, or is not a plain textfile.\n";
1513 $mat = setModelForPuzzle( $matrix_option );
1514 if ( $parameter_estimates_option ) {
1515 $est = &setParameterEstimatesOptionForPuzzle( $parameter_estimates_option );
1517 if ( $rate_heterogeneity_option ) {
1518 $rate = &setRateHeterogeneityOptionForPuzzle( $rate_heterogeneity_option );
1522 system( "$PUZZLE << !
1527 && die "$0: Could not execute \"$PUZZLE\" (mat=$mat est=$est rate=$rate)";
1533 # 2. file to append to
1534 sub parsePuzzleOutfile {
1535 my $puzzle_outfile = $_[ 0 ];
1536 my $file_to_append_to = $_[ 1 ];
1537 &testForTextFilePresence( $puzzle_outfile );
1538 open( OUT, ">>$file_to_append_to" ) || &dieWithUnexpectedError( "Cannot open \"$file_to_append_to\"" );
1539 open( IN, "$puzzle_outfile" ) || &dieWithUnexpectedError( "Cannot open file \"$puzzle_outfile\"" );
1542 print OUT "\nTREE PUZZLE output\n";
1543 print OUT "------------------\n";
1544 while ( $return_line = <IN> ) {
1545 if ( $return_line =~/COMPARISON OF USER TREES/ ) {
1548 elsif( $return_line =~/TIME STAMP/ ) {
1552 print OUT $return_line;
1559 # Three/four arguments:
1560 # 1. Name of file containing tree with correct branch lengths
1561 # 2. Name of file containing tree with correct bootstraps
1563 # 4. Index of tree with correct branch lengths, in case more than one in file
1564 # Last modified: 2007.11.27
1565 sub executeSupportTransfer {
1566 my $tree_with_bl = $_[ 0 ];
1567 my $tree_with_bs = $_[ 1 ];
1569 my $index = $_[ 3 ];
1571 &testForTextFilePresence( $tree_with_bl );
1572 &testForTextFilePresence( $tree_with_bs );
1573 my $command = "$SUPPORT_TRANSFER $tree_with_bl $tree_with_bs $out $index";
1575 && die "$0: Could not execute \"$command\"";
1579 # Two or more arguments:
1581 # 2. phylogeny 1 with support values
1582 # 3. phylogeny 2 with support values
1584 sub executeSupportStatistics {
1585 my $outfile = $_[ 0 ];
1586 &dieIfFileExists( $outfile );
1588 for( my $i = 1; $i < scalar(@_); ++$i ) {
1589 &testForTextFilePresence( $_[ $i ] );
1590 $phylos .= $_[ $i ]." ";
1592 my $command = "$SUPPORT_STATISTICS -o=$outfile $phylos";
1593 system( "$command" )
1594 && die "$0: Could not execute \"$command\"";
1598 sub getNumberOfSeqsAndAas {
1599 my $infile = $_[ 0 ];
1602 open( IN, "$infile" ) || die "\n$0: Cannot open file <<$infile>>: $!\n";
1604 if ( $_ =~ /^\s*(\d+)\s+(\d+)\s*$/ ) {
1611 if ( $seqs == 0 || $aa == 0 ) {
1612 die( "\n$0: Could not get number of seqs and aa from: $infile" );
1619 sub removeSupportValues {
1620 my $infile = $_[ 0 ];
1621 my $outfile = $_[ 1 ];
1622 &testForTextFilePresence( $infile );
1623 open( OUT, ">$outfile" ) || &dieWithUnexpectedError( "Cannot create file \"$outfile\"" );
1624 open( IN, "$infile" ) || &dieWithUnexpectedError( "Cannot open file \"$infile\"" );
1625 while ( my $line = <IN> ) {
1626 $line =~ s/\)\d+\.?\d*:/\):/g;
1637 # 1. name of alignment file (in correct format!)
1638 # 2. number of bootstraps
1639 # 3. jumbles: 0: do not jumble; >=1 number of jumbles
1640 # 4. seed for random number generator
1641 # 5. 1 for PAM instead of JTT
1642 # 6. 1 to use globale rearragements
1644 my $align = $_[ 0 ];
1648 my $use_pam = $_[ 4 ];
1649 my $use_global_rearr = $_[ 5 ];
1654 &testForTextFilePresence( $align );
1656 if ( $bs > 1 && $rand < 1 ) {
1675 if ( $use_pam == 1 ) {
1682 if ( $use_global_rearr == 1 ) {
1687 system( "$PROML 2>&1 << !
1688 $align$jumble$multi$pam$global
1692 && &dieWithUnexpectedError( "Could not execute \"$PROML $align$jumble$multi$pam$global\"" );
1693 # 3: Do NOT print out tree
1703 Copyright (C) 2017 Christian M Zmasek
1706 Author: Christian M Zmasek
1707 cmzmasek at yahoo dot com
1708 https://sites.google.com/site/cmzmasek/home/software/forester
1710 Requirements phylo_pl is part of the FORESTER collection of programs.
1711 ------------ Many of its global variables are set via forester.pm.
1713 Note. Use xt.pl (for Pfam alignments) or mt.pl (for other alignments)
1714 to run phylo_pl.pl on whole directories of alignments files.
1719 phylo_pl.pl [-options] <input alignment in SELEX (Pfam), PHYLIP
1720 sequential format, or Clustal W output> <outputfile>
1721 [path/name for temporary directory to be created]
1724 "% phylo_pl.pl -B100q\@1nbS9X IL5.aln IL5_tree"
1729 Bx : Number of bootstraps. B0: do not bootstrap. Default is 100 bootstrapps.
1730 The number of bootstrapps should be divisible by 10.
1731 J : Use JTT matrix (Jones et al. 1992) in TREE-PUZZLE and/or PHYML, RAXML, default: VT (Mueller-Vingron 2000).
1732 L : Use BLOSUM 62 matrix (Henikoff-Henikoff 92) in TREE-PUZZLE and/or PHYML, RAXML, default: VT.
1733 M : Use mtREV24 matrix (Adachi-Hasegawa 1996) in TREE-PUZZLE and/or PHYML, default: VT.
1734 W : Use WAG matrix (Whelan-Goldman 2000) in TREE-PUZZLE and/or PHYML, RAXML, default: VT.
1735 P : Use PAM matrix (Dayhoff et al. 1978) in TREE-PUZZLE and/or PHYML, RAXML, default: VT.
1736 D : Use DCMut matrix (Kosial and Goldman, 2005) in PHYML, RAXML, VT in TREE-PUZZLE.
1737 A : Let TREE-PUZZLE choose which matrix to use, default: VT.
1738 H : Use HKY (Hasegawa et al. 1985) in TREE-PUZZLE [for nucleic acids]
1739 T : Use TN (Tamura-Nei 1993) in TREE-PUZZLE [for nucleic acids]
1740 Z : Use GTR (e.g. Lanave et al. 1980) in TREE-PUZZLE [for nucleic acids]
1741 C : Use SH (Schoeniger-von Haeseler 1994) in TREE-PUZZLE [for nucleic acids]
1742 E : Exact parameter estimates in TREE-PUZZLE, default: Approximate.
1743 Model of rate heterogeneity in TREE-PUZZLE (default: Uniform rate):
1744 g : 8 Gamma distributed rates
1745 t : Two rates (1 invariable + 1 variable)
1746 m : Mixed (1 invariable + 8 Gamma rates)
1747 q\@x: Use FastME, x: 1: GME
1750 n : Use PHYLIP Neighbor (NJ).
1751 f : Use PHYLIP Fitch.
1752 e : Use PHYLIP Minimal Evolution.
1757 o : Use PHYLIP proml.
1758 p : Use PHYLIP protpars.
1759 rx : Number of relative substitution rate categories in PHYML (default is 4).
1760 jx : Number of jumbles (input order randomization) for PHYLIP FM, ME, PROTPARS, and PROML (default is 2) (random seed set with Sx).
1761 I : Estimate proportion of invariable sites in RAXML and/or PHYML (otherwise, proportion "0.0" is used in PHYML)
1762 G : to turn on global rearrangements in PHYLIP FM, ME, and PROML
1763 Sx : Seed for random number generator(s). Must be 4n+1. Default is 9.
1764 X : To keep multiple tree file (=trees from bootstrap resampled alignments) and
1765 pairwise distance matrix file (in case of bootstrap analysis).