3 # FORESTER -- software libraries and applications
4 # for evolutionary biology research and applications.
6 # Copyright (C) 2017 Christian M. Zmasek
9 # This library is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU Lesser General Public
11 # License as published by the Free Software Foundation; either
12 # version 2.1 of the License, or (at your option) any later version.
14 # This library is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # Lesser General Public License for more details.
19 # You should have received a copy of the GNU Lesser General Public
20 # License along with this library; if not, write to the Free Software
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 # Contact: cmzmasek at yahoo dot com
24 # WWW: https://sites.google.com/site/cmzmasek/home/software/forester
28 # Requirements phylo_pl is part of the FORESTER libraries.
29 # ------------ Many of its global variables are set via forester.pm.
32 # Note. Use xt.pl (for Pfam alignments) or mt.pl (for other alignments)
33 # to run phylo_pl.pl on whole directories of alignments files.
38 # =========================
40 # METHOD ORDER (IMPORTANT!)
52 #==========================
57 use lib $FindBin::Bin;
60 my $VERSION = "1.0.1";
61 my $LAST_MODIFIED = "2017/02/07";
63 my $RAXML_MODEL_BASE = "PROTGAMMA";
64 my $RAXML_ALGORITHM = "a";
66 my $TEMP_DIR_DEFAULT = "/tmp/phylo_pl_"; # Where all the infiles, outfiles, etc will be created.
68 my $bootstraps = 100; # 0,1: do not bootstrap. Default: 100
69 my $matrix = 5; # 0 = JTT
76 # 8 = DCMut in PHYML, VT in TREE-PUZZLE
77 my $rate_heterogeneity = 0; # 0 = Uniform rate (default)
78 # 1 = 8 Gamma distributed rates
79 # 2 = Two rates (1 invariable + 1 variable)
80 # 3 = Mixed (1 invariable + 8 Gamma rates)
81 my $seed = 9; # Seed for random number generators. Default: 9
82 my $keep_multiple_trees = 0; # 0: delete multiple tree file
83 # 1: do not delete multiple tree file
84 my $exact_parameter_est = 0; # 0: no; 1: yes
86 my $phyml_rel_substitution_rate_cat = 4;
89 my $use_fastme = 0; # 0: no; 1: yes
90 my $use_phylip_nj = 0; # 0: no; 1: yes
91 my $use_phylip_fitch_fm = 0; # 0: no; 1: yes
92 my $use_phylip_fitch_me = 0; # 0: no; 1: yes
93 my $use_bionj = 0; # 0: no; 1: yes
94 my $use_weighbor = 0; # 0: no; 1: yes
95 my $use_raxml = 0; # 0: no; 1: yes
96 my $use_phyml = 0; # 0: no; 1: yes
97 my $use_proml = 0; # 0: no; 1: yes
98 my $use_protpars = 0; # 0: no; 1: yes
99 my $use_global_rearr = 0; # 0: no; 1: yes
100 my $estimate_invar_sites = 0; # 0: no; 1: yes
102 my $fastme_init_tree_opt = "NJ";
104 my %seqnames = (); # number => seqname
105 my %numbers = (); # seqname => number
111 my $multipwdfile = "";
112 my $distancefile = "";
116 my $current_dir = "";
118 my $number_of_seqs = 0;
119 my $number_of_aa = 0;
121 my $use_pwd_based_methods = 0;
124 print( "phylo_pl $VERSION ($LAST_MODIFIED)\n" );
125 print( "__________________________________\n");
130 unless ( @ARGV == 2 || @ARGV == 3 || @ARGV == 4 || @ARGV == 5 ) {
137 # Analyzes the options:
138 # ---------------------
140 if ( $ARGV[ 0 ] =~ /^-.+/ ) {
142 unless ( @ARGV > 2 ) {
146 $options = $ARGV[ 0 ];
148 if ( @ARGV != 3 && @ARGV != 4 ) {
152 $infile = $ARGV[ 1 ];
153 $outfile = $ARGV[ 2 ];
155 $temp_dir = $ARGV[ 3 ];
157 if ( $options =~ /B(\d+)/ ) {
159 if ( $bootstraps <= 1 ) {
162 elsif ( $bootstraps <= 9 ) {
164 print "\n\nphylo_pl: WARNING: Bootstrap number must be devisable by 10,\nno bootstrapping.\n\n";
166 elsif ( $bootstraps % 10 != 0 ) {
167 $bootstraps = $bootstraps - $bootstraps % 10; # to ensure $bootstraps % 10 == 0
168 print "\n\nphylo_pl: WARNING: Bootstrap number must be devisable by 10,\nset to $bootstraps.\n\n";
171 if ( $options =~ /n/ ) {
174 if ( $options =~ /q@(\d)/ ) {
178 $fastme_init_tree_opt = "GME";
180 elsif ( $opt == 2 ) {
181 $fastme_init_tree_opt = "BME";
183 elsif ( $opt == 3 ) {
184 $fastme_init_tree_opt = "NJ";
191 if ( $options =~ /f/ ) {
192 $use_phylip_fitch_fm = 1;
194 if ( $options =~ /e/ ) {
195 $use_phylip_fitch_me = 1;
197 if ( $options =~ /b/ ) {
200 if ( $options =~ /w/ ) {
203 if ( $options =~ /x/ ) {
206 if ( $options =~ /y/ ) {
209 if ( $options =~ /o/ ) {
212 if ( $options =~ /p/ ) {
215 if ( $options =~ /G/ ) {
216 $use_global_rearr = 1;
218 if ( $options =~ /I/ ) {
219 $estimate_invar_sites = 1;
221 if ( $options =~ /j(\d+)/ ) {
223 if ( $jumbles < 1 ) {
227 if ( $options =~ /r(\d+)/ ) {
228 $phyml_rel_substitution_rate_cat = $1;
229 if ( $phyml_rel_substitution_rate_cat < 1 ) {
230 $phyml_rel_substitution_rate_cat = 1;
233 if ( $options =~ /J/ ) {
236 if ( $options =~ /P/ ) {
239 if ( $options =~ /L/ ) {
240 $matrix = 2; # Blosum 62
242 if ( $options =~ /M/ ) {
243 $matrix = 3; # mtREV24
245 if ( $options =~ /W/ ) {
248 if ( $options =~ /A/ ) {
251 if ( $options =~ /D/ ) {
252 $matrix = 8; # DCMut in PHYML and RAXML, VT in PUZZLE
254 if ( $options =~ /S(\d+)/ ) {
257 if ( $options =~ /X/ ) {
258 $keep_multiple_trees = 1;
260 if ( $options =~ /E/ ) {
261 $exact_parameter_est = 1;
263 if ( $options =~ /g/ ) {
264 $rate_heterogeneity = 1;
266 if ( $options =~ /t/ ) {
267 $rate_heterogeneity = 2;
269 if ( $options =~ /m/ ) {
270 $rate_heterogeneity = 3;
274 unless ( @ARGV == 2 || @ARGV == 3 ) {
278 $infile = $ARGV[ 0 ];
279 $outfile = $ARGV[ 1 ];
281 $temp_dir = $ARGV[ 2 ];
285 if ( $use_fastme != 1 &&
286 $use_phylip_nj != 1 &&
287 $use_phylip_fitch_fm != 1 &&
288 $use_phylip_fitch_me != 1 &&
290 $use_weighbor != 1 &&
294 $use_protpars != 1 ) {
298 $use_phylip_fitch_fm = 1;
299 $use_phylip_fitch_me = 1;
309 if ( $use_fastme == 1 ||
310 $use_phylip_nj == 1 ||
311 $use_phylip_fitch_fm == 1 ||
312 $use_phylip_fitch_me == 1 ||
314 $use_weighbor == 1 ) {
315 $use_pwd_based_methods = 1;
318 $use_pwd_based_methods = 0;
321 $current_dir = `pwd`;
322 $current_dir =~ s/\s//;
324 if ( $outfile !~ /^\// ) {
325 # outfile is not absolute path.
326 $outfile = $current_dir."/".$outfile;
331 # TREE-PUZZLE sets the option in this way:
332 # If two rates or mixed, exact parameter estimates are used.
333 if ( $rate_heterogeneity == 2
334 || $rate_heterogeneity == 3 ) {
335 $exact_parameter_est = 1
339 if ( $outfile =~ /\.xml$/i ) {
340 $outfile =~ s/\.xml//i;
342 elsif ( $outfile =~ /\.aln$/i ) {
343 $outfile =~ s/\.aln//i;
345 elsif ( $outfile =~ /\.fasta$/i ) {
346 $outfile =~ s/\.fasta//i;
348 elsif ( $outfile =~ /\.fas$/i ) {
349 $outfile =~ s/\.fas//i;
351 elsif ( $outfile =~ /\.seqs$/i ) {
352 $outfile =~ s/\.seqs//i;
356 $logfile = $outfile.$LOG_FILE_SUFFIX;
357 $multipwdfile = $outfile.$MULTIPLE_PWD_FILE_SUFFIX;
358 $distancefile = $outfile.$SUFFIX_PWD_NOT_BOOTS;
360 &dieIfFileExists( $logfile );
361 &dieIfFileExists( $multipwdfile );
362 &dieIfFileExists( $distancefile );
365 my $fastme_outtree = $outfile."_fme.xml";
366 my $phylip_nj_outtree = $outfile."_pnj.xml";
367 my $phylip_fm_outtree = $outfile."_pfm.xml";
368 my $phylip_me_outtree = $outfile."_pme.xml";
369 my $bionj_outtree = $outfile."_bionj.xml";
370 my $weighbor_outtree = $outfile."_weigh.xml";
371 my $raxml_outtree = $outfile."_raxml.xml";
372 my $phyml_outtree = $outfile."_phyml.xml";
373 my $proml_outtree = $outfile."_proml.xml";
374 my $protpars_outtree = $outfile."_ppp.xml";
375 my $all_outtree = $outfile."_comb.xml";
377 my $multitreefile_fastme = $outfile."_fme".$MULTIPLE_TREES_FILE_SUFFIX;
378 my $multitreefile_phylip_nj = $outfile."_pnj".$MULTIPLE_TREES_FILE_SUFFIX;
379 my $multitreefile_phylip_fm = $outfile."_pfm".$MULTIPLE_TREES_FILE_SUFFIX;
380 my $multitreefile_phylip_me = $outfile."_pme".$MULTIPLE_TREES_FILE_SUFFIX;
381 my $multitreefile_bionj = $outfile."_bionj".$MULTIPLE_TREES_FILE_SUFFIX;
382 my $multitreefile_weighbor = $outfile."_weigh".$MULTIPLE_TREES_FILE_SUFFIX;
383 my $multitreefile_raxml = $outfile."_raxml".$MULTIPLE_TREES_FILE_SUFFIX;
384 my $multitreefile_phyml = $outfile."_phyml".$MULTIPLE_TREES_FILE_SUFFIX;
385 my $multitreefile_proml = $outfile."_proml".$MULTIPLE_TREES_FILE_SUFFIX;
386 my $multitreefile_protpars = $outfile."_ppp".$MULTIPLE_TREES_FILE_SUFFIX;
388 if ( $use_fastme == 1 ) {
389 &dieIfFileExists( $fastme_outtree );
390 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
391 &dieIfFileExists( $multitreefile_fastme );
394 if( $use_phylip_nj == 1 ) {
395 &dieIfFileExists( $phylip_nj_outtree );
396 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
397 &dieIfFileExists( $multitreefile_phylip_nj );
400 if( $use_phylip_fitch_fm == 1 ) {
401 &dieIfFileExists( $phylip_fm_outtree );
402 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
403 &dieIfFileExists( $multitreefile_phylip_fm );
406 if( $use_phylip_fitch_me == 1 ) {
407 &dieIfFileExists( $phylip_me_outtree );
408 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
409 &dieIfFileExists( $multitreefile_phylip_me );
412 if( $use_bionj == 1 ) {
413 &dieIfFileExists( $bionj_outtree );
414 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
415 &dieIfFileExists( $multitreefile_bionj );
418 if( $use_weighbor == 1 ) {
419 &dieIfFileExists( $weighbor_outtree );
420 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
421 &dieIfFileExists( $multitreefile_weighbor );
424 if( $use_raxml == 1 ) {
425 &dieIfFileExists( $raxml_outtree );
426 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
427 &dieIfFileExists( $multitreefile_raxml );
430 if( $use_phyml == 1 ) {
431 &dieIfFileExists( $phyml_outtree );
432 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
433 &dieIfFileExists( $multitreefile_phyml );
436 if( $use_proml == 1 ) {
437 &dieIfFileExists( $proml_outtree );
438 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
439 &dieIfFileExists( $multitreefile_proml );
442 if( $use_protpars == 1 ) {
443 &dieIfFileExists( $protpars_outtree );
444 if ( $keep_multiple_trees == 1 && $bootstraps > 1 ) {
445 &dieIfFileExists( $multitreefile_protpars );
448 if ( $bootstraps > 1 ) {
449 &dieIfFileExists( $all_outtree );
451 if ( $infile ne "" ) {
452 unless ( ( -s $infile ) && ( -f $infile ) && ( -T $infile ) ) {
453 die "\n\nphylo_pl: Input alignment file \"$infile\" does not exist, is empty, or is not a plain textfile.\n\n";
460 # Prints out the options:
461 # -----------------------
464 $log = "\n$0 logfile:\n";
465 $log = $log."Version: $VERSION\n\n";
469 if ( $infile ne "" ) {
470 $log = $log."Input : $infile\n";
473 if ( $keep_multiple_trees == 1 && $bootstraps >= 2 ) {
474 $log = $log."Multiple distance matrices : $multipwdfile\n";
478 $log = $log."Bootstraps : $bootstraps\n";
480 if ( $use_pwd_based_methods == 1 ) {
481 $log = $log."Prgrm to calculate pairwise dist. : TREE-PUZZLE (version: $PUZZLE_VERSION)\n";
485 if ( $use_fastme == 1 ) {
486 $log = $log."Program to calculate tree : FastME (version: $FASTME_VERSION)\n";
487 $log = $log."Method for intial tree in FastME : $fastme_init_tree_opt\n";
488 $log = $log."Tree swapping (NNI) in FastME : balanced (default)\n";
490 if ( $use_phylip_nj == 1 ) {
491 $log = $log."Program to calculate tree : PHYLIP NEIGHBOR NJ (version: $PHYLIP_VERSION)\n";
493 if ( $use_phylip_fitch_fm == 1 ) {
494 $log = $log."Program to calculate tree : PHYLIP FITCH Fitch-Margoliash (version: $PHYLIP_VERSION)\n";
496 if ( $use_phylip_fitch_me == 1 ) {
497 $log = $log."Program to calculate tree : PHYLIP FITCH Minimal Evolution (version: $PHYLIP_VERSION)\n";
499 if ( $use_bionj == 1 ) {
500 $log = $log."Program to calculate tree : BIONJ (version: $BIONJ_VERSION)\n";
502 if ( $use_weighbor == 1 ) {
503 $log = $log."Program to calculate tree : Weighbor [no invariable sites, b=14] (version: $WEIGHBOR_VERSION)\n";
505 if ( $use_raxml == 1 ) {
506 $log = $log."Program to calculate tree : RAxML [$RAXML_MODEL_BASE] (uses its own bootstraps, if bootstrapped: -f $RAXML_ALGORITHM) (version: $RAXML_VERSION)\n";
508 if ( $use_phyml == 1 ) {
509 $log = $log."Program to calculate tree : PHYML (MLE for gamma distr param and proportion of inv sites) (version: $PHYML_VERSION)\n";
510 $log = $log."# of rel subst rate categories : $phyml_rel_substitution_rate_cat\n";
512 if ( $use_proml == 1 ) {
513 $log = $log."Program to calculate tree : PHYLIP PROML (uses PAM unless JTT selected) (version: $PHYLIP_VERSION)\n";
515 if ( $use_protpars == 1 ) {
516 $log = $log."Program to calculate tree : PHYLIP PROTPARS (with global rearrangements) (version: $PHYLIP_VERSION)\n";
518 if ( $use_phylip_fitch_fm == 1 || $use_phylip_fitch_me == 1 || $use_protpars == 1 || $use_proml ) {
519 $log = $log."Number of jumbles (input order rand): $jumbles\n";
522 if ( $use_phylip_fitch_fm == 1 || $use_phylip_fitch_me == 1 || $use_proml ) {
523 if ( $use_global_rearr == 1 ) {
524 $log = $log."Global rearrangements : true\n";
527 $log = $log."Global rearrangements : false\n";
532 if ( $bootstraps > 0 ) {
533 $log = $log."Prgrm to calculate ML branch lenghts: TREE-PUZZLE (version: $PUZZLE_VERSION)\n";
536 $log = $log."Model : ";
537 if ( $matrix == 0 ) {
538 $log = $log."JTT (Jones et al. 1992)\n";
540 elsif ( $matrix == 1 ) {
541 $log = $log."PAM (Dayhoff et al. 1978)\n";
543 elsif ( $matrix == 2 ) {
544 $log = $log."BLOSUM 62 (Henikoff-Henikoff 92)\n";
546 elsif ( $matrix == 3 ) {
547 $log = $log."mtREV24 (Adachi-Hasegawa 1996)\n";
549 elsif ( $matrix == 5 ) {
550 $log = $log."VT (Mueller-Vingron 2000)\n";
552 elsif ( $matrix == 6 ) {
553 $log = $log."WAG (Whelan-Goldman 2000)\n";
555 elsif ( $matrix == 7 ) {
556 $log = $log."auto in TREE-PUZZLE\n";
558 elsif ( $matrix == 8 ) {
559 $log = $log."DCMut (Kosial and Goldman, 2005) in PHYML and RAxML, VT in TREE-PUZZLE\n";
562 &dieWithUnexpectedError( "Unknown model: matrix=$matrix" );
564 if ( $use_raxml == 1 || $use_phyml == 1 ) {
565 if ( $estimate_invar_sites == 1 ) {
566 $log = $log."Estimate proportion of invariable sites in RAXML and/or PHYML: true\n";
569 $log = $log."Estimate proportion of invariable sites in RAXML and/or PHYML: false (proportion \"0.0\" is used in PHYML)\n";
573 $log = $log."Model of rate heterogeneity (PUZZLE): ";
574 if ( $rate_heterogeneity == 1 ) {
575 $log = $log."8 Gamma distributed rates\n";
577 elsif ( $rate_heterogeneity == 2 ) {
578 $log = $log."Two rates (1 invariable + 1 variable)\n";
580 elsif ( $rate_heterogeneity == 3 ) {
581 $log = $log."Mixed (1 invariable + 8 Gamma rates)\n";
584 $log = $log."Uniform rate\n";
586 $log = $log."Seed for random number generators : $seed\n";
587 if ( $exact_parameter_est == 1 ) {
588 $log = $log."Exact parameter estimates in TREE-PUZZLE\n";
591 $log = $log."Start time/date : ".`date`;
596 # That's where the mischief starts....
597 # ------------------------------------
603 my $random_number = int( rand( $range ) );
605 if ( $temp_dir eq "" ) {
606 $temp_dir = $TEMP_DIR_DEFAULT;
609 $temp_dir = $temp_dir.$random_number.$ii;
611 while ( -e $temp_dir ) {
613 $temp_dir = $temp_dir.$random_number.$ii;
616 mkdir( $temp_dir, 0700 )
617 || die "\n\n$0: Could not create <<$temp_dir>>: $!\n\n";
619 unless ( ( -e $temp_dir ) && ( -d $temp_dir ) ) {
620 die "\n\n$0: <<$temp_dir>> does not exist, or is not a directory.\n\n";
624 &cp( $infile, $temp_dir."/INFILE" );
625 unless ( chmod ( 0600, $temp_dir."/INFILE" ) ) {
626 warn "\n\n$0: Could not chmod. $!\n\n";
631 || die "\n\n$0: Could not chdir to <<$temp_dir>>: $!\n\n";
633 &cp( $infile, "infile" );
634 @out = &getNumberOfSeqsAndAas( $infile );
635 $number_of_seqs = $out[ 0 ];
636 $number_of_aa = $out[ 1 ];
638 my $SEQBOOT_OUTFILE = "seqboot_outfile";
640 if ( $bootstraps > 1 && ( $use_pwd_based_methods == 1
643 || $use_protpars == 1 ) ) {
644 &executeSeqboot( $seed, $bootstraps );
645 &mv( "outfile", $SEQBOOT_OUTFILE );
649 &cp( $infile, "align" );
651 if ( $use_pwd_based_methods == 1 ) {
652 # Calculating the pairwise distances (saved in file "infile"): "puzzle"
653 if ( $bootstraps > 1 ) {
654 &executePuzzleBootstrapped( $SEQBOOT_OUTFILE,
657 $exact_parameter_est,
658 $rate_heterogeneity );
660 $pwdfile = $SEQBOOT_OUTFILE.".dist";
663 &executePuzzle( "infile",
666 $exact_parameter_est,
667 $rate_heterogeneity );
668 $pwdfile = "infile.dist";
674 # Methods based on alignment
675 # --------------------------
676 my $OUTTREE_RAXML = "outtree_rax";
677 my $OUTTREE_PHYML = "outtree_phyml";
678 my $OUTTREE_PROML = "outtree_proml";
679 my $OUTTREE_PROTPARS = "outtree_protpars";
681 my $CONSENSUS_RAXML = "consensus_raxml";
682 my $CONSENSUS_PHYML = "consensus_phyml";
683 my $CONSENSUS_PROML = "consensus_proml";
684 my $CONSENSUS_PROTPARS = "consensus_protpars";
686 my $OUTTREES_ALL = "outtrees_all";
689 if ( $use_raxml == 1 ) {
692 if ( $matrix == 0 ) {
695 elsif ( $matrix == 1 ) {
698 elsif ( $matrix == 2 ) {
701 elsif ( $matrix == 3 ) {
704 elsif ( $matrix == 5 ) {
707 elsif ( $matrix == 6 ) {
710 elsif ( $matrix == 7 ) {
713 elsif ( $matrix == 8 ) {
717 &dieWithUnexpectedError( "Unknown model: matrix=$matrix" );
720 print( "\n========== RAxML begin =========\n\n" );
722 # 1. DNA or Amino-Acids sequence filename (PHYLIP format)
723 # 2. Model, eg. PROTGAMMAIVT
724 # 3. Replicates (bootstrap)
725 # 4. Seed for bootstrap
727 # 6. Algorithm (only for bootstrap, default otherwise)
729 if ( $estimate_invar_sites == 1 ) {
733 # NOTE. RaxML does its own bootstrapping.
734 &executeRaxml( "align", $RAXML_MODEL_BASE.$invar.$model."F", $bootstraps, $seed, "xxx", $RAXML_ALGORITHM );
735 print( "\n========== RAxML end =========\n\n" );
737 &rm( "RAxML_log.xxx" );
738 &rm( "RAxML_parsimonyTree.xxx" );
739 &mv( "RAxML_info.xxx", $outfile."_raxml_info" );
740 if ( $bootstraps > 1 ) {
741 &rm( "RAxML_bestTree.xxx" );
742 &mv( "RAxML_bipartitions.xxx", $CONSENSUS_RAXML );
743 &append( "RAxML_bootstrap.xxx", $OUTTREES_ALL );
744 if ( $keep_multiple_trees == 1 ) {
745 &mv( "RAxML_bootstrap.xxx", $multitreefile_raxml );
748 &rm( "RAxML_bootstrap.xxx" );
753 &mv( "RAxML_result.xxx", $OUTTREE_RAXML );
758 if ( $use_phyml == 1 ) {
761 if ( $matrix == 0 ) {
764 elsif ( $matrix == 1 ) {
767 elsif ( $matrix == 2 ) {
770 elsif ( $matrix == 3 ) {
773 elsif ( $matrix == 5 ) {
776 elsif ( $matrix == 6 ) {
779 elsif ( $matrix == 7 ) {
782 elsif ( $matrix == 8 ) {
786 &dieWithUnexpectedError( "Unknown model: matrix=$matrix" );
790 if ( $bootstraps > 1 ) {
791 $input = $SEQBOOT_OUTFILE;
796 print( "\n========== PHYML begin =========\n\n" );
798 # 1. DNA or Amino-Acids sequence filename (PHYLIP format)
799 # 2. number of data sets to analyse (ex:3)
800 # 3. Model: JTT | MtREV | Dayhoff | WAG | VT | DCMut | Blosum62 (Amino-Acids)
801 # 4. number of relative substitution rate categories (ex:4), positive integer
802 # 5. starting tree filename (Newick format), your tree filename | BIONJ for a distance-based tree
803 # 6. 1 to estimate proportion of invariable sites, otherwise, fixed proportion "0.0" is used
804 # PHYML produces several results files :
805 # <sequence file name>_phyml_lk.txt : likelihood value(s)
806 # <sequence file name>_phyml_tree.txt : inferred tree(s)
807 # <sequence file name>_phyml_stat.txt : detailed execution stats
808 &executePhyml( $input, $bootstraps, $model, $phyml_rel_substitution_rate_cat, "BIONJ", $estimate_invar_sites );
809 print( "\n========== PHYML end =========\n\n" );
811 &rm( $input."_phyml_lk.txt" );
812 &mv( $input."_phyml_tree.txt", $OUTTREE_PHYML );
813 if ( -e $outfile."_phyml_stat" ) {
814 &rm( $outfile."_phyml_stat" );
816 &mv( $input."_phyml_stat.txt", $outfile."_phyml_stat" );
817 if ( $bootstraps > 1 ) {
818 &append( $OUTTREE_PHYML, $OUTTREES_ALL );
824 if ( $use_proml == 1 ) {
826 if ( $bootstraps > 1 ) {
827 $input = $SEQBOOT_OUTFILE;
832 print( "\n========== PHYLIP PROML begin =========\n\n" );
834 # 1. name of alignment file (in correct format!)
835 # 2. number of bootstraps
836 # 3. jumbles: 0: do not jumble; >=1 number of jumbles
837 # 4. seed for random number generator
838 # 5. 1 for PAM instead of JTT
840 if ( $matrix == 0 ) {
843 &executeProml( $input, $bootstraps, $jumbles, $seed, $use_pam, $use_global_rearr );
844 print( "\n========== PHYLIP PROML end =========\n\n" );
845 &mv( "outtree", $OUTTREE_PROML );
847 if ( $bootstraps > 1 ) {
848 &append( $OUTTREE_PROML, $OUTTREES_ALL );
854 if ( $use_protpars == 1 ) {
856 if ( $bootstraps > 1 ) {
857 $input = $SEQBOOT_OUTFILE;
862 print( "\n========== PHYLIP PROTPARS begin =========\n\n" );
863 &executeProtpars( $input, $bootstraps, $jumbles, $seed );
864 print( "\n========== PHYLIP PROTPARS end =========\n\n" );
865 &mv( "outtree", $OUTTREE_PROTPARS );
866 &rm( $outfile."_protpars_outfile" );
867 &mv( "outfile", $outfile."_protpars_outfile" );
868 if ( $bootstraps > 1 ) {
869 &append( $OUTTREE_PROTPARS, $OUTTREES_ALL );
876 # Methods based on PWD
877 # --------------------
878 my $OUTTREE_FASTME = "outtree_fastme";
879 my $OUTTREE_PHYLIP_NJ = "outtree_phylip_nj";
880 my $OUTTREE_PHYLIP_FM = "outtree_phylip_fm";
881 my $OUTTREE_PHYLIP_ME = "outtree_phylip_me";
882 my $OUTTREE_BIONJ = "outtree_bionj";
883 my $OUTTREE_WEIGHBOR = "outtree_weighbor";
885 my $CONSENSUS_FASTME = "consensus_fastme";
886 my $CONSENSUS_PHYLIP_NJ = "consensus_phylip_nj";
887 my $CONSENSUS_PHYLIP_FM = "consensus_phylip_fm";
888 my $CONSENSUS_PHYLIP_ME = "consensus_phylip_me";
889 my $CONSENSUS_BIONJ = "consensus_bionj";
890 my $CONSENSUS_WEIGHBOR = "consensus_weighbor";
891 my $CONSENSUS_ALL = "consensus_all";
894 if ( $use_fastme == 1 ) {
895 print( "\n========== FASTME begin =========\n\n" );
896 &executeFastme( $pwdfile, $bootstraps, $fastme_init_tree_opt );
897 print( "\n========== FASTME end ===========\n\n" );
899 &mv( "output.tre", $OUTTREE_FASTME );
900 if ( $bootstraps > 1 ) {
901 &append( $OUTTREE_FASTME, $OUTTREES_ALL );
905 if ( $use_phylip_nj ) {
906 print( "\n========== PHYLIP NEIGHBOR begin =========\n\n" );
907 &executeNeighbor( $pwdfile, $bootstraps, $seed, 0 );
908 print( "\n========== PHYLIP NEIGHBOR end =========\n\n" );
909 &mv( "outtree", $OUTTREE_PHYLIP_NJ );
911 if ( $bootstraps > 1 ) {
912 &append( $OUTTREE_PHYLIP_NJ, $OUTTREES_ALL );
916 if ( $use_phylip_fitch_fm ) {
917 print( "\n========== PHYLIP FITCH FM begin =========\n\n" );
918 &executeFitch( $pwdfile, $bootstraps, $seed, $jumbles, 0, "FM", $use_global_rearr );
919 print( "\n========== PHYLIP FITCH FM end =========\n\n" );
920 &mv( "outtree", $OUTTREE_PHYLIP_FM );
922 if ( $bootstraps > 1 ) {
923 &append( $OUTTREE_PHYLIP_FM, $OUTTREES_ALL );
927 if ( $use_phylip_fitch_me ) {
928 print( "\n========== PHYLIP FITCH ME begin =========\n\n" );
929 &executeFitch( $pwdfile, $bootstraps, $seed, $jumbles, 0, "ME", $use_global_rearr );
930 print( "\n========== PHYLIP FITCH ME end =========\n\n" );
931 &mv( "outtree", $OUTTREE_PHYLIP_ME );
933 if ( $bootstraps > 1 ) {
934 &append( $OUTTREE_PHYLIP_ME, $OUTTREES_ALL );
939 print( "\n========== BIONJ begin =========\n\n" );
940 &executeBionj( $pwdfile, $OUTTREE_BIONJ );
941 print( "\n========== BIONJ end =========\n\n" );
942 if ( $bootstraps > 1 ) {
943 &append( $OUTTREE_BIONJ, $OUTTREES_ALL );
947 if ( $use_weighbor ) {
948 print( "\n========== WEIGHBOR begin =========\n\n" );
949 &executeWeighbor( $number_of_aa, 14, $pwdfile, $OUTTREE_WEIGHBOR );
950 print( "\n========== WEIGHBOR end =========\n\n" );
951 if ( $bootstraps > 1 ) {
952 &append( $OUTTREE_WEIGHBOR, $OUTTREES_ALL );
959 if ( $bootstraps > 1 ) {
961 if ( $use_fastme == 1 ) {
962 &consense( $OUTTREE_FASTME, $CONSENSUS_FASTME );
964 if ( $use_phylip_nj == 1 ) {
965 &consense( $OUTTREE_PHYLIP_NJ, $CONSENSUS_PHYLIP_NJ );
967 if ( $use_phylip_fitch_fm == 1 ) {
968 &consense( $OUTTREE_PHYLIP_FM, $CONSENSUS_PHYLIP_FM );
970 if ( $use_phylip_fitch_me == 1 ) {
971 &consense( $OUTTREE_PHYLIP_ME, $CONSENSUS_PHYLIP_ME );
973 if ( $use_bionj == 1 ) {
974 &consense( $OUTTREE_BIONJ, $CONSENSUS_BIONJ );
976 if ( $use_weighbor == 1 ) {
977 &consense( $OUTTREE_WEIGHBOR, $CONSENSUS_WEIGHBOR );
979 if ( $use_phyml == 1 ) {
980 &consense( $OUTTREE_PHYML, $CONSENSUS_PHYML );
982 if ( $use_proml == 1 ) {
983 &consense( $OUTTREE_PROML, $CONSENSUS_PROML );
985 if ( $use_protpars == 1 ) {
986 &consense( $OUTTREE_PROTPARS, $CONSENSUS_PROTPARS );
988 if ( $all_count > 1 ) {
989 &consense( $OUTTREES_ALL, $CONSENSUS_ALL );
992 &rm( $OUTTREES_ALL );
995 my $INTREE_FOR_PUZZLE = "intree"; #why so serious?
996 &rm( $INTREE_FOR_PUZZLE );
997 system( "touch", $INTREE_FOR_PUZZLE )
998 && die("\n\n$0: could not \"touch $INTREE_FOR_PUZZLE\": $!\n\n");
1000 if ( $use_fastme == 1 ) {
1001 &append( $CONSENSUS_FASTME, $INTREE_FOR_PUZZLE );
1003 if ( $use_phylip_nj == 1 ) {
1004 &append( $CONSENSUS_PHYLIP_NJ, $INTREE_FOR_PUZZLE );
1006 if ( $use_phylip_fitch_fm == 1 ) {
1007 &append( $CONSENSUS_PHYLIP_FM, $INTREE_FOR_PUZZLE );
1009 if ( $use_phylip_fitch_me == 1 ) {
1010 &append( $CONSENSUS_PHYLIP_ME, $INTREE_FOR_PUZZLE );
1012 if ( $use_bionj == 1 ) {
1013 &append( $CONSENSUS_BIONJ, $INTREE_FOR_PUZZLE );
1015 if ( $use_weighbor == 1 ) {
1016 &append( $CONSENSUS_WEIGHBOR, $INTREE_FOR_PUZZLE );
1018 if ( $use_raxml == 1 ) {
1019 # Needed, because TREE-PUZZLE adds internal labels for all subsequent trees
1020 # when evaluating given trees (this seems a strange behaviour).
1021 removeSupportValues( $CONSENSUS_RAXML, $CONSENSUS_RAXML."_support_removed" );
1022 &append( $CONSENSUS_RAXML."_support_removed", $INTREE_FOR_PUZZLE );
1023 &rm( $CONSENSUS_RAXML."_support_removed" );
1025 if ( $use_phyml == 1 ) {
1026 &append( $CONSENSUS_PHYML, $INTREE_FOR_PUZZLE );
1028 if ( $use_proml == 1 ) {
1029 &append( $CONSENSUS_PROML, $INTREE_FOR_PUZZLE );
1031 if ( $use_protpars == 1 ) {
1032 &append( $CONSENSUS_PROTPARS, $INTREE_FOR_PUZZLE );
1034 if ( $all_count > 1 ) {
1035 &append( $CONSENSUS_ALL, $INTREE_FOR_PUZZLE );
1039 # Puzzle for ML branch lenghts:
1040 # The alignment is read from infile by default.
1041 # The tree is read from intree by default.
1043 &mv( "align", "infile" ); # align = original alignment in phylip interleaved.
1045 &executePuzzleToCalculateBranchLenghts( $matrix,
1046 $exact_parameter_est,
1047 $rate_heterogeneity );
1049 my $OUTTREE_PUZZLE = "outtree_puzzle";
1051 &rm( $outfile."_puzzle_outfile" );
1053 &mv( "outfile", $outfile."_puzzle_outfile" );
1054 &mv( "outtree", $OUTTREE_PUZZLE );
1062 if ( $use_fastme == 1 ) {
1063 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_FASTME, $fastme_outtree, $counter++ );
1064 &rm( $CONSENSUS_FASTME );
1066 if ( $use_phylip_nj == 1 ) {
1067 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PHYLIP_NJ, $phylip_nj_outtree, $counter++ );
1068 &rm( $CONSENSUS_PHYLIP_NJ );
1070 if ( $use_phylip_fitch_fm == 1 ) {
1071 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PHYLIP_FM, $phylip_fm_outtree, $counter++ );
1072 &rm( $CONSENSUS_PHYLIP_FM );
1074 if ( $use_phylip_fitch_me == 1 ) {
1075 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PHYLIP_ME, $phylip_me_outtree, $counter++ );
1076 &rm( $CONSENSUS_PHYLIP_ME );
1078 if ( $use_bionj == 1 ) {
1079 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_BIONJ, $bionj_outtree, $counter++ );
1080 &rm( $CONSENSUS_BIONJ );
1082 if ( $use_weighbor == 1 ) {
1083 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_WEIGHBOR, $weighbor_outtree, $counter++ );
1084 &rm( $CONSENSUS_WEIGHBOR );
1086 if ( $use_raxml == 1 ) {
1087 &to_phyloxml( $CONSENSUS_RAXML, $raxml_outtree, 1, 1 );
1090 if ( $use_phyml == 1 ) {
1091 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PHYML, $phyml_outtree, $counter++ );
1092 &rm( $CONSENSUS_PHYML );
1094 if ( $use_proml == 1 ) {
1095 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PROML, $proml_outtree, $counter++ );
1096 &rm( $CONSENSUS_PROML );
1098 if ( $use_protpars == 1 ) {
1099 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_PROTPARS, $protpars_outtree, $counter++ );
1100 &rm( $CONSENSUS_PROTPARS );
1102 if ( $all_count > 1 ) {
1103 &executeSupportTransfer( $OUTTREE_PUZZLE, $CONSENSUS_ALL, $all_outtree, $counter++ );
1104 &rm( $CONSENSUS_ALL );
1109 &rm( $OUTTREE_PUZZLE );
1110 &rm( $SEQBOOT_OUTFILE );
1111 if ( $keep_multiple_trees == 1 ) {
1112 if ( $use_fastme == 1 ) {
1113 &mv( $OUTTREE_FASTME, $multitreefile_fastme );
1115 if ( $use_phylip_nj == 1 ) {
1116 &mv( $OUTTREE_PHYLIP_NJ, $multitreefile_phylip_nj );
1118 if ( $use_phylip_fitch_fm == 1 ) {
1119 &mv( $OUTTREE_PHYLIP_FM, $multitreefile_phylip_fm );
1121 if ( $use_phylip_fitch_me == 1 ) {
1122 &mv( $OUTTREE_PHYLIP_ME, $multitreefile_phylip_me );
1124 if ( $use_bionj == 1 ) {
1125 &mv( $OUTTREE_BIONJ, $multitreefile_bionj );
1127 if ( $use_weighbor == 1 ) {
1128 &mv( $OUTTREE_WEIGHBOR, $multitreefile_weighbor );
1130 if ( $use_phyml == 1 ) {
1131 &mv( $OUTTREE_PHYML, $multitreefile_phyml );
1133 if ( $use_proml == 1 ) {
1134 &mv( $OUTTREE_PROML, $multitreefile_proml );
1136 if ( $use_protpars == 1 ) {
1137 &mv( $OUTTREE_PROTPARS, $multitreefile_protpars );
1139 &mv( $pwdfile, $multipwdfile );
1142 if ( $use_fastme == 1 ) {
1143 &rm( $OUTTREE_FASTME );
1145 if ( $use_phylip_nj == 1 ) {
1146 &rm( $OUTTREE_PHYLIP_NJ );
1148 if ( $use_phylip_fitch_fm == 1 ) {
1149 &rm( $OUTTREE_PHYLIP_FM );
1151 if ( $use_phylip_fitch_me == 1 ) {
1152 &rm( $OUTTREE_PHYLIP_ME );
1154 if ( $use_bionj == 1 ) {
1155 &rm( $OUTTREE_BIONJ );
1157 if ( $use_weighbor == 1 ) {
1158 &rm( $OUTTREE_WEIGHBOR );
1160 if ( $use_phyml == 1 ) {
1161 &rm( $OUTTREE_PHYML );
1163 if ( $use_proml == 1 ) {
1164 &rm( $OUTTREE_PROML );
1166 if ( $use_protpars == 1 ) {
1167 &rm( $OUTTREE_PROTPARS );
1171 if ( $all_count > 1 ) {
1172 &rm( $OUTTREES_ALL );
1174 } # if ( $bootstraps > 1 )
1176 &rm( "infile.dist" );
1178 &rm( "infile.puzzle" );
1179 if ( $use_fastme == 1 ) {
1180 &to_phyloxml( $OUTTREE_FASTME, $fastme_outtree, 0, 1 );
1182 if ( $use_phylip_nj == 1 ) {
1183 &to_phyloxml( $OUTTREE_PHYLIP_NJ, $phylip_nj_outtree, 0, 1);
1185 if ( $use_phylip_fitch_fm == 1 ) {
1186 &to_phyloxml( $OUTTREE_PHYLIP_FM, $phylip_fm_outtree, 0, 1 );
1188 if ( $use_phylip_fitch_me == 1 ) {
1189 &to_phyloxml( $OUTTREE_PHYLIP_ME, $phylip_me_outtree, 0, 1 );
1191 if ( $use_bionj == 1 ) {
1192 &to_phyloxml( $OUTTREE_BIONJ, $bionj_outtree, 0, 1 );
1194 if ( $use_weighbor == 1 ) {
1195 &to_phyloxml( $OUTTREE_WEIGHBOR, $weighbor_outtree, 0, 1 );
1197 if ( $use_raxml == 1 ) {
1198 &to_phyloxml( $OUTTREE_RAXML, $raxml_outtree, 0, 1 );
1200 if ( $use_phyml == 1 ) {
1201 &to_phyloxml( $OUTTREE_PHYML, $phyml_outtree, 0, 1 );
1203 if ( $use_proml == 1 ) {
1204 &to_phyloxml( $OUTTREE_PROML, $proml_outtree, 0, 1 );
1206 if ( $use_protpars == 1 ) {
1207 &to_phyloxml( $OUTTREE_PROTPARS, $protpars_outtree, 0, 1 );
1209 } # if ( $bootstraps > 1 )
1214 &rm( "align.reduced" );
1217 $log = $log."Finish time/date : ".`date`;
1219 if ( $bootstraps > 1 ) {
1220 $log = $log."Puzzle output file : ".$outfile."_puzzle_outfile\n";
1222 $log = $log."Columns in alignment : $number_of_aa\n";
1223 $log = $log."Number of sequences in alignment : $number_of_seqs\n";
1224 if ( $all_count > 1 ) {
1225 $log = $log."Combined consensus : $all_outtree\n";
1229 if ( $bootstraps > 1 ) {
1231 $log = $log."Simple support value statistics (trees are numbered the same as for TREE PUZZLE output)\n";
1232 $log = $log."------------------------------- \n";
1236 open( OUT, ">$logfile" ) || die "\n$0: Cannot create file <<$logfile>>: $!\n";
1240 if ( $bootstraps > 1 ) {
1241 # Simple support statistics
1242 # -------------------------
1243 my $SS_OUT = $temp_dir."/ss_out";
1246 if ( $use_fastme == 1 ) {
1247 $phylos[ $ounter++ ] = $fastme_outtree;
1249 if ( $use_phylip_nj == 1 ) {
1250 $phylos[ $ounter++ ] = $phylip_nj_outtree;
1252 if ( $use_phylip_fitch_fm == 1 ) {
1253 $phylos[ $ounter++ ] = $phylip_fm_outtree;
1255 if ( $use_phylip_fitch_me == 1 ) {
1256 $phylos[ $ounter++ ] = $phylip_me_outtree;
1258 if ( $use_bionj == 1 ) {
1259 $phylos[ $ounter++ ] = $bionj_outtree;
1261 if ( $use_weighbor == 1 ) {
1262 $phylos[ $ounter++ ] = $weighbor_outtree;
1264 if ( $use_raxml == 1 ) {
1265 $phylos[ $ounter++ ] = $raxml_outtree;
1267 if ( $use_phyml == 1 ) {
1268 $phylos[ $ounter++ ] = $phyml_outtree;
1270 if ( $use_proml == 1 ) {
1271 $phylos[ $ounter++ ] = $proml_outtree;
1273 if ( $use_protpars == 1 ) {
1274 $phylos[ $ounter++ ] = $protpars_outtree;
1276 if ( $all_count > 1) {
1277 $phylos[ $ounter++ ] = $all_outtree;
1279 &executeSupportStatistics( $SS_OUT, @phylos );
1280 &append( $SS_OUT, $logfile );
1283 # Append parts of puzzle output file
1284 # ----------------------------------
1285 if ( $all_count > 1 ) {
1286 &parsePuzzleOutfile( $outfile."_puzzle_outfile", $logfile );
1290 chdir( $current_dir )
1291 || die "\n\n$0: Could not chdir to <<$current_dir>>: $!\n\n";
1294 || print "\n\n$0: Warning: Could not remove <<$temp_dir>>: $!\n\n";
1296 print "\n\n\n$0 successfully comleted.\n\n";
1307 # 1. DNA or Amino-Acids sequence filename (PHYLIP format)
1308 # 2. Model, eg. PROTGAMMAIVT
1309 # 3. Replicates (bootstrap)
1310 # 4. Seed for bootstrap
1312 # 6. Algorithm (only for bootstrap, default otherwise)
1313 # NOTE. RaxML does its own bootstrapping.
1316 my $model = $_[ 1 ];
1317 my $replicates = $_[ 2 ];
1319 my $outfile_suffix = $_[ 4 ];
1322 &testForTextFilePresence( $msa );
1323 my $command = "$RAXML -m $model -s $msa -n $outfile_suffix";
1325 if ( $replicates > 1 ) {
1326 $command = $command . " -x $seed -N $replicates";
1328 $command = $command . " -f $algo";
1332 print( "\n$command\n");
1335 && &dieWithUnexpectedError( $command );
1343 my $internal_names_are_boots = $_[ 2 ];
1344 my $extract_taxonomy = $_[ 3 ];
1345 &dieIfFileExists( $to );
1346 &dieIfFileNotExists( $from );
1347 my $command = "$NEWICK_TO_PHYLOXML -f=nn $from $to";
1348 if ( $internal_names_are_boots == 1 ) {
1349 $command = $command . " -i";
1351 if ( $extract_taxonomy == 1 ) {
1352 $command = $command . " -xt";
1355 && die "$0: Could not execute \"$command \"";
1363 &dieIfFileExists( $to );
1364 &dieIfFileNotExists( $from );
1365 system( "mv", $from, $to )
1366 && die "\n\n$0: could not move \"$from\" to \"$to\": $!\n\n";
1372 &dieIfFileExists( $to );
1373 &dieIfFileNotExists( $from );
1375 system( "cp", $from, $to )
1376 && die "\n\n$0: could not copy \"$from\" to \"$to\": $!\n\n";
1385 my $multi_in = $_[ 0 ];
1386 my $consense_out = $_[ 1 ];
1387 &executeConsense( $multi_in );
1388 &mv( "outtree", $consense_out );
1395 # 1. file to be appended
1396 # 2. file to append to
1398 my $to_be_appended = $_[ 0 ];
1399 my $append_to = $_[ 1 ];
1400 &dieIfFileNotExists( $to_be_appended );
1401 system( "cat $to_be_appended >> $append_to" )
1402 && die "\n\n$0: could not execute \"cat $to_be_appended >> $append_to\": $!\n\n";
1406 sub dieIfFileExists {
1409 die "\n\n$0: \"$file\" already exists\n\n";
1413 sub dieIfFileNotExists {
1415 unless ( ( -s $file ) && ( -f $file ) ) {
1416 die( "\n\n$0: \"$file\" does not exist or is empty" );
1424 # 1. seed for random number generator
1425 # 2. number of bootstraps
1426 # Reads in "infile" by default.
1427 sub executeSeqboot {
1433 &testForTextFilePresence( $infile );
1439 system( "$SEQBOOT << !
1445 && die "$0: Could not execute \"$SEQBOOT\"";
1451 # One/two/three argument(s):
1452 # Reads in tree from "intree" by default. (Presence of "intree" automatically
1453 # switches into "User defined trees" mode.)
1454 # 1. matrix option: 0 = JTT; 2 = BLOSUM 62; 3 = mtREV24;
1455 # 5 = VT; 6 = WAG; 7 = auto; PAM otherwise
1456 # 2. Parameter estimates: 1 for "Exact (slow)"; "Approximate (faster)" otherwise
1457 # 3. Model of rate heterogeneity:
1458 # 1 for "8 Gamma distributed rates"
1459 # 2 for "Two rates (1 invariable + 1 variable)"
1460 # 3 for "Mixed (1 invariable + 8 Gamma rates)"
1461 # otherwise: Uniform rate
1462 # Last modified: 09/08/03 (added 2nd and 3rd parameter)
1463 sub executePuzzleToCalculateBranchLenghts {
1464 my $matrix_option = $_[ 0 ];
1465 my $parameter_estimates_option = $_[ 1 ];
1466 my $rate_heterogeneity_option = $_[ 2 ];
1472 unless ( ( -s "infile" ) && ( -f "infile" ) && ( -T "infile" ) ) {
1473 die "\n$0: executePuzzleToCalculateBranchLenghts: <<infile>> does not exist, is empty, or is not a plain textfile.\n";
1475 unless ( ( -s "intree" ) && ( -f "intree" ) && ( -T "intree" ) ) {
1476 die "\n$0: executePuzzleToCalculateBranchLenghts: <<intree>> does not exist, is empty, or is not a plain textfile.\n";
1479 $mat = setModelForPuzzle( $matrix_option );
1480 if ( $parameter_estimates_option ) {
1481 $est = &setParameterEstimatesOptionForPuzzle( $parameter_estimates_option );
1483 if ( $rate_heterogeneity_option ) {
1484 $rate = &setRateHeterogeneityOptionForPuzzle( $rate_heterogeneity_option );
1488 system( "$PUZZLE << !
1493 && die "$0: Could not execute \"$PUZZLE\" (mat=$mat est=$est rate=$rate)";
1499 # 2. file to append to
1500 sub parsePuzzleOutfile {
1501 my $puzzle_outfile = $_[ 0 ];
1502 my $file_to_append_to = $_[ 1 ];
1503 &testForTextFilePresence( $puzzle_outfile );
1504 open( OUT, ">>$file_to_append_to" ) || &dieWithUnexpectedError( "Cannot open \"$file_to_append_to\"" );
1505 open( IN, "$puzzle_outfile" ) || &dieWithUnexpectedError( "Cannot open file \"$puzzle_outfile\"" );
1508 print OUT "\nTREE PUZZLE output\n";
1509 print OUT "------------------\n";
1510 while ( $return_line = <IN> ) {
1511 if ( $return_line =~/COMPARISON OF USER TREES/ ) {
1514 elsif( $return_line =~/TIME STAMP/ ) {
1518 print OUT $return_line;
1525 # Three/four arguments:
1526 # 1. Name of file containing tree with correct branch lengths
1527 # 2. Name of file containing tree with correct bootstraps
1529 # 4. Index of tree with correct branch lengths, in case more than one in file
1530 # Last modified: 2007.11.27
1531 sub executeSupportTransfer {
1532 my $tree_with_bl = $_[ 0 ];
1533 my $tree_with_bs = $_[ 1 ];
1535 my $index = $_[ 3 ];
1537 &testForTextFilePresence( $tree_with_bl );
1538 &testForTextFilePresence( $tree_with_bs );
1539 my $command = "$SUPPORT_TRANSFER $tree_with_bl $tree_with_bs $out $index";
1541 && die "$0: Could not execute \"$command\"";
1545 # Two or more arguments:
1547 # 2. phylogeny 1 with support values
1548 # 3. phylogeny 2 with support values
1550 sub executeSupportStatistics {
1551 my $outfile = $_[ 0 ];
1552 &dieIfFileExists( $outfile );
1554 for( my $i = 1; $i < scalar(@_); ++$i ) {
1555 &testForTextFilePresence( $_[ $i ] );
1556 $phylos .= $_[ $i ]." ";
1558 my $command = "$SUPPORT_STATISTICS -o=$outfile $phylos";
1559 system( "$command" )
1560 && die "$0: Could not execute \"$command\"";
1564 sub getNumberOfSeqsAndAas {
1565 my $infile = $_[ 0 ];
1568 open( IN, "$infile" ) || die "\n$0: Cannot open file <<$infile>>: $!\n";
1570 if ( $_ =~ /^\s*(\d+)\s+(\d+)\s*$/ ) {
1577 if ( $seqs == 0 || $aa == 0 ) {
1578 die( "\n$0: Could not get number of seqs and aa from: $infile" );
1585 sub removeSupportValues {
1586 my $infile = $_[ 0 ];
1587 my $outfile = $_[ 1 ];
1588 &testForTextFilePresence( $infile );
1589 open( OUT, ">$outfile" ) || &dieWithUnexpectedError( "Cannot create file \"$outfile\"" );
1590 open( IN, "$infile" ) || &dieWithUnexpectedError( "Cannot open file \"$infile\"" );
1591 while ( my $line = <IN> ) {
1592 $line =~ s/\)\d+\.?\d*:/\):/g;
1603 # 1. name of alignment file (in correct format!)
1604 # 2. number of bootstraps
1605 # 3. jumbles: 0: do not jumble; >=1 number of jumbles
1606 # 4. seed for random number generator
1607 # 5. 1 for PAM instead of JTT
1608 # 6. 1 to use globale rearragements
1610 my $align = $_[ 0 ];
1614 my $use_pam = $_[ 4 ];
1615 my $use_global_rearr = $_[ 5 ];
1620 &testForTextFilePresence( $align );
1622 if ( $bs > 1 && $rand < 1 ) {
1641 if ( $use_pam == 1 ) {
1648 if ( $use_global_rearr == 1 ) {
1653 system( "$PROML 2>&1 << !
1654 $align$jumble$multi$pam$global
1658 && &dieWithUnexpectedError( "Could not execute \"$PROML $align$jumble$multi$pam$global\"" );
1659 # 3: Do NOT print out tree
1669 Copyright (C) 2017 Christian M Zmasek
1672 Author: Christian M Zmasek
1673 cmzmasek at yahoo dot com
1674 https://sites.google.com/site/cmzmasek/home/software/forester
1676 Requirements phylo_pl is part of the FORESTER collection of programs.
1677 ------------ Many of its global variables are set via forester.pm.
1679 Note. Use xt.pl (for Pfam alignments) or mt.pl (for other alignments)
1680 to run phylo_pl.pl on whole directories of alignments files.
1685 phylo_pl.pl [-options] <input alignment in SELEX (Pfam), PHYLIP
1686 sequential format, or Clustal W output> <outputfile>
1687 [path/name for temporary directory to be created]
1690 "% phylo_pl.pl -B100q\@1nbS9X IL5.aln IL5_tree"
1695 Bx : Number of bootstraps. B0: do not bootstrap. Default is 100 bootstrapps.
1696 The number of bootstrapps should be divisible by 10.
1697 J : Use JTT matrix (Jones et al. 1992) in TREE-PUZZLE and/or PHYML, RAXML, default: VT (Mueller-Vingron 2000).
1698 L : Use BLOSUM 62 matrix (Henikoff-Henikoff 92) in TREE-PUZZLE and/or PHYML, RAXML, default: VT.
1699 M : Use mtREV24 matrix (Adachi-Hasegawa 1996) in TREE-PUZZLE and/or PHYML, default: VT.
1700 W : Use WAG matrix (Whelan-Goldman 2000) in TREE-PUZZLE and/or PHYML, RAXML, default: VT.
1701 P : Use PAM matrix (Dayhoff et al. 1978) in TREE-PUZZLE and/or PHYML, RAXML, default: VT.
1702 D : Use DCMut matrix (Kosial and Goldman, 2005) in PHYML, RAXML, VT in TREE-PUZZLE.
1703 A : Let TREE-PUZZLE choose which matrix to use, default: VT
1704 E : Exact parameter estimates in TREE-PUZZLE, default: Approximate.
1705 Model of rate heterogeneity in TREE-PUZZLE (default: Uniform rate):
1706 g : 8 Gamma distributed rates
1707 t : Two rates (1 invariable + 1 variable)
1708 m : Mixed (1 invariable + 8 Gamma rates)
1709 q\@x: Use FastME, x: 1: GME
1712 n : Use PHYLIP Neighbor (NJ).
1713 f : Use PHYLIP Fitch.
1714 e : Use PHYLIP Minimal Evolution.
1719 o : Use PHYLIP proml.
1720 p : Use PHYLIP protpars.
1721 rx : Number of relative substitution rate categories in PHYML (default is 4).
1722 jx : Number of jumbles (input order randomization) for PHYLIP FM, ME, PROTPARS, and PROML (default is 2) (random seed set with Sx).
1723 I : Estimate proportion of invariable sites in RAXML and/or PHYML (otherwise, proportion "0.0" is used in PHYML)
1724 G : to turn on global rearrangements in PHYLIP FM, ME, and PROML
1725 Sx : Seed for random number generator(s). Must be 4n+1. Default is 9.
1726 X : To keep multiple tree file (=trees from bootstrap resampled alignments) and
1727 pairwise distance matrix file (in case of bootstrap analysis).