2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // Copyright (C) 2000-2001 Washington University School of Medicine
8 // and Howard Hughes Medical Institute
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
28 package org.forester.rio;
31 import java.io.FileNotFoundException;
32 import java.io.IOException;
33 import java.text.DecimalFormat;
34 import java.util.ArrayList;
35 import java.util.Collections;
36 import java.util.HashMap;
37 import java.util.HashSet;
38 import java.util.List;
40 import java.util.SortedSet;
41 import java.util.TreeSet;
43 import org.forester.datastructures.IntMatrix;
44 import org.forester.io.parsers.IteratingPhylogenyParser;
45 import org.forester.io.parsers.PhylogenyParser;
46 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
47 import org.forester.io.parsers.nhx.NHXParser;
48 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
49 import org.forester.io.parsers.util.ParserUtils;
50 import org.forester.phylogeny.Phylogeny;
51 import org.forester.phylogeny.PhylogenyMethods;
52 import org.forester.phylogeny.PhylogenyNode;
53 import org.forester.phylogeny.data.Taxonomy;
54 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
55 import org.forester.phylogeny.factories.PhylogenyFactory;
56 import org.forester.sdi.GSDI;
57 import org.forester.sdi.GSDIR;
58 import org.forester.sdi.SDIException;
59 import org.forester.sdi.SDIR;
60 import org.forester.sdi.SDIutil;
61 import org.forester.sdi.SDIutil.ALGORITHM;
62 import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
63 import org.forester.util.BasicDescriptiveStatistics;
64 import org.forester.util.ForesterUtil;
66 public final class RIO {
68 public static final int DEFAULT_RANGE = -1;
69 private static final int END_OF_GT = Integer.MAX_VALUE;
70 private static IntMatrix _m;
71 private Phylogeny[] _analyzed_gene_trees;
72 private List<PhylogenyNode> _removed_gene_tree_nodes;
73 private int _ext_nodes;
74 private int _int_nodes;
75 private TaxonomyComparisonBase _gsdir_tax_comp_base;
76 private final StringBuilder _log;
77 private final BasicDescriptiveStatistics _duplications_stats;
78 private final boolean _produce_log;
79 private final boolean _verbose;
80 private final REROOTING _rerooting;
81 private final Phylogeny _species_tree;
82 private Phylogeny _min_dub_gene_tree;
84 private RIO( final IteratingPhylogenyParser p,
85 final Phylogeny species_tree,
86 final ALGORITHM algorithm,
87 final REROOTING rerooting,
88 final String outgroup,
91 final boolean produce_log,
92 final boolean verbose ) throws IOException, SDIException, RIOException {
93 if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
96 else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) {
99 removeSingleDescendentsNodes( species_tree, verbose );
101 checkPreconditions( p, species_tree, rerooting, outgroup, first, last );
102 _produce_log = produce_log;
104 _rerooting = rerooting;
107 _log = new StringBuilder();
108 _gsdir_tax_comp_base = null;
109 _analyzed_gene_trees = null;
110 _removed_gene_tree_nodes = null;
111 _duplications_stats = new BasicDescriptiveStatistics();
113 inferOrthologs( p, species_tree, algorithm, outgroup, first, last );
114 _species_tree = species_tree;
117 private RIO( final Phylogeny[] gene_trees,
118 final Phylogeny species_tree,
119 final ALGORITHM algorithm,
120 final REROOTING rerooting,
121 final String outgroup,
124 final boolean produce_log,
125 final boolean verbose ) throws IOException, SDIException, RIOException {
126 if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
127 last = gene_trees.length - 1;
129 else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) {
132 removeSingleDescendentsNodes( species_tree, verbose );
133 checkPreconditions( gene_trees, species_tree, rerooting, outgroup, first, last );
134 _produce_log = produce_log;
136 _rerooting = rerooting;
139 _log = new StringBuilder();
140 _gsdir_tax_comp_base = null;
141 _analyzed_gene_trees = null;
142 _removed_gene_tree_nodes = null;
143 _duplications_stats = new BasicDescriptiveStatistics();
144 inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last );
145 _species_tree = species_tree;
148 public final Phylogeny[] getAnalyzedGeneTrees() {
149 return _analyzed_gene_trees;
152 public final BasicDescriptiveStatistics getDuplicationsStatistics() {
153 return _duplications_stats;
/**
 * Returns the number of external nodes in the gene trees analyzed (after
 * stripping).
 *
 * @return number of external nodes in the gene trees analyzed (after stripping)
 */
162 public final int getExtNodesOfAnalyzedGeneTrees() {
166 public final TaxonomyComparisonBase getGSDIRtaxCompBase() {
167 return _gsdir_tax_comp_base;
/**
 * Returns the number of internal nodes in the gene trees analyzed (after
 * stripping).
 *
 * @return number of internal nodes in the gene trees analyzed (after stripping)
 */
176 public final int getIntNodesOfAnalyzedGeneTrees() {
180 public final StringBuilder getLog() {
184 final public Phylogeny getMinDuplicationsGeneTree() {
185 return _min_dub_gene_tree;
188 public final IntMatrix getOrthologTable() {
192 public final List<PhylogenyNode> getRemovedGeneTreeNodes() {
193 return _removed_gene_tree_nodes;
196 public final Phylogeny getSpeciesTree() {
197 return _species_tree;
200 private final void inferOrthologs( final IteratingPhylogenyParser parser,
201 final Phylogeny species_tree,
202 final ALGORITHM algorithm,
203 final String outgroup,
205 final int last ) throws SDIException, RIOException, FileNotFoundException,
207 if ( !parser.hasNext() ) {
208 throw new RIOException( "no gene trees to analyze" );
211 preLog( -1, species_tree, algorithm, outgroup );
214 System.out.println();
216 final DecimalFormat pf = new java.text.DecimalFormat( "000" );
217 int gene_tree_ext_nodes = 0;
220 final boolean no_range = ( first < 0 ) || ( last < first );
221 while ( parser.hasNext() ) {
222 final Phylogeny gt = parser.next();
223 if ( no_range || ( ( i >= first ) && ( i <= last ) ) ) {
224 if ( gt.isEmpty() ) {
225 throw new RIOException( "gene tree #" + i + " is empty" );
227 if ( gt.getNumberOfExternalNodes() == 1 ) {
228 throw new RIOException( "gene tree #" + i + " has only one external node" );
231 ForesterUtil.updateProgress( i, pf );
233 if ( counter == 0 ) {
234 if ( algorithm == ALGORITHM.SDIR ) {
235 // Removes from species_tree all species not found in gene_tree.
236 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gt, species_tree );
237 if ( species_tree.isEmpty() ) {
238 throw new RIOException( "failed to establish species based mapping between gene and species trees" );
241 gene_tree_ext_nodes = gt.getNumberOfExternalNodes();
243 else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) {
244 throw new RIOException( "gene tree #" + i + " has a different number of external nodes ("
245 + gt.getNumberOfExternalNodes() + ") than the preceding gene tree(s) ("
246 + gene_tree_ext_nodes + ")" );
248 if ( algorithm == ALGORITHM.SDIR ) {
249 // Removes from gene_tree all species not found in species_tree.
250 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
251 if ( gt.isEmpty() ) {
252 throw new RIOException( "failed to establish species based mapping between gene and species trees" );
255 final Phylogeny analyzed_gt = performOrthologInference( gt, species_tree, algorithm, outgroup, counter );
256 RIO.calculateOrthologTable( analyzed_gt, true, counter );
261 if ( ( first >= 0 ) && ( counter == 0 ) && ( i > 0 ) ) {
262 throw new RIOException( "attempt to analyze first gene tree #" + first + " in a set of " + i );
268 postLog( species_tree, first, first + counter - 1 );
271 System.out.println();
272 System.out.println();
276 private final void inferOrthologs( final Phylogeny[] gene_trees,
277 final Phylogeny species_tree,
278 final ALGORITHM algorithm,
279 final String outgroup,
281 final int last ) throws SDIException, RIOException, FileNotFoundException,
283 if ( algorithm == ALGORITHM.SDIR ) {
284 // Removes from species_tree all species not found in gene_tree.
285 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
286 if ( species_tree.isEmpty() ) {
287 throw new RIOException( "failed to establish species based mapping between gene and species trees" );
290 final Phylogeny[] my_gene_trees;
291 if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) {
292 my_gene_trees = new Phylogeny[ ( 1 + last ) - first ];
294 for( int i = first; i <= last; ++i ) {
295 my_gene_trees[ c++ ] = gene_trees[ i ];
299 my_gene_trees = gene_trees;
302 preLog( gene_trees.length, species_tree, algorithm, outgroup );
304 if ( _verbose && ( my_gene_trees.length >= 4 ) ) {
305 System.out.println();
307 _analyzed_gene_trees = new Phylogeny[ my_gene_trees.length ];
308 int gene_tree_ext_nodes = 0;
309 for( int i = 0; i < my_gene_trees.length; ++i ) {
310 final Phylogeny gt = my_gene_trees[ i ];
311 if ( gt.isEmpty() ) {
312 throw new RIOException( "gene tree #" + i + " is empty" );
314 if ( gt.getNumberOfExternalNodes() == 1 ) {
315 throw new RIOException( "gene tree #" + i + " has only one external node" );
317 if ( _verbose && ( my_gene_trees.length > 4 ) ) {
318 ForesterUtil.updateProgress( ( ( double ) i ) / my_gene_trees.length );
321 gene_tree_ext_nodes = gt.getNumberOfExternalNodes();
323 else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) {
324 throw new RIOException( "gene tree #" + i + " has a different number of external nodes ("
325 + gt.getNumberOfExternalNodes() + ") than the preceding gene tree(s) (" + gene_tree_ext_nodes
328 if ( algorithm == ALGORITHM.SDIR ) {
329 // Removes from gene_tree all species not found in species_tree.
330 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
331 if ( gt.isEmpty() ) {
332 throw new RIOException( "failed to establish species based mapping between gene and species trees" );
335 _analyzed_gene_trees[ i ] = performOrthologInference( gt, species_tree, algorithm, outgroup, i );
338 postLog( species_tree, first, last );
340 if ( _verbose && ( my_gene_trees.length > 4 ) ) {
341 System.out.println();
342 System.out.println();
346 private final boolean log() {
350 private final void log( final String s ) {
352 _log.append( ForesterUtil.LINE_SEPARATOR );
355 private final void logRemovedGeneTreeNodes() {
356 log( "Species stripped from gene trees:" );
357 final SortedSet<String> rn = new TreeSet<String>();
358 for( final PhylogenyNode n : getRemovedGeneTreeNodes() ) {
359 final Taxonomy t = n.getNodeData().getTaxonomy();
360 switch ( getGSDIRtaxCompBase() ) {
362 rn.add( t.getTaxonomyCode() );
366 rn.add( t.getIdentifier().toString() );
369 case SCIENTIFIC_NAME: {
370 rn.add( t.getScientificName() );
375 for( final String s : rn ) {
381 private final Phylogeny performOrthologInference( final Phylogeny gene_tree,
382 final Phylogeny species_tree,
383 final ALGORITHM algorithm,
384 final String outgroup,
385 final int i ) throws SDIException, RIOException {
386 final Phylogeny assigned_tree;
387 switch ( algorithm ) {
389 assigned_tree = performOrthologInferenceBySDI( gene_tree, species_tree );
393 assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i );
397 throw new IllegalArgumentException( "illegal algorithm: " + algorithm );
401 _ext_nodes = assigned_tree.getNumberOfExternalNodes();
402 _int_nodes = assigned_tree.getNumberOfInternalNodes();
404 else if ( _ext_nodes != assigned_tree.getNumberOfExternalNodes() ) {
405 throw new RIOException( "after stripping gene tree #" + i + " has a different number of external nodes ("
406 + assigned_tree.getNumberOfExternalNodes() + ") than the preceding gene tree(s) (" + _ext_nodes
409 return assigned_tree;
412 private final Phylogeny performOrthologInferenceByGSDI( final Phylogeny gene_tree,
413 final Phylogeny species_tree,
414 final String outgroup,
415 final int i ) throws SDIException, RIOException {
416 final Phylogeny assigned_tree;
418 if ( _rerooting == REROOTING.BY_ALGORITHM ) {
419 final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0 );
420 assigned_tree = gsdir.getMinDuplicationsSumGeneTree();
422 _removed_gene_tree_nodes = gsdir.getStrippedExternalGeneTreeNodes();
423 for( final PhylogenyNode r : _removed_gene_tree_nodes ) {
424 if ( !r.getNodeData().isHasTaxonomy() ) {
425 throw new RIOException( "node with no (appropriate) taxonomic information found in gene tree #"
426 + i + ": " + r.toString() );
431 _gsdir_tax_comp_base = gsdir.getTaxCompBase();
433 dups = gsdir.getMinDuplicationsSum();
436 if ( _rerooting == REROOTING.MIDPOINT ) {
437 PhylogenyMethods.midpointRoot( gene_tree );
439 else if ( _rerooting == REROOTING.OUTGROUP ) {
440 final PhylogenyNode n = gene_tree.getNode( outgroup );
441 gene_tree.reRoot( n );
443 final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true );
444 _removed_gene_tree_nodes = gsdi.getStrippedExternalGeneTreeNodes();
445 for( final PhylogenyNode r : _removed_gene_tree_nodes ) {
446 if ( !r.getNodeData().isHasTaxonomy() ) {
447 throw new RIOException( "node with no (appropriate) taxonomic information found in gene tree #" + i
448 + ": " + r.toString() );
451 assigned_tree = gene_tree;
453 _gsdir_tax_comp_base = gsdi.getTaxCompBase();
455 dups = gsdi.getDuplicationsSum();
457 if ( ( i == 0 ) || ( dups < _duplications_stats.getMin() ) ) {
458 _min_dub_gene_tree = assigned_tree;
460 _duplications_stats.addValue( dups );
461 return assigned_tree;
464 private final Phylogeny performOrthologInferenceBySDI( final Phylogeny gene_tree, final Phylogeny species_tree )
465 throws SDIException {
466 final SDIR sdir = new SDIR();
467 return sdir.infer( gene_tree, species_tree, false, true, true, true, 1 )[ 0 ];
470 private final void postLog( final Phylogeny species_tree, final int first, final int last ) {
472 if ( ( getRemovedGeneTreeNodes() != null ) && ( getRemovedGeneTreeNodes().size() > 0 ) ) {
473 logRemovedGeneTreeNodes();
475 log( "Species tree external nodes (after stripping) : " + species_tree.getNumberOfExternalNodes() );
476 log( "Species tree polytomies (after stripping) : "
477 + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
478 log( "Taxonomy linking based on : " + getGSDIRtaxCompBase() );
479 final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" );
480 if ( ( first >= 0 ) && ( last >= 0 ) ) {
481 log( "Gene trees analyzed range : " + first + "-" + last );
483 log( "Gene trees analyzed : " + _duplications_stats.getN() );
484 log( "Mean number of duplications : " + df.format( _duplications_stats.arithmeticMean() )
485 + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " ("
486 + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() )
488 if ( _duplications_stats.getN() > 3 ) {
489 log( "Median number of duplications : " + df.format( _duplications_stats.median() )
490 + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() )
493 log( "Minimum duplications : " + ( int ) _duplications_stats.getMin() + " ("
494 + df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
495 log( "Maximum duplications : " + ( int ) _duplications_stats.getMax() + " ("
496 + df.format( ( 100.0 * _duplications_stats.getMax() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
497 log( "Gene tree internal nodes : " + getIntNodesOfAnalyzedGeneTrees() );
498 log( "Gene tree external nodes : " + getExtNodesOfAnalyzedGeneTrees() );
501 private final void preLog( final int gene_trees,
502 final Phylogeny species_tree,
503 final ALGORITHM algorithm,
504 final String outgroup ) {
505 if ( gene_trees > 0 ) {
506 log( "Number of gene trees (total) : " + gene_trees );
508 log( "Algorithm : " + algorithm );
509 log( "Species tree external nodes (prior to stripping): " + species_tree.getNumberOfExternalNodes() );
510 log( "Species tree polytomies (prior to stripping) : "
511 + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
513 switch ( _rerooting ) {
515 rs = "minimizing duplications";
523 rs = "outgroup: " + outgroup;
531 log( "Re-rooting : " + rs );
534 public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees, final boolean sort )
535 throws RIOException {
536 final List<String> labels = new ArrayList<String>();
537 final Set<String> labels_set = new HashSet<String>();
538 for( final PhylogenyNode n : analyzed_gene_trees[ 0 ].getExternalNodes() ) {
539 final String label = obtainLabel( labels_set, n );
540 labels_set.add( label );
544 Collections.sort( labels );
546 final IntMatrix m = new IntMatrix( labels );
548 for( final Phylogeny gt : analyzed_gene_trees ) {
550 updateCounts( m, counter, gt );
555 public final static RIO executeAnalysis( final File gene_trees_file,
556 final File species_tree_file,
557 final ALGORITHM algorithm,
558 final REROOTING rerooting,
559 final String outgroup,
562 final boolean produce_log,
563 final boolean verbose ) throws IOException, SDIException, RIOException {
564 final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file );
565 if ( gene_trees.length < 1 ) {
566 throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
568 final Phylogeny species_tree = SDIutil.parseSpeciesTree( gene_trees[ 0 ],
572 TAXONOMY_EXTRACTION.NO );
573 return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
576 public final static RIO executeAnalysis( final File gene_trees_file,
577 final Phylogeny species_tree,
578 final ALGORITHM algorithm,
579 final REROOTING rerooting,
580 final String outgroup,
581 final boolean produce_log,
582 final boolean verbose ) throws IOException, SDIException, RIOException {
583 return new RIO( parseGeneTrees( gene_trees_file ),
594 public final static RIO executeAnalysis( final File gene_trees_file,
595 final Phylogeny species_tree,
596 final ALGORITHM algorithm,
597 final REROOTING rerooting,
598 final String outgroup,
601 final boolean produce_log,
602 final boolean verbose ) throws IOException, SDIException, RIOException {
603 return new RIO( parseGeneTrees( gene_trees_file ),
614 public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
615 final File species_tree_file,
616 final ALGORITHM algorithm,
617 final REROOTING rerooting,
618 final String outgroup,
621 final boolean produce_log,
622 final boolean verbose ) throws IOException, SDIException, RIOException {
623 final Phylogeny g0 = p.next();
624 if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) {
625 throw new RIOException( "input file does not seem to contain any gene trees" );
627 final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0,
631 TAXONOMY_EXTRACTION.NO );
633 return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
636 public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
637 final Phylogeny species_tree,
638 final ALGORITHM algorithm,
639 final REROOTING rerooting,
640 final String outgroup,
641 final boolean produce_log,
642 final boolean verbose ) throws IOException, SDIException, RIOException {
654 public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
655 final Phylogeny species_tree,
656 final ALGORITHM algorithm,
657 final REROOTING rerooting,
658 final String outgroup,
661 final boolean produce_log,
662 final boolean verbose ) throws IOException, SDIException, RIOException {
663 return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
666 public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree )
667 throws IOException, SDIException, RIOException {
668 return new RIO( gene_trees,
671 REROOTING.BY_ALGORITHM,
679 public final static RIO executeAnalysis( final Phylogeny[] gene_trees,
680 final Phylogeny species_tree,
681 final ALGORITHM algorithm,
682 final REROOTING rerooting,
683 final String outgroup,
684 final boolean produce_log,
685 final boolean verbose ) throws IOException, SDIException, RIOException {
686 return new RIO( gene_trees,
697 public final static RIO executeAnalysis( final Phylogeny[] gene_trees,
698 final Phylogeny species_tree,
699 final ALGORITHM algorithm,
700 final REROOTING rerooting,
701 final String outgroup,
704 final boolean produce_log,
705 final boolean verbose ) throws IOException, SDIException, RIOException {
706 return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
709 private final static void calculateOrthologTable( final Phylogeny g, final boolean sort, final int counter )
710 throws RIOException {
711 if ( counter == 0 ) {
712 final List<String> labels = new ArrayList<String>();
713 final Set<String> labels_set = new HashSet<String>();
714 for( final PhylogenyNode n : g.getExternalNodes() ) {
715 final String label = obtainLabel( labels_set, n );
716 labels_set.add( label );
720 Collections.sort( labels );
722 _m = new IntMatrix( labels );
724 updateCounts( _m, counter, g );
727 private final static void checkPreconditions( final IteratingPhylogenyParser p,
728 final Phylogeny species_tree,
729 final REROOTING rerooting,
730 final String outgroup,
732 final int last ) throws RIOException, IOException {
733 final Phylogeny g0 = p.next();
734 if ( ( g0 == null ) || g0.isEmpty() ) {
735 throw new RIOException( "input file does not seem to contain any gene trees" );
737 if ( g0.getNumberOfExternalNodes() < 2 ) {
738 throw new RIOException( "input file does not seem to contain any useable gene trees" );
740 if ( !species_tree.isRooted() ) {
741 throw new RIOException( "species tree is not rooted" );
743 if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) )
744 && ( ( last < first ) || ( last < 0 ) || ( first < 0 ) ) ) {
745 throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to "
748 if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) {
749 throw new RIOException( "outgroup not set for midpoint rooting" );
751 if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) {
752 throw new RIOException( "outgroup only used for midpoint rooting" );
754 if ( ( rerooting == REROOTING.MIDPOINT ) && ( PhylogenyMethods.calculateMaxDistanceToRoot( g0 ) <= 0 ) ) {
755 throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" );
757 if ( rerooting == REROOTING.OUTGROUP ) {
759 g0.getNode( outgroup );
761 catch ( final IllegalArgumentException e ) {
762 throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() );
767 private final static void checkPreconditions( final Phylogeny[] gene_trees,
768 final Phylogeny species_tree,
769 final REROOTING rerooting,
770 final String outgroup,
772 final int last ) throws RIOException {
773 if ( !species_tree.isRooted() ) {
774 throw new RIOException( "species tree is not rooted" );
776 if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) )
777 && ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) {
778 throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to "
779 + last + " (out of " + gene_trees.length + ")" );
781 if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) {
782 throw new RIOException( "outgroup not set for midpoint rooting" );
784 if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) {
785 throw new RIOException( "outgroup only used for midpoint rooting" );
787 if ( ( rerooting == REROOTING.MIDPOINT )
788 && ( PhylogenyMethods.calculateMaxDistanceToRoot( gene_trees[ 0 ] ) <= 0 ) ) {
789 throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" );
791 if ( rerooting == REROOTING.OUTGROUP ) {
793 gene_trees[ 0 ].getNode( outgroup );
795 catch ( final IllegalArgumentException e ) {
796 throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() );
801 private final static String obtainLabel( final Set<String> labels_set, final PhylogenyNode n ) throws RIOException {
803 if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
804 label = n.getNodeData().getSequence().getName();
806 else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
807 label = n.getNodeData().getSequence().getSymbol();
809 else if ( !ForesterUtil.isEmpty( n.getName() ) ) {
813 throw new RIOException( "node " + n + " has no appropriate label" );
815 if ( labels_set.contains( label ) ) {
816 throw new RIOException( "label " + label + " is not unique" );
821 private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException,
823 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
824 final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
825 if ( p instanceof NHXParser ) {
826 final NHXParser nhx = ( NHXParser ) p;
827 nhx.setReplaceUnderscores( false );
828 nhx.setIgnoreQuotes( true );
829 nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
831 else if ( p instanceof NexusPhylogeniesParser ) {
832 final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
833 nex.setReplaceUnderscores( false );
834 nex.setIgnoreQuotes( true );
835 nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES );
837 return factory.create( gene_trees_file, p );
840 private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) {
841 final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree );
844 System.out.println( "warning: species tree has " + o
845 + " internal nodes with only one descendent which are therefore going to be removed" );
847 PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
851 private final static void updateCounts( final IntMatrix m, final int counter, final Phylogeny g )
852 throws RIOException {
853 PhylogenyMethods.preOrderReId( g );
854 final HashMap<String, PhylogenyNode> map = PhylogenyMethods.createNameToExtNodeMap( g );
855 for( int x = 0; x < m.size(); ++x ) {
856 final String mx = m.getLabel( x );
857 final PhylogenyNode nx = map.get( mx );
859 throw new RIOException( "node \"" + mx + "\" not present in gene tree #" + counter );
863 for( int y = 0; y < m.size(); ++y ) {
864 my = m.getLabel( y );
867 throw new RIOException( "node \"" + my + "\" not present in gene tree #" + counter );
869 if ( !PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( nx, ny ).isDuplication() ) {
870 m.inreaseByOne( x, y );
/**
 * Strategies for re-rooting gene trees prior to analysis.
 */
public enum REROOTING {
    /** Gene trees are analyzed with the root they come with. */
    NONE,
    /** The algorithm chooses the rooting which minimizes duplications. */
    BY_ALGORITHM,
    /** Gene trees are re-rooted at their midpoint. */
    MIDPOINT,
    /** Gene trees are re-rooted on a named outgroup node. */
    OUTGROUP;
}