remove single node root
[jalview.git] / forester / java / src / org / forester / rio / RIO.java
index 38efa35..24b4c2a 100644 (file)
@@ -41,6 +41,7 @@ import java.util.TreeSet;
 
 import org.forester.datastructures.IntMatrix;
 import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
 import org.forester.io.parsers.nhx.NHXParser;
 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
 import org.forester.io.parsers.util.ParserUtils;
@@ -66,6 +67,7 @@ public final class RIO {
     private Phylogeny[]                      _analyzed_gene_trees;
     private List<PhylogenyNode>              _removed_gene_tree_nodes;
     private int                              _ext_nodes;
+    private int                              _int_nodes;
     private TaxonomyComparisonBase           _gsdir_tax_comp_base;
     private final StringBuilder              _log;
     private final BasicDescriptiveStatistics _duplications_stats;
@@ -94,6 +96,7 @@ public final class RIO {
         _verbose = verbose;
         _rerooting = rerooting;
         _ext_nodes = -1;
+        _int_nodes = -1;
         _log = new StringBuilder();
         _gsdir_tax_comp_base = null;
         _analyzed_gene_trees = null;
@@ -120,6 +123,16 @@ public final class RIO {
         return _ext_nodes;
     }
 
+    /**
+     * Returns the numbers of number of int nodes in gene trees analyzed (after
+     * stripping).
+     * 
+     * @return number of int nodes in gene trees analyzed (after stripping)
+     */
+    public final int getIntNodesOfAnalyzedGeneTrees() {
+        return _int_nodes;
+    }
+
     public final TaxonomyComparisonBase getGSDIRtaxCompBase() {
         return _gsdir_tax_comp_base;
     }
@@ -252,6 +265,7 @@ public final class RIO {
         }
         if ( i == 0 ) {
             _ext_nodes = assigned_tree.getNumberOfExternalNodes();
+            _int_nodes = assigned_tree.getNumberOfInternalNodes();
         }
         else if ( _ext_nodes != assigned_tree.getNumberOfExternalNodes() ) {
             throw new RIOException( "after stripping gene tree #" + ( i + 1 )
@@ -331,12 +345,19 @@ public final class RIO {
         final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" );
         log( "Gene trees analyzed                             : " + _duplications_stats.getN() );
         log( "Mean number of duplications                     : " + df.format( _duplications_stats.arithmeticMean() )
-                + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" );
+                + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " ("
+                + df.format( 100.0 * _duplications_stats.arithmeticMean() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
         if ( _duplications_stats.getN() > 3 ) {
-            log( "Median number of duplications                   : " + df.format( _duplications_stats.median() ) );
-        }
-        log( "Minimum duplications                            : " + ( int ) _duplications_stats.getMin() );
-        log( "Maximum duplications                            : " + ( int ) _duplications_stats.getMax() );
+            log( "Median number of duplications                   : " + df.format( _duplications_stats.median() )
+                    + " (" + df.format( 100.0 * _duplications_stats.median() / getIntNodesOfAnalyzedGeneTrees() )
+                    + "%)" );
+        }
+        log( "Minimum duplications                            : " + ( int ) _duplications_stats.getMin() + " ("
+                + df.format( 100.0 * _duplications_stats.getMin() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+        log( "Maximum duplications                            : " + ( int ) _duplications_stats.getMax() + " ("
+                + df.format( 100.0 * _duplications_stats.getMax() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+        log( "Gene tree internal nodes                        : " + getIntNodesOfAnalyzedGeneTrees() );
+        log( "Gene tree external nodes                        : " + getExtNodesOfAnalyzedGeneTrees() );
     }
 
     private final void preLog( final Phylogeny[] gene_trees,
@@ -345,7 +366,7 @@ public final class RIO {
                                final String outgroup,
                                final int first,
                                final int last ) {
-        log( "Number of gene tree (total)                     : " + gene_trees.length );
+        log( "Number of gene trees (total)                    : " + gene_trees.length );
         log( "Algorithm                                       : " + algorithm );
         log( "Species tree external nodes (prior to stripping): " + species_tree.getNumberOfExternalNodes() );
         log( "Species tree polytomies (prior to stripping)    : "
@@ -476,15 +497,7 @@ public final class RIO {
                                              final int last,
                                              final boolean produce_log,
                                              final boolean verbose ) throws IOException, SDIException, RIOException {
-        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
-        if ( p instanceof NHXParser ) {
-            final NHXParser nhx = ( NHXParser ) p;
-            nhx.setReplaceUnderscores( false );
-            nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
-        }
-        final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
+        final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file );
         if ( gene_trees.length < 1 ) {
             throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
         }
@@ -503,16 +516,7 @@ public final class RIO {
                                              final String outgroup,
                                              final boolean produce_log,
                                              final boolean verbose ) throws IOException, SDIException, RIOException {
-        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
-        if ( p instanceof NHXParser ) {
-            final NHXParser nhx = ( NHXParser ) p;
-            nhx.setReplaceUnderscores( false );
-            nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
-        }
-        final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
-        return new RIO( gene_trees,
+        return new RIO( parseGeneTrees( gene_trees_file ),
                         species_tree,
                         algorithm,
                         rerooting,
@@ -532,19 +536,15 @@ public final class RIO {
                                              final int last,
                                              final boolean produce_log,
                                              final boolean verbose ) throws IOException, SDIException, RIOException {
-        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
-        if ( p instanceof NHXParser ) {
-            final NHXParser nhx = ( NHXParser ) p;
-            nhx.setReplaceUnderscores( false );
-            nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
-        }
-        final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
-        if ( gene_trees.length < 1 ) {
-            throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
-        }
-        return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+        return new RIO( parseGeneTrees( gene_trees_file ),
+                        species_tree,
+                        algorithm,
+                        rerooting,
+                        outgroup,
+                        first,
+                        last,
+                        produce_log,
+                        verbose );
     }
 
     public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree )
@@ -624,6 +624,25 @@ public final class RIO {
         }
     }
 
+    private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException,
+            IOException {
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
+        if ( p instanceof NHXParser ) {
+            final NHXParser nhx = ( NHXParser ) p;
+            nhx.setReplaceUnderscores( false );
+            nhx.setIgnoreQuotes( true );
+            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
+        }
+        else if ( p instanceof NexusPhylogeniesParser ) {
+            final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
+            nex.setReplaceUnderscores( false );
+            nex.setIgnoreQuotes( true );
+            nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES );
+        }
+        return factory.create( gene_trees_file, p );
+    }
+
     private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) {
         final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree );
         if ( o > 0 ) {