rio - gsdir work...
[jalview.git] / forester / java / src / org / forester / rio / RIO.java
index 38efa35..d844d2b 100644 (file)
@@ -41,6 +41,7 @@ import java.util.TreeSet;
 
 import org.forester.datastructures.IntMatrix;
 import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
 import org.forester.io.parsers.nhx.NHXParser;
 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
 import org.forester.io.parsers.util.ParserUtils;
@@ -66,6 +67,7 @@ public final class RIO {
     private Phylogeny[]                      _analyzed_gene_trees;
     private List<PhylogenyNode>              _removed_gene_tree_nodes;
     private int                              _ext_nodes;
+    private int                              _int_nodes;
     private TaxonomyComparisonBase           _gsdir_tax_comp_base;
     private final StringBuilder              _log;
     private final BasicDescriptiveStatistics _duplications_stats;
@@ -94,6 +96,7 @@ public final class RIO {
         _verbose = verbose;
         _rerooting = rerooting;
         _ext_nodes = -1;
+        _int_nodes = -1;
         _log = new StringBuilder();
         _gsdir_tax_comp_base = null;
         _analyzed_gene_trees = null;
@@ -120,6 +123,16 @@ public final class RIO {
         return _ext_nodes;
     }
 
+    /**
+     * Returns the numbers of number of int nodes in gene trees analyzed (after
+     * stripping).
+     * 
+     * @return number of int nodes in gene trees analyzed (after stripping)
+     */
+    public final int getIntNodesOfAnalyzedGeneTrees() {
+        return _int_nodes;
+    }
+
     public final TaxonomyComparisonBase getGSDIRtaxCompBase() {
         return _gsdir_tax_comp_base;
     }
@@ -148,7 +161,7 @@ public final class RIO {
         }
         final Phylogeny[] my_gene_trees;
         if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) {
-            my_gene_trees = new Phylogeny[ 1 + last - first ];
+            my_gene_trees = new Phylogeny[ ( 1 + last ) - first ];
             int c = 0;
             for( int i = first; i <= last; ++i ) {
                 my_gene_trees[ c++ ] = gene_trees[ i ];
@@ -252,6 +265,7 @@ public final class RIO {
         }
         if ( i == 0 ) {
             _ext_nodes = assigned_tree.getNumberOfExternalNodes();
+            _int_nodes = assigned_tree.getNumberOfInternalNodes();
         }
         else if ( _ext_nodes != assigned_tree.getNumberOfExternalNodes() ) {
             throw new RIOException( "after stripping gene tree #" + ( i + 1 )
@@ -331,12 +345,20 @@ public final class RIO {
         final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" );
         log( "Gene trees analyzed                             : " + _duplications_stats.getN() );
         log( "Mean number of duplications                     : " + df.format( _duplications_stats.arithmeticMean() )
-                + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" );
+                + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " ("
+                + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() )
+                + "%)" );
         if ( _duplications_stats.getN() > 3 ) {
-            log( "Median number of duplications                   : " + df.format( _duplications_stats.median() ) );
-        }
-        log( "Minimum duplications                            : " + ( int ) _duplications_stats.getMin() );
-        log( "Maximum duplications                            : " + ( int ) _duplications_stats.getMax() );
+            log( "Median number of duplications                   : " + df.format( _duplications_stats.median() )
+                    + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() )
+                    + "%)" );
+        }
+        log( "Minimum duplications                            : " + ( int ) _duplications_stats.getMin() + " ("
+                + df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+        log( "Maximum duplications                            : " + ( int ) _duplications_stats.getMax() + " ("
+                + df.format( ( 100.0 * _duplications_stats.getMax() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+        log( "Gene tree internal nodes                        : " + getIntNodesOfAnalyzedGeneTrees() );
+        log( "Gene tree external nodes                        : " + getExtNodesOfAnalyzedGeneTrees() );
     }
 
     private final void preLog( final Phylogeny[] gene_trees,
@@ -345,7 +367,7 @@ public final class RIO {
                                final String outgroup,
                                final int first,
                                final int last ) {
-        log( "Number of gene tree (total)                     : " + gene_trees.length );
+        log( "Number of gene trees (total)                    : " + gene_trees.length );
         log( "Algorithm                                       : " + algorithm );
         log( "Species tree external nodes (prior to stripping): " + species_tree.getNumberOfExternalNodes() );
         log( "Species tree polytomies (prior to stripping)    : "
@@ -476,15 +498,7 @@ public final class RIO {
                                              final int last,
                                              final boolean produce_log,
                                              final boolean verbose ) throws IOException, SDIException, RIOException {
-        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
-        if ( p instanceof NHXParser ) {
-            final NHXParser nhx = ( NHXParser ) p;
-            nhx.setReplaceUnderscores( false );
-            nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
-        }
-        final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
+        final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file );
         if ( gene_trees.length < 1 ) {
             throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
         }
@@ -503,16 +517,7 @@ public final class RIO {
                                              final String outgroup,
                                              final boolean produce_log,
                                              final boolean verbose ) throws IOException, SDIException, RIOException {
-        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
-        if ( p instanceof NHXParser ) {
-            final NHXParser nhx = ( NHXParser ) p;
-            nhx.setReplaceUnderscores( false );
-            nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
-        }
-        final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
-        return new RIO( gene_trees,
+        return new RIO( parseGeneTrees( gene_trees_file ),
                         species_tree,
                         algorithm,
                         rerooting,
@@ -532,19 +537,15 @@ public final class RIO {
                                              final int last,
                                              final boolean produce_log,
                                              final boolean verbose ) throws IOException, SDIException, RIOException {
-        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
-        if ( p instanceof NHXParser ) {
-            final NHXParser nhx = ( NHXParser ) p;
-            nhx.setReplaceUnderscores( false );
-            nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
-        }
-        final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
-        if ( gene_trees.length < 1 ) {
-            throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
-        }
-        return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+        return new RIO( parseGeneTrees( gene_trees_file ),
+                        species_tree,
+                        algorithm,
+                        rerooting,
+                        outgroup,
+                        first,
+                        last,
+                        produce_log,
+                        verbose );
     }
 
     public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree )
@@ -624,6 +625,25 @@ public final class RIO {
         }
     }
 
+    private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException,
+            IOException {
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
+        if ( p instanceof NHXParser ) {
+            final NHXParser nhx = ( NHXParser ) p;
+            nhx.setReplaceUnderscores( false );
+            nhx.setIgnoreQuotes( true );
+            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
+        }
+        else if ( p instanceof NexusPhylogeniesParser ) {
+            final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
+            nex.setReplaceUnderscores( false );
+            nex.setIgnoreQuotes( true );
+            nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES );
+        }
+        return factory.create( gene_trees_file, p );
+    }
+
     private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) {
         final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree );
         if ( o > 0 ) {