X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Frio%2FRIO.java;h=d844d2b709aaac20c6e65f790add9b407d6c16e5;hb=5309828c338cdf84ef10f70dc2472cf27016a75b;hp=38efa35c45705301af4fb546ec39df50e0dd613c;hpb=99c1b5211c817dbe35a646589d9c4dbf508b8d50;p=jalview.git diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index 38efa35..d844d2b 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -41,6 +41,7 @@ import java.util.TreeSet; import org.forester.datastructures.IntMatrix; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.util.ParserUtils; @@ -66,6 +67,7 @@ public final class RIO { private Phylogeny[] _analyzed_gene_trees; private List _removed_gene_tree_nodes; private int _ext_nodes; + private int _int_nodes; private TaxonomyComparisonBase _gsdir_tax_comp_base; private final StringBuilder _log; private final BasicDescriptiveStatistics _duplications_stats; @@ -94,6 +96,7 @@ public final class RIO { _verbose = verbose; _rerooting = rerooting; _ext_nodes = -1; + _int_nodes = -1; _log = new StringBuilder(); _gsdir_tax_comp_base = null; _analyzed_gene_trees = null; @@ -120,6 +123,16 @@ public final class RIO { return _ext_nodes; } + /** + * Returns the number of internal nodes in gene trees analyzed (after + * stripping). 
+ * + * @return number of int nodes in gene trees analyzed (after stripping) + */ + public final int getIntNodesOfAnalyzedGeneTrees() { + return _int_nodes; + } + public final TaxonomyComparisonBase getGSDIRtaxCompBase() { return _gsdir_tax_comp_base; } @@ -148,7 +161,7 @@ public final class RIO { } final Phylogeny[] my_gene_trees; if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) { - my_gene_trees = new Phylogeny[ 1 + last - first ]; + my_gene_trees = new Phylogeny[ ( 1 + last ) - first ]; int c = 0; for( int i = first; i <= last; ++i ) { my_gene_trees[ c++ ] = gene_trees[ i ]; @@ -252,6 +265,7 @@ public final class RIO { } if ( i == 0 ) { _ext_nodes = assigned_tree.getNumberOfExternalNodes(); + _int_nodes = assigned_tree.getNumberOfInternalNodes(); } else if ( _ext_nodes != assigned_tree.getNumberOfExternalNodes() ) { throw new RIOException( "after stripping gene tree #" + ( i + 1 ) @@ -331,12 +345,20 @@ public final class RIO { final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); log( "Gene trees analyzed : " + _duplications_stats.getN() ); log( "Mean number of duplications : " + df.format( _duplications_stats.arithmeticMean() ) - + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" ); + + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " (" + + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() ) + + "%)" ); if ( _duplications_stats.getN() > 3 ) { - log( "Median number of duplications : " + df.format( _duplications_stats.median() ) ); - } - log( "Minimum duplications : " + ( int ) _duplications_stats.getMin() ); - log( "Maximum duplications : " + ( int ) _duplications_stats.getMax() ); + log( "Median number of duplications : " + df.format( _duplications_stats.median() ) + + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() ) + + "%)" ); + } + log( "Minimum duplications : " + ( 
int ) _duplications_stats.getMin() + " (" + + df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + log( "Maximum duplications : " + ( int ) _duplications_stats.getMax() + " (" + + df.format( ( 100.0 * _duplications_stats.getMax() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + log( "Gene tree internal nodes : " + getIntNodesOfAnalyzedGeneTrees() ); + log( "Gene tree external nodes : " + getExtNodesOfAnalyzedGeneTrees() ); } private final void preLog( final Phylogeny[] gene_trees, @@ -345,7 +367,7 @@ public final class RIO { final String outgroup, final int first, final int last ) { - log( "Number of gene tree (total) : " + gene_trees.length ); + log( "Number of gene trees (total) : " + gene_trees.length ); log( "Algorithm : " + algorithm ); log( "Species tree external nodes (prior to stripping): " + species_tree.getNumberOfExternalNodes() ); log( "Species tree polytomies (prior to stripping) : " @@ -476,15 +498,7 @@ public final class RIO { final int last, final boolean produce_log, final boolean verbose ) throws IOException, SDIException, RIOException { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); - if ( p instanceof NHXParser ) { - final NHXParser nhx = ( NHXParser ) p; - nhx.setReplaceUnderscores( false ); - nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); - } - final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); + final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file ); if ( gene_trees.length < 1 ) { throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" ); } @@ -503,16 +517,7 @@ public final class RIO { final String outgroup, final boolean produce_log, final boolean verbose ) throws IOException, SDIException, RIOException { - final PhylogenyFactory factory = 
ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); - if ( p instanceof NHXParser ) { - final NHXParser nhx = ( NHXParser ) p; - nhx.setReplaceUnderscores( false ); - nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); - } - final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); - return new RIO( gene_trees, + return new RIO( parseGeneTrees( gene_trees_file ), species_tree, algorithm, rerooting, @@ -532,19 +537,15 @@ public final class RIO { final int last, final boolean produce_log, final boolean verbose ) throws IOException, SDIException, RIOException { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); - if ( p instanceof NHXParser ) { - final NHXParser nhx = ( NHXParser ) p; - nhx.setReplaceUnderscores( false ); - nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); - } - final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); - if ( gene_trees.length < 1 ) { - throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" ); - } - return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + return new RIO( parseGeneTrees( gene_trees_file ), + species_tree, + algorithm, + rerooting, + outgroup, + first, + last, + produce_log, + verbose ); } public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree ) @@ -624,6 +625,25 @@ public final class RIO { } } + private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException, + IOException { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( 
gene_trees_file, true ); + if ( p instanceof NHXParser ) { + final NHXParser nhx = ( NHXParser ) p; + nhx.setReplaceUnderscores( false ); + nhx.setIgnoreQuotes( true ); + nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + } + else if ( p instanceof NexusPhylogeniesParser ) { + final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; + nex.setReplaceUnderscores( false ); + nex.setIgnoreQuotes( true ); + nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES ); + } + return factory.create( gene_trees_file, p ); + } + private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) { final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree ); if ( o > 0 ) {