From 321da767c9d9eb4e5f40f656e16005ff9244fd4d Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Mon, 14 May 2012 20:00:32 +0000 Subject: [PATCH] in progress --- .../src/org/forester/archaeopteryx/AptxUtil.java | 97 ++++++++++++++------ .../org/forester/phylogeny/PhylogenyMethods.java | 32 ++++++- forester/java/src/org/forester/test/Test.java | 2 +- .../java/src/org/forester/util/AsciiHistogram.java | 34 ++++++- .../forester/util/BasicDescriptiveStatistics.java | 12 +++ .../org/forester/util/DescriptiveStatistics.java | 4 + 6 files changed, 145 insertions(+), 36 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java index cc0c365..e71eeef 100644 --- a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java @@ -79,6 +79,7 @@ import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.phylogeny.iterators.PreorderTreeIterator; +import org.forester.util.AsciiHistogram; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; import org.forester.ws.uniprot.UniProtTaxonomy; @@ -573,42 +574,82 @@ public final class AptxUtil { desc.append( "Maximum distance to root: " ); desc.append( ForesterUtil.round( PhylogenyMethods.calculateMaxDistanceToRoot( phy ), 6 ) ); desc.append( "\n" ); - desc.append( "Descendants per node statistics: " ); - final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy ); - desc.append( "\n" ); - desc.append( " Median: " + ForesterUtil.round( ds.median(), 2 ) ); - desc.append( "\n" ); - desc.append( " Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) ); - desc.append( "\n" ); - desc.append( " SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) ); - desc.append( "\n" ); - desc.append( " Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) ); - desc.append( "\n" ); - desc.append( " Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) ); + final Set taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() ); + if ( taxs != null ) { + desc.append( "Distinct external taxonomies: " ); + desc.append( taxs.size() ); + } desc.append( "\n" ); - final DescriptiveStatistics cs = PhylogenyMethods.calculatConfidenceStatistics( phy ); - if ( cs.getN() > 1 ) { - desc.append( "Support statistics: " ); + final DescriptiveStatistics bs = PhylogenyMethods.calculatBranchLengthStatistics( phy ); + if ( bs.getN() > 2 ) { desc.append( "\n" ); - desc.append( " Branches with support: " + cs.getN() ); + desc.append( "Branch-length statistics: " ); desc.append( "\n" ); - desc.append( " Median: " + ForesterUtil.round( cs.median(), 6 ) ); + desc.append( " Number of branches with non-negative branch-lengths: " + bs.getN() ); desc.append( "\n" ); - desc.append( " Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) ); + desc.append( " Median: " + ForesterUtil.round( bs.median(), 6 ) ); desc.append( "\n" ); - if ( cs.getN() > 2 ) { - desc.append( " SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) ); - desc.append( "\n" ); - } - desc.append( " Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) ); + desc.append( " Mean: " + ForesterUtil.round( bs.arithmeticMean(), 6 ) ); + desc.append( "\n" ); + desc.append( " SD: " + ForesterUtil.round( bs.sampleStandardDeviation(), 6 ) ); + desc.append( "\n" ); + desc.append( " Minimum: " + ForesterUtil.round( bs.getMin(), 6 ) ); desc.append( "\n" ); - desc.append( " Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) ); + desc.append( " Maximum: " + ForesterUtil.round( bs.getMax(), 6 ) ); desc.append( "\n" ); + desc.append( "\n" ); + final AsciiHistogram histo = new AsciiHistogram( bs ); + desc.append( histo.toStringBuffer( 12, '#', 40, 7, " " ) ); } - final Set taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() ); - if ( taxs != null ) { - desc.append( "Distinct external taxonomies: " ); - desc.append( taxs.size() ); + final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy ); + if ( ds.getN() > 2 ) { + desc.append( "\n" ); + desc.append( "Descendants per node statistics: " ); + desc.append( "\n" ); + desc.append( " Median: " + ForesterUtil.round( ds.median(), 2 ) ); + desc.append( "\n" ); + desc.append( " Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) ); + desc.append( "\n" ); + desc.append( " SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) ); + desc.append( "\n" ); + desc.append( " Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) ); + desc.append( "\n" ); + desc.append( " Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) ); + desc.append( "\n" ); + } + final List css = PhylogenyMethods.calculatConfidenceStatistics( phy ); + if ( css.size() > 0 ) { + desc.append( "\n" ); + for( int i = 0; i < css.size(); ++i ) { + final DescriptiveStatistics cs = css.get( i ); + if ( ( cs != null ) && ( cs.getN() > 1 ) ) { + if ( css.size() > 1 ) { + desc.append( "Support statistics " + ( i + 1 ) + ": " ); + } + else { + desc.append( "Support statistics: " ); + } + if ( !ForesterUtil.isEmpty( cs.getDescription() ) ) { + desc.append( "\n" ); + desc.append( " Type: " + cs.getDescription() ); + } + desc.append( "\n" ); + desc.append( " Branches with support: " + cs.getN() ); + desc.append( "\n" ); + desc.append( " Median: " + ForesterUtil.round( cs.median(), 6 ) ); + desc.append( "\n" ); + desc.append( " Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) ); + desc.append( "\n" ); + if ( cs.getN() > 2 ) { + desc.append( " SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) ); + desc.append( "\n" ); + } + desc.append( " Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) ); + desc.append( "\n" ); + desc.append( " Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) ); + desc.append( "\n" ); + } + } } } return desc.toString(); diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 4a2115e..dbf5d5f 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -593,13 +593,39 @@ public class PhylogenyMethods { return stats; } - public static DescriptiveStatistics calculatConfidenceStatistics( final Phylogeny phy ) { + public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); - if ( !n.isExternal() ) { + if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { + stats.addValue( n.getDistanceToParent() ); + } + } + return stats; + } + + public static List calculatConfidenceStatistics( final Phylogeny phy ) { + final List stats = new ArrayList(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && !n.isRoot() ) { if ( n.getBranchData().isHasConfidences() ) { - stats.addValue( n.getBranchData().getConfidence( 0 ).getValue() ); + for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { + final Confidence c = n.getBranchData().getConfidences().get( i ); + if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { + stats.add( i, new BasicDescriptiveStatistics() ); + } + if ( !ForesterUtil.isEmpty( c.getType() ) ) { + if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { + if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { + throw new IllegalArgumentException( "support values in node [" + n.toString() + + "] appear inconsistently ordered" ); + } + } + stats.get( i ).setDescription( c.getType() ); + } + stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); + } } } } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index dce54dc..97e6950 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -2850,7 +2850,7 @@ public final class Test { dss3.addValue( 10 ); final AsciiHistogram histo = new AsciiHistogram( dss3 ); histo.toStringBuffer( 10, '=', 40, 5 ); - histo.toStringBuffer( 3, 8, 10, '=', 40, 5 ); + histo.toStringBuffer( 3, 8, 10, '=', 40, 5, null ); } catch ( final Exception e ) { e.printStackTrace( System.out ); diff --git a/forester/java/src/org/forester/util/AsciiHistogram.java b/forester/java/src/org/forester/util/AsciiHistogram.java index 3d295cf..ea64f40 100644 --- a/forester/java/src/org/forester/util/AsciiHistogram.java +++ b/forester/java/src/org/forester/util/AsciiHistogram.java @@ -50,7 +50,8 @@ public class AsciiHistogram { final int max_count, final int under, final int over, - final double binning_factor ) { + final double binning_factor, + final String indent ) { final double draw_factor = ( double ) max_count / size; final int counts_size = ForesterUtil.roundToInt( Math.log10( max_count ) ) + 1; if ( !ForesterUtil.isEmpty( getTitle() ) ) { @@ -59,12 +60,18 @@ public class AsciiHistogram { sb.append( ForesterUtil.LINE_SEPARATOR ); } if ( under > 0 ) { + if ( !ForesterUtil.isEmpty( indent ) ) { + sb.append( indent ); + } sb.append( "[" + under + "] " ); sb.append( ForesterUtil.LINE_SEPARATOR ); } for( int i = 0; i < bins.length; ++i ) { final int count = bins[ i ]; final double label = ForesterUtil.round( ( min + i * ( 1.0 / binning_factor ) ), digits ); + if ( !ForesterUtil.isEmpty( indent ) ) { + sb.append( indent ); + } sb.append( ForesterUtil.pad( label + "", digits, '0', false ) ); sb.append( " [" + ForesterUtil.pad( count + "", counts_size, ' ', true ) + "] " ); final int s = ForesterUtil.roundToInt( count / draw_factor ); @@ -74,6 +81,9 @@ public class AsciiHistogram { sb.append( ForesterUtil.LINE_SEPARATOR ); } if ( over > 0 ) { + if ( !ForesterUtil.isEmpty( indent ) ) { + sb.append( indent ); + } sb.append( "[" + over + "] " ); sb.append( ForesterUtil.LINE_SEPARATOR ); } @@ -92,7 +102,8 @@ public class AsciiHistogram { final int number_of_bins, final char symbol, final int size, - final int digits ) { + final int digits, + final String indent ) { if ( min >= max ) { throw new IllegalArgumentException( "min [" + min + "] is larger than or equal to max [" + max + "]" ); } @@ -112,16 +123,31 @@ public class AsciiHistogram { max_count = bin; } } - drawToStringBuffer( min, symbol, size, digits, sb, bins, max_count, 0, 0, binning_factor ); + drawToStringBuffer( min, symbol, size, digits, sb, bins, max_count, 0, 0, binning_factor, indent ); return sb; } + public StringBuffer toStringBuffer( final int bins, + final char symbol, + final int size, + final int digits, + final String indent ) { + return toStringBuffer( getDescriptiveStatistics().getMin(), + getDescriptiveStatistics().getMax(), + bins, + symbol, + size, + digits, + indent ); + } + public StringBuffer toStringBuffer( final int bins, final char symbol, final int size, final int digits ) { return toStringBuffer( getDescriptiveStatistics().getMin(), getDescriptiveStatistics().getMax(), bins, symbol, size, - digits ); + digits, + null ); } } diff --git a/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java b/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java index 5e8931c..4d73ffb 100644 --- a/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java +++ b/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java @@ -39,6 +39,7 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { private double _max; private double _sigma; private boolean _recalc_sigma; + private String _desc; public BasicDescriptiveStatistics() { init(); @@ -167,6 +168,7 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { _max = -Double.MAX_VALUE; _sigma = 0.0; _recalc_sigma = true; + _desc = ""; } /* (non-Javadoc) @@ -361,4 +363,14 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { public List getData() { return _data; } + + @Override + public void setDescription( final String desc ) { + _desc = desc; + } + + @Override + public String getDescription() { + return _desc; + } } diff --git a/forester/java/src/org/forester/util/DescriptiveStatistics.java b/forester/java/src/org/forester/util/DescriptiveStatistics.java index 484b00f..014b530 100644 --- a/forester/java/src/org/forester/util/DescriptiveStatistics.java +++ b/forester/java/src/org/forester/util/DescriptiveStatistics.java @@ -85,4 +85,8 @@ public interface DescriptiveStatistics { @Override public abstract String toString(); + + public abstract void setDescription( final String desc ); + + public abstract String getDescription(); } \ No newline at end of file -- 1.7.10.2