in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 14 May 2012 20:00:32 +0000 (20:00 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 14 May 2012 20:00:32 +0000 (20:00 +0000)
forester/java/src/org/forester/archaeopteryx/AptxUtil.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/util/AsciiHistogram.java
forester/java/src/org/forester/util/BasicDescriptiveStatistics.java
forester/java/src/org/forester/util/DescriptiveStatistics.java

index cc0c365..e71eeef 100644 (file)
@@ -79,6 +79,7 @@ import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.phylogeny.iterators.PreorderTreeIterator;
+import org.forester.util.AsciiHistogram;
 import org.forester.util.DescriptiveStatistics;
 import org.forester.util.ForesterUtil;
 import org.forester.ws.uniprot.UniProtTaxonomy;
@@ -573,42 +574,82 @@ public final class AptxUtil {
             desc.append( "Maximum distance to root: " );
             desc.append( ForesterUtil.round( PhylogenyMethods.calculateMaxDistanceToRoot( phy ), 6 ) );
             desc.append( "\n" );
-            desc.append( "Descendants per node statistics: " );
-            final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy );
-            desc.append( "\n" );
-            desc.append( "    Median: " + ForesterUtil.round( ds.median(), 2 ) );
-            desc.append( "\n" );
-            desc.append( "    Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) );
-            desc.append( "\n" );
-            desc.append( "    SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) );
-            desc.append( "\n" );
-            desc.append( "    Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) );
-            desc.append( "\n" );
-            desc.append( "    Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) );
+            final Set<Taxonomy> taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() );
+            if ( taxs != null ) {
+                desc.append( "Distinct external taxonomies: " );
+                desc.append( taxs.size() );
+            }
             desc.append( "\n" );
-            final DescriptiveStatistics cs = PhylogenyMethods.calculatConfidenceStatistics( phy );
-            if ( cs.getN() > 1 ) {
-                desc.append( "Support statistics: " );
+            final DescriptiveStatistics bs = PhylogenyMethods.calculatBranchLengthStatistics( phy );
+            if ( bs.getN() > 2 ) {
                 desc.append( "\n" );
-                desc.append( "    Branches with support: " + cs.getN() );
+                desc.append( "Branch-length statistics: " );
                 desc.append( "\n" );
-                desc.append( "    Median: " + ForesterUtil.round( cs.median(), 6 ) );
+                desc.append( "    Number of branches with non-negative branch-lengths: " + bs.getN() );
                 desc.append( "\n" );
-                desc.append( "    Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) );
+                desc.append( "    Median: " + ForesterUtil.round( bs.median(), 6 ) );
                 desc.append( "\n" );
-                if ( cs.getN() > 2 ) {
-                    desc.append( "    SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) );
-                    desc.append( "\n" );
-                }
-                desc.append( "    Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) );
+                desc.append( "    Mean: " + ForesterUtil.round( bs.arithmeticMean(), 6 ) );
+                desc.append( "\n" );
+                desc.append( "    SD: " + ForesterUtil.round( bs.sampleStandardDeviation(), 6 ) );
+                desc.append( "\n" );
+                desc.append( "    Minimum: " + ForesterUtil.round( bs.getMin(), 6 ) );
                 desc.append( "\n" );
-                desc.append( "    Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) );
+                desc.append( "    Maximum: " + ForesterUtil.round( bs.getMax(), 6 ) );
                 desc.append( "\n" );
+                desc.append( "\n" );
+                final AsciiHistogram histo = new AsciiHistogram( bs );
+                desc.append( histo.toStringBuffer( 12, '#', 40, 7, "    " ) );
             }
-            final Set<Taxonomy> taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() );
-            if ( taxs != null ) {
-                desc.append( "Distinct external taxonomies: " );
-                desc.append( taxs.size() );
+            final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy );
+            if ( ds.getN() > 2 ) {
+                desc.append( "\n" );
+                desc.append( "Descendants per node statistics: " );
+                desc.append( "\n" );
+                desc.append( "    Median: " + ForesterUtil.round( ds.median(), 2 ) );
+                desc.append( "\n" );
+                desc.append( "    Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) );
+                desc.append( "\n" );
+                desc.append( "    SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) );
+                desc.append( "\n" );
+                desc.append( "    Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) );
+                desc.append( "\n" );
+                desc.append( "    Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) );
+                desc.append( "\n" );
+            }
+            final List<DescriptiveStatistics> css = PhylogenyMethods.calculatConfidenceStatistics( phy );
+            if ( css.size() > 0 ) {
+                desc.append( "\n" );
+                for( int i = 0; i < css.size(); ++i ) {
+                    final DescriptiveStatistics cs = css.get( i );
+                    if ( ( cs != null ) && ( cs.getN() > 1 ) ) {
+                        if ( css.size() > 1 ) {
+                            desc.append( "Support statistics " + ( i + 1 ) + ": " );
+                        }
+                        else {
+                            desc.append( "Support statistics: " );
+                        }
+                        if ( !ForesterUtil.isEmpty( cs.getDescription() ) ) {
+                            desc.append( "\n" );
+                            desc.append( "    Type: " + cs.getDescription() );
+                        }
+                        desc.append( "\n" );
+                        desc.append( "    Branches with support: " + cs.getN() );
+                        desc.append( "\n" );
+                        desc.append( "    Median: " + ForesterUtil.round( cs.median(), 6 ) );
+                        desc.append( "\n" );
+                        desc.append( "    Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) );
+                        desc.append( "\n" );
+                        if ( cs.getN() > 2 ) {
+                            desc.append( "    SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) );
+                            desc.append( "\n" );
+                        }
+                        desc.append( "    Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) );
+                        desc.append( "\n" );
+                        desc.append( "    Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) );
+                        desc.append( "\n" );
+                    }
+                }
             }
         }
         return desc.toString();
index 4a2115e..dbf5d5f 100644 (file)
@@ -593,13 +593,39 @@ public class PhylogenyMethods {
         return stats;
     }
 
-    public static DescriptiveStatistics calculatConfidenceStatistics( final Phylogeny phy ) {
+    public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
         for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
             final PhylogenyNode n = iter.next();
-            if ( !n.isExternal() ) {
+            if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
+                stats.addValue( n.getDistanceToParent() );
+            }
+        }
+        return stats;
+    }
+
+    public static List<DescriptiveStatistics> calculatConfidenceStatistics( final Phylogeny phy ) {
+        final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
+        for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( !n.isExternal() && !n.isRoot() ) {
                 if ( n.getBranchData().isHasConfidences() ) {
-                    stats.addValue( n.getBranchData().getConfidence( 0 ).getValue() );
+                    for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
+                        final Confidence c = n.getBranchData().getConfidences().get( i );
+                        if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
+                            stats.add( i, new BasicDescriptiveStatistics() );
+                        }
+                        if ( !ForesterUtil.isEmpty( c.getType() ) ) {
+                            if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
+                                if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
+                                    throw new IllegalArgumentException( "support values in node [" + n.toString()
+                                            + "] appear inconsistently ordered" );
+                                }
+                            }
+                            stats.get( i ).setDescription( c.getType() );
+                        }
+                        stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
+                    }
                 }
             }
         }
index dce54dc..97e6950 100644 (file)
@@ -2850,7 +2850,7 @@ public final class Test {
             dss3.addValue( 10 );
             final AsciiHistogram histo = new AsciiHistogram( dss3 );
             histo.toStringBuffer( 10, '=', 40, 5 );
-            histo.toStringBuffer( 3, 8, 10, '=', 40, 5 );
+            histo.toStringBuffer( 3, 8, 10, '=', 40, 5, null );
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
index 3d295cf..ea64f40 100644 (file)
@@ -50,7 +50,8 @@ public class AsciiHistogram {
                                      final int max_count,
                                      final int under,
                                      final int over,
-                                     final double binning_factor ) {
+                                     final double binning_factor,
+                                     final String indent ) {
         final double draw_factor = ( double ) max_count / size;
         final int counts_size = ForesterUtil.roundToInt( Math.log10( max_count ) ) + 1;
         if ( !ForesterUtil.isEmpty( getTitle() ) ) {
@@ -59,12 +60,18 @@ public class AsciiHistogram {
             sb.append( ForesterUtil.LINE_SEPARATOR );
         }
         if ( under > 0 ) {
+            if ( !ForesterUtil.isEmpty( indent ) ) {
+                sb.append( indent );
+            }
             sb.append( "[" + under + "] " );
             sb.append( ForesterUtil.LINE_SEPARATOR );
         }
         for( int i = 0; i < bins.length; ++i ) {
             final int count = bins[ i ];
             final double label = ForesterUtil.round( ( min + i * ( 1.0 / binning_factor ) ), digits );
+            if ( !ForesterUtil.isEmpty( indent ) ) {
+                sb.append( indent );
+            }
             sb.append( ForesterUtil.pad( label + "", digits, '0', false ) );
             sb.append( " [" + ForesterUtil.pad( count + "", counts_size, ' ', true ) + "] " );
             final int s = ForesterUtil.roundToInt( count / draw_factor );
@@ -74,6 +81,9 @@ public class AsciiHistogram {
             sb.append( ForesterUtil.LINE_SEPARATOR );
         }
         if ( over > 0 ) {
+            if ( !ForesterUtil.isEmpty( indent ) ) {
+                sb.append( indent );
+            }
             sb.append( "[" + over + "] " );
             sb.append( ForesterUtil.LINE_SEPARATOR );
         }
@@ -92,7 +102,8 @@ public class AsciiHistogram {
                                         final int number_of_bins,
                                         final char symbol,
                                         final int size,
-                                        final int digits ) {
+                                        final int digits,
+                                        final String indent ) {
         if ( min >= max ) {
             throw new IllegalArgumentException( "min [" + min + "] is larger than or equal to max [" + max + "]" );
         }
@@ -112,16 +123,31 @@ public class AsciiHistogram {
                 max_count = bin;
             }
         }
-        drawToStringBuffer( min, symbol, size, digits, sb, bins, max_count, 0, 0, binning_factor );
+        drawToStringBuffer( min, symbol, size, digits, sb, bins, max_count, 0, 0, binning_factor, indent );
         return sb;
     }
 
+    public StringBuffer toStringBuffer( final int bins,
+                                        final char symbol,
+                                        final int size,
+                                        final int digits,
+                                        final String indent ) {
+        return toStringBuffer( getDescriptiveStatistics().getMin(),
+                               getDescriptiveStatistics().getMax(),
+                               bins,
+                               symbol,
+                               size,
+                               digits,
+                               indent );
+    }
+
     public StringBuffer toStringBuffer( final int bins, final char symbol, final int size, final int digits ) {
         return toStringBuffer( getDescriptiveStatistics().getMin(),
                                getDescriptiveStatistics().getMax(),
                                bins,
                                symbol,
                                size,
-                               digits );
+                               digits,
+                               null );
     }
 }
index 5e8931c..4d73ffb 100644 (file)
@@ -39,6 +39,7 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics {
     private double       _max;
     private double       _sigma;
     private boolean      _recalc_sigma;
+    private String       _desc;
 
     public BasicDescriptiveStatistics() {
         init();
@@ -167,6 +168,7 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics {
         _max = -Double.MAX_VALUE;
         _sigma = 0.0;
         _recalc_sigma = true;
+        _desc = "";
     }
 
     /* (non-Javadoc)
@@ -361,4 +363,14 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics {
     public List<Double> getData() {
         return _data;
     }
+
+    @Override
+    public void setDescription( final String desc ) {
+        _desc = desc;
+    }
+
+    @Override
+    public String getDescription() {
+        return _desc;
+    }
 }
index 484b00f..014b530 100644 (file)
@@ -85,4 +85,8 @@ public interface DescriptiveStatistics {
 
     @Override
     public abstract String toString();
+
+    public abstract void setDescription( final String desc );
+
+    public abstract String getDescription();
 }
\ No newline at end of file