import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.phylogeny.iterators.PreorderTreeIterator;
+import org.forester.util.AsciiHistogram;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;
import org.forester.ws.uniprot.UniProtTaxonomy;
desc.append( "Maximum distance to root: " );
desc.append( ForesterUtil.round( PhylogenyMethods.calculateMaxDistanceToRoot( phy ), 6 ) );
desc.append( "\n" );
- desc.append( "Descendants per node statistics: " );
- final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy );
- desc.append( "\n" );
- desc.append( " Median: " + ForesterUtil.round( ds.median(), 2 ) );
- desc.append( "\n" );
- desc.append( " Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) );
- desc.append( "\n" );
- desc.append( " SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) );
- desc.append( "\n" );
- desc.append( " Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) );
- desc.append( "\n" );
- desc.append( " Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) );
+ final Set<Taxonomy> taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() );
+ if ( taxs != null ) {
+ desc.append( "Distinct external taxonomies: " );
+ desc.append( taxs.size() );
+ }
desc.append( "\n" );
- final DescriptiveStatistics cs = PhylogenyMethods.calculatConfidenceStatistics( phy );
- if ( cs.getN() > 1 ) {
- desc.append( "Support statistics: " );
+ final DescriptiveStatistics bs = PhylogenyMethods.calculatBranchLengthStatistics( phy );
+ if ( bs.getN() > 2 ) {
desc.append( "\n" );
- desc.append( " Branches with support: " + cs.getN() );
+ desc.append( "Branch-length statistics: " );
desc.append( "\n" );
- desc.append( " Median: " + ForesterUtil.round( cs.median(), 6 ) );
+ desc.append( " Number of branches with non-negative branch-lengths: " + bs.getN() );
desc.append( "\n" );
- desc.append( " Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) );
+ desc.append( " Median: " + ForesterUtil.round( bs.median(), 6 ) );
desc.append( "\n" );
- if ( cs.getN() > 2 ) {
- desc.append( " SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) );
- desc.append( "\n" );
- }
- desc.append( " Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) );
+ desc.append( " Mean: " + ForesterUtil.round( bs.arithmeticMean(), 6 ) );
+ desc.append( "\n" );
+ desc.append( " SD: " + ForesterUtil.round( bs.sampleStandardDeviation(), 6 ) );
+ desc.append( "\n" );
+ desc.append( " Minimum: " + ForesterUtil.round( bs.getMin(), 6 ) );
desc.append( "\n" );
- desc.append( " Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) );
+ desc.append( " Maximum: " + ForesterUtil.round( bs.getMax(), 6 ) );
desc.append( "\n" );
+ desc.append( "\n" );
+ final AsciiHistogram histo = new AsciiHistogram( bs );
+ desc.append( histo.toStringBuffer( 12, '#', 40, 7, " " ) );
}
- final Set<Taxonomy> taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() );
- if ( taxs != null ) {
- desc.append( "Distinct external taxonomies: " );
- desc.append( taxs.size() );
+ final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy );
+ if ( ds.getN() > 2 ) {
+ desc.append( "\n" );
+ desc.append( "Descendants per node statistics: " );
+ desc.append( "\n" );
+ desc.append( " Median: " + ForesterUtil.round( ds.median(), 2 ) );
+ desc.append( "\n" );
+ desc.append( " Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) );
+ desc.append( "\n" );
+ desc.append( " SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) );
+ desc.append( "\n" );
+ desc.append( " Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) );
+ desc.append( "\n" );
+ desc.append( " Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) );
+ desc.append( "\n" );
+ }
+ final List<DescriptiveStatistics> css = PhylogenyMethods.calculatConfidenceStatistics( phy );
+ if ( css.size() > 0 ) {
+ desc.append( "\n" );
+ for( int i = 0; i < css.size(); ++i ) {
+ final DescriptiveStatistics cs = css.get( i );
+ if ( ( cs != null ) && ( cs.getN() > 1 ) ) {
+ if ( css.size() > 1 ) {
+ desc.append( "Support statistics " + ( i + 1 ) + ": " );
+ }
+ else {
+ desc.append( "Support statistics: " );
+ }
+ if ( !ForesterUtil.isEmpty( cs.getDescription() ) ) {
+ desc.append( "\n" );
+ desc.append( " Type: " + cs.getDescription() );
+ }
+ desc.append( "\n" );
+ desc.append( " Branches with support: " + cs.getN() );
+ desc.append( "\n" );
+ desc.append( " Median: " + ForesterUtil.round( cs.median(), 6 ) );
+ desc.append( "\n" );
+ desc.append( " Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) );
+ desc.append( "\n" );
+ if ( cs.getN() > 2 ) {
+ desc.append( " SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) );
+ desc.append( "\n" );
+ }
+ desc.append( " Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) );
+ desc.append( "\n" );
+ desc.append( " Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) );
+ desc.append( "\n" );
+ }
+ }
}
}
return desc.toString();
return stats;
}
- public static DescriptiveStatistics calculatConfidenceStatistics( final Phylogeny phy ) {
+ public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { // gathers the parent distance of every qualifying node of phy into one statistics object
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
- if ( !n.isExternal() ) {
+ if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { // root is skipped; so is any negative distance (presumably the "not set" marker - TODO confirm)
+ stats.addValue( n.getDistanceToParent() );
+ }
+ }
+ return stats; // never null; holds no values when no node qualified
+ }
+
+ public static List<DescriptiveStatistics> calculatConfidenceStatistics( final Phylogeny phy ) {
+ final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
+ for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+ final PhylogenyNode n = iter.next();
+ if ( !n.isExternal() && !n.isRoot() ) {
if ( n.getBranchData().isHasConfidences() ) {
- stats.addValue( n.getBranchData().getConfidence( 0 ).getValue() );
+ for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
+ final Confidence c = n.getBranchData().getConfidences().get( i );
+ if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
+ stats.add( i, new BasicDescriptiveStatistics() );
+ }
+ if ( !ForesterUtil.isEmpty( c.getType() ) ) {
+ if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
+ if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
+ throw new IllegalArgumentException( "support values in node [" + n.toString()
+ + "] appear inconsistently ordered" );
+ }
+ }
+ stats.get( i ).setDescription( c.getType() );
+ }
+ stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
+ }
}
}
}
dss3.addValue( 10 );
final AsciiHistogram histo = new AsciiHistogram( dss3 );
histo.toStringBuffer( 10, '=', 40, 5 );
- histo.toStringBuffer( 3, 8, 10, '=', 40, 5 );
+ histo.toStringBuffer( 3, 8, 10, '=', 40, 5, null );
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
final int max_count,
final int under,
final int over,
- final double binning_factor ) {
+ final double binning_factor,
+ final String indent ) {
final double draw_factor = ( double ) max_count / size;
final int counts_size = ForesterUtil.roundToInt( Math.log10( max_count ) ) + 1;
if ( !ForesterUtil.isEmpty( getTitle() ) ) {
sb.append( ForesterUtil.LINE_SEPARATOR );
}
if ( under > 0 ) {
+ if ( !ForesterUtil.isEmpty( indent ) ) {
+ sb.append( indent );
+ }
sb.append( "[" + under + "] " );
sb.append( ForesterUtil.LINE_SEPARATOR );
}
for( int i = 0; i < bins.length; ++i ) {
final int count = bins[ i ];
final double label = ForesterUtil.round( ( min + i * ( 1.0 / binning_factor ) ), digits );
+ if ( !ForesterUtil.isEmpty( indent ) ) {
+ sb.append( indent );
+ }
sb.append( ForesterUtil.pad( label + "", digits, '0', false ) );
sb.append( " [" + ForesterUtil.pad( count + "", counts_size, ' ', true ) + "] " );
final int s = ForesterUtil.roundToInt( count / draw_factor );
sb.append( ForesterUtil.LINE_SEPARATOR );
}
if ( over > 0 ) {
+ if ( !ForesterUtil.isEmpty( indent ) ) {
+ sb.append( indent );
+ }
sb.append( "[" + over + "] " );
sb.append( ForesterUtil.LINE_SEPARATOR );
}
final int number_of_bins,
final char symbol,
final int size,
- final int digits ) {
+ final int digits,
+ final String indent ) {
if ( min >= max ) {
throw new IllegalArgumentException( "min [" + min + "] is larger than or equal to max [" + max + "]" );
}
max_count = bin;
}
}
- drawToStringBuffer( min, symbol, size, digits, sb, bins, max_count, 0, 0, binning_factor );
+ drawToStringBuffer( min, symbol, size, digits, sb, bins, max_count, 0, 0, binning_factor, indent );
return sb;
}
+ public StringBuffer toStringBuffer( final int bins, // convenience overload: delegates to the min/max variant using the data's own range
+ final char symbol,
+ final int size,
+ final int digits,
+ final String indent ) { // indent is passed through unchanged; null is accepted (the 4-argument overload passes null)
+ return toStringBuffer( getDescriptiveStatistics().getMin(), // lower bound = smallest value in the statistics
+ getDescriptiveStatistics().getMax(), // upper bound = largest value; the delegate throws IllegalArgumentException when min >= max
+ bins,
+ symbol,
+ size,
+ digits,
+ indent );
+ }
+
public StringBuffer toStringBuffer( final int bins, final char symbol, final int size, final int digits ) {
return toStringBuffer( getDescriptiveStatistics().getMin(),
getDescriptiveStatistics().getMax(),
bins,
symbol,
size,
- digits );
+ digits,
+ null ); // null indent: drawToStringBuffer only prepends an indent that is not empty
}
}
private double _max;
private double _sigma;
private boolean _recalc_sigma;
+ private String _desc;
public BasicDescriptiveStatistics() {
init();
_max = -Double.MAX_VALUE;
_sigma = 0.0;
_recalc_sigma = true;
+ _desc = "";
}
/* (non-Javadoc)
public List<Double> getData() {
return _data;
}
+
+ @Override
+ public void setDescription( final String desc ) { // replaces the stored description; desc is kept as-is, with no null check
+ _desc = desc;
+ }
+
+ @Override
+ public String getDescription() { // returns the description last stored via setDescription; null if null was stored
+ return _desc;
+ }
}
@Override
public abstract String toString();
+
+ public abstract void setDescription( final String desc ); // attaches a free-text description to this statistics object
+
+ public abstract String getDescription(); // returns the description attached to this statistics object
}
\ No newline at end of file