4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
28 package org.forester.util;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.List;
34 public class BasicDescriptiveStatistics implements DescriptiveStatistics {
// Backing store for the values of this data set; addValue() appends boxed Doubles to it.
36 private List<Double> _data;
// Accumulator for the squared deviations from the mean; filled in by sumDeviations().
40 private double _sigma;
// While true, sumDeviations() re-accumulates _sigma (and clears the flag); init() sets it.
41 private boolean _recalc_sigma;
44 public BasicDescriptiveStatistics() {
// Constructor taking a description; the text is handed on to setDescription().
48 public BasicDescriptiveStatistics( final String desc ) {
50 setDescription( desc );
54 * @see org.forester.util.DescriptiveStatisticsI#addValue(double)
// Adds one value to the data set.
57 public void addValue( final double d ) {
// The primitive is boxed explicitly and appended to _data.
// NOTE(review): the Double(double) constructor is deprecated since Java 9;
// Double.valueOf( d ) is the documented replacement.
60 _data.add( new Double( d ) );
70 * @see org.forester.util.DescriptiveStatisticsI#arithmeticMean()
// Arithmetic mean: the sum of the values divided by the number of values.
73 public double arithmeticMean() {
// getSum() is declared to return double, so this is floating-point division.
75 return getSum() / getN();
79 * @see org.forester.util.DescriptiveStatisticsI#asSummary()
// Builds a short textual summary of the data set.
82 public String asSummary() {
// Long form: mean, the PLUS_MINUS separator, sample standard deviation,
// then the range rendered as " [min...max]".
84 return arithmeticMean() + DescriptiveStatistics.PLUS_MINUS + sampleStandardDeviation() + " [" + getMin()
85 + "..." + getMax() + "]";
// Short form: only the mean, converted to a string.
// NOTE(review): the condition selecting between the two forms is not visible
// here; presumably the long form requires at least two values, since
// sampleVariance() rejects smaller samples -- confirm.
88 return "" + arithmeticMean();
93 * @see org.forester.util.DescriptiveStatisticsI#coefficientOfVariation()
// Coefficient of variation: sample standard deviation divided by the arithmetic mean.
96 public double coefficientOfVariation() {
// NOTE(review): no guard against a mean of 0.0 is visible here; double
// division would then yield an infinity or NaN rather than an exception.
98 return ( sampleStandardDeviation() / arithmeticMean() );
102 * @see org.forester.util.DescriptiveStatisticsI#getDataAsDoubleArray()
// Copies the stored values into a newly allocated primitive array.
105 public double[] getDataAsDoubleArray() {
// One slot per stored value.
107 final double[] data_array = new double[ getN() ];
108 for( int i = 0; i < getN(); ++i ) {
// getValue() unboxes the element at position i.
109 data_array[ i ] = getValue( i );
115 * @see org.forester.util.DescriptiveStatisticsI#getMax()
118 public double getMax() {
124 * @see org.forester.util.DescriptiveStatisticsI#getMin()
127 public double getMin() {
133 * @see org.forester.util.DescriptiveStatisticsI#getN()
141 * @see org.forester.util.DescriptiveStatisticsI#getSum()
144 public double getSum() {
150 * @see org.forester.util.DescriptiveStatisticsI#getSummaryAsString()
// Renders mean, sample standard deviation and range as a single string.
153 public String getSummaryAsString() {
155 final double mean = arithmeticMean();
156 final double sd = sampleStandardDeviation();
// ( char ) 177 is U+00B1, the plus-minus sign, so the result has the shape
// "<mean>±<sd> [<min>...<max>]".
// NOTE(review): asSummary() uses DescriptiveStatistics.PLUS_MINUS in the same
// position; presumably the same character -- confirm and consider sharing it.
157 return "" + mean + ( ( char ) 177 ) + sd + " [" + getMin() + "..." + getMax() + "]";
161 * @see org.forester.util.DescriptiveStatisticsI#getValue(int)
// Returns the stored value at the given zero-based position.
164 public double getValue( final int index ) {
// _data holds boxed Doubles; doubleValue() unboxes the element.
// List.get throws IndexOutOfBoundsException for a position outside the list.
166 return ( ( ( _data.get( index ) ) ).doubleValue() );
// Puts the fields into their starting state.
169 private void init() {
// Fresh, empty backing list.
170 _data = new ArrayList<Double>();
// _min starts at the largest finite double and _max at the most negative
// finite double (note: -Double.MAX_VALUE, not Double.MIN_VALUE, which is the
// smallest positive value).
// NOTE(review): presumably so that the first value added replaces both; the
// code updating them is not visible here.
172 _min = Double.MAX_VALUE;
173 _max = -Double.MAX_VALUE;
// Makes the next sumDeviations() call re-accumulate _sigma.
175 _recalc_sigma = true;
180 * @see org.forester.util.DescriptiveStatisticsI#median()
// Median of the stored values.
183 public double median() {
// NOTE(review): presumably the single-value case, where the only element is
// the median; the guarding condition is not visible here.
187 median = getValue( 0 );
// Integer division: for an even count this is the upper of the two central
// positions, for an odd count the exact centre.
190 final int index = ( getN() / 2 );
// getDataAsDoubleArray() returns a new array, so sorting it leaves the stored
// order of _data untouched.
191 final double[] data_array = getDataAsDoubleArray();
192 Arrays.sort( data_array );
193 if ( ( ( data_array.length ) % 2 ) == 0 ) {
194 // even number of data values
// The median is the mean of the two central elements.
195 median = ( data_array[ index - 1 ] + data_array[ index ] ) / 2.0;
// NOTE(review): presumably the odd-count branch, where the central element
// is the median; the branch structure is not visible here.
198 median = data_array[ index ];
205 * @see org.forester.util.DescriptiveStatisticsI#midrange()
// Midrange: the arithmetic midpoint of the _min and _max fields.
208 public double midrange() {
// NOTE(review): presumably _min/_max track the extremes of the added values;
// the code maintaining them is not visible here.
210 return ( _min + _max ) / 2.0;
214 * @see org.forester.util.DescriptiveStatisticsI#pearsonianSkewness()
// Pearson's median skewness: three times the difference between mean and
// median, divided by the sample standard deviation.
217 public double pearsonianSkewness() {
219 final double mean = arithmeticMean();
220 final double median = median();
221 final double sd = sampleStandardDeviation();
// NOTE(review): no guard against sd == 0.0 is visible here; double division
// would then yield an infinity or NaN.
222 return ( ( 3 * ( mean - median ) ) / sd );
226 * @see org.forester.util.DescriptiveStatisticsI#sampleStandardDeviation()
// Sample standard deviation: the square root of the sample variance.
// An ArithmeticException thrown by sampleVariance() propagates to the caller.
229 public double sampleStandardDeviation() {
230 return Math.sqrt( sampleVariance() );
234 * @see org.forester.util.DescriptiveStatisticsI#sampleStandardUnit(double)
// Expresses 'value' in standard units relative to this data set.
237 public double sampleStandardUnit( final double value ) {
// Delegates to the static three-argument overload, supplying this data set's
// arithmetic mean and sample standard deviation.
239 return BasicDescriptiveStatistics.sampleStandardUnit( value, arithmeticMean(), sampleStandardDeviation() );
243 * @see org.forester.util.DescriptiveStatisticsI#sampleVariance()
// Sample variance: sum of squared deviations divided by ( n - 1 ).
246 public double sampleVariance() {
// NOTE(review): the condition guarding this throw is not visible here; going
// by the message it fires when fewer than two values are stored -- confirm.
// NOTE(review): the message says "less then" where "fewer than" is meant; it
// is runtime text and is left unchanged here.
249 throw new ArithmeticException( "attempt to calculate sample variance for less then two values" );
// Dividing by n - 1 instead of n is Bessel's correction for a sample.
251 return ( sumDeviations() / ( getN() - 1 ) );
255 * @see org.forester.util.DescriptiveStatisticsI#standardErrorOfMean()
// Standard error of the mean: sample standard deviation divided by the
// square root of the number of values.
258 public double standardErrorOfMean() {
260 return ( sampleStandardDeviation() / Math.sqrt( getN() ) );
264 * @see org.forester.util.DescriptiveStatisticsI#sumDeviations()
// Accumulates the squared deviations of all values from the arithmetic mean
// into _sigma.
// NOTE(review): the return statement is not visible here; presumably _sigma.
267 public double sumDeviations() {
// The pass over the data only happens while the flag is set; clearing it
// here means later calls skip the loop until the flag is raised again.
269 if ( _recalc_sigma ) {
270 _recalc_sigma = false;
// NOTE(review): no reset of _sigma is visible before the loop; without one a
// recalculation would add on top of the previous total -- confirm.
// The mean is computed once, outside the loop.
272 final double mean = arithmeticMean();
273 for( int i = 0; i < getN(); ++i ) {
274 _sigma += Math.pow( ( getValue( i ) - mean ), 2 );
281 * @see org.forester.util.DescriptiveStatisticsI#toString()
// Multi-line, human-readable report of the statistics of this data set.
284 public String toString() {
// NOTE(review): presumably returned only when no values are stored; the
// guarding condition is not visible here.
286 return "empty data set statistics";
// NOTE(review): StringBuffer is synchronized; for a method-local buffer a
// StringBuilder would be sufficient.
288 final StringBuffer sb = new StringBuffer();
// Each entry is appended as "label : value"; entries are separated by the
// string returned from ForesterUtil.getLineSeparator().
289 sb.append( "Descriptive statistics:" );
290 sb.append( ForesterUtil.getLineSeparator() );
291 sb.append( "n : " + getN() );
293 sb.append( ForesterUtil.getLineSeparator() );
294 sb.append( "min : " + getMin() );
295 sb.append( ForesterUtil.getLineSeparator() );
296 sb.append( "max : " + getMax() );
297 sb.append( ForesterUtil.getLineSeparator() );
298 sb.append( "midrange : " + midrange() );
299 sb.append( ForesterUtil.getLineSeparator() );
300 sb.append( "median : " + median() );
301 sb.append( ForesterUtil.getLineSeparator() );
302 sb.append( "mean : " + arithmeticMean() );
303 sb.append( ForesterUtil.getLineSeparator() );
// From here on every figure goes through sampleVariance() (directly or via
// sampleStandardDeviation()), which can throw ArithmeticException.
304 sb.append( "sd : " + sampleStandardDeviation() );
305 sb.append( ForesterUtil.getLineSeparator() );
306 sb.append( "variance : " + sampleVariance() );
307 sb.append( ForesterUtil.getLineSeparator() );
308 sb.append( "standard error of mean : " + standardErrorOfMean() );
309 sb.append( ForesterUtil.getLineSeparator() );
310 sb.append( "coefficient of variation: " + coefficientOfVariation() );
311 sb.append( ForesterUtil.getLineSeparator() );
312 sb.append( "pearsonian skewness : " + pearsonianSkewness() );
314 return sb.toString();
// Signals that a result was requested from a data set without values.
// ArithmeticException is unchecked, so the throws clause is informational only.
317 private void validate() throws ArithmeticException {
// NOTE(review): the guarding condition is not visible here; going by the
// message it fires when no values are stored -- confirm.
319 throw new ArithmeticException( "attempt to get a result from empty data set statistics" );
323 public static int[] performBinning( final double[] values,
326 final int number_of_bins ) {
328 throw new IllegalArgumentException( "min [" + min + "] is larger than or equal to max [" + max + "]" );
330 if ( number_of_bins < 3 ) {
331 throw new IllegalArgumentException( "number of bins is smaller than 3" );
333 final int[] bins = new int[ number_of_bins ];
334 final double binning_factor = number_of_bins / ( max - min );
335 final int last_index = number_of_bins - 1;
336 for( final double d : values ) {
337 if ( !( ( d > max ) || ( d < min ) ) ) {
338 final int bin = ( int ) ( ( d - min ) * binning_factor );
339 if ( bin > last_index ) {
340 ++bins[ last_index ];
351 * Computes the sample standard unit (z-score). Used to compute 'value' in
352 * terms of standard units. Note that 'value', 'mean' and 'sd' must be all
353 * from the same sample data.
356 * a double in the sample for which
358 * the mean of the sample.
360 * The standard deviation of the sample.
361 * @return 'value' in terms of standard units
363 public static double sampleStandardUnit( final double value, final double mean, final double sd ) {
364 return ( value - mean ) / sd;
368 public List<Double> getData() {
373 public void setDescription( final String desc ) {
378 public String getDescription() {