// $Id:
//
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2010 Christian M. Zmasek
// Copyright (C) 2008-2010 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.surfacing;

import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;

import org.forester.species.Species;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;

/**
 * Collects the observed lengths of a single domain, grouped by species.
 * <p>
 * For every species a {@link DescriptiveStatistics} instance accumulates the
 * lengths passed to {@link #addLength(Species, int)}. The per-species
 * statistics are held in a {@link TreeMap}, so species are iterated in the
 * natural ordering of {@link Species}.
 */
public class DomainLengths {

    final String                                    _domain_id;
    final SortedMap<Species, DescriptiveStatistics> _length_statistics;

    /**
     * Creates an empty length collection for one domain.
     *
     * @param domain_id the identifier returned by {@link #getDomainId()}
     */
    public DomainLengths( final String domain_id ) {
        _domain_id = domain_id;
        _length_statistics = new TreeMap<Species, DescriptiveStatistics>();
    }

    /**
     * Records one observed domain length for a species. The statistics
     * container for the species is created on first use.
     *
     * @param species the species the length was observed in
     * @param domain_length the observed length
     */
    public void addLength( final Species species, final int domain_length ) {
        DescriptiveStatistics stats = getLengthStatistic( species );
        if ( stats == null ) {
            stats = new BasicDescriptiveStatistics();
            addLengthStatistics( species, stats );
        }
        stats.addValue( domain_length );
    }

    /**
     * Returns descriptive statistics based on the arithmetic means
     * for each species.
     *
     * @return new statistics holding one value per species: the arithmetic
     *         mean of the lengths recorded for that species
     */
    public DescriptiveStatistics calculateMeanBasedStatistics() {
        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
        for( final DescriptiveStatistics s : _length_statistics.values() ) {
            stats.addValue( s.arithmeticMean() );
        }
        return stats;
    }

    /**
     * Calculates how far the mean length of one species lies from the mean of
     * all per-species means, in units of the sample standard deviation of
     * those means.
     * <p>
     * Note. This is not technically a Z-score since the distribution
     * of means is unknown (and not normal).
     *
     * @param species the species to score; lengths must have been added for it
     * @return {@code (species mean - mean of species means) / sample standard
     *         deviation of species means}
     * @throws IllegalArgumentException if no length was recorded for {@code species}
     */
    public double calculateZScoreForSpecies( final Species species ) {
        final DescriptiveStatistics species_stats = getLengthStatistic( species );
        if ( species_stats == null ) {
            throw new IllegalArgumentException( "no length statistics for [" + species.getSpeciesId() + "]" );
        }
        final DescriptiveStatistics domain_stats = calculateMeanBasedStatistics();
        return zScore( species_stats.arithmeticMean(),
                       domain_stats.arithmeticMean(),
                       domain_stats.sampleStandardDeviation() );
    }

    /**
     * @return the domain identifier given to the constructor
     */
    public String getDomainId() {
        return _domain_id;
    }

    /**
     * Looks up the length statistics of one species.
     *
     * @param species the species to look up
     * @return the statistics for {@code species}, or {@code null} if no length
     *         has been added for it
     */
    public DescriptiveStatistics getLengthStatistic( final Species species ) {
        return getLengthStatistics().get( species );
    }

    /**
     * @return a new list with the length statistics of every species, in the
     *         iteration order of the underlying sorted map
     */
    public List<DescriptiveStatistics> getLengthStatisticsList() {
        return new ArrayList<DescriptiveStatistics>( _length_statistics.values() );
    }

    /**
     * Returns the species whose score, as defined by
     * {@link #calculateZScoreForSpecies(Species)}, is at or beyond the given limit.
     * <p>
     * A negative limit selects species with a score less than or equal to the
     * limit; a positive limit selects species with a score greater than or
     * equal to it. A limit of zero selects nothing. With fewer than two
     * species the result is always empty.
     *
     * @param z_score_limit the signed threshold
     * @return a new list of the outlier species, possibly empty
     */
    public List<Species> getMeanBasedOutlierSpecies( final double z_score_limit ) {
        final List<Species> outliers = new ArrayList<Species>();
        // NOTE(review): presumably guarded because a sample standard deviation
        // needs at least two values -- confirm against BasicDescriptiveStatistics.
        if ( _length_statistics.size() < 2 ) {
            return outliers;
        }
        // The population statistics do not depend on the species being
        // scored, so they are computed once instead of once per species.
        final DescriptiveStatistics domain_stats = calculateMeanBasedStatistics();
        final double population_mean = domain_stats.arithmeticMean();
        final double population_sd = domain_stats.sampleStandardDeviation();
        for( final Species s : _length_statistics.keySet() ) {
            final double z = zScore( getLengthStatistic( s ).arithmeticMean(), population_mean, population_sd );
            // A NaN score (e.g. 0.0 / 0.0) fails both comparisons and is never reported.
            if ( z_score_limit < 0 ) {
                if ( z <= z_score_limit ) {
                    outliers.add( s );
                }
            }
            else if ( z_score_limit > 0 ) {
                if ( z >= z_score_limit ) {
                    outliers.add( s );
                }
            }
        }
        return outliers;
    }

    /**
     * @return a new list of all species for which lengths have been added, in
     *         the iteration order of the underlying sorted map
     */
    public List<Species> getSpeciesList() {
        return new ArrayList<Species>( _length_statistics.keySet() );
    }

    /**
     * @param species the species to check
     * @return {@code true} if at least one length has been added for {@code species}
     */
    public boolean isHasLengthStatistic( final Species species ) {
        return getLengthStatistics().containsKey( species );
    }

    /**
     * Registers the statistics container for a species not seen before.
     *
     * @throws IllegalArgumentException if statistics for {@code species} already exist
     */
    private void addLengthStatistics( final Species species, final DescriptiveStatistics length_statistic ) {
        if ( getLengthStatistics().containsKey( species ) ) {
            throw new IllegalArgumentException( "length statistics for [" + species.getSpeciesId()
                    + "] already added" );
        }
        getLengthStatistics().put( species, length_statistic );
    }

    private SortedMap<Species, DescriptiveStatistics> getLengthStatistics() {
        return _length_statistics;
    }

    /** Shared score formula: distance from the population mean in units of the population deviation. */
    private static double zScore( final double species_mean, final double population_mean, final double population_sd ) {
        return ( species_mean - population_mean ) / population_sd;
    }
}