3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2010 Christian M. Zmasek
7 // Copyright (C) 2008-2010 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
27 package org.forester.surfacing;
29 import java.util.ArrayList;
30 import java.util.List;
31 import java.util.SortedMap;
32 import java.util.TreeMap;
34 import org.forester.util.BasicDescriptiveStatistics;
35 import org.forester.util.DescriptiveStatistics;
37 public class DomainLengths {
39 final DomainId _domain_id;
40 final SortedMap<Species, DescriptiveStatistics> _length_statistics;
42 public DomainLengths( final DomainId domain_id ) {
43 _domain_id = domain_id;
44 _length_statistics = new TreeMap<Species, DescriptiveStatistics>();
47 public void addLength( final Species species, final int domain_length ) {
48 if ( !getLengthStatistics().containsKey( species ) ) {
49 addLengthStatistics( species, new BasicDescriptiveStatistics() );
51 getLengthStatistic( species ).addValue( domain_length );
54 private void addLengthStatistics( final Species species, final DescriptiveStatistics length_statistic ) {
55 if ( getLengthStatistics().containsKey( species ) ) {
56 throw new IllegalArgumentException( "length statistics for [" + species.getSpeciesId() + "] already added" );
58 getLengthStatistics().put( species, length_statistic );
62 * Returns descriptive statistics based on the arithmetic means
68 public DescriptiveStatistics calculateMeanBasedStatistics() {
69 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
70 for( final DescriptiveStatistics s : getLengthStatisticsList() ) {
71 stats.addValue( s.arithmeticMean() );
78 * Note. This is not technically a Z-score since the distribution
79 * of means is unknown (and not normal).
84 public double calculateZScoreForSpecies( final Species species ) {
85 final double species_mean = getLengthStatistic( species ).arithmeticMean();
86 final DescriptiveStatistics domain_stats = calculateMeanBasedStatistics();
87 final double population_sd = domain_stats.sampleStandardDeviation();
88 final double population_mean = domain_stats.arithmeticMean();
89 return ( species_mean - population_mean ) / population_sd;
92 public DomainId getDomainId() {
96 public DescriptiveStatistics getLengthStatistic( final Species species ) {
97 return getLengthStatistics().get( species );
100 private SortedMap<Species, DescriptiveStatistics> getLengthStatistics() {
101 return _length_statistics;
104 public List<DescriptiveStatistics> getLengthStatisticsList() {
105 final List<DescriptiveStatistics> list = new ArrayList<DescriptiveStatistics>();
106 for( final DescriptiveStatistics stats : _length_statistics.values() ) {
112 public List<Species> getMeanBasedOutlierSpecies( final double z_score_limit ) {
113 final List<Species> species = new ArrayList<Species>();
114 if ( getSpeciesList().size() > 1 ) {
115 for( final Species s : getSpeciesList() ) {
116 final double z = calculateZScoreForSpecies( s );
117 if ( z_score_limit < 0 ) {
118 if ( z <= z_score_limit ) {
122 else if ( z_score_limit > 0 ) {
123 if ( z >= z_score_limit ) {
132 public List<Species> getSpeciesList() {
133 final List<Species> list = new ArrayList<Species>();
134 for( final Species s : _length_statistics.keySet() ) {
140 public boolean isHasLengthStatistic( final Species species ) {
141 return getLengthStatistics().containsKey( species );