2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.evoinference.distance;
28 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
29 import org.forester.msa.Msa;
30 import org.forester.sequence.Sequence;
// Calculates pairwise distance matrices between the sequences (rows) of an
// Msa. Three measures are offered through static entry points: Kimura
// distance, Poisson distance, and fractional dissimilarity.
32 public final class PairwiseDistanceCalculator {
// Default fallback value that the single-argument static entry points hand to
// the constructor as the "too large distance" replacement.
34 public static final double DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA = 10; // Felsenstein uses -1
// Gap character, taken from Sequence.GAP; residues are tested against it
// when two rows are compared.
35 private static final char GAP = Sequence.GAP;
// The alignment whose rows are compared.
36 private final Msa _msa;
// Fallback distance for this instance; returned by both calcKimuraDistance
// and calcPoissonDistance on one of their early-return paths.
37 private final double _value_for_too_large_distance_for_kimura_formula;
// Private constructor, invoked by the static calc* entry points of this class.
// Stores the fallback value used when a distance cannot be computed by formula.
// NOTE(review): no assignment of the final field _msa from the msa parameter
// is visible in this listing -- confirm it is initialised here.
39 private PairwiseDistanceCalculator( final Msa msa, final double value_for_too_large_distance_for_kimura_formula ) {
41 _value_for_too_large_distance_for_kimura_formula = value_for_too_large_distance_for_kimura_formula;
// Returns the ratio nd / n (as a double) for alignment rows row_1 and row_2.
// Every column of the alignment is visited and the residues of both rows are
// tested against GAP.
// NOTE(review): the declarations and updates of nd, n, aa_1 and aa_2 are not
// visible in this listing. Presumably n counts columns in which neither row
// has a gap and nd counts those of them in which the residues differ --
// confirm, and confirm that n == 0 is handled before the division.
44 private double calcFractionalDissimilarity( final int row_1, final int row_2 ) {
45 final int length = _msa.getLength();
50 for( int col = 0; col < length; ++col ) {
51 aa_1 = _msa.getResidueAt( row_1, col );
52 aa_2 = _msa.getResidueAt( row_2, col );
// Both residues are non-gap in this column.
53 if ( ( aa_1 != GAP ) && ( aa_2 != GAP ) ) {
// The cast forces floating-point division.
63 return ( double ) nd / n;
// Computes the Kimura distance between alignment rows row_1 and row_2.
// With p the fractional dissimilarity of the two rows, the distance is
// -ln( 1 - p - 0.2 * p^2 ) on the general path.
// NOTE(review): the conditions guarding the two early returns are not visible
// in this listing. Presumably the fallback value is returned when dp is not
// positive (logarithm undefined) and 0 is returned when dp equals 1 -- confirm.
74 private double calcKimuraDistance( final int row_1, final int row_2 ) {
75 final double p = calcFractionalDissimilarity( row_1, row_2 );
76 final double dp = 1 - p - ( 0.2 * p * p );
// Fallback configured at construction time.
78 return _value_for_too_large_distance_for_kimura_formula;
81 return 0; // To avoid -0.
83 return -Math.log( dp );
// Computes the Poisson distance between alignment rows row_1 and row_2.
// With p the fractional dissimilarity of the two rows, the distance is
// -ln( 1 - p ) on the general path.
// NOTE(review): the conditions guarding the two early returns are not visible
// in this listing. Presumably the fallback value is returned when dp is not
// positive (logarithm undefined) and 0 is returned when dp equals 1 -- confirm.
86 private double calcPoissonDistance( final int row_1, final int row_2 ) {
87 final double p = calcFractionalDissimilarity( row_1, row_2 );
88 final double dp = 1 - p;
// Reuses the same fallback field as the Kimura calculation, despite its name.
90 return _value_for_too_large_distance_for_kimura_formula;
93 return 0; // To avoid -0.
95 return -Math.log( dp );
// Builds a matrix with one row per sequence of the alignment, copies the
// sequence identifiers into it, and fills it with pairwise Kimura distances.
// NOTE(review): the return statement is not visible in this listing;
// presumably the filled matrix d is returned -- confirm.
98 private BasicSymmetricalDistanceMatrix calcKimuraDistances() {
99 final int s = _msa.getNumberOfSequences();
100 final BasicSymmetricalDistanceMatrix d = new BasicSymmetricalDistanceMatrix( s );
101 copyIdentifiers( s, d );
102 calcKimuraDistances( s, d );
// Builds a matrix with one row per sequence of the alignment, copies the
// sequence identifiers into it, and fills it with pairwise Poisson distances.
// NOTE(review): the return statement is not visible in this listing;
// presumably the filled matrix d is returned -- confirm.
106 private BasicSymmetricalDistanceMatrix calcPoissonDistances() {
107 final int s = _msa.getNumberOfSequences();
108 final BasicSymmetricalDistanceMatrix d = new BasicSymmetricalDistanceMatrix( s );
109 copyIdentifiers( s, d );
110 calcPoissonDistances( s, d );
// Builds a matrix with one row per sequence of the alignment, copies the
// sequence identifiers into it, and fills it with pairwise fractional
// dissimilarities.
// NOTE(review): the return statement is not visible in this listing;
// presumably the filled matrix d is returned -- confirm.
114 private BasicSymmetricalDistanceMatrix calcFractionalDissimilarities() {
115 final int s = _msa.getNumberOfSequences();
116 final BasicSymmetricalDistanceMatrix d = new BasicSymmetricalDistanceMatrix( s );
117 copyIdentifiers( s, d );
118 calcFractionalDissimilarities( s, d );
// Stores the Kimura distance of rows i and j in d for every index pair with
// 0 <= j < i < s. Only the (i, j) cell is set explicitly here; how the
// mirrored (j, i) cell and the diagonal are handled is left to
// BasicSymmetricalDistanceMatrix (not visible in this file).
122 private void calcKimuraDistances( final int s, final BasicSymmetricalDistanceMatrix d ) {
123 for( int i = 1; i < s; i++ ) {
124 for( int j = 0; j < i; j++ ) {
125 d.setValue( i, j, calcKimuraDistance( i, j ) );
// Stores the Poisson distance of rows i and j in d for every index pair with
// 0 <= j < i < s. Only the (i, j) cell is set explicitly here; how the
// mirrored (j, i) cell and the diagonal are handled is left to
// BasicSymmetricalDistanceMatrix (not visible in this file).
130 private void calcPoissonDistances( final int s, final BasicSymmetricalDistanceMatrix d ) {
131 for( int i = 1; i < s; i++ ) {
132 for( int j = 0; j < i; j++ ) {
133 d.setValue( i, j, calcPoissonDistance( i, j ) );
// Stores the fractional dissimilarity of rows i and j in d for every index
// pair with 0 <= j < i < s. Only the (i, j) cell is set explicitly here; how
// the mirrored (j, i) cell and the diagonal are handled is left to
// BasicSymmetricalDistanceMatrix (not visible in this file).
138 private void calcFractionalDissimilarities( final int s, final BasicSymmetricalDistanceMatrix d ) {
139 for( int i = 1; i < s; i++ ) {
140 for( int j = 0; j < i; j++ ) {
141 d.setValue( i, j, calcFractionalDissimilarity( i, j ) );
// Cloning is not supported: this method unconditionally throws
// CloneNotSupportedException.
147 public Object clone() throws CloneNotSupportedException {
148 throw new CloneNotSupportedException();
// Copies the identifier of each alignment row with index 0 .. s - 1 onto the
// same index of the distance matrix d.
151 private void copyIdentifiers( final int s, final BasicSymmetricalDistanceMatrix d ) {
152 for( int i = 0; i < s; i++ ) {
153 d.setIdentifier( i, _msa.getIdentifier( i ) );
// Public entry point: returns the matrix of pairwise fractional
// dissimilarities for the sequences of msa. A calculator instance is created
// with the default fallback constant and the work is delegated to it.
157 public static BasicSymmetricalDistanceMatrix calcFractionalDissimilarities( final Msa msa ) {
158 return new PairwiseDistanceCalculator( msa, DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA )
159 .calcFractionalDissimilarities();
// Public entry point: returns the matrix of pairwise Poisson distances for
// the sequences of msa. A calculator instance is created with the default
// fallback constant and the work is delegated to it.
162 public static BasicSymmetricalDistanceMatrix calcPoissonDistances( final Msa msa ) {
163 return new PairwiseDistanceCalculator( msa, DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA )
164 .calcPoissonDistances();
// Public entry point: returns the matrix of pairwise Kimura distances for the
// sequences of msa, using the default fallback constant for distances the
// formula cannot produce.
167 public static BasicSymmetricalDistanceMatrix calcKimuraDistances( final Msa msa ) {
168 return new PairwiseDistanceCalculator( msa, DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA )
169 .calcKimuraDistances();
// Public entry point: returns the matrix of pairwise Kimura distances for the
// sequences of msa, with a caller-supplied fallback value in place of the
// default constant.
172 public static BasicSymmetricalDistanceMatrix calcKimuraDistances( final Msa msa,
173 final double value_for_too_large_distance_for_kimura_formula ) {
174 return new PairwiseDistanceCalculator( msa, value_for_too_large_distance_for_kimura_formula )
175 .calcKimuraDistances();
// Names the three pairwise distance measures computed by this class.
// NOTE(review): no use of this enum is visible in this listing; presumably
// callers use it to select a method -- confirm.
178 public enum PWD_DISTANCE_METHOD {
179 KIMURA_DISTANCE, POISSON_DISTANCE, FRACTIONAL_DISSIMILARITY;