import java.util.Date;
import java.util.List;
-import org.forester.archaeopteryx.Archaeopteryx;
import org.forester.evoinference.distance.NeighborJoining;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
public class TestPhylogenyReconstruction {
- private final static double ZERO_DIFF = 1.0E-9;
- private final static boolean TIME = false;
+ private final static double ZERO_DIFF = 1.0E-9;
/**
 * Tolerance-based comparison of two doubles.
 *
 * @param a first value
 * @param b second value
 * @return true when the absolute difference of a and b is strictly smaller than ZERO_DIFF
 */
public static boolean isEqual( final double a, final double b ) {
    final double delta = Math.abs( a - b );
    return delta < ZERO_DIFF;
}
+ public static boolean isUnequal( final double a, final double b ) { // logical negation of isEqual: true unless |a - b| < ZERO_DIFF
+ return !isEqual( a, b );
+ }
+
// Entry point. In the added ("+") version the result of testNeighborJoining() is reported on
// standard output: "OK." when it returns true, "failed." otherwise. The removed ("-") version
// only called the test and ignored its result. The timing run stays commented out in both.
public static void main( final String[] args ) {
- testNeighborJoining();
- // timeNeighborJoining();
+ if ( testNeighborJoining() ) {
+ System.out.println( "OK." );
+ }
+ else {
+ System.out.println( "failed." );
+ }
+ //timeNeighborJoining();
}
public static boolean test( final File test_dir ) {
private static boolean testNeighborJoining() {
try {
- final NeighborJoining nj = NeighborJoining.createInstance();
- // BasicSymmetricalDistanceMatrix m0 = new BasicSymmetricalDistanceMatrix( 4 );
- // m0.setIdentifier( 0, "A" );
- // m0.setIdentifier( 1, "B" );
- // m0.setIdentifier( 2, "C" );
- // m0.setIdentifier( 3, "D" );
- // m0.setRow( "5 ", 1 );
- // m0.setRow( "3 6 ", 2 );
- // m0.setRow( "7.5 10.5 5.5", 3 );
- // System.out.println( m0.toString() );
- // final Phylogeny p0 = nj.execute( m0 );
- // Archaeopteryx.createApplication( p0 );
- //
+ NeighborJoining nj = NeighborJoining.createInstance();
+ final BasicSymmetricalDistanceMatrix m0 = new BasicSymmetricalDistanceMatrix( 4 );
+ m0.setIdentifier( 0, "A" );
+ m0.setIdentifier( 1, "B" );
+ m0.setIdentifier( 2, "C" );
+ m0.setIdentifier( 3, "D" );
+ m0.setRow( "5 ", 1 );
+ m0.setRow( "3 6 ", 2 );
+ m0.setRow( "7.5 10.5 5.5", 3 );
+ final Phylogeny p0 = nj.execute( m0 );
+ p0.reRoot( p0.getNode( "D" ) );
+ if ( isUnequal( p0.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+ return false;
+ }
+ if ( isUnequal( p0.getNode( "B" ).getDistanceToParent(), 4 ) ) {
+ return false;
+ }
+ if ( isUnequal( p0.getNode( "C" ).getDistanceToParent(), 0.5 ) ) {
+ return false;
+ }
+ if ( isUnequal( p0.getNode( "D" ).getDistanceToParent(), 2.5 ) ) {
+ return false;
+ }
+ if ( isUnequal( p0.getNode( "A" ).getParent().getDistanceToParent(), 1.5 ) ) {
+ return false;
+ }
+ if ( isUnequal( p0.getNode( "A" ).getParent().getParent().getDistanceToParent(), 2.5 ) ) {
+ return false;
+ }
+ nj = NeighborJoining.createInstance();
+ final BasicSymmetricalDistanceMatrix m00 = new BasicSymmetricalDistanceMatrix( 4 );
+ m00.setIdentifier( 0, "A" );
+ m00.setIdentifier( 1, "B" );
+ m00.setIdentifier( 2, "C" );
+ m00.setIdentifier( 3, "D" );
+ m00.setRow( "2.01 ", 1 );
+ m00.setRow( "3 3.01 ", 2 );
+ m00.setRow( "3.01 3.02 1.01", 3 );
+ final Phylogeny p00 = nj.execute( m00 );
+ p00.reRoot( p00.getNode( "D" ) );
+ if ( isUnequal( p00.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+ return false;
+ }
+ if ( isUnequal( p00.getNode( "B" ).getDistanceToParent(), 1.01 ) ) {
+ return false;
+ }
+ if ( isUnequal( p00.getNode( "C" ).getDistanceToParent(), 0.5 ) ) {
+ return false;
+ }
+ if ( isUnequal( p00.getNode( "D" ).getDistanceToParent(), 0.255 ) ) {
+ return false;
+ }
+ if ( isUnequal( p00.getNode( "A" ).getParent().getDistanceToParent(), 1.5 ) ) {
+ return false;
+ }
+ if ( isUnequal( p00.getNode( "A" ).getParent().getParent().getDistanceToParent(), 0.255 ) ) {
+ return false;
+ }
BasicSymmetricalDistanceMatrix m = new BasicSymmetricalDistanceMatrix( 6 );
m.setRow( "5", 1 );
m.setRow( "4 7", 2 );
m.setIdentifier( 3, "D" );
m.setIdentifier( 4, "E" );
m.setIdentifier( 5, "F" );
- System.out.println( m.toString() );
+ nj = NeighborJoining.createInstance();
final Phylogeny p1 = nj.execute( m );
- Archaeopteryx.createApplication( p1 );
- // m = new BasicSymmetricalDistanceMatrix( 7 );
- // m.setIdentifier( 0, "Bovine" );
- // m.setIdentifier( 1, "Mouse" );
- // m.setIdentifier( 2, "Gibbon" );
- // m.setIdentifier( 3, "Orang" );
- // m.setIdentifier( 4, "Gorilla" );
- // m.setIdentifier( 5, "Chimp" );
- // m.setIdentifier( 6, "Human" );
- // m.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 );
- // m.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 );
- // m.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 );
- // m.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 );
- // m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
- // m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
- // m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
- // System.out.println( m.toString() );
- // final Phylogeny p2 = nj.execute( m );
- // p2.reRoot( p2.getNode( "Bovine" ) );
- // System.out.println( p2.toString() );
- // Archaeopteryx.createApplication( p2 );
- // // from phylip Neighbor-Joining/UPGMA method version 3.69:
- // // ((((((Chimp:0.15167,Human:0.11753):0.03982,Gorilla:0.15393):0.02696,Orang:0.28469):0.04648,Gibbon:0.35793):0.42027,Mouse:0.76891):0.458845,Bovine:0.458845);
- // Archaeopteryx.createApplication( p2 );
- // m = new BasicSymmetricalDistanceMatrix( 4 );
- // m.setIdentifier( 0, "A" );
- // m.setIdentifier( 1, "B" );
- // m.setIdentifier( 2, "C" );
- // m.setIdentifier( 3, "D" );
- // m.setRow( "0.00 0.95 0.17 0.98", 0 );
- // m.setRow( "0.95 0.00 1.02 1.83", 1 );
- // m.setRow( "0.17 1.02 0.00 1.01", 2 );
- // m.setRow( "0.98 1.83 1.01 0.00", 3 );
- // final Phylogeny p3 = nj.execute( m );
- // //
- // // -- A 0.05
- // // - |0.01
- // // ----------------------- B 0.90
- // //
- // // --- C 0.10
- // // - |0.01
- // // ------------------------- D 0.91
- // p3.reRoot( p3.getNode( "C" ).getParent() );
- // if ( !isEqual( p3.getNode( "A" ).getDistanceToParent(), 0.05 ) ) {
- // return false;
- // }
- // if ( !isEqual( p3.getNode( "B" ).getDistanceToParent(), 0.90 ) ) {
- // return false;
- // }
- // if ( !isEqual( p3.getNode( "C" ).getDistanceToParent(), 0.10 ) ) {
- // return false;
- // }
- // if ( !isEqual( p3.getNode( "D" ).getDistanceToParent(), 0.91 ) ) {
- // return false;
- // }
- // if ( TIME ) {
- // timeNeighborJoining();
- // }
+ p1.reRoot( p1.getNode( "F" ) );
+ if ( isUnequal( p1.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "B" ).getDistanceToParent(), 4 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "C" ).getDistanceToParent(), 2 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "D" ).getDistanceToParent(), 3 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "E" ).getDistanceToParent(), 2 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "F" ).getDistanceToParent(), 2.5 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "A" ).getParent().getDistanceToParent(), 1 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getDistanceToParent(), 1 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getParent().getDistanceToParent(), 2.5 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "B" ).getParent().getDistanceToParent(), 1 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "D" ).getParent().getDistanceToParent(), 1 ) ) {
+ return false;
+ }
+ if ( isUnequal( p1.getNode( "E" ).getParent().getDistanceToParent(), 1 ) ) {
+ return false;
+ }
+ m = new BasicSymmetricalDistanceMatrix( 7 );
+ m.setIdentifier( 0, "Bovine" );
+ m.setIdentifier( 1, "Mouse" );
+ m.setIdentifier( 2, "Gibbon" );
+ m.setIdentifier( 3, "Orang" );
+ m.setIdentifier( 4, "Gorilla" );
+ m.setIdentifier( 5, "Chimp" );
+ m.setIdentifier( 6, "Human" );
+ m.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 );
+ m.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 );
+ m.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 );
+ m.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 );
+ m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
+ m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
+ m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
+ nj = NeighborJoining.createInstance( false, 6 );
+ final Phylogeny p2 = nj.execute( m );
+ p2.reRoot( p2.getNode( "Bovine" ) );
+ if ( isUnequal( p2.getNode( "Chimp" ).getDistanceToParent(), 0.151675 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Human" ).getDistanceToParent(), 0.117525 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Gorilla" ).getDistanceToParent(), 0.153932 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Orang" ).getDistanceToParent(), 0.284694 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Gibbon" ).getDistanceToParent(), 0.357931 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Mouse" ).getDistanceToParent(), 0.76891 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Bovine" ).getDistanceToParent(), 0.458845 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getDistanceToParent(), 0.039819 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Human" ).getParent().getDistanceToParent(), 0.039819 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getDistanceToParent(), 0.026956 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getDistanceToParent(), 0.046481 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getDistanceToParent(),
+ 0.420269 ) ) {
+ return false;
+ }
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getParent()
+ .getDistanceToParent(), 0.458845 ) ) {
+ return false;
+ }
+ m = new BasicSymmetricalDistanceMatrix( 4 );
+ m.setIdentifier( 0, "A" );
+ m.setIdentifier( 1, "B" );
+ m.setIdentifier( 2, "C" );
+ m.setIdentifier( 3, "D" );
+ m.setRow( "0.00 0.95 0.17 0.98", 0 );
+ m.setRow( "0.95 0.00 1.02 1.83", 1 );
+ m.setRow( "0.17 1.02 0.00 1.01", 2 );
+ m.setRow( "0.98 1.83 1.01 0.00", 3 );
+ final Phylogeny p3 = nj.execute( m );
+ p3.reRoot( p3.getNode( "C" ) );
+ if ( isUnequal( p3.getNode( "A" ).getDistanceToParent(), 0.05 ) ) {
+ return false;
+ }
+ if ( isUnequal( p3.getNode( "B" ).getDistanceToParent(), 0.90 ) ) {
+ return false;
+ }
+ if ( !isEqual( p3.getNode( "C" ).getDistanceToParent(), 0.05 ) ) {
+ return false;
+ }
+ if ( !isEqual( p3.getNode( "D" ).getDistanceToParent(), 0.91 ) ) {
+ return false;
+ }
+ if ( isUnequal( p3.getNode( "A" ).getParent().getDistanceToParent(), 0.02 ) ) {
+ return false;
+ }
+ if ( isUnequal( p3.getNode( "A" ).getParent().getParent().getDistanceToParent(), 0.05 ) ) {
+ return false;
+ }
+ //if ( TIME ) {
+ // timeNeighborJoining();
+ //}
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
// Ad-hoc timing helper: for each n in the loop range it builds a distance matrix of size 2^n,
// randomizes it, runs nj.execute() on it and prints the elapsed wall-clock milliseconds.
// The added ("+") loop bound raises the largest matrix from 2^6 = 64 to 2^10 = 1024.
private static void timeNeighborJoining() {
final NeighborJoining nj = NeighborJoining.createInstance();
- for( int n = 3; n <= 6; ++n ) {
+ for( int n = 3; n <= 10; ++n ) {
final int x = ( int ) Math.pow( 2, n ); // matrix dimension for this round
final BasicSymmetricalDistanceMatrix mt = new BasicSymmetricalDistanceMatrix( x );
mt.randomize( new Date().getTime() ); // current time in ms is passed in - presumably used as a seed; TODO confirm
// System.out.println( mt.toStringBuffer( Format.PHYLIP ) );
final long start_time = new Date().getTime();
nj.execute( mt ); // result is discarded, only the run time matters here
- System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms." );
+ System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms" );
}
}
}
package org.forester.evoinference.distance;
+import java.math.RoundingMode;
+import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
public final class NeighborJoining {
private BasicSymmetricalDistanceMatrix _d;
- // private BasicSymmetricalDistanceMatrix _m;
private double[][] _d_values;
private double[][] _m_values;
private double[] _r;
private PhylogenyNode[] _external_nodes;
private int[] _mappings;
private final boolean _verbose;
- private final static boolean DEBUG = true;
+ private final DecimalFormat _df;
- private NeighborJoining( final boolean verbose ) {
+ private NeighborJoining() { // default configuration, used by the no-argument createInstance()
+ _verbose = false;
+ _df = null; // no formatter: the "_df == null" branches store branch lengths unrounded
+ }
+
+ /**
+  * Creates an instance whose branch lengths are rounded (HALF_UP) to a limited number of
+  * fraction digits by formatting them with a DecimalFormat and parsing the text back.
+  *
+  * @param verbose value stored in the _verbose flag
+  * @param maximum_fraction_digits_for_distances number of fraction digits to keep, 1 to 9 inclusive
+  * @throws IllegalArgumentException if maximum_fraction_digits_for_distances is outside 1..9
+  */
+ private NeighborJoining( final boolean verbose, final int maximum_fraction_digits_for_distances ) {
+ if ( ( maximum_fraction_digits_for_distances < 1 ) || ( maximum_fraction_digits_for_distances > 9 ) ) {
+ throw new IllegalArgumentException( "maximum fraction digits for distances is out of range: "
+ + maximum_fraction_digits_for_distances );
+ }
_verbose = verbose;
+ _df = new DecimalFormat();
+ // The formatted text is parsed back with Double.parseDouble(), which only accepts '.' as
+ // decimal separator and no grouping separators. A default-locale DecimalFormat may emit
+ // "0,15" or "1,234.5", so pin locale-neutral symbols and switch grouping off.
+ _df.setDecimalFormatSymbols( java.text.DecimalFormatSymbols.getInstance( java.util.Locale.ROOT ) );
+ _df.setGroupingUsed( false );
+ _df.setMaximumFractionDigits( maximum_fraction_digits_for_distances );
+ _df.setRoundingMode( RoundingMode.HALF_UP );
}
private final void printM() {
- for( int i = 0; i < _m_values.length; i++ ) {
+ System.out.println( "M:" );
+ for( final double[] _m_value : _m_values ) {
for( int j = 0; j < _m_values.length; j++ ) {
- System.out.print( _m_values[ i ][ j ] );
+ System.out.print( _m_value[ j ] );
System.out.print( " " );
}
System.out.println();
}
private final void printD() {
- for( int i = 0; i < _d_values.length; i++ ) {
+ System.out.println( "D:" );
+ for( final double[] _d_value : _d_values ) {
for( int j = 0; j < _d_values.length; j++ ) {
- System.out.print( _d_values[ i ][ j ] );
+ System.out.print( _d_value[ j ] );
System.out.print( " " );
}
System.out.println();
}
private final void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) {
- final int otu1_m = _mappings[ otu1 ];
- final int otu2_m = _mappings[ otu2 ];
- int i_m;
+ // final int otu1_m = _mappings[ otu1 ];
+ // final int otu2_m = _mappings[ otu2 ];
+ // int i_m;
for( int i = 0; i < _n; ++i ) {
if ( ( i == otu1 ) || ( i == otu2 ) ) {
continue;
}
- _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
+ if ( otu1 < i ) {
+ _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
+ }
+ else {
+ _d_values[ _mappings[ i ] ][ _mappings[ otu1 ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
+ }
//i_m = _mappings[ i ];
//_d_values[ otu1_m ][ i_m ] = ( ( _d_values[ otu1_m ][ i_m ] + _d_values[ i_m ][ otu2_m ] ) - 2 ) / 2;
}
for( int n = 0; n < _n; ++n ) {
//d += _d_values[ i_m ][ _mappings[ n ] ];
if ( i != n ) {
- if ( i < n ) {
- d += getValueFromD( i, n );
- System.out.print( "+" );
- System.out.print( getValueFromD( i, n ) );
- }
- else {
- d += getValueFromD( n, i );
- System.out.print( "+" );
- System.out.print( getValueFromD( n, i ) );
- }
- }
- else {
- if ( DEBUG ) {
- if ( getValueFromD( i, n ) != 0 ) {
- throw new RuntimeException( "faulty NJ code" );
- }
- }
+ d += getValueFromD( i, n );
}
}
_r[ i ] = d;
- System.out.print( "=" );
- System.out.println( d );
}
}
// Calculates the minimal distance.
// If there is more than one minimal distance, the first one found is always used;
// this could be randomized, so that any of them would be returned in a randomized fashion...
- double minimum = Double.MAX_VALUE;
- int otu1 = -1;
- int otu2 = -1;
+ double minimum = _m_values[ 0 ][ 1 ];
+ int otu1 = 0;
+ int otu2 = 1;
for( int j = 1; j < _n; ++j ) {
for( int i = 0; i < j; ++i ) {
if ( _m_values[ i ][ j ] < minimum ) {
}
}
// It is a condition that otu1 < otu2.
- if ( DEBUG ) {
- if ( otu1 > otu2 ) {
- throw new RuntimeException( "faulty NJ code: otu1 > otu2" );
- }
- }
final PhylogenyNode node = new PhylogenyNode();
final double d = getValueFromD( otu1, otu2 );
final double d1 = ( d / 2 ) + ( ( _r[ otu1 ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) );
final double d2 = d - d1;
- getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 );
- getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 );
+ if ( _df == null ) {
+ getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 );
+ getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 );
+ }
+ else {
+ // yes, yes, slow but only grows with n (and not n^2 or worse)...
+ getExternalPhylogenyNode( otu1 ).setDistanceToParent( Double.parseDouble( _df.format( d1 ) ) );
+ getExternalPhylogenyNode( otu2 ).setDistanceToParent( Double.parseDouble( _df.format( d2 ) ) );
+ }
node.addAsChild( getExternalPhylogenyNode( otu1 ) );
node.addAsChild( getExternalPhylogenyNode( otu2 ) );
if ( _verbose ) {
--_n;
}
final double d = getValueFromD( 0, 1 ) / 2;
- getExternalPhylogenyNode( 0 ).setDistanceToParent( d );
- getExternalPhylogenyNode( 1 ).setDistanceToParent( d );
+ if ( _df == null ) {
+ getExternalPhylogenyNode( 0 ).setDistanceToParent( d );
+ getExternalPhylogenyNode( 1 ).setDistanceToParent( d );
+ }
+ else {
+ final double dd = Double.parseDouble( _df.format( d ) );
+ getExternalPhylogenyNode( 0 ).setDistanceToParent( dd );
+ getExternalPhylogenyNode( 1 ).setDistanceToParent( dd );
+ }
final PhylogenyNode root = new PhylogenyNode();
root.addAsChild( getExternalPhylogenyNode( 0 ) );
root.addAsChild( getExternalPhylogenyNode( 1 ) );
private final double getValueFromD( final int otu1, final int otu2 ) {
if ( otu1 > otu2 ) {
- //throw new IllegalStateException();
return _d_values[ _mappings[ otu2 ] ][ _mappings[ otu1 ] ];
}
return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ];
private final void updateM() {
calculateNetDivergences();
- double r_j;
- int j_m;
+ final double r_j;
+ final int j_m;
final int _n_2 = _n - 2;
for( int j = 1; j < _n; ++j ) {
//r_j = _r[ j ];
//_m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ j_m ] - ( ( _r[ i ] + r_j ) / ( _n_2 ) );
}
}
- printM();
- printD();
}
- // private final double getValueFromD( final int otu1, final int otu2 ) {
- // return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ];
- // }
// otu2 will, in effect, be "deleted" from the matrix.
private final void updateMappings( final int otu2 ) {
for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) {
}
// Factory for the default configuration. The added ("+") version delegates to the no-argument
// constructor instead of passing verbose = false to the removed one-argument constructor.
public final static NeighborJoining createInstance() {
- return new NeighborJoining( false );
+ return new NeighborJoining();
}
- public final static NeighborJoining createInstance( final boolean verbose ) {
- return new NeighborJoining( verbose );
+ public final static NeighborJoining createInstance( final boolean verbose, // factory replacing the one-argument overload removed above
+ final int maximum_fraction_digits_for_distances ) { // forwarded unchanged to the two-argument constructor
+ return new NeighborJoining( verbose, maximum_fraction_digits_for_distances );
}
}