From 4d7228400f44ee88136f3d70d9455b1b1d95de27 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 5 Mar 2014 02:27:44 +0000 Subject: [PATCH] inprogress --- .../org/forester/application/msa_compactor.java | 2 +- forester/java/src/org/forester/application/nj.java | 2 +- .../src/org/forester/archaeopteryx/Constants.java | 2 +- .../evoinference/TestPhylogenyReconstruction.java | 280 ++++++++++++++------ .../evoinference/distance/NeighborJoining.java | 112 ++++---- .../org/forester/msa_compactor/MsaCompactor.java | 10 +- 6 files changed, 268 insertions(+), 140 deletions(-) diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index c385ca9..3291d65 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -10,8 +10,8 @@ import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.GeneralMsaParser; import org.forester.msa.Msa; import org.forester.msa.Msa.MSA_FORMAT; -import org.forester.msa_compactor.MsaCompactor; import org.forester.msa.MsaMethods; +import org.forester.msa_compactor.MsaCompactor; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; diff --git a/forester/java/src/org/forester/application/nj.java b/forester/java/src/org/forester/application/nj.java index 0f1ef3e..ad38946 100644 --- a/forester/java/src/org/forester/application/nj.java +++ b/forester/java/src/org/forester/application/nj.java @@ -119,7 +119,7 @@ public class nj { + matrices[ 0 ].getSize() ); } final List ps = new ArrayList(); - final NeighborJoining nj = NeighborJoining.createInstance( verbose ); + final NeighborJoining nj = NeighborJoining.createInstance( verbose, 6 ); final long start_time = new Date().getTime(); for( final DistanceMatrix matrix : matrices ) { ps.add( nj.execute( ( BasicSymmetricalDistanceMatrix ) matrix ) ); diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java index 1e22f74..6a441b7 100644 --- a/forester/java/src/org/forester/archaeopteryx/Constants.java +++ b/forester/java/src/org/forester/archaeopteryx/Constants.java @@ -82,7 +82,7 @@ public final class Constants { final static String APTX_REFERENCE = "Zmasek..."; //TODO final static String PHYLOXML_REFERENCE = ForesterConstants.PHYLO_XML_REFERENCE; final static String PHYLOXML_REFERENCE_SHORT = "Han MV and Zmasek CM (2009), BMC Bioinformatics, 10:356"; - final static short NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT = 2; + final static short NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT = 7; final static short NUMBER_OF_DIGITS_AFTER_COMMA_FOR_CONFIDENCE_VALUES_DEFAULT = 1; public static final boolean NH_PARSING_IGNORE_QUOTES_DEFAULT = false; static final CLADOGRAM_TYPE CLADOGRAM_TYPE_DEFAULT = CLADOGRAM_TYPE.EXT_NODE_SUM_DEP; diff --git a/forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java b/forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java index 1963491..c2c2d89 100644 --- a/forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java +++ b/forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java @@ -33,7 +33,6 @@ import java.io.StringWriter; import java.util.Date; import java.util.List; -import org.forester.archaeopteryx.Archaeopteryx; import org.forester.evoinference.distance.NeighborJoining; import org.forester.evoinference.distance.PairwiseDistanceCalculator; import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; @@ -55,16 +54,24 @@ import org.forester.util.ForesterUtil; public class TestPhylogenyReconstruction { - private final static double ZERO_DIFF = 1.0E-9; - private final static boolean TIME = false; + private final static double ZERO_DIFF = 1.0E-9; public static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < ZERO_DIFF ); } + public static boolean isUnequal( final double a, final double b ) { + return !isEqual( a, b ); + } + public static void main( final String[] args ) { - testNeighborJoining(); - // timeNeighborJoining(); + if ( testNeighborJoining() ) { + System.out.println( "OK." ); + } + else { + System.out.println( "failed." ); + } + //timeNeighborJoining(); } public static boolean test( final File test_dir ) { @@ -1914,19 +1921,64 @@ public class TestPhylogenyReconstruction { private static boolean testNeighborJoining() { try { - final NeighborJoining nj = NeighborJoining.createInstance(); - // BasicSymmetricalDistanceMatrix m0 = new BasicSymmetricalDistanceMatrix( 4 ); - // m0.setIdentifier( 0, "A" ); - // m0.setIdentifier( 1, "B" ); - // m0.setIdentifier( 2, "C" ); - // m0.setIdentifier( 3, "D" ); - // m0.setRow( "5 ", 1 ); - // m0.setRow( "3 6 ", 2 ); - // m0.setRow( "7.5 10.5 5.5", 3 ); - // System.out.println( m0.toString() ); - // final Phylogeny p0 = nj.execute( m0 ); - // Archaeopteryx.createApplication( p0 ); - // + NeighborJoining nj = NeighborJoining.createInstance(); + final BasicSymmetricalDistanceMatrix m0 = new BasicSymmetricalDistanceMatrix( 4 ); + m0.setIdentifier( 0, "A" ); + m0.setIdentifier( 1, "B" ); + m0.setIdentifier( 2, "C" ); + m0.setIdentifier( 3, "D" ); + m0.setRow( "5 ", 1 ); + m0.setRow( "3 6 ", 2 ); + m0.setRow( "7.5 10.5 5.5", 3 ); + final Phylogeny p0 = nj.execute( m0 ); + p0.reRoot( p0.getNode( "D" ) ); + if ( isUnequal( p0.getNode( "A" ).getDistanceToParent(), 1 ) ) { + return false; + } + if ( isUnequal( p0.getNode( "B" ).getDistanceToParent(), 4 ) ) { + return false; + } + if ( isUnequal( p0.getNode( "C" ).getDistanceToParent(), 0.5 ) ) { + return false; + } + if ( isUnequal( p0.getNode( "D" ).getDistanceToParent(), 2.5 ) ) { + return false; + } + if ( isUnequal( p0.getNode( "A" ).getParent().getDistanceToParent(), 1.5 ) ) { + return false; + } + if ( isUnequal( p0.getNode( "A" ).getParent().getParent().getDistanceToParent(), 2.5 ) ) { + return false; + } + nj = NeighborJoining.createInstance(); + final BasicSymmetricalDistanceMatrix m00 = new BasicSymmetricalDistanceMatrix( 4 ); + m00.setIdentifier( 0, "A" ); + m00.setIdentifier( 1, "B" ); + m00.setIdentifier( 2, "C" ); + m00.setIdentifier( 3, "D" ); + m00.setRow( "2.01 ", 1 ); + m00.setRow( "3 3.01 ", 2 ); + m00.setRow( "3.01 3.02 1.01", 3 ); + final Phylogeny p00 = nj.execute( m00 ); + p00.reRoot( p00.getNode( "D" ) ); + if ( isUnequal( p00.getNode( "A" ).getDistanceToParent(), 1 ) ) { + return false; + } + if ( isUnequal( p00.getNode( "B" ).getDistanceToParent(), 1.01 ) ) { + return false; + } + if ( isUnequal( p00.getNode( "C" ).getDistanceToParent(), 0.5 ) ) { + return false; + } + if ( isUnequal( p00.getNode( "D" ).getDistanceToParent(), 0.255 ) ) { + return false; + } + if ( isUnequal( p00.getNode( "A" ).getParent().getDistanceToParent(), 1.5 ) ) { + return false; + } + if ( isUnequal( p00.getNode( "A" ).getParent().getParent().getDistanceToParent(), 0.255 ) ) { + return false; + } BasicSymmetricalDistanceMatrix m = new BasicSymmetricalDistanceMatrix( 6 ); m.setRow( "5", 1 ); m.setRow( "4 7", 2 ); @@ -1939,66 +1991,136 @@ public class TestPhylogenyReconstruction { m.setIdentifier( 3, "D" ); m.setIdentifier( 4, "E" ); m.setIdentifier( 5, "F" ); - System.out.println( m.toString() ); + nj = NeighborJoining.createInstance(); final Phylogeny p1 = nj.execute( m ); - Archaeopteryx.createApplication( p1 ); - // m = new BasicSymmetricalDistanceMatrix( 7 ); - // m.setIdentifier( 0, "Bovine" ); - // m.setIdentifier( 1, "Mouse" ); - // m.setIdentifier( 2, "Gibbon" ); - // m.setIdentifier( 3, "Orang" ); - // m.setIdentifier( 4, "Gorilla" ); - // m.setIdentifier( 5, "Chimp" ); - // m.setIdentifier( 6, "Human" ); - // m.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 ); - // m.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 ); - // m.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 ); - // m.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 ); - // m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 ); - // m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 ); - // m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 ); - // System.out.println( m.toString() ); - // final Phylogeny p2 = nj.execute( m ); - // p2.reRoot( p2.getNode( "Bovine" ) ); - // System.out.println( p2.toString() ); - // Archaeopteryx.createApplication( p2 ); - // // from phylip Neighbor-Joining/UPGMA method version 3.69: - // // ((((((Chimp:0.15167,Human:0.11753):0.03982,Gorilla:0.15393):0.02696,Orang:0.28469):0.04648,Gibbon:0.35793):0.42027,Mouse:0.76891):0.458845,Bovine:0.458845); - // Archaeopteryx.createApplication( p2 ); - // m = new BasicSymmetricalDistanceMatrix( 4 ); - // m.setIdentifier( 0, "A" ); - // m.setIdentifier( 1, "B" ); - // m.setIdentifier( 2, "C" ); - // m.setIdentifier( 3, "D" ); - // m.setRow( "0.00 0.95 0.17 0.98", 0 ); - // m.setRow( "0.95 0.00 1.02 1.83", 1 ); - // m.setRow( "0.17 1.02 0.00 1.01", 2 ); - // m.setRow( "0.98 1.83 1.01 0.00", 3 ); - // final Phylogeny p3 = nj.execute( m ); - // // - // // -- A 0.05 - // // - |0.01 - // // ----------------------- B 0.90 - // // - // // --- C 0.10 - // // - |0.01 - // // ------------------------- D 0.91 - // p3.reRoot( p3.getNode( "C" ).getParent() ); - // if ( !isEqual( p3.getNode( "A" ).getDistanceToParent(), 0.05 ) ) { - // return false; - // } - // if ( !isEqual( p3.getNode( "B" ).getDistanceToParent(), 0.90 ) ) { - // return false; - // } - // if ( !isEqual( p3.getNode( "C" ).getDistanceToParent(), 0.10 ) ) { - // return false; - // } - // if ( !isEqual( p3.getNode( "D" ).getDistanceToParent(), 0.91 ) ) { - // return false; - // } - // if ( TIME ) { - // timeNeighborJoining(); - // } + p1.reRoot( p1.getNode( "F" ) ); + if ( isUnequal( p1.getNode( "A" ).getDistanceToParent(), 1 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "B" ).getDistanceToParent(), 4 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "C" ).getDistanceToParent(), 2 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "D" ).getDistanceToParent(), 3 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "E" ).getDistanceToParent(), 2 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "F" ).getDistanceToParent(), 2.5 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "A" ).getParent().getDistanceToParent(), 1 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getDistanceToParent(), 1 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getParent().getDistanceToParent(), 2.5 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "B" ).getParent().getDistanceToParent(), 1 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "D" ).getParent().getDistanceToParent(), 1 ) ) { + return false; + } + if ( isUnequal( p1.getNode( "E" ).getParent().getDistanceToParent(), 1 ) ) { + return false; + } + m = new BasicSymmetricalDistanceMatrix( 7 ); + m.setIdentifier( 0, "Bovine" ); + m.setIdentifier( 1, "Mouse" ); + m.setIdentifier( 2, "Gibbon" ); + m.setIdentifier( 3, "Orang" ); + m.setIdentifier( 4, "Gorilla" ); + m.setIdentifier( 5, "Chimp" ); + m.setIdentifier( 6, "Human" ); + m.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 ); + m.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 ); + m.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 ); + m.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 ); + m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 ); + m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 ); + m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 ); + nj = NeighborJoining.createInstance( false, 6 ); + final Phylogeny p2 = nj.execute( m ); + p2.reRoot( p2.getNode( "Bovine" ) ); + if ( isUnequal( p2.getNode( "Chimp" ).getDistanceToParent(), 0.151675 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Human" ).getDistanceToParent(), 0.117525 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Gorilla" ).getDistanceToParent(), 0.153932 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Orang" ).getDistanceToParent(), 0.284694 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Gibbon" ).getDistanceToParent(), 0.357931 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Mouse" ).getDistanceToParent(), 0.76891 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Bovine" ).getDistanceToParent(), 0.458845 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Chimp" ).getParent().getDistanceToParent(), 0.039819 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Human" ).getParent().getDistanceToParent(), 0.039819 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getDistanceToParent(), 0.026956 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getDistanceToParent(), 0.046481 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getDistanceToParent(), + 0.420269 ) ) { + return false; + } + if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getParent() + .getDistanceToParent(), 0.458845 ) ) { + return false; + } + m = new BasicSymmetricalDistanceMatrix( 4 ); + m.setIdentifier( 0, "A" ); + m.setIdentifier( 1, "B" ); + m.setIdentifier( 2, "C" ); + m.setIdentifier( 3, "D" ); + m.setRow( "0.00 0.95 0.17 0.98", 0 ); + m.setRow( "0.95 0.00 1.02 1.83", 1 ); + m.setRow( "0.17 1.02 0.00 1.01", 2 ); + m.setRow( "0.98 1.83 1.01 0.00", 3 ); + final Phylogeny p3 = nj.execute( m ); + p3.reRoot( p3.getNode( "C" ) ); + if ( isUnequal( p3.getNode( "A" ).getDistanceToParent(), 0.05 ) ) { + return false; + } + if ( isUnequal( p3.getNode( "B" ).getDistanceToParent(), 0.90 ) ) { + return false; + } + if ( !isEqual( p3.getNode( "C" ).getDistanceToParent(), 0.05 ) ) { + return false; + } + if ( !isEqual( p3.getNode( "D" ).getDistanceToParent(), 0.91 ) ) { + return false; + } + if ( isUnequal( p3.getNode( "A" ).getParent().getDistanceToParent(), 0.02 ) ) { + return false; + } + if ( isUnequal( p3.getNode( "A" ).getParent().getParent().getDistanceToParent(), 0.05 ) ) { + return false; + } + //if ( TIME ) { + // timeNeighborJoining(); + //} } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -2369,7 +2491,7 @@ public class TestPhylogenyReconstruction { private static void timeNeighborJoining() { final NeighborJoining nj = NeighborJoining.createInstance(); - for( int n = 3; n <= 6; ++n ) { + for( int n = 3; n <= 10; ++n ) { final int x = ( int ) Math.pow( 2, n ); final BasicSymmetricalDistanceMatrix mt = new BasicSymmetricalDistanceMatrix( x ); mt.randomize( new Date().getTime() ); @@ -2379,7 +2501,7 @@ public class TestPhylogenyReconstruction { // System.out.println( mt.toStringBuffer( Format.PHYLIP ) ); final long start_time = new Date().getTime(); nj.execute( mt ); - System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms." ); + System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms" ); } } } diff --git a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java index 2dc0ffc..b92d367 100644 --- a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java +++ b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java @@ -25,6 +25,8 @@ package org.forester.evoinference.distance; +import java.math.RoundingMode; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; @@ -36,7 +38,6 @@ import org.forester.util.ForesterUtil; public final class NeighborJoining { private BasicSymmetricalDistanceMatrix _d; - // private BasicSymmetricalDistanceMatrix _m; private double[][] _d_values; private double[][] _m_values; private double[] _r; @@ -44,16 +45,29 @@ public final class NeighborJoining { private PhylogenyNode[] _external_nodes; private int[] _mappings; private final boolean _verbose; - private final static boolean DEBUG = true; + private final DecimalFormat _df; - private NeighborJoining( final boolean verbose ) { + private NeighborJoining() { + _verbose = false; + _df = null; + } + + private NeighborJoining( final boolean verbose, final int maximum_fraction_digits_for_distances ) { + if ( ( maximum_fraction_digits_for_distances < 1 ) || ( maximum_fraction_digits_for_distances > 9 ) ) { + throw new IllegalArgumentException( "maximum fraction digits for distances is out of range: " + + maximum_fraction_digits_for_distances ); + } _verbose = verbose; + _df = new DecimalFormat(); + _df.setMaximumFractionDigits( maximum_fraction_digits_for_distances ); + _df.setRoundingMode( RoundingMode.HALF_UP ); } private final void printM() { - for( int i = 0; i < _m_values.length; i++ ) { + System.out.println( "M:" ); + for( final double[] _m_value : _m_values ) { for( int j = 0; j < _m_values.length; j++ ) { - System.out.print( _m_values[ i ][ j ] ); + System.out.print( _m_value[ j ] ); System.out.print( " " ); } System.out.println(); @@ -62,9 +76,10 @@ public final class NeighborJoining { } private final void printD() { - for( int i = 0; i < _d_values.length; i++ ) { + System.out.println( "D:" ); + for( final double[] _d_value : _d_values ) { for( int j = 0; j < _d_values.length; j++ ) { - System.out.print( _d_values[ i ][ j ] ); + System.out.print( _d_value[ j ] ); System.out.print( " " ); } System.out.println(); @@ -73,14 +88,19 @@ public final class NeighborJoining { } private final void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) { - final int otu1_m = _mappings[ otu1 ]; - final int otu2_m = _mappings[ otu2 ]; - int i_m; + // final int otu1_m = _mappings[ otu1 ]; + // final int otu2_m = _mappings[ otu2 ]; + // int i_m; for( int i = 0; i < _n; ++i ) { if ( ( i == otu1 ) || ( i == otu2 ) ) { continue; } - _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2; + if ( otu1 < i ) { + _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2; + } + else { + _d_values[ _mappings[ i ] ][ _mappings[ otu1 ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2; + } //i_m = _mappings[ i ]; //_d_values[ otu1_m ][ i_m ] = ( ( _d_values[ otu1_m ][ i_m ] + _d_values[ i_m ][ otu2_m ] ) - 2 ) / 2; } @@ -95,28 +115,10 @@ public final class NeighborJoining { for( int n = 0; n < _n; ++n ) { //d += _d_values[ i_m ][ _mappings[ n ] ]; if ( i != n ) { - if ( i < n ) { - d += getValueFromD( i, n ); - System.out.print( "+" ); - System.out.print( getValueFromD( i, n ) ); - } - else { - d += getValueFromD( n, i ); - System.out.print( "+" ); - System.out.print( getValueFromD( n, i ) ); - } - } - else { - if ( DEBUG ) { - if ( getValueFromD( i, n ) != 0 ) { - throw new RuntimeException( "faulty NJ code" ); - } - } + d += getValueFromD( i, n ); } } _r[ i ] = d; - System.out.print( "=" ); - System.out.println( d ); } } @@ -128,9 +130,9 @@ public final class NeighborJoining { // Calculates the minimal distance. // If more than one minimal distances, always the first found is used // could randomize this, so that any would be returned in a randomized fashion... - double minimum = Double.MAX_VALUE; - int otu1 = -1; - int otu2 = -1; + double minimum = _m_values[ 0 ][ 1 ]; + int otu1 = 0; + int otu2 = 1; for( int j = 1; j < _n; ++j ) { for( int i = 0; i < j; ++i ) { if ( _m_values[ i ][ j ] < minimum ) { @@ -141,17 +143,19 @@ public final class NeighborJoining { } } // It is a condition that otu1 < otu2. - if ( DEBUG ) { - if ( otu1 > otu2 ) { - throw new RuntimeException( "faulty NJ code: otu1 > otu2" ); - } - } final PhylogenyNode node = new PhylogenyNode(); final double d = getValueFromD( otu1, otu2 ); final double d1 = ( d / 2 ) + ( ( _r[ otu1 ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) ); final double d2 = d - d1; - getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 ); - getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 ); + if ( _df == null ) { + getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 ); + getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 ); + } + else { + // yes, yes, slow but only grows with n (and not n^2 or worse)... + getExternalPhylogenyNode( otu1 ).setDistanceToParent( Double.parseDouble( _df.format( d1 ) ) ); + getExternalPhylogenyNode( otu2 ).setDistanceToParent( Double.parseDouble( _df.format( d2 ) ) ); + } node.addAsChild( getExternalPhylogenyNode( otu1 ) ); node.addAsChild( getExternalPhylogenyNode( otu2 ) ); if ( _verbose ) { @@ -163,8 +167,15 @@ public final class NeighborJoining { --_n; } final double d = getValueFromD( 0, 1 ) / 2; - getExternalPhylogenyNode( 0 ).setDistanceToParent( d ); - getExternalPhylogenyNode( 1 ).setDistanceToParent( d ); + if ( _df == null ) { + getExternalPhylogenyNode( 0 ).setDistanceToParent( d ); + getExternalPhylogenyNode( 1 ).setDistanceToParent( d ); + } + else { + final double dd = Double.parseDouble( _df.format( d ) ); + getExternalPhylogenyNode( 0 ).setDistanceToParent( dd ); + getExternalPhylogenyNode( 1 ).setDistanceToParent( dd ); + } final PhylogenyNode root = new PhylogenyNode(); root.addAsChild( getExternalPhylogenyNode( 0 ) ); root.addAsChild( getExternalPhylogenyNode( 1 ) ); @@ -190,7 +201,6 @@ public final class NeighborJoining { private final double getValueFromD( final int otu1, final int otu2 ) { if ( otu1 > otu2 ) { - //throw new IllegalStateException(); return _d_values[ _mappings[ otu2 ] ][ _mappings[ otu1 ] ]; } return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ]; @@ -233,8 +243,8 @@ public final class NeighborJoining { private final void updateM() { calculateNetDivergences(); - double r_j; - int j_m; + final double r_j; + final int j_m; final int _n_2 = _n - 2; for( int j = 1; j < _n; ++j ) { //r_j = _r[ j ]; @@ -244,13 +254,8 @@ public final class NeighborJoining { //_m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ j_m ] - ( ( _r[ i ] + r_j ) / ( _n_2 ) ); } } - printM(); - printD(); } - // private final double getValueFromD( final int otu1, final int otu2 ) { - // return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ]; - // } // otu2 will, in effect, be "deleted" from the matrix. private final void updateMappings( final int otu2 ) { for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) { @@ -259,10 +264,11 @@ public final class NeighborJoining { } public final static NeighborJoining createInstance() { - return new NeighborJoining( false ); + return new NeighborJoining(); } - public final static NeighborJoining createInstance( final boolean verbose ) { - return new NeighborJoining( verbose ); + public final static NeighborJoining createInstance( final boolean verbose, + final int maximum_fraction_digits_for_distances ) { + return new NeighborJoining( verbose, maximum_fraction_digits_for_distances ); } } diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index cc0875d..35d524c 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -213,7 +213,7 @@ public class MsaCompactor { } } - Phylogeny pi( String matrix ) { + Phylogeny pi( final String matrix ) { final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, true, matrix ); final int seed = 15; final int n = 100; @@ -231,10 +231,10 @@ public class MsaCompactor { return master_phy; } - private Phylogeny inferNJphylogeny( PWD_DISTANCE_METHOD pwd_distance_method, + private Phylogeny inferNJphylogeny( final PWD_DISTANCE_METHOD pwd_distance_method, final Msa msa, - boolean write_matrix, - String matrix_name ) { + final boolean write_matrix, + final String matrix_name ) { BasicSymmetricalDistanceMatrix m = null; switch ( pwd_distance_method ) { case KIMURA_DISTANCE: @@ -253,7 +253,7 @@ public class MsaCompactor { try { m.write( ForesterUtil.createBufferedWriter( matrix_name ) ); } - catch ( IOException e ) { + catch ( final IOException e ) { // TODO Auto-generated catch block e.printStackTrace(); } -- 1.7.10.2