inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 5 Mar 2014 02:27:44 +0000 (02:27 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 5 Mar 2014 02:27:44 +0000 (02:27 +0000)
forester/java/src/org/forester/application/msa_compactor.java
forester/java/src/org/forester/application/nj.java
forester/java/src/org/forester/archaeopteryx/Constants.java
forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java
forester/java/src/org/forester/evoinference/distance/NeighborJoining.java
forester/java/src/org/forester/msa_compactor/MsaCompactor.java

index c385ca9..3291d65 100644 (file)
@@ -10,8 +10,8 @@ import org.forester.io.parsers.FastaParser;
 import org.forester.io.parsers.GeneralMsaParser;
 import org.forester.msa.Msa;
 import org.forester.msa.Msa.MSA_FORMAT;
-import org.forester.msa_compactor.MsaCompactor;
 import org.forester.msa.MsaMethods;
+import org.forester.msa_compactor.MsaCompactor;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.ForesterUtil;
 
index 0f1ef3e..ad38946 100644 (file)
@@ -119,7 +119,7 @@ public class nj {
                     + matrices[ 0 ].getSize() );
         }
         final List<Phylogeny> ps = new ArrayList<Phylogeny>();
-        final NeighborJoining nj = NeighborJoining.createInstance( verbose );
+        final NeighborJoining nj = NeighborJoining.createInstance( verbose, 6 );
         final long start_time = new Date().getTime();
         for( final DistanceMatrix matrix : matrices ) {
             ps.add( nj.execute( ( BasicSymmetricalDistanceMatrix ) matrix ) );
index 1e22f74..6a441b7 100644 (file)
@@ -82,7 +82,7 @@ public final class Constants {
     final static String         APTX_REFERENCE                                                = "Zmasek...";                                                         //TODO
     final static String         PHYLOXML_REFERENCE                                            = ForesterConstants.PHYLO_XML_REFERENCE;
     final static String         PHYLOXML_REFERENCE_SHORT                                      = "Han MV and Zmasek CM (2009), BMC Bioinformatics, 10:356";
-    final static short          NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT = 2;
+    final static short          NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT = 7;
     final static short          NUMBER_OF_DIGITS_AFTER_COMMA_FOR_CONFIDENCE_VALUES_DEFAULT    = 1;
     public static final boolean NH_PARSING_IGNORE_QUOTES_DEFAULT                              = false;
     static final CLADOGRAM_TYPE CLADOGRAM_TYPE_DEFAULT                                        = CLADOGRAM_TYPE.EXT_NODE_SUM_DEP;
index 1963491..c2c2d89 100644 (file)
@@ -33,7 +33,6 @@ import java.io.StringWriter;
 import java.util.Date;
 import java.util.List;
 
-import org.forester.archaeopteryx.Archaeopteryx;
 import org.forester.evoinference.distance.NeighborJoining;
 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
@@ -55,16 +54,24 @@ import org.forester.util.ForesterUtil;
 
 public class TestPhylogenyReconstruction {
 
-    private final static double  ZERO_DIFF = 1.0E-9;
-    private final static boolean TIME      = false;
+    private final static double ZERO_DIFF = 1.0E-9;
 
     public static boolean isEqual( final double a, final double b ) {
         return ( ( Math.abs( a - b ) ) < ZERO_DIFF );
     }
 
+    public static boolean isUnequal( final double a, final double b ) {
+        return !isEqual( a, b );
+    }
+
     public static void main( final String[] args ) {
-        testNeighborJoining();
-        // timeNeighborJoining();
+        if ( testNeighborJoining() ) {
+            System.out.println( "OK." );
+        }
+        else {
+            System.out.println( "failed." );
+        }
+        //timeNeighborJoining();
     }
 
     public static boolean test( final File test_dir ) {
@@ -1914,19 +1921,64 @@ public class TestPhylogenyReconstruction {
 
     private static boolean testNeighborJoining() {
         try {
-            final NeighborJoining nj = NeighborJoining.createInstance();
-            //            BasicSymmetricalDistanceMatrix m0 = new BasicSymmetricalDistanceMatrix( 4 );
-            //            m0.setIdentifier( 0, "A" );
-            //            m0.setIdentifier( 1, "B" );
-            //            m0.setIdentifier( 2, "C" );
-            //            m0.setIdentifier( 3, "D" );
-            //            m0.setRow( "5 ", 1 );
-            //            m0.setRow( "3 6 ", 2 );
-            //            m0.setRow( "7.5 10.5 5.5", 3 );
-            //            System.out.println( m0.toString() );
-            //            final Phylogeny p0 = nj.execute( m0 );
-            //            Archaeopteryx.createApplication( p0 );
-            //
+            NeighborJoining nj = NeighborJoining.createInstance();
+            final BasicSymmetricalDistanceMatrix m0 = new BasicSymmetricalDistanceMatrix( 4 );
+            m0.setIdentifier( 0, "A" );
+            m0.setIdentifier( 1, "B" );
+            m0.setIdentifier( 2, "C" );
+            m0.setIdentifier( 3, "D" );
+            m0.setRow( "5 ", 1 );
+            m0.setRow( "3 6 ", 2 );
+            m0.setRow( "7.5 10.5 5.5", 3 );
+            final Phylogeny p0 = nj.execute( m0 );
+            p0.reRoot( p0.getNode( "D" ) );
+            if ( isUnequal( p0.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( isUnequal( p0.getNode( "B" ).getDistanceToParent(), 4 ) ) {
+                return false;
+            }
+            if ( isUnequal( p0.getNode( "C" ).getDistanceToParent(), 0.5 ) ) {
+                return false;
+            }
+            if ( isUnequal( p0.getNode( "D" ).getDistanceToParent(), 2.5 ) ) {
+                return false;
+            }
+            if ( isUnequal( p0.getNode( "A" ).getParent().getDistanceToParent(), 1.5 ) ) {
+                return false;
+            }
+            if ( isUnequal( p0.getNode( "A" ).getParent().getParent().getDistanceToParent(), 2.5 ) ) {
+                return false;
+            }
+            nj = NeighborJoining.createInstance();
+            final BasicSymmetricalDistanceMatrix m00 = new BasicSymmetricalDistanceMatrix( 4 );
+            m00.setIdentifier( 0, "A" );
+            m00.setIdentifier( 1, "B" );
+            m00.setIdentifier( 2, "C" );
+            m00.setIdentifier( 3, "D" );
+            m00.setRow( "2.01 ", 1 );
+            m00.setRow( "3 3.01 ", 2 );
+            m00.setRow( "3.01 3.02 1.01", 3 );
+            final Phylogeny p00 = nj.execute( m00 );
+            p00.reRoot( p00.getNode( "D" ) );
+            if ( isUnequal( p00.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( isUnequal( p00.getNode( "B" ).getDistanceToParent(), 1.01 ) ) {
+                return false;
+            }
+            if ( isUnequal( p00.getNode( "C" ).getDistanceToParent(), 0.5 ) ) {
+                return false;
+            }
+            if ( isUnequal( p00.getNode( "D" ).getDistanceToParent(), 0.255 ) ) {
+                return false;
+            }
+            if ( isUnequal( p00.getNode( "A" ).getParent().getDistanceToParent(), 1.5 ) ) {
+                return false;
+            }
+            if ( isUnequal( p00.getNode( "A" ).getParent().getParent().getDistanceToParent(), 0.255 ) ) {
+                return false;
+            }
             BasicSymmetricalDistanceMatrix m = new BasicSymmetricalDistanceMatrix( 6 );
             m.setRow( "5", 1 );
             m.setRow( "4 7", 2 );
@@ -1939,66 +1991,136 @@ public class TestPhylogenyReconstruction {
             m.setIdentifier( 3, "D" );
             m.setIdentifier( 4, "E" );
             m.setIdentifier( 5, "F" );
-            System.out.println( m.toString() );
+            nj = NeighborJoining.createInstance();
             final Phylogeny p1 = nj.execute( m );
-            Archaeopteryx.createApplication( p1 );
-            //            m = new BasicSymmetricalDistanceMatrix( 7 );
-            //            m.setIdentifier( 0, "Bovine" );
-            //            m.setIdentifier( 1, "Mouse" );
-            //            m.setIdentifier( 2, "Gibbon" );
-            //            m.setIdentifier( 3, "Orang" );
-            //            m.setIdentifier( 4, "Gorilla" );
-            //            m.setIdentifier( 5, "Chimp" );
-            //            m.setIdentifier( 6, "Human" );
-            //            m.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 );
-            //            m.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 );
-            //            m.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 );
-            //            m.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 );
-            //            m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
-            //            m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
-            //            m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
-            //            System.out.println( m.toString() );
-            //            final Phylogeny p2 = nj.execute( m );
-            //            p2.reRoot( p2.getNode( "Bovine" ) );
-            //            System.out.println( p2.toString() );
-            //            Archaeopteryx.createApplication( p2 );
-            //            // from phylip Neighbor-Joining/UPGMA method version 3.69:
-            //            // ((((((Chimp:0.15167,Human:0.11753):0.03982,Gorilla:0.15393):0.02696,Orang:0.28469):0.04648,Gibbon:0.35793):0.42027,Mouse:0.76891):0.458845,Bovine:0.458845);
-            //            Archaeopteryx.createApplication( p2 );
-            //            m = new BasicSymmetricalDistanceMatrix( 4 );
-            //            m.setIdentifier( 0, "A" );
-            //            m.setIdentifier( 1, "B" );
-            //            m.setIdentifier( 2, "C" );
-            //            m.setIdentifier( 3, "D" );
-            //            m.setRow( "0.00 0.95 0.17 0.98", 0 );
-            //            m.setRow( "0.95 0.00 1.02 1.83", 1 );
-            //            m.setRow( "0.17 1.02 0.00 1.01", 2 );
-            //            m.setRow( "0.98 1.83 1.01 0.00", 3 );
-            //            final Phylogeny p3 = nj.execute( m );
-            //            //
-            //            // -- A 0.05
-            //            // - |0.01
-            //            // ----------------------- B 0.90
-            //            //
-            //            // --- C 0.10
-            //            // - |0.01
-            //            // ------------------------- D 0.91
-            //            p3.reRoot( p3.getNode( "C" ).getParent() );
-            //            if ( !isEqual( p3.getNode( "A" ).getDistanceToParent(), 0.05 ) ) {
-            //                return false;
-            //            }
-            //            if ( !isEqual( p3.getNode( "B" ).getDistanceToParent(), 0.90 ) ) {
-            //                return false;
-            //            }
-            //            if ( !isEqual( p3.getNode( "C" ).getDistanceToParent(), 0.10 ) ) {
-            //                return false;
-            //            }
-            //            if ( !isEqual( p3.getNode( "D" ).getDistanceToParent(), 0.91 ) ) {
-            //                return false;
-            //            }
-            //            if ( TIME ) {
-            //                timeNeighborJoining();
-            //            }
+            p1.reRoot( p1.getNode( "F" ) );
+            if ( isUnequal( p1.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "B" ).getDistanceToParent(), 4 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "C" ).getDistanceToParent(), 2 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "D" ).getDistanceToParent(), 3 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "E" ).getDistanceToParent(), 2 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "F" ).getDistanceToParent(), 2.5 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "A" ).getParent().getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getParent().getDistanceToParent(), 2.5 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "B" ).getParent().getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "D" ).getParent().getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( isUnequal( p1.getNode( "E" ).getParent().getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            m = new BasicSymmetricalDistanceMatrix( 7 );
+            m.setIdentifier( 0, "Bovine" );
+            m.setIdentifier( 1, "Mouse" );
+            m.setIdentifier( 2, "Gibbon" );
+            m.setIdentifier( 3, "Orang" );
+            m.setIdentifier( 4, "Gorilla" );
+            m.setIdentifier( 5, "Chimp" );
+            m.setIdentifier( 6, "Human" );
+            m.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 );
+            m.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 );
+            m.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 );
+            m.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 );
+            m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
+            m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
+            m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
+            nj = NeighborJoining.createInstance( false, 6 );
+            final Phylogeny p2 = nj.execute( m );
+            p2.reRoot( p2.getNode( "Bovine" ) );
+            if ( isUnequal( p2.getNode( "Chimp" ).getDistanceToParent(), 0.151675 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Human" ).getDistanceToParent(), 0.117525 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Gorilla" ).getDistanceToParent(), 0.153932 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Orang" ).getDistanceToParent(), 0.284694 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Gibbon" ).getDistanceToParent(), 0.357931 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Mouse" ).getDistanceToParent(), 0.76891 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Bovine" ).getDistanceToParent(), 0.458845 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Chimp" ).getParent().getDistanceToParent(), 0.039819 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Human" ).getParent().getDistanceToParent(), 0.039819 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getDistanceToParent(), 0.026956 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getDistanceToParent(), 0.046481 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getDistanceToParent(),
+                            0.420269 ) ) {
+                return false;
+            }
+            if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getParent()
+                    .getDistanceToParent(), 0.458845 ) ) {
+                return false;
+            }
+            m = new BasicSymmetricalDistanceMatrix( 4 );
+            m.setIdentifier( 0, "A" );
+            m.setIdentifier( 1, "B" );
+            m.setIdentifier( 2, "C" );
+            m.setIdentifier( 3, "D" );
+            m.setRow( "0.00 0.95 0.17 0.98", 0 );
+            m.setRow( "0.95 0.00 1.02 1.83", 1 );
+            m.setRow( "0.17 1.02 0.00 1.01", 2 );
+            m.setRow( "0.98 1.83 1.01 0.00", 3 );
+            final Phylogeny p3 = nj.execute( m );
+            p3.reRoot( p3.getNode( "C" ) );
+            if ( isUnequal( p3.getNode( "A" ).getDistanceToParent(), 0.05 ) ) {
+                return false;
+            }
+            if ( isUnequal( p3.getNode( "B" ).getDistanceToParent(), 0.90 ) ) {
+                return false;
+            }
+            if ( !isEqual( p3.getNode( "C" ).getDistanceToParent(), 0.05 ) ) {
+                return false;
+            }
+            if ( !isEqual( p3.getNode( "D" ).getDistanceToParent(), 0.91 ) ) {
+                return false;
+            }
+            if ( isUnequal( p3.getNode( "A" ).getParent().getDistanceToParent(), 0.02 ) ) {
+                return false;
+            }
+            if ( isUnequal( p3.getNode( "A" ).getParent().getParent().getDistanceToParent(), 0.05 ) ) {
+                return false;
+            }
+            //if ( TIME ) {
+            //    timeNeighborJoining();
+            //}
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
@@ -2369,7 +2491,7 @@ public class TestPhylogenyReconstruction {
 
     private static void timeNeighborJoining() {
         final NeighborJoining nj = NeighborJoining.createInstance();
-        for( int n = 3; n <= 6; ++n ) {
+        for( int n = 3; n <= 10; ++n ) {
             final int x = ( int ) Math.pow( 2, n );
             final BasicSymmetricalDistanceMatrix mt = new BasicSymmetricalDistanceMatrix( x );
             mt.randomize( new Date().getTime() );
@@ -2379,7 +2501,7 @@ public class TestPhylogenyReconstruction {
             //  System.out.println( mt.toStringBuffer( Format.PHYLIP ) );
             final long start_time = new Date().getTime();
             nj.execute( mt );
-            System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms." );
+            System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms" );
         }
     }
 }
index 2dc0ffc..b92d367 100644 (file)
@@ -25,6 +25,8 @@
 
 package org.forester.evoinference.distance;
 
+import java.math.RoundingMode;
+import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -36,7 +38,6 @@ import org.forester.util.ForesterUtil;
 public final class NeighborJoining {
 
     private BasicSymmetricalDistanceMatrix _d;
-    // private BasicSymmetricalDistanceMatrix _m;
     private double[][]                     _d_values;
     private double[][]                     _m_values;
     private double[]                       _r;
@@ -44,16 +45,29 @@ public final class NeighborJoining {
     private PhylogenyNode[]                _external_nodes;
     private int[]                          _mappings;
     private final boolean                  _verbose;
-    private final static boolean           DEBUG = true;
+    private final DecimalFormat            _df;
 
-    private NeighborJoining( final boolean verbose ) {
+    private NeighborJoining() {
+        _verbose = false;
+        _df = null;
+    }
+
+    private NeighborJoining( final boolean verbose, final int maximum_fraction_digits_for_distances ) {
+        if ( ( maximum_fraction_digits_for_distances < 1 ) || ( maximum_fraction_digits_for_distances > 9 ) ) {
+            throw new IllegalArgumentException( "maximum fraction digits for distances is out of range: "
+                    + maximum_fraction_digits_for_distances );
+        }
         _verbose = verbose;
+        _df = new DecimalFormat();
+        _df.setMaximumFractionDigits( maximum_fraction_digits_for_distances );
+        _df.setRoundingMode( RoundingMode.HALF_UP );
     }
 
     private final void printM() {
-        for( int i = 0; i < _m_values.length; i++ ) {
+        System.out.println( "M:" );
+        for( final double[] _m_value : _m_values ) {
             for( int j = 0; j < _m_values.length; j++ ) {
-                System.out.print( _m_values[ i ][ j ] );
+                System.out.print( _m_value[ j ] );
                 System.out.print( " " );
             }
             System.out.println();
@@ -62,9 +76,10 @@ public final class NeighborJoining {
     }
 
     private final void printD() {
-        for( int i = 0; i < _d_values.length; i++ ) {
+        System.out.println( "D:" );
+        for( final double[] _d_value : _d_values ) {
             for( int j = 0; j < _d_values.length; j++ ) {
-                System.out.print( _d_values[ i ][ j ] );
+                System.out.print( _d_value[ j ] );
                 System.out.print( " " );
             }
             System.out.println();
@@ -73,14 +88,19 @@ public final class NeighborJoining {
     }
 
     private final void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) {
-        final int otu1_m = _mappings[ otu1 ];
-        final int otu2_m = _mappings[ otu2 ];
-        int i_m;
+        // final int otu1_m = _mappings[ otu1 ];
+        // final int otu2_m = _mappings[ otu2 ];
+        //  int i_m;
         for( int i = 0; i < _n; ++i ) {
             if ( ( i == otu1 ) || ( i == otu2 ) ) {
                 continue;
             }
-            _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
+            if ( otu1 < i ) {
+                _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
+            }
+            else {
+                _d_values[ _mappings[ i ] ][ _mappings[ otu1 ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
+            }
             //i_m = _mappings[ i ];
             //_d_values[ otu1_m ][ i_m ] = ( ( _d_values[ otu1_m ][ i_m ] + _d_values[ i_m ][ otu2_m ] ) - 2 ) / 2;
         }
@@ -95,28 +115,10 @@ public final class NeighborJoining {
             for( int n = 0; n < _n; ++n ) {
                 //d += _d_values[ i_m ][ _mappings[ n ] ];
                 if ( i != n ) {
-                    if ( i < n ) {
-                        d += getValueFromD( i, n );
-                        System.out.print( "+" );
-                        System.out.print( getValueFromD( i, n ) );
-                    }
-                    else {
-                        d += getValueFromD( n, i );
-                        System.out.print( "+" );
-                        System.out.print( getValueFromD( n, i ) );
-                    }
-                }
-                else {
-                    if ( DEBUG ) {
-                        if ( getValueFromD( i, n ) != 0 ) {
-                            throw new RuntimeException( "faulty NJ code" );
-                        }
-                    }
+                    d += getValueFromD( i, n );
                 }
             }
             _r[ i ] = d;
-            System.out.print( "=" );
-            System.out.println( d );
         }
     }
 
@@ -128,9 +130,9 @@ public final class NeighborJoining {
             // Calculates the minimal distance.
             // If more than one minimal distances, always the first found is used
             // could randomize this, so that any would be returned in a randomized fashion...
-            double minimum = Double.MAX_VALUE;
-            int otu1 = -1;
-            int otu2 = -1;
+            double minimum = _m_values[ 0 ][ 1 ];
+            int otu1 = 0;
+            int otu2 = 1;
             for( int j = 1; j < _n; ++j ) {
                 for( int i = 0; i < j; ++i ) {
                     if ( _m_values[ i ][ j ] < minimum ) {
@@ -141,17 +143,19 @@ public final class NeighborJoining {
                 }
             }
             // It is a condition that otu1 < otu2.
-            if ( DEBUG ) {
-                if ( otu1 > otu2 ) {
-                    throw new RuntimeException( "faulty NJ code: otu1 > otu2" );
-                }
-            }
             final PhylogenyNode node = new PhylogenyNode();
             final double d = getValueFromD( otu1, otu2 );
             final double d1 = ( d / 2 ) + ( ( _r[ otu1 ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) );
             final double d2 = d - d1;
-            getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 );
-            getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 );
+            if ( _df == null ) {
+                getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 );
+                getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 );
+            }
+            else {
+                // yes, yes, slow but only grows with n (and not n^2 or worse)...
+                getExternalPhylogenyNode( otu1 ).setDistanceToParent( Double.parseDouble( _df.format( d1 ) ) );
+                getExternalPhylogenyNode( otu2 ).setDistanceToParent( Double.parseDouble( _df.format( d2 ) ) );
+            }
             node.addAsChild( getExternalPhylogenyNode( otu1 ) );
             node.addAsChild( getExternalPhylogenyNode( otu2 ) );
             if ( _verbose ) {
@@ -163,8 +167,15 @@ public final class NeighborJoining {
             --_n;
         }
         final double d = getValueFromD( 0, 1 ) / 2;
-        getExternalPhylogenyNode( 0 ).setDistanceToParent( d );
-        getExternalPhylogenyNode( 1 ).setDistanceToParent( d );
+        if ( _df == null ) {
+            getExternalPhylogenyNode( 0 ).setDistanceToParent( d );
+            getExternalPhylogenyNode( 1 ).setDistanceToParent( d );
+        }
+        else {
+            final double dd = Double.parseDouble( _df.format( d ) );
+            getExternalPhylogenyNode( 0 ).setDistanceToParent( dd );
+            getExternalPhylogenyNode( 1 ).setDistanceToParent( dd );
+        }
         final PhylogenyNode root = new PhylogenyNode();
         root.addAsChild( getExternalPhylogenyNode( 0 ) );
         root.addAsChild( getExternalPhylogenyNode( 1 ) );
@@ -190,7 +201,6 @@ public final class NeighborJoining {
 
     private final double getValueFromD( final int otu1, final int otu2 ) {
         if ( otu1 > otu2 ) {
-            //throw new IllegalStateException();
             return _d_values[ _mappings[ otu2 ] ][ _mappings[ otu1 ] ];
         }
         return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ];
@@ -233,8 +243,8 @@ public final class NeighborJoining {
 
     private final void updateM() {
         calculateNetDivergences();
-        double r_j;
-        int j_m;
+        final double r_j;
+        final int j_m;
         final int _n_2 = _n - 2;
         for( int j = 1; j < _n; ++j ) {
             //r_j = _r[ j ];
@@ -244,13 +254,8 @@ public final class NeighborJoining {
                 //_m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ j_m ] - ( ( _r[ i ] + r_j ) / ( _n_2 ) );
             }
         }
-        printM();
-        printD();
     }
 
-    //  private final double getValueFromD( final int otu1, final int otu2 ) {
-    //      return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ];
-    // }
     // otu2 will, in effect, be "deleted" from the matrix.
     private final void updateMappings( final int otu2 ) {
         for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) {
@@ -259,10 +264,11 @@ public final class NeighborJoining {
     }
 
     public final static NeighborJoining createInstance() {
-        return new NeighborJoining( false );
+        return new NeighborJoining();
     }
 
-    public final static NeighborJoining createInstance( final boolean verbose ) {
-        return new NeighborJoining( verbose );
+    public final static NeighborJoining createInstance( final boolean verbose,
+                                                        final int maximum_fraction_digits_for_distances ) {
+        return new NeighborJoining( verbose, maximum_fraction_digits_for_distances );
     }
 }
index cc0875d..35d524c 100644 (file)
@@ -213,7 +213,7 @@ public class MsaCompactor {
         }
     }
 
-    Phylogeny pi( String matrix ) {
+    Phylogeny pi( final String matrix ) {
         final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, true, matrix );
         final int seed = 15;
         final int n = 100;
@@ -231,10 +231,10 @@ public class MsaCompactor {
         return master_phy;
     }
 
-    private Phylogeny inferNJphylogeny( PWD_DISTANCE_METHOD pwd_distance_method,
+    private Phylogeny inferNJphylogeny( final PWD_DISTANCE_METHOD pwd_distance_method,
                                         final Msa msa,
-                                        boolean write_matrix,
-                                        String matrix_name ) {
+                                        final boolean write_matrix,
+                                        final String matrix_name ) {
         BasicSymmetricalDistanceMatrix m = null;
         switch ( pwd_distance_method ) {
             case KIMURA_DISTANCE:
@@ -253,7 +253,7 @@ public class MsaCompactor {
             try {
                 m.write( ForesterUtil.createBufferedWriter( matrix_name ) );
             }
-            catch ( IOException e ) {
+            catch ( final IOException e ) {
                 // TODO Auto-generated catch block
                 e.printStackTrace();
             }