X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fevoinference%2Fdistance%2FNeighborJoining.java;h=c0792a4c10289ea56729b0dd09d4c686b1f29c5f;hb=083c646bffc9c910714880519a029ef8a38e4942;hp=c624bfff21f3dabe41e904e4a7d5028e2faa9b9e;hpb=656be28debec520e0e35a8b311114398a40ea366;p=jalview.git diff --git a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java index c624bff..c0792a4 100644 --- a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java +++ b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java @@ -2,8 +2,7 @@ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// Copyright (C) 2014 Christian M. Zmasek // All rights reserved // // This library is free software; you can redistribute it and/or @@ -25,6 +24,8 @@ package org.forester.evoinference.distance; +import java.math.RoundingMode; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; @@ -36,46 +37,29 @@ import org.forester.util.ForesterUtil; public final class NeighborJoining { private BasicSymmetricalDistanceMatrix _d; - // private BasicSymmetricalDistanceMatrix _m; private double[][] _d_values; - private double[][] _m_values; - private double[] _r; - private int _n; + private final DecimalFormat _df; private PhylogenyNode[] _external_nodes; + private double[][] _m_values; private int[] _mappings; + private int _n; + private double[] _r; private final boolean _verbose; - private final static boolean DEBUG = false; - private NeighborJoining( final boolean verbose ) { - _verbose = verbose; - } - - private final void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) { - final int otu1_m = _mappings[ otu1 ]; - final int otu2_m = _mappings[ otu2 ]; - int i_m; - for( int i = 0; i < _n; ++i ) { - if ( ( i == otu1 ) || ( i == otu2 ) ) { - continue; - } - // _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2; - i_m = _mappings[ i ]; - _d_values[ otu1_m ][ i_m ] = ( ( _d_values[ otu1_m ][ i_m ] + _d_values[ i_m ][ otu2_m ] ) - 2 ) / 2; - } + private NeighborJoining() { + _verbose = false; + _df = null; } - private final void calculateNetDivergences() { - double d; - int i_m; - for( int i = 0; i < _n; ++i ) { - d = 0; - i_m = _mappings[ i ]; - for( int n = 0; n < _n; ++n ) { - d += _d_values[ i_m ][ _mappings[ n ] ]; - //d += getValueFromD( i, n ); - } - _r[ i ] = d; + private NeighborJoining( final boolean verbose, final int maximum_fraction_digits_for_distances ) { + if ( ( maximum_fraction_digits_for_distances < 1 ) || ( maximum_fraction_digits_for_distances > 9 ) ) { + throw new IllegalArgumentException( "maximum fraction digits for distances is out of range: " + + maximum_fraction_digits_for_distances ); } + _verbose = verbose; + _df = new DecimalFormat(); + _df.setMaximumFractionDigits( maximum_fraction_digits_for_distances ); + _df.setRoundingMode( RoundingMode.HALF_UP ); } public final Phylogeny execute( final BasicSymmetricalDistanceMatrix distance ) { @@ -86,9 +70,9 @@ public final class NeighborJoining { // Calculates the minimal distance. // If more than one minimal distances, always the first found is used // could randomize this, so that any would be returned in a randomized fashion... - double minimum = Double.MAX_VALUE; - int otu1 = -1; - int otu2 = -1; + double minimum = _m_values[ 0 ][ 1 ]; + int otu1 = 0; + int otu2 = 1; for( int j = 1; j < _n; ++j ) { for( int i = 0; i < j; ++i ) { if ( _m_values[ i ][ j ] < minimum ) { @@ -99,17 +83,19 @@ public final class NeighborJoining { } } // It is a condition that otu1 < otu2. - if ( DEBUG ) { - if ( otu1 > otu2 ) { - throw new RuntimeException( "NJ code is faulty: otu1 > otu2" ); - } - } final PhylogenyNode node = new PhylogenyNode(); - final double d = getValueFromD( otu1, otu2 ); + final double d = _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ]; final double d1 = ( d / 2 ) + ( ( _r[ otu1 ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) ); final double d2 = d - d1; - getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 ); - getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 ); + if ( _df == null ) { + getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 ); + getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 ); + } + else { + // yes, yes, slow but only grows with n (and not n^2 or worse)... + getExternalPhylogenyNode( otu1 ).setDistanceToParent( Double.parseDouble( _df.format( d1 ) ) ); + getExternalPhylogenyNode( otu2 ).setDistanceToParent( Double.parseDouble( _df.format( d2 ) ) ); + } node.addAsChild( getExternalPhylogenyNode( otu1 ) ); node.addAsChild( getExternalPhylogenyNode( otu2 ) ); if ( _verbose ) { @@ -120,9 +106,16 @@ public final class NeighborJoining { updateMappings( otu2 ); --_n; } - final double d = getValueFromD( 0, 1 ) / 2; - getExternalPhylogenyNode( 0 ).setDistanceToParent( d ); - getExternalPhylogenyNode( 1 ).setDistanceToParent( d ); + final double d = _d_values[ _mappings[ 0 ] ][ _mappings[ 1 ] ] / 2; + if ( _df == null ) { + getExternalPhylogenyNode( 0 ).setDistanceToParent( d ); + getExternalPhylogenyNode( 1 ).setDistanceToParent( d ); + } + else { + final double dd = Double.parseDouble( _df.format( d ) ); + getExternalPhylogenyNode( 0 ).setDistanceToParent( dd ); + getExternalPhylogenyNode( 1 ).setDistanceToParent( dd ); + } final PhylogenyNode root = new PhylogenyNode(); root.addAsChild( getExternalPhylogenyNode( 0 ) ); root.addAsChild( getExternalPhylogenyNode( 1 ) ); @@ -142,12 +135,54 @@ public final class NeighborJoining { return pl; } - private final PhylogenyNode getExternalPhylogenyNode( final int i ) { - return _external_nodes[ _mappings[ i ] ]; + private final void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) { + final int m_otu1 = _mappings[ otu1 ]; + final int m_otu2 = _mappings[ otu2 ]; + for( int i = 0; i < _n; ++i ) { + if ( ( i == otu1 ) || ( i == otu2 ) ) { + continue; + } + final int m_i = _mappings[ i ]; + if ( otu1 < i ) { + if ( otu2 > i ) { + _d_values[ m_otu1 ][ m_i ] = ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_i ][ m_otu2 ] - d ) / 2; + } + else { + _d_values[ m_otu1 ][ m_i ] = ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_otu2 ][ m_i ] - d ) / 2; + } + } + else { + if ( otu2 > i ) { + _d_values[ m_i ][ m_otu1 ] = ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_i ][ m_otu2 ] - d ) / 2; + } + else { + _d_values[ m_i ][ m_otu1 ] = ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_otu2 ][ m_i ] - d ) / 2; + } + } + } } - private final double getValueFromD( final int otu1, final int otu2 ) { - return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ]; + private final void calculateNetDivergences() { + double d; + for( int i = 0; i < _n; ++i ) { + d = 0; + final int m_i = _mappings[ i ]; + for( int n = 0; n < _n; ++n ) { + if ( i != n ) { + if ( i > n ) { + d += _d_values[ _mappings[ n ] ][ m_i ]; + } + else { + d += _d_values[ m_i ][ _mappings[ n ] ]; + } + } + } + _r[ i ] = d; + } + } + + private final PhylogenyNode getExternalPhylogenyNode( final int i ) { + return _external_nodes[ _mappings[ i ] ]; } private final void initExternalNodes() { @@ -160,7 +195,7 @@ public final class NeighborJoining { _external_nodes[ i ].setName( id ); } else { - _external_nodes[ i ].setName( "" + i ); + _external_nodes[ i ].setName( Integer.toString( i ) ); } _mappings[ i ] = i; } @@ -187,15 +222,12 @@ public final class NeighborJoining { private final void updateM() { calculateNetDivergences(); - double r_j; - int j_m; - final int _n_2 = _n - 2; + final int n_minus_2 = _n - 2; for( int j = 1; j < _n; ++j ) { - r_j = _r[ j ]; - j_m = _mappings[ j ]; + final double r_j = _r[ j ]; + final int m_j = _mappings[ j ]; for( int i = 0; i < j; ++i ) { - // _m_values[ i ][ j ] = getValueFromD( i, j ) - ( _r[ i ] + r_j ) / ( _n - 2 ); - _m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ j_m ] - ( ( _r[ i ] + r_j ) / ( _n_2 ) ); + _m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ m_j ] - ( ( _r[ i ] + r_j ) / n_minus_2 ); } } } @@ -208,10 +240,11 @@ public final class NeighborJoining { } public final static NeighborJoining createInstance() { - return new NeighborJoining( false ); + return new NeighborJoining(); } - public final static NeighborJoining createInstance( final boolean verbose ) { - return new NeighborJoining( verbose ); + public final static NeighborJoining createInstance( final boolean verbose, + final int maximum_fraction_digits_for_distances ) { + return new NeighborJoining( verbose, maximum_fraction_digits_for_distances ); } }