X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fevoinference%2Fdistance%2FNeighborJoining.java;h=156d72e94bc65aa486e452c4c9ad70cdb54121da;hb=dd04eb6f09074d32b99879cbe5f6bb5aa7db0ce6;hp=18182e560edc6893109ddca10645d56634e591ce;hpb=99fc1a4a87f0753845f497a73e2ac98ad05c1cc6;p=jalview.git diff --git a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java index 18182e5..156d72e 100644 --- a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java +++ b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java @@ -2,8 +2,7 @@ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// Copyright (C) 2014 Christian M. Zmasek // All rights reserved // // This library is free software; you can redistribute it and/or @@ -21,10 +20,12 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.evoinference.distance; +import java.math.RoundingMode; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; @@ -35,93 +36,78 @@ import org.forester.util.ForesterUtil; public final class NeighborJoining { + private final static DecimalFormat DF = new DecimalFormat( "0.00000" ); private BasicSymmetricalDistanceMatrix _d; - private BasicSymmetricalDistanceMatrix _m; - private double[] _r; - private int _n; + private double[][] _d_values; + private final DecimalFormat _df; private PhylogenyNode[] _external_nodes; private int[] _mappings; + private int _n; + private double[] _r; private final boolean _verbose; - private final static boolean DEBUG = false; + private int _min_i; + private int _min_j; - private NeighborJoining( final boolean verbose ) { - _verbose = verbose; + private NeighborJoining() { + _verbose = false; + _df = null; } - private final void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) { - for( int i = 0; i < _n; ++i ) { - if ( ( i == otu1 ) || ( i == otu2 ) ) { - continue; - } - //final double nd = ; - // setValueInD( nd, otu1, i ); - // _d.setValue( _mappings[ otu1 ], - // _mappings[ i ], - // ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2 ); - _d._values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2; - } - } - - private final void calculateNetDivergences() { - double d; - for( int i = 0; i < _n; ++i ) { - d = 0; - for( int n = 0; n < _n; ++n ) { - // d += getValueFromD( i, n ); - d += _d._values[ _mappings[ i ] ][ _mappings[ n ] ]; - } - _r[ i ] = d; + private NeighborJoining( final boolean verbose, final int maximum_fraction_digits_for_distances ) { + if ( ( maximum_fraction_digits_for_distances < 1 ) || ( maximum_fraction_digits_for_distances > 9 ) ) { + throw new IllegalArgumentException( "maximum fraction digits for distances is out of range: " + + maximum_fraction_digits_for_distances ); } + _verbose = verbose; + _df = new DecimalFormat(); + _df.setMaximumFractionDigits( maximum_fraction_digits_for_distances ); + _df.setRoundingMode( RoundingMode.HALF_UP ); } public final Phylogeny execute( final BasicSymmetricalDistanceMatrix distance ) { reset( distance ); final Phylogeny phylogeny = new Phylogeny(); while ( _n > 2 ) { - updateM(); // Calculates the minimal distance. // If more than one minimal distances, always the first found is used - // could randomize this, so that any would be returned in a randomized fashion... - double minimum = Double.MAX_VALUE; - int otu1 = -1; - int otu2 = -1; - for( int j = 1; j < _n; ++j ) { - for( int i = 0; i < j; ++i ) { - // if ( _m.getValue( i, j ) < minimum ) { - if ( _m._values[ i ][ j ] < minimum ) { - //minimum = _m.getValue( i, j ); - minimum = _m._values[ i ][ j ]; - otu1 = i; - otu2 = j; - } - } - } + updateM(); + final int otu1 = _min_i; + final int otu2 = _min_j; + //System.out.println( _min_i + " " + _min_j ); // It is a condition that otu1 < otu2. - if ( DEBUG ) { - if ( otu1 > otu2 ) { - throw new RuntimeException( "NJ code is faulty: otu1 > otu2" ); - } - } final PhylogenyNode node = new PhylogenyNode(); - final double d = getValueFromD( otu1, otu2 ); + final double d = _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ]; final double d1 = ( d / 2 ) + ( ( _r[ otu1 ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) ); final double d2 = d - d1; - getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 ); - getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 ); + if ( _df == null ) { + getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 ); + getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 ); + } + else { + // yes, yes, slow but only grows with n (and not n^2 or worse)... + getExternalPhylogenyNode( otu1 ).setDistanceToParent( Double.parseDouble( _df.format( d1 ) ) ); + getExternalPhylogenyNode( otu2 ).setDistanceToParent( Double.parseDouble( _df.format( d2 ) ) ); + } node.addAsChild( getExternalPhylogenyNode( otu1 ) ); node.addAsChild( getExternalPhylogenyNode( otu2 ) ); if ( _verbose ) { printProgress( otu1, otu2 ); } calculateDistancesFromNewNode( otu1, otu2, d ); - //setExternalPhylogenyNode( node, otu1 ); _external_nodes[ _mappings[ otu1 ] ] = node; updateMappings( otu2 ); --_n; } - final double d = getValueFromD( 0, 1 ) / 2; - getExternalPhylogenyNode( 0 ).setDistanceToParent( d ); - getExternalPhylogenyNode( 1 ).setDistanceToParent( d ); + final double d = _d_values[ _mappings[ 0 ] ][ _mappings[ 1 ] ] / 2; + if ( _df == null ) { + getExternalPhylogenyNode( 0 ).setDistanceToParent( d ); + getExternalPhylogenyNode( 1 ).setDistanceToParent( d ); + } + else { + final double dd = Double.parseDouble( _df.format( d ) ); + getExternalPhylogenyNode( 0 ).setDistanceToParent( dd ); + getExternalPhylogenyNode( 1 ).setDistanceToParent( dd ); + } final PhylogenyNode root = new PhylogenyNode(); root.addAsChild( getExternalPhylogenyNode( 0 ) ); root.addAsChild( getExternalPhylogenyNode( 1 ) ); @@ -141,54 +127,96 @@ public final class NeighborJoining { return pl; } - // private int[] findMinimalDistance() { - // // if more than one minimal distances, always the first found is - // // returned - // // i could randomize this, so that any would be returned in a randomized - // // fashion... - // double minimum = Double.MAX_VALUE; - // int otu_1 = -1; - // int otu_2 = -1; - // for( int j = 1; j < _n; ++j ) { - // for( int i = 0; i < j; ++i ) { - // if ( _m.getValue( i, j ) < minimum ) { - // minimum = _m.getValue( i, j ); - // otu_1 = i; - // otu_2 = j; - // } - // } - // } - // return new int[] { otu_1, otu_2 }; - // } - private final PhylogenyNode getExternalPhylogenyNode( final int i ) { - return _external_nodes[ _mappings[ i ] ]; + private final void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) { + final int m_otu1 = _mappings[ otu1 ]; + final int m_otu2 = _mappings[ otu2 ]; + for( int i = 0; i < _n; ++i ) { + if ( ( i == otu1 ) || ( i == otu2 ) ) { + continue; + } + final int m_i = _mappings[ i ]; + if ( otu1 < i ) { + if ( otu2 > i ) { + _d_values[ m_otu1 ][ m_i ] = ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_i ][ m_otu2 ] - d ) / 2; + //System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); + } + else { + _d_values[ m_otu1 ][ m_i ] = ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_otu2 ][ m_i ] - d ) / 2; + //System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); + } + } + else { + if ( otu2 > i ) { + _d_values[ m_i ][ m_otu1 ] = ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_i ][ m_otu2 ] - d ) / 2; + //System.out.print( DF.format( _d_values[ m_i ][ m_otu1 ] ) ); + } + else { + _d_values[ m_i ][ m_otu1 ] = ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_otu2 ][ m_i ] - d ) / 2; + // System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); + } + } + //System.out.print( " " ); + } + } + + private final void calculateNetDivergences() { + double d; + for( int i = 0; i < _n; ++i ) { + d = 0; + final int m_i = _mappings[ i ]; + for( int n = 0; n < _n; ++n ) { + if ( i != n ) { + if ( i > n ) { + d += _d_values[ _mappings[ n ] ][ m_i ]; + } + else { + d += _d_values[ m_i ][ _mappings[ n ] ]; + } + } + } + _r[ i ] = d; + } } - private final double getValueFromD( final int otu1, final int otu2 ) { - //return _d.getValue( _mappings[ otu1 ], _mappings[ otu2 ] ); - return _d._values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ]; + private final PhylogenyNode getExternalPhylogenyNode( final int i ) { + return _external_nodes[ _mappings[ i ] ]; } private final void initExternalNodes() { _external_nodes = new PhylogenyNode[ _n ]; + String id; for( int i = 0; i < _n; ++i ) { _external_nodes[ i ] = new PhylogenyNode(); - final String id = _d.getIdentifier( i ); + id = _d.getIdentifier( i ); if ( id != null ) { _external_nodes[ i ].setName( id ); } else { - _external_nodes[ i ].setName( "" + i ); + _external_nodes[ i ].setName( Integer.toString( i ) ); } _mappings[ i ] = i; } } private final void printProgress( final int otu1, final int otu2 ) { - final PhylogenyNode n1 = getExternalPhylogenyNode( otu1 ); - final PhylogenyNode n2 = getExternalPhylogenyNode( otu2 ); - System.out.println( "Node " + ( ForesterUtil.isEmpty( n1.getName() ) ? n1.getId() : n1.getName() ) + " joins " - + ( ForesterUtil.isEmpty( n2.getName() ) ? n2.getId() : n2.getName() ) ); + System.out.println( "Node " + printProgressNodeToString( getExternalPhylogenyNode( otu1 ) ) + " joins " + + ( printProgressNodeToString( getExternalPhylogenyNode( otu2 ) ) ) ); + } + + private final String printProgressNodeToString( final PhylogenyNode n ) { + if ( n.isExternal() ) { + if ( ForesterUtil.isEmpty( n.getName() ) ) { + return Long.toString( n.getId() ); + } + return n.getName(); + } + return n.getId() + + " (" + + ( ForesterUtil.isEmpty( n.getChildNode1().getName() ) ? n.getChildNode1().getId() : n.getChildNode1() + .getName() ) + + "+" + + ( ForesterUtil.isEmpty( n.getChildNode2().getName() ) ? n.getChildNode2().getId() : n.getChildNode2() + .getName() ) + ")"; } // only the values in the lower triangle are used. @@ -196,46 +224,56 @@ public final class NeighborJoining { private final void reset( final BasicSymmetricalDistanceMatrix distances ) { _n = distances.getSize(); _d = distances; - _m = new BasicSymmetricalDistanceMatrix( _n ); _r = new double[ _n ]; _mappings = new int[ _n ]; + _d_values = _d.getValues(); initExternalNodes(); } - // private final void setExternalPhylogenyNode( final PhylogenyNode node, final int i ) { - // _external_nodes[ _mappings[ i ] ] = node; - // } - // private final void setValueInD( final double d, final int otu1, final int otu2 ) { - // _d.setValue( _mappings[ otu1 ], _mappings[ otu2 ], d ); - // } private final void updateM() { calculateNetDivergences(); + Double min = Double.MAX_VALUE; + _min_i = -1; + _min_j = -1; + final int n_minus_2 = _n - 2; for( int j = 1; j < _n; ++j ) { + final double r_j = _r[ j ]; + final int m_j = _mappings[ j ]; for( int i = 0; i < j; ++i ) { - //_m.setValue( i, j, calculateM( i, j ) ); - //_m.setValue( i, j, getValueFromD( i, j ) - ( _r[ i ] + _r[ j ] ) / ( _n - 2 ) ); - //_m._values[ i ][ j ] = getValueFromD( i, j ) - ( _r[ i ] + _r[ j ] ) / ( _n - 2 ); - _m._values[ i ][ j ] = _d._values[ _mappings[ i ] ][ _mappings[ j ] ] - ( _r[ i ] + _r[ j ] ) - / ( _n - 2 ); + final double m = _d_values[ _mappings[ i ] ][ m_j ] - ( ( _r[ i ] + r_j ) / n_minus_2 ); + if ( m < min ) { + min = m; + _min_i = i; + _min_j = j; + } } } + // for( int j = 1; j < _n; ++j ) { + // final double r_j = _r[ j ]; + // final int m_j = _mappings[ j ]; + // for( int i = 0; i < j; ++i ) { + // System.out.print( i ); + // System.out.print( "->" ); + // System.out.print( DF.format( _r[ i ] ) ); + // System.out.print( " " ); + // } + // System.out.println(); + // } } - //private double calculateM( final int i, final int j ) { - // return getValueFromD( i, j ) - ( _r[ i ] + _r[ j ] ) / ( _n - 2 ); - //} // otu2 will, in effect, be "deleted" from the matrix. private final void updateMappings( final int otu2 ) { - for( int i = otu2; i < _mappings.length - 1; ++i ) { + for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) { _mappings[ i ] = _mappings[ i + 1 ]; } } public final static NeighborJoining createInstance() { - return new NeighborJoining( false ); + return new NeighborJoining(); } - public final static NeighborJoining createInstance( final boolean verbose ) { - return new NeighborJoining( verbose ); + public final static NeighborJoining createInstance( final boolean verbose, + final int maximum_fraction_digits_for_distances ) { + return new NeighborJoining( verbose, maximum_fraction_digits_for_distances ); } }