X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fevoinference%2Fdistance%2FNeighborJoining.java;h=f63af043f37a89f2293277142f1ea680997ffd54;hb=cb49ee5684c6907b3161db82ff9aea72961b8548;hp=3cd0d28efc1b323a7e12c5d1b9f319bbf72c9d9e;hpb=88fadb6580a37c8c55e7062cc6dab09ee583a6fd;p=jalview.git diff --git a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java index 3cd0d28..f63af04 100644 --- a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java +++ b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java @@ -2,8 +2,7 @@ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// Copyright (C) 2014 Christian M. Zmasek // All rights reserved // // This library is free software; you can redistribute it and/or @@ -37,15 +36,17 @@ import org.forester.util.ForesterUtil; public final class NeighborJoining { + private final static DecimalFormat DF = new DecimalFormat( "0.00000" ); private BasicSymmetricalDistanceMatrix _d; private double[][] _d_values; private final DecimalFormat _df; private PhylogenyNode[] _external_nodes; - private double[][] _m_values; private int[] _mappings; private int _n; private double[] _r; private final boolean _verbose; + private int _min_i; + private int _min_j; private NeighborJoining() { _verbose = false; @@ -67,22 +68,12 @@ public final class NeighborJoining { reset( distance ); final Phylogeny phylogeny = new Phylogeny(); while ( _n > 2 ) { - updateM(); // Calculates the minimal distance. // If more than one minimal distances, always the first found is used - // could randomize this, so that any would be returned in a randomized fashion... - double minimum = _m_values[ 0 ][ 1 ]; - int otu1 = 0; - int otu2 = 1; - for( int j = 1; j < _n; ++j ) { - for( int i = 0; i < j; ++i ) { - if ( _m_values[ i ][ j ] < minimum ) { - minimum = _m_values[ i ][ j ]; - otu1 = i; - otu2 = j; - } - } - } + updateM(); + final int otu1 = _min_i; + final int otu2 = _min_j; + //System.out.println( _min_i + " " + _min_j ); // It is a condition that otu1 < otu2. final PhylogenyNode node = new PhylogenyNode(); final double d = _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ]; @@ -146,20 +137,25 @@ public final class NeighborJoining { final int m_i = _mappings[ i ]; if ( otu1 < i ) { if ( otu2 > i ) { - _d_values[ m_otu1 ][ m_i ] = ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_i ][ m_otu2 ] - d ) / 2; + _d_values[ m_otu1 ][ m_i ] = ( ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_i ][ m_otu2 ] ) - d ) / 2; + //System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); } else { - _d_values[ m_otu1 ][ m_i ] = ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_otu2 ][ m_i ] - d ) / 2; + _d_values[ m_otu1 ][ m_i ] = ( ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_otu2 ][ m_i ] ) - d ) / 2; + //System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); } } else { if ( otu2 > i ) { - _d_values[ m_i ][ m_otu1 ] = ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_i ][ m_otu2 ] - d ) / 2; + _d_values[ m_i ][ m_otu1 ] = ( ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_i ][ m_otu2 ] ) - d ) / 2; + //System.out.print( DF.format( _d_values[ m_i ][ m_otu1 ] ) ); } else { - _d_values[ m_i ][ m_otu1 ] = ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_otu2 ][ m_i ] - d ) / 2; + _d_values[ m_i ][ m_otu1 ] = ( ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_otu2 ][ m_i ] ) - d ) / 2; + // System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); } } + //System.out.print( " " ); } } @@ -202,35 +198,25 @@ public final class NeighborJoining { } } - private final void printD() { - System.out.println( "D:" ); - for( final double[] _d_value : _d_values ) { - for( int j = 0; j < _d_values.length; j++ ) { - System.out.print( _d_value[ j ] ); - System.out.print( " " ); - } - System.out.println(); - } - System.out.println(); + private final void printProgress( final int otu1, final int otu2 ) { + System.out.println( "Node " + printProgressNodeToString( getExternalPhylogenyNode( otu1 ) ) + " joins " + + ( printProgressNodeToString( getExternalPhylogenyNode( otu2 ) ) ) ); } - private final void printM() { - System.out.println( "M:" ); - for( final double[] _m_value : _m_values ) { - for( int j = 0; j < _m_values.length; j++ ) { - System.out.print( _m_value[ j ] ); - System.out.print( " " ); + private final String printProgressNodeToString( final PhylogenyNode n ) { + if ( n.isExternal() ) { + if ( ForesterUtil.isEmpty( n.getName() ) ) { + return Long.toString( n.getId() ); } - System.out.println(); + return n.getName(); } - System.out.println(); - } - - private final void printProgress( final int otu1, final int otu2 ) { - final PhylogenyNode n1 = getExternalPhylogenyNode( otu1 ); - final PhylogenyNode n2 = getExternalPhylogenyNode( otu2 ); - System.out.println( "Node " + ( ForesterUtil.isEmpty( n1.getName() ) ? n1.getId() : n1.getName() ) + " joins " - + ( ForesterUtil.isEmpty( n2.getName() ) ? n2.getId() : n2.getName() ) ); + return n.getId() + + " (" + + ( ForesterUtil.isEmpty( n.getChildNode1().getName() ) ? n.getChildNode1().getId() : n.getChildNode1() + .getName() ) + + "+" + + ( ForesterUtil.isEmpty( n.getChildNode2().getName() ) ? n.getChildNode2().getId() : n.getChildNode2() + .getName() ) + ")"; } // only the values in the lower triangle are used. @@ -241,20 +227,38 @@ public final class NeighborJoining { _r = new double[ _n ]; _mappings = new int[ _n ]; _d_values = _d.getValues(); - _m_values = new double[ _n ][ _n ]; initExternalNodes(); } private final void updateM() { calculateNetDivergences(); + Double min = Double.MAX_VALUE; + _min_i = -1; + _min_j = -1; final int n_minus_2 = _n - 2; for( int j = 1; j < _n; ++j ) { final double r_j = _r[ j ]; final int m_j = _mappings[ j ]; for( int i = 0; i < j; ++i ) { - _m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ m_j ] - ( ( _r[ i ] + r_j ) / n_minus_2 ); + final double m = _d_values[ _mappings[ i ] ][ m_j ] - ( ( _r[ i ] + r_j ) / n_minus_2 ); + if ( m < min ) { + min = m; + _min_i = i; + _min_j = j; + } } } + // for( int j = 1; j < _n; ++j ) { + // final double r_j = _r[ j ]; + // final int m_j = _mappings[ j ]; + // for( int i = 0; i < j; ++i ) { + // System.out.print( i ); + // System.out.print( "->" ); + // System.out.print( DF.format( _r[ i ] ) ); + // System.out.print( " " ); + // } + // System.out.println(); + // } } // otu2 will, in effect, be "deleted" from the matrix.