}
public static void main( final String[] args ) {
- System.out.println( "NJ" );
- if ( testNeighborJoining() ) {
- System.out.println( " OK." );
- }
- else {
- System.out.println( " failed." );
- }
+ // System.out.println( "NJ" );
+ // if ( testNeighborJoining() ) {
+ // System.out.println( " OK." );
+ // }
+ // else {
+ // System.out.println( " failed." );
+ // }
System.out.println( "NJR" );
if ( testNeighborJoiningR() ) {
System.out.println( " OK." );
m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
- nj = NeighborJoining.createInstance( false, 6 );
+ //NeighborJoiningR njr = NeighborJoiningR.createInstance( true, 6 );
+ nj = NeighborJoining.createInstance( true, 6 );
final Phylogeny p2 = nj.execute( m );
+ // Archaeopteryx.createApplication( p2 );
p2.reRoot( p2.getNode( "Bovine" ) );
if ( isUnequal( p2.getNode( "Chimp" ).getDistanceToParent(), 0.151675 ) ) {
return false;
private static boolean testNeighborJoiningR() {
try {
NeighborJoiningR nj = NeighborJoiningR.createInstance();
- final BasicSymmetricalDistanceMatrix m0 = new BasicSymmetricalDistanceMatrix( 4 );
- m0.setIdentifier( 0, "A" );
- m0.setIdentifier( 1, "B" );
- m0.setIdentifier( 2, "C" );
- m0.setIdentifier( 3, "D" );
- m0.setRow( "5 ", 1 );
- m0.setRow( "3 6 ", 2 );
- m0.setRow( "7.5 10.5 5.5", 3 );
- final Phylogeny p0 = nj.execute( m0 );
- p0.reRoot( p0.getNode( "D" ) );
- if ( isUnequal( p0.getNode( "A" ).getDistanceToParent(), 1 ) ) {
- return false;
- }
- if ( isUnequal( p0.getNode( "B" ).getDistanceToParent(), 4 ) ) {
- return false;
- }
- if ( isUnequal( p0.getNode( "C" ).getDistanceToParent(), 0.5 ) ) {
- return false;
- }
- if ( isUnequal( p0.getNode( "D" ).getDistanceToParent(), 2.5 ) ) {
- return false;
- }
- if ( isUnequal( p0.getNode( "A" ).getParent().getDistanceToParent(), 1.5 ) ) {
- return false;
- }
- if ( isUnequal( p0.getNode( "A" ).getParent().getParent().getDistanceToParent(), 2.5 ) ) {
+ // final BasicSymmetricalDistanceMatrix m0 = new BasicSymmetricalDistanceMatrix( 4 );
+ // m0.setIdentifier( 0, "A" );
+ // m0.setIdentifier( 1, "B" );
+ // m0.setIdentifier( 2, "C" );
+ // m0.setIdentifier( 3, "D" );
+ // m0.setRow( "5 ", 1 );
+ // m0.setRow( "3 6 ", 2 );
+ // m0.setRow( "7.5 10.5 5.5", 3 );
+ // final Phylogeny p0 = nj.execute( m0 );
+ // p0.reRoot( p0.getNode( "D" ) );
+ // // Archaeopteryx.createApplication( p0 );
+ // if ( isUnequal( p0.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p0.getNode( "B" ).getDistanceToParent(), 4 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p0.getNode( "C" ).getDistanceToParent(), 0.5 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p0.getNode( "D" ).getDistanceToParent(), 2.5 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p0.getNode( "A" ).getParent().getDistanceToParent(), 1.5 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p0.getNode( "A" ).getParent().getParent().getDistanceToParent(), 2.5 ) ) {
+ // return false;
+ // }
+ BasicSymmetricalDistanceMatrix m = new BasicSymmetricalDistanceMatrix( 6 );
+ // m.setRow( "5", 1 );
+ // m.setRow( "4 7", 2 );
+ // m.setRow( "7 10 7", 3 );
+ // m.setRow( "6 9 6 5", 4 );
+ // m.setRow( "8 11 8 9 8", 5 );
+ // m.setIdentifier( 0, "A" );
+ // m.setIdentifier( 1, "B" );
+ // m.setIdentifier( 2, "C" );
+ // m.setIdentifier( 3, "D" );
+ // m.setIdentifier( 4, "E" );
+ // m.setIdentifier( 5, "F" );
+ // nj = NeighborJoiningR.createInstance();
+ // final Phylogeny p1 = nj.execute( m );
+ // p1.reRoot( p1.getNode( "F" ) );
+ // Archaeopteryx.createApplication( p1 );
+ // if ( isUnequal( p1.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "B" ).getDistanceToParent(), 4 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "C" ).getDistanceToParent(), 2 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "D" ).getDistanceToParent(), 3 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "E" ).getDistanceToParent(), 2 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "F" ).getDistanceToParent(), 2.5 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "A" ).getParent().getDistanceToParent(), 1 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getDistanceToParent(), 1 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getParent().getDistanceToParent(), 2.5 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "B" ).getParent().getDistanceToParent(), 1 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "D" ).getParent().getDistanceToParent(), 1 ) ) {
+ // return false;
+ // }
+ // if ( isUnequal( p1.getNode( "E" ).getParent().getDistanceToParent(), 1 ) ) {
+ // return false;
+ // }
+ ///////////////
+ m = new BasicSymmetricalDistanceMatrix( 7 );
+ m.setIdentifier( 0, "Bovine" );
+ m.setIdentifier( 1, "Mouse" );
+ m.setIdentifier( 2, "Gibbon" );
+ m.setIdentifier( 3, "Orang" );
+ m.setIdentifier( 4, "Gorilla" );
+ m.setIdentifier( 5, "Chimp" );
+ m.setIdentifier( 6, "Human" );
+ m.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 );
+ m.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 );
+ m.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 );
+ m.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 );
+ m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
+ m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
+ m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
+ NeighborJoiningR njr = NeighborJoiningR.createInstance( true, 6 );
+ //nj = NeighborJoining.createInstance( true, 6 );
+ final Phylogeny p2 = njr.execute( m );
+ // Archaeopteryx.createApplication( p2 );
+ p2.reRoot( p2.getNode( "Bovine" ) );
+ if ( isUnequal( p2.getNode( "Chimp" ).getDistanceToParent(), 0.151675 ) ) {
return false;
}
- BasicSymmetricalDistanceMatrix m = new BasicSymmetricalDistanceMatrix( 6 );
- m.setRow( "5", 1 );
- m.setRow( "4 7", 2 );
- m.setRow( "7 10 7", 3 );
- m.setRow( "6 9 6 5", 4 );
- m.setRow( "8 11 8 9 8", 5 );
- m.setIdentifier( 0, "A" );
- m.setIdentifier( 1, "B" );
- m.setIdentifier( 2, "C" );
- m.setIdentifier( 3, "D" );
- m.setIdentifier( 4, "E" );
- m.setIdentifier( 5, "F" );
- nj = NeighborJoiningR.createInstance();
- final Phylogeny p1 = nj.execute( m );
- p1.reRoot( p1.getNode( "F" ) );
- if ( isUnequal( p1.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+ if ( isUnequal( p2.getNode( "Human" ).getDistanceToParent(), 0.117525 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "B" ).getDistanceToParent(), 4 ) ) {
+ if ( isUnequal( p2.getNode( "Gorilla" ).getDistanceToParent(), 0.153932 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "C" ).getDistanceToParent(), 2 ) ) {
+ if ( isUnequal( p2.getNode( "Orang" ).getDistanceToParent(), 0.284694 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "D" ).getDistanceToParent(), 3 ) ) {
+ if ( isUnequal( p2.getNode( "Gibbon" ).getDistanceToParent(), 0.357931 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "E" ).getDistanceToParent(), 2 ) ) {
+ if ( isUnequal( p2.getNode( "Mouse" ).getDistanceToParent(), 0.76891 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "F" ).getDistanceToParent(), 2.5 ) ) {
+ if ( isUnequal( p2.getNode( "Bovine" ).getDistanceToParent(), 0.458845 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "A" ).getParent().getDistanceToParent(), 1 ) ) {
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getDistanceToParent(), 0.039819 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getDistanceToParent(), 1 ) ) {
+ if ( isUnequal( p2.getNode( "Human" ).getParent().getDistanceToParent(), 0.039819 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "A" ).getParent().getParent().getParent().getDistanceToParent(), 2.5 ) ) {
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getDistanceToParent(), 0.026956 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "B" ).getParent().getDistanceToParent(), 1 ) ) {
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getDistanceToParent(), 0.046481 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "D" ).getParent().getDistanceToParent(), 1 ) ) {
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getDistanceToParent(),
+ 0.420269 ) ) {
return false;
}
- if ( isUnequal( p1.getNode( "E" ).getParent().getDistanceToParent(), 1 ) ) {
+ if ( isUnequal( p2.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getParent()
+ .getDistanceToParent(), 0.458845 ) ) {
return false;
}
}
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
-import java.util.SortedMap;
import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.phylogeny.Phylogeny;
public final class NeighborJoiningR {
- private BasicSymmetricalDistanceMatrix _d;
- private double[][] _d_values;
- private final DecimalFormat _df;
- private PhylogenyNode[] _external_nodes;
- private int[] _mappings;
- private int _n;
- private double[] _r;
- private final boolean _verbose;
- private int _min_i;
- private int _min_j;
- private List<SortedMap<Double, SortedSet<Integer>>> _s;
+ private final static DecimalFormat DF = new DecimalFormat( "0.00" );
+ private BasicSymmetricalDistanceMatrix _d;
+ private double[][] _d_values;
+ private final DecimalFormat _df;
+ private PhylogenyNode[] _external_nodes;
+ private int[] _mappings;
+ private int _n;
+ private double[] _r;
+ private final boolean _verbose;
+ private int _min_i;
+ private int _min_j;
+ private S _s;
+ private double _d_min; //TODO remove me
private NeighborJoiningR() {
_verbose = false;
reset( distance );
final Phylogeny phylogeny = new Phylogeny();
while ( _n > 2 ) {
+ System.out.println( "N=" + _n );
+ System.out.println();
// Calculates the minimal distance.
// If more than one minimal distances, always the first found is used
- updateM();
+ final double m = updateM();
final int otu1 = _min_i;
final int otu2 = _min_j;
+ System.out.println( _min_i + " " + _min_j + " => " + DF.format( m ) + " (" + DF.format( _d_min ) + ")" );
// It is a condition that otu1 < otu2.
final PhylogenyNode node = new PhylogenyNode();
final double d = getDvalue( otu1, otu2 );
_external_nodes[ _mappings[ otu1 ] ] = node;
updateMappings( otu2 );
--_n;
+ System.out.println( "-------------------------------------------------------------" );
+ System.out.println( "" );
}
final double d = getDvalue( 0, 1 ) / 2;
if ( _df == null ) {
return _d_values[ _mappings[ j ] ][ _mappings[ i ] ];
}
+ private double getDvalueUnmapped( final int i, final int j ) {
+ if ( i < j ) {
+ return _d_values[ i ][ j ];
+ }
+ return _d_values[ j ][ i ];
+ }
+
private final void calculateNetDivergences() {
for( int i = 0; i < _n; ++i ) {
_r[ i ] = calculateNetDivergence( i );
}
private final void printProgress( final int otu1, final int otu2 ) {
- final PhylogenyNode n1 = getExternalPhylogenyNode( otu1 );
- final PhylogenyNode n2 = getExternalPhylogenyNode( otu2 );
- System.out.println( "Node " + ( ForesterUtil.isEmpty( n1.getName() ) ? n1.getId() : n1.getName() ) + " joins "
- + ( ForesterUtil.isEmpty( n2.getName() ) ? n2.getId() : n2.getName() ) );
+ System.out.println( "Node " + printProgressNodeToString( getExternalPhylogenyNode( otu1 ) ) + " joins "
+ + ( printProgressNodeToString( getExternalPhylogenyNode( otu2 ) ) ) );
+ }
+
+ private final String printProgressNodeToString( final PhylogenyNode n ) {
+ if ( n.isExternal() ) {
+ if ( ForesterUtil.isEmpty( n.getName() ) ) {
+ return Long.toString( n.getId() );
+ }
+ return n.getName();
+ }
+ return n.getId()
+ + " ("
+ + ( ForesterUtil.isEmpty( n.getChildNode1().getName() ) ? n.getChildNode1().getId() : n.getChildNode1()
+ .getName() )
+ + "+"
+ + ( ForesterUtil.isEmpty( n.getChildNode2().getName() ) ? n.getChildNode2().getId() : n.getChildNode2()
+ .getName() ) + ")";
}
// only the values in the lower triangle are used.
_r = new double[ _n ];
_mappings = new int[ _n ];
_d_values = _d.getValues();
- _s = new ArrayList<SortedMap<Double, SortedSet<Integer>>>();
- for( int j = 0; j < _n; ++j ) {
- final TreeMap<Double, SortedSet<Integer>> map = new TreeMap<Double, SortedSet<Integer>>();
- for( int i = 0; i < j; ++i ) {
- if ( !map.containsKey( _d_values[ i ][ j ] ) ) {
- map.put( _d_values[ i ][ j ], new TreeSet<Integer>() );
- }
- map.get( _d_values[ i ][ j ] ).add( i );
- }
- _s.add( map );
- }
+ _s = new S();
+ _s.initialize( distances );
initExternalNodes();
printM();
}
final private void printM() {
for( int j = 1; j < _n; ++j ) {
for( int i = 0; i < _n; ++i ) {
- System.out.print( _d_values[ i ][ j ] );
+ System.out.print( DF.format( _d_values[ _mappings[ i ] ][ _mappings[ j ] ] ) );
System.out.print( " " );
}
System.out.print( " " );
- for( final Entry<Double, SortedSet<Integer>> entry : _s.get( j ).entrySet() ) {
+ for( final Entry<Double, SortedSet<Integer>> entry : _s.getSentrySet( _mappings[ j ] ) ) {
final double key = entry.getKey();
final SortedSet<Integer> value = entry.getValue();
- System.out.print( key + "=" );
+ System.out.print( DF.format( key ) + "=" );
boolean first = true;
for( final Integer v : value ) {
if ( !first ) {
}
}
- private final void updateM() {
+ private final double updateM() {
+ printM();
calculateNetDivergences();
Double min = Double.MAX_VALUE;
_min_i = -1;
for( int j = 1; j < _n; ++j ) {
final double r_j = _r[ j ];
final int m_j = _mappings[ j ];
- final SortedMap<Double, SortedSet<Integer>> s_j = _s.get( m_j );
- for( final Entry<Double, SortedSet<Integer>> entry : s_j.entrySet() ) {
- //Double key = entry.getKey();
- final SortedSet<Integer> value = entry.getValue();
- for( final Integer sorted_i : value ) {
- System.out.print( sorted_i + " " );
- // final double m = _d_values[ _mappings[ sorted_i ] ][ m_j ]
- // - ( ( _r[ sorted_i ] + r_j ) / n_minus_2 );
- // if ( m < min ) {
- // min = m;
- // _min_i = sorted_i;
- // _min_j = j;
- // }
+ int counter = 0;
+ int counter_all = 0;
+ X: for( final Entry<Double, SortedSet<Integer>> entry : _s.getSentrySet( m_j ) ) {
+ for( final int sorted_i : entry.getValue() ) {
+ //if ( counter_all >= j ) {
+ // break X;
+ //}
+ if ( _mappings[ counter ] == counter_all ) {
+ System.out.print( sorted_i + " " );
+ System.out.print( "(" + DF.format( getDvalueUnmapped( sorted_i, m_j ) ) + ") " );
+ final double m = getDvalueUnmapped( sorted_i, m_j ) - ( ( _r[ sorted_i ] + r_j ) / n_minus_2 );
+ if ( m < min ) {
+ _d_min = getDvalueUnmapped( sorted_i, m_j );
+ min = m;
+ _min_i = sorted_i;
+ _min_j = j;
+ }
+ ++counter;
+ }
+ ++counter_all;
}
}
System.out.println();
+ /*
for( int i = 0; i < j; ++i ) {
final double m = getDvalue( i, j ) - ( ( _r[ i ] + r_j ) / n_minus_2 );
if ( m < min ) {
min = m;
+ _d_min = getDvalue( i, j );
_min_i = i;
_min_j = j;
}
- }
+ }*/
}
+ System.out.println();
+ return min;
}
// otu2 will, in effect, be "deleted" from the matrix.