From 45081fca2f4b6ba45ebc5026450980afb8340911 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 21 Dec 2012 02:07:49 +0000 Subject: [PATCH] midpoint rooting is faster --- .../org/forester/phylogeny/PhylogenyMethods.java | 215 ++++++++++++-------- forester/java/src/org/forester/test/Test.java | 99 +++++---- 2 files changed, 180 insertions(+), 134 deletions(-) diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 1dbcfa3..79a374c 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -58,76 +58,56 @@ import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; -import org.forester.util.FailedConditionCheckException; import org.forester.util.ForesterUtil; public class PhylogenyMethods { - private static PhylogenyMethods _instance = null; - private PhylogenyNode _farthest_1 = null; - private PhylogenyNode _farthest_2 = null; - + //private static PhylogenyMethods _instance = null; + //private final PhylogenyNode _farthest_1 = null; + //private final PhylogenyNode _farthest_2 = null; private PhylogenyMethods() { // Hidden constructor. } - /** - * Calculates the distance between PhylogenyNodes node1 and node2. - * - * - * @param node1 - * @param node2 - * @return distance between node1 and node2 - */ - public double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) { - final PhylogenyNode lca = calculateLCA( node1, node2 ); - final PhylogenyNode n1 = node1; - final PhylogenyNode n2 = node2; - return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) ); - } - - public double calculateFurthestDistance( final Phylogeny phylogeny ) { - if ( phylogeny.getNumberOfExternalNodes() < 2 ) { - return 0.0; - } - _farthest_1 = null; - _farthest_2 = null; - PhylogenyNode node_1 = null; - PhylogenyNode node_2 = null; - double farthest_d = -Double.MAX_VALUE; - final PhylogenyMethods methods = PhylogenyMethods.getInstance(); - final List ext_nodes = phylogeny.getRoot().getAllExternalDescendants(); - for( int i = 1; i < ext_nodes.size(); ++i ) { - for( int j = 0; j < i; ++j ) { - final double d = methods.calculateDistance( ext_nodes.get( i ), ext_nodes.get( j ) ); - if ( d < 0.0 ) { - throw new RuntimeException( "distance cannot be negative" ); - } - if ( d > farthest_d ) { - farthest_d = d; - node_1 = ext_nodes.get( i ); - node_2 = ext_nodes.get( j ); - } - } - } - _farthest_1 = node_1; - _farthest_2 = node_2; - return farthest_d; - } - + // public double calculateFurthestDistance( final Phylogeny phylogeny ) { + // if ( phylogeny.getNumberOfExternalNodes() < 2 ) { + // return 0.0; + // } + // _farthest_1 = null; + // _farthest_2 = null; + // PhylogenyNode node_1 = null; + // PhylogenyNode node_2 = null; + // double farthest_d = -Double.MAX_VALUE; + // final PhylogenyMethods methods = PhylogenyMethods.getInstance(); + // final List ext_nodes = phylogeny.getRoot().getAllExternalDescendants(); + // for( int i = 1; i < ext_nodes.size(); ++i ) { + // for( int j = 0; j < i; ++j ) { + // final double d = methods.calculateDistance( ext_nodes.get( i ), ext_nodes.get( j ) ); + // if ( d < 0.0 ) { + // throw new RuntimeException( "distance cannot be negative" ); + // } + // if ( d > farthest_d ) { + // farthest_d = d; + // node_1 = ext_nodes.get( i ); + // node_2 = ext_nodes.get( j ); + // } + // } + // } + // _farthest_1 = node_1; + // _farthest_2 = node_2; + // return farthest_d; + // } @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); } - public PhylogenyNode getFarthestNode1() { - return _farthest_1; - } - - public PhylogenyNode getFarthestNode2() { - return _farthest_2; - } - + // public PhylogenyNode getFarthestNode1() { + // return _farthest_1; + // } + // public PhylogenyNode getFarthestNode2() { + // return _farthest_2; + // } public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { @@ -168,6 +148,21 @@ public class PhylogenyMethods { } /** + * Calculates the distance between PhylogenyNodes node1 and node2. + * + * + * @param node1 + * @param node2 + * @return distance between node1 and node2 + */ + public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) { + final PhylogenyNode lca = calculateLCA( node1, node2 ); + final PhylogenyNode n1 = node1; + final PhylogenyNode n2 = node2; + return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) ); + } + + /** * Returns the LCA of PhylogenyNodes node1 and node2. * * @@ -550,13 +545,12 @@ public class PhylogenyMethods { return farthest; } - public static PhylogenyMethods getInstance() { - if ( PhylogenyMethods._instance == null ) { - PhylogenyMethods._instance = new PhylogenyMethods(); - } - return PhylogenyMethods._instance; - } - + // public static PhylogenyMethods getInstance() { + // if ( PhylogenyMethods._instance == null ) { + // PhylogenyMethods._instance = new PhylogenyMethods(); + // } + // return PhylogenyMethods._instance; + // } /** * Returns the largest confidence value found on phy. */ @@ -666,36 +660,77 @@ public class PhylogenyMethods { } public static void midpointRoot( final Phylogeny phylogeny ) { - if ( phylogeny.getNumberOfExternalNodes() < 2 ) { + if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) { return; } - final PhylogenyMethods methods = getInstance(); - final double farthest_d = methods.calculateFurthestDistance( phylogeny ); - final PhylogenyNode f1 = methods.getFarthestNode1(); - final PhylogenyNode f2 = methods.getFarthestNode2(); - if ( farthest_d <= 0.0 ) { - return; - } - double x = farthest_d / 2.0; - PhylogenyNode n = f1; - if ( PhylogenyMethods.getDistance( f1, phylogeny.getRoot() ) < PhylogenyMethods.getDistance( f2, phylogeny - .getRoot() ) ) { - n = f2; - } - while ( ( x > n.getDistanceToParent() ) && !n.isRoot() ) { - x -= ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 ); - n = n.getParent(); + int counter = 0; + final int total_nodes = phylogeny.getNodeCount(); + while ( true ) { + if ( ++counter > total_nodes ) { + throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" ); + } + PhylogenyNode a = null; + double da = 0; + double db = 0; + for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) { + final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) ); + final double df = getDistance( f, phylogeny.getRoot() ); + if ( df > 0 ) { + if ( df > da ) { + db = da; + da = df; + a = f; + } + else if ( df > db ) { + db = df; + } + } + } + final double diff = da - db; + if ( diff < 0.000001 ) { + break; + } + double x = da - ( diff / 2.0 ); + while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) { + x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 ); + a = a.getParent(); + } + phylogeny.reRoot( a, x ); } - phylogeny.reRoot( n, x ); phylogeny.recalculateNumberOfExternalDescendants( true ); - final PhylogenyNode a = getFurthestDescendant( phylogeny.getRoot().getChildNode1() ); - final PhylogenyNode b = getFurthestDescendant( phylogeny.getRoot().getChildNode2() ); - final double da = getDistance( a, phylogeny.getRoot() ); - final double db = getDistance( b, phylogeny.getRoot() ); - if ( Math.abs( da - db ) > 0.000001 ) { - throw new FailedConditionCheckException( "this should not have happened: midpoint rooting failed: da=" - + da + ", db=" + db + ", diff=" + Math.abs( da - db ) ); - } + } + + public static void midpointRootOLD( final Phylogeny phylogeny ) { + // if ( phylogeny.getNumberOfExternalNodes() < 2 ) { + // return; + // } + // final PhylogenyMethods methods = getInstance(); + //final double farthest_d = methods.calculateFurthestDistance( phylogeny ); + // final PhylogenyNode f1 = methods.getFarthestNode1(); + // final PhylogenyNode f2 = methods.getFarthestNode2(); + // if ( farthest_d <= 0.0 ) { + // return; + // } + // double x = farthest_d / 2.0; + // PhylogenyNode n = f1; + // if ( PhylogenyMethods.getDistance( f1, phylogeny.getRoot() ) < PhylogenyMethods.getDistance( f2, phylogeny + // .getRoot() ) ) { + // n = f2; + // } + // while ( ( x > n.getDistanceToParent() ) && !n.isRoot() ) { + // x -= ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 ); + // n = n.getParent(); + // } + // phylogeny.reRoot( n, x ); + // phylogeny.recalculateNumberOfExternalDescendants( true ); + // final PhylogenyNode a = getFurthestDescendant( phylogeny.getRoot().getChildNode1() ); + // final PhylogenyNode b = getFurthestDescendant( phylogeny.getRoot().getChildNode2() ); + // final double da = getDistance( a, phylogeny.getRoot() ); + // final double db = getDistance( b, phylogeny.getRoot() ); + // if ( Math.abs( da - db ) > 0.000001 ) { + // throw new FailedConditionCheckException( "this should not have happened: midpoint rooting failed: da=" + // + da + ", db=" + db + ", diff=" + Math.abs( da - db ) ); + // } } public static void normalizeBootstrapValues( final Phylogeny phylogeny, diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 163bdb3..6b726eb 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -137,7 +137,6 @@ public final class Test { } private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) { - final PhylogenyMethods pm = PhylogenyMethods.getInstance(); return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); } @@ -3005,133 +3004,132 @@ public final class Test { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p1 = factory.create( "(((A:1,B:2,X:100)ab:3,C:4)abc:5,(D:7,(E:9,F:10)ef:8)def:6)r", new NHXParser() )[ 0 ]; - final PhylogenyMethods pm = PhylogenyMethods.getInstance(); - if ( pm.calculateDistance( p1.getNode( "C" ), p1.getNode( "C" ) ) != 0 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "C" ), p1.getNode( "C" ) ) != 0 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "def" ), p1.getNode( "def" ) ) != 0 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "def" ), p1.getNode( "def" ) ) != 0 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "ef" ) ) != 0 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "ef" ) ) != 0 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "r" ), p1.getNode( "r" ) ) != 0 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "r" ), p1.getNode( "r" ) ) != 0 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "A" ) ) != 0 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "A" ) ) != 0 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "B" ) ) != 3 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "B" ) ) != 3 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "B" ), p1.getNode( "A" ) ) != 3 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "B" ), p1.getNode( "A" ) ) != 3 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "C" ) ) != 8 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "C" ) ) != 8 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "C" ), p1.getNode( "A" ) ) != 8 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "C" ), p1.getNode( "A" ) ) != 8 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "D" ) ) != 22 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "D" ) ) != 22 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "E" ) ) != 32 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "E" ) ) != 32 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "E" ), p1.getNode( "A" ) ) != 32 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "E" ), p1.getNode( "A" ) ) != 32 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "F" ) ) != 33 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "F" ) ) != 33 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "F" ), p1.getNode( "A" ) ) != 33 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "F" ), p1.getNode( "A" ) ) != 33 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "ab" ) ) != 1 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "ab" ) ) != 1 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "ab" ), p1.getNode( "A" ) ) != 1 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "ab" ), p1.getNode( "A" ) ) != 1 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "abc" ) ) != 4 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "abc" ) ) != 4 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "abc" ), p1.getNode( "A" ) ) != 4 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "abc" ), p1.getNode( "A" ) ) != 4 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "r" ) ) != 9 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "r" ) ) != 9 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "r" ), p1.getNode( "A" ) ) != 9 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "r" ), p1.getNode( "A" ) ) != 9 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "def" ) ) != 15 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "def" ) ) != 15 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "def" ), p1.getNode( "A" ) ) != 15 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "def" ), p1.getNode( "A" ) ) != 15 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "ef" ) ) != 23 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "ef" ) ) != 23 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "A" ) ) != 23 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "A" ) ) != 23 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "def" ) ) != 8 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "def" ) ) != 8 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "def" ), p1.getNode( "ef" ) ) != 8 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "def" ), p1.getNode( "ef" ) ) != 8 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "r" ) ) != 14 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "r" ) ) != 14 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "abc" ) ) != 19 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "abc" ) ) != 19 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "ab" ) ) != 22 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "ab" ) ) != 22 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "ab" ), p1.getNode( "ef" ) ) != 22 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "ab" ), p1.getNode( "ef" ) ) != 22 ) { return false; } - if ( pm.calculateDistance( p1.getNode( "def" ), p1.getNode( "abc" ) ) != 11 ) { + if ( PhylogenyMethods.calculateDistance( p1.getNode( "def" ), p1.getNode( "abc" ) ) != 11 ) { return false; } final Phylogeny p2 = factory.create( "((A:4,B:5,C:6)abc:1,(D:7,E:8,F:9)def:2,(G:10,H:11,I:12)ghi:3)r", new NHXParser() )[ 0 ]; - if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "B" ) ) != 9 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "B" ) ) != 9 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "C" ) ) != 10 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "C" ) ) != 10 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "D" ) ) != 14 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "D" ) ) != 14 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "ghi" ) ) != 8 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "ghi" ) ) != 8 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "I" ) ) != 20 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "I" ) ) != 20 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "G" ), p2.getNode( "ghi" ) ) != 10 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "G" ), p2.getNode( "ghi" ) ) != 10 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "r" ), p2.getNode( "r" ) ) != 0 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "r" ), p2.getNode( "r" ) ) != 0 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "r" ), p2.getNode( "G" ) ) != 13 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "r" ), p2.getNode( "G" ) ) != 13 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "G" ), p2.getNode( "r" ) ) != 13 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "G" ), p2.getNode( "r" ) ) != 13 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "G" ), p2.getNode( "H" ) ) != 21 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "G" ), p2.getNode( "H" ) ) != 21 ) { return false; } - if ( pm.calculateDistance( p2.getNode( "G" ), p2.getNode( "I" ) ) != 22 ) { + if ( PhylogenyMethods.calculateDistance( p2.getNode( "G" ), p2.getNode( "I" ) ) != 22 ) { return false; } } @@ -3963,6 +3961,18 @@ public final class Test { private static boolean testMidpointrooting() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny t0 = factory.create( "(A:1,B:4,C:2,D:2,E:6,F:1,G:1,H:1)", new NHXParser() )[ 0 ]; + PhylogenyMethods.midpointRoot( t0 ); + if ( !isEqual( t0.getNode( "E" ).getDistanceToParent(), 5 ) ) { + return false; + } + if ( !isEqual( t0.getNode( "B" ).getDistanceToParent(), 4 ) ) { + return false; + } + if ( !isEqual( PhylogenyMethods.calculateLCA( t0.getNode( "F" ), t0.getNode( "G" ) ).getDistanceToParent(), + 1 ) ) { + return false; + } final Phylogeny t1 = factory.create( "((A:1,B:2)AB:1[&&NHX:B=55],(C:3,D:4)CD:3[&&NHX:B=10])ABCD:0.5", new NHXParser() )[ 0 ]; if ( !t1.isRooted() ) { @@ -4002,6 +4012,7 @@ public final class Test { return false; } if ( !isEqual( t1.getNode( "CD" ).getDistanceToParent(), 1 ) ) { + System.exit( -1 ); return false; } if ( !isEqual( t1.getNode( "AB" ).getDistanceToParent(), 3 ) ) { -- 1.7.10.2