// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org/forester package org.forester.sdi; import java.io.File; import java.util.ArrayList; import java.util.ListIterator; import java.util.Vector; import org.forester.io.parsers.PhylogenyParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.ForesterUtil; /* * @author Christian M. Zmasek * * @version 1.001 -- last modified: 12/04/00 */ public class DistanceCalculator { public final static double DEFAULT = -1.0; private Phylogeny tree_; private ArrayList nodes_; private int n_; private double mean_, variance_, stand_dev_; private PhylogenyNode lca_; // The LCA of the // Nodes in nodes_ /** * Default constructor. (Last modified: 11/30/00) */ public DistanceCalculator() { tree_ = null; nodes_ = null; n_ = 0; mean_ = DistanceCalculator.DEFAULT; variance_ = DistanceCalculator.DEFAULT; stand_dev_ = DistanceCalculator.DEFAULT; lca_ = null; } /** * Constructor. Sets the rooted Phylogeny t for which the mean distance to * the root and its variance and standard deviation are calculated. (Last * modified: 12/01/00) * * @param t * the rooted Phylogeny for which the mean distance to the root * and its variance and standard deviation are calculated */ public DistanceCalculator( final Phylogeny t ) { setTree( t ); } /** * Constructor. Sets the rooted Phylogeny t and the external Nodes ext_nodes * for which the mean distance to their lowest common ancestor and its * variance and standard deviation are calculated. (Last modified: 12/01/00) * * @param t * the rooted Phylogeny containing Nodes in Vector ext_nodes * @param ext_nodes * a Vector of Nodes of t, the mean distance to their lowest * common ancestor and its variance and standard deviation are * calculated */ public DistanceCalculator( final Phylogeny t, final Vector ext_nodes ) { setTreeAndExtNodes( t, ext_nodes ); } // (Last modified: 12/01/00) private PhylogenyNode calculateLCA( final ArrayList nodes ) { if ( ( nodes == null ) || nodes.isEmpty() ) { return null; } PhylogenyNode node = nodes.get( 0 ); int c = node.getNumberOfExternalNodes(); final int v = nodes.size(); while ( !node.isRoot() && ( c < v ) ) { node = node.getParent(); c = node.getNumberOfExternalNodes(); } ArrayList current_nodes = new ArrayList( node.getAllExternalDescendants() ); while ( !node.isRoot() && !current_nodes.containsAll( nodes ) ) { node = node.getParent(); current_nodes = new ArrayList( node.getAllExternalDescendants() ); } return node; } // (Last modified: 11/31/00) private void calculateMean() { if ( ( nodes_ == null ) || nodes_.isEmpty() || ( tree_ == null ) || tree_.isEmpty() ) { return; } double sum = 0.0; final ListIterator li = nodes_.listIterator(); n_ = 0; try { while ( li.hasNext() ) { n_++; sum += getDistanceToNode( li.next(), lca_ ); } } catch ( final Exception e ) { System.err.println( "calculateMean(): " + "Exception: " + e ); System.exit( -1 ); } setMean( sum / n_ ); } // (Last modified: 11/30/00) private void calculateMeanDistToRoot() { if ( ( tree_ == null ) || tree_.isEmpty() ) { return; } double sum = 0.0; PhylogenyNode node = tree_.getFirstExternalNode(); n_ = 0; while ( node != null ) { n_++; sum += getDistanceToRoot( node ); node = node.getNextExternalNode(); } setMean( sum / n_ ); } // (Last modified: 11/31/00) private void calculateStandardDeviation() { if ( ( getVariance() == DistanceCalculator.DEFAULT ) || ( getVariance() < 0.0 ) ) { return; } setStandardDeviation( java.lang.Math.sqrt( getVariance() ) ); } // (Last modified: 11/31/00) private void calculateVariance() { if ( ( getMean() == DistanceCalculator.DEFAULT ) || ( nodes_ == null ) || nodes_.isEmpty() || ( tree_ == null ) || tree_.isEmpty() || ( n_ <= 1.0 ) ) { return; } double x = 0.0, sum = 0.0; final ListIterator li = nodes_.listIterator(); try { while ( li.hasNext() ) { x = getDistanceToNode( li.next(), lca_ ) - getMean(); sum += ( x * x ); } } catch ( final Exception e ) { System.err.println( "calculateVariance(): " + "Exception: " + e ); System.exit( -1 ); } setVariance( sum / ( n_ - 1 ) ); } // (Last modified: 11/31/00) private void calculateVarianceDistToRoot() { if ( ( getMean() == DistanceCalculator.DEFAULT ) || ( tree_ == null ) || tree_.isEmpty() || ( n_ <= 1.0 ) ) { return; } double x = 0.0, sum = 0.0; PhylogenyNode node = tree_.getFirstExternalNode(); while ( node != null ) { x = getDistanceToRoot( node ) - getMean(); sum += ( x * x ); node = node.getNextExternalNode(); } setVariance( sum / ( n_ - 1 ) ); } /** * Calculates the distance of the PhylogenyNode with seq name seq_name to * the LCA of ext_nodes, which has been set either with constructor * DistanceCalculator(Phylogeny,Vector) or method * setTreeAndExtNodes(Phylogeny,Vector). Throws an exception if no * PhylogenyNode with seq name_seq name is found or if seq_name is not * unique. (Last modified: 12/03/00) * * @param seq_name * the seq name for the PhylogenyNode for which the distance to * the LCA is to be calculated * @return distance of PhylogenyNode with seq name seq_name to the LCA of * Nodes in ext_nodes * @see #DistanceCalculator(Phylogeny,Vector) * @see #setTreeAndExtNodes(Phylogeny,Vector) * @see #setTreeAndExtNodes(Phylogeny,ArrayList) */ public double getDistanceToLCA( final String seq_name ) { if ( ( tree_ == null ) || tree_.isEmpty() || ( lca_ == null ) ) { return 0.0; } return getDistanceToNode( seq_name, lca_ ); } /** * Calculates the distance of PhylogenyNode outer to PhylogenyNode inner. * PhylogenyNode inner must be closer to the root than PhylogenyNode outer * and on the same "path". (Last modified: 12/01/00) * * @param outer * a PhylogenyNode * @param inner * a PhylogenyNode closer to the root than outer * @return distance of PhylogenyNode outer to PhylogenyNode inner */ public double getDistanceToNode( PhylogenyNode outer, final PhylogenyNode inner ) { double d = 0.0, dist = 0.0; while ( ( inner != outer ) && !outer.isRoot() ) { d = outer.getDistanceToParent(); if ( d > 0.0 ) { dist += d; } outer = outer.getParent(); } if ( !inner.isRoot() && outer.isRoot() ) { throw new IllegalArgumentException( "getDistanceToNode(PhylogenyNode outer,PhylogenyNode inner): " + "PhylogenyNode inner is not closer to the root than PhylogenyNode outer " + "or is not on the same \"subtree\"" ); } return dist; } /** * Calculates the distance of the PhylogenyNode with seq name seq_name to * PhylogenyNode inner. PhylogenyNode inner must be closer to the root than * the PhylogenyNode with seq name seq_name and on the same "path". Throws * an exception if no PhylogenyNode with seq name_seq name is found or if * seq_name is not unique. (Last modified: 12/01/00) * * @param seq_name * the seq name of a PhylogenyNode further from the root than * PhylogenyNode inner * @param inner * a PhylogenyNode * @return distance of PhylogenyNode with seq name seq_nam to PhylogenyNode * inner */ public double getDistanceToNode( final String seq_name, final PhylogenyNode inner ) { if ( ( tree_ == null ) || tree_.isEmpty() ) { return 0.0; } return getDistanceToNode( tree_.getNodeViaSequenceName( seq_name ), inner ); } /** * Calculates the distance of PhylogenyNode n to the root of Phylogeny t * which has been set either with a constructor, setTree(Phylogeny), or * setTreeAndExtNodes(Phylogeny,Vector). (Last modified: 12/01/00) * * @param n * the PhylogenyNode for which the distance to the root is to be * calculated * @return distance of PhylogenyNode n to the root * @see #DistanceCalculator(Phylogeny) * @see #DistanceCalculator(Phylogeny,Vector) * @see #setTree(Phylogeny) * @see #setTreeAndExtNodes(Phylogeny,Vector) */ public double getDistanceToRoot( final PhylogenyNode n ) { if ( ( tree_ == null ) || tree_.isEmpty() ) { return 0.0; } double d = 0.0; try { d = getDistanceToNode( n, tree_.getRoot() ); } catch ( final Exception e ) { System.err.println( "getDistanceToRoot(PhylogenyNode): Unexpected " + "exception: " + e ); System.exit( -1 ); } return d; } /** * Calculates the distance of the PhylogenyNode with seq name seq_name to * the root of Phylogeny t, which has been set either with a constructor, * setTree(Phylogeny), or setTreeAndExtNodes(Phylogeny,Vector). Throws an * exception if no PhylogenyNode with seq name_seq name is found or if * seq_name is not unique. (Last modified: 12/01/00) * * @param seq_name * the seq name for the PhylogenyNode for which the distance to * the root is to be calculated * @return distance of PhylogenyNode with seq name seq_name to the root * @see #DistanceCalculator(Phylogeny) * @see #DistanceCalculator(Phylogeny,Vector) * @see #setTree(Phylogeny) * @see #setTreeAndExtNodes(Phylogeny,Vector) * @see #setTreeAndExtNodes(Phylogeny,ArrayList) */ public double getDistanceToRoot( final String seq_name ) { if ( ( tree_ == null ) || tree_.isEmpty() ) { return 0.0; } return getDistanceToNode( seq_name, tree_.getRoot() ); } /** * Returns the mean distance. If constructor DistanceCalculator(Phylogeny) * or method setTree(Phylogeny) have been used, it is the mean of the * distances from the root to all external Nodes. If constructor * DistanceCalculator(Phylogeny,Vector) or method * setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the mean of * the distances from the external nodes ext_nodes to their lowest common * ancestor. (Last modified: 11/30/00) * * @return mean distance * @see #DistanceCalculator(Phylogeny) * @see #DistanceCalculator(Phylogeny,Vector) * @see #setTree(Phylogeny) * @see #setTreeAndExtNodes(Phylogeny,Vector) * @see #setTreeAndExtNodes(Phylogeny,ArrayList) */ public double getMean() { return mean_; } /** * Returns the sum of all Nodes used to calculate the mean. (Last modified: * 12/01/00) * * @return n */ public int getN() { return n_; } /** * Returns the standard deviation. If constructor * DistanceCalculator(Phylogeny) or method setTree(Phylogeny) have been * used, it is the standard deviation of the distances from the root to all * external Nodes. If constructor DistanceCalculator(Phylogeny,Vector) or * method setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the * standard deviation of the distances from the external nodes ext_nodes to * their lowest common ancestor. (Last modified: 11/30/00) * * @return standard deviation * @see #DistanceCalculator(Phylogeny) * @see #DistanceCalculator(Phylogeny,Vector) * @see #setTree(Phylogeny) * @see #setTreeAndExtNodes(Phylogeny,Vector) * @see #setTreeAndExtNodes(Phylogeny,ArrayList) */ public double getStandardDeviation() { return stand_dev_; } /** * Returns the variance. ( 1/(N - 1) * Sum((x-mean)^2) ) If constructor * DistanceCalculator(Phylogeny) or method setTree(Phylogeny) have been * used, it is the variance of the distances from the root to all external * Nodes. If constructor DistanceCalculator(Phylogeny,Vector) or method * setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the variance * of the distances from the external nodes ext_nodes to their lowest common * ancestor. (Last modified: 11/30/00) * * @return variance * @see #DistanceCalculator(Phylogeny) * @see #DistanceCalculator(Phylogeny,Vector) * @see #setTree(Phylogeny) * @see #setTreeAndExtNodes(Phylogeny,Vector) * @see #setTreeAndExtNodes(Phylogeny,ArrayList) */ public double getVariance() { return variance_; } // (Last modified: 11/30/00) private void setMean( final double d ) { mean_ = d; } // (Last modified: 11/30/00) private void setStandardDeviation( final double d ) { stand_dev_ = d; } /** * Sets the rooted Phylogeny t for which the mean distance to the root and * its variance and standard deviation are calculated. (Last modified: * 12/01/00) * * @param t * the rooted Phylogeny for which the mean distance to the root * and its variance and standard deviation are calculated */ public void setTree( final Phylogeny t ) { tree_ = t; nodes_ = null; n_ = 0; mean_ = DistanceCalculator.DEFAULT; variance_ = DistanceCalculator.DEFAULT; stand_dev_ = DistanceCalculator.DEFAULT; lca_ = null; calculateMeanDistToRoot(); calculateVarianceDistToRoot(); calculateStandardDeviation(); } /** * Sets the rooted Phylogeny t and the external Nodes ext_nodes for which * the mean distance to their lowest common ancestor and its variance and * standard deviation are calculated. (Last modified: 12/03/00) * * @param t * the rooted Phylogeny containing Nodes in Vector ext_nodes * @param ext_nodes * a ArrayList of Nodes of t, the mean distance to their lowest * common ancestor and its variance and standard deviation are * calculated */ public void setTreeAndExtNodes( final Phylogeny t, final ArrayList ext_nodes ) { tree_ = t; nodes_ = ext_nodes; n_ = 0; mean_ = DistanceCalculator.DEFAULT; variance_ = DistanceCalculator.DEFAULT; stand_dev_ = DistanceCalculator.DEFAULT; lca_ = calculateLCA( nodes_ ); calculateMean(); calculateVariance(); calculateStandardDeviation(); } /** * Sets the rooted Phylogeny t and the external Nodes ext_nodes for which * the mean distance to their lowest common ancestor and its variance and * standard deviation are calculated. (Last modified: 12/03/00) * * @param t * the rooted Phylogeny containing Nodes in Vector ext_nodes * @param ext_nodes * a Vector of Nodes of t, the mean distance to their lowest * common ancestor and its variance and standard deviation are * calculated */ public void setTreeAndExtNodes( final Phylogeny t, final Vector ext_nodes ) { setTreeAndExtNodes( t, new ArrayList( ext_nodes ) ); } // (Last modified: 11/30/00) private void setVariance( final double d ) { variance_ = d; } // Main for testing. public static void main( final String args[] ) { File tree_file = null; Phylogeny tree = null; DistanceCalculator dc = null; tree_file = new File( args[ 0 ] ); try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( tree_file, true ); tree = factory.create( tree_file, pp )[ 0 ]; } catch ( final Exception e ) { System.out.println( e.toString() ); System.exit( -1 ); } double time = System.currentTimeMillis(); dc = new DistanceCalculator( tree ); final double m = dc.getMean(), var = dc.getVariance(), sd = dc.getStandardDeviation(); time = ( System.currentTimeMillis() - time ); System.out.println( "\nn = " + dc.getN() ); System.out.println( "mea = " + m ); System.out.println( "var = " + var ); System.out.println( "sd = " + sd + "\n" ); System.out.println( "t=" + time + "\n" ); } }