+++ /dev/null
-// $Id:
-// FORESTER -- software libraries and applications
-// for evolutionary biology research and applications.
-//
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
-// Copyright (C) 2000-2001 Washington University School of Medicine
-// and Howard Hughes Medical Institute
-// All rights reserved
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-//
-// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
-
-package org.forester.sdi;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.ListIterator;
-import java.util.Vector;
-
-import org.forester.io.parsers.PhylogenyParser;
-import org.forester.io.parsers.util.ParserUtils;
-import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
-import org.forester.phylogeny.factories.PhylogenyFactory;
-
-/*
- * @author Christian M. Zmasek
- *
- * @version 1.001 -- last modified: 12/04/00
- */
-public class DistanceCalculator {
-
- public final static double DEFAULT = -1.0;
- private Phylogeny tree_;
- private ArrayList<PhylogenyNode> nodes_;
- private int n_;
- private double mean_, variance_, stand_dev_;
- private PhylogenyNode lca_; // The LCA of the
-
- // Nodes in nodes_
- /**
- * Default constructor. (Last modified: 11/30/00)
- */
- public DistanceCalculator() {
- tree_ = null;
- nodes_ = null;
- n_ = 0;
- mean_ = DistanceCalculator.DEFAULT;
- variance_ = DistanceCalculator.DEFAULT;
- stand_dev_ = DistanceCalculator.DEFAULT;
- lca_ = null;
- }
-
- /**
- * Constructor. Sets the rooted Phylogeny t for which the mean distance to
- * the root and its variance and standard deviation are calculated. (Last
- * modified: 12/01/00)
- *
- * @param t
- * the rooted Phylogeny for which the mean distance to the root
- * and its variance and standard deviation are calculated
- */
- public DistanceCalculator( final Phylogeny t ) {
- setTree( t );
- }
-
- /**
- * Constructor. Sets the rooted Phylogeny t and the external Nodes ext_nodes
- * for which the mean distance to their lowest common ancestor and its
- * variance and standard deviation are calculated. (Last modified: 12/01/00)
- *
- * @param t
- * the rooted Phylogeny containing Nodes in Vector ext_nodes
- * @param ext_nodes
- * a Vector of Nodes of t, the mean distance to their lowest
- * common ancestor and its variance and standard deviation are
- * calculated
- */
- public DistanceCalculator( final Phylogeny t, final Vector<PhylogenyNode> ext_nodes ) {
- setTreeAndExtNodes( t, ext_nodes );
- }
-
- // (Last modified: 12/01/00)
- private PhylogenyNode calculateLCA( final ArrayList<PhylogenyNode> nodes ) {
- if ( ( nodes == null ) || nodes.isEmpty() ) {
- return null;
- }
- PhylogenyNode node = nodes.get( 0 );
- int c = node.getNumberOfExternalNodes();
- final int v = nodes.size();
- while ( !node.isRoot() && ( c < v ) ) {
- node = node.getParent();
- c = node.getNumberOfExternalNodes();
- }
- ArrayList<PhylogenyNode> current_nodes = new ArrayList<PhylogenyNode>( node.getAllExternalDescendants() );
- while ( !node.isRoot() && !current_nodes.containsAll( nodes ) ) {
- node = node.getParent();
- current_nodes = new ArrayList<PhylogenyNode>( node.getAllExternalDescendants() );
- }
- return node;
- }
-
- // (Last modified: 11/31/00)
- private void calculateMean() {
- if ( ( nodes_ == null ) || nodes_.isEmpty() || ( tree_ == null ) || tree_.isEmpty() ) {
- return;
- }
- double sum = 0.0;
- final ListIterator<PhylogenyNode> li = nodes_.listIterator();
- n_ = 0;
- try {
- while ( li.hasNext() ) {
- n_++;
- sum += getDistanceToNode( li.next(), lca_ );
- }
- }
- catch ( final Exception e ) {
- System.err.println( "calculateMean(): " + "Exception: " + e );
- System.exit( -1 );
- }
- setMean( sum / n_ );
- }
-
- // (Last modified: 11/30/00)
- private void calculateMeanDistToRoot() {
- if ( ( tree_ == null ) || tree_.isEmpty() ) {
- return;
- }
- double sum = 0.0;
- PhylogenyNode node = tree_.getFirstExternalNode();
- n_ = 0;
- while ( node != null ) {
- n_++;
- sum += getDistanceToRoot( node );
- node = node.getNextExternalNode();
- }
- setMean( sum / n_ );
- }
-
- // (Last modified: 11/31/00)
- private void calculateStandardDeviation() {
- if ( ( getVariance() == DistanceCalculator.DEFAULT ) || ( getVariance() < 0.0 ) ) {
- return;
- }
- setStandardDeviation( java.lang.Math.sqrt( getVariance() ) );
- }
-
- // (Last modified: 11/31/00)
- private void calculateVariance() {
- if ( ( getMean() == DistanceCalculator.DEFAULT ) || ( nodes_ == null ) || nodes_.isEmpty() || ( tree_ == null )
- || tree_.isEmpty() || ( n_ <= 1.0 ) ) {
- return;
- }
- double x = 0.0, sum = 0.0;
- final ListIterator<PhylogenyNode> li = nodes_.listIterator();
- try {
- while ( li.hasNext() ) {
- x = getDistanceToNode( li.next(), lca_ ) - getMean();
- sum += ( x * x );
- }
- }
- catch ( final Exception e ) {
- System.err.println( "calculateVariance(): " + "Exception: " + e );
- System.exit( -1 );
- }
- setVariance( sum / ( n_ - 1 ) );
- }
-
- // (Last modified: 11/31/00)
- private void calculateVarianceDistToRoot() {
- if ( ( getMean() == DistanceCalculator.DEFAULT ) || ( tree_ == null ) || tree_.isEmpty() || ( n_ <= 1.0 ) ) {
- return;
- }
- double x = 0.0, sum = 0.0;
- PhylogenyNode node = tree_.getFirstExternalNode();
- while ( node != null ) {
- x = getDistanceToRoot( node ) - getMean();
- sum += ( x * x );
- node = node.getNextExternalNode();
- }
- setVariance( sum / ( n_ - 1 ) );
- }
-
- /**
- * Calculates the distance of the PhylogenyNode with seq name seq_name to
- * the LCA of ext_nodes, which has been set either with constructor
- * DistanceCalculator(Phylogeny,Vector) or method
- * setTreeAndExtNodes(Phylogeny,Vector). Throws an exception if no
- * PhylogenyNode with seq name_seq name is found or if seq_name is not
- * unique. (Last modified: 12/03/00)
- *
- * @param seq_name
- * the seq name for the PhylogenyNode for which the distance to
- * the LCA is to be calculated
- * @return distance of PhylogenyNode with seq name seq_name to the LCA of
- * Nodes in ext_nodes
- * @see #DistanceCalculator(Phylogeny,Vector)
- * @see #setTreeAndExtNodes(Phylogeny,Vector)
- * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
- */
- public double getDistanceToLCA( final String seq_name ) {
- if ( ( tree_ == null ) || tree_.isEmpty() || ( lca_ == null ) ) {
- return 0.0;
- }
- return getDistanceToNode( seq_name, lca_ );
- }
-
- /**
- * Calculates the distance of PhylogenyNode outer to PhylogenyNode inner.
- * PhylogenyNode inner must be closer to the root than PhylogenyNode outer
- * and on the same "path". (Last modified: 12/01/00)
- *
- * @param outer
- * a PhylogenyNode
- * @param inner
- * a PhylogenyNode closer to the root than outer
- * @return distance of PhylogenyNode outer to PhylogenyNode inner
- */
- public double getDistanceToNode( PhylogenyNode outer, final PhylogenyNode inner ) {
- double d = 0.0, dist = 0.0;
- while ( ( inner != outer ) && !outer.isRoot() ) {
- d = outer.getDistanceToParent();
- if ( d > 0.0 ) {
- dist += d;
- }
- outer = outer.getParent();
- }
- if ( !inner.isRoot() && outer.isRoot() ) {
- throw new IllegalArgumentException( "getDistanceToNode(PhylogenyNode outer,PhylogenyNode inner): "
- + "PhylogenyNode inner is not closer to the root than PhylogenyNode outer "
- + "or is not on the same \"subtree\"" );
- }
- return dist;
- }
-
- /**
- * Calculates the distance of the PhylogenyNode with seq name seq_name to
- * PhylogenyNode inner. PhylogenyNode inner must be closer to the root than
- * the PhylogenyNode with seq name seq_name and on the same "path". Throws
- * an exception if no PhylogenyNode with seq name_seq name is found or if
- * seq_name is not unique. (Last modified: 12/01/00)
- *
- * @param seq_name
- * the seq name of a PhylogenyNode further from the root than
- * PhylogenyNode inner
- * @param inner
- * a PhylogenyNode
- * @return distance of PhylogenyNode with seq name seq_nam to PhylogenyNode
- * inner
- */
- public double getDistanceToNode( final String seq_name, final PhylogenyNode inner ) {
- if ( ( tree_ == null ) || tree_.isEmpty() ) {
- return 0.0;
- }
- return getDistanceToNode( tree_.getNodeViaSequenceName( seq_name ), inner );
- }
-
- /**
- * Calculates the distance of PhylogenyNode n to the root of Phylogeny t
- * which has been set either with a constructor, setTree(Phylogeny), or
- * setTreeAndExtNodes(Phylogeny,Vector). (Last modified: 12/01/00)
- *
- * @param n
- * the PhylogenyNode for which the distance to the root is to be
- * calculated
- * @return distance of PhylogenyNode n to the root
- * @see #DistanceCalculator(Phylogeny)
- * @see #DistanceCalculator(Phylogeny,Vector)
- * @see #setTree(Phylogeny)
- * @see #setTreeAndExtNodes(Phylogeny,Vector)
- */
- public double getDistanceToRoot( final PhylogenyNode n ) {
- if ( ( tree_ == null ) || tree_.isEmpty() ) {
- return 0.0;
- }
- double d = 0.0;
- try {
- d = getDistanceToNode( n, tree_.getRoot() );
- }
- catch ( final Exception e ) {
- System.err.println( "getDistanceToRoot(PhylogenyNode): Unexpected " + "exception: " + e );
- System.exit( -1 );
- }
- return d;
- }
-
- /**
- * Calculates the distance of the PhylogenyNode with seq name seq_name to
- * the root of Phylogeny t, which has been set either with a constructor,
- * setTree(Phylogeny), or setTreeAndExtNodes(Phylogeny,Vector). Throws an
- * exception if no PhylogenyNode with seq name_seq name is found or if
- * seq_name is not unique. (Last modified: 12/01/00)
- *
- * @param seq_name
- * the seq name for the PhylogenyNode for which the distance to
- * the root is to be calculated
- * @return distance of PhylogenyNode with seq name seq_name to the root
- * @see #DistanceCalculator(Phylogeny)
- * @see #DistanceCalculator(Phylogeny,Vector)
- * @see #setTree(Phylogeny)
- * @see #setTreeAndExtNodes(Phylogeny,Vector)
- * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
- */
- public double getDistanceToRoot( final String seq_name ) {
- if ( ( tree_ == null ) || tree_.isEmpty() ) {
- return 0.0;
- }
- return getDistanceToNode( seq_name, tree_.getRoot() );
- }
-
- /**
- * Returns the mean distance. If constructor DistanceCalculator(Phylogeny)
- * or method setTree(Phylogeny) have been used, it is the mean of the
- * distances from the root to all external Nodes. If constructor
- * DistanceCalculator(Phylogeny,Vector) or method
- * setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the mean of
- * the distances from the external nodes ext_nodes to their lowest common
- * ancestor. (Last modified: 11/30/00)
- *
- * @return mean distance
- * @see #DistanceCalculator(Phylogeny)
- * @see #DistanceCalculator(Phylogeny,Vector)
- * @see #setTree(Phylogeny)
- * @see #setTreeAndExtNodes(Phylogeny,Vector)
- * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
- */
- public double getMean() {
- return mean_;
- }
-
- /**
- * Returns the sum of all Nodes used to calculate the mean. (Last modified:
- * 12/01/00)
- *
- * @return n
- */
- public int getN() {
- return n_;
- }
-
- /**
- * Returns the standard deviation. If constructor
- * DistanceCalculator(Phylogeny) or method setTree(Phylogeny) have been
- * used, it is the standard deviation of the distances from the root to all
- * external Nodes. If constructor DistanceCalculator(Phylogeny,Vector) or
- * method setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the
- * standard deviation of the distances from the external nodes ext_nodes to
- * their lowest common ancestor. (Last modified: 11/30/00)
- *
- * @return standard deviation
- * @see #DistanceCalculator(Phylogeny)
- * @see #DistanceCalculator(Phylogeny,Vector)
- * @see #setTree(Phylogeny)
- * @see #setTreeAndExtNodes(Phylogeny,Vector)
- * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
- */
- public double getStandardDeviation() {
- return stand_dev_;
- }
-
- /**
- * Returns the variance. ( 1/(N - 1) * Sum((x-mean)^2) ) If constructor
- * DistanceCalculator(Phylogeny) or method setTree(Phylogeny) have been
- * used, it is the variance of the distances from the root to all external
- * Nodes. If constructor DistanceCalculator(Phylogeny,Vector) or method
- * setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the variance
- * of the distances from the external nodes ext_nodes to their lowest common
- * ancestor. (Last modified: 11/30/00)
- *
- * @return variance
- * @see #DistanceCalculator(Phylogeny)
- * @see #DistanceCalculator(Phylogeny,Vector)
- * @see #setTree(Phylogeny)
- * @see #setTreeAndExtNodes(Phylogeny,Vector)
- * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
- */
- public double getVariance() {
- return variance_;
- }
-
- // (Last modified: 11/30/00)
- private void setMean( final double d ) {
- mean_ = d;
- }
-
- // (Last modified: 11/30/00)
- private void setStandardDeviation( final double d ) {
- stand_dev_ = d;
- }
-
- /**
- * Sets the rooted Phylogeny t for which the mean distance to the root and
- * its variance and standard deviation are calculated. (Last modified:
- * 12/01/00)
- *
- * @param t
- * the rooted Phylogeny for which the mean distance to the root
- * and its variance and standard deviation are calculated
- */
- public void setTree( final Phylogeny t ) {
- tree_ = t;
- nodes_ = null;
- n_ = 0;
- mean_ = DistanceCalculator.DEFAULT;
- variance_ = DistanceCalculator.DEFAULT;
- stand_dev_ = DistanceCalculator.DEFAULT;
- lca_ = null;
- calculateMeanDistToRoot();
- calculateVarianceDistToRoot();
- calculateStandardDeviation();
- }
-
- /**
- * Sets the rooted Phylogeny t and the external Nodes ext_nodes for which
- * the mean distance to their lowest common ancestor and its variance and
- * standard deviation are calculated. (Last modified: 12/03/00)
- *
- * @param t
- * the rooted Phylogeny containing Nodes in Vector ext_nodes
- * @param ext_nodes
- * a ArrayList of Nodes of t, the mean distance to their lowest
- * common ancestor and its variance and standard deviation are
- * calculated
- */
- public void setTreeAndExtNodes( final Phylogeny t, final ArrayList<PhylogenyNode> ext_nodes ) {
- tree_ = t;
- nodes_ = ext_nodes;
- n_ = 0;
- mean_ = DistanceCalculator.DEFAULT;
- variance_ = DistanceCalculator.DEFAULT;
- stand_dev_ = DistanceCalculator.DEFAULT;
- lca_ = calculateLCA( nodes_ );
- calculateMean();
- calculateVariance();
- calculateStandardDeviation();
- }
-
- /**
- * Sets the rooted Phylogeny t and the external Nodes ext_nodes for which
- * the mean distance to their lowest common ancestor and its variance and
- * standard deviation are calculated. (Last modified: 12/03/00)
- *
- * @param t
- * the rooted Phylogeny containing Nodes in Vector ext_nodes
- * @param ext_nodes
- * a Vector of Nodes of t, the mean distance to their lowest
- * common ancestor and its variance and standard deviation are
- * calculated
- */
- public void setTreeAndExtNodes( final Phylogeny t, final Vector<PhylogenyNode> ext_nodes ) {
- setTreeAndExtNodes( t, new ArrayList<PhylogenyNode>( ext_nodes ) );
- }
-
- // (Last modified: 11/30/00)
- private void setVariance( final double d ) {
- variance_ = d;
- }
-
- // Main for testing.
- public static void main( final String args[] ) {
- File tree_file = null;
- Phylogeny tree = null;
- DistanceCalculator dc = null;
- tree_file = new File( args[ 0 ] );
- try {
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( tree_file, true );
- tree = factory.create( tree_file, pp )[ 0 ];
- }
- catch ( final Exception e ) {
- System.out.println( e.toString() );
- System.exit( -1 );
- }
- double time = System.currentTimeMillis();
- dc = new DistanceCalculator( tree );
- final double m = dc.getMean(), var = dc.getVariance(), sd = dc.getStandardDeviation();
- time = ( System.currentTimeMillis() - time );
- System.out.println( "\nn = " + dc.getN() );
- System.out.println( "mea = " + m );
- System.out.println( "var = " + var );
- System.out.println( "sd = " + sd + "\n" );
- System.out.println( "t=" + time + "\n" );
- }
-}