2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // Copyright (C) 2000-2001 Washington University School of Medicine
8 // and Howard Hughes Medical Institute
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
28 package org.forester.sdi;
31 import java.util.ArrayList;
32 import java.util.ListIterator;
33 import java.util.Vector;
35 import org.forester.io.parsers.PhylogenyParser;
36 import org.forester.phylogeny.Phylogeny;
37 import org.forester.phylogeny.PhylogenyNode;
38 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
39 import org.forester.phylogeny.factories.PhylogenyFactory;
40 import org.forester.util.ForesterUtil;
43 * @author Christian M. Zmasek
45 * @version 1.001 -- last modified: 12/04/00
47 public class DistanceCalculator {
49 public final static double DEFAULT = -1.0;
50 private Phylogeny tree_;
51 private ArrayList<PhylogenyNode> nodes_;
53 private double mean_, variance_, stand_dev_;
54 private PhylogenyNode lca_; // The LCA of the
58 * Default constructor. (Last modified: 11/30/00)
60 public DistanceCalculator() {
64 mean_ = DistanceCalculator.DEFAULT;
65 variance_ = DistanceCalculator.DEFAULT;
66 stand_dev_ = DistanceCalculator.DEFAULT;
71 * Constructor. Sets the rooted Phylogeny t for which the mean distance to
72 * the root and its variance and standard deviation are calculated. (Last
76 * the rooted Phylogeny for which the mean distance to the root
77 * and its variance and standard deviation are calculated
79 public DistanceCalculator( final Phylogeny t ) {
84 * Constructor. Sets the rooted Phylogeny t and the external Nodes ext_nodes
85 * for which the mean distance to their lowest common ancestor and its
86 * variance and standard deviation are calculated. (Last modified: 12/01/00)
89 * the rooted Phylogeny containing Nodes in Vector ext_nodes
91 * a Vector of Nodes of t, the mean distance to their lowest
92 * common ancestor and its variance and standard deviation are
95 public DistanceCalculator( final Phylogeny t, final Vector<PhylogenyNode> ext_nodes ) {
96 setTreeAndExtNodes( t, ext_nodes );
99 // (Last modified: 12/01/00)
100 private PhylogenyNode calculateLCA( final ArrayList<PhylogenyNode> nodes ) {
101 if ( ( nodes == null ) || nodes.isEmpty() ) {
104 PhylogenyNode node = nodes.get( 0 );
105 int c = node.getNumberOfExternalNodes();
106 final int v = nodes.size();
107 while ( !node.isRoot() && ( c < v ) ) {
108 node = node.getParent();
109 c = node.getNumberOfExternalNodes();
111 ArrayList<PhylogenyNode> current_nodes = new ArrayList<PhylogenyNode>( node.getAllExternalDescendants() );
112 while ( !node.isRoot() && !current_nodes.containsAll( nodes ) ) {
113 node = node.getParent();
114 current_nodes = new ArrayList<PhylogenyNode>( node.getAllExternalDescendants() );
119 // (Last modified: 11/31/00)
120 private void calculateMean() {
121 if ( ( nodes_ == null ) || nodes_.isEmpty() || ( tree_ == null ) || tree_.isEmpty() ) {
125 final ListIterator<PhylogenyNode> li = nodes_.listIterator();
128 while ( li.hasNext() ) {
130 sum += getDistanceToNode( li.next(), lca_ );
133 catch ( final Exception e ) {
134 System.err.println( "calculateMean(): " + "Exception: " + e );
140 // (Last modified: 11/30/00)
141 private void calculateMeanDistToRoot() {
142 if ( ( tree_ == null ) || tree_.isEmpty() ) {
146 PhylogenyNode node = tree_.getFirstExternalNode();
148 while ( node != null ) {
150 sum += getDistanceToRoot( node );
151 node = node.getNextExternalNode();
156 // (Last modified: 11/31/00)
157 private void calculateStandardDeviation() {
158 if ( ( getVariance() == DistanceCalculator.DEFAULT ) || ( getVariance() < 0.0 ) ) {
161 setStandardDeviation( java.lang.Math.sqrt( getVariance() ) );
164 // (Last modified: 11/31/00)
165 private void calculateVariance() {
166 if ( ( getMean() == DistanceCalculator.DEFAULT ) || ( nodes_ == null ) || nodes_.isEmpty() || ( tree_ == null )
167 || tree_.isEmpty() || ( n_ <= 1.0 ) ) {
170 double x = 0.0, sum = 0.0;
171 final ListIterator<PhylogenyNode> li = nodes_.listIterator();
173 while ( li.hasNext() ) {
174 x = getDistanceToNode( li.next(), lca_ ) - getMean();
178 catch ( final Exception e ) {
179 System.err.println( "calculateVariance(): " + "Exception: " + e );
182 setVariance( sum / ( n_ - 1 ) );
185 // (Last modified: 11/31/00)
186 private void calculateVarianceDistToRoot() {
187 if ( ( getMean() == DistanceCalculator.DEFAULT ) || ( tree_ == null ) || tree_.isEmpty() || ( n_ <= 1.0 ) ) {
190 double x = 0.0, sum = 0.0;
191 PhylogenyNode node = tree_.getFirstExternalNode();
192 while ( node != null ) {
193 x = getDistanceToRoot( node ) - getMean();
195 node = node.getNextExternalNode();
197 setVariance( sum / ( n_ - 1 ) );
201 * Calculates the distance of the PhylogenyNode with seq name seq_name to
202 * the LCA of ext_nodes, which has been set either with constructor
203 * DistanceCalculator(Phylogeny,Vector) or method
204 * setTreeAndExtNodes(Phylogeny,Vector). Throws an exception if no
205 * PhylogenyNode with seq name_seq name is found or if seq_name is not
206 * unique. (Last modified: 12/03/00)
209 * the seq name for the PhylogenyNode for which the distance to
210 * the LCA is to be calculated
211 * @return distance of PhylogenyNode with seq name seq_name to the LCA of
213 * @see #DistanceCalculator(Phylogeny,Vector)
214 * @see #setTreeAndExtNodes(Phylogeny,Vector)
215 * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
217 public double getDistanceToLCA( final String seq_name ) {
218 if ( ( tree_ == null ) || tree_.isEmpty() || ( lca_ == null ) ) {
221 return getDistanceToNode( seq_name, lca_ );
225 * Calculates the distance of PhylogenyNode outer to PhylogenyNode inner.
226 * PhylogenyNode inner must be closer to the root than PhylogenyNode outer
227 * and on the same "path". (Last modified: 12/01/00)
232 * a PhylogenyNode closer to the root than outer
233 * @return distance of PhylogenyNode outer to PhylogenyNode inner
235 public double getDistanceToNode( PhylogenyNode outer, final PhylogenyNode inner ) {
236 double d = 0.0, dist = 0.0;
237 while ( ( inner != outer ) && !outer.isRoot() ) {
238 d = outer.getDistanceToParent();
242 outer = outer.getParent();
244 if ( !inner.isRoot() && outer.isRoot() ) {
245 throw new IllegalArgumentException( "getDistanceToNode(PhylogenyNode outer,PhylogenyNode inner): "
246 + "PhylogenyNode inner is not closer to the root than PhylogenyNode outer "
247 + "or is not on the same \"subtree\"" );
253 * Calculates the distance of the PhylogenyNode with seq name seq_name to
254 * PhylogenyNode inner. PhylogenyNode inner must be closer to the root than
255 * the PhylogenyNode with seq name seq_name and on the same "path". Throws
256 * an exception if no PhylogenyNode with seq name_seq name is found or if
257 * seq_name is not unique. (Last modified: 12/01/00)
260 * the seq name of a PhylogenyNode further from the root than
261 * PhylogenyNode inner
264 * @return distance of PhylogenyNode with seq name seq_nam to PhylogenyNode
267 public double getDistanceToNode( final String seq_name, final PhylogenyNode inner ) {
268 if ( ( tree_ == null ) || tree_.isEmpty() ) {
271 return getDistanceToNode( tree_.getNodeViaSequenceName( seq_name ), inner );
275 * Calculates the distance of PhylogenyNode n to the root of Phylogeny t
276 * which has been set either with a constructor, setTree(Phylogeny), or
277 * setTreeAndExtNodes(Phylogeny,Vector). (Last modified: 12/01/00)
280 * the PhylogenyNode for which the distance to the root is to be
282 * @return distance of PhylogenyNode n to the root
283 * @see #DistanceCalculator(Phylogeny)
284 * @see #DistanceCalculator(Phylogeny,Vector)
285 * @see #setTree(Phylogeny)
286 * @see #setTreeAndExtNodes(Phylogeny,Vector)
288 public double getDistanceToRoot( final PhylogenyNode n ) {
289 if ( ( tree_ == null ) || tree_.isEmpty() ) {
294 d = getDistanceToNode( n, tree_.getRoot() );
296 catch ( final Exception e ) {
297 System.err.println( "getDistanceToRoot(PhylogenyNode): Unexpected " + "exception: " + e );
304 * Calculates the distance of the PhylogenyNode with seq name seq_name to
305 * the root of Phylogeny t, which has been set either with a constructor,
306 * setTree(Phylogeny), or setTreeAndExtNodes(Phylogeny,Vector). Throws an
307 * exception if no PhylogenyNode with seq name_seq name is found or if
308 * seq_name is not unique. (Last modified: 12/01/00)
311 * the seq name for the PhylogenyNode for which the distance to
312 * the root is to be calculated
313 * @return distance of PhylogenyNode with seq name seq_name to the root
314 * @see #DistanceCalculator(Phylogeny)
315 * @see #DistanceCalculator(Phylogeny,Vector)
316 * @see #setTree(Phylogeny)
317 * @see #setTreeAndExtNodes(Phylogeny,Vector)
318 * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
320 public double getDistanceToRoot( final String seq_name ) {
321 if ( ( tree_ == null ) || tree_.isEmpty() ) {
324 return getDistanceToNode( seq_name, tree_.getRoot() );
328 * Returns the mean distance. If constructor DistanceCalculator(Phylogeny)
329 * or method setTree(Phylogeny) have been used, it is the mean of the
330 * distances from the root to all external Nodes. If constructor
331 * DistanceCalculator(Phylogeny,Vector) or method
332 * setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the mean of
333 * the distances from the external nodes ext_nodes to their lowest common
334 * ancestor. (Last modified: 11/30/00)
336 * @return mean distance
337 * @see #DistanceCalculator(Phylogeny)
338 * @see #DistanceCalculator(Phylogeny,Vector)
339 * @see #setTree(Phylogeny)
340 * @see #setTreeAndExtNodes(Phylogeny,Vector)
341 * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
343 public double getMean() {
348 * Returns the sum of all Nodes used to calculate the mean. (Last modified:
358 * Returns the standard deviation. If constructor
359 * DistanceCalculator(Phylogeny) or method setTree(Phylogeny) have been
360 * used, it is the standard deviation of the distances from the root to all
361 * external Nodes. If constructor DistanceCalculator(Phylogeny,Vector) or
362 * method setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the
363 * standard deviation of the distances from the external nodes ext_nodes to
364 * their lowest common ancestor. (Last modified: 11/30/00)
366 * @return standard deviation
367 * @see #DistanceCalculator(Phylogeny)
368 * @see #DistanceCalculator(Phylogeny,Vector)
369 * @see #setTree(Phylogeny)
370 * @see #setTreeAndExtNodes(Phylogeny,Vector)
371 * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
373 public double getStandardDeviation() {
378 * Returns the variance. ( 1/(N - 1) * Sum((x-mean)^2) ) If constructor
379 * DistanceCalculator(Phylogeny) or method setTree(Phylogeny) have been
380 * used, it is the variance of the distances from the root to all external
381 * Nodes. If constructor DistanceCalculator(Phylogeny,Vector) or method
382 * setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the variance
383 * of the distances from the external nodes ext_nodes to their lowest common
384 * ancestor. (Last modified: 11/30/00)
387 * @see #DistanceCalculator(Phylogeny)
388 * @see #DistanceCalculator(Phylogeny,Vector)
389 * @see #setTree(Phylogeny)
390 * @see #setTreeAndExtNodes(Phylogeny,Vector)
391 * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
393 public double getVariance() {
397 // (Last modified: 11/30/00)
398 private void setMean( final double d ) {
402 // (Last modified: 11/30/00)
403 private void setStandardDeviation( final double d ) {
408 * Sets the rooted Phylogeny t for which the mean distance to the root and
409 * its variance and standard deviation are calculated. (Last modified:
413 * the rooted Phylogeny for which the mean distance to the root
414 * and its variance and standard deviation are calculated
416 public void setTree( final Phylogeny t ) {
420 mean_ = DistanceCalculator.DEFAULT;
421 variance_ = DistanceCalculator.DEFAULT;
422 stand_dev_ = DistanceCalculator.DEFAULT;
424 calculateMeanDistToRoot();
425 calculateVarianceDistToRoot();
426 calculateStandardDeviation();
430 * Sets the rooted Phylogeny t and the external Nodes ext_nodes for which
431 * the mean distance to their lowest common ancestor and its variance and
432 * standard deviation are calculated. (Last modified: 12/03/00)
435 * the rooted Phylogeny containing Nodes in Vector ext_nodes
437 * a ArrayList of Nodes of t, the mean distance to their lowest
438 * common ancestor and its variance and standard deviation are
441 public void setTreeAndExtNodes( final Phylogeny t, final ArrayList<PhylogenyNode> ext_nodes ) {
445 mean_ = DistanceCalculator.DEFAULT;
446 variance_ = DistanceCalculator.DEFAULT;
447 stand_dev_ = DistanceCalculator.DEFAULT;
448 lca_ = calculateLCA( nodes_ );
451 calculateStandardDeviation();
455 * Sets the rooted Phylogeny t and the external Nodes ext_nodes for which
456 * the mean distance to their lowest common ancestor and its variance and
457 * standard deviation are calculated. (Last modified: 12/03/00)
460 * the rooted Phylogeny containing Nodes in Vector ext_nodes
462 * a Vector of Nodes of t, the mean distance to their lowest
463 * common ancestor and its variance and standard deviation are
466 public void setTreeAndExtNodes( final Phylogeny t, final Vector<PhylogenyNode> ext_nodes ) {
467 setTreeAndExtNodes( t, new ArrayList<PhylogenyNode>( ext_nodes ) );
470 // (Last modified: 11/30/00)
471 private void setVariance( final double d ) {
476 public static void main( final String args[] ) {
477 File tree_file = null;
478 Phylogeny tree = null;
479 DistanceCalculator dc = null;
480 tree_file = new File( args[ 0 ] );
482 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
483 final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( tree_file, true );
484 tree = factory.create( tree_file, pp )[ 0 ];
486 catch ( final Exception e ) {
487 System.out.println( e.toString() );
490 double time = System.currentTimeMillis();
491 dc = new DistanceCalculator( tree );
492 final double m = dc.getMean(), var = dc.getVariance(), sd = dc.getStandardDeviation();
493 time = ( System.currentTimeMillis() - time );
494 System.out.println( "\nn = " + dc.getN() );
495 System.out.println( "mea = " + m );
496 System.out.println( "var = " + var );
497 System.out.println( "sd = " + sd + "\n" );
498 System.out.println( "t=" + time + "\n" );