2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 import java.util.regex.PatternSyntaxException;
45 import org.forester.io.parsers.FastaParser;
46 import org.forester.io.parsers.PhylogenyParser;
47 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
48 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
49 import org.forester.io.parsers.util.PhylogenyParserException;
50 import org.forester.msa.Msa;
51 import org.forester.phylogeny.data.Accession;
52 import org.forester.phylogeny.data.Annotation;
53 import org.forester.phylogeny.data.BranchColor;
54 import org.forester.phylogeny.data.BranchWidth;
55 import org.forester.phylogeny.data.Confidence;
56 import org.forester.phylogeny.data.DomainArchitecture;
57 import org.forester.phylogeny.data.Event;
58 import org.forester.phylogeny.data.Identifier;
59 import org.forester.phylogeny.data.PhylogenyDataUtil;
60 import org.forester.phylogeny.data.Sequence;
61 import org.forester.phylogeny.data.Taxonomy;
62 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
63 import org.forester.phylogeny.factories.PhylogenyFactory;
64 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
65 import org.forester.phylogeny.iterators.PreorderTreeIterator;
66 import org.forester.util.BasicDescriptiveStatistics;
67 import org.forester.util.DescriptiveStatistics;
68 import org.forester.util.ForesterUtil;
69 import org.forester.util.TaxonomyUtil;
71 public class PhylogenyMethods {
73 private static boolean _order_changed;
75 private PhylogenyMethods() {
76 // Hidden constructor.
80 public Object clone() throws CloneNotSupportedException {
81 throw new CloneNotSupportedException();
84 public static boolean extractFastaInformation( final Phylogeny phy ) {
85 boolean could_extract = false;
86 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
87 final PhylogenyNode node = iter.next();
88 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
89 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
90 if ( name_m.lookingAt() ) {
92 final String acc_source = name_m.group( 1 );
93 final String acc = name_m.group( 2 );
94 final String seq_name = name_m.group( 3 );
95 final String tax_sn = name_m.group( 4 );
96 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
97 ForesterUtil.ensurePresenceOfSequence( node );
98 node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
100 if ( !ForesterUtil.isEmpty( seq_name ) ) {
101 ForesterUtil.ensurePresenceOfSequence( node );
102 node.getNodeData().getSequence( 0 ).setName( seq_name );
104 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
105 ForesterUtil.ensurePresenceOfTaxonomy( node );
106 node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
111 return could_extract;
114 public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
115 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
116 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
117 final PhylogenyNode n = iter.next();
118 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
119 stats.addValue( n.getDistanceToParent() );
125 public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
126 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
127 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
128 final PhylogenyNode n = iter.next();
129 if ( !n.isExternal() && !n.isRoot() ) {
130 if ( n.getBranchData().isHasConfidences() ) {
131 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
132 final Confidence c = n.getBranchData().getConfidences().get( i );
133 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
134 stats.add( i, new BasicDescriptiveStatistics() );
136 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
137 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
138 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
139 throw new IllegalArgumentException( "support values in node [" + n.toString()
140 + "] appear inconsistently ordered" );
143 stats.get( i ).setDescription( c.getType() );
145 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
154 * For external nodes the level is 0.
159 public static int calculateLevel( final PhylogenyNode node ) {
160 if ( node.isExternal() ) {
164 for( PhylogenyNode ext : node.getAllExternalDescendants() ) {
166 while ( ext != node ) {
167 ext = ext.getParent();
170 if ( counter > level ) {
178 * Calculates the distance between PhylogenyNodes node1 and node2.
183 * @return distance between node1 and node2
185 public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
186 final PhylogenyNode lca = calculateLCA( node1, node2 );
187 final PhylogenyNode n1 = node1;
188 final PhylogenyNode n2 = node2;
189 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
193 * Returns the LCA of PhylogenyNodes node1 and node2.
198 * @return LCA of node1 and node2
200 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
201 if ( node1 == null ) {
202 throw new IllegalArgumentException( "first argument (node) is null" );
204 if ( node2 == null ) {
205 throw new IllegalArgumentException( "second argument (node) is null" );
207 if ( node1 == node2 ) {
210 if ( ( node1.getParent() == node2.getParent() ) ) {
211 return node1.getParent();
213 int depth1 = node1.calculateDepth();
214 int depth2 = node2.calculateDepth();
215 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
216 if ( depth1 > depth2 ) {
217 node1 = node1.getParent();
220 else if ( depth2 > depth1 ) {
221 node2 = node2.getParent();
225 if ( node1 == node2 ) {
228 node1 = node1.getParent();
229 node2 = node2.getParent();
234 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
238 * Returns the LCA of PhylogenyNodes node1 and node2.
239 * Precondition: ids are in pre-order (or level-order).
244 * @return LCA of node1 and node2
246 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
247 if ( node1 == null ) {
248 throw new IllegalArgumentException( "first argument (node) is null" );
250 if ( node2 == null ) {
251 throw new IllegalArgumentException( "second argument (node) is null" );
253 while ( node1 != node2 ) {
254 if ( node1.getId() > node2.getId() ) {
255 node1 = node1.getParent();
258 node2 = node2.getParent();
264 public static int calculateMaxDepth( final Phylogeny phy ) {
266 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
267 final PhylogenyNode node = iter.next();
268 final int steps = node.calculateDepth();
276 public static String[] obtainPresentRanksSorted( final Phylogeny phy ) {
277 final Set<String> present_ranks = new HashSet<String>();
278 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
279 final PhylogenyNode node = iter.next();
280 if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null )
281 && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) {
282 final String current_rank = node.getNodeData().getTaxonomy().getRank();
283 if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
284 present_ranks.add( current_rank );
288 final String ordered_ranks[] = new String[ present_ranks.size() + 1 ];
290 for( final String rank : TaxonomyUtil.RANKS ) {
291 if ( present_ranks.contains( rank ) ) {
292 ordered_ranks[ c++ ] = rank;
295 ordered_ranks[ c ] = "off";
296 return ordered_ranks;
299 public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) {
301 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
302 PhylogenyNode n = iter.next();
304 while ( n.getParent() != null ) {
305 if ( !n.isCollapse() ) {
317 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
319 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
320 final PhylogenyNode node = iter.next();
321 final double d = node.calculateDistanceToRoot();
329 public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
331 PhylogenyNode max_node = phy.getFirstExternalNode();
332 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
333 final PhylogenyNode node = iter.next();
334 final double d = node.calculateDistanceToRoot();
343 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
344 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
346 for( final PhylogenyNode n : descs ) {
347 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
354 public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
355 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
356 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
357 final PhylogenyNode n = iter.next();
358 if ( !n.isExternal() ) {
359 stats.addValue( n.getNumberOfDescendants() );
365 public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
366 final List<PhylogenyNode> eds = n.getAllExternalDescendants();
367 final List<Double> d = new ArrayList<Double>();
368 for( final PhylogenyNode ed : eds ) {
369 d.add( calculateDistanceToAncestor( n, ed ) );
371 for( int i = 0; i < eds.size(); ++i ) {
372 n.setChildNode( i, eds.get( i ) );
373 eds.get( i ).setDistanceToParent( d.get( i ) );
377 public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
379 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
380 final PhylogenyNode n = iter.next();
381 if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) {
388 public static int countNumberOfPolytomies( final Phylogeny phy ) {
390 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
391 final PhylogenyNode n = iter.next();
392 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
399 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
400 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
401 final List<PhylogenyNode> ext = phy.getExternalNodes();
402 for( final PhylogenyNode n : ext ) {
403 nodes.put( n.getName(), n );
408 public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
409 for( final Long id : to_delete ) {
410 phy.deleteSubtree( phy.getNode( id ), true );
412 phy.clearHashIdToNodeMap();
413 phy.externalNodesHaveChanged();
416 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
417 throws IllegalArgumentException {
418 for( final String element : node_names_to_delete ) {
419 if ( ForesterUtil.isEmpty( element ) ) {
422 List<PhylogenyNode> nodes = null;
423 nodes = p.getNodes( element );
424 final Iterator<PhylogenyNode> it = nodes.iterator();
425 while ( it.hasNext() ) {
426 final PhylogenyNode n = it.next();
427 if ( !n.isExternal() ) {
428 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
430 p.deleteSubtree( n, true );
433 p.clearHashIdToNodeMap();
434 p.externalNodesHaveChanged();
437 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
438 final Phylogeny p ) {
439 final PhylogenyNodeIterator it = p.iteratorExternalForward();
440 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
442 Arrays.sort( node_names_to_keep );
443 while ( it.hasNext() ) {
444 final String curent_name = it.next().getName();
445 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
446 to_delete[ i++ ] = curent_name;
449 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
450 final List<String> deleted = new ArrayList<String>();
451 for( final String n : to_delete ) {
452 if ( !ForesterUtil.isEmpty( n ) ) {
459 public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep,
460 final Phylogeny phy ) {
461 final Set<Long> to_delete = new HashSet<Long>();
462 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
463 final PhylogenyNode n = it.next();
464 if ( n.getNodeData().isHasTaxonomy() ) {
465 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
466 to_delete.add( n.getId() );
470 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
473 deleteExternalNodesNegativeSelection( to_delete, phy );
476 final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
477 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
478 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
479 final PhylogenyNode n = iter.next();
480 if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
484 for( final PhylogenyNode d : to_delete ) {
485 PhylogenyMethods.removeNode( d, phy );
487 phy.clearHashIdToNodeMap();
488 phy.externalNodesHaveChanged();
491 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
492 if ( n.isInternal() ) {
493 throw new IllegalArgumentException( "node is not external" );
495 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
496 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
497 final PhylogenyNode i = it.next();
498 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
502 for( final PhylogenyNode d : to_delete ) {
503 phy.deleteSubtree( d, true );
505 phy.clearHashIdToNodeMap();
506 phy.externalNodesHaveChanged();
509 public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
510 final double min_distance_to_root ) {
511 if ( min_distance_to_root <= 0 ) {
512 throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
514 final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
515 setAllIndicatorsToZero( phy );
516 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
517 final PhylogenyNode n = it.next();
518 if ( n.getIndicator() != 0 ) {
521 l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
523 throw new RuntimeException( "this should not have happened" );
529 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
530 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
531 final Set<Long> encountered = new HashSet<Long>();
532 if ( !node.isExternal() ) {
533 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
534 for( PhylogenyNode current : exts ) {
535 descs.add( current );
536 while ( current != node ) {
537 current = current.getParent();
538 if ( encountered.contains( current.getId() ) ) {
541 descs.add( current );
542 encountered.add( current.getId() );
549 public static List<PhylogenyNode> getAllDescendantsOfGivenLevel( final PhylogenyNode node, final int level ) {
550 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
551 final Set<Long> encountered = new HashSet<Long>();
552 if ( !node.isExternal() ) {
553 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
554 for( PhylogenyNode current : exts ) {
555 if ( calculateLevel( current ) == level ) {
556 descs.add( current );
558 while ( current != node ) {
559 current = current.getParent();
560 if ( encountered.contains( current.getId() ) ) {
563 if ( calculateLevel( current ) == level ) {
564 descs.add( current );
566 encountered.add( current.getId() );
580 public static Color getBranchColorValue( final PhylogenyNode node ) {
581 if ( node.getBranchData().getBranchColor() == null ) {
584 return node.getBranchData().getBranchColor().getValue();
590 public static double getBranchWidthValue( final PhylogenyNode node ) {
591 if ( !node.getBranchData().isHasBranchWidth() ) {
592 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
594 return node.getBranchData().getBranchWidth().getValue();
600 public static double getConfidenceValue( final PhylogenyNode node ) {
601 if ( !node.getBranchData().isHasConfidences() ) {
602 return Confidence.CONFIDENCE_DEFAULT_VALUE;
604 return node.getBranchData().getConfidence( 0 ).getValue();
610 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
611 if ( !node.getBranchData().isHasConfidences() ) {
612 return new double[ 0 ];
614 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
616 for( final Confidence c : node.getBranchData().getConfidences() ) {
617 values[ i++ ] = c.getValue();
622 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
623 return calculateLCA( n1, n2 ).getNodeData().getEvent();
627 * Returns taxonomy t if all external descendants have
628 * the same taxonomy t, null otherwise.
631 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
632 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
634 for( final PhylogenyNode n : descs ) {
635 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
638 else if ( tax == null ) {
639 tax = n.getNodeData().getTaxonomy();
641 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
648 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
649 final List<PhylogenyNode> children = node.getAllExternalDescendants();
650 PhylogenyNode farthest = null;
651 double longest = -Double.MAX_VALUE;
652 for( final PhylogenyNode child : children ) {
653 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
655 longest = PhylogenyMethods.getDistance( child, node );
661 // public static PhylogenyMethods getInstance() {
662 // if ( PhylogenyMethods._instance == null ) {
663 // PhylogenyMethods._instance = new PhylogenyMethods();
665 // return PhylogenyMethods._instance;
668 * Returns the largest confidence value found on phy.
670 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
671 double max = -Double.MAX_VALUE;
672 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
673 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
674 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
681 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
682 int min = Integer.MAX_VALUE;
685 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
687 if ( n.isInternal() ) {
688 d = n.getNumberOfDescendants();
698 * Convenience method for display purposes.
699 * Not intended for algorithms.
701 public static String getSpecies( final PhylogenyNode node ) {
702 if ( !node.getNodeData().isHasTaxonomy() ) {
705 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
706 return node.getNodeData().getTaxonomy().getScientificName();
708 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
709 return node.getNodeData().getTaxonomy().getTaxonomyCode();
712 return node.getNodeData().getTaxonomy().getCommonName();
717 * Convenience method for display purposes.
718 * Not intended for algorithms.
720 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
721 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
724 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
727 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
728 if ( n.isExternal() ) {
732 if ( n.isDuplication() ) {
733 for( final PhylogenyNode desc : n.getDescendants() ) {
734 if ( !isAllDecendentsAreDuplications( desc ) ) {
746 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
747 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
748 if ( node.getChildNode( i ).isExternal() ) {
756 * This is case insensitive.
759 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
760 final String[] providers ) {
761 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
762 final String my_tax_prov = tax.getIdentifier().getProvider();
763 for( final String provider : providers ) {
764 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
775 public static void midpointRoot( final Phylogeny phylogeny ) {
776 if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
780 final int total_nodes = phylogeny.getNodeCount();
782 if ( ++counter > total_nodes ) {
783 throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
785 PhylogenyNode a = null;
788 for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
789 final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
790 final double df = getDistance( f, phylogeny.getRoot() );
797 else if ( df > db ) {
802 final double diff = da - db;
803 if ( diff < 0.000001 ) {
806 double x = da - ( diff / 2.0 );
807 while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
808 x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
811 phylogeny.reRoot( a, x );
813 phylogeny.recalculateNumberOfExternalDescendants( true );
816 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
817 final double max_bootstrap_value,
818 final double max_normalized_value ) {
819 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
820 final PhylogenyNode node = iter.next();
821 if ( node.isInternal() ) {
822 final double confidence = getConfidenceValue( node );
823 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
824 if ( confidence >= max_bootstrap_value ) {
825 setBootstrapConfidence( node, max_normalized_value );
828 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
835 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
836 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
837 if ( phy.isEmpty() ) {
840 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
841 nodes.add( iter.next() );
847 * Returns a map of distinct taxonomies of
848 * all external nodes of node.
849 * If at least one of the external nodes has no taxonomy,
853 public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
854 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
855 final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
856 for( final PhylogenyNode n : descs ) {
857 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
860 final Taxonomy t = n.getNodeData().getTaxonomy();
861 if ( tax_map.containsKey( t ) ) {
862 tax_map.put( t, tax_map.get( t ) + 1 );
872 * Arranges the order of childern for each node of this Phylogeny in such a
873 * way that either the branch with more children is on top (right) or on
874 * bottom (left), dependent on the value of boolean order.
877 * decides in which direction to order
880 public static void orderAppearance( final PhylogenyNode n,
882 final boolean order_ext_alphabetically,
883 final DESCENDANT_SORT_PRIORITY pri ) {
884 if ( n.isExternal() ) {
888 if ( ( n.getNumberOfDescendants() == 2 )
889 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
890 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2()
891 .getNumberOfExternalNodes() ) == order ) ) {
892 final PhylogenyNode temp = n.getChildNode1();
893 n.setChild1( n.getChildNode2() );
895 _order_changed = true;
897 else if ( order_ext_alphabetically ) {
898 boolean all_ext = true;
899 for( final PhylogenyNode i : n.getDescendants() ) {
900 if ( !i.isExternal() ) {
906 PhylogenyMethods.sortNodeDescendents( n, pri );
909 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
910 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
915 public synchronized static void orderAppearanceX( final PhylogenyNode n,
916 final boolean order_ext_alphabetically,
917 final DESCENDANT_SORT_PRIORITY pri ) {
918 if ( n.isExternal() ) {
922 _order_changed = false;
923 orderAppearance( n, true, order_ext_alphabetically, pri );
924 if ( !_order_changed ) {
925 orderAppearance( n, false, order_ext_alphabetically, pri );
930 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
931 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
932 final PhylogenyNode node = iter.next();
937 if ( node.isInternal() ) {
938 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
939 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
940 final PhylogenyNode child_node = node.getChildNode( i );
941 final Color child_color = getBranchColorValue( child_node );
942 if ( child_color != null ) {
944 red += child_color.getRed();
945 green += child_color.getGreen();
946 blue += child_color.getBlue();
949 setBranchColorValue( node,
950 new Color( ForesterUtil.roundToInt( red / n ),
951 ForesterUtil.roundToInt( green / n ),
952 ForesterUtil.roundToInt( blue / n ) ) );
957 public static final void preOrderReId( final Phylogeny phy ) {
958 if ( phy.isEmpty() ) {
961 phy.setIdToNodeMap( null );
962 long i = PhylogenyNode.getNodeCount();
963 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
964 it.next().setId( i++ );
966 PhylogenyNode.setNodeCount( i );
969 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file )
971 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
972 final Phylogeny[] trees = factory.create( file, parser );
973 if ( ( trees == null ) || ( trees.length == 0 ) ) {
974 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
979 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
981 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
982 for( final File file : files ) {
983 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
984 final Phylogeny[] trees = factory.create( file, parser );
985 if ( ( trees == null ) || ( trees.length == 0 ) ) {
986 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
988 tree_list.addAll( Arrays.asList( trees ) );
990 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
993 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
994 if ( remove_me.isRoot() ) {
995 if ( remove_me.getNumberOfDescendants() == 1 ) {
996 final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
997 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
998 desc.getDistanceToParent() ) );
999 desc.setParent( null );
1000 phylogeny.setRoot( desc );
1001 phylogeny.clearHashIdToNodeMap();
1004 throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
1007 else if ( remove_me.isExternal() ) {
1008 phylogeny.deleteSubtree( remove_me, false );
1009 phylogeny.clearHashIdToNodeMap();
1010 phylogeny.externalNodesHaveChanged();
1013 final PhylogenyNode parent = remove_me.getParent();
1014 final List<PhylogenyNode> descs = remove_me.getDescendants();
1015 parent.removeChildNode( remove_me );
1016 for( final PhylogenyNode desc : descs ) {
1017 parent.addAsChild( desc );
1018 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
1019 desc.getDistanceToParent() ) );
1021 remove_me.setParent( null );
1022 phylogeny.clearHashIdToNodeMap();
1023 phylogeny.externalNodesHaveChanged();
1027 private static enum NDF {
1029 TaxonomyCode( "TC" ),
1030 TaxonomyCommonName( "TN" ),
1031 TaxonomyScientificName( "TS" ),
1032 TaxonomyIdentifier( "TI" ),
1033 TaxonomySynonym( "SY" ),
1034 SequenceName( "SN" ),
1036 SequenceSymbol( "SS" ),
1037 SequenceAccession( "SA" ),
1041 BinaryCharacter( "BC" ),
1042 MolecularSequence( "MS" );
1044 private final String _text;
1046 NDF( final String text ) {
1050 public static NDF fromString( final String text ) {
1051 for( final NDF n : NDF.values() ) {
1052 if ( text.startsWith( n._text ) ) {
1060 public static List<Long> searchData( final String query,
1061 final Phylogeny phy,
1062 final boolean case_sensitive,
1063 final boolean partial,
1064 final boolean regex,
1065 final boolean search_domains,
1066 final double domains_confidence_threshold ) {
1067 final List<Long> nodes = new ArrayList<Long>();
1068 if ( phy.isEmpty() || ( query == null ) ) {
1071 if ( ForesterUtil.isEmpty( query ) ) {
1074 String my_query = query;
1076 if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
1077 ndf = NDF.fromString( my_query );
1078 if ( ndf != null ) {
1079 my_query = my_query.substring( 3 );
1082 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1083 final PhylogenyNode node = iter.next();
1084 boolean match = false;
1085 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1086 && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
1089 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1090 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1097 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1098 && match( node.getNodeData().getTaxonomy().getCommonName(),
1105 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy()
1106 && match( node.getNodeData().getTaxonomy().getScientificName(),
1113 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1114 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1115 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1122 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1123 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1124 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1125 I: for( final String syn : syns ) {
1126 if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
1132 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1133 && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
1136 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1137 && match( node.getNodeData().getSequence().getGeneName(),
1144 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
1145 && match( node.getNodeData().getSequence().getSymbol(),
1152 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence()
1153 && ( node.getNodeData().getSequence().getAccession() != null )
1154 && match( node.getNodeData().getSequence().getAccession().getValue(),
1161 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1162 && node.getNodeData().isHasSequence()
1163 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1164 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1165 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1166 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1167 && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
1173 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1174 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1175 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1176 if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
1180 if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
1186 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1187 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1188 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1189 if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
1193 if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
1197 if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
1203 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1204 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1205 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1206 I: while ( it.hasNext() ) {
1207 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1212 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1213 I: while ( it.hasNext() ) {
1214 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1220 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1221 && match( node.getNodeData().getSequence().getMolecularSequence(),
1229 nodes.add( node.getId() );
1235 public static List<Long> searchDataLogicalAnd( final String[] queries,
1236 final Phylogeny phy,
1237 final boolean case_sensitive,
1238 final boolean partial,
1239 final boolean search_domains,
1240 final double domains_confidence_threshold ) {
1241 final List<Long> nodes = new ArrayList<Long>();
1242 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1245 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1246 final PhylogenyNode node = iter.next();
1247 boolean all_matched = true;
1248 for( String query : queries ) {
1249 if ( query == null ) {
1252 query = query.trim();
1254 if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
1255 ndf = NDF.fromString( query );
1256 if ( ndf != null ) {
1257 query = query.substring( 3 );
1260 boolean match = false;
1261 if ( ForesterUtil.isEmpty( query ) ) {
1264 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1265 && match( node.getName(), query, case_sensitive, partial, false ) ) {
1268 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1269 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1276 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1277 && match( node.getNodeData().getTaxonomy().getCommonName(),
1284 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1285 && node.getNodeData().isHasTaxonomy()
1286 && match( node.getNodeData().getTaxonomy().getScientificName(),
1293 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1294 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1295 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1302 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1303 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1304 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1305 I: for( final String syn : syns ) {
1306 if ( match( syn, query, case_sensitive, partial, false ) ) {
1312 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1313 && match( node.getNodeData().getSequence().getName(),
1320 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1321 && match( node.getNodeData().getSequence().getGeneName(),
1328 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
1329 && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(),
1336 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1337 && node.getNodeData().isHasSequence()
1338 && ( node.getNodeData().getSequence().getAccession() != null )
1339 && match( node.getNodeData().getSequence().getAccession().getValue(),
1346 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1347 && node.getNodeData().isHasSequence()
1348 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1349 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1350 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1351 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1352 && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
1358 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1359 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1360 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1361 if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
1365 if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
1371 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1372 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1373 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1374 if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
1378 if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
1382 if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
1388 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1389 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1390 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1391 I: while ( it.hasNext() ) {
1392 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1397 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1398 I: while ( it.hasNext() ) {
1399 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1405 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1406 && match( node.getNodeData().getSequence().getMolecularSequence(),
1414 all_matched = false;
1418 if ( all_matched ) {
1419 nodes.add( node.getId() );
1425 public static void setAllIndicatorsToZero( final Phylogeny phy ) {
1426 for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
1427 it.next().setIndicator( ( byte ) 0 );
1432 * Convenience method.
1433 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1435 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1436 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1439 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1440 if ( node.getBranchData().getBranchColor() == null ) {
1441 node.getBranchData().setBranchColor( new BranchColor() );
1443 node.getBranchData().getBranchColor().setValue( color );
1447 * Convenience method
1449 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1450 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1454 * Convenience method.
1455 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1457 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1458 setConfidence( node, confidence_value, "" );
1462 * Convenience method.
1463 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1465 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1466 Confidence c = null;
1467 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1468 c = node.getBranchData().getConfidence( 0 );
1471 c = new Confidence();
1472 node.getBranchData().addConfidence( c );
1475 c.setValue( confidence_value );
1478 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1479 if ( !node.getNodeData().isHasTaxonomy() ) {
1480 node.getNodeData().setTaxonomy( new Taxonomy() );
1482 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1486 * Convenience method to set the taxonomy code of a phylogeny node.
1490 * @param taxonomy_code
1491 * @throws PhyloXmlDataFormatException
1493 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1494 throws PhyloXmlDataFormatException {
1495 if ( !node.getNodeData().isHasTaxonomy() ) {
1496 node.getNodeData().setTaxonomy( new Taxonomy() );
1498 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1501 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
1502 Comparator<PhylogenyNode> c;
1505 c = new PhylogenyNodeSortSequencePriority();
1508 c = new PhylogenyNodeSortNodeNamePriority();
1511 c = new PhylogenyNodeSortTaxonomyPriority();
1513 final List<PhylogenyNode> descs = node.getDescendants();
1514 Collections.sort( descs, c );
1516 for( final PhylogenyNode desc : descs ) {
1517 node.setChildNode( i++, desc );
1522 * Removes from Phylogeny to_be_stripped all external Nodes which are
1523 * associated with a species NOT found in Phylogeny reference.
1526 * a reference Phylogeny
1527 * @param to_be_stripped
1528 * Phylogeny to be stripped
1529 * @return nodes removed from to_be_stripped
1531 public static List<PhylogenyNode> taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference,
1532 final Phylogeny to_be_stripped ) {
1533 final Set<String> ref_ext_taxo = new HashSet<String>();
1534 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1535 final PhylogenyNode n = it.next();
1536 if ( !n.getNodeData().isHasTaxonomy() ) {
1537 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1539 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1540 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1542 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1543 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1545 if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
1546 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
1547 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
1550 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1551 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1552 final PhylogenyNode n = it.next();
1553 if ( !n.getNodeData().isHasTaxonomy() ) {
1554 nodes_to_delete.add( n );
1556 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1557 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1558 && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo
1559 .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
1560 nodes_to_delete.add( n );
1563 for( final PhylogenyNode n : nodes_to_delete ) {
1564 to_be_stripped.deleteSubtree( n, true );
1566 to_be_stripped.clearHashIdToNodeMap();
1567 to_be_stripped.externalNodesHaveChanged();
1568 return nodes_to_delete;
1571 final static public void transferInternalNamesToConfidenceValues( final Phylogeny phy,
1572 final String confidence_type ) {
1573 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1574 while ( it.hasNext() ) {
1575 final PhylogenyNode n = it.next();
1576 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
1579 value = Double.parseDouble( n.getName() );
1581 catch ( final NumberFormatException e ) {
1582 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
1583 + e.getLocalizedMessage() );
1585 if ( value >= 0.0 ) {
1586 n.getBranchData().addConfidence( new Confidence( value, confidence_type ) );
1593 final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
1594 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1595 while ( it.hasNext() ) {
1596 final PhylogenyNode n = it.next();
1597 if ( !n.isExternal() && !n.isRoot() ) {
1598 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1601 value = Double.parseDouble( n.getName() );
1603 catch ( final NumberFormatException e ) {
1606 if ( ( value < 0.0 ) || ( value > 100 ) ) {
1615 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy,
1616 final String confidence_type ) {
1617 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1618 while ( it.hasNext() ) {
1619 transferInternalNodeNameToConfidence( confidence_type, it.next() );
1623 private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
1624 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
1625 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1628 d = Double.parseDouble( n.getName() );
1630 catch ( final Exception e ) {
1634 n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
1641 final static public void transferNodeNameToField( final Phylogeny phy,
1642 final PhylogenyNodeField field,
1643 final boolean external_only )
1644 throws PhyloXmlDataFormatException {
1645 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1646 while ( it.hasNext() ) {
1647 final PhylogenyNode n = it.next();
1648 if ( external_only && n.isInternal() ) {
1651 final String name = n.getName().trim();
1652 if ( !ForesterUtil.isEmpty( name ) ) {
1656 setTaxonomyCode( n, name );
1658 case TAXONOMY_SCIENTIFIC_NAME:
1660 if ( !n.getNodeData().isHasTaxonomy() ) {
1661 n.getNodeData().setTaxonomy( new Taxonomy() );
1663 n.getNodeData().getTaxonomy().setScientificName( name );
1665 case TAXONOMY_COMMON_NAME:
1667 if ( !n.getNodeData().isHasTaxonomy() ) {
1668 n.getNodeData().setTaxonomy( new Taxonomy() );
1670 n.getNodeData().getTaxonomy().setCommonName( name );
1672 case SEQUENCE_SYMBOL:
1674 if ( !n.getNodeData().isHasSequence() ) {
1675 n.getNodeData().setSequence( new Sequence() );
1677 n.getNodeData().getSequence().setSymbol( name );
1681 if ( !n.getNodeData().isHasSequence() ) {
1682 n.getNodeData().setSequence( new Sequence() );
1684 n.getNodeData().getSequence().setName( name );
1686 case TAXONOMY_ID_UNIPROT_1: {
1687 if ( !n.getNodeData().isHasTaxonomy() ) {
1688 n.getNodeData().setTaxonomy( new Taxonomy() );
1691 final int i = name.indexOf( '_' );
1693 id = name.substring( 0, i );
1698 n.getNodeData().getTaxonomy()
1699 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1702 case TAXONOMY_ID_UNIPROT_2: {
1703 if ( !n.getNodeData().isHasTaxonomy() ) {
1704 n.getNodeData().setTaxonomy( new Taxonomy() );
1707 final int i = name.indexOf( '_' );
1709 id = name.substring( i + 1, name.length() );
1714 n.getNodeData().getTaxonomy()
1715 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1719 if ( !n.getNodeData().isHasTaxonomy() ) {
1720 n.getNodeData().setTaxonomy( new Taxonomy() );
1722 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
1729 throw new IllegalArgumentException( "don't know what to do with " + field );
1736 static double addPhylogenyDistances( final double a, final double b ) {
1737 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
1740 else if ( a >= 0.0 ) {
1743 else if ( b >= 0.0 ) {
1746 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1749 static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
1751 boolean all_default = true;
1752 while ( anc != desc ) {
1753 if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
1754 d += desc.getDistanceToParent();
1755 if ( all_default ) {
1756 all_default = false;
1759 desc = desc.getParent();
1761 if ( all_default ) {
1762 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1767 public static double calculateAverageTreeHeight( final PhylogenyNode node ) {
1768 final List<PhylogenyNode> ext = node.getAllExternalDescendants();
1770 for( PhylogenyNode n : ext ) {
1771 while ( n != node ) {
1772 if ( n.getDistanceToParent() > 0 ) {
1773 s += n.getDistanceToParent();
1778 return s / ext.size();
1782 * Deep copies the phylogeny originating from this node.
1784 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
1785 if ( source == null ) {
1789 final PhylogenyNode newnode = source.copyNodeData();
1790 if ( !source.isExternal() ) {
1791 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1792 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
1800 * Shallow copies the phylogeny originating from this node.
1802 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
1803 if ( source == null ) {
1807 final PhylogenyNode newnode = source.copyNodeDataShallow();
1808 if ( !source.isExternal() ) {
1809 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1810 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
1817 private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
1818 final double min_distance_to_root ) {
1819 final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
1820 final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
1821 for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
1822 if ( ext.getIndicator() != 0 ) {
1823 throw new RuntimeException( "this should not have happened" );
1825 ext.setIndicator( ( byte ) 1 );
1832 * Calculates the distance between PhylogenyNodes n1 and n2.
1833 * PRECONDITION: n1 is a descendant of n2.
1836 * a descendant of n2
1838 * @return distance between n1 and n2
1840 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
1842 while ( n1 != n2 ) {
1843 if ( n1.getDistanceToParent() > 0.0 ) {
1844 d += n1.getDistanceToParent();
1846 n1 = n1.getParent();
1851 private static boolean match( final String s,
1853 final boolean case_sensitive,
1854 final boolean partial,
1855 final boolean regex ) {
1856 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1859 String my_s = s.trim();
1860 String my_query = query.trim();
1861 if ( !case_sensitive && !regex ) {
1862 my_s = my_s.toLowerCase();
1863 my_query = my_query.toLowerCase();
1868 if ( case_sensitive ) {
1869 p = Pattern.compile( my_query );
1872 p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
1875 catch ( final PatternSyntaxException e ) {
1879 return p.matcher( my_s ).find();
1885 else if ( partial ) {
1886 return my_s.indexOf( my_query ) >= 0;
1891 p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
1893 catch ( final PatternSyntaxException e ) {
1897 return p.matcher( my_s ).find();
1905 private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
1906 PhylogenyNode n = node;
1907 PhylogenyNode prev = node;
1908 while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
1915 public static enum DESCENDANT_SORT_PRIORITY {
1921 public static enum PhylogenyNodeField {
1926 TAXONOMY_COMMON_NAME,
1928 TAXONOMY_ID_UNIPROT_1,
1929 TAXONOMY_ID_UNIPROT_2,
1930 TAXONOMY_SCIENTIFIC_NAME;
1933 public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
1934 for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
1935 final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
1936 final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
1937 final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
1938 new_seq.setMolecularSequenceAligned( true );
1939 new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
1940 new_seq.setName( seq.getIdentifier() );
1942 new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
1944 catch ( final PhyloXmlDataFormatException ignore ) {
1947 node.getNodeData().addSequence( new_seq );
1951 final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
1954 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1955 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1956 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1957 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1958 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1959 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1961 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1962 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1963 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1964 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1967 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1968 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1969 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1970 return n1.getNodeData().getSequence().getName().toLowerCase()
1971 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1973 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1974 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1975 return n1.getNodeData().getSequence().getGeneName()
1976 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1978 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1979 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1980 return n1.getNodeData().getSequence().getSymbol()
1981 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1984 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1985 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1991 final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
1994 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1995 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1996 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1997 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1998 return n1.getNodeData().getSequence().getName().toLowerCase()
1999 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
2001 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
2002 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
2003 return n1.getNodeData().getSequence().getGeneName()
2004 .compareTo( n2.getNodeData().getSequence().getGeneName() );
2006 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
2007 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
2008 return n1.getNodeData().getSequence().getSymbol()
2009 .compareTo( n2.getNodeData().getSequence().getSymbol() );
2012 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
2013 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
2014 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
2015 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
2016 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
2018 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
2019 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
2020 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
2021 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
2024 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
2025 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2031 final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
2034 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
2035 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
2036 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2038 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
2039 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
2040 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
2041 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
2042 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
2044 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
2045 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
2046 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
2047 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
2050 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
2051 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
2052 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
2053 return n1.getNodeData().getSequence().getName().toLowerCase()
2054 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
2056 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
2057 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
2058 return n1.getNodeData().getSequence().getGeneName()
2059 .compareTo( n2.getNodeData().getSequence().getGeneName() );
2061 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
2062 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
2063 return n1.getNodeData().getSequence().getSymbol()
2064 .compareTo( n2.getNodeData().getSequence().getSymbol() );
2071 public final static Map<Long, Integer> calculateDepths( final Phylogeny phy ) {
2072 final Map<Long, Integer> depths = new HashMap<Long, Integer>();
2073 calculateDepthsHelper( phy.getRoot(), 0, depths );
2077 private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map<Long, Integer> depths ) {
2078 depths.put( n.getId(), d );
2080 final List<PhylogenyNode> descs = n.getDescendants();
2081 for( final PhylogenyNode desc : descs ) {
2082 calculateDepthsHelper( desc, d, depths );
2086 public final static void collapseToDepth( final Phylogeny phy, final int depth ) {
2087 if ( phy.getNumberOfExternalNodes() < 3 ) {
2090 collapseToDepthHelper( phy.getRoot(), 0, depth );
2093 private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) {
2094 if ( n.isExternal() ) {
2095 n.setCollapse( false );
2099 n.setCollapse( true );
2100 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2101 while ( it.hasNext() ) {
2102 it.next().setCollapse( true );
2106 n.setCollapse( false );
2108 final List<PhylogenyNode> descs = n.getDescendants();
2109 for( final PhylogenyNode desc : descs ) {
2110 collapseToDepthHelper( desc, d, depth );
2115 public final static void collapseToRank( final Phylogeny phy, final int rank ) {
2116 if ( phy.getNumberOfExternalNodes() < 3 ) {
2119 if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) {
2120 throw new IllegalArgumentException( "Rank " + rank + " is out of range" );
2122 collapseToRankHelper( phy.getRoot(), rank );
2125 private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) {
2126 if ( n.isExternal() ) {
2127 n.setCollapse( false );
2130 if ( ( n.getNodeData().getTaxonomy() != null )
2131 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) {
2132 final String current_rank = n.getNodeData().getTaxonomy().getRank();
2133 if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
2134 System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." );
2137 if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) {
2138 n.setCollapse( true );
2139 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2140 while ( it.hasNext() ) {
2141 it.next().setCollapse( true );
2147 n.setCollapse( false );
2148 final List<PhylogenyNode> descs = n.getDescendants();
2149 for( final PhylogenyNode desc : descs ) {
2150 collapseToRankHelper( desc, target_rank );
2154 public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) {
2155 PhylogenyNode n = node;
2156 while ( n.isInternal() ) {
2157 n = n.getFirstChildNode();
2162 public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) {
2163 PhylogenyNode n = node;
2164 while ( n.isInternal() ) {
2165 n = n.getLastChildNode();
2170 public final static boolean isHasCollapsedNodes( final Phylogeny phy ) {
2171 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
2172 final PhylogenyNode n = iter.next();
2173 if ( !n.isExternal() && ( n.isCollapse() ) ) {