2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 import java.util.regex.PatternSyntaxException;
45 import org.forester.archaeopteryx.TreePanelUtil;
46 import org.forester.io.parsers.FastaParser;
47 import org.forester.io.parsers.PhylogenyParser;
48 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
49 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
50 import org.forester.io.parsers.util.PhylogenyParserException;
51 import org.forester.msa.Msa;
52 import org.forester.phylogeny.data.Accession;
53 import org.forester.phylogeny.data.Annotation;
54 import org.forester.phylogeny.data.BranchColor;
55 import org.forester.phylogeny.data.BranchWidth;
56 import org.forester.phylogeny.data.Confidence;
57 import org.forester.phylogeny.data.DomainArchitecture;
58 import org.forester.phylogeny.data.Event;
59 import org.forester.phylogeny.data.Identifier;
60 import org.forester.phylogeny.data.PhylogenyDataUtil;
61 import org.forester.phylogeny.data.Sequence;
62 import org.forester.phylogeny.data.Taxonomy;
63 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
64 import org.forester.phylogeny.factories.PhylogenyFactory;
65 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
66 import org.forester.phylogeny.iterators.PreorderTreeIterator;
67 import org.forester.util.BasicDescriptiveStatistics;
68 import org.forester.util.DescriptiveStatistics;
69 import org.forester.util.FailedConditionCheckException;
70 import org.forester.util.ForesterUtil;
71 import org.forester.util.TaxonomyUtil;
73 public class PhylogenyMethods {
75 private static boolean _order_changed;
77 private PhylogenyMethods() {
78 // Hidden constructor.
82 public Object clone() throws CloneNotSupportedException {
83 throw new CloneNotSupportedException();
86 public static boolean extractFastaInformation( final Phylogeny phy ) {
87 boolean could_extract = false;
88 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
89 final PhylogenyNode node = iter.next();
90 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
91 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
92 if ( name_m.lookingAt() ) {
94 final String acc_source = name_m.group( 1 );
95 final String acc = name_m.group( 2 );
96 final String seq_name = name_m.group( 3 );
97 final String tax_sn = name_m.group( 4 );
98 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
99 ForesterUtil.ensurePresenceOfSequence( node );
100 node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
102 if ( !ForesterUtil.isEmpty( seq_name ) ) {
103 ForesterUtil.ensurePresenceOfSequence( node );
104 node.getNodeData().getSequence( 0 ).setName( seq_name );
106 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
107 ForesterUtil.ensurePresenceOfTaxonomy( node );
108 node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
113 return could_extract;
116 public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
117 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
118 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
119 final PhylogenyNode n = iter.next();
120 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
121 stats.addValue( n.getDistanceToParent() );
127 public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
128 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
129 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
130 final PhylogenyNode n = iter.next();
131 if ( !n.isExternal() && !n.isRoot() ) {
132 if ( n.getBranchData().isHasConfidences() ) {
133 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
134 final Confidence c = n.getBranchData().getConfidences().get( i );
135 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
136 stats.add( i, new BasicDescriptiveStatistics() );
138 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
139 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
140 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
141 throw new IllegalArgumentException( "support values in node [" + n.toString()
142 + "] appear inconsistently ordered" );
145 stats.get( i ).setDescription( c.getType() );
147 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
156 * Calculates the distance between PhylogenyNodes node1 and node2.
161 * @return distance between node1 and node2
163 public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
164 final PhylogenyNode lca = calculateLCA( node1, node2 );
165 final PhylogenyNode n1 = node1;
166 final PhylogenyNode n2 = node2;
167 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
171 * Returns the LCA of PhylogenyNodes node1 and node2.
176 * @return LCA of node1 and node2
178 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
179 if ( node1 == null ) {
180 throw new IllegalArgumentException( "first argument (node) is null" );
182 if ( node2 == null ) {
183 throw new IllegalArgumentException( "second argument (node) is null" );
185 if ( node1 == node2 ) {
188 if ( ( node1.getParent() == node2.getParent() ) ) {
189 return node1.getParent();
191 int depth1 = node1.calculateDepth();
192 int depth2 = node2.calculateDepth();
193 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
194 if ( depth1 > depth2 ) {
195 node1 = node1.getParent();
198 else if ( depth2 > depth1 ) {
199 node2 = node2.getParent();
203 if ( node1 == node2 ) {
206 node1 = node1.getParent();
207 node2 = node2.getParent();
212 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
216 * Returns the LCA of PhylogenyNodes node1 and node2.
217 * Precondition: ids are in pre-order (or level-order).
222 * @return LCA of node1 and node2
224 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
225 if ( node1 == null ) {
226 throw new IllegalArgumentException( "first argument (node) is null" );
228 if ( node2 == null ) {
229 throw new IllegalArgumentException( "second argument (node) is null" );
231 while ( node1 != node2 ) {
232 if ( node1.getId() > node2.getId() ) {
233 node1 = node1.getParent();
236 node2 = node2.getParent();
242 public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
243 if ( node.isExternal() ) {
247 for( PhylogenyNode d : node.getAllExternalDescendants() ) {
249 while ( d != node ) {
250 if ( d.isCollapse() ) {
265 public static int calculateMaxDepth( final Phylogeny phy ) {
267 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
268 final PhylogenyNode node = iter.next();
269 final int steps = node.calculateDepth();
277 public static String[] obtainPresentRanksSorted( final Phylogeny phy ) {
278 final Set<String> present_ranks = new HashSet<String>();
279 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
280 final PhylogenyNode node = iter.next();
281 if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null )
282 && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) {
283 final String current_rank = node.getNodeData().getTaxonomy().getRank();
284 if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
285 present_ranks.add( current_rank );
289 final String ordered_ranks[] = new String[present_ranks.size() + 1];
291 for( final String rank : TaxonomyUtil.RANKS ) {
292 if ( present_ranks.contains( rank ) ) {
293 ordered_ranks[ c++ ] = rank;
296 ordered_ranks[ c ] = "off";
297 return ordered_ranks;
300 public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) {
302 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
303 PhylogenyNode n = iter.next();
305 while ( n.getParent() != null ) {
306 if ( !n.isCollapse() ) {
318 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
320 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
321 final PhylogenyNode node = iter.next();
322 final double d = node.calculateDistanceToRoot();
330 public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
332 PhylogenyNode max_node = phy.getFirstExternalNode();
333 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
334 final PhylogenyNode node = iter.next();
335 final double d = node.calculateDistanceToRoot();
344 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
345 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
347 for( final PhylogenyNode n : descs ) {
348 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
355 public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
356 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
357 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
358 final PhylogenyNode n = iter.next();
359 if ( !n.isExternal() ) {
360 stats.addValue( n.getNumberOfDescendants() );
366 public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
367 final List<PhylogenyNode> eds = n.getAllExternalDescendants();
368 final List<Double> d = new ArrayList<Double>();
369 for( final PhylogenyNode ed : eds ) {
370 d.add( calculateDistanceToAncestor( n, ed ) );
372 for( int i = 0; i < eds.size(); ++i ) {
373 n.setChildNode( i, eds.get( i ) );
374 eds.get( i ).setDistanceToParent( d.get( i ) );
378 public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
380 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
381 final PhylogenyNode n = iter.next();
382 if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) {
389 public static int countNumberOfPolytomies( final Phylogeny phy ) {
391 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
392 final PhylogenyNode n = iter.next();
393 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
400 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
401 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
402 final List<PhylogenyNode> ext = phy.getExternalNodes();
403 for( final PhylogenyNode n : ext ) {
404 nodes.put( n.getName(), n );
409 public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
410 for( final Long id : to_delete ) {
411 phy.deleteSubtree( phy.getNode( id ), true );
413 phy.clearHashIdToNodeMap();
414 phy.externalNodesHaveChanged();
417 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
418 throws IllegalArgumentException {
419 for( final String element : node_names_to_delete ) {
420 if ( ForesterUtil.isEmpty( element ) ) {
423 List<PhylogenyNode> nodes = null;
424 nodes = p.getNodes( element );
425 final Iterator<PhylogenyNode> it = nodes.iterator();
426 while ( it.hasNext() ) {
427 final PhylogenyNode n = it.next();
428 if ( !n.isExternal() ) {
429 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
431 p.deleteSubtree( n, true );
434 p.clearHashIdToNodeMap();
435 p.externalNodesHaveChanged();
438 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
439 final Phylogeny p ) {
440 final PhylogenyNodeIterator it = p.iteratorExternalForward();
441 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
443 Arrays.sort( node_names_to_keep );
444 while ( it.hasNext() ) {
445 final String curent_name = it.next().getName();
446 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
447 to_delete[ i++ ] = curent_name;
450 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
451 final List<String> deleted = new ArrayList<String>();
452 for( final String n : to_delete ) {
453 if ( !ForesterUtil.isEmpty( n ) ) {
460 public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep,
461 final Phylogeny phy ) {
462 final Set<Long> to_delete = new HashSet<Long>();
463 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
464 final PhylogenyNode n = it.next();
465 if ( n.getNodeData().isHasTaxonomy() ) {
466 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
467 to_delete.add( n.getId() );
471 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
474 deleteExternalNodesNegativeSelection( to_delete, phy );
477 final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
478 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
479 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
480 final PhylogenyNode n = iter.next();
481 if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
485 for( final PhylogenyNode d : to_delete ) {
486 PhylogenyMethods.removeNode( d, phy );
488 phy.clearHashIdToNodeMap();
489 phy.externalNodesHaveChanged();
492 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
493 if ( n.isInternal() ) {
494 throw new IllegalArgumentException( "node is not external" );
496 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
497 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
498 final PhylogenyNode i = it.next();
499 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
503 for( final PhylogenyNode d : to_delete ) {
504 phy.deleteSubtree( d, true );
506 phy.clearHashIdToNodeMap();
507 phy.externalNodesHaveChanged();
510 public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
511 final double min_distance_to_root ) {
512 if ( min_distance_to_root <= 0 ) {
513 throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
515 final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
516 setAllIndicatorsToZero( phy );
517 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
518 final PhylogenyNode n = it.next();
519 if ( n.getIndicator() != 0 ) {
522 l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
524 throw new RuntimeException( "this should not have happened" );
530 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
531 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
532 final Set<Long> encountered = new HashSet<Long>();
533 if ( !node.isExternal() ) {
534 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
535 for( PhylogenyNode current : exts ) {
536 descs.add( current );
537 while ( current != node ) {
538 current = current.getParent();
539 if ( encountered.contains( current.getId() ) ) {
542 descs.add( current );
543 encountered.add( current.getId() );
557 public static Color getBranchColorValue( final PhylogenyNode node ) {
558 if ( node.getBranchData().getBranchColor() == null ) {
561 return node.getBranchData().getBranchColor().getValue();
567 public static double getBranchWidthValue( final PhylogenyNode node ) {
568 if ( !node.getBranchData().isHasBranchWidth() ) {
569 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
571 return node.getBranchData().getBranchWidth().getValue();
577 public static double getConfidenceValue( final PhylogenyNode node ) {
578 if ( !node.getBranchData().isHasConfidences() ) {
579 return Confidence.CONFIDENCE_DEFAULT_VALUE;
581 return node.getBranchData().getConfidence( 0 ).getValue();
587 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
588 if ( !node.getBranchData().isHasConfidences() ) {
589 return new double[ 0 ];
591 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
593 for( final Confidence c : node.getBranchData().getConfidences() ) {
594 values[ i++ ] = c.getValue();
599 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
600 return calculateLCA( n1, n2 ).getNodeData().getEvent();
604 * Returns taxonomy t if all external descendants have
605 * the same taxonomy t, null otherwise.
608 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
609 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
611 for( final PhylogenyNode n : descs ) {
612 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
615 else if ( tax == null ) {
616 tax = n.getNodeData().getTaxonomy();
618 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
625 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
626 final List<PhylogenyNode> children = node.getAllExternalDescendants();
627 PhylogenyNode farthest = null;
628 double longest = -Double.MAX_VALUE;
629 for( final PhylogenyNode child : children ) {
630 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
632 longest = PhylogenyMethods.getDistance( child, node );
638 // public static PhylogenyMethods getInstance() {
639 // if ( PhylogenyMethods._instance == null ) {
640 // PhylogenyMethods._instance = new PhylogenyMethods();
642 // return PhylogenyMethods._instance;
645 * Returns the largest confidence value found on phy.
647 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
648 double max = -Double.MAX_VALUE;
649 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
650 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
651 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
658 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
659 int min = Integer.MAX_VALUE;
662 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
664 if ( n.isInternal() ) {
665 d = n.getNumberOfDescendants();
675 * Convenience method for display purposes.
676 * Not intended for algorithms.
678 public static String getSpecies( final PhylogenyNode node ) {
679 if ( !node.getNodeData().isHasTaxonomy() ) {
682 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
683 return node.getNodeData().getTaxonomy().getScientificName();
685 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
686 return node.getNodeData().getTaxonomy().getTaxonomyCode();
689 return node.getNodeData().getTaxonomy().getCommonName();
694 * Convenience method for display purposes.
695 * Not intended for algorithms.
697 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
698 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
701 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
704 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
705 if ( n.isExternal() ) {
709 if ( n.isDuplication() ) {
710 for( final PhylogenyNode desc : n.getDescendants() ) {
711 if ( !isAllDecendentsAreDuplications( desc ) ) {
723 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
724 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
725 if ( node.getChildNode( i ).isExternal() ) {
733 * This is case insensitive.
736 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
737 final String[] providers ) {
738 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
739 final String my_tax_prov = tax.getIdentifier().getProvider();
740 for( final String provider : providers ) {
741 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
752 public static void midpointRoot( final Phylogeny phylogeny ) {
753 if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
757 final int total_nodes = phylogeny.getNodeCount();
759 if ( ++counter > total_nodes ) {
760 throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
762 PhylogenyNode a = null;
765 for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
766 final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
767 final double df = getDistance( f, phylogeny.getRoot() );
774 else if ( df > db ) {
779 final double diff = da - db;
780 if ( diff < 0.000001 ) {
783 double x = da - ( diff / 2.0 );
784 while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
785 x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
788 phylogeny.reRoot( a, x );
790 phylogeny.recalculateNumberOfExternalDescendants( true );
793 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
794 final double max_bootstrap_value,
795 final double max_normalized_value ) {
796 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
797 final PhylogenyNode node = iter.next();
798 if ( node.isInternal() ) {
799 final double confidence = getConfidenceValue( node );
800 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
801 if ( confidence >= max_bootstrap_value ) {
802 setBootstrapConfidence( node, max_normalized_value );
805 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
812 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
813 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
814 if ( phy.isEmpty() ) {
817 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
818 nodes.add( iter.next() );
824 * Returns a map of distinct taxonomies of
825 * all external nodes of node.
826 * If at least one of the external nodes has no taxonomy,
830 public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
831 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
832 final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
833 for( final PhylogenyNode n : descs ) {
834 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
837 final Taxonomy t = n.getNodeData().getTaxonomy();
838 if ( tax_map.containsKey( t ) ) {
839 tax_map.put( t, tax_map.get( t ) + 1 );
849 * Arranges the order of childern for each node of this Phylogeny in such a
850 * way that either the branch with more children is on top (right) or on
851 * bottom (left), dependent on the value of boolean order.
854 * decides in which direction to order
857 public static void orderAppearance( final PhylogenyNode n,
859 final boolean order_ext_alphabetically,
860 final DESCENDANT_SORT_PRIORITY pri ) {
861 if ( n.isExternal() ) {
865 if ( ( n.getNumberOfDescendants() == 2 )
866 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
867 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2()
868 .getNumberOfExternalNodes() ) == order ) ) {
869 final PhylogenyNode temp = n.getChildNode1();
870 n.setChild1( n.getChildNode2() );
872 _order_changed = true;
874 else if ( order_ext_alphabetically ) {
875 boolean all_ext = true;
876 for( final PhylogenyNode i : n.getDescendants() ) {
877 if ( !i.isExternal() ) {
883 PhylogenyMethods.sortNodeDescendents( n, pri );
886 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
887 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
892 public synchronized static void orderAppearanceX( final PhylogenyNode n,
893 final boolean order_ext_alphabetically,
894 final DESCENDANT_SORT_PRIORITY pri ) {
895 if ( n.isExternal() ) {
899 _order_changed = false;
900 orderAppearance( n, true, order_ext_alphabetically, pri );
901 if ( !_order_changed ) {
902 orderAppearance( n, false, order_ext_alphabetically, pri );
907 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
908 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
909 final PhylogenyNode node = iter.next();
914 if ( node.isInternal() ) {
915 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
916 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
917 final PhylogenyNode child_node = node.getChildNode( i );
918 final Color child_color = getBranchColorValue( child_node );
919 if ( child_color != null ) {
921 red += child_color.getRed();
922 green += child_color.getGreen();
923 blue += child_color.getBlue();
926 setBranchColorValue( node,
927 new Color( ForesterUtil.roundToInt( red / n ),
928 ForesterUtil.roundToInt( green / n ),
929 ForesterUtil.roundToInt( blue / n ) ) );
934 public static final void preOrderReId( final Phylogeny phy ) {
935 if ( phy.isEmpty() ) {
938 phy.setIdToNodeMap( null );
939 long i = PhylogenyNode.getNodeCount();
940 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
941 it.next().setId( i++ );
943 PhylogenyNode.setNodeCount( i );
946 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file )
948 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
949 final Phylogeny[] trees = factory.create( file, parser );
950 if ( ( trees == null ) || ( trees.length == 0 ) ) {
951 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
956 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
958 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
959 for( final File file : files ) {
960 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
961 final Phylogeny[] trees = factory.create( file, parser );
962 if ( ( trees == null ) || ( trees.length == 0 ) ) {
963 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
965 tree_list.addAll( Arrays.asList( trees ) );
967 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
970 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
971 if ( remove_me.isRoot() ) {
972 if ( remove_me.getNumberOfDescendants() == 1 ) {
973 final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
974 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
975 desc.getDistanceToParent() ) );
976 desc.setParent( null );
977 phylogeny.setRoot( desc );
978 phylogeny.clearHashIdToNodeMap();
981 throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
984 else if ( remove_me.isExternal() ) {
985 phylogeny.deleteSubtree( remove_me, false );
986 phylogeny.clearHashIdToNodeMap();
987 phylogeny.externalNodesHaveChanged();
990 final PhylogenyNode parent = remove_me.getParent();
991 final List<PhylogenyNode> descs = remove_me.getDescendants();
992 parent.removeChildNode( remove_me );
993 for( final PhylogenyNode desc : descs ) {
994 parent.addAsChild( desc );
995 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
996 desc.getDistanceToParent() ) );
998 remove_me.setParent( null );
999 phylogeny.clearHashIdToNodeMap();
1000 phylogeny.externalNodesHaveChanged();
1004 private static enum NDF {
1006 TaxonomyCode( "TC" ),
1007 TaxonomyCommonName( "TN" ),
1008 TaxonomyScientificName( "TS" ),
1009 TaxonomyIdentifier( "TI" ),
1010 TaxonomySynonym( "SY" ),
1011 SequenceName( "SN" ),
1013 SequenceSymbol( "SS" ),
1014 SequenceAccession( "SA" ),
1018 BinaryCharacter( "BC" ),
1019 MolecularSequence( "MS" );
1021 private final String _text;
1023 NDF( final String text ) {
1027 public static NDF fromString( final String text ) {
1028 for( final NDF n : NDF.values() ) {
1029 if ( text.startsWith( n._text ) ) {
1037 public static List<Long> searchData( final String query,
1038 final Phylogeny phy,
1039 final boolean case_sensitive,
1040 final boolean partial,
1041 final boolean regex,
1042 final boolean search_domains,
1043 final double domains_confidence_threshold ) {
1044 final List<Long> nodes = new ArrayList<Long>();
1045 if ( phy.isEmpty() || ( query == null ) ) {
1048 if ( ForesterUtil.isEmpty( query ) ) {
1051 String my_query = query;
1053 if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
1054 ndf = NDF.fromString( my_query );
1055 if ( ndf != null ) {
1056 my_query = my_query.substring( 3 );
1059 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1060 final PhylogenyNode node = iter.next();
1061 boolean match = false;
1062 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1063 && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
1066 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1067 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1074 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1075 && match( node.getNodeData().getTaxonomy().getCommonName(),
1082 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy()
1083 && match( node.getNodeData().getTaxonomy().getScientificName(),
1090 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1091 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1092 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1099 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1100 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1101 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1102 I: for( final String syn : syns ) {
1103 if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
1109 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1110 && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
1113 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1114 && match( node.getNodeData().getSequence().getGeneName(),
1121 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
1122 && match( node.getNodeData().getSequence().getSymbol(),
1129 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence()
1130 && ( node.getNodeData().getSequence().getAccession() != null )
1131 && match( node.getNodeData().getSequence().getAccession().getValue(),
1138 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1139 && node.getNodeData().isHasSequence()
1140 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1141 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1142 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1143 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1144 && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
1150 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1151 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1152 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1153 if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
1157 if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
1163 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1164 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1165 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1166 if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
1170 if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
1174 if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
1180 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1181 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1182 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1183 I: while ( it.hasNext() ) {
1184 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1189 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1190 I: while ( it.hasNext() ) {
1191 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1197 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1198 && match( node.getNodeData().getSequence().getMolecularSequence(),
1206 nodes.add( node.getId() );
1212 public static List<Long> searchDataLogicalAnd( final String[] queries,
1213 final Phylogeny phy,
1214 final boolean case_sensitive,
1215 final boolean partial,
1216 final boolean search_domains,
1217 final double domains_confidence_threshold ) {
1218 final List<Long> nodes = new ArrayList<Long>();
1219 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1222 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1223 final PhylogenyNode node = iter.next();
1224 boolean all_matched = true;
1225 for( String query : queries ) {
1226 if ( query == null ) {
1229 query = query.trim();
1231 if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
1232 ndf = NDF.fromString( query );
1233 if ( ndf != null ) {
1234 query = query.substring( 3 );
1237 boolean match = false;
1238 if ( ForesterUtil.isEmpty( query ) ) {
1241 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1242 && match( node.getName(), query, case_sensitive, partial, false ) ) {
1245 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1246 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1253 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1254 && match( node.getNodeData().getTaxonomy().getCommonName(),
1261 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1262 && node.getNodeData().isHasTaxonomy()
1263 && match( node.getNodeData().getTaxonomy().getScientificName(),
1270 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1271 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1272 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1279 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1280 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1281 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1282 I: for( final String syn : syns ) {
1283 if ( match( syn, query, case_sensitive, partial, false ) ) {
1289 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1290 && match( node.getNodeData().getSequence().getName(),
1297 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1298 && match( node.getNodeData().getSequence().getGeneName(),
1305 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
1306 && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(),
1313 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1314 && node.getNodeData().isHasSequence()
1315 && ( node.getNodeData().getSequence().getAccession() != null )
1316 && match( node.getNodeData().getSequence().getAccession().getValue(),
1323 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1324 && node.getNodeData().isHasSequence()
1325 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1326 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1327 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1328 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1329 && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
1335 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1336 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1337 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1338 if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
1342 if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
1348 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1349 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1350 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1351 if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
1355 if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
1359 if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
1365 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1366 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1367 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1368 I: while ( it.hasNext() ) {
1369 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1374 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1375 I: while ( it.hasNext() ) {
1376 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1382 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1383 && match( node.getNodeData().getSequence().getMolecularSequence(),
1391 all_matched = false;
1395 if ( all_matched ) {
1396 nodes.add( node.getId() );
1402 public static void setAllIndicatorsToZero( final Phylogeny phy ) {
1403 for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
1404 it.next().setIndicator( ( byte ) 0 );
1409 * Convenience method.
1410 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1412 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1413 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1416 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1417 if ( node.getBranchData().getBranchColor() == null ) {
1418 node.getBranchData().setBranchColor( new BranchColor() );
1420 node.getBranchData().getBranchColor().setValue( color );
1424 * Convenience method
1426 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1427 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1431 * Convenience method.
1432 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1434 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1435 setConfidence( node, confidence_value, "" );
1439 * Convenience method.
1440 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1442 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1443 Confidence c = null;
1444 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1445 c = node.getBranchData().getConfidence( 0 );
1448 c = new Confidence();
1449 node.getBranchData().addConfidence( c );
1452 c.setValue( confidence_value );
1455 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1456 if ( !node.getNodeData().isHasTaxonomy() ) {
1457 node.getNodeData().setTaxonomy( new Taxonomy() );
1459 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1463 * Convenience method to set the taxonomy code of a phylogeny node.
1467 * @param taxonomy_code
1468 * @throws PhyloXmlDataFormatException
1470 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1471 throws PhyloXmlDataFormatException {
1472 if ( !node.getNodeData().isHasTaxonomy() ) {
1473 node.getNodeData().setTaxonomy( new Taxonomy() );
1475 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1478 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
1479 Comparator<PhylogenyNode> c;
1482 c = new PhylogenyNodeSortSequencePriority();
1485 c = new PhylogenyNodeSortNodeNamePriority();
1488 c = new PhylogenyNodeSortTaxonomyPriority();
1490 final List<PhylogenyNode> descs = node.getDescendants();
1491 Collections.sort( descs, c );
1493 for( final PhylogenyNode desc : descs ) {
1494 node.setChildNode( i++, desc );
1499 * Removes from Phylogeny to_be_stripped all external Nodes which are
1500 * associated with a species NOT found in Phylogeny reference.
1503 * a reference Phylogeny
1504 * @param to_be_stripped
1505 * Phylogeny to be stripped
1506 * @return nodes removed from to_be_stripped
1508 public static List<PhylogenyNode> taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference,
1509 final Phylogeny to_be_stripped ) {
1510 final Set<String> ref_ext_taxo = new HashSet<String>();
1511 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1512 final PhylogenyNode n = it.next();
1513 if ( !n.getNodeData().isHasTaxonomy() ) {
1514 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1516 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1517 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1519 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1520 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1522 if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
1523 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
1524 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
1527 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1528 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1529 final PhylogenyNode n = it.next();
1530 if ( !n.getNodeData().isHasTaxonomy() ) {
1531 nodes_to_delete.add( n );
1533 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1534 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1535 && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo
1536 .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
1537 nodes_to_delete.add( n );
1540 for( final PhylogenyNode n : nodes_to_delete ) {
1541 to_be_stripped.deleteSubtree( n, true );
1543 to_be_stripped.clearHashIdToNodeMap();
1544 to_be_stripped.externalNodesHaveChanged();
1545 return nodes_to_delete;
1548 final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
1549 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1550 while ( it.hasNext() ) {
1551 final PhylogenyNode n = it.next();
1552 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
1555 value = Double.parseDouble( n.getName() );
1557 catch ( final NumberFormatException e ) {
1558 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
1559 + e.getLocalizedMessage() );
1561 if ( value >= 0.0 ) {
1562 n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
1569 final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
1570 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1571 while ( it.hasNext() ) {
1572 final PhylogenyNode n = it.next();
1573 if ( !n.isExternal() && !n.isRoot() ) {
1574 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1577 value = Double.parseDouble( n.getName() );
1579 catch ( final NumberFormatException e ) {
1582 if ( ( value < 0.0 ) || ( value > 100 ) ) {
1591 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy,
1592 final String confidence_type ) {
1593 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1594 while ( it.hasNext() ) {
1595 transferInternalNodeNameToConfidence( confidence_type, it.next() );
1599 private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
1600 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
1601 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1604 d = Double.parseDouble( n.getName() );
1606 catch ( final Exception e ) {
1610 n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
1617 final static public void transferNodeNameToField( final Phylogeny phy,
1618 final PhylogenyNodeField field,
1619 final boolean external_only )
1620 throws PhyloXmlDataFormatException {
1621 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1622 while ( it.hasNext() ) {
1623 final PhylogenyNode n = it.next();
1624 if ( external_only && n.isInternal() ) {
1627 final String name = n.getName().trim();
1628 if ( !ForesterUtil.isEmpty( name ) ) {
1632 setTaxonomyCode( n, name );
1634 case TAXONOMY_SCIENTIFIC_NAME:
1636 if ( !n.getNodeData().isHasTaxonomy() ) {
1637 n.getNodeData().setTaxonomy( new Taxonomy() );
1639 n.getNodeData().getTaxonomy().setScientificName( name );
1641 case TAXONOMY_COMMON_NAME:
1643 if ( !n.getNodeData().isHasTaxonomy() ) {
1644 n.getNodeData().setTaxonomy( new Taxonomy() );
1646 n.getNodeData().getTaxonomy().setCommonName( name );
1648 case SEQUENCE_SYMBOL:
1650 if ( !n.getNodeData().isHasSequence() ) {
1651 n.getNodeData().setSequence( new Sequence() );
1653 n.getNodeData().getSequence().setSymbol( name );
1657 if ( !n.getNodeData().isHasSequence() ) {
1658 n.getNodeData().setSequence( new Sequence() );
1660 n.getNodeData().getSequence().setName( name );
1662 case TAXONOMY_ID_UNIPROT_1: {
1663 if ( !n.getNodeData().isHasTaxonomy() ) {
1664 n.getNodeData().setTaxonomy( new Taxonomy() );
1667 final int i = name.indexOf( '_' );
1669 id = name.substring( 0, i );
1674 n.getNodeData().getTaxonomy()
1675 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1678 case TAXONOMY_ID_UNIPROT_2: {
1679 if ( !n.getNodeData().isHasTaxonomy() ) {
1680 n.getNodeData().setTaxonomy( new Taxonomy() );
1683 final int i = name.indexOf( '_' );
1685 id = name.substring( i + 1, name.length() );
1690 n.getNodeData().getTaxonomy()
1691 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1695 if ( !n.getNodeData().isHasTaxonomy() ) {
1696 n.getNodeData().setTaxonomy( new Taxonomy() );
1698 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
1702 throw new IllegalArgumentException( "don't know what to do with " + field );
1709 static double addPhylogenyDistances( final double a, final double b ) {
1710 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
1713 else if ( a >= 0.0 ) {
1716 else if ( b >= 0.0 ) {
1719 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1722 static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
1724 boolean all_default = true;
1725 while ( anc != desc ) {
1726 if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
1727 d += desc.getDistanceToParent();
1728 if ( all_default ) {
1729 all_default = false;
1732 desc = desc.getParent();
1734 if ( all_default ) {
1735 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1740 public static double calculateAverageTreeHeight( final PhylogenyNode node ) {
1741 final List<PhylogenyNode> ext = node.getAllExternalDescendants();
1743 for( PhylogenyNode n : ext ) {
1744 while ( n != node ) {
1745 if ( n.getDistanceToParent() > 0 ) {
1746 s += n.getDistanceToParent();
1751 return s / ext.size();
1755 * Deep copies the phylogeny originating from this node.
1757 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
1758 if ( source == null ) {
1762 final PhylogenyNode newnode = source.copyNodeData();
1763 if ( !source.isExternal() ) {
1764 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1765 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
1773 * Shallow copies the phylogeny originating from this node.
1775 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
1776 if ( source == null ) {
1780 final PhylogenyNode newnode = source.copyNodeDataShallow();
1781 if ( !source.isExternal() ) {
1782 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1783 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
1790 private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
1791 final double min_distance_to_root ) {
1792 final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
1793 final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
1794 for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
1795 if ( ext.getIndicator() != 0 ) {
1796 throw new RuntimeException( "this should not have happened" );
1798 ext.setIndicator( ( byte ) 1 );
1805 * Calculates the distance between PhylogenyNodes n1 and n2.
1806 * PRECONDITION: n1 is a descendant of n2.
1809 * a descendant of n2
1811 * @return distance between n1 and n2
1813 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
1815 while ( n1 != n2 ) {
1816 if ( n1.getDistanceToParent() > 0.0 ) {
1817 d += n1.getDistanceToParent();
1819 n1 = n1.getParent();
1824 private static boolean match( final String s,
1826 final boolean case_sensitive,
1827 final boolean partial,
1828 final boolean regex ) {
1829 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1832 String my_s = s.trim();
1833 String my_query = query.trim();
1834 if ( !case_sensitive && !regex ) {
1835 my_s = my_s.toLowerCase();
1836 my_query = my_query.toLowerCase();
1841 if ( case_sensitive ) {
1842 p = Pattern.compile( my_query );
1845 p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
1848 catch ( final PatternSyntaxException e ) {
1852 return p.matcher( my_s ).find();
1858 else if ( partial ) {
1859 return my_s.indexOf( my_query ) >= 0;
1864 p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
1866 catch ( final PatternSyntaxException e ) {
1870 return p.matcher( my_s ).find();
1878 private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
1879 PhylogenyNode n = node;
1880 PhylogenyNode prev = node;
1881 while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
1888 public static enum DESCENDANT_SORT_PRIORITY {
1894 public static enum PhylogenyNodeField {
1899 TAXONOMY_COMMON_NAME,
1901 TAXONOMY_ID_UNIPROT_1,
1902 TAXONOMY_ID_UNIPROT_2,
1903 TAXONOMY_SCIENTIFIC_NAME;
1906 public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
1907 for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
1908 final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
1909 final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
1910 final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
1911 new_seq.setMolecularSequenceAligned( true );
1912 new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
1913 new_seq.setName( seq.getIdentifier() );
1915 new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
1917 catch ( final PhyloXmlDataFormatException ignore ) {
1920 node.getNodeData().addSequence( new_seq );
1924 final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
1927 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1928 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1929 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1930 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1931 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1932 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1934 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1935 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1936 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1937 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1940 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1941 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1942 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1943 return n1.getNodeData().getSequence().getName().toLowerCase()
1944 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1946 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1947 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1948 return n1.getNodeData().getSequence().getGeneName()
1949 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1951 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1952 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1953 return n1.getNodeData().getSequence().getSymbol()
1954 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1957 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1958 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1964 final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
1967 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1968 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1969 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1970 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1971 return n1.getNodeData().getSequence().getName().toLowerCase()
1972 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1974 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1975 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1976 return n1.getNodeData().getSequence().getGeneName()
1977 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1979 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1980 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1981 return n1.getNodeData().getSequence().getSymbol()
1982 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1985 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1986 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1987 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1988 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1989 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1991 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1992 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1993 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1994 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1997 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1998 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2004 final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
2007 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
2008 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
2009 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2011 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
2012 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
2013 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
2014 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
2015 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
2017 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
2018 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
2019 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
2020 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
2023 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
2024 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
2025 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
2026 return n1.getNodeData().getSequence().getName().toLowerCase()
2027 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
2029 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
2030 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
2031 return n1.getNodeData().getSequence().getGeneName()
2032 .compareTo( n2.getNodeData().getSequence().getGeneName() );
2034 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
2035 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
2036 return n1.getNodeData().getSequence().getSymbol()
2037 .compareTo( n2.getNodeData().getSequence().getSymbol() );
2044 public final static Map<Long, Integer> calculateDepths( final Phylogeny phy ) {
2045 final Map<Long, Integer> depths = new HashMap<Long, Integer>();
2046 calculateDepthsHelper( phy.getRoot(), 0, depths );
2050 private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map<Long, Integer> depths ) {
2051 depths.put( n.getId(), d );
2053 final List<PhylogenyNode> descs = n.getDescendants();
2054 for( final PhylogenyNode desc : descs ) {
2055 calculateDepthsHelper( desc, d, depths );
2059 public final static void collapseToDepth( final Phylogeny phy, final int depth ) {
2060 if ( phy.getNumberOfExternalNodes() < 3 ) {
2063 collapseToDepthHelper( phy.getRoot(), 0, depth );
2066 private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) {
2067 if ( n.isExternal() ) {
2068 n.setCollapse( false );
2072 n.setCollapse( true );
2073 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2074 while ( it.hasNext() ) {
2075 it.next().setCollapse( true );
2079 n.setCollapse( false );
2081 final List<PhylogenyNode> descs = n.getDescendants();
2082 for( final PhylogenyNode desc : descs ) {
2083 collapseToDepthHelper( desc, d, depth );
2090 public final static void collapseToRank( final Phylogeny phy, final int rank ) {
2091 if ( phy.getNumberOfExternalNodes() < 3 ) {
2094 if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) {
2095 throw new IllegalArgumentException( "Rank " + rank + " is out of range" );
2097 collapseToRankHelper( phy.getRoot(), rank );
2100 private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) {
2101 if ( n.isExternal() ) {
2102 n.setCollapse( false );
2105 if ( ( n.getNodeData().getTaxonomy() != null )
2106 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) {
2107 final String current_rank = n.getNodeData().getTaxonomy().getRank();
2108 if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
2109 System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." );
2112 if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) {
2113 n.setCollapse( true );
2115 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2116 while ( it.hasNext() ) {
2117 it.next().setCollapse( true );
2123 n.setCollapse( false );
2124 final List<PhylogenyNode> descs = n.getDescendants();
2125 for( final PhylogenyNode desc : descs ) {
2126 collapseToRankHelper( desc, target_rank );
2130 public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) {
2131 PhylogenyNode n = node;
2132 while ( n.isInternal() ) {
2133 n = n.getFirstChildNode();
2138 public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) {
2139 PhylogenyNode n = node;
2140 while ( n.isInternal() ) {
2141 n = n.getLastChildNode();
2146 public final static boolean isHasCollapsedNodes( final Phylogeny phy ) {
2147 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
2148 final PhylogenyNode n = iter.next();
2149 if ( !n.isExternal() && ( n.isCollapse() ) ) {