2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 import java.util.regex.PatternSyntaxException;
45 import org.forester.io.parsers.FastaParser;
46 import org.forester.io.parsers.PhylogenyParser;
47 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
48 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
49 import org.forester.io.parsers.util.PhylogenyParserException;
50 import org.forester.msa.Msa;
51 import org.forester.phylogeny.data.Accession;
52 import org.forester.phylogeny.data.Annotation;
53 import org.forester.phylogeny.data.BranchColor;
54 import org.forester.phylogeny.data.BranchWidth;
55 import org.forester.phylogeny.data.Confidence;
56 import org.forester.phylogeny.data.DomainArchitecture;
57 import org.forester.phylogeny.data.Event;
58 import org.forester.phylogeny.data.Identifier;
59 import org.forester.phylogeny.data.PhylogenyDataUtil;
60 import org.forester.phylogeny.data.Sequence;
61 import org.forester.phylogeny.data.Taxonomy;
62 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
63 import org.forester.phylogeny.factories.PhylogenyFactory;
64 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
65 import org.forester.phylogeny.iterators.PreorderTreeIterator;
66 import org.forester.util.BasicDescriptiveStatistics;
67 import org.forester.util.DescriptiveStatistics;
68 import org.forester.util.ForesterUtil;
69 import org.forester.util.TaxonomyUtil;
71 public class PhylogenyMethods {
73 private static boolean _order_changed;
75 private PhylogenyMethods() {
76 // Hidden constructor.
80 public Object clone() throws CloneNotSupportedException {
81 throw new CloneNotSupportedException();
84 public static boolean extractFastaInformation( final Phylogeny phy ) {
85 boolean could_extract = false;
86 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
87 final PhylogenyNode node = iter.next();
88 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
89 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
90 if ( name_m.lookingAt() ) {
92 final String acc_source = name_m.group( 1 );
93 final String acc = name_m.group( 2 );
94 final String seq_name = name_m.group( 3 );
95 final String tax_sn = name_m.group( 4 );
96 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
97 ForesterUtil.ensurePresenceOfSequence( node );
98 node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
100 if ( !ForesterUtil.isEmpty( seq_name ) ) {
101 ForesterUtil.ensurePresenceOfSequence( node );
102 node.getNodeData().getSequence( 0 ).setName( seq_name );
104 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
105 ForesterUtil.ensurePresenceOfTaxonomy( node );
106 node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
111 return could_extract;
114 public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
115 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
116 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
117 final PhylogenyNode n = iter.next();
118 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
119 stats.addValue( n.getDistanceToParent() );
125 public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
126 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
127 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
128 final PhylogenyNode n = iter.next();
129 if ( !n.isExternal() && !n.isRoot() ) {
130 if ( n.getBranchData().isHasConfidences() ) {
131 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
132 final Confidence c = n.getBranchData().getConfidences().get( i );
133 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
134 stats.add( i, new BasicDescriptiveStatistics() );
136 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
137 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
138 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
139 throw new IllegalArgumentException( "support values in node [" + n.toString()
140 + "] appear inconsistently ordered" );
143 stats.get( i ).setDescription( c.getType() );
145 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
154 * For external nodes the level is 0.
159 public static int calculateLevel( final PhylogenyNode node ) {
160 if ( node.isExternal() ) {
164 for( PhylogenyNode ext : node.getAllExternalDescendants() ) {
166 while ( ext != node ) {
167 ext = ext.getParent();
170 if ( counter > level ) {
178 * Calculates the distance between PhylogenyNodes node1 and node2.
183 * @return distance between node1 and node2
185 public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
186 final PhylogenyNode lca = calculateLCA( node1, node2 );
187 final PhylogenyNode n1 = node1;
188 final PhylogenyNode n2 = node2;
189 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
193 * Returns the LCA of PhylogenyNodes node1 and node2.
198 * @return LCA of node1 and node2
200 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
201 if ( node1 == null ) {
202 throw new IllegalArgumentException( "first argument (node) is null" );
204 if ( node2 == null ) {
205 throw new IllegalArgumentException( "second argument (node) is null" );
207 if ( node1 == node2 ) {
210 if ( ( node1.getParent() == node2.getParent() ) ) {
211 return node1.getParent();
213 int depth1 = node1.calculateDepth();
214 int depth2 = node2.calculateDepth();
215 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
216 if ( depth1 > depth2 ) {
217 node1 = node1.getParent();
220 else if ( depth2 > depth1 ) {
221 node2 = node2.getParent();
225 if ( node1 == node2 ) {
228 node1 = node1.getParent();
229 node2 = node2.getParent();
234 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
238 * Returns the LCA of PhylogenyNodes node1 and node2.
239 * Precondition: ids are in pre-order (or level-order).
244 * @return LCA of node1 and node2
246 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
247 if ( node1 == null ) {
248 throw new IllegalArgumentException( "first argument (node) is null" );
250 if ( node2 == null ) {
251 throw new IllegalArgumentException( "second argument (node) is null" );
253 while ( node1 != node2 ) {
254 if ( node1.getId() > node2.getId() ) {
255 node1 = node1.getParent();
258 node2 = node2.getParent();
266 public static int calculateMaxDepth( final Phylogeny phy ) {
268 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
269 final PhylogenyNode node = iter.next();
270 final int steps = node.calculateDepth();
278 public static String[] obtainPresentRanksSorted( final Phylogeny phy ) {
279 final Set<String> present_ranks = new HashSet<String>();
280 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
281 final PhylogenyNode node = iter.next();
282 if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null )
283 && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) {
284 final String current_rank = node.getNodeData().getTaxonomy().getRank();
285 if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
286 present_ranks.add( current_rank );
290 final String ordered_ranks[] = new String[ present_ranks.size() + 1 ];
292 for( final String rank : TaxonomyUtil.RANKS ) {
293 if ( present_ranks.contains( rank ) ) {
294 ordered_ranks[ c++ ] = rank;
297 ordered_ranks[ c ] = "off";
298 return ordered_ranks;
301 public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) {
303 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
304 PhylogenyNode n = iter.next();
306 while ( n.getParent() != null ) {
307 if ( !n.isCollapse() ) {
319 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
321 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
322 final PhylogenyNode node = iter.next();
323 final double d = node.calculateDistanceToRoot();
331 public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
333 PhylogenyNode max_node = phy.getFirstExternalNode();
334 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
335 final PhylogenyNode node = iter.next();
336 final double d = node.calculateDistanceToRoot();
345 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
346 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
348 for( final PhylogenyNode n : descs ) {
349 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
356 public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
357 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
358 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
359 final PhylogenyNode n = iter.next();
360 if ( !n.isExternal() ) {
361 stats.addValue( n.getNumberOfDescendants() );
367 public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
368 final List<PhylogenyNode> eds = n.getAllExternalDescendants();
369 final List<Double> d = new ArrayList<Double>();
370 for( final PhylogenyNode ed : eds ) {
371 d.add( calculateDistanceToAncestor( n, ed ) );
373 for( int i = 0; i < eds.size(); ++i ) {
374 n.setChildNode( i, eds.get( i ) );
375 eds.get( i ).setDistanceToParent( d.get( i ) );
379 public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
381 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
382 final PhylogenyNode n = iter.next();
383 if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) {
390 public static int countNumberOfPolytomies( final Phylogeny phy ) {
392 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
393 final PhylogenyNode n = iter.next();
394 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
401 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
402 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
403 final List<PhylogenyNode> ext = phy.getExternalNodes();
404 for( final PhylogenyNode n : ext ) {
405 nodes.put( n.getName(), n );
410 public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
411 for( final Long id : to_delete ) {
412 phy.deleteSubtree( phy.getNode( id ), true );
414 phy.clearHashIdToNodeMap();
415 phy.externalNodesHaveChanged();
418 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
419 throws IllegalArgumentException {
420 for( final String element : node_names_to_delete ) {
421 if ( ForesterUtil.isEmpty( element ) ) {
424 List<PhylogenyNode> nodes = null;
425 nodes = p.getNodes( element );
426 final Iterator<PhylogenyNode> it = nodes.iterator();
427 while ( it.hasNext() ) {
428 final PhylogenyNode n = it.next();
429 if ( !n.isExternal() ) {
430 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
432 p.deleteSubtree( n, true );
435 p.clearHashIdToNodeMap();
436 p.externalNodesHaveChanged();
439 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
440 final Phylogeny p ) {
441 final PhylogenyNodeIterator it = p.iteratorExternalForward();
442 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
444 Arrays.sort( node_names_to_keep );
445 while ( it.hasNext() ) {
446 final String curent_name = it.next().getName();
447 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
448 to_delete[ i++ ] = curent_name;
451 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
452 final List<String> deleted = new ArrayList<String>();
453 for( final String n : to_delete ) {
454 if ( !ForesterUtil.isEmpty( n ) ) {
461 public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep,
462 final Phylogeny phy ) {
463 final Set<Long> to_delete = new HashSet<Long>();
464 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
465 final PhylogenyNode n = it.next();
466 if ( n.getNodeData().isHasTaxonomy() ) {
467 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
468 to_delete.add( n.getId() );
472 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
475 deleteExternalNodesNegativeSelection( to_delete, phy );
478 final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
479 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
480 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
481 final PhylogenyNode n = iter.next();
482 if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
486 for( final PhylogenyNode d : to_delete ) {
487 PhylogenyMethods.removeNode( d, phy );
489 phy.clearHashIdToNodeMap();
490 phy.externalNodesHaveChanged();
493 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
494 if ( n.isInternal() ) {
495 throw new IllegalArgumentException( "node is not external" );
497 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
498 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
499 final PhylogenyNode i = it.next();
500 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
504 for( final PhylogenyNode d : to_delete ) {
505 phy.deleteSubtree( d, true );
507 phy.clearHashIdToNodeMap();
508 phy.externalNodesHaveChanged();
511 public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
512 final double min_distance_to_root ) {
513 if ( min_distance_to_root <= 0 ) {
514 throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
516 final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
517 setAllIndicatorsToZero( phy );
518 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
519 final PhylogenyNode n = it.next();
520 if ( n.getIndicator() != 0 ) {
523 l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
525 throw new RuntimeException( "this should not have happened" );
531 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
532 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
533 final Set<Long> encountered = new HashSet<Long>();
534 if ( !node.isExternal() ) {
535 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
536 for( PhylogenyNode current : exts ) {
537 descs.add( current );
538 while ( current != node ) {
539 current = current.getParent();
540 if ( encountered.contains( current.getId() ) ) {
543 descs.add( current );
544 encountered.add( current.getId() );
558 public static Color getBranchColorValue( final PhylogenyNode node ) {
559 if ( node.getBranchData().getBranchColor() == null ) {
562 return node.getBranchData().getBranchColor().getValue();
568 public static double getBranchWidthValue( final PhylogenyNode node ) {
569 if ( !node.getBranchData().isHasBranchWidth() ) {
570 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
572 return node.getBranchData().getBranchWidth().getValue();
578 public static double getConfidenceValue( final PhylogenyNode node ) {
579 if ( !node.getBranchData().isHasConfidences() ) {
580 return Confidence.CONFIDENCE_DEFAULT_VALUE;
582 return node.getBranchData().getConfidence( 0 ).getValue();
588 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
589 if ( !node.getBranchData().isHasConfidences() ) {
590 return new double[ 0 ];
592 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
594 for( final Confidence c : node.getBranchData().getConfidences() ) {
595 values[ i++ ] = c.getValue();
600 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
601 return calculateLCA( n1, n2 ).getNodeData().getEvent();
605 * Returns taxonomy t if all external descendants have
606 * the same taxonomy t, null otherwise.
609 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
610 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
612 for( final PhylogenyNode n : descs ) {
613 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
616 else if ( tax == null ) {
617 tax = n.getNodeData().getTaxonomy();
619 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
626 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
627 final List<PhylogenyNode> children = node.getAllExternalDescendants();
628 PhylogenyNode farthest = null;
629 double longest = -Double.MAX_VALUE;
630 for( final PhylogenyNode child : children ) {
631 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
633 longest = PhylogenyMethods.getDistance( child, node );
639 // public static PhylogenyMethods getInstance() {
640 // if ( PhylogenyMethods._instance == null ) {
641 // PhylogenyMethods._instance = new PhylogenyMethods();
643 // return PhylogenyMethods._instance;
646 * Returns the largest confidence value found on phy.
648 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
649 double max = -Double.MAX_VALUE;
650 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
651 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
652 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
659 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
660 int min = Integer.MAX_VALUE;
663 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
665 if ( n.isInternal() ) {
666 d = n.getNumberOfDescendants();
676 * Convenience method for display purposes.
677 * Not intended for algorithms.
679 public static String getSpecies( final PhylogenyNode node ) {
680 if ( !node.getNodeData().isHasTaxonomy() ) {
683 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
684 return node.getNodeData().getTaxonomy().getScientificName();
686 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
687 return node.getNodeData().getTaxonomy().getTaxonomyCode();
690 return node.getNodeData().getTaxonomy().getCommonName();
695 * Convenience method for display purposes.
696 * Not intended for algorithms.
698 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
699 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
702 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
705 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
706 if ( n.isExternal() ) {
710 if ( n.isDuplication() ) {
711 for( final PhylogenyNode desc : n.getDescendants() ) {
712 if ( !isAllDecendentsAreDuplications( desc ) ) {
724 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
725 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
726 if ( node.getChildNode( i ).isExternal() ) {
734 * This is case insensitive.
737 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
738 final String[] providers ) {
739 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
740 final String my_tax_prov = tax.getIdentifier().getProvider();
741 for( final String provider : providers ) {
742 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
753 public static void midpointRoot( final Phylogeny phylogeny ) {
754 if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
758 final int total_nodes = phylogeny.getNodeCount();
760 if ( ++counter > total_nodes ) {
761 throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
763 PhylogenyNode a = null;
766 for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
767 final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
768 final double df = getDistance( f, phylogeny.getRoot() );
775 else if ( df > db ) {
780 final double diff = da - db;
781 if ( diff < 0.000001 ) {
784 double x = da - ( diff / 2.0 );
785 while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
786 x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
789 phylogeny.reRoot( a, x );
791 phylogeny.recalculateNumberOfExternalDescendants( true );
794 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
795 final double max_bootstrap_value,
796 final double max_normalized_value ) {
797 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
798 final PhylogenyNode node = iter.next();
799 if ( node.isInternal() ) {
800 final double confidence = getConfidenceValue( node );
801 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
802 if ( confidence >= max_bootstrap_value ) {
803 setBootstrapConfidence( node, max_normalized_value );
806 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
813 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
814 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
815 if ( phy.isEmpty() ) {
818 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
819 nodes.add( iter.next() );
825 * Returns a map of distinct taxonomies of
826 * all external nodes of node.
827 * If at least one of the external nodes has no taxonomy,
831 public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
832 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
833 final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
834 for( final PhylogenyNode n : descs ) {
835 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
838 final Taxonomy t = n.getNodeData().getTaxonomy();
839 if ( tax_map.containsKey( t ) ) {
840 tax_map.put( t, tax_map.get( t ) + 1 );
850 * Arranges the order of childern for each node of this Phylogeny in such a
851 * way that either the branch with more children is on top (right) or on
852 * bottom (left), dependent on the value of boolean order.
855 * decides in which direction to order
858 public static void orderAppearance( final PhylogenyNode n,
860 final boolean order_ext_alphabetically,
861 final DESCENDANT_SORT_PRIORITY pri ) {
862 if ( n.isExternal() ) {
866 if ( ( n.getNumberOfDescendants() == 2 )
867 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
868 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2()
869 .getNumberOfExternalNodes() ) == order ) ) {
870 final PhylogenyNode temp = n.getChildNode1();
871 n.setChild1( n.getChildNode2() );
873 _order_changed = true;
875 else if ( order_ext_alphabetically ) {
876 boolean all_ext = true;
877 for( final PhylogenyNode i : n.getDescendants() ) {
878 if ( !i.isExternal() ) {
884 PhylogenyMethods.sortNodeDescendents( n, pri );
887 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
888 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
893 public synchronized static void orderAppearanceX( final PhylogenyNode n,
894 final boolean order_ext_alphabetically,
895 final DESCENDANT_SORT_PRIORITY pri ) {
896 if ( n.isExternal() ) {
900 _order_changed = false;
901 orderAppearance( n, true, order_ext_alphabetically, pri );
902 if ( !_order_changed ) {
903 orderAppearance( n, false, order_ext_alphabetically, pri );
908 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
909 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
910 final PhylogenyNode node = iter.next();
915 if ( node.isInternal() ) {
916 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
917 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
918 final PhylogenyNode child_node = node.getChildNode( i );
919 final Color child_color = getBranchColorValue( child_node );
920 if ( child_color != null ) {
922 red += child_color.getRed();
923 green += child_color.getGreen();
924 blue += child_color.getBlue();
927 setBranchColorValue( node,
928 new Color( ForesterUtil.roundToInt( red / n ),
929 ForesterUtil.roundToInt( green / n ),
930 ForesterUtil.roundToInt( blue / n ) ) );
935 public static final void preOrderReId( final Phylogeny phy ) {
936 if ( phy.isEmpty() ) {
939 phy.setIdToNodeMap( null );
940 long i = PhylogenyNode.getNodeCount();
941 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
942 it.next().setId( i++ );
944 PhylogenyNode.setNodeCount( i );
947 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file )
949 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
950 final Phylogeny[] trees = factory.create( file, parser );
951 if ( ( trees == null ) || ( trees.length == 0 ) ) {
952 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
957 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
959 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
960 for( final File file : files ) {
961 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
962 final Phylogeny[] trees = factory.create( file, parser );
963 if ( ( trees == null ) || ( trees.length == 0 ) ) {
964 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
966 tree_list.addAll( Arrays.asList( trees ) );
968 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
971 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
972 if ( remove_me.isRoot() ) {
973 if ( remove_me.getNumberOfDescendants() == 1 ) {
974 final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
975 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
976 desc.getDistanceToParent() ) );
977 desc.setParent( null );
978 phylogeny.setRoot( desc );
979 phylogeny.clearHashIdToNodeMap();
982 throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
985 else if ( remove_me.isExternal() ) {
986 phylogeny.deleteSubtree( remove_me, false );
987 phylogeny.clearHashIdToNodeMap();
988 phylogeny.externalNodesHaveChanged();
991 final PhylogenyNode parent = remove_me.getParent();
992 final List<PhylogenyNode> descs = remove_me.getDescendants();
993 parent.removeChildNode( remove_me );
994 for( final PhylogenyNode desc : descs ) {
995 parent.addAsChild( desc );
996 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
997 desc.getDistanceToParent() ) );
999 remove_me.setParent( null );
1000 phylogeny.clearHashIdToNodeMap();
1001 phylogeny.externalNodesHaveChanged();
1005 private static enum NDF {
1007 TaxonomyCode( "TC" ),
1008 TaxonomyCommonName( "TN" ),
1009 TaxonomyScientificName( "TS" ),
1010 TaxonomyIdentifier( "TI" ),
1011 TaxonomySynonym( "SY" ),
1012 SequenceName( "SN" ),
1014 SequenceSymbol( "SS" ),
1015 SequenceAccession( "SA" ),
1019 BinaryCharacter( "BC" ),
1020 MolecularSequence( "MS" );
1022 private final String _text;
1024 NDF( final String text ) {
1028 public static NDF fromString( final String text ) {
1029 for( final NDF n : NDF.values() ) {
1030 if ( text.startsWith( n._text ) ) {
1038 public static List<Long> searchData( final String query,
1039 final Phylogeny phy,
1040 final boolean case_sensitive,
1041 final boolean partial,
1042 final boolean regex,
1043 final boolean search_domains,
1044 final double domains_confidence_threshold ) {
1045 final List<Long> nodes = new ArrayList<Long>();
1046 if ( phy.isEmpty() || ( query == null ) ) {
1049 if ( ForesterUtil.isEmpty( query ) ) {
1052 String my_query = query;
1054 if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
1055 ndf = NDF.fromString( my_query );
1056 if ( ndf != null ) {
1057 my_query = my_query.substring( 3 );
1060 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1061 final PhylogenyNode node = iter.next();
1062 boolean match = false;
1063 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1064 && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
1067 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1068 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1075 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1076 && match( node.getNodeData().getTaxonomy().getCommonName(),
1083 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy()
1084 && match( node.getNodeData().getTaxonomy().getScientificName(),
1091 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1092 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1093 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1100 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1101 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1102 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1103 I: for( final String syn : syns ) {
1104 if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
1110 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1111 && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
1114 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1115 && match( node.getNodeData().getSequence().getGeneName(),
1122 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
1123 && match( node.getNodeData().getSequence().getSymbol(),
1130 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence()
1131 && ( node.getNodeData().getSequence().getAccession() != null )
1132 && match( node.getNodeData().getSequence().getAccession().getValue(),
1139 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1140 && node.getNodeData().isHasSequence()
1141 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1142 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1143 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1144 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1145 && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
1151 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1152 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1153 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1154 if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
1158 if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
1164 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1165 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1166 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1167 if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
1171 if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
1175 if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
1181 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1182 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1183 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1184 I: while ( it.hasNext() ) {
1185 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1190 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1191 I: while ( it.hasNext() ) {
1192 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1198 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1199 && match( node.getNodeData().getSequence().getMolecularSequence(),
1207 nodes.add( node.getId() );
1213 public static List<Long> searchDataLogicalAnd( final String[] queries,
1214 final Phylogeny phy,
1215 final boolean case_sensitive,
1216 final boolean partial,
1217 final boolean search_domains,
1218 final double domains_confidence_threshold ) {
1219 final List<Long> nodes = new ArrayList<Long>();
1220 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1223 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1224 final PhylogenyNode node = iter.next();
1225 boolean all_matched = true;
1226 for( String query : queries ) {
1227 if ( query == null ) {
1230 query = query.trim();
1232 if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
1233 ndf = NDF.fromString( query );
1234 if ( ndf != null ) {
1235 query = query.substring( 3 );
1238 boolean match = false;
1239 if ( ForesterUtil.isEmpty( query ) ) {
1242 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1243 && match( node.getName(), query, case_sensitive, partial, false ) ) {
1246 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1247 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1254 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1255 && match( node.getNodeData().getTaxonomy().getCommonName(),
1262 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1263 && node.getNodeData().isHasTaxonomy()
1264 && match( node.getNodeData().getTaxonomy().getScientificName(),
1271 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1272 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1273 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1280 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1281 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1282 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1283 I: for( final String syn : syns ) {
1284 if ( match( syn, query, case_sensitive, partial, false ) ) {
1290 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1291 && match( node.getNodeData().getSequence().getName(),
1298 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1299 && match( node.getNodeData().getSequence().getGeneName(),
1306 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
1307 && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(),
1314 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1315 && node.getNodeData().isHasSequence()
1316 && ( node.getNodeData().getSequence().getAccession() != null )
1317 && match( node.getNodeData().getSequence().getAccession().getValue(),
1324 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1325 && node.getNodeData().isHasSequence()
1326 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1327 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1328 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1329 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1330 && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
1336 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1337 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1338 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1339 if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
1343 if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
1349 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1350 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1351 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1352 if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
1356 if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
1360 if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
1366 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1367 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1368 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1369 I: while ( it.hasNext() ) {
1370 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1375 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1376 I: while ( it.hasNext() ) {
1377 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1383 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1384 && match( node.getNodeData().getSequence().getMolecularSequence(),
1392 all_matched = false;
1396 if ( all_matched ) {
1397 nodes.add( node.getId() );
1403 public static void setAllIndicatorsToZero( final Phylogeny phy ) {
1404 for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
1405 it.next().setIndicator( ( byte ) 0 );
1410 * Convenience method.
1411 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1413 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1414 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1417 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1418 if ( node.getBranchData().getBranchColor() == null ) {
1419 node.getBranchData().setBranchColor( new BranchColor() );
1421 node.getBranchData().getBranchColor().setValue( color );
1425 * Convenience method
1427 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1428 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1432 * Convenience method.
1433 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1435 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1436 setConfidence( node, confidence_value, "" );
1440 * Convenience method.
1441 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1443 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1444 Confidence c = null;
1445 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1446 c = node.getBranchData().getConfidence( 0 );
1449 c = new Confidence();
1450 node.getBranchData().addConfidence( c );
1453 c.setValue( confidence_value );
1456 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1457 if ( !node.getNodeData().isHasTaxonomy() ) {
1458 node.getNodeData().setTaxonomy( new Taxonomy() );
1460 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1464 * Convenience method to set the taxonomy code of a phylogeny node.
1468 * @param taxonomy_code
1469 * @throws PhyloXmlDataFormatException
1471 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1472 throws PhyloXmlDataFormatException {
1473 if ( !node.getNodeData().isHasTaxonomy() ) {
1474 node.getNodeData().setTaxonomy( new Taxonomy() );
1476 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1479 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
1480 Comparator<PhylogenyNode> c;
1483 c = new PhylogenyNodeSortSequencePriority();
1486 c = new PhylogenyNodeSortNodeNamePriority();
1489 c = new PhylogenyNodeSortTaxonomyPriority();
1491 final List<PhylogenyNode> descs = node.getDescendants();
1492 Collections.sort( descs, c );
1494 for( final PhylogenyNode desc : descs ) {
1495 node.setChildNode( i++, desc );
1500 * Removes from Phylogeny to_be_stripped all external Nodes which are
1501 * associated with a species NOT found in Phylogeny reference.
1504 * a reference Phylogeny
1505 * @param to_be_stripped
1506 * Phylogeny to be stripped
1507 * @return nodes removed from to_be_stripped
1509 public static List<PhylogenyNode> taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference,
1510 final Phylogeny to_be_stripped ) {
1511 final Set<String> ref_ext_taxo = new HashSet<String>();
1512 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1513 final PhylogenyNode n = it.next();
1514 if ( !n.getNodeData().isHasTaxonomy() ) {
1515 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1517 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1518 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1520 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1521 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1523 if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
1524 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
1525 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
1528 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1529 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1530 final PhylogenyNode n = it.next();
1531 if ( !n.getNodeData().isHasTaxonomy() ) {
1532 nodes_to_delete.add( n );
1534 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1535 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1536 && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo
1537 .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
1538 nodes_to_delete.add( n );
1541 for( final PhylogenyNode n : nodes_to_delete ) {
1542 to_be_stripped.deleteSubtree( n, true );
1544 to_be_stripped.clearHashIdToNodeMap();
1545 to_be_stripped.externalNodesHaveChanged();
1546 return nodes_to_delete;
1549 final static public void transferInternalNamesToConfidenceValues( final Phylogeny phy,
1550 final String confidence_type ) {
1551 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1552 while ( it.hasNext() ) {
1553 final PhylogenyNode n = it.next();
1554 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
1557 value = Double.parseDouble( n.getName() );
1559 catch ( final NumberFormatException e ) {
1560 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
1561 + e.getLocalizedMessage() );
1563 if ( value >= 0.0 ) {
1564 n.getBranchData().addConfidence( new Confidence( value, confidence_type ) );
1571 final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
1572 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1573 while ( it.hasNext() ) {
1574 final PhylogenyNode n = it.next();
1575 if ( !n.isExternal() && !n.isRoot() ) {
1576 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1579 value = Double.parseDouble( n.getName() );
1581 catch ( final NumberFormatException e ) {
1584 if ( ( value < 0.0 ) || ( value > 100 ) ) {
1593 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy,
1594 final String confidence_type ) {
1595 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1596 while ( it.hasNext() ) {
1597 transferInternalNodeNameToConfidence( confidence_type, it.next() );
1601 private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
1602 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
1603 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1606 d = Double.parseDouble( n.getName() );
1608 catch ( final Exception e ) {
1612 n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
1619 final static public void transferNodeNameToField( final Phylogeny phy,
1620 final PhylogenyNodeField field,
1621 final boolean external_only )
1622 throws PhyloXmlDataFormatException {
1623 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1624 while ( it.hasNext() ) {
1625 final PhylogenyNode n = it.next();
1626 if ( external_only && n.isInternal() ) {
1629 final String name = n.getName().trim();
1630 if ( !ForesterUtil.isEmpty( name ) ) {
1634 setTaxonomyCode( n, name );
1636 case TAXONOMY_SCIENTIFIC_NAME:
1638 if ( !n.getNodeData().isHasTaxonomy() ) {
1639 n.getNodeData().setTaxonomy( new Taxonomy() );
1641 n.getNodeData().getTaxonomy().setScientificName( name );
1643 case TAXONOMY_COMMON_NAME:
1645 if ( !n.getNodeData().isHasTaxonomy() ) {
1646 n.getNodeData().setTaxonomy( new Taxonomy() );
1648 n.getNodeData().getTaxonomy().setCommonName( name );
1650 case SEQUENCE_SYMBOL:
1652 if ( !n.getNodeData().isHasSequence() ) {
1653 n.getNodeData().setSequence( new Sequence() );
1655 n.getNodeData().getSequence().setSymbol( name );
1659 if ( !n.getNodeData().isHasSequence() ) {
1660 n.getNodeData().setSequence( new Sequence() );
1662 n.getNodeData().getSequence().setName( name );
1664 case TAXONOMY_ID_UNIPROT_1: {
1665 if ( !n.getNodeData().isHasTaxonomy() ) {
1666 n.getNodeData().setTaxonomy( new Taxonomy() );
1669 final int i = name.indexOf( '_' );
1671 id = name.substring( 0, i );
1676 n.getNodeData().getTaxonomy()
1677 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1680 case TAXONOMY_ID_UNIPROT_2: {
1681 if ( !n.getNodeData().isHasTaxonomy() ) {
1682 n.getNodeData().setTaxonomy( new Taxonomy() );
1685 final int i = name.indexOf( '_' );
1687 id = name.substring( i + 1, name.length() );
1692 n.getNodeData().getTaxonomy()
1693 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1697 if ( !n.getNodeData().isHasTaxonomy() ) {
1698 n.getNodeData().setTaxonomy( new Taxonomy() );
1700 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
1707 throw new IllegalArgumentException( "don't know what to do with " + field );
1714 static double addPhylogenyDistances( final double a, final double b ) {
1715 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
1718 else if ( a >= 0.0 ) {
1721 else if ( b >= 0.0 ) {
1724 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1727 static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
1729 boolean all_default = true;
1730 while ( anc != desc ) {
1731 if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
1732 d += desc.getDistanceToParent();
1733 if ( all_default ) {
1734 all_default = false;
1737 desc = desc.getParent();
1739 if ( all_default ) {
1740 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1745 public static double calculateAverageTreeHeight( final PhylogenyNode node ) {
1746 final List<PhylogenyNode> ext = node.getAllExternalDescendants();
1748 for( PhylogenyNode n : ext ) {
1749 while ( n != node ) {
1750 if ( n.getDistanceToParent() > 0 ) {
1751 s += n.getDistanceToParent();
1756 return s / ext.size();
1760 * Deep copies the phylogeny originating from this node.
1762 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
1763 if ( source == null ) {
1767 final PhylogenyNode newnode = source.copyNodeData();
1768 if ( !source.isExternal() ) {
1769 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1770 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
1778 * Shallow copies the phylogeny originating from this node.
1780 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
1781 if ( source == null ) {
1785 final PhylogenyNode newnode = source.copyNodeDataShallow();
1786 if ( !source.isExternal() ) {
1787 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1788 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
1795 private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
1796 final double min_distance_to_root ) {
1797 final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
1798 final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
1799 for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
1800 if ( ext.getIndicator() != 0 ) {
1801 throw new RuntimeException( "this should not have happened" );
1803 ext.setIndicator( ( byte ) 1 );
1810 * Calculates the distance between PhylogenyNodes n1 and n2.
1811 * PRECONDITION: n1 is a descendant of n2.
1814 * a descendant of n2
1816 * @return distance between n1 and n2
1818 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
1820 while ( n1 != n2 ) {
1821 if ( n1.getDistanceToParent() > 0.0 ) {
1822 d += n1.getDistanceToParent();
1824 n1 = n1.getParent();
1829 private static boolean match( final String s,
1831 final boolean case_sensitive,
1832 final boolean partial,
1833 final boolean regex ) {
1834 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1837 String my_s = s.trim();
1838 String my_query = query.trim();
1839 if ( !case_sensitive && !regex ) {
1840 my_s = my_s.toLowerCase();
1841 my_query = my_query.toLowerCase();
1846 if ( case_sensitive ) {
1847 p = Pattern.compile( my_query );
1850 p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
1853 catch ( final PatternSyntaxException e ) {
1857 return p.matcher( my_s ).find();
1863 else if ( partial ) {
1864 return my_s.indexOf( my_query ) >= 0;
1869 p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
1871 catch ( final PatternSyntaxException e ) {
1875 return p.matcher( my_s ).find();
1883 private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
1884 PhylogenyNode n = node;
1885 PhylogenyNode prev = node;
1886 while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
1893 public static enum DESCENDANT_SORT_PRIORITY {
1899 public static enum PhylogenyNodeField {
1904 TAXONOMY_COMMON_NAME,
1906 TAXONOMY_ID_UNIPROT_1,
1907 TAXONOMY_ID_UNIPROT_2,
1908 TAXONOMY_SCIENTIFIC_NAME;
1911 public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
1912 for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
1913 final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
1914 final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
1915 final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
1916 new_seq.setMolecularSequenceAligned( true );
1917 new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
1918 new_seq.setName( seq.getIdentifier() );
1920 new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
1922 catch ( final PhyloXmlDataFormatException ignore ) {
1925 node.getNodeData().addSequence( new_seq );
1929 final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
1932 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1933 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1934 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1935 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1936 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1937 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1939 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1940 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1941 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1942 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1945 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1946 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1947 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1948 return n1.getNodeData().getSequence().getName().toLowerCase()
1949 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1951 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1952 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1953 return n1.getNodeData().getSequence().getGeneName()
1954 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1956 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1957 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1958 return n1.getNodeData().getSequence().getSymbol()
1959 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1962 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1963 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1969 final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
1972 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1973 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1974 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1975 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1976 return n1.getNodeData().getSequence().getName().toLowerCase()
1977 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1979 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1980 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1981 return n1.getNodeData().getSequence().getGeneName()
1982 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1984 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1985 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1986 return n1.getNodeData().getSequence().getSymbol()
1987 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1990 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1991 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1992 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1993 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1994 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1996 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1997 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1998 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1999 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
2002 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
2003 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2009 final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
2012 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
2013 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
2014 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2016 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
2017 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
2018 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
2019 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
2020 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
2022 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
2023 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
2024 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
2025 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
2028 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
2029 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
2030 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
2031 return n1.getNodeData().getSequence().getName().toLowerCase()
2032 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
2034 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
2035 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
2036 return n1.getNodeData().getSequence().getGeneName()
2037 .compareTo( n2.getNodeData().getSequence().getGeneName() );
2039 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
2040 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
2041 return n1.getNodeData().getSequence().getSymbol()
2042 .compareTo( n2.getNodeData().getSequence().getSymbol() );
2049 public final static Map<Long, Integer> calculateDepths( final Phylogeny phy ) {
2050 final Map<Long, Integer> depths = new HashMap<Long, Integer>();
2051 calculateDepthsHelper( phy.getRoot(), 0, depths );
2055 private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map<Long, Integer> depths ) {
2056 depths.put( n.getId(), d );
2058 final List<PhylogenyNode> descs = n.getDescendants();
2059 for( final PhylogenyNode desc : descs ) {
2060 calculateDepthsHelper( desc, d, depths );
2064 public final static void collapseToDepth( final Phylogeny phy, final int depth ) {
2065 if ( phy.getNumberOfExternalNodes() < 3 ) {
2068 collapseToDepthHelper( phy.getRoot(), 0, depth );
2071 private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) {
2072 if ( n.isExternal() ) {
2073 n.setCollapse( false );
2077 n.setCollapse( true );
2078 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2079 while ( it.hasNext() ) {
2080 it.next().setCollapse( true );
2084 n.setCollapse( false );
2086 final List<PhylogenyNode> descs = n.getDescendants();
2087 for( final PhylogenyNode desc : descs ) {
2088 collapseToDepthHelper( desc, d, depth );
2093 public final static void collapseToRank( final Phylogeny phy, final int rank ) {
2094 if ( phy.getNumberOfExternalNodes() < 3 ) {
2097 if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) {
2098 throw new IllegalArgumentException( "Rank " + rank + " is out of range" );
2100 collapseToRankHelper( phy.getRoot(), rank );
2103 private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) {
2104 if ( n.isExternal() ) {
2105 n.setCollapse( false );
2108 if ( ( n.getNodeData().getTaxonomy() != null )
2109 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) {
2110 final String current_rank = n.getNodeData().getTaxonomy().getRank();
2111 if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
2112 System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." );
2115 if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) {
2116 n.setCollapse( true );
2117 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2118 while ( it.hasNext() ) {
2119 it.next().setCollapse( true );
2125 n.setCollapse( false );
2126 final List<PhylogenyNode> descs = n.getDescendants();
2127 for( final PhylogenyNode desc : descs ) {
2128 collapseToRankHelper( desc, target_rank );
2132 public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) {
2133 PhylogenyNode n = node;
2134 while ( n.isInternal() ) {
2135 n = n.getFirstChildNode();
2140 public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) {
2141 PhylogenyNode n = node;
2142 while ( n.isInternal() ) {
2143 n = n.getLastChildNode();
2148 public final static boolean isHasCollapsedNodes( final Phylogeny phy ) {
2149 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
2150 final PhylogenyNode n = iter.next();
2151 if ( !n.isExternal() && ( n.isCollapse() ) ) {