2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 import java.util.regex.PatternSyntaxException;
45 import org.forester.io.parsers.FastaParser;
46 import org.forester.io.parsers.PhylogenyParser;
47 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
48 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
49 import org.forester.io.parsers.util.PhylogenyParserException;
50 import org.forester.msa.Msa;
51 import org.forester.phylogeny.data.Accession;
52 import org.forester.phylogeny.data.Annotation;
53 import org.forester.phylogeny.data.BranchColor;
54 import org.forester.phylogeny.data.BranchWidth;
55 import org.forester.phylogeny.data.Confidence;
56 import org.forester.phylogeny.data.DomainArchitecture;
57 import org.forester.phylogeny.data.Event;
58 import org.forester.phylogeny.data.Identifier;
59 import org.forester.phylogeny.data.PhylogenyDataUtil;
60 import org.forester.phylogeny.data.Sequence;
61 import org.forester.phylogeny.data.Taxonomy;
62 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
63 import org.forester.phylogeny.factories.PhylogenyFactory;
64 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
65 import org.forester.phylogeny.iterators.PreorderTreeIterator;
66 import org.forester.util.BasicDescriptiveStatistics;
67 import org.forester.util.DescriptiveStatistics;
68 import org.forester.util.ForesterUtil;
69 import org.forester.util.TaxonomyUtil;
71 public class PhylogenyMethods {
73 private static boolean _order_changed;
75 private PhylogenyMethods() {
76 // Hidden constructor.
80 public Object clone() throws CloneNotSupportedException {
81 throw new CloneNotSupportedException();
84 public static boolean extractFastaInformation( final Phylogeny phy ) {
85 boolean could_extract = false;
86 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
87 final PhylogenyNode node = iter.next();
88 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
89 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
90 if ( name_m.lookingAt() ) {
92 final String acc_source = name_m.group( 1 );
93 final String acc = name_m.group( 2 );
94 final String seq_name = name_m.group( 3 );
95 final String tax_sn = name_m.group( 4 );
96 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
97 ForesterUtil.ensurePresenceOfSequence( node );
98 node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
100 if ( !ForesterUtil.isEmpty( seq_name ) ) {
101 ForesterUtil.ensurePresenceOfSequence( node );
102 node.getNodeData().getSequence( 0 ).setName( seq_name );
104 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
105 ForesterUtil.ensurePresenceOfTaxonomy( node );
106 node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
111 return could_extract;
114 public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
115 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
116 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
117 final PhylogenyNode n = iter.next();
118 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
119 stats.addValue( n.getDistanceToParent() );
125 public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
126 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
127 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
128 final PhylogenyNode n = iter.next();
129 if ( !n.isExternal() && !n.isRoot() ) {
130 if ( n.getBranchData().isHasConfidences() ) {
131 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
132 final Confidence c = n.getBranchData().getConfidences().get( i );
133 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
134 stats.add( i, new BasicDescriptiveStatistics() );
136 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
137 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
138 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
139 throw new IllegalArgumentException( "support values in node [" + n.toString()
140 + "] appear inconsistently ordered" );
143 stats.get( i ).setDescription( c.getType() );
145 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
154 * Calculates the distance between PhylogenyNodes node1 and node2.
159 * @return distance between node1 and node2
161 public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
162 final PhylogenyNode lca = calculateLCA( node1, node2 );
163 final PhylogenyNode n1 = node1;
164 final PhylogenyNode n2 = node2;
165 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
169 * Returns the LCA of PhylogenyNodes node1 and node2.
174 * @return LCA of node1 and node2
176 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
177 if ( node1 == null ) {
178 throw new IllegalArgumentException( "first argument (node) is null" );
180 if ( node2 == null ) {
181 throw new IllegalArgumentException( "second argument (node) is null" );
183 if ( node1 == node2 ) {
186 if ( ( node1.getParent() == node2.getParent() ) ) {
187 return node1.getParent();
189 int depth1 = node1.calculateDepth();
190 int depth2 = node2.calculateDepth();
191 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
192 if ( depth1 > depth2 ) {
193 node1 = node1.getParent();
196 else if ( depth2 > depth1 ) {
197 node2 = node2.getParent();
201 if ( node1 == node2 ) {
204 node1 = node1.getParent();
205 node2 = node2.getParent();
210 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
214 * Returns the LCA of PhylogenyNodes node1 and node2.
215 * Precondition: ids are in pre-order (or level-order).
220 * @return LCA of node1 and node2
222 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
223 if ( node1 == null ) {
224 throw new IllegalArgumentException( "first argument (node) is null" );
226 if ( node2 == null ) {
227 throw new IllegalArgumentException( "second argument (node) is null" );
229 while ( node1 != node2 ) {
230 if ( node1.getId() > node2.getId() ) {
231 node1 = node1.getParent();
234 node2 = node2.getParent();
240 public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
241 if ( node.isExternal() ) {
245 for( PhylogenyNode d : node.getAllExternalDescendants() ) {
247 while ( d != node ) {
248 if ( d.isCollapse() ) {
263 public static int calculateMaxDepth( final Phylogeny phy ) {
265 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
266 final PhylogenyNode node = iter.next();
267 final int steps = node.calculateDepth();
275 public static String[] obtainPresentRanksSorted( final Phylogeny phy ) {
276 final Set<String> present_ranks = new HashSet<String>();
277 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
278 final PhylogenyNode node = iter.next();
279 if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null )
280 && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) {
281 final String current_rank = node.getNodeData().getTaxonomy().getRank();
282 if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
283 present_ranks.add( current_rank );
287 final String ordered_ranks[] = new String[present_ranks.size() + 1];
289 for( final String rank : TaxonomyUtil.RANKS ) {
290 if ( present_ranks.contains( rank ) ) {
291 ordered_ranks[ c++ ] = rank;
294 ordered_ranks[ c ] = "off";
295 return ordered_ranks;
298 public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) {
300 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
301 PhylogenyNode n = iter.next();
303 while ( n.getParent() != null ) {
304 if ( !n.isCollapse() ) {
316 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
318 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
319 final PhylogenyNode node = iter.next();
320 final double d = node.calculateDistanceToRoot();
328 public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
330 PhylogenyNode max_node = phy.getFirstExternalNode();
331 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
332 final PhylogenyNode node = iter.next();
333 final double d = node.calculateDistanceToRoot();
342 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
343 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
345 for( final PhylogenyNode n : descs ) {
346 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
353 public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
354 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
355 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
356 final PhylogenyNode n = iter.next();
357 if ( !n.isExternal() ) {
358 stats.addValue( n.getNumberOfDescendants() );
364 public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
365 final List<PhylogenyNode> eds = n.getAllExternalDescendants();
366 final List<Double> d = new ArrayList<Double>();
367 for( final PhylogenyNode ed : eds ) {
368 d.add( calculateDistanceToAncestor( n, ed ) );
370 for( int i = 0; i < eds.size(); ++i ) {
371 n.setChildNode( i, eds.get( i ) );
372 eds.get( i ).setDistanceToParent( d.get( i ) );
376 public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
378 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
379 final PhylogenyNode n = iter.next();
380 if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) {
387 public static int countNumberOfPolytomies( final Phylogeny phy ) {
389 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
390 final PhylogenyNode n = iter.next();
391 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
398 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
399 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
400 final List<PhylogenyNode> ext = phy.getExternalNodes();
401 for( final PhylogenyNode n : ext ) {
402 nodes.put( n.getName(), n );
407 public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
408 for( final Long id : to_delete ) {
409 phy.deleteSubtree( phy.getNode( id ), true );
411 phy.clearHashIdToNodeMap();
412 phy.externalNodesHaveChanged();
415 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
416 throws IllegalArgumentException {
417 for( final String element : node_names_to_delete ) {
418 if ( ForesterUtil.isEmpty( element ) ) {
421 List<PhylogenyNode> nodes = null;
422 nodes = p.getNodes( element );
423 final Iterator<PhylogenyNode> it = nodes.iterator();
424 while ( it.hasNext() ) {
425 final PhylogenyNode n = it.next();
426 if ( !n.isExternal() ) {
427 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
429 p.deleteSubtree( n, true );
432 p.clearHashIdToNodeMap();
433 p.externalNodesHaveChanged();
436 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
437 final Phylogeny p ) {
438 final PhylogenyNodeIterator it = p.iteratorExternalForward();
439 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
441 Arrays.sort( node_names_to_keep );
442 while ( it.hasNext() ) {
443 final String curent_name = it.next().getName();
444 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
445 to_delete[ i++ ] = curent_name;
448 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
449 final List<String> deleted = new ArrayList<String>();
450 for( final String n : to_delete ) {
451 if ( !ForesterUtil.isEmpty( n ) ) {
458 public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep,
459 final Phylogeny phy ) {
460 final Set<Long> to_delete = new HashSet<Long>();
461 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
462 final PhylogenyNode n = it.next();
463 if ( n.getNodeData().isHasTaxonomy() ) {
464 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
465 to_delete.add( n.getId() );
469 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
472 deleteExternalNodesNegativeSelection( to_delete, phy );
475 final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
476 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
477 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
478 final PhylogenyNode n = iter.next();
479 if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
483 for( final PhylogenyNode d : to_delete ) {
484 PhylogenyMethods.removeNode( d, phy );
486 phy.clearHashIdToNodeMap();
487 phy.externalNodesHaveChanged();
490 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
491 if ( n.isInternal() ) {
492 throw new IllegalArgumentException( "node is not external" );
494 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
495 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
496 final PhylogenyNode i = it.next();
497 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
501 for( final PhylogenyNode d : to_delete ) {
502 phy.deleteSubtree( d, true );
504 phy.clearHashIdToNodeMap();
505 phy.externalNodesHaveChanged();
508 public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
509 final double min_distance_to_root ) {
510 if ( min_distance_to_root <= 0 ) {
511 throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
513 final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
514 setAllIndicatorsToZero( phy );
515 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
516 final PhylogenyNode n = it.next();
517 if ( n.getIndicator() != 0 ) {
520 l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
522 throw new RuntimeException( "this should not have happened" );
528 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
529 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
530 final Set<Long> encountered = new HashSet<Long>();
531 if ( !node.isExternal() ) {
532 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
533 for( PhylogenyNode current : exts ) {
534 descs.add( current );
535 while ( current != node ) {
536 current = current.getParent();
537 if ( encountered.contains( current.getId() ) ) {
540 descs.add( current );
541 encountered.add( current.getId() );
555 public static Color getBranchColorValue( final PhylogenyNode node ) {
556 if ( node.getBranchData().getBranchColor() == null ) {
559 return node.getBranchData().getBranchColor().getValue();
565 public static double getBranchWidthValue( final PhylogenyNode node ) {
566 if ( !node.getBranchData().isHasBranchWidth() ) {
567 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
569 return node.getBranchData().getBranchWidth().getValue();
575 public static double getConfidenceValue( final PhylogenyNode node ) {
576 if ( !node.getBranchData().isHasConfidences() ) {
577 return Confidence.CONFIDENCE_DEFAULT_VALUE;
579 return node.getBranchData().getConfidence( 0 ).getValue();
585 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
586 if ( !node.getBranchData().isHasConfidences() ) {
587 return new double[ 0 ];
589 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
591 for( final Confidence c : node.getBranchData().getConfidences() ) {
592 values[ i++ ] = c.getValue();
597 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
598 return calculateLCA( n1, n2 ).getNodeData().getEvent();
602 * Returns taxonomy t if all external descendants have
603 * the same taxonomy t, null otherwise.
606 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
607 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
609 for( final PhylogenyNode n : descs ) {
610 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
613 else if ( tax == null ) {
614 tax = n.getNodeData().getTaxonomy();
616 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
623 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
624 final List<PhylogenyNode> children = node.getAllExternalDescendants();
625 PhylogenyNode farthest = null;
626 double longest = -Double.MAX_VALUE;
627 for( final PhylogenyNode child : children ) {
628 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
630 longest = PhylogenyMethods.getDistance( child, node );
636 // public static PhylogenyMethods getInstance() {
637 // if ( PhylogenyMethods._instance == null ) {
638 // PhylogenyMethods._instance = new PhylogenyMethods();
640 // return PhylogenyMethods._instance;
643 * Returns the largest confidence value found on phy.
645 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
646 double max = -Double.MAX_VALUE;
647 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
648 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
649 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
656 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
657 int min = Integer.MAX_VALUE;
660 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
662 if ( n.isInternal() ) {
663 d = n.getNumberOfDescendants();
673 * Convenience method for display purposes.
674 * Not intended for algorithms.
676 public static String getSpecies( final PhylogenyNode node ) {
677 if ( !node.getNodeData().isHasTaxonomy() ) {
680 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
681 return node.getNodeData().getTaxonomy().getScientificName();
683 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
684 return node.getNodeData().getTaxonomy().getTaxonomyCode();
687 return node.getNodeData().getTaxonomy().getCommonName();
692 * Convenience method for display purposes.
693 * Not intended for algorithms.
695 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
696 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
699 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
702 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
703 if ( n.isExternal() ) {
707 if ( n.isDuplication() ) {
708 for( final PhylogenyNode desc : n.getDescendants() ) {
709 if ( !isAllDecendentsAreDuplications( desc ) ) {
721 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
722 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
723 if ( node.getChildNode( i ).isExternal() ) {
731 * This is case insensitive.
734 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
735 final String[] providers ) {
736 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
737 final String my_tax_prov = tax.getIdentifier().getProvider();
738 for( final String provider : providers ) {
739 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
750 public static void midpointRoot( final Phylogeny phylogeny ) {
751 if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
755 final int total_nodes = phylogeny.getNodeCount();
757 if ( ++counter > total_nodes ) {
758 throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
760 PhylogenyNode a = null;
763 for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
764 final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
765 final double df = getDistance( f, phylogeny.getRoot() );
772 else if ( df > db ) {
777 final double diff = da - db;
778 if ( diff < 0.000001 ) {
781 double x = da - ( diff / 2.0 );
782 while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
783 x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
786 phylogeny.reRoot( a, x );
788 phylogeny.recalculateNumberOfExternalDescendants( true );
791 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
792 final double max_bootstrap_value,
793 final double max_normalized_value ) {
794 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
795 final PhylogenyNode node = iter.next();
796 if ( node.isInternal() ) {
797 final double confidence = getConfidenceValue( node );
798 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
799 if ( confidence >= max_bootstrap_value ) {
800 setBootstrapConfidence( node, max_normalized_value );
803 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
810 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
811 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
812 if ( phy.isEmpty() ) {
815 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
816 nodes.add( iter.next() );
822 * Returns a map of distinct taxonomies of
823 * all external nodes of node.
824 * If at least one of the external nodes has no taxonomy,
828 public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
829 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
830 final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
831 for( final PhylogenyNode n : descs ) {
832 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
835 final Taxonomy t = n.getNodeData().getTaxonomy();
836 if ( tax_map.containsKey( t ) ) {
837 tax_map.put( t, tax_map.get( t ) + 1 );
847 * Arranges the order of childern for each node of this Phylogeny in such a
848 * way that either the branch with more children is on top (right) or on
849 * bottom (left), dependent on the value of boolean order.
852 * decides in which direction to order
855 public static void orderAppearance( final PhylogenyNode n,
857 final boolean order_ext_alphabetically,
858 final DESCENDANT_SORT_PRIORITY pri ) {
859 if ( n.isExternal() ) {
863 if ( ( n.getNumberOfDescendants() == 2 )
864 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
865 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2()
866 .getNumberOfExternalNodes() ) == order ) ) {
867 final PhylogenyNode temp = n.getChildNode1();
868 n.setChild1( n.getChildNode2() );
870 _order_changed = true;
872 else if ( order_ext_alphabetically ) {
873 boolean all_ext = true;
874 for( final PhylogenyNode i : n.getDescendants() ) {
875 if ( !i.isExternal() ) {
881 PhylogenyMethods.sortNodeDescendents( n, pri );
884 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
885 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
890 public synchronized static void orderAppearanceX( final PhylogenyNode n,
891 final boolean order_ext_alphabetically,
892 final DESCENDANT_SORT_PRIORITY pri ) {
893 if ( n.isExternal() ) {
897 _order_changed = false;
898 orderAppearance( n, true, order_ext_alphabetically, pri );
899 if ( !_order_changed ) {
900 orderAppearance( n, false, order_ext_alphabetically, pri );
905 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
906 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
907 final PhylogenyNode node = iter.next();
912 if ( node.isInternal() ) {
913 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
914 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
915 final PhylogenyNode child_node = node.getChildNode( i );
916 final Color child_color = getBranchColorValue( child_node );
917 if ( child_color != null ) {
919 red += child_color.getRed();
920 green += child_color.getGreen();
921 blue += child_color.getBlue();
924 setBranchColorValue( node,
925 new Color( ForesterUtil.roundToInt( red / n ),
926 ForesterUtil.roundToInt( green / n ),
927 ForesterUtil.roundToInt( blue / n ) ) );
932 public static final void preOrderReId( final Phylogeny phy ) {
933 if ( phy.isEmpty() ) {
936 phy.setIdToNodeMap( null );
937 long i = PhylogenyNode.getNodeCount();
938 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
939 it.next().setId( i++ );
941 PhylogenyNode.setNodeCount( i );
944 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file )
946 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
947 final Phylogeny[] trees = factory.create( file, parser );
948 if ( ( trees == null ) || ( trees.length == 0 ) ) {
949 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
954 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
956 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
957 for( final File file : files ) {
958 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
959 final Phylogeny[] trees = factory.create( file, parser );
960 if ( ( trees == null ) || ( trees.length == 0 ) ) {
961 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
963 tree_list.addAll( Arrays.asList( trees ) );
965 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
968 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
969 if ( remove_me.isRoot() ) {
970 if ( remove_me.getNumberOfDescendants() == 1 ) {
971 final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
972 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
973 desc.getDistanceToParent() ) );
974 desc.setParent( null );
975 phylogeny.setRoot( desc );
976 phylogeny.clearHashIdToNodeMap();
979 throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
982 else if ( remove_me.isExternal() ) {
983 phylogeny.deleteSubtree( remove_me, false );
984 phylogeny.clearHashIdToNodeMap();
985 phylogeny.externalNodesHaveChanged();
988 final PhylogenyNode parent = remove_me.getParent();
989 final List<PhylogenyNode> descs = remove_me.getDescendants();
990 parent.removeChildNode( remove_me );
991 for( final PhylogenyNode desc : descs ) {
992 parent.addAsChild( desc );
993 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
994 desc.getDistanceToParent() ) );
996 remove_me.setParent( null );
997 phylogeny.clearHashIdToNodeMap();
998 phylogeny.externalNodesHaveChanged();
1002 private static enum NDF {
1004 TaxonomyCode( "TC" ),
1005 TaxonomyCommonName( "TN" ),
1006 TaxonomyScientificName( "TS" ),
1007 TaxonomyIdentifier( "TI" ),
1008 TaxonomySynonym( "SY" ),
1009 SequenceName( "SN" ),
1011 SequenceSymbol( "SS" ),
1012 SequenceAccession( "SA" ),
1016 BinaryCharacter( "BC" ),
1017 MolecularSequence( "MS" );
1019 private final String _text;
1021 NDF( final String text ) {
1025 public static NDF fromString( final String text ) {
1026 for( final NDF n : NDF.values() ) {
1027 if ( text.startsWith( n._text ) ) {
1035 public static List<Long> searchData( final String query,
1036 final Phylogeny phy,
1037 final boolean case_sensitive,
1038 final boolean partial,
1039 final boolean regex,
1040 final boolean search_domains,
1041 final double domains_confidence_threshold ) {
1042 final List<Long> nodes = new ArrayList<Long>();
1043 if ( phy.isEmpty() || ( query == null ) ) {
1046 if ( ForesterUtil.isEmpty( query ) ) {
1049 String my_query = query;
1051 if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
1052 ndf = NDF.fromString( my_query );
1053 if ( ndf != null ) {
1054 my_query = my_query.substring( 3 );
1057 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1058 final PhylogenyNode node = iter.next();
1059 boolean match = false;
1060 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1061 && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
1064 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1065 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1072 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1073 && match( node.getNodeData().getTaxonomy().getCommonName(),
1080 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy()
1081 && match( node.getNodeData().getTaxonomy().getScientificName(),
1088 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1089 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1090 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1097 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1098 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1099 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1100 I: for( final String syn : syns ) {
1101 if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
1107 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1108 && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
1111 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1112 && match( node.getNodeData().getSequence().getGeneName(),
1119 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
1120 && match( node.getNodeData().getSequence().getSymbol(),
1127 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence()
1128 && ( node.getNodeData().getSequence().getAccession() != null )
1129 && match( node.getNodeData().getSequence().getAccession().getValue(),
1136 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1137 && node.getNodeData().isHasSequence()
1138 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1139 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1140 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1141 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1142 && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
1148 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1149 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1150 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1151 if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
1155 if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
1161 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1162 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1163 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1164 if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
1168 if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
1172 if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
1178 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1179 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1180 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1181 I: while ( it.hasNext() ) {
1182 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1187 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1188 I: while ( it.hasNext() ) {
1189 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1195 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1196 && match( node.getNodeData().getSequence().getMolecularSequence(),
1204 nodes.add( node.getId() );
1210 public static List<Long> searchDataLogicalAnd( final String[] queries,
1211 final Phylogeny phy,
1212 final boolean case_sensitive,
1213 final boolean partial,
1214 final boolean search_domains,
1215 final double domains_confidence_threshold ) {
1216 final List<Long> nodes = new ArrayList<Long>();
1217 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1220 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1221 final PhylogenyNode node = iter.next();
1222 boolean all_matched = true;
1223 for( String query : queries ) {
1224 if ( query == null ) {
1227 query = query.trim();
1229 if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
1230 ndf = NDF.fromString( query );
1231 if ( ndf != null ) {
1232 query = query.substring( 3 );
1235 boolean match = false;
1236 if ( ForesterUtil.isEmpty( query ) ) {
1239 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1240 && match( node.getName(), query, case_sensitive, partial, false ) ) {
1243 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1244 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1251 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1252 && match( node.getNodeData().getTaxonomy().getCommonName(),
1259 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1260 && node.getNodeData().isHasTaxonomy()
1261 && match( node.getNodeData().getTaxonomy().getScientificName(),
1268 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1269 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1270 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1277 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1278 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1279 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1280 I: for( final String syn : syns ) {
1281 if ( match( syn, query, case_sensitive, partial, false ) ) {
1287 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1288 && match( node.getNodeData().getSequence().getName(),
1295 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1296 && match( node.getNodeData().getSequence().getGeneName(),
1303 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
1304 && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(),
1311 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1312 && node.getNodeData().isHasSequence()
1313 && ( node.getNodeData().getSequence().getAccession() != null )
1314 && match( node.getNodeData().getSequence().getAccession().getValue(),
1321 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1322 && node.getNodeData().isHasSequence()
1323 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1324 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1325 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1326 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1327 && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
1333 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1334 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1335 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1336 if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
1340 if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
1346 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1347 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1348 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1349 if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
1353 if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
1357 if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
1363 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1364 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1365 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1366 I: while ( it.hasNext() ) {
1367 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1372 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1373 I: while ( it.hasNext() ) {
1374 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1380 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1381 && match( node.getNodeData().getSequence().getMolecularSequence(),
1389 all_matched = false;
1393 if ( all_matched ) {
1394 nodes.add( node.getId() );
1400 public static void setAllIndicatorsToZero( final Phylogeny phy ) {
1401 for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
1402 it.next().setIndicator( ( byte ) 0 );
1407 * Convenience method.
1408 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1410 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1411 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1414 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1415 if ( node.getBranchData().getBranchColor() == null ) {
1416 node.getBranchData().setBranchColor( new BranchColor() );
1418 node.getBranchData().getBranchColor().setValue( color );
1422 * Convenience method
1424 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1425 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1429 * Convenience method.
1430 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1432 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1433 setConfidence( node, confidence_value, "" );
1437 * Convenience method.
1438 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1440 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1441 Confidence c = null;
1442 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1443 c = node.getBranchData().getConfidence( 0 );
1446 c = new Confidence();
1447 node.getBranchData().addConfidence( c );
1450 c.setValue( confidence_value );
1453 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1454 if ( !node.getNodeData().isHasTaxonomy() ) {
1455 node.getNodeData().setTaxonomy( new Taxonomy() );
1457 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1461 * Convenience method to set the taxonomy code of a phylogeny node.
1465 * @param taxonomy_code
1466 * @throws PhyloXmlDataFormatException
1468 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1469 throws PhyloXmlDataFormatException {
1470 if ( !node.getNodeData().isHasTaxonomy() ) {
1471 node.getNodeData().setTaxonomy( new Taxonomy() );
1473 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1476 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
1477 Comparator<PhylogenyNode> c;
1480 c = new PhylogenyNodeSortSequencePriority();
1483 c = new PhylogenyNodeSortNodeNamePriority();
1486 c = new PhylogenyNodeSortTaxonomyPriority();
1488 final List<PhylogenyNode> descs = node.getDescendants();
1489 Collections.sort( descs, c );
1491 for( final PhylogenyNode desc : descs ) {
1492 node.setChildNode( i++, desc );
1497 * Removes from Phylogeny to_be_stripped all external Nodes which are
1498 * associated with a species NOT found in Phylogeny reference.
1501 * a reference Phylogeny
1502 * @param to_be_stripped
1503 * Phylogeny to be stripped
1504 * @return nodes removed from to_be_stripped
1506 public static List<PhylogenyNode> taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference,
1507 final Phylogeny to_be_stripped ) {
1508 final Set<String> ref_ext_taxo = new HashSet<String>();
1509 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1510 final PhylogenyNode n = it.next();
1511 if ( !n.getNodeData().isHasTaxonomy() ) {
1512 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1514 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1515 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1517 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1518 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1520 if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
1521 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
1522 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
1525 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1526 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1527 final PhylogenyNode n = it.next();
1528 if ( !n.getNodeData().isHasTaxonomy() ) {
1529 nodes_to_delete.add( n );
1531 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1532 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1533 && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo
1534 .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
1535 nodes_to_delete.add( n );
1538 for( final PhylogenyNode n : nodes_to_delete ) {
1539 to_be_stripped.deleteSubtree( n, true );
1541 to_be_stripped.clearHashIdToNodeMap();
1542 to_be_stripped.externalNodesHaveChanged();
1543 return nodes_to_delete;
1546 final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
1547 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1548 while ( it.hasNext() ) {
1549 final PhylogenyNode n = it.next();
1550 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
1553 value = Double.parseDouble( n.getName() );
1555 catch ( final NumberFormatException e ) {
1556 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
1557 + e.getLocalizedMessage() );
1559 if ( value >= 0.0 ) {
1560 n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
1567 final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
1568 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1569 while ( it.hasNext() ) {
1570 final PhylogenyNode n = it.next();
1571 if ( !n.isExternal() && !n.isRoot() ) {
1572 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1575 value = Double.parseDouble( n.getName() );
1577 catch ( final NumberFormatException e ) {
1580 if ( ( value < 0.0 ) || ( value > 100 ) ) {
1589 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy,
1590 final String confidence_type ) {
1591 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1592 while ( it.hasNext() ) {
1593 transferInternalNodeNameToConfidence( confidence_type, it.next() );
1597 private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
1598 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
1599 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1602 d = Double.parseDouble( n.getName() );
1604 catch ( final Exception e ) {
1608 n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
1615 final static public void transferNodeNameToField( final Phylogeny phy,
1616 final PhylogenyNodeField field,
1617 final boolean external_only )
1618 throws PhyloXmlDataFormatException {
1619 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1620 while ( it.hasNext() ) {
1621 final PhylogenyNode n = it.next();
1622 if ( external_only && n.isInternal() ) {
1625 final String name = n.getName().trim();
1626 if ( !ForesterUtil.isEmpty( name ) ) {
1630 setTaxonomyCode( n, name );
1632 case TAXONOMY_SCIENTIFIC_NAME:
1634 if ( !n.getNodeData().isHasTaxonomy() ) {
1635 n.getNodeData().setTaxonomy( new Taxonomy() );
1637 n.getNodeData().getTaxonomy().setScientificName( name );
1639 case TAXONOMY_COMMON_NAME:
1641 if ( !n.getNodeData().isHasTaxonomy() ) {
1642 n.getNodeData().setTaxonomy( new Taxonomy() );
1644 n.getNodeData().getTaxonomy().setCommonName( name );
1646 case SEQUENCE_SYMBOL:
1648 if ( !n.getNodeData().isHasSequence() ) {
1649 n.getNodeData().setSequence( new Sequence() );
1651 n.getNodeData().getSequence().setSymbol( name );
1655 if ( !n.getNodeData().isHasSequence() ) {
1656 n.getNodeData().setSequence( new Sequence() );
1658 n.getNodeData().getSequence().setName( name );
1660 case TAXONOMY_ID_UNIPROT_1: {
1661 if ( !n.getNodeData().isHasTaxonomy() ) {
1662 n.getNodeData().setTaxonomy( new Taxonomy() );
1665 final int i = name.indexOf( '_' );
1667 id = name.substring( 0, i );
1672 n.getNodeData().getTaxonomy()
1673 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1676 case TAXONOMY_ID_UNIPROT_2: {
1677 if ( !n.getNodeData().isHasTaxonomy() ) {
1678 n.getNodeData().setTaxonomy( new Taxonomy() );
1681 final int i = name.indexOf( '_' );
1683 id = name.substring( i + 1, name.length() );
1688 n.getNodeData().getTaxonomy()
1689 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1693 if ( !n.getNodeData().isHasTaxonomy() ) {
1694 n.getNodeData().setTaxonomy( new Taxonomy() );
1696 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
1703 throw new IllegalArgumentException( "don't know what to do with " + field );
1710 static double addPhylogenyDistances( final double a, final double b ) {
1711 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
1714 else if ( a >= 0.0 ) {
1717 else if ( b >= 0.0 ) {
1720 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1723 static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
1725 boolean all_default = true;
1726 while ( anc != desc ) {
1727 if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
1728 d += desc.getDistanceToParent();
1729 if ( all_default ) {
1730 all_default = false;
1733 desc = desc.getParent();
1735 if ( all_default ) {
1736 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1741 public static double calculateAverageTreeHeight( final PhylogenyNode node ) {
1742 final List<PhylogenyNode> ext = node.getAllExternalDescendants();
1744 for( PhylogenyNode n : ext ) {
1745 while ( n != node ) {
1746 if ( n.getDistanceToParent() > 0 ) {
1747 s += n.getDistanceToParent();
1752 return s / ext.size();
1756 * Deep copies the phylogeny originating from this node.
1758 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
1759 if ( source == null ) {
1763 final PhylogenyNode newnode = source.copyNodeData();
1764 if ( !source.isExternal() ) {
1765 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1766 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
1774 * Shallow copies the phylogeny originating from this node.
1776 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
1777 if ( source == null ) {
1781 final PhylogenyNode newnode = source.copyNodeDataShallow();
1782 if ( !source.isExternal() ) {
1783 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1784 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
1791 private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
1792 final double min_distance_to_root ) {
1793 final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
1794 final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
1795 for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
1796 if ( ext.getIndicator() != 0 ) {
1797 throw new RuntimeException( "this should not have happened" );
1799 ext.setIndicator( ( byte ) 1 );
1806 * Calculates the distance between PhylogenyNodes n1 and n2.
1807 * PRECONDITION: n1 is a descendant of n2.
1810 * a descendant of n2
1812 * @return distance between n1 and n2
1814 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
1816 while ( n1 != n2 ) {
1817 if ( n1.getDistanceToParent() > 0.0 ) {
1818 d += n1.getDistanceToParent();
1820 n1 = n1.getParent();
1825 private static boolean match( final String s,
1827 final boolean case_sensitive,
1828 final boolean partial,
1829 final boolean regex ) {
1830 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1833 String my_s = s.trim();
1834 String my_query = query.trim();
1835 if ( !case_sensitive && !regex ) {
1836 my_s = my_s.toLowerCase();
1837 my_query = my_query.toLowerCase();
1842 if ( case_sensitive ) {
1843 p = Pattern.compile( my_query );
1846 p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
1849 catch ( final PatternSyntaxException e ) {
1853 return p.matcher( my_s ).find();
1859 else if ( partial ) {
1860 return my_s.indexOf( my_query ) >= 0;
1865 p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
1867 catch ( final PatternSyntaxException e ) {
1871 return p.matcher( my_s ).find();
1879 private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
1880 PhylogenyNode n = node;
1881 PhylogenyNode prev = node;
1882 while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
1889 public static enum DESCENDANT_SORT_PRIORITY {
1895 public static enum PhylogenyNodeField {
1900 TAXONOMY_COMMON_NAME,
1902 TAXONOMY_ID_UNIPROT_1,
1903 TAXONOMY_ID_UNIPROT_2,
1904 TAXONOMY_SCIENTIFIC_NAME;
1907 public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
1908 for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
1909 final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
1910 final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
1911 final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
1912 new_seq.setMolecularSequenceAligned( true );
1913 new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
1914 new_seq.setName( seq.getIdentifier() );
1916 new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
1918 catch ( final PhyloXmlDataFormatException ignore ) {
1921 node.getNodeData().addSequence( new_seq );
1925 final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
1928 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1929 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1930 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1931 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1932 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1933 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1935 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1936 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1937 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1938 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1941 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1942 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1943 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1944 return n1.getNodeData().getSequence().getName().toLowerCase()
1945 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1947 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1948 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1949 return n1.getNodeData().getSequence().getGeneName()
1950 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1952 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1953 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1954 return n1.getNodeData().getSequence().getSymbol()
1955 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1958 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1959 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1965 final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
1968 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1969 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1970 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1971 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1972 return n1.getNodeData().getSequence().getName().toLowerCase()
1973 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1975 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1976 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1977 return n1.getNodeData().getSequence().getGeneName()
1978 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1980 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1981 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1982 return n1.getNodeData().getSequence().getSymbol()
1983 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1986 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1987 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1988 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1989 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1990 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1992 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1993 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1994 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1995 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1998 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1999 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2005 final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
2008 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
2009 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
2010 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2012 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
2013 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
2014 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
2015 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
2016 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
2018 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
2019 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
2020 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
2021 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
2024 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
2025 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
2026 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
2027 return n1.getNodeData().getSequence().getName().toLowerCase()
2028 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
2030 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
2031 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
2032 return n1.getNodeData().getSequence().getGeneName()
2033 .compareTo( n2.getNodeData().getSequence().getGeneName() );
2035 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
2036 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
2037 return n1.getNodeData().getSequence().getSymbol()
2038 .compareTo( n2.getNodeData().getSequence().getSymbol() );
2045 public final static Map<Long, Integer> calculateDepths( final Phylogeny phy ) {
2046 final Map<Long, Integer> depths = new HashMap<Long, Integer>();
2047 calculateDepthsHelper( phy.getRoot(), 0, depths );
2051 private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map<Long, Integer> depths ) {
2052 depths.put( n.getId(), d );
2054 final List<PhylogenyNode> descs = n.getDescendants();
2055 for( final PhylogenyNode desc : descs ) {
2056 calculateDepthsHelper( desc, d, depths );
2060 public final static void collapseToDepth( final Phylogeny phy, final int depth ) {
2061 if ( phy.getNumberOfExternalNodes() < 3 ) {
2064 collapseToDepthHelper( phy.getRoot(), 0, depth );
2067 private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) {
2068 if ( n.isExternal() ) {
2069 n.setCollapse( false );
2073 n.setCollapse( true );
2074 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2075 while ( it.hasNext() ) {
2076 it.next().setCollapse( true );
2080 n.setCollapse( false );
2082 final List<PhylogenyNode> descs = n.getDescendants();
2083 for( final PhylogenyNode desc : descs ) {
2084 collapseToDepthHelper( desc, d, depth );
2091 public final static void collapseToRank( final Phylogeny phy, final int rank ) {
2092 if ( phy.getNumberOfExternalNodes() < 3 ) {
2095 if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) {
2096 throw new IllegalArgumentException( "Rank " + rank + " is out of range" );
2098 collapseToRankHelper( phy.getRoot(), rank );
2101 private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) {
2102 if ( n.isExternal() ) {
2103 n.setCollapse( false );
2106 if ( ( n.getNodeData().getTaxonomy() != null )
2107 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) {
2108 final String current_rank = n.getNodeData().getTaxonomy().getRank();
2109 if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
2110 System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." );
2113 if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) {
2114 n.setCollapse( true );
2116 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2117 while ( it.hasNext() ) {
2118 it.next().setCollapse( true );
2124 n.setCollapse( false );
2125 final List<PhylogenyNode> descs = n.getDescendants();
2126 for( final PhylogenyNode desc : descs ) {
2127 collapseToRankHelper( desc, target_rank );
2131 public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) {
2132 PhylogenyNode n = node;
2133 while ( n.isInternal() ) {
2134 n = n.getFirstChildNode();
2139 public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) {
2140 PhylogenyNode n = node;
2141 while ( n.isInternal() ) {
2142 n = n.getLastChildNode();
2147 public final static boolean isHasCollapsedNodes( final Phylogeny phy ) {
2148 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
2149 final PhylogenyNode n = iter.next();
2150 if ( !n.isExternal() && ( n.isCollapse() ) ) {