2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 import java.util.regex.PatternSyntaxException;
45 import org.forester.io.parsers.FastaParser;
46 import org.forester.io.parsers.PhylogenyParser;
47 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
48 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
49 import org.forester.io.parsers.util.PhylogenyParserException;
50 import org.forester.msa.Msa;
51 import org.forester.phylogeny.data.Accession;
52 import org.forester.phylogeny.data.Annotation;
53 import org.forester.phylogeny.data.BranchColor;
54 import org.forester.phylogeny.data.BranchWidth;
55 import org.forester.phylogeny.data.Confidence;
56 import org.forester.phylogeny.data.DomainArchitecture;
57 import org.forester.phylogeny.data.Event;
58 import org.forester.phylogeny.data.Identifier;
59 import org.forester.phylogeny.data.PhylogenyDataUtil;
60 import org.forester.phylogeny.data.Sequence;
61 import org.forester.phylogeny.data.Taxonomy;
62 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
63 import org.forester.phylogeny.factories.PhylogenyFactory;
64 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
65 import org.forester.phylogeny.iterators.PreorderTreeIterator;
66 import org.forester.util.BasicDescriptiveStatistics;
67 import org.forester.util.DescriptiveStatistics;
68 import org.forester.util.ForesterUtil;
69 import org.forester.util.TaxonomyUtil;
71 public class PhylogenyMethods {
73 private static boolean _order_changed;
75 private PhylogenyMethods() {
76 // Hidden constructor.
80 public Object clone() throws CloneNotSupportedException {
81 throw new CloneNotSupportedException();
84 public static boolean extractFastaInformation( final Phylogeny phy ) {
85 boolean could_extract = false;
86 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
87 final PhylogenyNode node = iter.next();
88 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
89 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
90 if ( name_m.lookingAt() ) {
92 final String acc_source = name_m.group( 1 );
93 final String acc = name_m.group( 2 );
94 final String seq_name = name_m.group( 3 );
95 final String tax_sn = name_m.group( 4 );
96 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
97 ForesterUtil.ensurePresenceOfSequence( node );
98 node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
100 if ( !ForesterUtil.isEmpty( seq_name ) ) {
101 ForesterUtil.ensurePresenceOfSequence( node );
102 node.getNodeData().getSequence( 0 ).setName( seq_name );
104 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
105 ForesterUtil.ensurePresenceOfTaxonomy( node );
106 node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
111 return could_extract;
114 public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
115 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
116 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
117 final PhylogenyNode n = iter.next();
118 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
119 stats.addValue( n.getDistanceToParent() );
125 public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
126 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
127 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
128 final PhylogenyNode n = iter.next();
129 if ( !n.isExternal() && !n.isRoot() ) {
130 if ( n.getBranchData().isHasConfidences() ) {
131 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
132 final Confidence c = n.getBranchData().getConfidences().get( i );
133 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
134 stats.add( i, new BasicDescriptiveStatistics() );
136 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
137 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
138 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
139 throw new IllegalArgumentException( "support values in node [" + n.toString()
140 + "] appear inconsistently ordered" );
143 stats.get( i ).setDescription( c.getType() );
145 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
154 * Calculates the distance between PhylogenyNodes node1 and node2.
159 * @return distance between node1 and node2
161 public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
162 final PhylogenyNode lca = calculateLCA( node1, node2 );
163 final PhylogenyNode n1 = node1;
164 final PhylogenyNode n2 = node2;
165 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
169 * Returns the LCA of PhylogenyNodes node1 and node2.
174 * @return LCA of node1 and node2
176 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
177 if ( node1 == null ) {
178 throw new IllegalArgumentException( "first argument (node) is null" );
180 if ( node2 == null ) {
181 throw new IllegalArgumentException( "second argument (node) is null" );
183 if ( node1 == node2 ) {
186 if ( ( node1.getParent() == node2.getParent() ) ) {
187 return node1.getParent();
189 int depth1 = node1.calculateDepth();
190 int depth2 = node2.calculateDepth();
191 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
192 if ( depth1 > depth2 ) {
193 node1 = node1.getParent();
196 else if ( depth2 > depth1 ) {
197 node2 = node2.getParent();
201 if ( node1 == node2 ) {
204 node1 = node1.getParent();
205 node2 = node2.getParent();
210 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
214 * Returns the LCA of PhylogenyNodes node1 and node2.
215 * Precondition: ids are in pre-order (or level-order).
220 * @return LCA of node1 and node2
222 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
223 if ( node1 == null ) {
224 throw new IllegalArgumentException( "first argument (node) is null" );
226 if ( node2 == null ) {
227 throw new IllegalArgumentException( "second argument (node) is null" );
229 while ( node1 != node2 ) {
230 if ( node1.getId() > node2.getId() ) {
231 node1 = node1.getParent();
234 node2 = node2.getParent();
240 public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
241 if ( node.isExternal() ) {
245 for( PhylogenyNode d : node.getAllExternalDescendants() ) {
247 while ( d != node ) {
248 if ( d.isCollapse() ) {
263 public static int calculateMaxDepth( final Phylogeny phy ) {
265 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
266 final PhylogenyNode node = iter.next();
267 final int steps = node.calculateDepth();
275 public static String[] obtainPresentRanksSorted( final Phylogeny phy ) {
276 final Set<String> present_ranks = new HashSet<String>();
277 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
278 final PhylogenyNode node = iter.next();
279 if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null )
280 && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) {
281 final String current_rank = node.getNodeData().getTaxonomy().getRank();
282 if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
283 present_ranks.add( current_rank );
287 final String ordered_ranks[] = new String[present_ranks.size() + 1];
289 for( final String rank : TaxonomyUtil.RANKS ) {
290 if ( present_ranks.contains( rank ) ) {
291 ordered_ranks[ c++ ] = rank;
294 ordered_ranks[ c ] = "off";
295 return ordered_ranks;
298 public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) {
300 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
301 PhylogenyNode n = iter.next();
303 while ( n.getParent() != null ) {
304 if ( !n.isCollapse() ) {
316 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
318 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
319 final PhylogenyNode node = iter.next();
320 final double d = node.calculateDistanceToRoot();
328 public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
330 PhylogenyNode max_node = phy.getFirstExternalNode();
331 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
332 final PhylogenyNode node = iter.next();
333 final double d = node.calculateDistanceToRoot();
342 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
343 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
345 for( final PhylogenyNode n : descs ) {
346 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
353 public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
354 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
355 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
356 final PhylogenyNode n = iter.next();
357 if ( !n.isExternal() ) {
358 stats.addValue( n.getNumberOfDescendants() );
364 public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
365 final List<PhylogenyNode> eds = n.getAllExternalDescendants();
366 final List<Double> d = new ArrayList<Double>();
367 for( final PhylogenyNode ed : eds ) {
368 d.add( calculateDistanceToAncestor( n, ed ) );
370 for( int i = 0; i < eds.size(); ++i ) {
371 n.setChildNode( i, eds.get( i ) );
372 eds.get( i ).setDistanceToParent( d.get( i ) );
376 public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
378 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
379 final PhylogenyNode n = iter.next();
380 if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) {
387 public static int countNumberOfPolytomies( final Phylogeny phy ) {
389 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
390 final PhylogenyNode n = iter.next();
391 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
398 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
399 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
400 final List<PhylogenyNode> ext = phy.getExternalNodes();
401 for( final PhylogenyNode n : ext ) {
402 nodes.put( n.getName(), n );
407 public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
408 for( final Long id : to_delete ) {
409 phy.deleteSubtree( phy.getNode( id ), true );
411 phy.clearHashIdToNodeMap();
412 phy.externalNodesHaveChanged();
415 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
416 throws IllegalArgumentException {
417 for( final String element : node_names_to_delete ) {
418 if ( ForesterUtil.isEmpty( element ) ) {
421 List<PhylogenyNode> nodes = null;
422 nodes = p.getNodes( element );
423 final Iterator<PhylogenyNode> it = nodes.iterator();
424 while ( it.hasNext() ) {
425 final PhylogenyNode n = it.next();
426 if ( !n.isExternal() ) {
427 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
429 p.deleteSubtree( n, true );
432 p.clearHashIdToNodeMap();
433 p.externalNodesHaveChanged();
436 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
437 final Phylogeny p ) {
438 final PhylogenyNodeIterator it = p.iteratorExternalForward();
439 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
441 Arrays.sort( node_names_to_keep );
442 while ( it.hasNext() ) {
443 final String curent_name = it.next().getName();
444 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
445 to_delete[ i++ ] = curent_name;
448 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
449 final List<String> deleted = new ArrayList<String>();
450 for( final String n : to_delete ) {
451 if ( !ForesterUtil.isEmpty( n ) ) {
458 public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep,
459 final Phylogeny phy ) {
460 final Set<Long> to_delete = new HashSet<Long>();
461 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
462 final PhylogenyNode n = it.next();
463 if ( n.getNodeData().isHasTaxonomy() ) {
464 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
465 to_delete.add( n.getId() );
469 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
472 deleteExternalNodesNegativeSelection( to_delete, phy );
475 final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
476 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
477 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
478 final PhylogenyNode n = iter.next();
479 if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
483 for( final PhylogenyNode d : to_delete ) {
484 PhylogenyMethods.removeNode( d, phy );
486 phy.clearHashIdToNodeMap();
487 phy.externalNodesHaveChanged();
490 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
491 if ( n.isInternal() ) {
492 throw new IllegalArgumentException( "node is not external" );
494 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
495 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
496 final PhylogenyNode i = it.next();
497 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
501 for( final PhylogenyNode d : to_delete ) {
502 phy.deleteSubtree( d, true );
504 phy.clearHashIdToNodeMap();
505 phy.externalNodesHaveChanged();
508 public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
509 final double min_distance_to_root ) {
510 if ( min_distance_to_root <= 0 ) {
511 throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
513 final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
514 setAllIndicatorsToZero( phy );
515 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
516 final PhylogenyNode n = it.next();
517 if ( n.getIndicator() != 0 ) {
520 l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
522 throw new RuntimeException( "this should not have happened" );
528 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
529 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
530 final Set<Long> encountered = new HashSet<Long>();
531 if ( !node.isExternal() ) {
532 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
533 for( PhylogenyNode current : exts ) {
534 descs.add( current );
535 while ( current != node ) {
536 current = current.getParent();
537 if ( encountered.contains( current.getId() ) ) {
540 descs.add( current );
541 encountered.add( current.getId() );
555 public static Color getBranchColorValue( final PhylogenyNode node ) {
556 if ( node.getBranchData().getBranchColor() == null ) {
559 return node.getBranchData().getBranchColor().getValue();
565 public static double getBranchWidthValue( final PhylogenyNode node ) {
566 if ( !node.getBranchData().isHasBranchWidth() ) {
567 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
569 return node.getBranchData().getBranchWidth().getValue();
575 public static double getConfidenceValue( final PhylogenyNode node ) {
576 if ( !node.getBranchData().isHasConfidences() ) {
577 return Confidence.CONFIDENCE_DEFAULT_VALUE;
579 return node.getBranchData().getConfidence( 0 ).getValue();
585 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
586 if ( !node.getBranchData().isHasConfidences() ) {
587 return new double[ 0 ];
589 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
591 for( final Confidence c : node.getBranchData().getConfidences() ) {
592 values[ i++ ] = c.getValue();
597 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
598 return calculateLCA( n1, n2 ).getNodeData().getEvent();
602 * Returns taxonomy t if all external descendants have
603 * the same taxonomy t, null otherwise.
606 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
607 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
609 for( final PhylogenyNode n : descs ) {
610 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
613 else if ( tax == null ) {
614 tax = n.getNodeData().getTaxonomy();
616 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
623 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
624 final List<PhylogenyNode> children = node.getAllExternalDescendants();
625 PhylogenyNode farthest = null;
626 double longest = -Double.MAX_VALUE;
627 for( final PhylogenyNode child : children ) {
628 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
630 longest = PhylogenyMethods.getDistance( child, node );
636 // public static PhylogenyMethods getInstance() {
637 // if ( PhylogenyMethods._instance == null ) {
638 // PhylogenyMethods._instance = new PhylogenyMethods();
640 // return PhylogenyMethods._instance;
643 * Returns the largest confidence value found on phy.
645 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
646 double max = -Double.MAX_VALUE;
647 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
648 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
649 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
656 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
657 int min = Integer.MAX_VALUE;
660 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
662 if ( n.isInternal() ) {
663 d = n.getNumberOfDescendants();
673 * Convenience method for display purposes.
674 * Not intended for algorithms.
676 public static String getSpecies( final PhylogenyNode node ) {
677 if ( !node.getNodeData().isHasTaxonomy() ) {
680 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
681 return node.getNodeData().getTaxonomy().getScientificName();
683 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
684 return node.getNodeData().getTaxonomy().getTaxonomyCode();
687 return node.getNodeData().getTaxonomy().getCommonName();
692 * Convenience method for display purposes.
693 * Not intended for algorithms.
695 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
696 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
699 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
702 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
703 if ( n.isExternal() ) {
707 if ( n.isDuplication() ) {
708 for( final PhylogenyNode desc : n.getDescendants() ) {
709 if ( !isAllDecendentsAreDuplications( desc ) ) {
721 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
722 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
723 if ( node.getChildNode( i ).isExternal() ) {
731 * This is case insensitive.
734 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
735 final String[] providers ) {
736 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
737 final String my_tax_prov = tax.getIdentifier().getProvider();
738 for( final String provider : providers ) {
739 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
750 public static void midpointRoot( final Phylogeny phylogeny ) {
751 if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
755 final int total_nodes = phylogeny.getNodeCount();
757 if ( ++counter > total_nodes ) {
758 throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
760 PhylogenyNode a = null;
763 for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
764 final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
765 final double df = getDistance( f, phylogeny.getRoot() );
772 else if ( df > db ) {
777 final double diff = da - db;
778 if ( diff < 0.000001 ) {
781 double x = da - ( diff / 2.0 );
782 while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
783 x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
786 phylogeny.reRoot( a, x );
788 phylogeny.recalculateNumberOfExternalDescendants( true );
791 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
792 final double max_bootstrap_value,
793 final double max_normalized_value ) {
794 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
795 final PhylogenyNode node = iter.next();
796 if ( node.isInternal() ) {
797 final double confidence = getConfidenceValue( node );
798 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
799 if ( confidence >= max_bootstrap_value ) {
800 setBootstrapConfidence( node, max_normalized_value );
803 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
810 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
811 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
812 if ( phy.isEmpty() ) {
815 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
816 nodes.add( iter.next() );
822 * Returns a map of distinct taxonomies of
823 * all external nodes of node.
824 * If at least one of the external nodes has no taxonomy,
828 public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
829 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
830 final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
831 for( final PhylogenyNode n : descs ) {
832 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
835 final Taxonomy t = n.getNodeData().getTaxonomy();
836 if ( tax_map.containsKey( t ) ) {
837 tax_map.put( t, tax_map.get( t ) + 1 );
847 * Arranges the order of childern for each node of this Phylogeny in such a
848 * way that either the branch with more children is on top (right) or on
849 * bottom (left), dependent on the value of boolean order.
852 * decides in which direction to order
855 public static void orderAppearance( final PhylogenyNode n,
857 final boolean order_ext_alphabetically,
858 final DESCENDANT_SORT_PRIORITY pri ) {
859 if ( n.isExternal() ) {
863 if ( ( n.getNumberOfDescendants() == 2 )
864 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
865 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2()
866 .getNumberOfExternalNodes() ) == order ) ) {
867 final PhylogenyNode temp = n.getChildNode1();
868 n.setChild1( n.getChildNode2() );
870 _order_changed = true;
872 else if ( order_ext_alphabetically ) {
873 boolean all_ext = true;
874 for( final PhylogenyNode i : n.getDescendants() ) {
875 if ( !i.isExternal() ) {
881 PhylogenyMethods.sortNodeDescendents( n, pri );
884 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
885 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
890 public synchronized static void orderAppearanceX( final PhylogenyNode n,
891 final boolean order_ext_alphabetically,
892 final DESCENDANT_SORT_PRIORITY pri ) {
893 if ( n.isExternal() ) {
897 _order_changed = false;
898 orderAppearance( n, true, order_ext_alphabetically, pri );
899 if ( !_order_changed ) {
900 orderAppearance( n, false, order_ext_alphabetically, pri );
905 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
906 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
907 final PhylogenyNode node = iter.next();
912 if ( node.isInternal() ) {
913 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
914 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
915 final PhylogenyNode child_node = node.getChildNode( i );
916 final Color child_color = getBranchColorValue( child_node );
917 if ( child_color != null ) {
919 red += child_color.getRed();
920 green += child_color.getGreen();
921 blue += child_color.getBlue();
924 setBranchColorValue( node,
925 new Color( ForesterUtil.roundToInt( red / n ),
926 ForesterUtil.roundToInt( green / n ),
927 ForesterUtil.roundToInt( blue / n ) ) );
932 public static final void preOrderReId( final Phylogeny phy ) {
933 if ( phy.isEmpty() ) {
936 phy.setIdToNodeMap( null );
937 long i = PhylogenyNode.getNodeCount();
938 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
939 it.next().setId( i++ );
941 PhylogenyNode.setNodeCount( i );
944 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file )
946 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
947 final Phylogeny[] trees = factory.create( file, parser );
948 if ( ( trees == null ) || ( trees.length == 0 ) ) {
949 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
954 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
956 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
957 for( final File file : files ) {
958 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
959 final Phylogeny[] trees = factory.create( file, parser );
960 if ( ( trees == null ) || ( trees.length == 0 ) ) {
961 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
963 tree_list.addAll( Arrays.asList( trees ) );
965 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
968 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
969 if ( remove_me.isRoot() ) {
970 if ( remove_me.getNumberOfDescendants() == 1 ) {
971 final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
972 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
973 desc.getDistanceToParent() ) );
974 desc.setParent( null );
975 phylogeny.setRoot( desc );
976 phylogeny.clearHashIdToNodeMap();
979 throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
982 else if ( remove_me.isExternal() ) {
983 phylogeny.deleteSubtree( remove_me, false );
984 phylogeny.clearHashIdToNodeMap();
985 phylogeny.externalNodesHaveChanged();
988 final PhylogenyNode parent = remove_me.getParent();
989 final List<PhylogenyNode> descs = remove_me.getDescendants();
990 parent.removeChildNode( remove_me );
991 for( final PhylogenyNode desc : descs ) {
992 parent.addAsChild( desc );
993 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
994 desc.getDistanceToParent() ) );
996 remove_me.setParent( null );
997 phylogeny.clearHashIdToNodeMap();
998 phylogeny.externalNodesHaveChanged();
1002 private static enum NDF {
1004 TaxonomyCode( "TC" ),
1005 TaxonomyCommonName( "TN" ),
1006 TaxonomyScientificName( "TS" ),
1007 TaxonomyIdentifier( "TI" ),
1008 TaxonomySynonym( "SY" ),
1009 SequenceName( "SN" ),
1011 SequenceSymbol( "SS" ),
1012 SequenceAccession( "SA" ),
1016 BinaryCharacter( "BC" ),
1017 MolecularSequence( "MS" );
1019 private final String _text;
1021 NDF( final String text ) {
1025 public static NDF fromString( final String text ) {
1026 for( final NDF n : NDF.values() ) {
1027 if ( text.startsWith( n._text ) ) {
1035 public static List<Long> searchData( final String query,
1036 final Phylogeny phy,
1037 final boolean case_sensitive,
1038 final boolean partial,
1039 final boolean regex,
1040 final boolean search_domains,
1041 final double domains_confidence_threshold ) {
1042 final List<Long> nodes = new ArrayList<Long>();
1043 if ( phy.isEmpty() || ( query == null ) ) {
1046 if ( ForesterUtil.isEmpty( query ) ) {
1049 String my_query = query;
1051 if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
1052 ndf = NDF.fromString( my_query );
1053 if ( ndf != null ) {
1054 my_query = my_query.substring( 3 );
1057 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1058 final PhylogenyNode node = iter.next();
1059 boolean match = false;
1060 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1061 && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
1064 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1065 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1072 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1073 && match( node.getNodeData().getTaxonomy().getCommonName(),
1080 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy()
1081 && match( node.getNodeData().getTaxonomy().getScientificName(),
1088 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1089 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1090 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1097 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1098 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1099 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1100 I: for( final String syn : syns ) {
1101 if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
1107 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1108 && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
1111 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1112 && match( node.getNodeData().getSequence().getGeneName(),
1119 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
1120 && match( node.getNodeData().getSequence().getSymbol(),
1127 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence()
1128 && ( node.getNodeData().getSequence().getAccession() != null )
1129 && match( node.getNodeData().getSequence().getAccession().getValue(),
1136 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1137 && node.getNodeData().isHasSequence()
1138 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1139 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1140 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1141 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1142 && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
1148 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1149 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1150 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1151 if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
1155 if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
1161 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1162 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1163 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1164 if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
1168 if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
1172 if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
1178 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1179 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1180 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1181 I: while ( it.hasNext() ) {
1182 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1187 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1188 I: while ( it.hasNext() ) {
1189 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1195 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1196 && match( node.getNodeData().getSequence().getMolecularSequence(),
1204 nodes.add( node.getId() );
1210 public static List<Long> searchDataLogicalAnd( final String[] queries,
1211 final Phylogeny phy,
1212 final boolean case_sensitive,
1213 final boolean partial,
1214 final boolean search_domains,
1215 final double domains_confidence_threshold ) {
1216 final List<Long> nodes = new ArrayList<Long>();
1217 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1220 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1221 final PhylogenyNode node = iter.next();
1222 boolean all_matched = true;
1223 for( String query : queries ) {
1224 if ( query == null ) {
1227 query = query.trim();
1229 if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
1230 ndf = NDF.fromString( query );
1231 if ( ndf != null ) {
1232 query = query.substring( 3 );
1235 boolean match = false;
1236 if ( ForesterUtil.isEmpty( query ) ) {
1239 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1240 && match( node.getName(), query, case_sensitive, partial, false ) ) {
1243 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1244 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1251 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1252 && match( node.getNodeData().getTaxonomy().getCommonName(),
1259 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1260 && node.getNodeData().isHasTaxonomy()
1261 && match( node.getNodeData().getTaxonomy().getScientificName(),
1268 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1269 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1270 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1277 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1278 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1279 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1280 I: for( final String syn : syns ) {
1281 if ( match( syn, query, case_sensitive, partial, false ) ) {
1287 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1288 && match( node.getNodeData().getSequence().getName(),
1295 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1296 && match( node.getNodeData().getSequence().getGeneName(),
1303 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
1304 && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(),
1311 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1312 && node.getNodeData().isHasSequence()
1313 && ( node.getNodeData().getSequence().getAccession() != null )
1314 && match( node.getNodeData().getSequence().getAccession().getValue(),
1321 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1322 && node.getNodeData().isHasSequence()
1323 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1324 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1325 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1326 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1327 && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
1333 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1334 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1335 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1336 if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
1340 if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
1346 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1347 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1348 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1349 if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
1353 if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
1357 if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
1363 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1364 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1365 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1366 I: while ( it.hasNext() ) {
1367 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1372 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1373 I: while ( it.hasNext() ) {
1374 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1380 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1381 && match( node.getNodeData().getSequence().getMolecularSequence(),
1389 all_matched = false;
1393 if ( all_matched ) {
1394 nodes.add( node.getId() );
1400 public static void setAllIndicatorsToZero( final Phylogeny phy ) {
1401 for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
1402 it.next().setIndicator( ( byte ) 0 );
1407 * Convenience method.
1408 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1410 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1411 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1414 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1415 if ( node.getBranchData().getBranchColor() == null ) {
1416 node.getBranchData().setBranchColor( new BranchColor() );
1418 node.getBranchData().getBranchColor().setValue( color );
1422 * Convenience method
1424 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1425 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1429 * Convenience method.
1430 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1432 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1433 setConfidence( node, confidence_value, "" );
1437 * Convenience method.
1438 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1440 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1441 Confidence c = null;
1442 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1443 c = node.getBranchData().getConfidence( 0 );
1446 c = new Confidence();
1447 node.getBranchData().addConfidence( c );
1450 c.setValue( confidence_value );
1453 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1454 if ( !node.getNodeData().isHasTaxonomy() ) {
1455 node.getNodeData().setTaxonomy( new Taxonomy() );
1457 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1461 * Convenience method to set the taxonomy code of a phylogeny node.
1465 * @param taxonomy_code
1466 * @throws PhyloXmlDataFormatException
1468 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1469 throws PhyloXmlDataFormatException {
1470 if ( !node.getNodeData().isHasTaxonomy() ) {
1471 node.getNodeData().setTaxonomy( new Taxonomy() );
1473 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1476 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
1477 Comparator<PhylogenyNode> c;
1480 c = new PhylogenyNodeSortSequencePriority();
1483 c = new PhylogenyNodeSortNodeNamePriority();
1486 c = new PhylogenyNodeSortTaxonomyPriority();
1488 final List<PhylogenyNode> descs = node.getDescendants();
1489 Collections.sort( descs, c );
1491 for( final PhylogenyNode desc : descs ) {
1492 node.setChildNode( i++, desc );
1497 * Removes from Phylogeny to_be_stripped all external Nodes which are
1498 * associated with a species NOT found in Phylogeny reference.
1501 * a reference Phylogeny
1502 * @param to_be_stripped
1503 * Phylogeny to be stripped
1504 * @return nodes removed from to_be_stripped
1506 public static List<PhylogenyNode> taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference,
1507 final Phylogeny to_be_stripped ) {
1508 final Set<String> ref_ext_taxo = new HashSet<String>();
1509 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1510 final PhylogenyNode n = it.next();
1511 if ( !n.getNodeData().isHasTaxonomy() ) {
1512 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1514 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1515 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1517 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1518 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1520 if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
1521 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
1522 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
1525 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1526 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1527 final PhylogenyNode n = it.next();
1528 if ( !n.getNodeData().isHasTaxonomy() ) {
1529 nodes_to_delete.add( n );
1531 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1532 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1533 && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo
1534 .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
1535 nodes_to_delete.add( n );
1538 for( final PhylogenyNode n : nodes_to_delete ) {
1539 to_be_stripped.deleteSubtree( n, true );
1541 to_be_stripped.clearHashIdToNodeMap();
1542 to_be_stripped.externalNodesHaveChanged();
1543 return nodes_to_delete;
1546 final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
1547 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1548 while ( it.hasNext() ) {
1549 final PhylogenyNode n = it.next();
1550 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
1553 value = Double.parseDouble( n.getName() );
1555 catch ( final NumberFormatException e ) {
1556 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
1557 + e.getLocalizedMessage() );
1559 if ( value >= 0.0 ) {
1560 n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
1567 final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
1568 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1569 while ( it.hasNext() ) {
1570 final PhylogenyNode n = it.next();
1571 if ( !n.isExternal() && !n.isRoot() ) {
1572 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1575 value = Double.parseDouble( n.getName() );
1577 catch ( final NumberFormatException e ) {
1580 if ( ( value < 0.0 ) || ( value > 100 ) ) {
1589 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy,
1590 final String confidence_type ) {
1591 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1592 while ( it.hasNext() ) {
1593 transferInternalNodeNameToConfidence( confidence_type, it.next() );
1597 private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
1598 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
1599 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1602 d = Double.parseDouble( n.getName() );
1604 catch ( final Exception e ) {
1608 n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
1615 final static public void transferNodeNameToField( final Phylogeny phy,
1616 final PhylogenyNodeField field,
1617 final boolean external_only )
1618 throws PhyloXmlDataFormatException {
1619 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1620 while ( it.hasNext() ) {
1621 final PhylogenyNode n = it.next();
1622 if ( external_only && n.isInternal() ) {
1625 final String name = n.getName().trim();
1626 if ( !ForesterUtil.isEmpty( name ) ) {
1630 setTaxonomyCode( n, name );
1632 case TAXONOMY_SCIENTIFIC_NAME:
1634 if ( !n.getNodeData().isHasTaxonomy() ) {
1635 n.getNodeData().setTaxonomy( new Taxonomy() );
1637 n.getNodeData().getTaxonomy().setScientificName( name );
1639 case TAXONOMY_COMMON_NAME:
1641 if ( !n.getNodeData().isHasTaxonomy() ) {
1642 n.getNodeData().setTaxonomy( new Taxonomy() );
1644 n.getNodeData().getTaxonomy().setCommonName( name );
1646 case SEQUENCE_SYMBOL:
1648 if ( !n.getNodeData().isHasSequence() ) {
1649 n.getNodeData().setSequence( new Sequence() );
1651 n.getNodeData().getSequence().setSymbol( name );
1655 if ( !n.getNodeData().isHasSequence() ) {
1656 n.getNodeData().setSequence( new Sequence() );
1658 n.getNodeData().getSequence().setName( name );
1660 case TAXONOMY_ID_UNIPROT_1: {
1661 if ( !n.getNodeData().isHasTaxonomy() ) {
1662 n.getNodeData().setTaxonomy( new Taxonomy() );
1665 final int i = name.indexOf( '_' );
1667 id = name.substring( 0, i );
1672 n.getNodeData().getTaxonomy()
1673 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1676 case TAXONOMY_ID_UNIPROT_2: {
1677 if ( !n.getNodeData().isHasTaxonomy() ) {
1678 n.getNodeData().setTaxonomy( new Taxonomy() );
1681 final int i = name.indexOf( '_' );
1683 id = name.substring( i + 1, name.length() );
1688 n.getNodeData().getTaxonomy()
1689 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1693 if ( !n.getNodeData().isHasTaxonomy() ) {
1694 n.getNodeData().setTaxonomy( new Taxonomy() );
1696 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
1700 throw new IllegalArgumentException( "don't know what to do with " + field );
1707 static double addPhylogenyDistances( final double a, final double b ) {
1708 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
1711 else if ( a >= 0.0 ) {
1714 else if ( b >= 0.0 ) {
1717 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1720 static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
1722 boolean all_default = true;
1723 while ( anc != desc ) {
1724 if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
1725 d += desc.getDistanceToParent();
1726 if ( all_default ) {
1727 all_default = false;
1730 desc = desc.getParent();
1732 if ( all_default ) {
1733 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1738 public static double calculateAverageTreeHeight( final PhylogenyNode node ) {
1739 final List<PhylogenyNode> ext = node.getAllExternalDescendants();
1741 for( PhylogenyNode n : ext ) {
1742 while ( n != node ) {
1743 if ( n.getDistanceToParent() > 0 ) {
1744 s += n.getDistanceToParent();
1749 return s / ext.size();
1753 * Deep copies the phylogeny originating from this node.
1755 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
1756 if ( source == null ) {
1760 final PhylogenyNode newnode = source.copyNodeData();
1761 if ( !source.isExternal() ) {
1762 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1763 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
1771 * Shallow copies the phylogeny originating from this node.
1773 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
1774 if ( source == null ) {
1778 final PhylogenyNode newnode = source.copyNodeDataShallow();
1779 if ( !source.isExternal() ) {
1780 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1781 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
1788 private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
1789 final double min_distance_to_root ) {
1790 final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
1791 final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
1792 for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
1793 if ( ext.getIndicator() != 0 ) {
1794 throw new RuntimeException( "this should not have happened" );
1796 ext.setIndicator( ( byte ) 1 );
1803 * Calculates the distance between PhylogenyNodes n1 and n2.
1804 * PRECONDITION: n1 is a descendant of n2.
1807 * a descendant of n2
1809 * @return distance between n1 and n2
1811 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
1813 while ( n1 != n2 ) {
1814 if ( n1.getDistanceToParent() > 0.0 ) {
1815 d += n1.getDistanceToParent();
1817 n1 = n1.getParent();
1822 private static boolean match( final String s,
1824 final boolean case_sensitive,
1825 final boolean partial,
1826 final boolean regex ) {
1827 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1830 String my_s = s.trim();
1831 String my_query = query.trim();
1832 if ( !case_sensitive && !regex ) {
1833 my_s = my_s.toLowerCase();
1834 my_query = my_query.toLowerCase();
1839 if ( case_sensitive ) {
1840 p = Pattern.compile( my_query );
1843 p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
1846 catch ( final PatternSyntaxException e ) {
1850 return p.matcher( my_s ).find();
1856 else if ( partial ) {
1857 return my_s.indexOf( my_query ) >= 0;
1862 p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
1864 catch ( final PatternSyntaxException e ) {
1868 return p.matcher( my_s ).find();
1876 private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
1877 PhylogenyNode n = node;
1878 PhylogenyNode prev = node;
1879 while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
1886 public static enum DESCENDANT_SORT_PRIORITY {
1892 public static enum PhylogenyNodeField {
1897 TAXONOMY_COMMON_NAME,
1899 TAXONOMY_ID_UNIPROT_1,
1900 TAXONOMY_ID_UNIPROT_2,
1901 TAXONOMY_SCIENTIFIC_NAME;
1904 public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
1905 for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
1906 final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
1907 final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
1908 final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
1909 new_seq.setMolecularSequenceAligned( true );
1910 new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
1911 new_seq.setName( seq.getIdentifier() );
1913 new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
1915 catch ( final PhyloXmlDataFormatException ignore ) {
1918 node.getNodeData().addSequence( new_seq );
1922 final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
1925 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1926 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1927 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1928 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1929 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1930 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1932 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1933 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1934 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1935 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1938 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1939 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1940 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1941 return n1.getNodeData().getSequence().getName().toLowerCase()
1942 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1944 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1945 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1946 return n1.getNodeData().getSequence().getGeneName()
1947 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1949 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1950 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1951 return n1.getNodeData().getSequence().getSymbol()
1952 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1955 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1956 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1962 final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
1965 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1966 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1967 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1968 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1969 return n1.getNodeData().getSequence().getName().toLowerCase()
1970 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1972 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1973 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1974 return n1.getNodeData().getSequence().getGeneName()
1975 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1977 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1978 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1979 return n1.getNodeData().getSequence().getSymbol()
1980 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1983 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1984 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1985 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1986 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1987 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1989 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1990 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1991 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1992 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1995 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1996 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2002 final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
2005 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
2006 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
2007 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2009 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
2010 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
2011 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
2012 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
2013 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
2015 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
2016 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
2017 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
2018 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
2021 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
2022 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
2023 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
2024 return n1.getNodeData().getSequence().getName().toLowerCase()
2025 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
2027 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
2028 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
2029 return n1.getNodeData().getSequence().getGeneName()
2030 .compareTo( n2.getNodeData().getSequence().getGeneName() );
2032 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
2033 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
2034 return n1.getNodeData().getSequence().getSymbol()
2035 .compareTo( n2.getNodeData().getSequence().getSymbol() );
2042 public final static Map<Long, Integer> calculateDepths( final Phylogeny phy ) {
2043 final Map<Long, Integer> depths = new HashMap<Long, Integer>();
2044 calculateDepthsHelper( phy.getRoot(), 0, depths );
2048 private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map<Long, Integer> depths ) {
2049 depths.put( n.getId(), d );
2051 final List<PhylogenyNode> descs = n.getDescendants();
2052 for( final PhylogenyNode desc : descs ) {
2053 calculateDepthsHelper( desc, d, depths );
2057 public final static void collapseToDepth( final Phylogeny phy, final int depth ) {
2058 if ( phy.getNumberOfExternalNodes() < 3 ) {
2061 collapseToDepthHelper( phy.getRoot(), 0, depth );
2064 private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) {
2065 if ( n.isExternal() ) {
2066 n.setCollapse( false );
2070 n.setCollapse( true );
2071 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2072 while ( it.hasNext() ) {
2073 it.next().setCollapse( true );
2077 n.setCollapse( false );
2079 final List<PhylogenyNode> descs = n.getDescendants();
2080 for( final PhylogenyNode desc : descs ) {
2081 collapseToDepthHelper( desc, d, depth );
2088 public final static void collapseToRank( final Phylogeny phy, final int rank ) {
2089 if ( phy.getNumberOfExternalNodes() < 3 ) {
2092 if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) {
2093 throw new IllegalArgumentException( "Rank " + rank + " is out of range" );
2095 collapseToRankHelper( phy.getRoot(), rank );
2098 private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) {
2099 if ( n.isExternal() ) {
2100 n.setCollapse( false );
2103 if ( ( n.getNodeData().getTaxonomy() != null )
2104 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) {
2105 final String current_rank = n.getNodeData().getTaxonomy().getRank();
2106 if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
2107 System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." );
2110 if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) {
2111 n.setCollapse( true );
2113 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2114 while ( it.hasNext() ) {
2115 it.next().setCollapse( true );
2121 n.setCollapse( false );
2122 final List<PhylogenyNode> descs = n.getDescendants();
2123 for( final PhylogenyNode desc : descs ) {
2124 collapseToRankHelper( desc, target_rank );
2128 public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) {
2129 PhylogenyNode n = node;
2130 while ( n.isInternal() ) {
2131 n = n.getFirstChildNode();
2136 public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) {
2137 PhylogenyNode n = node;
2138 while ( n.isInternal() ) {
2139 n = n.getLastChildNode();
2144 public final static boolean isHasCollapsedNodes( final Phylogeny phy ) {
2145 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
2146 final PhylogenyNode n = iter.next();
2147 if ( !n.isExternal() && ( n.isCollapse() ) ) {