2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 import java.util.regex.PatternSyntaxException;
45 import org.forester.archaeopteryx.TreePanelUtil;
46 import org.forester.io.parsers.FastaParser;
47 import org.forester.io.parsers.PhylogenyParser;
48 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
49 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
50 import org.forester.io.parsers.util.PhylogenyParserException;
51 import org.forester.msa.Msa;
52 import org.forester.phylogeny.data.Accession;
53 import org.forester.phylogeny.data.Annotation;
54 import org.forester.phylogeny.data.BranchColor;
55 import org.forester.phylogeny.data.BranchWidth;
56 import org.forester.phylogeny.data.Confidence;
57 import org.forester.phylogeny.data.DomainArchitecture;
58 import org.forester.phylogeny.data.Event;
59 import org.forester.phylogeny.data.Identifier;
60 import org.forester.phylogeny.data.PhylogenyDataUtil;
61 import org.forester.phylogeny.data.Sequence;
62 import org.forester.phylogeny.data.Taxonomy;
63 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
64 import org.forester.phylogeny.factories.PhylogenyFactory;
65 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
66 import org.forester.phylogeny.iterators.PreorderTreeIterator;
67 import org.forester.util.BasicDescriptiveStatistics;
68 import org.forester.util.DescriptiveStatistics;
69 import org.forester.util.ForesterUtil;
70 import org.forester.util.TaxonomyUtil;
72 public class PhylogenyMethods {
74 private static boolean _order_changed;
76 private PhylogenyMethods() {
77 // Hidden constructor.
81 public Object clone() throws CloneNotSupportedException {
82 throw new CloneNotSupportedException();
85 public static boolean extractFastaInformation( final Phylogeny phy ) {
86 boolean could_extract = false;
87 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
88 final PhylogenyNode node = iter.next();
89 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
90 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
91 if ( name_m.lookingAt() ) {
93 final String acc_source = name_m.group( 1 );
94 final String acc = name_m.group( 2 );
95 final String seq_name = name_m.group( 3 );
96 final String tax_sn = name_m.group( 4 );
97 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
98 ForesterUtil.ensurePresenceOfSequence( node );
99 node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
101 if ( !ForesterUtil.isEmpty( seq_name ) ) {
102 ForesterUtil.ensurePresenceOfSequence( node );
103 node.getNodeData().getSequence( 0 ).setName( seq_name );
105 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
106 ForesterUtil.ensurePresenceOfTaxonomy( node );
107 node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
112 return could_extract;
115 public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
116 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
117 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
118 final PhylogenyNode n = iter.next();
119 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
120 stats.addValue( n.getDistanceToParent() );
126 public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
127 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
128 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
129 final PhylogenyNode n = iter.next();
130 if ( !n.isExternal() && !n.isRoot() ) {
131 if ( n.getBranchData().isHasConfidences() ) {
132 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
133 final Confidence c = n.getBranchData().getConfidences().get( i );
134 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
135 stats.add( i, new BasicDescriptiveStatistics() );
137 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
138 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
139 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
140 throw new IllegalArgumentException( "support values in node [" + n.toString()
141 + "] appear inconsistently ordered" );
144 stats.get( i ).setDescription( c.getType() );
146 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
155 * Calculates the distance between PhylogenyNodes node1 and node2.
160 * @return distance between node1 and node2
162 public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
163 final PhylogenyNode lca = calculateLCA( node1, node2 );
164 final PhylogenyNode n1 = node1;
165 final PhylogenyNode n2 = node2;
166 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
170 * Returns the LCA of PhylogenyNodes node1 and node2.
175 * @return LCA of node1 and node2
177 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
178 if ( node1 == null ) {
179 throw new IllegalArgumentException( "first argument (node) is null" );
181 if ( node2 == null ) {
182 throw new IllegalArgumentException( "second argument (node) is null" );
184 if ( node1 == node2 ) {
187 if ( ( node1.getParent() == node2.getParent() ) ) {
188 return node1.getParent();
190 int depth1 = node1.calculateDepth();
191 int depth2 = node2.calculateDepth();
192 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
193 if ( depth1 > depth2 ) {
194 node1 = node1.getParent();
197 else if ( depth2 > depth1 ) {
198 node2 = node2.getParent();
202 if ( node1 == node2 ) {
205 node1 = node1.getParent();
206 node2 = node2.getParent();
211 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
215 * Returns the LCA of PhylogenyNodes node1 and node2.
216 * Precondition: ids are in pre-order (or level-order).
221 * @return LCA of node1 and node2
223 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
224 if ( node1 == null ) {
225 throw new IllegalArgumentException( "first argument (node) is null" );
227 if ( node2 == null ) {
228 throw new IllegalArgumentException( "second argument (node) is null" );
230 while ( node1 != node2 ) {
231 if ( node1.getId() > node2.getId() ) {
232 node1 = node1.getParent();
235 node2 = node2.getParent();
241 public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
242 if ( node.isExternal() ) {
246 for( PhylogenyNode d : node.getAllExternalDescendants() ) {
248 while ( d != node ) {
249 if ( d.isCollapse() ) {
264 public static int calculateMaxDepth( final Phylogeny phy ) {
266 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
267 final PhylogenyNode node = iter.next();
268 final int steps = node.calculateDepth();
276 public static String[] obtainPresentRanksSorted( final Phylogeny phy ) {
277 final Set<String> present_ranks = new HashSet<String>();
278 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
279 final PhylogenyNode node = iter.next();
280 if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null )
281 && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) {
282 final String current_rank = node.getNodeData().getTaxonomy().getRank();
283 if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
284 present_ranks.add( current_rank );
288 final String ordered_ranks[] = new String[present_ranks.size() + 1];
290 for( final String rank : TaxonomyUtil.RANKS ) {
291 if ( present_ranks.contains( rank ) ) {
292 ordered_ranks[ c++ ] = rank;
295 ordered_ranks[ c ] = "off";
296 return ordered_ranks;
299 public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) {
301 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
302 PhylogenyNode n = iter.next();
304 while ( n.getParent() != null ) {
305 if ( !n.isCollapse() ) {
317 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
319 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
320 final PhylogenyNode node = iter.next();
321 final double d = node.calculateDistanceToRoot();
329 public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
331 PhylogenyNode max_node = phy.getFirstExternalNode();
332 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
333 final PhylogenyNode node = iter.next();
334 final double d = node.calculateDistanceToRoot();
343 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
344 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
346 for( final PhylogenyNode n : descs ) {
347 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
354 public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
355 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
356 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
357 final PhylogenyNode n = iter.next();
358 if ( !n.isExternal() ) {
359 stats.addValue( n.getNumberOfDescendants() );
365 public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
366 final List<PhylogenyNode> eds = n.getAllExternalDescendants();
367 final List<Double> d = new ArrayList<Double>();
368 for( final PhylogenyNode ed : eds ) {
369 d.add( calculateDistanceToAncestor( n, ed ) );
371 for( int i = 0; i < eds.size(); ++i ) {
372 n.setChildNode( i, eds.get( i ) );
373 eds.get( i ).setDistanceToParent( d.get( i ) );
377 public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
379 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
380 final PhylogenyNode n = iter.next();
381 if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) {
388 public static int countNumberOfPolytomies( final Phylogeny phy ) {
390 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
391 final PhylogenyNode n = iter.next();
392 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
399 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
400 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
401 final List<PhylogenyNode> ext = phy.getExternalNodes();
402 for( final PhylogenyNode n : ext ) {
403 nodes.put( n.getName(), n );
408 public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
409 for( final Long id : to_delete ) {
410 phy.deleteSubtree( phy.getNode( id ), true );
412 phy.clearHashIdToNodeMap();
413 phy.externalNodesHaveChanged();
416 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
417 throws IllegalArgumentException {
418 for( final String element : node_names_to_delete ) {
419 if ( ForesterUtil.isEmpty( element ) ) {
422 List<PhylogenyNode> nodes = null;
423 nodes = p.getNodes( element );
424 final Iterator<PhylogenyNode> it = nodes.iterator();
425 while ( it.hasNext() ) {
426 final PhylogenyNode n = it.next();
427 if ( !n.isExternal() ) {
428 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
430 p.deleteSubtree( n, true );
433 p.clearHashIdToNodeMap();
434 p.externalNodesHaveChanged();
437 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
438 final Phylogeny p ) {
439 final PhylogenyNodeIterator it = p.iteratorExternalForward();
440 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
442 Arrays.sort( node_names_to_keep );
443 while ( it.hasNext() ) {
444 final String curent_name = it.next().getName();
445 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
446 to_delete[ i++ ] = curent_name;
449 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
450 final List<String> deleted = new ArrayList<String>();
451 for( final String n : to_delete ) {
452 if ( !ForesterUtil.isEmpty( n ) ) {
459 public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep,
460 final Phylogeny phy ) {
461 final Set<Long> to_delete = new HashSet<Long>();
462 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
463 final PhylogenyNode n = it.next();
464 if ( n.getNodeData().isHasTaxonomy() ) {
465 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
466 to_delete.add( n.getId() );
470 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
473 deleteExternalNodesNegativeSelection( to_delete, phy );
476 final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
477 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
478 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
479 final PhylogenyNode n = iter.next();
480 if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
484 for( final PhylogenyNode d : to_delete ) {
485 PhylogenyMethods.removeNode( d, phy );
487 phy.clearHashIdToNodeMap();
488 phy.externalNodesHaveChanged();
491 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
492 if ( n.isInternal() ) {
493 throw new IllegalArgumentException( "node is not external" );
495 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
496 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
497 final PhylogenyNode i = it.next();
498 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
502 for( final PhylogenyNode d : to_delete ) {
503 phy.deleteSubtree( d, true );
505 phy.clearHashIdToNodeMap();
506 phy.externalNodesHaveChanged();
509 public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
510 final double min_distance_to_root ) {
511 if ( min_distance_to_root <= 0 ) {
512 throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
514 final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
515 setAllIndicatorsToZero( phy );
516 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
517 final PhylogenyNode n = it.next();
518 if ( n.getIndicator() != 0 ) {
521 l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
523 throw new RuntimeException( "this should not have happened" );
529 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
530 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
531 final Set<Long> encountered = new HashSet<Long>();
532 if ( !node.isExternal() ) {
533 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
534 for( PhylogenyNode current : exts ) {
535 descs.add( current );
536 while ( current != node ) {
537 current = current.getParent();
538 if ( encountered.contains( current.getId() ) ) {
541 descs.add( current );
542 encountered.add( current.getId() );
556 public static Color getBranchColorValue( final PhylogenyNode node ) {
557 if ( node.getBranchData().getBranchColor() == null ) {
560 return node.getBranchData().getBranchColor().getValue();
566 public static double getBranchWidthValue( final PhylogenyNode node ) {
567 if ( !node.getBranchData().isHasBranchWidth() ) {
568 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
570 return node.getBranchData().getBranchWidth().getValue();
576 public static double getConfidenceValue( final PhylogenyNode node ) {
577 if ( !node.getBranchData().isHasConfidences() ) {
578 return Confidence.CONFIDENCE_DEFAULT_VALUE;
580 return node.getBranchData().getConfidence( 0 ).getValue();
586 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
587 if ( !node.getBranchData().isHasConfidences() ) {
588 return new double[ 0 ];
590 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
592 for( final Confidence c : node.getBranchData().getConfidences() ) {
593 values[ i++ ] = c.getValue();
598 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
599 return calculateLCA( n1, n2 ).getNodeData().getEvent();
603 * Returns taxonomy t if all external descendants have
604 * the same taxonomy t, null otherwise.
607 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
608 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
610 for( final PhylogenyNode n : descs ) {
611 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
614 else if ( tax == null ) {
615 tax = n.getNodeData().getTaxonomy();
617 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
624 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
625 final List<PhylogenyNode> children = node.getAllExternalDescendants();
626 PhylogenyNode farthest = null;
627 double longest = -Double.MAX_VALUE;
628 for( final PhylogenyNode child : children ) {
629 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
631 longest = PhylogenyMethods.getDistance( child, node );
637 // public static PhylogenyMethods getInstance() {
638 // if ( PhylogenyMethods._instance == null ) {
639 // PhylogenyMethods._instance = new PhylogenyMethods();
641 // return PhylogenyMethods._instance;
644 * Returns the largest confidence value found on phy.
646 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
647 double max = -Double.MAX_VALUE;
648 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
649 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
650 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
657 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
658 int min = Integer.MAX_VALUE;
661 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
663 if ( n.isInternal() ) {
664 d = n.getNumberOfDescendants();
674 * Convenience method for display purposes.
675 * Not intended for algorithms.
677 public static String getSpecies( final PhylogenyNode node ) {
678 if ( !node.getNodeData().isHasTaxonomy() ) {
681 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
682 return node.getNodeData().getTaxonomy().getScientificName();
684 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
685 return node.getNodeData().getTaxonomy().getTaxonomyCode();
688 return node.getNodeData().getTaxonomy().getCommonName();
693 * Convenience method for display purposes.
694 * Not intended for algorithms.
696 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
697 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
700 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
703 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
704 if ( n.isExternal() ) {
708 if ( n.isDuplication() ) {
709 for( final PhylogenyNode desc : n.getDescendants() ) {
710 if ( !isAllDecendentsAreDuplications( desc ) ) {
722 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
723 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
724 if ( node.getChildNode( i ).isExternal() ) {
732 * This is case insensitive.
735 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
736 final String[] providers ) {
737 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
738 final String my_tax_prov = tax.getIdentifier().getProvider();
739 for( final String provider : providers ) {
740 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
751 public static void midpointRoot( final Phylogeny phylogeny ) {
752 if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
756 final int total_nodes = phylogeny.getNodeCount();
758 if ( ++counter > total_nodes ) {
759 throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
761 PhylogenyNode a = null;
764 for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
765 final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
766 final double df = getDistance( f, phylogeny.getRoot() );
773 else if ( df > db ) {
778 final double diff = da - db;
779 if ( diff < 0.000001 ) {
782 double x = da - ( diff / 2.0 );
783 while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
784 x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
787 phylogeny.reRoot( a, x );
789 phylogeny.recalculateNumberOfExternalDescendants( true );
792 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
793 final double max_bootstrap_value,
794 final double max_normalized_value ) {
795 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
796 final PhylogenyNode node = iter.next();
797 if ( node.isInternal() ) {
798 final double confidence = getConfidenceValue( node );
799 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
800 if ( confidence >= max_bootstrap_value ) {
801 setBootstrapConfidence( node, max_normalized_value );
804 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
811 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
812 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
813 if ( phy.isEmpty() ) {
816 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
817 nodes.add( iter.next() );
823 * Returns a map of distinct taxonomies of
824 * all external nodes of node.
825 * If at least one of the external nodes has no taxonomy,
829 public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
830 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
831 final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
832 for( final PhylogenyNode n : descs ) {
833 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
836 final Taxonomy t = n.getNodeData().getTaxonomy();
837 if ( tax_map.containsKey( t ) ) {
838 tax_map.put( t, tax_map.get( t ) + 1 );
848 * Arranges the order of childern for each node of this Phylogeny in such a
849 * way that either the branch with more children is on top (right) or on
850 * bottom (left), dependent on the value of boolean order.
853 * decides in which direction to order
856 public static void orderAppearance( final PhylogenyNode n,
858 final boolean order_ext_alphabetically,
859 final DESCENDANT_SORT_PRIORITY pri ) {
860 if ( n.isExternal() ) {
864 if ( ( n.getNumberOfDescendants() == 2 )
865 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
866 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2()
867 .getNumberOfExternalNodes() ) == order ) ) {
868 final PhylogenyNode temp = n.getChildNode1();
869 n.setChild1( n.getChildNode2() );
871 _order_changed = true;
873 else if ( order_ext_alphabetically ) {
874 boolean all_ext = true;
875 for( final PhylogenyNode i : n.getDescendants() ) {
876 if ( !i.isExternal() ) {
882 PhylogenyMethods.sortNodeDescendents( n, pri );
885 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
886 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
891 public synchronized static void orderAppearanceX( final PhylogenyNode n,
892 final boolean order_ext_alphabetically,
893 final DESCENDANT_SORT_PRIORITY pri ) {
894 if ( n.isExternal() ) {
898 _order_changed = false;
899 orderAppearance( n, true, order_ext_alphabetically, pri );
900 if ( !_order_changed ) {
901 orderAppearance( n, false, order_ext_alphabetically, pri );
906 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
907 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
908 final PhylogenyNode node = iter.next();
913 if ( node.isInternal() ) {
914 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
915 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
916 final PhylogenyNode child_node = node.getChildNode( i );
917 final Color child_color = getBranchColorValue( child_node );
918 if ( child_color != null ) {
920 red += child_color.getRed();
921 green += child_color.getGreen();
922 blue += child_color.getBlue();
925 setBranchColorValue( node,
926 new Color( ForesterUtil.roundToInt( red / n ),
927 ForesterUtil.roundToInt( green / n ),
928 ForesterUtil.roundToInt( blue / n ) ) );
933 public static final void preOrderReId( final Phylogeny phy ) {
934 if ( phy.isEmpty() ) {
937 phy.setIdToNodeMap( null );
938 long i = PhylogenyNode.getNodeCount();
939 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
940 it.next().setId( i++ );
942 PhylogenyNode.setNodeCount( i );
945 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file )
947 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
948 final Phylogeny[] trees = factory.create( file, parser );
949 if ( ( trees == null ) || ( trees.length == 0 ) ) {
950 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
955 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
957 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
958 for( final File file : files ) {
959 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
960 final Phylogeny[] trees = factory.create( file, parser );
961 if ( ( trees == null ) || ( trees.length == 0 ) ) {
962 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
964 tree_list.addAll( Arrays.asList( trees ) );
966 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
969 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
970 if ( remove_me.isRoot() ) {
971 if ( remove_me.getNumberOfDescendants() == 1 ) {
972 final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
973 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
974 desc.getDistanceToParent() ) );
975 desc.setParent( null );
976 phylogeny.setRoot( desc );
977 phylogeny.clearHashIdToNodeMap();
980 throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
983 else if ( remove_me.isExternal() ) {
984 phylogeny.deleteSubtree( remove_me, false );
985 phylogeny.clearHashIdToNodeMap();
986 phylogeny.externalNodesHaveChanged();
989 final PhylogenyNode parent = remove_me.getParent();
990 final List<PhylogenyNode> descs = remove_me.getDescendants();
991 parent.removeChildNode( remove_me );
992 for( final PhylogenyNode desc : descs ) {
993 parent.addAsChild( desc );
994 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
995 desc.getDistanceToParent() ) );
997 remove_me.setParent( null );
998 phylogeny.clearHashIdToNodeMap();
999 phylogeny.externalNodesHaveChanged();
1003 private static enum NDF {
1005 TaxonomyCode( "TC" ),
1006 TaxonomyCommonName( "CN" ),
1007 TaxonomyScientificName( "TS" ),
1008 TaxonomyIdentifier( "TI" ),
1009 TaxonomySynonym( "SY" ),
1010 SequenceName( "SN" ),
1012 SequenceSymbol( "SS" ),
1013 SequenceAccession( "SA" ),
1017 BinaryCharacter( "BC" ),
1018 MolecularSequence( "MS" );
1020 private final String _text;
1022 NDF( final String text ) {
1026 public static NDF fromString( final String text ) {
1027 for( final NDF n : NDF.values() ) {
1028 if ( text.startsWith( n._text ) ) {
1036 public static List<Long> searchData( final String query,
1037 final Phylogeny phy,
1038 final boolean case_sensitive,
1039 final boolean partial,
1040 final boolean regex,
1041 final boolean search_domains,
1042 final double domains_confidence_threshold ) {
1043 final List<Long> nodes = new ArrayList<Long>();
1044 if ( phy.isEmpty() || ( query == null ) ) {
1047 if ( ForesterUtil.isEmpty( query ) ) {
1050 String my_query = query;
1052 if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
1053 ndf = NDF.fromString( my_query );
1054 if ( ndf != null ) {
1055 my_query = my_query.substring( 3 );
1058 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1059 final PhylogenyNode node = iter.next();
1060 boolean match = false;
1061 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1062 && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
1065 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1066 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1073 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1074 && match( node.getNodeData().getTaxonomy().getCommonName(),
1081 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy()
1082 && match( node.getNodeData().getTaxonomy().getScientificName(),
1089 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1090 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1091 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1098 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1099 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1100 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1101 I: for( final String syn : syns ) {
1102 if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
1108 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1109 && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
1112 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1113 && match( node.getNodeData().getSequence().getGeneName(),
1120 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
1121 && match( node.getNodeData().getSequence().getSymbol(),
1128 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence()
1129 && ( node.getNodeData().getSequence().getAccession() != null )
1130 && match( node.getNodeData().getSequence().getAccession().getValue(),
1137 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1138 && node.getNodeData().isHasSequence()
1139 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1140 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1141 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1142 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1143 && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
1149 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1150 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1151 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1152 if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
1156 if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
1162 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1163 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1164 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1165 if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
1169 if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
1173 if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
1179 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1180 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1181 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1182 I: while ( it.hasNext() ) {
1183 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1188 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1189 I: while ( it.hasNext() ) {
1190 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1196 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1197 && match( node.getNodeData().getSequence().getMolecularSequence(),
1205 nodes.add( node.getId() );
1211 public static List<Long> searchDataLogicalAnd( final String[] queries,
1212 final Phylogeny phy,
1213 final boolean case_sensitive,
1214 final boolean partial,
1215 final boolean search_domains,
1216 final double domains_confidence_threshold ) {
1217 final List<Long> nodes = new ArrayList<Long>();
1218 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1221 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1222 final PhylogenyNode node = iter.next();
1223 boolean all_matched = true;
1224 for( String query : queries ) {
1225 if ( query == null ) {
1228 query = query.trim();
1230 if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
1231 ndf = NDF.fromString( query );
1232 if ( ndf != null ) {
1233 query = query.substring( 3 );
1236 boolean match = false;
1237 if ( ForesterUtil.isEmpty( query ) ) {
1240 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1241 && match( node.getName(), query, case_sensitive, partial, false ) ) {
1244 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
1245 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1252 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
1253 && match( node.getNodeData().getTaxonomy().getCommonName(),
1260 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1261 && node.getNodeData().isHasTaxonomy()
1262 && match( node.getNodeData().getTaxonomy().getScientificName(),
1269 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
1270 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1271 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1278 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1279 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1280 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1281 I: for( final String syn : syns ) {
1282 if ( match( syn, query, case_sensitive, partial, false ) ) {
1288 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1289 && match( node.getNodeData().getSequence().getName(),
1296 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1297 && match( node.getNodeData().getSequence().getGeneName(),
1304 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
1305 && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(),
1312 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1313 && node.getNodeData().isHasSequence()
1314 && ( node.getNodeData().getSequence().getAccession() != null )
1315 && match( node.getNodeData().getSequence().getAccession().getValue(),
1322 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1323 && node.getNodeData().isHasSequence()
1324 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1325 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1326 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1327 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1328 && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
1334 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1335 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1336 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1337 if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
1341 if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
1347 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1348 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1349 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1350 if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
1354 if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
1358 if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
1364 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1365 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1366 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1367 I: while ( it.hasNext() ) {
1368 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1373 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1374 I: while ( it.hasNext() ) {
1375 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1381 if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
1382 && match( node.getNodeData().getSequence().getMolecularSequence(),
1390 all_matched = false;
1394 if ( all_matched ) {
1395 nodes.add( node.getId() );
1401 public static void setAllIndicatorsToZero( final Phylogeny phy ) {
1402 for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
1403 it.next().setIndicator( ( byte ) 0 );
1408 * Convenience method.
1409 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1411 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1412 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1415 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1416 if ( node.getBranchData().getBranchColor() == null ) {
1417 node.getBranchData().setBranchColor( new BranchColor() );
1419 node.getBranchData().getBranchColor().setValue( color );
1423 * Convenience method
1425 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1426 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1430 * Convenience method.
1431 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1433 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1434 setConfidence( node, confidence_value, "" );
1438 * Convenience method.
1439 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1441 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1442 Confidence c = null;
1443 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1444 c = node.getBranchData().getConfidence( 0 );
1447 c = new Confidence();
1448 node.getBranchData().addConfidence( c );
1451 c.setValue( confidence_value );
1454 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1455 if ( !node.getNodeData().isHasTaxonomy() ) {
1456 node.getNodeData().setTaxonomy( new Taxonomy() );
1458 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1462 * Convenience method to set the taxonomy code of a phylogeny node.
1466 * @param taxonomy_code
1467 * @throws PhyloXmlDataFormatException
1469 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1470 throws PhyloXmlDataFormatException {
1471 if ( !node.getNodeData().isHasTaxonomy() ) {
1472 node.getNodeData().setTaxonomy( new Taxonomy() );
1474 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1477 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
1478 Comparator<PhylogenyNode> c;
1481 c = new PhylogenyNodeSortSequencePriority();
1484 c = new PhylogenyNodeSortNodeNamePriority();
1487 c = new PhylogenyNodeSortTaxonomyPriority();
1489 final List<PhylogenyNode> descs = node.getDescendants();
1490 Collections.sort( descs, c );
1492 for( final PhylogenyNode desc : descs ) {
1493 node.setChildNode( i++, desc );
1498 * Removes from Phylogeny to_be_stripped all external Nodes which are
1499 * associated with a species NOT found in Phylogeny reference.
1502 * a reference Phylogeny
1503 * @param to_be_stripped
1504 * Phylogeny to be stripped
1505 * @return nodes removed from to_be_stripped
1507 public static List<PhylogenyNode> taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference,
1508 final Phylogeny to_be_stripped ) {
1509 final Set<String> ref_ext_taxo = new HashSet<String>();
1510 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1511 final PhylogenyNode n = it.next();
1512 if ( !n.getNodeData().isHasTaxonomy() ) {
1513 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1515 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1516 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1518 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1519 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1521 if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
1522 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
1523 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
1526 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1527 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1528 final PhylogenyNode n = it.next();
1529 if ( !n.getNodeData().isHasTaxonomy() ) {
1530 nodes_to_delete.add( n );
1532 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1533 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1534 && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo
1535 .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
1536 nodes_to_delete.add( n );
1539 for( final PhylogenyNode n : nodes_to_delete ) {
1540 to_be_stripped.deleteSubtree( n, true );
1542 to_be_stripped.clearHashIdToNodeMap();
1543 to_be_stripped.externalNodesHaveChanged();
1544 return nodes_to_delete;
1547 final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
1548 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1549 while ( it.hasNext() ) {
1550 final PhylogenyNode n = it.next();
1551 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
1554 value = Double.parseDouble( n.getName() );
1556 catch ( final NumberFormatException e ) {
1557 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
1558 + e.getLocalizedMessage() );
1560 if ( value >= 0.0 ) {
1561 n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
1568 final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
1569 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1570 while ( it.hasNext() ) {
1571 final PhylogenyNode n = it.next();
1572 if ( !n.isExternal() && !n.isRoot() ) {
1573 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1576 value = Double.parseDouble( n.getName() );
1578 catch ( final NumberFormatException e ) {
1581 if ( ( value < 0.0 ) || ( value > 100 ) ) {
1590 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy,
1591 final String confidence_type ) {
1592 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1593 while ( it.hasNext() ) {
1594 transferInternalNodeNameToConfidence( confidence_type, it.next() );
1598 private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
1599 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
1600 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1603 d = Double.parseDouble( n.getName() );
1605 catch ( final Exception e ) {
1609 n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
1616 final static public void transferNodeNameToField( final Phylogeny phy,
1617 final PhylogenyNodeField field,
1618 final boolean external_only )
1619 throws PhyloXmlDataFormatException {
1620 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1621 while ( it.hasNext() ) {
1622 final PhylogenyNode n = it.next();
1623 if ( external_only && n.isInternal() ) {
1626 final String name = n.getName().trim();
1627 if ( !ForesterUtil.isEmpty( name ) ) {
1631 setTaxonomyCode( n, name );
1633 case TAXONOMY_SCIENTIFIC_NAME:
1635 if ( !n.getNodeData().isHasTaxonomy() ) {
1636 n.getNodeData().setTaxonomy( new Taxonomy() );
1638 n.getNodeData().getTaxonomy().setScientificName( name );
1640 case TAXONOMY_COMMON_NAME:
1642 if ( !n.getNodeData().isHasTaxonomy() ) {
1643 n.getNodeData().setTaxonomy( new Taxonomy() );
1645 n.getNodeData().getTaxonomy().setCommonName( name );
1647 case SEQUENCE_SYMBOL:
1649 if ( !n.getNodeData().isHasSequence() ) {
1650 n.getNodeData().setSequence( new Sequence() );
1652 n.getNodeData().getSequence().setSymbol( name );
1656 if ( !n.getNodeData().isHasSequence() ) {
1657 n.getNodeData().setSequence( new Sequence() );
1659 n.getNodeData().getSequence().setName( name );
1661 case TAXONOMY_ID_UNIPROT_1: {
1662 if ( !n.getNodeData().isHasTaxonomy() ) {
1663 n.getNodeData().setTaxonomy( new Taxonomy() );
1666 final int i = name.indexOf( '_' );
1668 id = name.substring( 0, i );
1673 n.getNodeData().getTaxonomy()
1674 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1677 case TAXONOMY_ID_UNIPROT_2: {
1678 if ( !n.getNodeData().isHasTaxonomy() ) {
1679 n.getNodeData().setTaxonomy( new Taxonomy() );
1682 final int i = name.indexOf( '_' );
1684 id = name.substring( i + 1, name.length() );
1689 n.getNodeData().getTaxonomy()
1690 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1694 if ( !n.getNodeData().isHasTaxonomy() ) {
1695 n.getNodeData().setTaxonomy( new Taxonomy() );
1697 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
1701 throw new IllegalArgumentException( "don't know what to do with " + field );
1708 static double addPhylogenyDistances( final double a, final double b ) {
1709 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
1712 else if ( a >= 0.0 ) {
1715 else if ( b >= 0.0 ) {
1718 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1721 static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
1723 boolean all_default = true;
1724 while ( anc != desc ) {
1725 if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
1726 d += desc.getDistanceToParent();
1727 if ( all_default ) {
1728 all_default = false;
1731 desc = desc.getParent();
1733 if ( all_default ) {
1734 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1739 public static double calculateAverageTreeHeight( final PhylogenyNode node ) {
1740 final List<PhylogenyNode> ext = node.getAllExternalDescendants();
1742 for( PhylogenyNode n : ext ) {
1743 while ( n != node ) {
1744 if ( n.getDistanceToParent() > 0 ) {
1745 s += n.getDistanceToParent();
1750 return s / ext.size();
1754 * Deep copies the phylogeny originating from this node.
1756 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
1757 if ( source == null ) {
1761 final PhylogenyNode newnode = source.copyNodeData();
1762 if ( !source.isExternal() ) {
1763 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1764 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
1772 * Shallow copies the phylogeny originating from this node.
1774 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
1775 if ( source == null ) {
1779 final PhylogenyNode newnode = source.copyNodeDataShallow();
1780 if ( !source.isExternal() ) {
1781 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1782 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
1789 private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
1790 final double min_distance_to_root ) {
1791 final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
1792 final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
1793 for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
1794 if ( ext.getIndicator() != 0 ) {
1795 throw new RuntimeException( "this should not have happened" );
1797 ext.setIndicator( ( byte ) 1 );
1804 * Calculates the distance between PhylogenyNodes n1 and n2.
1805 * PRECONDITION: n1 is a descendant of n2.
1808 * a descendant of n2
1810 * @return distance between n1 and n2
1812 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
1814 while ( n1 != n2 ) {
1815 if ( n1.getDistanceToParent() > 0.0 ) {
1816 d += n1.getDistanceToParent();
1818 n1 = n1.getParent();
1823 private static boolean match( final String s,
1825 final boolean case_sensitive,
1826 final boolean partial,
1827 final boolean regex ) {
1828 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1831 String my_s = s.trim();
1832 String my_query = query.trim();
1833 if ( !case_sensitive && !regex ) {
1834 my_s = my_s.toLowerCase();
1835 my_query = my_query.toLowerCase();
1840 if ( case_sensitive ) {
1841 p = Pattern.compile( my_query );
1844 p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
1847 catch ( final PatternSyntaxException e ) {
1851 return p.matcher( my_s ).find();
1857 else if ( partial ) {
1858 return my_s.indexOf( my_query ) >= 0;
1863 p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
1865 catch ( final PatternSyntaxException e ) {
1869 return p.matcher( my_s ).find();
1877 private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
1878 PhylogenyNode n = node;
1879 PhylogenyNode prev = node;
1880 while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
1887 public static enum DESCENDANT_SORT_PRIORITY {
1893 public static enum PhylogenyNodeField {
1898 TAXONOMY_COMMON_NAME,
1900 TAXONOMY_ID_UNIPROT_1,
1901 TAXONOMY_ID_UNIPROT_2,
1902 TAXONOMY_SCIENTIFIC_NAME;
1905 public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
1906 for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
1907 final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
1908 final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
1909 final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
1910 new_seq.setMolecularSequenceAligned( true );
1911 new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
1912 new_seq.setName( seq.getIdentifier() );
1914 new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
1916 catch ( final PhyloXmlDataFormatException ignore ) {
1919 node.getNodeData().addSequence( new_seq );
1923 final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
1926 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1927 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1928 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1929 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1930 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1931 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1933 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1934 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1935 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1936 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1939 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1940 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1941 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1942 return n1.getNodeData().getSequence().getName().toLowerCase()
1943 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1945 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1946 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1947 return n1.getNodeData().getSequence().getGeneName()
1948 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1950 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1951 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1952 return n1.getNodeData().getSequence().getSymbol()
1953 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1956 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1957 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1963 final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
1966 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1967 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1968 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1969 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1970 return n1.getNodeData().getSequence().getName().toLowerCase()
1971 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1973 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1974 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1975 return n1.getNodeData().getSequence().getGeneName()
1976 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1978 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1979 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1980 return n1.getNodeData().getSequence().getSymbol()
1981 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1984 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1985 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1986 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1987 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1988 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1990 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1991 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1992 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1993 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1996 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1997 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2003 final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
2006 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
2007 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
2008 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
2010 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
2011 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
2012 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
2013 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
2014 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
2016 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
2017 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
2018 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
2019 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
2022 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
2023 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
2024 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
2025 return n1.getNodeData().getSequence().getName().toLowerCase()
2026 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
2028 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
2029 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
2030 return n1.getNodeData().getSequence().getGeneName()
2031 .compareTo( n2.getNodeData().getSequence().getGeneName() );
2033 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
2034 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
2035 return n1.getNodeData().getSequence().getSymbol()
2036 .compareTo( n2.getNodeData().getSequence().getSymbol() );
2043 public final static Map<Long, Integer> calculateDepths( final Phylogeny phy ) {
2044 final Map<Long, Integer> depths = new HashMap<Long, Integer>();
2045 calculateDepthsHelper( phy.getRoot(), 0, depths );
2049 private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map<Long, Integer> depths ) {
2050 depths.put( n.getId(), d );
2052 final List<PhylogenyNode> descs = n.getDescendants();
2053 for( final PhylogenyNode desc : descs ) {
2054 calculateDepthsHelper( desc, d, depths );
2058 public final static void collapseToDepth( final Phylogeny phy, final int depth ) {
2059 if ( phy.getNumberOfExternalNodes() < 3 ) {
2062 collapseToDepthHelper( phy.getRoot(), 0, depth );
2065 private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) {
2066 if ( n.isExternal() ) {
2067 n.setCollapse( false );
2071 n.setCollapse( true );
2072 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2073 while ( it.hasNext() ) {
2074 it.next().setCollapse( true );
2078 n.setCollapse( false );
2080 final List<PhylogenyNode> descs = n.getDescendants();
2081 for( final PhylogenyNode desc : descs ) {
2082 collapseToDepthHelper( desc, d, depth );
2089 public final static void collapseToRank( final Phylogeny phy, final int rank ) {
2090 if ( phy.getNumberOfExternalNodes() < 3 ) {
2093 if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) {
2094 throw new IllegalArgumentException( "Rank " + rank + " is out of range" );
2096 collapseToRankHelper( phy.getRoot(), rank );
2099 private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) {
2100 if ( n.isExternal() ) {
2101 n.setCollapse( false );
2104 if ( ( n.getNodeData().getTaxonomy() != null )
2105 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) {
2106 final String current_rank = n.getNodeData().getTaxonomy().getRank();
2107 if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
2108 System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." );
2111 if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) {
2112 n.setCollapse( true );
2114 final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
2115 while ( it.hasNext() ) {
2116 it.next().setCollapse( true );
2122 n.setCollapse( false );
2123 final List<PhylogenyNode> descs = n.getDescendants();
2124 for( final PhylogenyNode desc : descs ) {
2125 collapseToRankHelper( desc, target_rank );