2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 import java.util.regex.PatternSyntaxException;
45 import org.forester.io.parsers.FastaParser;
46 import org.forester.io.parsers.PhylogenyParser;
47 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
48 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
49 import org.forester.io.parsers.util.PhylogenyParserException;
50 import org.forester.msa.Msa;
51 import org.forester.phylogeny.data.Accession;
52 import org.forester.phylogeny.data.Annotation;
53 import org.forester.phylogeny.data.BranchColor;
54 import org.forester.phylogeny.data.BranchWidth;
55 import org.forester.phylogeny.data.Confidence;
56 import org.forester.phylogeny.data.DomainArchitecture;
57 import org.forester.phylogeny.data.Event;
58 import org.forester.phylogeny.data.Identifier;
59 import org.forester.phylogeny.data.PhylogenyDataUtil;
60 import org.forester.phylogeny.data.Sequence;
61 import org.forester.phylogeny.data.Taxonomy;
62 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
63 import org.forester.phylogeny.factories.PhylogenyFactory;
64 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
65 import org.forester.util.BasicDescriptiveStatistics;
66 import org.forester.util.DescriptiveStatistics;
67 import org.forester.util.ForesterUtil;
69 public class PhylogenyMethods {
71 private PhylogenyMethods() {
72 // Hidden constructor.
76 public Object clone() throws CloneNotSupportedException {
77 throw new CloneNotSupportedException();
80 public static boolean extractFastaInformation( final Phylogeny phy ) {
81 boolean could_extract = false;
82 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
83 final PhylogenyNode node = iter.next();
84 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
85 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
86 if ( name_m.lookingAt() ) {
88 final String acc_source = name_m.group( 1 );
89 final String acc = name_m.group( 2 );
90 final String seq_name = name_m.group( 3 );
91 final String tax_sn = name_m.group( 4 );
92 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
93 ForesterUtil.ensurePresenceOfSequence( node );
94 node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
96 if ( !ForesterUtil.isEmpty( seq_name ) ) {
97 ForesterUtil.ensurePresenceOfSequence( node );
98 node.getNodeData().getSequence( 0 ).setName( seq_name );
100 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
101 ForesterUtil.ensurePresenceOfTaxonomy( node );
102 node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
107 return could_extract;
110 public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
111 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
112 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
113 final PhylogenyNode n = iter.next();
114 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
115 stats.addValue( n.getDistanceToParent() );
121 public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
122 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
123 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
124 final PhylogenyNode n = iter.next();
125 if ( !n.isExternal() && !n.isRoot() ) {
126 if ( n.getBranchData().isHasConfidences() ) {
127 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
128 final Confidence c = n.getBranchData().getConfidences().get( i );
129 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
130 stats.add( i, new BasicDescriptiveStatistics() );
132 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
133 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
134 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
135 throw new IllegalArgumentException( "support values in node [" + n.toString()
136 + "] appear inconsistently ordered" );
139 stats.get( i ).setDescription( c.getType() );
141 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
150 * Calculates the distance between PhylogenyNodes node1 and node2.
155 * @return distance between node1 and node2
157 public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
158 final PhylogenyNode lca = calculateLCA( node1, node2 );
159 final PhylogenyNode n1 = node1;
160 final PhylogenyNode n2 = node2;
161 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
165 * Returns the LCA of PhylogenyNodes node1 and node2.
170 * @return LCA of node1 and node2
172 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
173 if ( node1 == null ) {
174 throw new IllegalArgumentException( "first argument (node) is null" );
176 if ( node2 == null ) {
177 throw new IllegalArgumentException( "second argument (node) is null" );
179 if ( node1 == node2 ) {
182 if ( ( node1.getParent() == node2.getParent() ) ) {
183 return node1.getParent();
185 int depth1 = node1.calculateDepth();
186 int depth2 = node2.calculateDepth();
187 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
188 if ( depth1 > depth2 ) {
189 node1 = node1.getParent();
192 else if ( depth2 > depth1 ) {
193 node2 = node2.getParent();
197 if ( node1 == node2 ) {
200 node1 = node1.getParent();
201 node2 = node2.getParent();
206 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
210 * Returns the LCA of PhylogenyNodes node1 and node2.
211 * Precondition: ids are in pre-order (or level-order).
216 * @return LCA of node1 and node2
218 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
219 if ( node1 == null ) {
220 throw new IllegalArgumentException( "first argument (node) is null" );
222 if ( node2 == null ) {
223 throw new IllegalArgumentException( "second argument (node) is null" );
225 while ( node1 != node2 ) {
226 if ( node1.getId() > node2.getId() ) {
227 node1 = node1.getParent();
230 node2 = node2.getParent();
236 public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
237 if ( node.isExternal() ) {
241 for( PhylogenyNode d : node.getAllExternalDescendants() ) {
243 while ( d != node ) {
244 if ( d.isCollapse() ) {
259 public static int calculateMaxDepth( final Phylogeny phy ) {
261 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
262 final PhylogenyNode node = iter.next();
263 final int steps = node.calculateDepth();
271 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
273 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
274 final PhylogenyNode node = iter.next();
275 final double d = node.calculateDistanceToRoot();
283 public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
285 PhylogenyNode max_node = phy.getFirstExternalNode();
286 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
287 final PhylogenyNode node = iter.next();
288 final double d = node.calculateDistanceToRoot();
297 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
298 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
300 for( final PhylogenyNode n : descs ) {
301 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
308 public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
309 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
310 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
311 final PhylogenyNode n = iter.next();
312 if ( !n.isExternal() ) {
313 stats.addValue( n.getNumberOfDescendants() );
319 public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
320 final List<PhylogenyNode> eds = n.getAllExternalDescendants();
321 final List<Double> d = new ArrayList<Double>();
322 for( final PhylogenyNode ed : eds ) {
323 d.add( calculateDistanceToAncestor( n, ed ) );
325 for( int i = 0; i < eds.size(); ++i ) {
326 n.setChildNode( i, eds.get( i ) );
327 eds.get( i ).setDistanceToParent( d.get( i ) );
331 public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
333 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
334 final PhylogenyNode n = iter.next();
335 if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) {
342 public static int countNumberOfPolytomies( final Phylogeny phy ) {
344 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
345 final PhylogenyNode n = iter.next();
346 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
353 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
354 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
355 final List<PhylogenyNode> ext = phy.getExternalNodes();
356 for( final PhylogenyNode n : ext ) {
357 nodes.put( n.getName(), n );
362 public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
363 for( final Long id : to_delete ) {
364 phy.deleteSubtree( phy.getNode( id ), true );
366 phy.clearHashIdToNodeMap();
367 phy.externalNodesHaveChanged();
370 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
371 throws IllegalArgumentException {
372 for( final String element : node_names_to_delete ) {
373 if ( ForesterUtil.isEmpty( element ) ) {
376 List<PhylogenyNode> nodes = null;
377 nodes = p.getNodes( element );
378 final Iterator<PhylogenyNode> it = nodes.iterator();
379 while ( it.hasNext() ) {
380 final PhylogenyNode n = it.next();
381 if ( !n.isExternal() ) {
382 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
384 p.deleteSubtree( n, true );
387 p.clearHashIdToNodeMap();
388 p.externalNodesHaveChanged();
391 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
392 final Phylogeny p ) {
393 final PhylogenyNodeIterator it = p.iteratorExternalForward();
394 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
396 Arrays.sort( node_names_to_keep );
397 while ( it.hasNext() ) {
398 final String curent_name = it.next().getName();
399 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
400 to_delete[ i++ ] = curent_name;
403 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
404 final List<String> deleted = new ArrayList<String>();
405 for( final String n : to_delete ) {
406 if ( !ForesterUtil.isEmpty( n ) ) {
413 public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep, final Phylogeny phy ) {
414 final Set<Long> to_delete = new HashSet<Long>();
415 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
416 final PhylogenyNode n = it.next();
417 if ( n.getNodeData().isHasTaxonomy() ) {
418 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
419 to_delete.add( n.getId() );
423 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
426 deleteExternalNodesNegativeSelection( to_delete, phy );
429 final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
430 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
431 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
432 final PhylogenyNode n = iter.next();
433 if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
437 for( final PhylogenyNode d : to_delete ) {
438 PhylogenyMethods.removeNode( d, phy );
440 phy.clearHashIdToNodeMap();
441 phy.externalNodesHaveChanged();
444 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
445 if ( n.isInternal() ) {
446 throw new IllegalArgumentException( "node is not external" );
448 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
449 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
450 final PhylogenyNode i = it.next();
451 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
455 for( final PhylogenyNode d : to_delete ) {
456 phy.deleteSubtree( d, true );
458 phy.clearHashIdToNodeMap();
459 phy.externalNodesHaveChanged();
462 public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
463 final double min_distance_to_root ) {
464 if ( min_distance_to_root <= 0 ) {
465 throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
467 final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
468 setAllIndicatorsToZero( phy );
469 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
470 final PhylogenyNode n = it.next();
471 if ( n.getIndicator() != 0 ) {
474 l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
476 throw new RuntimeException( "this should not have happened" );
482 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
483 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
484 final Set<Long> encountered = new HashSet<Long>();
485 if ( !node.isExternal() ) {
486 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
487 for( PhylogenyNode current : exts ) {
488 descs.add( current );
489 while ( current != node ) {
490 current = current.getParent();
491 if ( encountered.contains( current.getId() ) ) {
494 descs.add( current );
495 encountered.add( current.getId() );
509 public static Color getBranchColorValue( final PhylogenyNode node ) {
510 if ( node.getBranchData().getBranchColor() == null ) {
513 return node.getBranchData().getBranchColor().getValue();
519 public static double getBranchWidthValue( final PhylogenyNode node ) {
520 if ( !node.getBranchData().isHasBranchWidth() ) {
521 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
523 return node.getBranchData().getBranchWidth().getValue();
529 public static double getConfidenceValue( final PhylogenyNode node ) {
530 if ( !node.getBranchData().isHasConfidences() ) {
531 return Confidence.CONFIDENCE_DEFAULT_VALUE;
533 return node.getBranchData().getConfidence( 0 ).getValue();
539 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
540 if ( !node.getBranchData().isHasConfidences() ) {
541 return new double[ 0 ];
543 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
545 for( final Confidence c : node.getBranchData().getConfidences() ) {
546 values[ i++ ] = c.getValue();
551 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
552 return calculateLCA( n1, n2 ).getNodeData().getEvent();
556 * Returns taxonomy t if all external descendants have
557 * the same taxonomy t, null otherwise.
560 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
561 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
563 for( final PhylogenyNode n : descs ) {
564 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
567 else if ( tax == null ) {
568 tax = n.getNodeData().getTaxonomy();
570 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
577 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
578 final List<PhylogenyNode> children = node.getAllExternalDescendants();
579 PhylogenyNode farthest = null;
580 double longest = -Double.MAX_VALUE;
581 for( final PhylogenyNode child : children ) {
582 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
584 longest = PhylogenyMethods.getDistance( child, node );
590 // public static PhylogenyMethods getInstance() {
591 // if ( PhylogenyMethods._instance == null ) {
592 // PhylogenyMethods._instance = new PhylogenyMethods();
594 // return PhylogenyMethods._instance;
597 * Returns the largest confidence value found on phy.
599 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
600 double max = -Double.MAX_VALUE;
601 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
602 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
603 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
610 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
611 int min = Integer.MAX_VALUE;
614 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
616 if ( n.isInternal() ) {
617 d = n.getNumberOfDescendants();
627 * Convenience method for display purposes.
628 * Not intended for algorithms.
630 public static String getSpecies( final PhylogenyNode node ) {
631 if ( !node.getNodeData().isHasTaxonomy() ) {
634 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
635 return node.getNodeData().getTaxonomy().getScientificName();
637 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
638 return node.getNodeData().getTaxonomy().getTaxonomyCode();
641 return node.getNodeData().getTaxonomy().getCommonName();
646 * Convenience method for display purposes.
647 * Not intended for algorithms.
649 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
650 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
653 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
656 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
657 if ( n.isExternal() ) {
661 if ( n.isDuplication() ) {
662 for( final PhylogenyNode desc : n.getDescendants() ) {
663 if ( !isAllDecendentsAreDuplications( desc ) ) {
675 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
676 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
677 if ( node.getChildNode( i ).isExternal() ) {
685 * This is case insensitive.
688 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
689 final String[] providers ) {
690 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
691 final String my_tax_prov = tax.getIdentifier().getProvider();
692 for( final String provider : providers ) {
693 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
704 public static void midpointRoot( final Phylogeny phylogeny ) {
705 if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
709 final int total_nodes = phylogeny.getNodeCount();
711 if ( ++counter > total_nodes ) {
712 throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
714 PhylogenyNode a = null;
717 for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
718 final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
719 final double df = getDistance( f, phylogeny.getRoot() );
726 else if ( df > db ) {
731 final double diff = da - db;
732 if ( diff < 0.000001 ) {
735 double x = da - ( diff / 2.0 );
736 while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
737 x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
740 phylogeny.reRoot( a, x );
742 phylogeny.recalculateNumberOfExternalDescendants( true );
745 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
746 final double max_bootstrap_value,
747 final double max_normalized_value ) {
748 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
749 final PhylogenyNode node = iter.next();
750 if ( node.isInternal() ) {
751 final double confidence = getConfidenceValue( node );
752 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
753 if ( confidence >= max_bootstrap_value ) {
754 setBootstrapConfidence( node, max_normalized_value );
757 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
764 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
765 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
766 if ( phy.isEmpty() ) {
769 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
770 nodes.add( iter.next() );
776 * Returns a map of distinct taxonomies of
777 * all external nodes of node.
778 * If at least one of the external nodes has no taxonomy,
782 public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
783 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
784 final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
785 for( final PhylogenyNode n : descs ) {
786 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
789 final Taxonomy t = n.getNodeData().getTaxonomy();
790 if ( tax_map.containsKey( t ) ) {
791 tax_map.put( t, tax_map.get( t ) + 1 );
801 * Arranges the order of childern for each node of this Phylogeny in such a
802 * way that either the branch with more children is on top (right) or on
803 * bottom (left), dependent on the value of boolean order.
806 * decides in which direction to order
809 public static void orderAppearance( final PhylogenyNode n,
811 final boolean order_ext_alphabetically,
812 final DESCENDANT_SORT_PRIORITY pri ) {
813 if ( n.isExternal() ) {
817 PhylogenyNode temp = null;
818 if ( ( n.getNumberOfDescendants() == 2 )
819 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
820 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) {
821 temp = n.getChildNode1();
822 n.setChild1( n.getChildNode2() );
825 else if ( order_ext_alphabetically ) {
826 boolean all_ext = true;
827 for( final PhylogenyNode i : n.getDescendants() ) {
828 if ( !i.isExternal() ) {
834 PhylogenyMethods.sortNodeDescendents( n, pri );
837 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
838 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
843 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
844 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
845 final PhylogenyNode node = iter.next();
850 if ( node.isInternal() ) {
851 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
852 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
853 final PhylogenyNode child_node = node.getChildNode( i );
854 final Color child_color = getBranchColorValue( child_node );
855 if ( child_color != null ) {
857 red += child_color.getRed();
858 green += child_color.getGreen();
859 blue += child_color.getBlue();
862 setBranchColorValue( node,
863 new Color( ForesterUtil.roundToInt( red / n ),
864 ForesterUtil.roundToInt( green / n ),
865 ForesterUtil.roundToInt( blue / n ) ) );
870 public static final void preOrderReId( final Phylogeny phy ) {
871 if ( phy.isEmpty() ) {
874 phy.setIdToNodeMap( null );
875 long i = PhylogenyNode.getNodeCount();
876 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
877 it.next().setId( i++ );
879 PhylogenyNode.setNodeCount( i );
882 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException {
883 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
884 final Phylogeny[] trees = factory.create( file, parser );
885 if ( ( trees == null ) || ( trees.length == 0 ) ) {
886 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
891 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
893 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
894 for( final File file : files ) {
895 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
896 final Phylogeny[] trees = factory.create( file, parser );
897 if ( ( trees == null ) || ( trees.length == 0 ) ) {
898 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
900 tree_list.addAll( Arrays.asList( trees ) );
902 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
905 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
906 if ( remove_me.isRoot() ) {
907 if ( remove_me.getNumberOfDescendants() == 1 ) {
908 final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
909 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
910 desc.getDistanceToParent() ) );
911 desc.setParent( null );
912 phylogeny.setRoot( desc );
913 phylogeny.clearHashIdToNodeMap();
916 throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
919 else if ( remove_me.isExternal() ) {
920 phylogeny.deleteSubtree( remove_me, false );
921 phylogeny.clearHashIdToNodeMap();
922 phylogeny.externalNodesHaveChanged();
925 final PhylogenyNode parent = remove_me.getParent();
926 final List<PhylogenyNode> descs = remove_me.getDescendants();
927 parent.removeChildNode( remove_me );
928 for( final PhylogenyNode desc : descs ) {
929 parent.addAsChild( desc );
930 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
931 desc.getDistanceToParent() ) );
933 remove_me.setParent( null );
934 phylogeny.clearHashIdToNodeMap();
935 phylogeny.externalNodesHaveChanged();
939 private static enum NDF {
941 TaxonomyCode( "TC" ),
942 TaxonomyCommonName( "CN" ),
943 TaxonomyScientificName( "TS" ),
944 TaxonomyIdentifier( "TI" ),
945 TaxonomySynonym( "SY" ),
946 SequenceName( "SN" ),
948 SequenceSymbol( "SS" ),
949 SequenceAccession( "SA" ),
953 BinaryCharacter( "BC" ),
954 MolecularSequence( "MS" );
956 private final String _text;
958 NDF( final String text ) {
962 public static NDF fromString( final String text ) {
963 for( final NDF n : NDF.values() ) {
964 if ( text.startsWith( n._text ) ) {
972 public static List<Long> searchData( final String query,
974 final boolean case_sensitive,
975 final boolean partial,
977 final boolean search_domains,
978 final double domains_confidence_threshold ) {
979 final List<Long> nodes = new ArrayList<Long>();
980 if ( phy.isEmpty() || ( query == null ) ) {
983 if ( ForesterUtil.isEmpty( query ) ) {
986 String my_query = query;
988 if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
989 ndf = NDF.fromString( my_query );
991 my_query = my_query.substring( 3 );
994 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
995 final PhylogenyNode node = iter.next();
996 boolean match = false;
997 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
998 && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
1001 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) )
1002 && node.getNodeData().isHasTaxonomy()
1003 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1010 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) )
1011 && node.getNodeData().isHasTaxonomy()
1012 && match( node.getNodeData().getTaxonomy().getCommonName(),
1019 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1020 && node.getNodeData().isHasTaxonomy()
1021 && match( node.getNodeData().getTaxonomy().getScientificName(),
1028 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) )
1029 && node.getNodeData().isHasTaxonomy()
1030 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1031 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1038 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1039 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1040 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1041 I: for( final String syn : syns ) {
1042 if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
1048 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1049 && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
1052 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1053 && match( node.getNodeData().getSequence().getGeneName(), my_query, case_sensitive, partial, regex ) ) {
1056 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
1057 && match( node.getNodeData().getSequence().getSymbol(), my_query, case_sensitive, partial, regex ) ) {
1061 && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1062 && node.getNodeData().isHasSequence()
1063 && ( node.getNodeData().getSequence().getAccession() != null )
1064 && match( node.getNodeData().getSequence().getAccession().getValue(),
1071 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1072 && node.getNodeData().isHasSequence()
1073 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1074 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1075 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1076 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1077 && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
1083 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1084 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1085 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1086 if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
1090 if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
1096 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1097 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1098 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1099 if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
1103 if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
1107 if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
1113 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1114 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1115 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1116 I: while ( it.hasNext() ) {
1117 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1122 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1123 I: while ( it.hasNext() ) {
1124 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1131 && ( ndf == NDF.MolecularSequence )
1132 && node.getNodeData().isHasSequence()
1133 && match( node.getNodeData().getSequence().getMolecularSequence(),
1141 nodes.add( node.getId() );
1147 public static List<Long> searchDataLogicalAnd( final String[] queries,
1148 final Phylogeny phy,
1149 final boolean case_sensitive,
1150 final boolean partial,
1151 final boolean search_domains,
1152 final double domains_confidence_threshold ) {
1153 final List<Long> nodes = new ArrayList<Long>();
1154 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1157 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1158 final PhylogenyNode node = iter.next();
1159 boolean all_matched = true;
1160 for( String query : queries ) {
1161 if ( query == null ) {
1164 query = query.trim();
1166 if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
1167 ndf = NDF.fromString( query );
1168 if ( ndf != null ) {
1169 query = query.substring( 3 );
1172 boolean match = false;
1173 if ( ForesterUtil.isEmpty( query ) ) {
1176 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1177 && match( node.getName(), query, case_sensitive, partial, false ) ) {
1180 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) )
1181 && node.getNodeData().isHasTaxonomy()
1182 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1189 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) )
1190 && node.getNodeData().isHasTaxonomy()
1191 && match( node.getNodeData().getTaxonomy().getCommonName(),
1198 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1199 && node.getNodeData().isHasTaxonomy()
1200 && match( node.getNodeData().getTaxonomy().getScientificName(),
1207 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) )
1208 && node.getNodeData().isHasTaxonomy()
1209 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1210 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1217 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1218 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1219 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1220 I: for( final String syn : syns ) {
1221 if ( match( syn, query, case_sensitive, partial, false ) ) {
1227 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1228 && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial, false ) ) {
1232 && ( ( ndf == null ) || ( ndf == NDF.GeneName ) )
1233 && node.getNodeData().isHasSequence()
1234 && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial, false ) ) {
1237 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
1238 && node.getNodeData().isHasSequence()
1239 && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial, false ) ) {
1243 && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1244 && node.getNodeData().isHasSequence()
1245 && ( node.getNodeData().getSequence().getAccession() != null )
1246 && match( node.getNodeData().getSequence().getAccession().getValue(),
1253 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1254 && node.getNodeData().isHasSequence()
1255 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1256 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1257 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1258 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1259 && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
1265 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1266 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1267 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1268 if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
1272 if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
1278 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1279 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1280 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1281 if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
1285 if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
1289 if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
1295 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1296 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1297 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1298 I: while ( it.hasNext() ) {
1299 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1304 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1305 I: while ( it.hasNext() ) {
1306 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1313 && ( ndf == NDF.MolecularSequence )
1314 && node.getNodeData().isHasSequence()
1315 && match( node.getNodeData().getSequence().getMolecularSequence(),
1323 all_matched = false;
1327 if ( all_matched ) {
1328 nodes.add( node.getId() );
1334 public static void setAllIndicatorsToZero( final Phylogeny phy ) {
1335 for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
1336 it.next().setIndicator( ( byte ) 0 );
1341 * Convenience method.
1342 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1344 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1345 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1348 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1349 if ( node.getBranchData().getBranchColor() == null ) {
1350 node.getBranchData().setBranchColor( new BranchColor() );
1352 node.getBranchData().getBranchColor().setValue( color );
1356 * Convenience method
1358 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1359 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1363 * Convenience method.
1364 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1366 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1367 setConfidence( node, confidence_value, "" );
1371 * Convenience method.
1372 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1374 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1375 Confidence c = null;
1376 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1377 c = node.getBranchData().getConfidence( 0 );
1380 c = new Confidence();
1381 node.getBranchData().addConfidence( c );
1384 c.setValue( confidence_value );
1387 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1388 if ( !node.getNodeData().isHasTaxonomy() ) {
1389 node.getNodeData().setTaxonomy( new Taxonomy() );
1391 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1395 * Convenience method to set the taxonomy code of a phylogeny node.
1399 * @param taxonomy_code
1400 * @throws PhyloXmlDataFormatException
1402 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1403 throws PhyloXmlDataFormatException {
1404 if ( !node.getNodeData().isHasTaxonomy() ) {
1405 node.getNodeData().setTaxonomy( new Taxonomy() );
1407 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1410 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
1411 Comparator<PhylogenyNode> c;
1414 c = new PhylogenyNodeSortSequencePriority();
1417 c = new PhylogenyNodeSortNodeNamePriority();
1420 c = new PhylogenyNodeSortTaxonomyPriority();
1422 final List<PhylogenyNode> descs = node.getDescendants();
1423 Collections.sort( descs, c );
1425 for( final PhylogenyNode desc : descs ) {
1426 node.setChildNode( i++, desc );
1431 * Removes from Phylogeny to_be_stripped all external Nodes which are
1432 * associated with a species NOT found in Phylogeny reference.
1435 * a reference Phylogeny
1436 * @param to_be_stripped
1437 * Phylogeny to be stripped
1438 * @return nodes removed from to_be_stripped
1440 public static List<PhylogenyNode> taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference,
1441 final Phylogeny to_be_stripped ) {
1442 final Set<String> ref_ext_taxo = new HashSet<String>();
1443 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1444 final PhylogenyNode n = it.next();
1445 if ( !n.getNodeData().isHasTaxonomy() ) {
1446 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1448 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1449 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1451 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1452 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1454 if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
1455 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
1456 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
1459 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1460 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1461 final PhylogenyNode n = it.next();
1462 if ( !n.getNodeData().isHasTaxonomy() ) {
1463 nodes_to_delete.add( n );
1465 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1466 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1467 && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n
1468 .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
1469 nodes_to_delete.add( n );
1472 for( final PhylogenyNode n : nodes_to_delete ) {
1473 to_be_stripped.deleteSubtree( n, true );
1475 to_be_stripped.clearHashIdToNodeMap();
1476 to_be_stripped.externalNodesHaveChanged();
1477 return nodes_to_delete;
1480 final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
1481 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1482 while ( it.hasNext() ) {
1483 final PhylogenyNode n = it.next();
1484 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
1487 value = Double.parseDouble( n.getName() );
1489 catch ( final NumberFormatException e ) {
1490 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
1491 + e.getLocalizedMessage() );
1493 if ( value >= 0.0 ) {
1494 n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
1501 final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
1502 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1503 while ( it.hasNext() ) {
1504 final PhylogenyNode n = it.next();
1505 if ( !n.isExternal() && !n.isRoot() ) {
1506 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1509 value = Double.parseDouble( n.getName() );
1511 catch ( final NumberFormatException e ) {
1514 if ( ( value < 0.0 ) || ( value > 100 ) ) {
1523 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) {
1524 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1525 while ( it.hasNext() ) {
1526 transferInternalNodeNameToConfidence( confidence_type, it.next() );
1530 private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
1531 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
1532 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1535 d = Double.parseDouble( n.getName() );
1537 catch ( final Exception e ) {
1541 n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
1548 final static public void transferNodeNameToField( final Phylogeny phy,
1549 final PhylogenyNodeField field,
1550 final boolean external_only ) throws PhyloXmlDataFormatException {
1551 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1552 while ( it.hasNext() ) {
1553 final PhylogenyNode n = it.next();
1554 if ( external_only && n.isInternal() ) {
1557 final String name = n.getName().trim();
1558 if ( !ForesterUtil.isEmpty( name ) ) {
1562 setTaxonomyCode( n, name );
1564 case TAXONOMY_SCIENTIFIC_NAME:
1566 if ( !n.getNodeData().isHasTaxonomy() ) {
1567 n.getNodeData().setTaxonomy( new Taxonomy() );
1569 n.getNodeData().getTaxonomy().setScientificName( name );
1571 case TAXONOMY_COMMON_NAME:
1573 if ( !n.getNodeData().isHasTaxonomy() ) {
1574 n.getNodeData().setTaxonomy( new Taxonomy() );
1576 n.getNodeData().getTaxonomy().setCommonName( name );
1578 case SEQUENCE_SYMBOL:
1580 if ( !n.getNodeData().isHasSequence() ) {
1581 n.getNodeData().setSequence( new Sequence() );
1583 n.getNodeData().getSequence().setSymbol( name );
1587 if ( !n.getNodeData().isHasSequence() ) {
1588 n.getNodeData().setSequence( new Sequence() );
1590 n.getNodeData().getSequence().setName( name );
1592 case TAXONOMY_ID_UNIPROT_1: {
1593 if ( !n.getNodeData().isHasTaxonomy() ) {
1594 n.getNodeData().setTaxonomy( new Taxonomy() );
1597 final int i = name.indexOf( '_' );
1599 id = name.substring( 0, i );
1604 n.getNodeData().getTaxonomy()
1605 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1608 case TAXONOMY_ID_UNIPROT_2: {
1609 if ( !n.getNodeData().isHasTaxonomy() ) {
1610 n.getNodeData().setTaxonomy( new Taxonomy() );
1613 final int i = name.indexOf( '_' );
1615 id = name.substring( i + 1, name.length() );
1620 n.getNodeData().getTaxonomy()
1621 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1625 if ( !n.getNodeData().isHasTaxonomy() ) {
1626 n.getNodeData().setTaxonomy( new Taxonomy() );
1628 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
1632 throw new IllegalArgumentException( "don't know what to do with " + field );
1639 static double addPhylogenyDistances( final double a, final double b ) {
1640 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
1643 else if ( a >= 0.0 ) {
1646 else if ( b >= 0.0 ) {
1649 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1652 static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
1654 boolean all_default = true;
1655 while ( anc != desc ) {
1656 if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
1657 d += desc.getDistanceToParent();
1658 if ( all_default ) {
1659 all_default = false;
1662 desc = desc.getParent();
1664 if ( all_default ) {
1665 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1671 * Deep copies the phylogeny originating from this node.
1673 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
1674 if ( source == null ) {
1678 final PhylogenyNode newnode = source.copyNodeData();
1679 if ( !source.isExternal() ) {
1680 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1681 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
1689 * Shallow copies the phylogeny originating from this node.
1691 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
1692 if ( source == null ) {
1696 final PhylogenyNode newnode = source.copyNodeDataShallow();
1697 if ( !source.isExternal() ) {
1698 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1699 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
1706 private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
1707 final double min_distance_to_root ) {
1708 final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
1709 final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
1710 for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
1711 if ( ext.getIndicator() != 0 ) {
1712 throw new RuntimeException( "this should not have happened" );
1714 ext.setIndicator( ( byte ) 1 );
1721 * Calculates the distance between PhylogenyNodes n1 and n2.
1722 * PRECONDITION: n1 is a descendant of n2.
1725 * a descendant of n2
1727 * @return distance between n1 and n2
1729 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
1731 while ( n1 != n2 ) {
1732 if ( n1.getDistanceToParent() > 0.0 ) {
1733 d += n1.getDistanceToParent();
1735 n1 = n1.getParent();
1740 private static boolean match( final String s,
1742 final boolean case_sensitive,
1743 final boolean partial,
1744 final boolean regex ) {
1745 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1748 String my_s = s.trim();
1749 String my_query = query.trim();
1750 if ( !case_sensitive && !regex ) {
1751 my_s = my_s.toLowerCase();
1752 my_query = my_query.toLowerCase();
1757 if ( case_sensitive ) {
1758 p = Pattern.compile( my_query );
1761 p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
1764 catch ( final PatternSyntaxException e ) {
1768 return p.matcher( my_s ).find();
1774 else if ( partial ) {
1775 return my_s.indexOf( my_query ) >= 0;
1780 p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
1782 catch ( final PatternSyntaxException e ) {
1786 return p.matcher( my_s ).find();
1794 private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
1795 PhylogenyNode n = node;
1796 PhylogenyNode prev = node;
1797 while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
1804 public static enum DESCENDANT_SORT_PRIORITY {
1805 NODE_NAME, SEQUENCE, TAXONOMY;
1808 public static enum PhylogenyNodeField {
1813 TAXONOMY_COMMON_NAME,
1815 TAXONOMY_ID_UNIPROT_1,
1816 TAXONOMY_ID_UNIPROT_2,
1817 TAXONOMY_SCIENTIFIC_NAME;
1820 public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
1821 for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
1822 final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
1823 final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
1824 final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
1825 new_seq.setMolecularSequenceAligned( true );
1826 new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
1827 new_seq.setName( seq.getIdentifier() );
1829 new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
1831 catch ( final PhyloXmlDataFormatException ignore ) {
1834 node.getNodeData().addSequence( new_seq );
1838 final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
1841 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1842 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1843 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1844 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1845 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1846 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1848 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1849 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1850 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1851 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1854 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1855 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1856 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1857 return n1.getNodeData().getSequence().getName().toLowerCase()
1858 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1860 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1861 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1862 return n1.getNodeData().getSequence().getGeneName()
1863 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1865 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1866 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1867 return n1.getNodeData().getSequence().getSymbol()
1868 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1871 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1872 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1878 final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
1881 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1882 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1883 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1884 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1885 return n1.getNodeData().getSequence().getName().toLowerCase()
1886 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1888 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1889 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1890 return n1.getNodeData().getSequence().getGeneName()
1891 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1893 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1894 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1895 return n1.getNodeData().getSequence().getSymbol()
1896 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1899 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1900 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1901 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1902 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1903 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1905 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1906 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1907 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1908 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1911 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1912 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1918 final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
1921 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1922 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1923 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1925 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1926 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1927 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1928 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1929 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1931 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1932 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1933 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1934 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1937 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1938 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1939 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1940 return n1.getNodeData().getSequence().getName().toLowerCase()
1941 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1943 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1944 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1945 return n1.getNodeData().getSequence().getGeneName()
1946 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1948 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1949 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1950 return n1.getNodeData().getSequence().getSymbol()
1951 .compareTo( n2.getNodeData().getSequence().getSymbol() );