2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 import java.util.regex.PatternSyntaxException;
45 import org.forester.io.parsers.FastaParser;
46 import org.forester.io.parsers.PhylogenyParser;
47 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
48 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
49 import org.forester.io.parsers.util.PhylogenyParserException;
50 import org.forester.msa.Msa;
51 import org.forester.phylogeny.data.Accession;
52 import org.forester.phylogeny.data.Annotation;
53 import org.forester.phylogeny.data.BranchColor;
54 import org.forester.phylogeny.data.BranchWidth;
55 import org.forester.phylogeny.data.Confidence;
56 import org.forester.phylogeny.data.DomainArchitecture;
57 import org.forester.phylogeny.data.Event;
58 import org.forester.phylogeny.data.Identifier;
59 import org.forester.phylogeny.data.PhylogenyDataUtil;
60 import org.forester.phylogeny.data.Sequence;
61 import org.forester.phylogeny.data.Taxonomy;
62 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
63 import org.forester.phylogeny.factories.PhylogenyFactory;
64 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
65 import org.forester.util.BasicDescriptiveStatistics;
66 import org.forester.util.DescriptiveStatistics;
67 import org.forester.util.ForesterUtil;
69 public class PhylogenyMethods {
71 private static boolean _order_changed;
73 private PhylogenyMethods() {
74 // Hidden constructor.
78 public Object clone() throws CloneNotSupportedException {
79 throw new CloneNotSupportedException();
82 public static boolean extractFastaInformation( final Phylogeny phy ) {
83 boolean could_extract = false;
84 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
85 final PhylogenyNode node = iter.next();
86 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
87 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
88 if ( name_m.lookingAt() ) {
90 final String acc_source = name_m.group( 1 );
91 final String acc = name_m.group( 2 );
92 final String seq_name = name_m.group( 3 );
93 final String tax_sn = name_m.group( 4 );
94 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
95 ForesterUtil.ensurePresenceOfSequence( node );
96 node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
98 if ( !ForesterUtil.isEmpty( seq_name ) ) {
99 ForesterUtil.ensurePresenceOfSequence( node );
100 node.getNodeData().getSequence( 0 ).setName( seq_name );
102 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
103 ForesterUtil.ensurePresenceOfTaxonomy( node );
104 node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
109 return could_extract;
112 public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
113 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
114 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
115 final PhylogenyNode n = iter.next();
116 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
117 stats.addValue( n.getDistanceToParent() );
123 public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
124 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
125 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
126 final PhylogenyNode n = iter.next();
127 if ( !n.isExternal() && !n.isRoot() ) {
128 if ( n.getBranchData().isHasConfidences() ) {
129 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
130 final Confidence c = n.getBranchData().getConfidences().get( i );
131 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
132 stats.add( i, new BasicDescriptiveStatistics() );
134 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
135 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
136 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
137 throw new IllegalArgumentException( "support values in node [" + n.toString()
138 + "] appear inconsistently ordered" );
141 stats.get( i ).setDescription( c.getType() );
143 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
152 * Calculates the distance between PhylogenyNodes node1 and node2.
157 * @return distance between node1 and node2
159 public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
160 final PhylogenyNode lca = calculateLCA( node1, node2 );
161 final PhylogenyNode n1 = node1;
162 final PhylogenyNode n2 = node2;
163 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
167 * Returns the LCA of PhylogenyNodes node1 and node2.
172 * @return LCA of node1 and node2
174 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
175 if ( node1 == null ) {
176 throw new IllegalArgumentException( "first argument (node) is null" );
178 if ( node2 == null ) {
179 throw new IllegalArgumentException( "second argument (node) is null" );
181 if ( node1 == node2 ) {
184 if ( ( node1.getParent() == node2.getParent() ) ) {
185 return node1.getParent();
187 int depth1 = node1.calculateDepth();
188 int depth2 = node2.calculateDepth();
189 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
190 if ( depth1 > depth2 ) {
191 node1 = node1.getParent();
194 else if ( depth2 > depth1 ) {
195 node2 = node2.getParent();
199 if ( node1 == node2 ) {
202 node1 = node1.getParent();
203 node2 = node2.getParent();
208 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
212 * Returns the LCA of PhylogenyNodes node1 and node2.
213 * Precondition: ids are in pre-order (or level-order).
218 * @return LCA of node1 and node2
220 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
221 if ( node1 == null ) {
222 throw new IllegalArgumentException( "first argument (node) is null" );
224 if ( node2 == null ) {
225 throw new IllegalArgumentException( "second argument (node) is null" );
227 while ( node1 != node2 ) {
228 if ( node1.getId() > node2.getId() ) {
229 node1 = node1.getParent();
232 node2 = node2.getParent();
238 public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
239 if ( node.isExternal() ) {
243 for( PhylogenyNode d : node.getAllExternalDescendants() ) {
245 while ( d != node ) {
246 if ( d.isCollapse() ) {
261 public static int calculateMaxDepth( final Phylogeny phy ) {
263 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
264 final PhylogenyNode node = iter.next();
265 final int steps = node.calculateDepth();
273 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
275 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
276 final PhylogenyNode node = iter.next();
277 final double d = node.calculateDistanceToRoot();
285 public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
287 PhylogenyNode max_node = phy.getFirstExternalNode();
288 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
289 final PhylogenyNode node = iter.next();
290 final double d = node.calculateDistanceToRoot();
299 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
300 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
302 for( final PhylogenyNode n : descs ) {
303 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
310 public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
311 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
312 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
313 final PhylogenyNode n = iter.next();
314 if ( !n.isExternal() ) {
315 stats.addValue( n.getNumberOfDescendants() );
321 public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
322 final List<PhylogenyNode> eds = n.getAllExternalDescendants();
323 final List<Double> d = new ArrayList<Double>();
324 for( final PhylogenyNode ed : eds ) {
325 d.add( calculateDistanceToAncestor( n, ed ) );
327 for( int i = 0; i < eds.size(); ++i ) {
328 n.setChildNode( i, eds.get( i ) );
329 eds.get( i ).setDistanceToParent( d.get( i ) );
333 public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
335 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
336 final PhylogenyNode n = iter.next();
337 if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) {
344 public static int countNumberOfPolytomies( final Phylogeny phy ) {
346 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
347 final PhylogenyNode n = iter.next();
348 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
355 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
356 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
357 final List<PhylogenyNode> ext = phy.getExternalNodes();
358 for( final PhylogenyNode n : ext ) {
359 nodes.put( n.getName(), n );
364 public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
365 for( final Long id : to_delete ) {
366 phy.deleteSubtree( phy.getNode( id ), true );
368 phy.clearHashIdToNodeMap();
369 phy.externalNodesHaveChanged();
372 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
373 throws IllegalArgumentException {
374 for( final String element : node_names_to_delete ) {
375 if ( ForesterUtil.isEmpty( element ) ) {
378 List<PhylogenyNode> nodes = null;
379 nodes = p.getNodes( element );
380 final Iterator<PhylogenyNode> it = nodes.iterator();
381 while ( it.hasNext() ) {
382 final PhylogenyNode n = it.next();
383 if ( !n.isExternal() ) {
384 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
386 p.deleteSubtree( n, true );
389 p.clearHashIdToNodeMap();
390 p.externalNodesHaveChanged();
393 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
394 final Phylogeny p ) {
395 final PhylogenyNodeIterator it = p.iteratorExternalForward();
396 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
398 Arrays.sort( node_names_to_keep );
399 while ( it.hasNext() ) {
400 final String curent_name = it.next().getName();
401 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
402 to_delete[ i++ ] = curent_name;
405 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
406 final List<String> deleted = new ArrayList<String>();
407 for( final String n : to_delete ) {
408 if ( !ForesterUtil.isEmpty( n ) ) {
415 public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep, final Phylogeny phy ) {
416 final Set<Long> to_delete = new HashSet<Long>();
417 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
418 final PhylogenyNode n = it.next();
419 if ( n.getNodeData().isHasTaxonomy() ) {
420 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
421 to_delete.add( n.getId() );
425 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
428 deleteExternalNodesNegativeSelection( to_delete, phy );
431 final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
432 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
433 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
434 final PhylogenyNode n = iter.next();
435 if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
439 for( final PhylogenyNode d : to_delete ) {
440 PhylogenyMethods.removeNode( d, phy );
442 phy.clearHashIdToNodeMap();
443 phy.externalNodesHaveChanged();
446 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
447 if ( n.isInternal() ) {
448 throw new IllegalArgumentException( "node is not external" );
450 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
451 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
452 final PhylogenyNode i = it.next();
453 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
457 for( final PhylogenyNode d : to_delete ) {
458 phy.deleteSubtree( d, true );
460 phy.clearHashIdToNodeMap();
461 phy.externalNodesHaveChanged();
464 public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
465 final double min_distance_to_root ) {
466 if ( min_distance_to_root <= 0 ) {
467 throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
469 final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
470 setAllIndicatorsToZero( phy );
471 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
472 final PhylogenyNode n = it.next();
473 if ( n.getIndicator() != 0 ) {
476 l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
478 throw new RuntimeException( "this should not have happened" );
484 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
485 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
486 final Set<Long> encountered = new HashSet<Long>();
487 if ( !node.isExternal() ) {
488 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
489 for( PhylogenyNode current : exts ) {
490 descs.add( current );
491 while ( current != node ) {
492 current = current.getParent();
493 if ( encountered.contains( current.getId() ) ) {
496 descs.add( current );
497 encountered.add( current.getId() );
511 public static Color getBranchColorValue( final PhylogenyNode node ) {
512 if ( node.getBranchData().getBranchColor() == null ) {
515 return node.getBranchData().getBranchColor().getValue();
521 public static double getBranchWidthValue( final PhylogenyNode node ) {
522 if ( !node.getBranchData().isHasBranchWidth() ) {
523 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
525 return node.getBranchData().getBranchWidth().getValue();
531 public static double getConfidenceValue( final PhylogenyNode node ) {
532 if ( !node.getBranchData().isHasConfidences() ) {
533 return Confidence.CONFIDENCE_DEFAULT_VALUE;
535 return node.getBranchData().getConfidence( 0 ).getValue();
541 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
542 if ( !node.getBranchData().isHasConfidences() ) {
543 return new double[ 0 ];
545 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
547 for( final Confidence c : node.getBranchData().getConfidences() ) {
548 values[ i++ ] = c.getValue();
553 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
554 return calculateLCA( n1, n2 ).getNodeData().getEvent();
558 * Returns taxonomy t if all external descendants have
559 * the same taxonomy t, null otherwise.
562 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
563 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
565 for( final PhylogenyNode n : descs ) {
566 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
569 else if ( tax == null ) {
570 tax = n.getNodeData().getTaxonomy();
572 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
579 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
580 final List<PhylogenyNode> children = node.getAllExternalDescendants();
581 PhylogenyNode farthest = null;
582 double longest = -Double.MAX_VALUE;
583 for( final PhylogenyNode child : children ) {
584 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
586 longest = PhylogenyMethods.getDistance( child, node );
592 // public static PhylogenyMethods getInstance() {
593 // if ( PhylogenyMethods._instance == null ) {
594 // PhylogenyMethods._instance = new PhylogenyMethods();
596 // return PhylogenyMethods._instance;
599 * Returns the largest confidence value found on phy.
601 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
602 double max = -Double.MAX_VALUE;
603 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
604 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
605 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
612 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
613 int min = Integer.MAX_VALUE;
616 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
618 if ( n.isInternal() ) {
619 d = n.getNumberOfDescendants();
629 * Convenience method for display purposes.
630 * Not intended for algorithms.
632 public static String getSpecies( final PhylogenyNode node ) {
633 if ( !node.getNodeData().isHasTaxonomy() ) {
636 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
637 return node.getNodeData().getTaxonomy().getScientificName();
639 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
640 return node.getNodeData().getTaxonomy().getTaxonomyCode();
643 return node.getNodeData().getTaxonomy().getCommonName();
648 * Convenience method for display purposes.
649 * Not intended for algorithms.
651 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
652 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
655 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
658 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
659 if ( n.isExternal() ) {
663 if ( n.isDuplication() ) {
664 for( final PhylogenyNode desc : n.getDescendants() ) {
665 if ( !isAllDecendentsAreDuplications( desc ) ) {
677 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
678 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
679 if ( node.getChildNode( i ).isExternal() ) {
687 * This is case insensitive.
690 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
691 final String[] providers ) {
692 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
693 final String my_tax_prov = tax.getIdentifier().getProvider();
694 for( final String provider : providers ) {
695 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
706 public static void midpointRoot( final Phylogeny phylogeny ) {
707 if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
711 final int total_nodes = phylogeny.getNodeCount();
713 if ( ++counter > total_nodes ) {
714 throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
716 PhylogenyNode a = null;
719 for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
720 final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
721 final double df = getDistance( f, phylogeny.getRoot() );
728 else if ( df > db ) {
733 final double diff = da - db;
734 if ( diff < 0.000001 ) {
737 double x = da - ( diff / 2.0 );
738 while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
739 x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
742 phylogeny.reRoot( a, x );
744 phylogeny.recalculateNumberOfExternalDescendants( true );
747 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
748 final double max_bootstrap_value,
749 final double max_normalized_value ) {
750 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
751 final PhylogenyNode node = iter.next();
752 if ( node.isInternal() ) {
753 final double confidence = getConfidenceValue( node );
754 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
755 if ( confidence >= max_bootstrap_value ) {
756 setBootstrapConfidence( node, max_normalized_value );
759 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
766 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
767 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
768 if ( phy.isEmpty() ) {
771 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
772 nodes.add( iter.next() );
778 * Returns a map of distinct taxonomies of
779 * all external nodes of node.
780 * If at least one of the external nodes has no taxonomy,
784 public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
785 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
786 final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
787 for( final PhylogenyNode n : descs ) {
788 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
791 final Taxonomy t = n.getNodeData().getTaxonomy();
792 if ( tax_map.containsKey( t ) ) {
793 tax_map.put( t, tax_map.get( t ) + 1 );
803 * Arranges the order of childern for each node of this Phylogeny in such a
804 * way that either the branch with more children is on top (right) or on
805 * bottom (left), dependent on the value of boolean order.
808 * decides in which direction to order
811 public static void orderAppearance( final PhylogenyNode n,
813 final boolean order_ext_alphabetically,
814 final DESCENDANT_SORT_PRIORITY pri ) {
815 if ( n.isExternal() ) {
819 if ( ( n.getNumberOfDescendants() == 2 )
820 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
821 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) {
822 final PhylogenyNode temp = n.getChildNode1();
823 n.setChild1( n.getChildNode2() );
825 _order_changed = true;
827 else if ( order_ext_alphabetically ) {
828 boolean all_ext = true;
829 for( final PhylogenyNode i : n.getDescendants() ) {
830 if ( !i.isExternal() ) {
836 PhylogenyMethods.sortNodeDescendents( n, pri );
839 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
840 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
845 public synchronized static void orderAppearanceX( final PhylogenyNode n,
846 final boolean order_ext_alphabetically,
847 final DESCENDANT_SORT_PRIORITY pri ) {
848 if ( n.isExternal() ) {
852 _order_changed = false;
855 order_ext_alphabetically,
857 if (!_order_changed ) {
860 order_ext_alphabetically,
866 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
867 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
868 final PhylogenyNode node = iter.next();
873 if ( node.isInternal() ) {
874 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
875 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
876 final PhylogenyNode child_node = node.getChildNode( i );
877 final Color child_color = getBranchColorValue( child_node );
878 if ( child_color != null ) {
880 red += child_color.getRed();
881 green += child_color.getGreen();
882 blue += child_color.getBlue();
885 setBranchColorValue( node,
886 new Color( ForesterUtil.roundToInt( red / n ),
887 ForesterUtil.roundToInt( green / n ),
888 ForesterUtil.roundToInt( blue / n ) ) );
893 public static final void preOrderReId( final Phylogeny phy ) {
894 if ( phy.isEmpty() ) {
897 phy.setIdToNodeMap( null );
898 long i = PhylogenyNode.getNodeCount();
899 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
900 it.next().setId( i++ );
902 PhylogenyNode.setNodeCount( i );
905 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException {
906 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
907 final Phylogeny[] trees = factory.create( file, parser );
908 if ( ( trees == null ) || ( trees.length == 0 ) ) {
909 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
914 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
916 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
917 for( final File file : files ) {
918 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
919 final Phylogeny[] trees = factory.create( file, parser );
920 if ( ( trees == null ) || ( trees.length == 0 ) ) {
921 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
923 tree_list.addAll( Arrays.asList( trees ) );
925 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
928 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
929 if ( remove_me.isRoot() ) {
930 if ( remove_me.getNumberOfDescendants() == 1 ) {
931 final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
932 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
933 desc.getDistanceToParent() ) );
934 desc.setParent( null );
935 phylogeny.setRoot( desc );
936 phylogeny.clearHashIdToNodeMap();
939 throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
942 else if ( remove_me.isExternal() ) {
943 phylogeny.deleteSubtree( remove_me, false );
944 phylogeny.clearHashIdToNodeMap();
945 phylogeny.externalNodesHaveChanged();
948 final PhylogenyNode parent = remove_me.getParent();
949 final List<PhylogenyNode> descs = remove_me.getDescendants();
950 parent.removeChildNode( remove_me );
951 for( final PhylogenyNode desc : descs ) {
952 parent.addAsChild( desc );
953 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
954 desc.getDistanceToParent() ) );
956 remove_me.setParent( null );
957 phylogeny.clearHashIdToNodeMap();
958 phylogeny.externalNodesHaveChanged();
962 private static enum NDF {
964 TaxonomyCode( "TC" ),
965 TaxonomyCommonName( "CN" ),
966 TaxonomyScientificName( "TS" ),
967 TaxonomyIdentifier( "TI" ),
968 TaxonomySynonym( "SY" ),
969 SequenceName( "SN" ),
971 SequenceSymbol( "SS" ),
972 SequenceAccession( "SA" ),
976 BinaryCharacter( "BC" ),
977 MolecularSequence( "MS" );
979 private final String _text;
981 NDF( final String text ) {
985 public static NDF fromString( final String text ) {
986 for( final NDF n : NDF.values() ) {
987 if ( text.startsWith( n._text ) ) {
995 public static List<Long> searchData( final String query,
997 final boolean case_sensitive,
998 final boolean partial,
1000 final boolean search_domains,
1001 final double domains_confidence_threshold ) {
1002 final List<Long> nodes = new ArrayList<Long>();
1003 if ( phy.isEmpty() || ( query == null ) ) {
1006 if ( ForesterUtil.isEmpty( query ) ) {
1009 String my_query = query;
1011 if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
1012 ndf = NDF.fromString( my_query );
1013 if ( ndf != null ) {
1014 my_query = my_query.substring( 3 );
1017 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1018 final PhylogenyNode node = iter.next();
1019 boolean match = false;
1020 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1021 && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
1024 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) )
1025 && node.getNodeData().isHasTaxonomy()
1026 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1033 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) )
1034 && node.getNodeData().isHasTaxonomy()
1035 && match( node.getNodeData().getTaxonomy().getCommonName(),
1042 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1043 && node.getNodeData().isHasTaxonomy()
1044 && match( node.getNodeData().getTaxonomy().getScientificName(),
1051 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) )
1052 && node.getNodeData().isHasTaxonomy()
1053 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1054 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1061 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1062 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1063 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1064 I: for( final String syn : syns ) {
1065 if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
1071 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1072 && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
1075 if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
1076 && match( node.getNodeData().getSequence().getGeneName(), my_query, case_sensitive, partial, regex ) ) {
1079 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
1080 && match( node.getNodeData().getSequence().getSymbol(), my_query, case_sensitive, partial, regex ) ) {
1084 && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1085 && node.getNodeData().isHasSequence()
1086 && ( node.getNodeData().getSequence().getAccession() != null )
1087 && match( node.getNodeData().getSequence().getAccession().getValue(),
1094 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1095 && node.getNodeData().isHasSequence()
1096 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1097 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1098 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1099 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1100 && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
1106 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1107 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1108 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1109 if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
1113 if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
1119 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1120 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1121 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1122 if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
1126 if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
1130 if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
1136 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1137 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1138 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1139 I: while ( it.hasNext() ) {
1140 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1145 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1146 I: while ( it.hasNext() ) {
1147 if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
1154 && ( ndf == NDF.MolecularSequence )
1155 && node.getNodeData().isHasSequence()
1156 && match( node.getNodeData().getSequence().getMolecularSequence(),
1164 nodes.add( node.getId() );
1170 public static List<Long> searchDataLogicalAnd( final String[] queries,
1171 final Phylogeny phy,
1172 final boolean case_sensitive,
1173 final boolean partial,
1174 final boolean search_domains,
1175 final double domains_confidence_threshold ) {
1176 final List<Long> nodes = new ArrayList<Long>();
1177 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1180 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1181 final PhylogenyNode node = iter.next();
1182 boolean all_matched = true;
1183 for( String query : queries ) {
1184 if ( query == null ) {
1187 query = query.trim();
1189 if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
1190 ndf = NDF.fromString( query );
1191 if ( ndf != null ) {
1192 query = query.substring( 3 );
1195 boolean match = false;
1196 if ( ForesterUtil.isEmpty( query ) ) {
1199 if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
1200 && match( node.getName(), query, case_sensitive, partial, false ) ) {
1203 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) )
1204 && node.getNodeData().isHasTaxonomy()
1205 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
1212 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) )
1213 && node.getNodeData().isHasTaxonomy()
1214 && match( node.getNodeData().getTaxonomy().getCommonName(),
1221 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
1222 && node.getNodeData().isHasTaxonomy()
1223 && match( node.getNodeData().getTaxonomy().getScientificName(),
1230 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) )
1231 && node.getNodeData().isHasTaxonomy()
1232 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1233 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1240 else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
1241 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1242 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1243 I: for( final String syn : syns ) {
1244 if ( match( syn, query, case_sensitive, partial, false ) ) {
1250 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
1251 && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial, false ) ) {
1255 && ( ( ndf == null ) || ( ndf == NDF.GeneName ) )
1256 && node.getNodeData().isHasSequence()
1257 && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial, false ) ) {
1260 if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
1261 && node.getNodeData().isHasSequence()
1262 && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial, false ) ) {
1266 && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
1267 && node.getNodeData().isHasSequence()
1268 && ( node.getNodeData().getSequence().getAccession() != null )
1269 && match( node.getNodeData().getSequence().getAccession().getValue(),
1276 if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
1277 && node.getNodeData().isHasSequence()
1278 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1279 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1280 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1281 if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
1282 && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
1288 if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
1289 && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
1290 for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
1291 if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
1295 if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
1301 if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
1302 && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
1303 for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
1304 if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
1308 if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
1312 if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
1318 if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
1319 && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1320 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1321 I: while ( it.hasNext() ) {
1322 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1327 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1328 I: while ( it.hasNext() ) {
1329 if ( match( it.next(), query, case_sensitive, partial, false ) ) {
1336 && ( ndf == NDF.MolecularSequence )
1337 && node.getNodeData().isHasSequence()
1338 && match( node.getNodeData().getSequence().getMolecularSequence(),
1346 all_matched = false;
1350 if ( all_matched ) {
1351 nodes.add( node.getId() );
1357 public static void setAllIndicatorsToZero( final Phylogeny phy ) {
1358 for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
1359 it.next().setIndicator( ( byte ) 0 );
1364 * Convenience method.
1365 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1367 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1368 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1371 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1372 if ( node.getBranchData().getBranchColor() == null ) {
1373 node.getBranchData().setBranchColor( new BranchColor() );
1375 node.getBranchData().getBranchColor().setValue( color );
1379 * Convenience method
1381 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1382 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1386 * Convenience method.
1387 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1389 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1390 setConfidence( node, confidence_value, "" );
1394 * Convenience method.
1395 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1397 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1398 Confidence c = null;
1399 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1400 c = node.getBranchData().getConfidence( 0 );
1403 c = new Confidence();
1404 node.getBranchData().addConfidence( c );
1407 c.setValue( confidence_value );
1410 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1411 if ( !node.getNodeData().isHasTaxonomy() ) {
1412 node.getNodeData().setTaxonomy( new Taxonomy() );
1414 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1418 * Convenience method to set the taxonomy code of a phylogeny node.
1422 * @param taxonomy_code
1423 * @throws PhyloXmlDataFormatException
1425 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1426 throws PhyloXmlDataFormatException {
1427 if ( !node.getNodeData().isHasTaxonomy() ) {
1428 node.getNodeData().setTaxonomy( new Taxonomy() );
1430 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1433 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
1434 Comparator<PhylogenyNode> c;
1437 c = new PhylogenyNodeSortSequencePriority();
1440 c = new PhylogenyNodeSortNodeNamePriority();
1443 c = new PhylogenyNodeSortTaxonomyPriority();
1445 final List<PhylogenyNode> descs = node.getDescendants();
1446 Collections.sort( descs, c );
1448 for( final PhylogenyNode desc : descs ) {
1449 node.setChildNode( i++, desc );
1454 * Removes from Phylogeny to_be_stripped all external Nodes which are
1455 * associated with a species NOT found in Phylogeny reference.
1458 * a reference Phylogeny
1459 * @param to_be_stripped
1460 * Phylogeny to be stripped
1461 * @return nodes removed from to_be_stripped
1463 public static List<PhylogenyNode> taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference,
1464 final Phylogeny to_be_stripped ) {
1465 final Set<String> ref_ext_taxo = new HashSet<String>();
1466 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1467 final PhylogenyNode n = it.next();
1468 if ( !n.getNodeData().isHasTaxonomy() ) {
1469 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1471 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1472 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1474 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1475 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1477 if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
1478 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
1479 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
1482 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1483 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1484 final PhylogenyNode n = it.next();
1485 if ( !n.getNodeData().isHasTaxonomy() ) {
1486 nodes_to_delete.add( n );
1488 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1489 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1490 && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n
1491 .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
1492 nodes_to_delete.add( n );
1495 for( final PhylogenyNode n : nodes_to_delete ) {
1496 to_be_stripped.deleteSubtree( n, true );
1498 to_be_stripped.clearHashIdToNodeMap();
1499 to_be_stripped.externalNodesHaveChanged();
1500 return nodes_to_delete;
1503 final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
1504 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1505 while ( it.hasNext() ) {
1506 final PhylogenyNode n = it.next();
1507 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
1510 value = Double.parseDouble( n.getName() );
1512 catch ( final NumberFormatException e ) {
1513 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
1514 + e.getLocalizedMessage() );
1516 if ( value >= 0.0 ) {
1517 n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
1524 final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
1525 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1526 while ( it.hasNext() ) {
1527 final PhylogenyNode n = it.next();
1528 if ( !n.isExternal() && !n.isRoot() ) {
1529 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1532 value = Double.parseDouble( n.getName() );
1534 catch ( final NumberFormatException e ) {
1537 if ( ( value < 0.0 ) || ( value > 100 ) ) {
1546 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) {
1547 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1548 while ( it.hasNext() ) {
1549 transferInternalNodeNameToConfidence( confidence_type, it.next() );
1553 private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
1554 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
1555 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
1558 d = Double.parseDouble( n.getName() );
1560 catch ( final Exception e ) {
1564 n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
1571 final static public void transferNodeNameToField( final Phylogeny phy,
1572 final PhylogenyNodeField field,
1573 final boolean external_only ) throws PhyloXmlDataFormatException {
1574 final PhylogenyNodeIterator it = phy.iteratorPostorder();
1575 while ( it.hasNext() ) {
1576 final PhylogenyNode n = it.next();
1577 if ( external_only && n.isInternal() ) {
1580 final String name = n.getName().trim();
1581 if ( !ForesterUtil.isEmpty( name ) ) {
1585 setTaxonomyCode( n, name );
1587 case TAXONOMY_SCIENTIFIC_NAME:
1589 if ( !n.getNodeData().isHasTaxonomy() ) {
1590 n.getNodeData().setTaxonomy( new Taxonomy() );
1592 n.getNodeData().getTaxonomy().setScientificName( name );
1594 case TAXONOMY_COMMON_NAME:
1596 if ( !n.getNodeData().isHasTaxonomy() ) {
1597 n.getNodeData().setTaxonomy( new Taxonomy() );
1599 n.getNodeData().getTaxonomy().setCommonName( name );
1601 case SEQUENCE_SYMBOL:
1603 if ( !n.getNodeData().isHasSequence() ) {
1604 n.getNodeData().setSequence( new Sequence() );
1606 n.getNodeData().getSequence().setSymbol( name );
1610 if ( !n.getNodeData().isHasSequence() ) {
1611 n.getNodeData().setSequence( new Sequence() );
1613 n.getNodeData().getSequence().setName( name );
1615 case TAXONOMY_ID_UNIPROT_1: {
1616 if ( !n.getNodeData().isHasTaxonomy() ) {
1617 n.getNodeData().setTaxonomy( new Taxonomy() );
1620 final int i = name.indexOf( '_' );
1622 id = name.substring( 0, i );
1627 n.getNodeData().getTaxonomy()
1628 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1631 case TAXONOMY_ID_UNIPROT_2: {
1632 if ( !n.getNodeData().isHasTaxonomy() ) {
1633 n.getNodeData().setTaxonomy( new Taxonomy() );
1636 final int i = name.indexOf( '_' );
1638 id = name.substring( i + 1, name.length() );
1643 n.getNodeData().getTaxonomy()
1644 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
1648 if ( !n.getNodeData().isHasTaxonomy() ) {
1649 n.getNodeData().setTaxonomy( new Taxonomy() );
1651 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
1655 throw new IllegalArgumentException( "don't know what to do with " + field );
1662 static double addPhylogenyDistances( final double a, final double b ) {
1663 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
1666 else if ( a >= 0.0 ) {
1669 else if ( b >= 0.0 ) {
1672 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1675 static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
1677 boolean all_default = true;
1678 while ( anc != desc ) {
1679 if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
1680 d += desc.getDistanceToParent();
1681 if ( all_default ) {
1682 all_default = false;
1685 desc = desc.getParent();
1687 if ( all_default ) {
1688 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
1694 * Deep copies the phylogeny originating from this node.
1696 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
1697 if ( source == null ) {
1701 final PhylogenyNode newnode = source.copyNodeData();
1702 if ( !source.isExternal() ) {
1703 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1704 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
1712 * Shallow copies the phylogeny originating from this node.
1714 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
1715 if ( source == null ) {
1719 final PhylogenyNode newnode = source.copyNodeDataShallow();
1720 if ( !source.isExternal() ) {
1721 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
1722 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
1729 private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
1730 final double min_distance_to_root ) {
1731 final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
1732 final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
1733 for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
1734 if ( ext.getIndicator() != 0 ) {
1735 throw new RuntimeException( "this should not have happened" );
1737 ext.setIndicator( ( byte ) 1 );
1744 * Calculates the distance between PhylogenyNodes n1 and n2.
1745 * PRECONDITION: n1 is a descendant of n2.
1748 * a descendant of n2
1750 * @return distance between n1 and n2
1752 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
1754 while ( n1 != n2 ) {
1755 if ( n1.getDistanceToParent() > 0.0 ) {
1756 d += n1.getDistanceToParent();
1758 n1 = n1.getParent();
1763 private static boolean match( final String s,
1765 final boolean case_sensitive,
1766 final boolean partial,
1767 final boolean regex ) {
1768 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1771 String my_s = s.trim();
1772 String my_query = query.trim();
1773 if ( !case_sensitive && !regex ) {
1774 my_s = my_s.toLowerCase();
1775 my_query = my_query.toLowerCase();
1780 if ( case_sensitive ) {
1781 p = Pattern.compile( my_query );
1784 p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
1787 catch ( final PatternSyntaxException e ) {
1791 return p.matcher( my_s ).find();
1797 else if ( partial ) {
1798 return my_s.indexOf( my_query ) >= 0;
1803 p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
1805 catch ( final PatternSyntaxException e ) {
1809 return p.matcher( my_s ).find();
1817 private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
1818 PhylogenyNode n = node;
1819 PhylogenyNode prev = node;
1820 while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
1827 public static enum DESCENDANT_SORT_PRIORITY {
1828 NODE_NAME, SEQUENCE, TAXONOMY;
1831 public static enum PhylogenyNodeField {
1836 TAXONOMY_COMMON_NAME,
1838 TAXONOMY_ID_UNIPROT_1,
1839 TAXONOMY_ID_UNIPROT_2,
1840 TAXONOMY_SCIENTIFIC_NAME;
1843 public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
1844 for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
1845 final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
1846 final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
1847 final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
1848 new_seq.setMolecularSequenceAligned( true );
1849 new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
1850 new_seq.setName( seq.getIdentifier() );
1852 new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
1854 catch ( final PhyloXmlDataFormatException ignore ) {
1857 node.getNodeData().addSequence( new_seq );
1861 final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
1864 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1865 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1866 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1867 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1868 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1869 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1871 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1872 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1873 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1874 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1877 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1878 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1879 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1880 return n1.getNodeData().getSequence().getName().toLowerCase()
1881 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1883 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1884 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1885 return n1.getNodeData().getSequence().getGeneName()
1886 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1888 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1889 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1890 return n1.getNodeData().getSequence().getSymbol()
1891 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1894 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1895 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1901 final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
1904 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1905 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1906 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1907 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1908 return n1.getNodeData().getSequence().getName().toLowerCase()
1909 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1911 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1912 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1913 return n1.getNodeData().getSequence().getGeneName()
1914 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1916 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1917 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1918 return n1.getNodeData().getSequence().getSymbol()
1919 .compareTo( n2.getNodeData().getSequence().getSymbol() );
1922 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1923 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1924 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1925 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1926 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1928 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1929 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1930 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1931 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1934 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1935 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1941 final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
1944 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
1945 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
1946 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
1948 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
1949 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
1950 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
1951 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
1952 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
1954 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
1955 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1956 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
1957 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
1960 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
1961 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
1962 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
1963 return n1.getNodeData().getSequence().getName().toLowerCase()
1964 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
1966 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
1967 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
1968 return n1.getNodeData().getSequence().getGeneName()
1969 .compareTo( n2.getNodeData().getSequence().getGeneName() );
1971 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
1972 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
1973 return n1.getNodeData().getSequence().getSymbol()
1974 .compareTo( n2.getNodeData().getSequence().getSymbol() );