2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.HashSet;
35 import java.util.Iterator;
36 import java.util.List;
38 import java.util.SortedMap;
39 import java.util.TreeMap;
41 import org.forester.io.parsers.PhylogenyParser;
42 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
43 import org.forester.io.parsers.util.PhylogenyParserException;
44 import org.forester.phylogeny.data.BranchColor;
45 import org.forester.phylogeny.data.BranchWidth;
46 import org.forester.phylogeny.data.Confidence;
47 import org.forester.phylogeny.data.DomainArchitecture;
48 import org.forester.phylogeny.data.Identifier;
49 import org.forester.phylogeny.data.PhylogenyDataUtil;
50 import org.forester.phylogeny.data.Sequence;
51 import org.forester.phylogeny.data.Taxonomy;
52 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
53 import org.forester.phylogeny.factories.PhylogenyFactory;
54 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
55 import org.forester.util.BasicDescriptiveStatistics;
56 import org.forester.util.DescriptiveStatistics;
57 import org.forester.util.FailedConditionCheckException;
58 import org.forester.util.ForesterUtil;
60 public class PhylogenyMethods {
62 private static PhylogenyMethods _instance = null;
63 private final Set<Integer> _temp_hash_set = new HashSet<Integer>();
64 private PhylogenyNode _farthest_1 = null;
65 private PhylogenyNode _farthest_2 = null;
67 private PhylogenyMethods() {
68 // Hidden constructor.
72 * Calculates the distance between PhylogenyNodes node1 and node2.
77 * @return distance between node1 and node2
79 public double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
80 final PhylogenyNode lca = obtainLCA( node1, node2 );
81 final PhylogenyNode n1 = node1;
82 final PhylogenyNode n2 = node2;
83 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
86 public double calculateFurthestDistance( final Phylogeny phylogeny ) {
87 if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
92 PhylogenyNode node_1 = null;
93 PhylogenyNode node_2 = null;
94 double farthest_d = -Double.MAX_VALUE;
95 final PhylogenyMethods methods = PhylogenyMethods.getInstance();
96 final List<PhylogenyNode> ext_nodes = phylogeny.getRoot().getAllExternalDescendants();
97 for( int i = 1; i < ext_nodes.size(); ++i ) {
98 for( int j = 0; j < i; ++j ) {
99 final double d = methods.calculateDistance( ext_nodes.get( i ), ext_nodes.get( j ) );
101 throw new RuntimeException( "distance cannot be negative" );
103 if ( d > farthest_d ) {
105 node_1 = ext_nodes.get( i );
106 node_2 = ext_nodes.get( j );
110 _farthest_1 = node_1;
111 _farthest_2 = node_2;
116 public Object clone() throws CloneNotSupportedException {
117 throw new CloneNotSupportedException();
120 public PhylogenyNode getFarthestNode1() {
124 public PhylogenyNode getFarthestNode2() {
129 * Returns the LCA of PhylogenyNodes node1 and node2.
134 * @return LCA of node1 and node2
136 public PhylogenyNode obtainLCA( final PhylogenyNode node1, final PhylogenyNode node2 ) {
137 _temp_hash_set.clear();
138 PhylogenyNode n1 = node1;
139 PhylogenyNode n2 = node2;
140 _temp_hash_set.add( n1.getId() );
141 while ( !n1.isRoot() ) {
143 _temp_hash_set.add( n1.getId() );
145 while ( !_temp_hash_set.contains( n2.getId() ) && !n2.isRoot() ) {
148 if ( !_temp_hash_set.contains( n2.getId() ) ) {
149 throw new IllegalArgumentException( "attempt to get LCA of two nodes which do not share a common root" );
155 * Returns all orthologs of the external PhylogenyNode n of this Phylogeny.
156 * Orthologs are returned as List of node references.
158 * PRECONDITION: This tree must be binary and rooted, and speciation -
159 * duplication need to be assigned for each of its internal Nodes.
161 * Returns null if this Phylogeny is empty or if n is internal.
163 * external PhylogenyNode whose orthologs are to be returned
164 * @return Vector of references to all orthologous Nodes of PhylogenyNode n
165 * of this Phylogeny, null if this Phylogeny is empty or if n is
168 public List<PhylogenyNode> getOrthologousNodes( final Phylogeny phy, final PhylogenyNode node ) {
169 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
170 final PhylogenyNodeIterator it = phy.iteratorExternalForward();
171 while ( it.hasNext() ) {
172 final PhylogenyNode temp_node = it.next();
173 if ( ( temp_node != node ) && isAreOrthologous( node, temp_node ) ) {
174 nodes.add( temp_node );
180 public boolean isAreOrthologous( final PhylogenyNode node1, final PhylogenyNode node2 ) {
181 return !obtainLCA( node1, node2 ).isDuplication();
184 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException {
185 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
186 final Phylogeny[] trees = factory.create( file, parser );
187 if ( ( trees == null ) || ( trees.length == 0 ) ) {
188 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
193 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) {
194 final PhylogenyNodeIterator it = phy.iteratorPostorder();
195 while ( it.hasNext() ) {
196 final PhylogenyNode n = it.next();
197 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
198 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
201 d = Double.parseDouble( n.getName() );
203 catch ( final Exception e ) {
207 n.getBranchData().addConfidence( new Confidence( d, "" ) );
215 final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
216 final PhylogenyNodeIterator it = phy.iteratorPostorder();
217 while ( it.hasNext() ) {
218 final PhylogenyNode n = it.next();
219 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
222 value = Double.parseDouble( n.getName() );
224 catch ( final NumberFormatException e ) {
225 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
226 + e.getLocalizedMessage() );
228 if ( value >= 0.0 ) {
229 n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
238 final static public void sortNodeDescendents( PhylogenyNode node ) {
239 final List<PhylogenyNode> descs = node.getDescendants();
240 // Collections.sort( arg0, comparator );
241 Collections.sort( descs );
244 for( PhylogenyNode desc : descs ) {
245 node.setChildNode( i++, desc );
251 final static public void transferNodeNameToField( final Phylogeny phy,
252 final PhylogenyMethods.PhylogenyNodeField field ) {
253 final PhylogenyNodeIterator it = phy.iteratorPostorder();
254 while ( it.hasNext() ) {
255 final PhylogenyNode n = it.next();
256 final String name = n.getName().trim();
257 if ( !ForesterUtil.isEmpty( name ) ) {
261 // if ( name.length() > 5 ) {
263 // if ( !n.getNodeData().isHasTaxonomy() ) {
264 // n.getNodeData().setTaxonomy( new Taxonomy() );
266 // n.getNodeData().getTaxonomy().setScientificName( name );
271 setTaxonomyCode( n, name );
273 case TAXONOMY_SCIENTIFIC_NAME:
275 if ( !n.getNodeData().isHasTaxonomy() ) {
276 n.getNodeData().setTaxonomy( new Taxonomy() );
278 n.getNodeData().getTaxonomy().setScientificName( name );
280 case TAXONOMY_COMMON_NAME:
282 if ( !n.getNodeData().isHasTaxonomy() ) {
283 n.getNodeData().setTaxonomy( new Taxonomy() );
285 n.getNodeData().getTaxonomy().setCommonName( name );
287 case SEQUENCE_SYMBOL:
289 if ( !n.getNodeData().isHasSequence() ) {
290 n.getNodeData().setSequence( new Sequence() );
292 n.getNodeData().getSequence().setSymbol( name );
296 if ( !n.getNodeData().isHasSequence() ) {
297 n.getNodeData().setSequence( new Sequence() );
299 n.getNodeData().getSequence().setName( name );
301 case TAXONOMY_ID_UNIPROT_1: {
302 if ( !n.getNodeData().isHasTaxonomy() ) {
303 n.getNodeData().setTaxonomy( new Taxonomy() );
306 final int i = name.indexOf( '_' );
308 id = name.substring( 0, i );
313 n.getNodeData().getTaxonomy()
314 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
317 case TAXONOMY_ID_UNIPROT_2: {
318 if ( !n.getNodeData().isHasTaxonomy() ) {
319 n.getNodeData().setTaxonomy( new Taxonomy() );
322 final int i = name.indexOf( '_' );
324 id = name.substring( i + 1, name.length() );
329 n.getNodeData().getTaxonomy()
330 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
338 static double addPhylogenyDistances( final double a, final double b ) {
339 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
342 else if ( a >= 0.0 ) {
345 else if ( b >= 0.0 ) {
348 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
351 // Helper for getUltraParalogousNodes( PhylogenyNode ).
352 public static boolean areAllChildrenDuplications( final PhylogenyNode n ) {
353 if ( n.isExternal() ) {
357 if ( n.isDuplication() ) {
359 for( final PhylogenyNode desc : n.getDescendants() ) {
360 if ( !areAllChildrenDuplications( desc ) ) {
372 public static int calculateDepth( final PhylogenyNode node ) {
373 PhylogenyNode n = node;
375 while ( !n.isRoot() ) {
382 public static double calculateDistanceToRoot( final PhylogenyNode node ) {
383 PhylogenyNode n = node;
385 while ( !n.isRoot() ) {
386 if ( n.getDistanceToParent() > 0.0 ) {
387 d += n.getDistanceToParent();
394 public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
395 if ( node.isExternal() ) {
399 for( PhylogenyNode d : node.getAllExternalDescendants() ) {
401 while ( d != node ) {
402 if ( d.isCollapse() ) {
417 public static int calculateMaxDepth( final Phylogeny phy ) {
419 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
420 final PhylogenyNode node = iter.next();
421 final int steps = calculateDepth( node );
429 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
431 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
432 final PhylogenyNode node = iter.next();
433 final double d = calculateDistanceToRoot( node );
441 public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
442 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
443 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
444 final PhylogenyNode n = iter.next();
445 if ( !n.isExternal() ) {
446 stats.addValue( n.getNumberOfDescendants() );
452 public static DescriptiveStatistics calculatConfidenceStatistics( final Phylogeny phy ) {
453 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
454 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
455 final PhylogenyNode n = iter.next();
456 if ( !n.isExternal() ) {
457 if ( n.getBranchData().isHasConfidences() ) {
458 stats.addValue( n.getBranchData().getConfidence( 0 ).getValue() );
466 * Returns the set of distinct taxonomies of
467 * all external nodes of node.
468 * If at least one the external nodes has no taxonomy,
472 public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
473 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
474 final Set<Taxonomy> tax_set = new HashSet<Taxonomy>();
475 for( final PhylogenyNode n : descs ) {
476 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
479 tax_set.add( n.getNodeData().getTaxonomy() );
485 * Returns a map of distinct taxonomies of
486 * all external nodes of node.
487 * If at least one of the external nodes has no taxonomy,
491 public static SortedMap<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
492 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
493 final SortedMap<Taxonomy, Integer> tax_map = new TreeMap<Taxonomy, Integer>();
494 for( final PhylogenyNode n : descs ) {
495 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
498 final Taxonomy t = n.getNodeData().getTaxonomy();
499 if ( tax_map.containsKey( t ) ) {
500 tax_map.put( t, tax_map.get( t ) + 1 );
509 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
510 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
512 for( final PhylogenyNode n : descs ) {
513 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
521 * Deep copies the phylogeny originating from this node.
523 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
524 if ( source == null ) {
528 final PhylogenyNode newnode = source.copyNodeData();
529 if ( !source.isExternal() ) {
530 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
531 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
539 * Shallow copies the phylogeny originating from this node.
541 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
542 if ( source == null ) {
546 final PhylogenyNode newnode = source.copyNodeDataShallow();
547 if ( !source.isExternal() ) {
548 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
549 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
556 public static void deleteExternalNodesNegativeSelection( final Set<Integer> to_delete, final Phylogeny phy ) {
558 for( final Integer id : to_delete ) {
559 phy.deleteSubtree( phy.getNode( id ), true );
564 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
565 throws IllegalArgumentException {
566 for( int i = 0; i < node_names_to_delete.length; ++i ) {
567 if ( ForesterUtil.isEmpty( node_names_to_delete[ i ] ) ) {
570 List<PhylogenyNode> nodes = null;
571 nodes = p.getNodes( node_names_to_delete[ i ] );
572 final Iterator<PhylogenyNode> it = nodes.iterator();
573 while ( it.hasNext() ) {
574 final PhylogenyNode n = it.next();
575 if ( !n.isExternal() ) {
576 throw new IllegalArgumentException( "attempt to delete non-external node \""
577 + node_names_to_delete[ i ] + "\"" );
579 p.deleteSubtree( n, true );
584 public static void deleteExternalNodesPositiveSelection( final Set<Taxonomy> species_to_keep, final Phylogeny phy ) {
585 // final Set<Integer> to_delete = new HashSet<Integer>();
586 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
587 final PhylogenyNode n = it.next();
588 if ( n.getNodeData().isHasTaxonomy() ) {
589 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
590 //to_delete.add( n.getNodeId() );
591 phy.deleteSubtree( n, true );
595 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
599 phy.externalNodesHaveChanged();
600 // deleteExternalNodesNegativeSelection( to_delete, phy );
603 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
604 final Phylogeny p ) {
605 final PhylogenyNodeIterator it = p.iteratorExternalForward();
606 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
608 Arrays.sort( node_names_to_keep );
609 while ( it.hasNext() ) {
610 final String curent_name = it.next().getName();
611 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
612 to_delete[ i++ ] = curent_name;
615 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
616 final List<String> deleted = new ArrayList<String>();
617 for( final String n : to_delete ) {
618 if ( !ForesterUtil.isEmpty( n ) ) {
625 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
626 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
627 final Set<Integer> encountered = new HashSet<Integer>();
628 if ( !node.isExternal() ) {
629 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
630 for( PhylogenyNode current : exts ) {
631 descs.add( current );
632 while ( current != node ) {
633 current = current.getParent();
634 if ( encountered.contains( current.getId() ) ) {
637 descs.add( current );
638 encountered.add( current.getId() );
652 public static Color getBranchColorValue( final PhylogenyNode node ) {
653 if ( node.getBranchData().getBranchColor() == null ) {
656 return node.getBranchData().getBranchColor().getValue();
662 public static double getBranchWidthValue( final PhylogenyNode node ) {
663 if ( !node.getBranchData().isHasBranchWidth() ) {
664 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
666 return node.getBranchData().getBranchWidth().getValue();
672 public static double getConfidenceValue( final PhylogenyNode node ) {
673 if ( !node.getBranchData().isHasConfidences() ) {
674 return Confidence.CONFIDENCE_DEFAULT_VALUE;
676 return node.getBranchData().getConfidence( 0 ).getValue();
682 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
683 if ( !node.getBranchData().isHasConfidences() ) {
684 return new double[ 0 ];
686 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
688 for( final Confidence c : node.getBranchData().getConfidences() ) {
689 values[ i++ ] = c.getValue();
695 * Calculates the distance between PhylogenyNodes n1 and n2.
696 * PRECONDITION: n1 is a descendant of n2.
701 * @return distance between n1 and n2
703 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
706 if ( n1.getDistanceToParent() > 0.0 ) {
707 d += n1.getDistanceToParent();
715 * Returns taxonomy t if all external descendants have
716 * the same taxonomy t, null otherwise.
719 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
720 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
722 for( final PhylogenyNode n : descs ) {
723 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
726 else if ( tax == null ) {
727 tax = n.getNodeData().getTaxonomy();
729 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
736 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
737 final List<PhylogenyNode> children = node.getAllExternalDescendants();
738 PhylogenyNode farthest = null;
739 double longest = -Double.MAX_VALUE;
740 for( final PhylogenyNode child : children ) {
741 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
743 longest = PhylogenyMethods.getDistance( child, node );
749 public static PhylogenyMethods getInstance() {
750 if ( PhylogenyMethods._instance == null ) {
751 PhylogenyMethods._instance = new PhylogenyMethods();
753 return PhylogenyMethods._instance;
757 * Returns the largest confidence value found on phy.
759 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
760 double max = -Double.MAX_VALUE;
761 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
762 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
763 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
770 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
771 int min = Integer.MAX_VALUE;
774 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
776 if ( n.isInternal() ) {
777 d = n.getNumberOfDescendants();
787 * Convenience method for display purposes.
788 * Not intended for algorithms.
790 public static String getSpecies( final PhylogenyNode node ) {
791 if ( !node.getNodeData().isHasTaxonomy() ) {
794 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
795 return node.getNodeData().getTaxonomy().getTaxonomyCode();
797 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
798 return node.getNodeData().getTaxonomy().getScientificName();
801 return node.getNodeData().getTaxonomy().getCommonName();
806 * Returns all Nodes which are connected to external PhylogenyNode n of this
807 * Phylogeny by a path containing only speciation events. We call these
808 * "super orthologs". Nodes are returned as Vector of references to Nodes.
810 * PRECONDITION: This tree must be binary and rooted, and speciation -
811 * duplication need to be assigned for each of its internal Nodes.
813 * Returns null if this Phylogeny is empty or if n is internal.
815 * external PhylogenyNode whose strictly speciation related Nodes
817 * @return Vector of references to all strictly speciation related Nodes of
818 * PhylogenyNode n of this Phylogeny, null if this Phylogeny is
819 * empty or if n is internal
821 public static List<PhylogenyNode> getSuperOrthologousNodes( final PhylogenyNode n ) {
823 PhylogenyNode node = n, deepest = null;
824 final List<PhylogenyNode> v = new ArrayList<PhylogenyNode>();
825 if ( !node.isExternal() ) {
828 while ( !node.isRoot() && !node.getParent().isDuplication() ) {
829 node = node.getParent();
832 deepest.setIndicatorsToZero();
834 if ( !node.isExternal() ) {
835 if ( node.getIndicator() == 0 ) {
836 node.setIndicator( ( byte ) 1 );
837 if ( !node.isDuplication() ) {
838 node = node.getChildNode1();
841 if ( node.getIndicator() == 1 ) {
842 node.setIndicator( ( byte ) 2 );
843 if ( !node.isDuplication() ) {
844 node = node.getChildNode2();
847 if ( ( node != deepest ) && ( node.getIndicator() == 2 ) ) {
848 node = node.getParent();
855 if ( node != deepest ) {
856 node = node.getParent();
859 node.setIndicator( ( byte ) 2 );
862 } while ( ( node != deepest ) || ( deepest.getIndicator() != 2 ) );
867 * Convenience method for display purposes.
868 * Not intended for algorithms.
870 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
871 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
874 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
878 * Returns all Nodes which are connected to external PhylogenyNode n of this
879 * Phylogeny by a path containing, and leading to, only duplication events.
880 * We call these "ultra paralogs". Nodes are returned as Vector of
881 * references to Nodes.
883 * PRECONDITION: This tree must be binary and rooted, and speciation -
884 * duplication need to be assigned for each of its internal Nodes.
886 * Returns null if this Phylogeny is empty or if n is internal.
888 * (Last modified: 10/06/01)
891 * external PhylogenyNode whose ultra paralogs are to be returned
892 * @return Vector of references to all ultra paralogs of PhylogenyNode n of
893 * this Phylogeny, null if this Phylogeny is empty or if n is
896 public static List<PhylogenyNode> getUltraParalogousNodes( final PhylogenyNode n ) {
898 PhylogenyNode node = n;
899 if ( !node.isExternal() ) {
902 while ( !node.isRoot() && node.getParent().isDuplication() && areAllChildrenDuplications( node.getParent() ) ) {
903 node = node.getParent();
905 final List<PhylogenyNode> nodes = node.getAllExternalDescendants();
910 public static String inferCommonPartOfScientificNameOfDescendants( final PhylogenyNode node ) {
911 final List<PhylogenyNode> descs = node.getDescendants();
913 for( final PhylogenyNode n : descs ) {
914 if ( !n.getNodeData().isHasTaxonomy()
915 || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
918 else if ( sn == null ) {
919 sn = n.getNodeData().getTaxonomy().getScientificName().trim();
922 String sn_current = n.getNodeData().getTaxonomy().getScientificName().trim();
923 if ( !sn.equals( sn_current ) ) {
924 boolean overlap = false;
925 while ( ( sn.indexOf( ' ' ) >= 0 ) || ( sn_current.indexOf( ' ' ) >= 0 ) ) {
926 if ( ForesterUtil.countChars( sn, ' ' ) > ForesterUtil.countChars( sn_current, ' ' ) ) {
927 sn = sn.substring( 0, sn.lastIndexOf( ' ' ) ).trim();
930 sn_current = sn_current.substring( 0, sn_current.lastIndexOf( ' ' ) ).trim();
932 if ( sn.equals( sn_current ) ) {
946 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
947 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
948 if ( node.getChildNode( i ).isExternal() ) {
956 * This is case insensitive.
959 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
960 final String[] providers ) {
961 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
962 final String my_tax_prov = tax.getIdentifier().getProvider();
963 for( final String provider : providers ) {
964 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
975 private static boolean match( final String s,
977 final boolean case_sensitive,
978 final boolean partial ) {
979 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
982 String my_s = s.trim();
983 String my_query = query.trim();
984 if ( !case_sensitive ) {
985 my_s = my_s.toLowerCase();
986 my_query = my_query.toLowerCase();
989 return my_s.indexOf( my_query ) >= 0;
992 return my_s.equals( my_query );
996 public static void midpointRoot( final Phylogeny phylogeny ) {
997 if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
1000 final PhylogenyMethods methods = getInstance();
1001 final double farthest_d = methods.calculateFurthestDistance( phylogeny );
1002 final PhylogenyNode f1 = methods.getFarthestNode1();
1003 final PhylogenyNode f2 = methods.getFarthestNode2();
1004 if ( farthest_d <= 0.0 ) {
1007 double x = farthest_d / 2.0;
1008 PhylogenyNode n = f1;
1009 if ( PhylogenyMethods.getDistance( f1, phylogeny.getRoot() ) < PhylogenyMethods.getDistance( f2, phylogeny
1013 while ( ( x > n.getDistanceToParent() ) && !n.isRoot() ) {
1014 x -= ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 );
1017 phylogeny.reRoot( n, x );
1018 phylogeny.recalculateNumberOfExternalDescendants( true );
1019 final PhylogenyNode a = getFurthestDescendant( phylogeny.getRoot().getChildNode1() );
1020 final PhylogenyNode b = getFurthestDescendant( phylogeny.getRoot().getChildNode2() );
1021 final double da = getDistance( a, phylogeny.getRoot() );
1022 final double db = getDistance( b, phylogeny.getRoot() );
1023 if ( Math.abs( da - db ) > 0.000001 ) {
1024 throw new FailedConditionCheckException( "this should not have happened: midpoint rooting failed: da="
1025 + da + ", db=" + db + ", diff=" + Math.abs( da - db ) );
1029 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
1030 final double max_bootstrap_value,
1031 final double max_normalized_value ) {
1032 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
1033 final PhylogenyNode node = iter.next();
1034 if ( node.isInternal() ) {
1035 final double confidence = getConfidenceValue( node );
1036 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
1037 if ( confidence >= max_bootstrap_value ) {
1038 setBootstrapConfidence( node, max_normalized_value );
1041 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
1048 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
1049 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
1050 if ( phy.isEmpty() ) {
1053 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1054 nodes.add( iter.next() );
1059 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
1060 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
1061 final PhylogenyNode node = iter.next();
1066 if ( node.isInternal() ) {
1067 for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
1068 final PhylogenyNode child_node = iterator.next();
1069 final Color child_color = getBranchColorValue( child_node );
1070 if ( child_color != null ) {
1072 red += child_color.getRed();
1073 green += child_color.getGreen();
1074 blue += child_color.getBlue();
1077 setBranchColorValue( node,
1078 new Color( ForesterUtil.roundToInt( red / n ),
1079 ForesterUtil.roundToInt( green / n ),
1080 ForesterUtil.roundToInt( blue / n ) ) );
1085 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
1086 if ( remove_me.isRoot() ) {
1087 throw new IllegalArgumentException( "ill advised attempt to remove root node" );
1089 if ( remove_me.isExternal() ) {
1090 phylogeny.deleteSubtree( remove_me, false );
1093 final PhylogenyNode parent = remove_me.getParent();
1094 final List<PhylogenyNode> descs = remove_me.getDescendants();
1095 parent.removeChildNode( remove_me );
1096 for( final PhylogenyNode desc : descs ) {
1097 parent.addAsChild( desc );
1098 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
1099 desc.getDistanceToParent() ) );
1101 remove_me.setParent( null );
1102 phylogeny.setIdHash( null );
1103 phylogeny.externalNodesHaveChanged();
1107 public static List<PhylogenyNode> searchData( final String query,
1108 final Phylogeny phy,
1109 final boolean case_sensitive,
1110 final boolean partial ) {
1111 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
1112 if ( phy.isEmpty() || ( query == null ) ) {
1115 if ( ForesterUtil.isEmpty( query ) ) {
1118 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1119 final PhylogenyNode node = iter.next();
1120 boolean match = false;
1121 if ( match( node.getName(), query, case_sensitive, partial ) ) {
1124 else if ( node.getNodeData().isHasTaxonomy()
1125 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) {
1128 else if ( node.getNodeData().isHasTaxonomy()
1129 && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) {
1132 else if ( node.getNodeData().isHasTaxonomy()
1133 && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) {
1136 else if ( node.getNodeData().isHasTaxonomy()
1137 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1138 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1144 else if ( node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1145 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1146 I: for( final String syn : syns ) {
1147 if ( match( syn, query, case_sensitive, partial ) ) {
1153 if ( !match && node.getNodeData().isHasSequence()
1154 && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) {
1157 if ( !match && node.getNodeData().isHasSequence()
1158 && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
1162 && node.getNodeData().isHasSequence()
1163 && ( node.getNodeData().getSequence().getAccession() != null )
1164 && match( node.getNodeData().getSequence().getAccession().getValue(),
1170 if ( !match && node.getNodeData().isHasSequence()
1171 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1172 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1173 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1174 if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) {
1180 if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1181 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1182 I: while ( it.hasNext() ) {
1183 if ( match( it.next(), query, case_sensitive, partial ) ) {
1188 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1189 I: while ( it.hasNext() ) {
1190 if ( match( it.next(), query, case_sensitive, partial ) ) {
1203 public static List<PhylogenyNode> searchDataLogicalAnd( final String[] queries,
1204 final Phylogeny phy,
1205 final boolean case_sensitive,
1206 final boolean partial ) {
1207 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
1208 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1211 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1212 final PhylogenyNode node = iter.next();
1213 boolean all_matched = true;
1214 for( final String query : queries ) {
1215 boolean match = false;
1216 if ( ForesterUtil.isEmpty( query ) ) {
1219 if ( match( node.getName(), query, case_sensitive, partial ) ) {
1222 else if ( node.getNodeData().isHasTaxonomy()
1223 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) {
1226 else if ( node.getNodeData().isHasTaxonomy()
1227 && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) {
1230 else if ( node.getNodeData().isHasTaxonomy()
1231 && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) {
1234 else if ( node.getNodeData().isHasTaxonomy()
1235 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1236 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1242 else if ( node.getNodeData().isHasTaxonomy()
1243 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1244 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1245 I: for( final String syn : syns ) {
1246 if ( match( syn, query, case_sensitive, partial ) ) {
1252 if ( !match && node.getNodeData().isHasSequence()
1253 && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) {
1256 if ( !match && node.getNodeData().isHasSequence()
1257 && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
1261 && node.getNodeData().isHasSequence()
1262 && ( node.getNodeData().getSequence().getAccession() != null )
1263 && match( node.getNodeData().getSequence().getAccession().getValue(),
1269 if ( !match && node.getNodeData().isHasSequence()
1270 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1271 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1272 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1273 if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) {
1279 if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1280 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1281 I: while ( it.hasNext() ) {
1282 if ( match( it.next(), query, case_sensitive, partial ) ) {
1287 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1288 I: while ( it.hasNext() ) {
1289 if ( match( it.next(), query, case_sensitive, partial ) ) {
1294 // final String[] bcp_ary = node.getNodeData().getBinaryCharacters()
1295 // .getPresentCharactersAsStringArray();
1296 // I: for( final String bc : bcp_ary ) {
1297 // if ( match( bc, query, case_sensitive, partial ) ) {
1302 // final String[] bcg_ary = node.getNodeData().getBinaryCharacters()
1303 // .getGainedCharactersAsStringArray();
1304 // I: for( final String bc : bcg_ary ) {
1305 // if ( match( bc, query, case_sensitive, partial ) ) {
1312 all_matched = false;
1316 if ( all_matched ) {
1324 * Convenience method.
1325 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1327 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1328 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1331 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1332 if ( node.getBranchData().getBranchColor() == null ) {
1333 node.getBranchData().setBranchColor( new BranchColor() );
1335 node.getBranchData().getBranchColor().setValue( color );
1339 * Convenience method
1341 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1342 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1346 * Convenience method.
1347 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1349 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1350 setConfidence( node, confidence_value, "" );
1354 * Convenience method.
1355 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1357 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1358 Confidence c = null;
1359 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1360 c = node.getBranchData().getConfidence( 0 );
1363 c = new Confidence();
1364 node.getBranchData().addConfidence( c );
1367 c.setValue( confidence_value );
1370 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1371 if ( !node.getNodeData().isHasTaxonomy() ) {
1372 node.getNodeData().setTaxonomy( new Taxonomy() );
1374 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1378 * Convenience method to set the taxonomy code of a phylogeny node.
1382 * @param taxonomy_code
1384 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code ) {
1385 if ( !node.getNodeData().isHasTaxonomy() ) {
1386 node.getNodeData().setTaxonomy( new Taxonomy() );
1388 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1392 * Removes from Phylogeny to_be_stripped all external Nodes which are
1393 * associated with a species NOT found in Phylogeny reference.
1396 * a reference Phylogeny
1397 * @param to_be_stripped
1398 * Phylogeny to be stripped
1399 * @return number of external nodes removed from to_be_stripped
1401 public static int taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, final Phylogeny to_be_stripped ) {
1402 final Set<String> ref_ext_taxo = new HashSet<String>();
1403 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1404 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1405 ref_ext_taxo.add( getSpecies( it.next() ) );
1407 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1408 final PhylogenyNode n = it.next();
1409 if ( !ref_ext_taxo.contains( getSpecies( n ) ) ) {
1410 nodes_to_delete.add( n );
1413 for( final PhylogenyNode phylogenyNode : nodes_to_delete ) {
1414 to_be_stripped.deleteSubtree( phylogenyNode, true );
1416 return nodes_to_delete.size();
1419 public static enum PhylogenyNodeField {
1422 TAXONOMY_SCIENTIFIC_NAME,
1423 TAXONOMY_COMMON_NAME,
1426 TAXONOMY_ID_UNIPROT_1,
1427 TAXONOMY_ID_UNIPROT_2;
1430 public static enum TAXONOMY_EXTRACTION {
1431 NO, YES, PFAM_STYLE_ONLY;