2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.phylogeny;
28 import java.awt.Color;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
40 import java.util.SortedMap;
41 import java.util.TreeMap;
43 import org.forester.io.parsers.PhylogenyParser;
44 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
45 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
46 import org.forester.io.parsers.util.PhylogenyParserException;
47 import org.forester.phylogeny.data.BranchColor;
48 import org.forester.phylogeny.data.BranchWidth;
49 import org.forester.phylogeny.data.Confidence;
50 import org.forester.phylogeny.data.DomainArchitecture;
51 import org.forester.phylogeny.data.Event;
52 import org.forester.phylogeny.data.Identifier;
53 import org.forester.phylogeny.data.PhylogenyDataUtil;
54 import org.forester.phylogeny.data.Sequence;
55 import org.forester.phylogeny.data.Taxonomy;
56 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
57 import org.forester.phylogeny.factories.PhylogenyFactory;
58 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
59 import org.forester.util.BasicDescriptiveStatistics;
60 import org.forester.util.DescriptiveStatistics;
61 import org.forester.util.FailedConditionCheckException;
62 import org.forester.util.ForesterUtil;
64 public class PhylogenyMethods {
66 private static PhylogenyMethods _instance = null;
67 private PhylogenyNode _farthest_1 = null;
68 private PhylogenyNode _farthest_2 = null;
70 private PhylogenyMethods() {
71 // Hidden constructor.
75 * Calculates the distance between PhylogenyNodes node1 and node2.
80 * @return distance between node1 and node2
82 public double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
83 final PhylogenyNode lca = calculateLCA( node1, node2 );
84 final PhylogenyNode n1 = node1;
85 final PhylogenyNode n2 = node2;
86 return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) );
89 public double calculateFurthestDistance( final Phylogeny phylogeny ) {
90 if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
95 PhylogenyNode node_1 = null;
96 PhylogenyNode node_2 = null;
97 double farthest_d = -Double.MAX_VALUE;
98 final PhylogenyMethods methods = PhylogenyMethods.getInstance();
99 final List<PhylogenyNode> ext_nodes = phylogeny.getRoot().getAllExternalDescendants();
100 for( int i = 1; i < ext_nodes.size(); ++i ) {
101 for( int j = 0; j < i; ++j ) {
102 final double d = methods.calculateDistance( ext_nodes.get( i ), ext_nodes.get( j ) );
104 throw new RuntimeException( "distance cannot be negative" );
106 if ( d > farthest_d ) {
108 node_1 = ext_nodes.get( i );
109 node_2 = ext_nodes.get( j );
113 _farthest_1 = node_1;
114 _farthest_2 = node_2;
118 final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
119 return calculateLCA( n1, n2 ).getNodeData().getEvent();
123 public Object clone() throws CloneNotSupportedException {
124 throw new CloneNotSupportedException();
127 public PhylogenyNode getFarthestNode1() {
131 public PhylogenyNode getFarthestNode2() {
135 final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
136 if ( n.isInternal() ) {
137 throw new IllegalArgumentException( "node is not external" );
139 final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
140 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
141 final PhylogenyNode i = it.next();
142 if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
146 for( final PhylogenyNode d : to_delete ) {
147 phy.deleteSubtree( d, true );
149 phy.clearHashIdToNodeMap();
150 phy.externalNodesHaveChanged();
154 * Returns the LCA of PhylogenyNodes node1 and node2.
159 * @return LCA of node1 and node2
161 public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) {
162 if ( node1 == node2 ) {
165 if ( ( node1.getParent() == node2.getParent() ) ) {
166 return node1.getParent();
168 int depth1 = node1.calculateDepth();
169 int depth2 = node2.calculateDepth();
170 while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) {
171 if ( depth1 > depth2 ) {
172 node1 = node1.getParent();
175 else if ( depth2 > depth1 ) {
176 node2 = node2.getParent();
180 if ( node1 == node2 ) {
183 node1 = node1.getParent();
184 node2 = node2.getParent();
189 throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" );
192 public static final void preOrderReId( final Phylogeny phy ) {
193 if ( phy.isEmpty() ) {
196 phy.setIdToNodeMap( null );
197 int i = PhylogenyNode.getNodeCount();
198 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
199 it.next().setId( i++ );
201 PhylogenyNode.setNodeCount( i );
205 * Returns the LCA of PhylogenyNodes node1 and node2.
206 * Precondition: ids are in pre-order (or level-order).
211 * @return LCA of node1 and node2
213 public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) {
214 while ( node1 != node2 ) {
215 if ( node1.getId() > node2.getId() ) {
216 node1 = node1.getParent();
219 node2 = node2.getParent();
226 * Returns all orthologs of the external PhylogenyNode n of this Phylogeny.
227 * Orthologs are returned as List of node references.
229 * PRECONDITION: This tree must be binary and rooted, and speciation -
230 * duplication need to be assigned for each of its internal Nodes.
232 * Returns null if this Phylogeny is empty or if n is internal.
234 * external PhylogenyNode whose orthologs are to be returned
235 * @return Vector of references to all orthologous Nodes of PhylogenyNode n
236 * of this Phylogeny, null if this Phylogeny is empty or if n is
239 public final static List<PhylogenyNode> getOrthologousNodes( final Phylogeny phy, final PhylogenyNode node ) {
240 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
241 PhylogenyMethods.preOrderReId( phy );
242 final PhylogenyNodeIterator it = phy.iteratorExternalForward();
243 while ( it.hasNext() ) {
244 final PhylogenyNode temp_node = it.next();
245 if ( ( temp_node != node ) && !calculateLCAonTreeWithIdsInPreOrder( node, temp_node ).isDuplication() ) {
246 nodes.add( temp_node );
252 public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
253 final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
254 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
255 final PhylogenyNode n = iter.next();
256 nodes.put( n.getName(), n );
261 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException {
262 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
263 final Phylogeny[] trees = factory.create( file, parser );
264 if ( ( trees == null ) || ( trees.length == 0 ) ) {
265 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
270 public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List<File> files )
272 final List<Phylogeny> tree_list = new ArrayList<Phylogeny>();
273 for( final File file : files ) {
274 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
275 final Phylogeny[] trees = factory.create( file, parser );
276 if ( ( trees == null ) || ( trees.length == 0 ) ) {
277 throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file );
279 tree_list.addAll( Arrays.asList( trees ) );
281 return tree_list.toArray( new Phylogeny[ tree_list.size() ] );
284 final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) {
285 final PhylogenyNodeIterator it = phy.iteratorPostorder();
286 while ( it.hasNext() ) {
287 final PhylogenyNode n = it.next();
288 if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
289 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
292 d = Double.parseDouble( n.getName() );
294 catch ( final Exception e ) {
298 n.getBranchData().addConfidence( new Confidence( d, "" ) );
306 final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
307 final PhylogenyNodeIterator it = phy.iteratorPostorder();
308 while ( it.hasNext() ) {
309 final PhylogenyNode n = it.next();
310 if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
313 value = Double.parseDouble( n.getName() );
315 catch ( final NumberFormatException e ) {
316 throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
317 + e.getLocalizedMessage() );
319 if ( value >= 0.0 ) {
320 n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
327 final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
328 class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
331 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
332 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
333 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
334 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
335 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
336 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
338 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
339 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
340 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
341 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
343 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
344 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
345 return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
346 .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
349 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
350 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
351 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
352 return n1.getNodeData().getSequence().getName().toLowerCase()
353 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
355 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
356 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
357 return n1.getNodeData().getSequence().getSymbol()
358 .compareTo( n2.getNodeData().getSequence().getSymbol() );
360 if ( ( n1.getNodeData().getSequence().getAccession() != null )
361 && ( n2.getNodeData().getSequence().getAccession() != null )
362 && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
363 && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
364 return n1.getNodeData().getSequence().getAccession().getValue()
365 .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
368 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
369 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
374 class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
377 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
378 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
379 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
380 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
381 return n1.getNodeData().getSequence().getName().toLowerCase()
382 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
384 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
385 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
386 return n1.getNodeData().getSequence().getSymbol()
387 .compareTo( n2.getNodeData().getSequence().getSymbol() );
389 if ( ( n1.getNodeData().getSequence().getAccession() != null )
390 && ( n2.getNodeData().getSequence().getAccession() != null )
391 && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
392 && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
393 return n1.getNodeData().getSequence().getAccession().getValue()
394 .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
397 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
398 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
399 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
400 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
401 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
403 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
404 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
405 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
406 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
408 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
409 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
410 return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
411 .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
414 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
415 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
420 class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
423 public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
424 if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
425 return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
427 if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
428 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
429 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
430 return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
431 .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
433 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
434 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
435 return n1.getNodeData().getTaxonomy().getTaxonomyCode()
436 .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
438 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
439 && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
440 return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
441 .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
444 if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
445 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
446 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
447 return n1.getNodeData().getSequence().getName().toLowerCase()
448 .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
450 if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
451 && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
452 return n1.getNodeData().getSequence().getSymbol()
453 .compareTo( n2.getNodeData().getSequence().getSymbol() );
455 if ( ( n1.getNodeData().getSequence().getAccession() != null )
456 && ( n2.getNodeData().getSequence().getAccession() != null )
457 && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
458 && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
459 return n1.getNodeData().getSequence().getAccession().getValue()
460 .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
466 Comparator<PhylogenyNode> c;
469 c = new PhylogenyNodeSortSequencePriority();
472 c = new PhylogenyNodeSortNodeNamePriority();
475 c = new PhylogenyNodeSortTaxonomyPriority();
477 final List<PhylogenyNode> descs = node.getDescendants();
478 Collections.sort( descs, c );
480 for( final PhylogenyNode desc : descs ) {
481 node.setChildNode( i++, desc );
485 final static public void transferNodeNameToField( final Phylogeny phy,
486 final PhylogenyMethods.PhylogenyNodeField field,
487 final boolean external_only ) throws PhyloXmlDataFormatException {
488 final PhylogenyNodeIterator it = phy.iteratorPostorder();
489 while ( it.hasNext() ) {
490 final PhylogenyNode n = it.next();
491 if ( external_only && n.isInternal() ) {
494 final String name = n.getName().trim();
495 if ( !ForesterUtil.isEmpty( name ) ) {
499 setTaxonomyCode( n, name );
501 case TAXONOMY_SCIENTIFIC_NAME:
503 if ( !n.getNodeData().isHasTaxonomy() ) {
504 n.getNodeData().setTaxonomy( new Taxonomy() );
506 n.getNodeData().getTaxonomy().setScientificName( name );
508 case TAXONOMY_COMMON_NAME:
510 if ( !n.getNodeData().isHasTaxonomy() ) {
511 n.getNodeData().setTaxonomy( new Taxonomy() );
513 n.getNodeData().getTaxonomy().setCommonName( name );
515 case SEQUENCE_SYMBOL:
517 if ( !n.getNodeData().isHasSequence() ) {
518 n.getNodeData().setSequence( new Sequence() );
520 n.getNodeData().getSequence().setSymbol( name );
524 if ( !n.getNodeData().isHasSequence() ) {
525 n.getNodeData().setSequence( new Sequence() );
527 n.getNodeData().getSequence().setName( name );
529 case TAXONOMY_ID_UNIPROT_1: {
530 if ( !n.getNodeData().isHasTaxonomy() ) {
531 n.getNodeData().setTaxonomy( new Taxonomy() );
534 final int i = name.indexOf( '_' );
536 id = name.substring( 0, i );
541 n.getNodeData().getTaxonomy()
542 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
545 case TAXONOMY_ID_UNIPROT_2: {
546 if ( !n.getNodeData().isHasTaxonomy() ) {
547 n.getNodeData().setTaxonomy( new Taxonomy() );
550 final int i = name.indexOf( '_' );
552 id = name.substring( i + 1, name.length() );
557 n.getNodeData().getTaxonomy()
558 .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
562 if ( !n.getNodeData().isHasTaxonomy() ) {
563 n.getNodeData().setTaxonomy( new Taxonomy() );
565 n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
573 static double addPhylogenyDistances( final double a, final double b ) {
574 if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
577 else if ( a >= 0.0 ) {
580 else if ( b >= 0.0 ) {
583 return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
586 public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) {
587 if ( n.isExternal() ) {
591 if ( n.isDuplication() ) {
592 for( final PhylogenyNode desc : n.getDescendants() ) {
593 if ( !isAllDecendentsAreDuplications( desc ) ) {
605 public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
606 if ( node.isExternal() ) {
610 for( PhylogenyNode d : node.getAllExternalDescendants() ) {
612 while ( d != node ) {
613 if ( d.isCollapse() ) {
628 public static int calculateMaxDepth( final Phylogeny phy ) {
630 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
631 final PhylogenyNode node = iter.next();
632 final int steps = node.calculateDepth();
640 public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
642 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
643 final PhylogenyNode node = iter.next();
644 final double d = node.calculateDistanceToRoot();
652 public static int countNumberOfPolytomies( final Phylogeny phy ) {
654 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
655 final PhylogenyNode n = iter.next();
656 if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) {
663 public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
664 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
665 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
666 final PhylogenyNode n = iter.next();
667 if ( !n.isExternal() ) {
668 stats.addValue( n.getNumberOfDescendants() );
674 public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
675 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
676 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
677 final PhylogenyNode n = iter.next();
678 if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) {
679 stats.addValue( n.getDistanceToParent() );
685 public static List<DescriptiveStatistics> calculatConfidenceStatistics( final Phylogeny phy ) {
686 final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
687 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
688 final PhylogenyNode n = iter.next();
689 if ( !n.isExternal() && !n.isRoot() ) {
690 if ( n.getBranchData().isHasConfidences() ) {
691 for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
692 final Confidence c = n.getBranchData().getConfidences().get( i );
693 if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
694 stats.add( i, new BasicDescriptiveStatistics() );
696 if ( !ForesterUtil.isEmpty( c.getType() ) ) {
697 if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {
698 if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {
699 throw new IllegalArgumentException( "support values in node [" + n.toString()
700 + "] appear inconsistently ordered" );
703 stats.get( i ).setDescription( c.getType() );
705 stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
714 * Returns the set of distinct taxonomies of
715 * all external nodes of node.
716 * If at least one the external nodes has no taxonomy,
720 public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
721 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
722 final Set<Taxonomy> tax_set = new HashSet<Taxonomy>();
723 for( final PhylogenyNode n : descs ) {
724 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
727 tax_set.add( n.getNodeData().getTaxonomy() );
733 * Returns a map of distinct taxonomies of
734 * all external nodes of node.
735 * If at least one of the external nodes has no taxonomy,
739 public static SortedMap<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
740 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
741 final SortedMap<Taxonomy, Integer> tax_map = new TreeMap<Taxonomy, Integer>();
742 for( final PhylogenyNode n : descs ) {
743 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
746 final Taxonomy t = n.getNodeData().getTaxonomy();
747 if ( tax_map.containsKey( t ) ) {
748 tax_map.put( t, tax_map.get( t ) + 1 );
757 public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
758 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
760 for( final PhylogenyNode n : descs ) {
761 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
769 * Deep copies the phylogeny originating from this node.
771 static PhylogenyNode copySubTree( final PhylogenyNode source ) {
772 if ( source == null ) {
776 final PhylogenyNode newnode = source.copyNodeData();
777 if ( !source.isExternal() ) {
778 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
779 newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
787 * Shallow copies the phylogeny originating from this node.
789 static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
790 if ( source == null ) {
794 final PhylogenyNode newnode = source.copyNodeDataShallow();
795 if ( !source.isExternal() ) {
796 for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
797 newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
804 public static void deleteExternalNodesNegativeSelection( final Set<Integer> to_delete, final Phylogeny phy ) {
805 phy.clearHashIdToNodeMap();
806 for( final Integer id : to_delete ) {
807 phy.deleteSubtree( phy.getNode( id ), true );
809 phy.clearHashIdToNodeMap();
810 phy.externalNodesHaveChanged();
813 public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
814 throws IllegalArgumentException {
815 for( final String element : node_names_to_delete ) {
816 if ( ForesterUtil.isEmpty( element ) ) {
819 List<PhylogenyNode> nodes = null;
820 nodes = p.getNodes( element );
821 final Iterator<PhylogenyNode> it = nodes.iterator();
822 while ( it.hasNext() ) {
823 final PhylogenyNode n = it.next();
824 if ( !n.isExternal() ) {
825 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
827 p.deleteSubtree( n, true );
830 p.clearHashIdToNodeMap();
831 p.externalNodesHaveChanged();
834 public static void deleteExternalNodesPositiveSelection( final Set<Taxonomy> species_to_keep, final Phylogeny phy ) {
835 // final Set<Integer> to_delete = new HashSet<Integer>();
836 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
837 final PhylogenyNode n = it.next();
838 if ( n.getNodeData().isHasTaxonomy() ) {
839 if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
840 //to_delete.add( n.getNodeId() );
841 phy.deleteSubtree( n, true );
845 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
848 phy.clearHashIdToNodeMap();
849 phy.externalNodesHaveChanged();
852 public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
853 final Phylogeny p ) {
854 final PhylogenyNodeIterator it = p.iteratorExternalForward();
855 final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
857 Arrays.sort( node_names_to_keep );
858 while ( it.hasNext() ) {
859 final String curent_name = it.next().getName();
860 if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) {
861 to_delete[ i++ ] = curent_name;
864 PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
865 final List<String> deleted = new ArrayList<String>();
866 for( final String n : to_delete ) {
867 if ( !ForesterUtil.isEmpty( n ) ) {
874 public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
875 final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
876 final Set<Integer> encountered = new HashSet<Integer>();
877 if ( !node.isExternal() ) {
878 final List<PhylogenyNode> exts = node.getAllExternalDescendants();
879 for( PhylogenyNode current : exts ) {
880 descs.add( current );
881 while ( current != node ) {
882 current = current.getParent();
883 if ( encountered.contains( current.getId() ) ) {
886 descs.add( current );
887 encountered.add( current.getId() );
901 public static Color getBranchColorValue( final PhylogenyNode node ) {
902 if ( node.getBranchData().getBranchColor() == null ) {
905 return node.getBranchData().getBranchColor().getValue();
911 public static double getBranchWidthValue( final PhylogenyNode node ) {
912 if ( !node.getBranchData().isHasBranchWidth() ) {
913 return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
915 return node.getBranchData().getBranchWidth().getValue();
921 public static double getConfidenceValue( final PhylogenyNode node ) {
922 if ( !node.getBranchData().isHasConfidences() ) {
923 return Confidence.CONFIDENCE_DEFAULT_VALUE;
925 return node.getBranchData().getConfidence( 0 ).getValue();
931 public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
932 if ( !node.getBranchData().isHasConfidences() ) {
933 return new double[ 0 ];
935 final double[] values = new double[ node.getBranchData().getConfidences().size() ];
937 for( final Confidence c : node.getBranchData().getConfidences() ) {
938 values[ i++ ] = c.getValue();
944 * Calculates the distance between PhylogenyNodes n1 and n2.
945 * PRECONDITION: n1 is a descendant of n2.
950 * @return distance between n1 and n2
952 private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
955 if ( n1.getDistanceToParent() > 0.0 ) {
956 d += n1.getDistanceToParent();
964 * Returns taxonomy t if all external descendants have
965 * the same taxonomy t, null otherwise.
968 public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
969 final List<PhylogenyNode> descs = node.getAllExternalDescendants();
971 for( final PhylogenyNode n : descs ) {
972 if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
975 else if ( tax == null ) {
976 tax = n.getNodeData().getTaxonomy();
978 else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
985 public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
986 final List<PhylogenyNode> children = node.getAllExternalDescendants();
987 PhylogenyNode farthest = null;
988 double longest = -Double.MAX_VALUE;
989 for( final PhylogenyNode child : children ) {
990 if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
992 longest = PhylogenyMethods.getDistance( child, node );
998 public static PhylogenyMethods getInstance() {
999 if ( PhylogenyMethods._instance == null ) {
1000 PhylogenyMethods._instance = new PhylogenyMethods();
1002 return PhylogenyMethods._instance;
1006 * Returns the largest confidence value found on phy.
1008 static public double getMaximumConfidenceValue( final Phylogeny phy ) {
1009 double max = -Double.MAX_VALUE;
1010 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1011 final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
1012 if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
1019 static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
1020 int min = Integer.MAX_VALUE;
1023 for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
1025 if ( n.isInternal() ) {
1026 d = n.getNumberOfDescendants();
1036 * Convenience method for display purposes.
1037 * Not intended for algorithms.
1039 public static String getSpecies( final PhylogenyNode node ) {
1040 if ( !node.getNodeData().isHasTaxonomy() ) {
1043 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
1044 return node.getNodeData().getTaxonomy().getScientificName();
1046 if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1047 return node.getNodeData().getTaxonomy().getTaxonomyCode();
1050 return node.getNodeData().getTaxonomy().getCommonName();
1055 * Returns all Nodes which are connected to external PhylogenyNode n of this
1056 * Phylogeny by a path containing only speciation events. We call these
1057 * "super orthologs". Nodes are returned as Vector of references to Nodes.
1059 * PRECONDITION: This tree must be binary and rooted, and speciation -
1060 * duplication need to be assigned for each of its internal Nodes.
1062 * Returns null if this Phylogeny is empty or if n is internal.
1064 * external PhylogenyNode whose strictly speciation related Nodes
1065 * are to be returned
1066 * @return References to all strictly speciation related Nodes of
1067 * PhylogenyNode n of this Phylogeny, null if this Phylogeny is
1068 * empty or if n is internal
1070 public static List<PhylogenyNode> getSuperOrthologousNodes( final PhylogenyNode n ) {
1072 PhylogenyNode node = n;
1073 PhylogenyNode deepest = null;
1074 final List<PhylogenyNode> v = new ArrayList<PhylogenyNode>();
1075 if ( !node.isExternal() ) {
1078 while ( !node.isRoot() && !node.getParent().isDuplication() ) {
1079 node = node.getParent();
1082 deepest.setIndicatorsToZero();
1084 if ( !node.isExternal() ) {
1085 if ( node.getIndicator() == 0 ) {
1086 node.setIndicator( ( byte ) 1 );
1087 if ( !node.isDuplication() ) {
1088 node = node.getChildNode1();
1091 if ( node.getIndicator() == 1 ) {
1092 node.setIndicator( ( byte ) 2 );
1093 if ( !node.isDuplication() ) {
1094 node = node.getChildNode2();
1097 if ( ( node != deepest ) && ( node.getIndicator() == 2 ) ) {
1098 node = node.getParent();
1105 if ( node != deepest ) {
1106 node = node.getParent();
1109 node.setIndicator( ( byte ) 2 );
1112 } while ( ( node != deepest ) || ( deepest.getIndicator() != 2 ) );
1117 * Convenience method for display purposes.
1118 * Not intended for algorithms.
1120 public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
1121 if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
1124 return node.getNodeData().getTaxonomy().getIdentifier().getValue();
1128 * Returns all Nodes which are connected to external PhylogenyNode n of this
1129 * Phylogeny by a path containing, and leading to, only duplication events.
1130 * We call these "ultra paralogs". Nodes are returned as Vector of
1131 * references to Nodes.
1133 * PRECONDITION: This tree must be binary and rooted, and speciation -
1134 * duplication need to be assigned for each of its internal Nodes.
1136 * Returns null if this Phylogeny is empty or if n is internal.
1138 * (Last modified: 10/06/01)
1141 * external PhylogenyNode whose ultra paralogs are to be returned
1142 * @return Vector of references to all ultra paralogs of PhylogenyNode n of
1143 * this Phylogeny, null if this Phylogeny is empty or if n is
1146 public static List<PhylogenyNode> getUltraParalogousNodes( final PhylogenyNode n ) {
1148 PhylogenyNode node = n;
1149 if ( !node.isExternal() ) {
1150 throw new IllegalArgumentException( "attempt to get ultra-paralogous nodes of internal node" );
1152 while ( !node.isRoot() && node.getParent().isDuplication() && isAllDecendentsAreDuplications( node.getParent() ) ) {
1153 node = node.getParent();
1155 final List<PhylogenyNode> nodes = node.getAllExternalDescendants();
1160 public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
1161 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
1162 if ( node.getChildNode( i ).isExternal() ) {
1170 * This is case insensitive.
1173 public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
1174 final String[] providers ) {
1175 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
1176 final String my_tax_prov = tax.getIdentifier().getProvider();
1177 for( final String provider : providers ) {
1178 if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
1189 private static boolean match( final String s,
1191 final boolean case_sensitive,
1192 final boolean partial ) {
1193 if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
1196 String my_s = s.trim();
1197 String my_query = query.trim();
1198 if ( !case_sensitive ) {
1199 my_s = my_s.toLowerCase();
1200 my_query = my_query.toLowerCase();
1203 return my_s.indexOf( my_query ) >= 0;
1206 return my_s.equals( my_query );
1210 public static void midpointRoot( final Phylogeny phylogeny ) {
1211 if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
1214 final PhylogenyMethods methods = getInstance();
1215 final double farthest_d = methods.calculateFurthestDistance( phylogeny );
1216 final PhylogenyNode f1 = methods.getFarthestNode1();
1217 final PhylogenyNode f2 = methods.getFarthestNode2();
1218 if ( farthest_d <= 0.0 ) {
1221 double x = farthest_d / 2.0;
1222 PhylogenyNode n = f1;
1223 if ( PhylogenyMethods.getDistance( f1, phylogeny.getRoot() ) < PhylogenyMethods.getDistance( f2, phylogeny
1227 while ( ( x > n.getDistanceToParent() ) && !n.isRoot() ) {
1228 x -= ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 );
1231 phylogeny.reRoot( n, x );
1232 phylogeny.recalculateNumberOfExternalDescendants( true );
1233 final PhylogenyNode a = getFurthestDescendant( phylogeny.getRoot().getChildNode1() );
1234 final PhylogenyNode b = getFurthestDescendant( phylogeny.getRoot().getChildNode2() );
1235 final double da = getDistance( a, phylogeny.getRoot() );
1236 final double db = getDistance( b, phylogeny.getRoot() );
1237 if ( Math.abs( da - db ) > 0.000001 ) {
1238 throw new FailedConditionCheckException( "this should not have happened: midpoint rooting failed: da="
1239 + da + ", db=" + db + ", diff=" + Math.abs( da - db ) );
1243 public static void normalizeBootstrapValues( final Phylogeny phylogeny,
1244 final double max_bootstrap_value,
1245 final double max_normalized_value ) {
1246 for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
1247 final PhylogenyNode node = iter.next();
1248 if ( node.isInternal() ) {
1249 final double confidence = getConfidenceValue( node );
1250 if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
1251 if ( confidence >= max_bootstrap_value ) {
1252 setBootstrapConfidence( node, max_normalized_value );
1255 setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
1262 public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
1263 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
1264 if ( phy.isEmpty() ) {
1267 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1268 nodes.add( iter.next() );
1273 public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
1274 for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
1275 final PhylogenyNode node = iter.next();
1280 if ( node.isInternal() ) {
1281 //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
1282 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
1283 final PhylogenyNode child_node = node.getChildNode( i );
1284 final Color child_color = getBranchColorValue( child_node );
1285 if ( child_color != null ) {
1287 red += child_color.getRed();
1288 green += child_color.getGreen();
1289 blue += child_color.getBlue();
1292 setBranchColorValue( node,
1293 new Color( ForesterUtil.roundToInt( red / n ),
1294 ForesterUtil.roundToInt( green / n ),
1295 ForesterUtil.roundToInt( blue / n ) ) );
1300 public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
1301 if ( remove_me.isRoot() ) {
1302 throw new IllegalArgumentException( "ill advised attempt to remove root node" );
1304 if ( remove_me.isExternal() ) {
1305 phylogeny.deleteSubtree( remove_me, false );
1306 phylogeny.clearHashIdToNodeMap();
1307 phylogeny.externalNodesHaveChanged();
1310 final PhylogenyNode parent = remove_me.getParent();
1311 final List<PhylogenyNode> descs = remove_me.getDescendants();
1312 parent.removeChildNode( remove_me );
1313 for( final PhylogenyNode desc : descs ) {
1314 parent.addAsChild( desc );
1315 desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
1316 desc.getDistanceToParent() ) );
1318 remove_me.setParent( null );
1319 phylogeny.clearHashIdToNodeMap();
1320 phylogeny.externalNodesHaveChanged();
1324 public static List<PhylogenyNode> searchData( final String query,
1325 final Phylogeny phy,
1326 final boolean case_sensitive,
1327 final boolean partial,
1328 final boolean search_domains ) {
1329 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
1330 if ( phy.isEmpty() || ( query == null ) ) {
1333 if ( ForesterUtil.isEmpty( query ) ) {
1336 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1337 final PhylogenyNode node = iter.next();
1338 boolean match = false;
1339 if ( match( node.getName(), query, case_sensitive, partial ) ) {
1342 else if ( node.getNodeData().isHasTaxonomy()
1343 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) {
1346 else if ( node.getNodeData().isHasTaxonomy()
1347 && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) {
1350 else if ( node.getNodeData().isHasTaxonomy()
1351 && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) {
1354 else if ( node.getNodeData().isHasTaxonomy()
1355 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1356 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1362 else if ( node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1363 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1364 I: for( final String syn : syns ) {
1365 if ( match( syn, query, case_sensitive, partial ) ) {
1371 if ( !match && node.getNodeData().isHasSequence()
1372 && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) {
1375 if ( !match && node.getNodeData().isHasSequence()
1376 && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
1380 && node.getNodeData().isHasSequence()
1381 && ( node.getNodeData().getSequence().getAccession() != null )
1382 && match( node.getNodeData().getSequence().getAccession().getValue(),
1388 if ( search_domains && !match && node.getNodeData().isHasSequence()
1389 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1390 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1391 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1392 if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) {
1398 if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1399 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1400 I: while ( it.hasNext() ) {
1401 if ( match( it.next(), query, case_sensitive, partial ) ) {
1406 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1407 I: while ( it.hasNext() ) {
1408 if ( match( it.next(), query, case_sensitive, partial ) ) {
1421 public static List<PhylogenyNode> searchDataLogicalAnd( final String[] queries,
1422 final Phylogeny phy,
1423 final boolean case_sensitive,
1424 final boolean partial,
1425 final boolean search_domains ) {
1426 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
1427 if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
1430 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
1431 final PhylogenyNode node = iter.next();
1432 boolean all_matched = true;
1433 for( final String query : queries ) {
1434 boolean match = false;
1435 if ( ForesterUtil.isEmpty( query ) ) {
1438 if ( match( node.getName(), query, case_sensitive, partial ) ) {
1441 else if ( node.getNodeData().isHasTaxonomy()
1442 && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) {
1445 else if ( node.getNodeData().isHasTaxonomy()
1446 && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) {
1449 else if ( node.getNodeData().isHasTaxonomy()
1450 && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) {
1453 else if ( node.getNodeData().isHasTaxonomy()
1454 && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
1455 && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
1461 else if ( node.getNodeData().isHasTaxonomy()
1462 && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
1463 final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
1464 I: for( final String syn : syns ) {
1465 if ( match( syn, query, case_sensitive, partial ) ) {
1471 if ( !match && node.getNodeData().isHasSequence()
1472 && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) {
1475 if ( !match && node.getNodeData().isHasSequence()
1476 && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
1480 && node.getNodeData().isHasSequence()
1481 && ( node.getNodeData().getSequence().getAccession() != null )
1482 && match( node.getNodeData().getSequence().getAccession().getValue(),
1488 if ( search_domains && !match && node.getNodeData().isHasSequence()
1489 && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
1490 final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
1491 I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
1492 if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) {
1498 if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
1499 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
1500 I: while ( it.hasNext() ) {
1501 if ( match( it.next(), query, case_sensitive, partial ) ) {
1506 it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
1507 I: while ( it.hasNext() ) {
1508 if ( match( it.next(), query, case_sensitive, partial ) ) {
1515 all_matched = false;
1519 if ( all_matched ) {
1527 * Convenience method.
1528 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1530 public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
1531 setConfidence( node, bootstrap_confidence_value, "bootstrap" );
1534 public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
1535 if ( node.getBranchData().getBranchColor() == null ) {
1536 node.getBranchData().setBranchColor( new BranchColor() );
1538 node.getBranchData().getBranchColor().setValue( color );
1542 * Convenience method
1544 public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
1545 node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
1549 * Convenience method.
1550 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1552 public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
1553 setConfidence( node, confidence_value, "" );
1557 * Convenience method.
1558 * Sets value for the first confidence value (created if not present, values overwritten otherwise).
1560 public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
1561 Confidence c = null;
1562 if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
1563 c = node.getBranchData().getConfidence( 0 );
1566 c = new Confidence();
1567 node.getBranchData().addConfidence( c );
1570 c.setValue( confidence_value );
1573 public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
1574 if ( !node.getNodeData().isHasTaxonomy() ) {
1575 node.getNodeData().setTaxonomy( new Taxonomy() );
1577 node.getNodeData().getTaxonomy().setScientificName( scientific_name );
1581 * Convenience method to set the taxonomy code of a phylogeny node.
1585 * @param taxonomy_code
1586 * @throws PhyloXmlDataFormatException
1588 public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
1589 throws PhyloXmlDataFormatException {
1590 if ( !node.getNodeData().isHasTaxonomy() ) {
1591 node.getNodeData().setTaxonomy( new Taxonomy() );
1593 node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
1597 * Removes from Phylogeny to_be_stripped all external Nodes which are
1598 * associated with a species NOT found in Phylogeny reference.
1601 * a reference Phylogeny
1602 * @param to_be_stripped
1603 * Phylogeny to be stripped
1604 * @return number of external nodes removed from to_be_stripped
1606 public static int taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, final Phylogeny to_be_stripped ) {
1607 final Set<String> ref_ext_taxo = new HashSet<String>();
1608 for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
1609 final PhylogenyNode n = it.next();
1610 if ( !n.getNodeData().isHasTaxonomy() ) {
1611 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
1613 // ref_ext_taxo.add( getSpecies( n ) );
1614 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1615 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
1617 if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1618 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1621 final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
1622 for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
1623 final PhylogenyNode n = it.next();
1624 if ( !n.getNodeData().isHasTaxonomy() ) {
1625 nodes_to_delete.add( n );
1627 else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
1628 && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
1629 nodes_to_delete.add( n );
1632 for( final PhylogenyNode phylogenyNode : nodes_to_delete ) {
1633 to_be_stripped.deleteSubtree( phylogenyNode, true );
1635 to_be_stripped.clearHashIdToNodeMap();
1636 to_be_stripped.externalNodesHaveChanged();
1637 return nodes_to_delete.size();
1641 * Arranges the order of childern for each node of this Phylogeny in such a
1642 * way that either the branch with more children is on top (right) or on
1643 * bottom (left), dependent on the value of boolean order.
1646 * decides in which direction to order
1649 public static void orderAppearance( final PhylogenyNode n,
1650 final boolean order,
1651 final boolean order_ext_alphabetically,
1652 final DESCENDANT_SORT_PRIORITY pri ) {
1653 if ( n.isExternal() ) {
1657 PhylogenyNode temp = null;
1658 if ( ( n.getNumberOfDescendants() == 2 )
1659 && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
1660 && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) {
1661 temp = n.getChildNode1();
1662 n.setChild1( n.getChildNode2() );
1663 n.setChild2( temp );
1665 else if ( order_ext_alphabetically ) {
1666 boolean all_ext = true;
1667 for( final PhylogenyNode i : n.getDescendants() ) {
1668 if ( !i.isExternal() ) {
1674 PhylogenyMethods.sortNodeDescendents( n, pri );
1677 for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
1678 orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri );
1683 public static enum PhylogenyNodeField {
1686 TAXONOMY_SCIENTIFIC_NAME,
1687 TAXONOMY_COMMON_NAME,
1690 TAXONOMY_ID_UNIPROT_1,
1691 TAXONOMY_ID_UNIPROT_2,
1695 public static enum DESCENDANT_SORT_PRIORITY {
1696 TAXONOMY, SEQUENCE, NODE_NAME;