From dcd260b82367d6edd39ac2d86a37dcce7a769f64 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Wed, 13 Jul 2016 16:05:39 -0700 Subject: [PATCH] in progress --- .../org/forester/archaeopteryx/AptxConstants.java | 4 +- .../org/forester/archaeopteryx/ControlPanel.java | 38 +- .../src/org/forester/archaeopteryx/MainFrame.java | 6 +- .../org/forester/archaeopteryx/TreeColorSet.java | 37 +- .../org/forester/phylogeny/PhylogenyMethods.java | 3911 ++++++++++---------- .../src/org/forester/phylogeny/PhylogenyNode.java | 4 + forester/java/src/org/forester/test/Test.java | 32 +- 7 files changed, 2055 insertions(+), 1977 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/AptxConstants.java b/forester/java/src/org/forester/archaeopteryx/AptxConstants.java index f6049ce..2824995 100644 --- a/forester/java/src/org/forester/archaeopteryx/AptxConstants.java +++ b/forester/java/src/org/forester/archaeopteryx/AptxConstants.java @@ -38,8 +38,8 @@ public final class AptxConstants { final static boolean __ALLOW_PHYLOGENETIC_INFERENCE = true; public final static String PRG_NAME = "Archaeopteryx"; - final static String VERSION = "0.9914 beta"; - final static String PRG_DATE = "160712"; + final static String VERSION = "0.9914 test"; + final static String PRG_DATE = "160713"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Arial Unicode MS", "Dialog", "SansSerif", "Sans", "Arial", "Helvetica" }; diff --git a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java index 3c9990f..47a9f72 100644 --- a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java @@ -587,7 +587,7 @@ final class ControlPanel extends JPanel implements ActionListener { getSearchResetButton0().setEnabled( true ); getSearchResetButton0().setVisible( true ); String[] queries = null; - Set nodes = null; + Set nodes = null; if ( ( query_str.indexOf( ',' ) >= 0 ) && !getOptions().isSearchWithRegex() ) { queries = query_str.split( ",+" ); } @@ -596,7 +596,7 @@ final class ControlPanel extends JPanel implements ActionListener { queries[ 0 ] = query_str.trim(); } if ( ( queries != null ) && ( queries.length > 0 ) ) { - nodes = new HashSet(); + nodes = new HashSet(); for( String query : queries ) { if ( ForesterUtil.isEmpty( query ) ) { continue; @@ -626,15 +626,19 @@ final class ControlPanel extends JPanel implements ActionListener { } if ( getOptions().isInverseSearchResult() ) { final List all = PhylogenyMethods.obtainAllNodesAsList( tree ); - all.removeAll( nodes ); - nodes = new HashSet(); - nodes.addAll( all ); + final Set temp_nodes = nodes; + nodes = new HashSet(); + for( final PhylogenyNode n : all ) { + if ( (!temp_nodes.contains( n.getId() )) && n.isHasNodeData() ) { + nodes.add( n.getId() ); + } + } } } if ( ( nodes != null ) && ( nodes.size() > 0 ) ) { main_panel.getCurrentTreePanel().setFoundNodes0( new HashSet() ); - for( final PhylogenyNode node : nodes ) { - main_panel.getCurrentTreePanel().getFoundNodes0().add( node.getId() ); + for( final Long node : nodes ) { + main_panel.getCurrentTreePanel().getFoundNodes0().add( node ); } setSearchFoundCountsOnLabel0( nodes.size() ); } @@ -649,7 +653,7 @@ final class ControlPanel extends JPanel implements ActionListener { getSearchResetButton1().setEnabled( true ); getSearchResetButton1().setVisible( true ); String[] queries = null; - Set nodes = null; + Set nodes = null; if ( ( query_str.indexOf( ',' ) >= 0 ) && !getOptions().isSearchWithRegex() ) { queries = query_str.split( ",+" ); } @@ -658,7 +662,7 @@ final class ControlPanel extends JPanel implements ActionListener { queries[ 0 ] = query_str.trim(); } if ( ( queries != null ) && ( queries.length > 0 ) ) { - nodes = new HashSet(); + nodes = new HashSet(); for( String query : queries ) { if ( ForesterUtil.isEmpty( query ) ) { continue; @@ -688,15 +692,19 @@ final class ControlPanel extends JPanel implements ActionListener { } if ( getOptions().isInverseSearchResult() ) { final List all = PhylogenyMethods.obtainAllNodesAsList( tree ); - all.removeAll( nodes ); - nodes = new HashSet(); - nodes.addAll( all ); + final Set temp_nodes = nodes; + nodes = new HashSet(); + for( final PhylogenyNode n : all ) { + if ( (!temp_nodes.contains( n.getId() )) && n.isHasNodeData() ) { + nodes.add( n.getId() ); + } + } } } if ( ( nodes != null ) && ( nodes.size() > 0 ) ) { main_panel.getCurrentTreePanel().setFoundNodes1( new HashSet() ); - for( final PhylogenyNode node : nodes ) { - main_panel.getCurrentTreePanel().getFoundNodes1().add( node.getId() ); + for( final Long node : nodes ) { + main_panel.getCurrentTreePanel().getFoundNodes1().add( node ); } setSearchFoundCountsOnLabel1( nodes.size() ); } @@ -705,7 +713,7 @@ final class ControlPanel extends JPanel implements ActionListener { searchReset1(); } } - + private void setDrawPhylogram( final int index, final boolean b ) { getIsDrawPhylogramList().set( index, b ); } diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index 76d9ba7..e6309db 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -890,8 +890,10 @@ public abstract class MainFrame extends JFrame implements ActionListener { fc.showDialog( this, "Select the Base Font" ); getMainPanel().getTreeFontSet().setBaseFont( fc.getFont() ); getControlPanel().displayedPhylogenyMightHaveChanged( true ); - getMainPanel().getCurrentTreePanel().resetPreferredSize(); - getMainPanel().getCurrentTreePanel().updateOvSizes(); + if ( getMainPanel().getCurrentTreePanel() != null ) { + getMainPanel().getCurrentTreePanel().resetPreferredSize(); + getMainPanel().getCurrentTreePanel().updateOvSizes(); + } repaint(); } diff --git a/forester/java/src/org/forester/archaeopteryx/TreeColorSet.java b/forester/java/src/org/forester/archaeopteryx/TreeColorSet.java index f90ec22..56c2cc1 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreeColorSet.java +++ b/forester/java/src/org/forester/archaeopteryx/TreeColorSet.java @@ -57,7 +57,7 @@ public final class TreeColorSet { TAXONOMY, CONFIDENCE, BRANCH_LENGTH, BRANCH, NODE_BOX, COLLAPSED, MATCHING_NODES_A, MATCHING_NODES_B, MATCHING_NODES_A_AND_B, DUPLICATION, SPECIATION, DUPLICATION_OR_SPECATION, DOMAIN_LABEL, DOMAIN_BASE, BINARY_DOMAIN_COMBINATIONS, ANNOTATION, OVERVIEW }; - static final String[] SCHEME_NAMES = { "Default", "Black", "Black & White", "Silver", "Green", + static final String[] SCHEME_NAMES = { "Default", "Black", "Black & White", "Simple", "Silver", "Green", "White & Blue", "Cyan", "Orange", "Blue", "Blue & White", "Neon" }; private int _color_scheme; private final Color[][] _color_schemes = { { new Color( 0, 0, 0 ), // background_color @@ -120,7 +120,40 @@ public final class TreeColorSet { new Color( 0, 0, 0 ), // binary_domain_combinations_color new Color( 0, 0, 0 ) // annotation , new Color( 220, 220, 220 ) // ov - }, { new Color( 0, 0, 0 ), // background_color + }, + + + + + { new Color( 255, 255, 255 ), // background_color + new Color( 0, 255, 255 ), // background_color_gradient_bottom + new Color( 0, 0, 153 ), //sequence __ NEW + new Color( 0, 0, 102 ), // taxonomy + new Color( 0, 0, 204 ), // support + new Color( 0, 51, 255 ), // branch_length_color + new Color( 0, 0, 0 ), // branch_color + new Color( 0, 51, 255 ), // box_color + new Color( 0, 51, 255 ), // collapesed_fill_color + new Color( 0, 0, 255 ), // found_color 0 + new Color( 0, 255, 0 ), // found_color 1 + new Color( 0, 255, 255 ), // found_color 0 + 1 + new Color( 102, 51, 255 ), // duplication_box_color + new Color( 153, 153, 153 ), // speciation_box_color + new Color( 255, 255, 0 ), // duplication_speciation_color + new Color( 51, 51, 51), // domain_label + new Color( 51, 51, 51 ), // domains_base + new Color( 0, 0, 153 ), // binary_domain_combinations_color + new Color( 0, 0, 153 ),// annotation + new Color( 51, 51, 51 ) // ov + } , + + + + + + + + { new Color( 0, 0, 0 ), // background_color new Color( 0, 255, 255 ), // background_color_gradient_bottom new Color( 220, 220, 220 ), // sequence __ Silver new Color( 180, 180, 180 ), // taxonomy diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index c3fbc95..a5cf36a 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -1,1954 +1,1957 @@ -// $Id: -// FORESTER -- software libraries and applications -// for evolutionary biology research and applications. -// -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research -// All rights reserved -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -// -// Contact: phylosoft @ gmail . com -// WWW: https://sites.google.com/site/cmzmasek/home/software/forester - -package org.forester.phylogeny; - -import java.awt.Color; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; - -import org.forester.io.parsers.FastaParser; -import org.forester.io.parsers.PhylogenyParser; -import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; -import org.forester.io.parsers.phyloxml.PhyloXmlUtil; -import org.forester.io.parsers.util.PhylogenyParserException; -import org.forester.msa.Msa; -import org.forester.phylogeny.data.Accession; -import org.forester.phylogeny.data.Annotation; -import org.forester.phylogeny.data.BranchColor; -import org.forester.phylogeny.data.BranchWidth; -import org.forester.phylogeny.data.Confidence; -import org.forester.phylogeny.data.DomainArchitecture; -import org.forester.phylogeny.data.Event; -import org.forester.phylogeny.data.Identifier; -import org.forester.phylogeny.data.PhylogenyDataUtil; -import org.forester.phylogeny.data.Sequence; -import org.forester.phylogeny.data.Taxonomy; -import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; -import org.forester.phylogeny.factories.PhylogenyFactory; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; -import org.forester.util.BasicDescriptiveStatistics; -import org.forester.util.DescriptiveStatistics; -import org.forester.util.ForesterUtil; - -public class PhylogenyMethods { - - private PhylogenyMethods() { - // Hidden constructor. - } - - @Override - public Object clone() throws CloneNotSupportedException { - throw new CloneNotSupportedException(); - } - - public static boolean extractFastaInformation( final Phylogeny phy ) { - boolean could_extract = false; - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( !ForesterUtil.isEmpty( node.getName() ) ) { - final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() ); - if ( name_m.lookingAt() ) { - could_extract = true; - final String acc_source = name_m.group( 1 ); - final String acc = name_m.group( 2 ); - final String seq_name = name_m.group( 3 ); - final String tax_sn = name_m.group( 4 ); - if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); - } - if ( !ForesterUtil.isEmpty( seq_name ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setName( seq_name ); - } - if ( !ForesterUtil.isEmpty( tax_sn ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); - node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); - } - } - } - } - return could_extract; - } - - public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { - stats.addValue( n.getDistanceToParent() ); - } - } - return stats; - } - - public static List calculateConfidenceStatistics( final Phylogeny phy ) { - final List stats = new ArrayList(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() && !n.isRoot() ) { - if ( n.getBranchData().isHasConfidences() ) { - for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { - final Confidence c = n.getBranchData().getConfidences().get( i ); - if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { - stats.add( i, new BasicDescriptiveStatistics() ); - } - if ( !ForesterUtil.isEmpty( c.getType() ) ) { - if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { - if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { - throw new IllegalArgumentException( "support values in node [" + n.toString() - + "] appear inconsistently ordered" ); - } - } - stats.get( i ).setDescription( c.getType() ); - } - stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); - } - } - } - } - return stats; - } - - /** - * Calculates the distance between PhylogenyNodes node1 and node2. - * - * - * @param node1 - * @param node2 - * @return distance between node1 and node2 - */ - public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) { - final PhylogenyNode lca = calculateLCA( node1, node2 ); - final PhylogenyNode n1 = node1; - final PhylogenyNode n2 = node2; - return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) ); - } - - /** - * Returns the LCA of PhylogenyNodes node1 and node2. - * - * - * @param node1 - * @param node2 - * @return LCA of node1 and node2 - */ - public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) { - if ( node1 == null ) { - throw new IllegalArgumentException( "first argument (node) is null" ); - } - if ( node2 == null ) { - throw new IllegalArgumentException( "second argument (node) is null" ); - } - if ( node1 == node2 ) { - return node1; - } - if ( ( node1.getParent() == node2.getParent() ) ) { - return node1.getParent(); - } - int depth1 = node1.calculateDepth(); - int depth2 = node2.calculateDepth(); - while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) { - if ( depth1 > depth2 ) { - node1 = node1.getParent(); - depth1--; - } - else if ( depth2 > depth1 ) { - node2 = node2.getParent(); - depth2--; - } - else { - if ( node1 == node2 ) { - return node1; - } - node1 = node1.getParent(); - node2 = node2.getParent(); - depth1--; - depth2--; - } - } - throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" ); - } - - /** - * Returns the LCA of PhylogenyNodes node1 and node2. - * Precondition: ids are in pre-order (or level-order). - * - * - * @param node1 - * @param node2 - * @return LCA of node1 and node2 - */ - public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) { - if ( node1 == null ) { - throw new IllegalArgumentException( "first argument (node) is null" ); - } - if ( node2 == null ) { - throw new IllegalArgumentException( "second argument (node) is null" ); - } - while ( node1 != node2 ) { - if ( node1.getId() > node2.getId() ) { - node1 = node1.getParent(); - } - else { - node2 = node2.getParent(); - } - } - return node1; - } - - public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { - if ( node.isExternal() ) { - return 0; - } - short max = 0; - for( PhylogenyNode d : node.getAllExternalDescendants() ) { - short steps = 0; - while ( d != node ) { - if ( d.isCollapse() ) { - steps = 0; - } - else { - steps++; - } - d = d.getParent(); - } - if ( max < steps ) { - max = steps; - } - } - return max; - } - - public static int calculateMaxDepth( final Phylogeny phy ) { - int max = 0; - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final int steps = node.calculateDepth(); - if ( steps > max ) { - max = steps; - } - } - return max; - } - - public static double calculateMaxDistanceToRoot( final Phylogeny phy ) { - double max = 0.0; - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final double d = node.calculateDistanceToRoot(); - if ( d > max ) { - max = d; - } - } - return max; - } - - public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) { - double max = 0.0; - PhylogenyNode max_node = phy.getFirstExternalNode(); - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final double d = node.calculateDistanceToRoot(); - if ( d > max ) { - max = d; - max_node = node; - } - } - return max_node; - } - - public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - int x = 0; - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - x++; - } - } - return x; - } - - public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() ) { - stats.addValue( n.getNumberOfDescendants() ); - } - } - return stats; - } - - public final static void collapseSubtreeStructure( final PhylogenyNode n ) { - final List eds = n.getAllExternalDescendants(); - final List d = new ArrayList(); - for( final PhylogenyNode ed : eds ) { - d.add( calculateDistanceToAncestor( n, ed ) ); - } - for( int i = 0; i < eds.size(); ++i ) { - n.setChildNode( i, eds.get( i ) ); - eds.get( i ).setDistanceToParent( d.get( i ) ); - } - } - - public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) { - int count = 0; - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) { - count++; - } - } - return count; - } - - public static int countNumberOfPolytomies( final Phylogeny phy ) { - int count = 0; - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) { - count++; - } - } - return count; - } - - public static final HashMap createNameToExtNodeMap( final Phylogeny phy ) { - final HashMap nodes = new HashMap(); - final List ext = phy.getExternalNodes(); - for( final PhylogenyNode n : ext ) { - nodes.put( n.getName(), n ); - } - return nodes; - } - - public static void deleteExternalNodesNegativeSelection( final Set to_delete, final Phylogeny phy ) { - for( final Long id : to_delete ) { - phy.deleteSubtree( phy.getNode( id ), true ); - } - phy.clearHashIdToNodeMap(); - phy.externalNodesHaveChanged(); - } - - public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) - throws IllegalArgumentException { - for( final String element : node_names_to_delete ) { - if ( ForesterUtil.isEmpty( element ) ) { - continue; - } - List nodes = null; - nodes = p.getNodes( element ); - final Iterator it = nodes.iterator(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() ) { - throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" ); - } - p.deleteSubtree( n, true ); - } - } - p.clearHashIdToNodeMap(); - p.externalNodesHaveChanged(); - } - - public static List deleteExternalNodesPositiveSelection( final String[] node_names_to_keep, - final Phylogeny p ) { - final PhylogenyNodeIterator it = p.iteratorExternalForward(); - final String[] to_delete = new String[ p.getNumberOfExternalNodes() ]; - int i = 0; - Arrays.sort( node_names_to_keep ); - while ( it.hasNext() ) { - final String curent_name = it.next().getName(); - if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) { - to_delete[ i++ ] = curent_name; - } - } - PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p ); - final List deleted = new ArrayList(); - for( final String n : to_delete ) { - if ( !ForesterUtil.isEmpty( n ) ) { - deleted.add( n ); - } - } - return deleted; - } - - public static void deleteExternalNodesPositiveSelectionT( final List species_to_keep, final Phylogeny phy ) { - final Set to_delete = new HashSet(); - for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { - final PhylogenyNode n = it.next(); - if ( n.getNodeData().isHasTaxonomy() ) { - if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) { - to_delete.add( n.getId() ); - } - } - else { - throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" ); - } - } - deleteExternalNodesNegativeSelection( to_delete, phy ); - } - - final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) { - final ArrayList to_delete = new ArrayList(); - for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) { - to_delete.add( n ); - } - } - for( final PhylogenyNode d : to_delete ) { - PhylogenyMethods.removeNode( d, phy ); - } - phy.clearHashIdToNodeMap(); - phy.externalNodesHaveChanged(); - } - - final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { - if ( n.isInternal() ) { - throw new IllegalArgumentException( "node is not external" ); - } - final ArrayList to_delete = new ArrayList(); - for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { - final PhylogenyNode i = it.next(); - if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { - to_delete.add( i ); - } - } - for( final PhylogenyNode d : to_delete ) { - phy.deleteSubtree( d, true ); - } - phy.clearHashIdToNodeMap(); - phy.externalNodesHaveChanged(); - } - - public final static List> divideIntoSubTrees( final Phylogeny phy, - final double min_distance_to_root ) { - if ( min_distance_to_root <= 0 ) { - throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root ); - } - final List> l = new ArrayList>(); - setAllIndicatorsToZero( phy ); - for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { - final PhylogenyNode n = it.next(); - if ( n.getIndicator() != 0 ) { - continue; - } - l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) ); - if ( l.isEmpty() ) { - throw new RuntimeException( "this should not have happened" ); - } - } - return l; - } - - public static List getAllDescendants( final PhylogenyNode node ) { - final List descs = new ArrayList(); - final Set encountered = new HashSet(); - if ( !node.isExternal() ) { - final List exts = node.getAllExternalDescendants(); - for( PhylogenyNode current : exts ) { - descs.add( current ); - while ( current != node ) { - current = current.getParent(); - if ( encountered.contains( current.getId() ) ) { - continue; - } - descs.add( current ); - encountered.add( current.getId() ); - } - } - } - return descs; - } - - /** - * - * Convenience method - * - * @param node - * @return - */ - public static Color getBranchColorValue( final PhylogenyNode node ) { - if ( node.getBranchData().getBranchColor() == null ) { - return null; - } - return node.getBranchData().getBranchColor().getValue(); - } - - /** - * Convenience method - */ - public static double getBranchWidthValue( final PhylogenyNode node ) { - if ( !node.getBranchData().isHasBranchWidth() ) { - return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE; - } - return node.getBranchData().getBranchWidth().getValue(); - } - - /** - * Convenience method - */ - public static double getConfidenceValue( final PhylogenyNode node ) { - if ( !node.getBranchData().isHasConfidences() ) { - return Confidence.CONFIDENCE_DEFAULT_VALUE; - } - return node.getBranchData().getConfidence( 0 ).getValue(); - } - - /** - * Convenience method - */ - public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) { - if ( !node.getBranchData().isHasConfidences() ) { - return new double[ 0 ]; - } - final double[] values = new double[ node.getBranchData().getConfidences().size() ]; - int i = 0; - for( final Confidence c : node.getBranchData().getConfidences() ) { - values[ i++ ] = c.getValue(); - } - return values; - } - - final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { - return calculateLCA( n1, n2 ).getNodeData().getEvent(); - } - - /** - * Returns taxonomy t if all external descendants have - * the same taxonomy t, null otherwise. - * - */ - public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - Taxonomy tax = null; - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - return null; - } - else if ( tax == null ) { - tax = n.getNodeData().getTaxonomy(); - } - else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) { - return null; - } - } - return tax; - } - - public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) { - final List children = node.getAllExternalDescendants(); - PhylogenyNode farthest = null; - double longest = -Double.MAX_VALUE; - for( final PhylogenyNode child : children ) { - if ( PhylogenyMethods.getDistance( child, node ) > longest ) { - farthest = child; - longest = PhylogenyMethods.getDistance( child, node ); - } - } - return farthest; - } - - // public static PhylogenyMethods getInstance() { - // if ( PhylogenyMethods._instance == null ) { - // PhylogenyMethods._instance = new PhylogenyMethods(); - // } - // return PhylogenyMethods._instance; - // } - /** - * Returns the largest confidence value found on phy. - */ - static public double getMaximumConfidenceValue( final Phylogeny phy ) { - double max = -Double.MAX_VALUE; - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final double s = PhylogenyMethods.getConfidenceValue( iter.next() ); - if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) { - max = s; - } - } - return max; - } - - static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) { - int min = Integer.MAX_VALUE; - int d = 0; - PhylogenyNode n; - for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { - n = it.next(); - if ( n.isInternal() ) { - d = n.getNumberOfDescendants(); - if ( d < min ) { - min = d; - } - } - } - return min; - } - - /** - * Convenience method for display purposes. - * Not intended for algorithms. - */ - public static String getSpecies( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasTaxonomy() ) { - return ""; - } - else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) { - return node.getNodeData().getTaxonomy().getScientificName(); - } - if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { - return node.getNodeData().getTaxonomy().getTaxonomyCode(); - } - else { - return node.getNodeData().getTaxonomy().getCommonName(); - } - } - - /** - * Convenience method for display purposes. - * Not intended for algorithms. - */ - public static String getTaxonomyIdentifier( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) { - return ""; - } - return node.getNodeData().getTaxonomy().getIdentifier().getValue(); - } - - public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) { - if ( n.isExternal() ) { - return true; - } - else { - if ( n.isDuplication() ) { - for( final PhylogenyNode desc : n.getDescendants() ) { - if ( !isAllDecendentsAreDuplications( desc ) ) { - return false; - } - } - return true; - } - else { - return false; - } - } - } - - public static boolean isHasExternalDescendant( final PhylogenyNode node ) { - for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { - if ( node.getChildNode( i ).isExternal() ) { - return true; - } - } - return false; - } - - /* - * This is case insensitive. - * - */ - public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax, - final String[] providers ) { - if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) { - final String my_tax_prov = tax.getIdentifier().getProvider(); - for( final String provider : providers ) { - if ( provider.equalsIgnoreCase( my_tax_prov ) ) { - return true; - } - } - return false; - } - else { - return false; - } - } - - public static void midpointRoot( final Phylogeny phylogeny ) { - if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) { - return; - } - int counter = 0; - final int total_nodes = phylogeny.getNodeCount(); - while ( true ) { - if ( ++counter > total_nodes ) { - throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" ); - } - PhylogenyNode a = null; - double da = 0; - double db = 0; - for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) { - final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) ); - final double df = getDistance( f, phylogeny.getRoot() ); - if ( df > 0 ) { - if ( df > da ) { - db = da; - da = df; - a = f; - } - else if ( df > db ) { - db = df; - } - } - } - final double diff = da - db; - if ( diff < 0.000001 ) { - break; - } - double x = da - ( diff / 2.0 ); - while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) { - x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 ); - a = a.getParent(); - } - phylogeny.reRoot( a, x ); - } - phylogeny.recalculateNumberOfExternalDescendants( true ); - } - - public static void normalizeBootstrapValues( final Phylogeny phylogeny, - final double max_bootstrap_value, - final double max_normalized_value ) { - for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( node.isInternal() ) { - final double confidence = getConfidenceValue( node ); - if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) { - if ( confidence >= max_bootstrap_value ) { - setBootstrapConfidence( node, max_normalized_value ); - } - else { - setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value ); - } - } - } - } - } - - public static List obtainAllNodesAsList( final Phylogeny phy ) { - final List nodes = new ArrayList(); - if ( phy.isEmpty() ) { - return nodes; - } - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - nodes.add( iter.next() ); - } - return nodes; - } - - /** - * Returns a map of distinct taxonomies of - * all external nodes of node. - * If at least one of the external nodes has no taxonomy, - * null is returned. - * - */ - public static Map obtainDistinctTaxonomyCounts( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - final Map tax_map = new HashMap(); - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - return null; - } - final Taxonomy t = n.getNodeData().getTaxonomy(); - if ( tax_map.containsKey( t ) ) { - tax_map.put( t, tax_map.get( t ) + 1 ); - } - else { - tax_map.put( t, 1 ); - } - } - return tax_map; - } - - /** - * Arranges the order of childern for each node of this Phylogeny in such a - * way that either the branch with more children is on top (right) or on - * bottom (left), dependent on the value of boolean order. - * - * @param order - * decides in which direction to order - * @param pri - */ - public static void orderAppearance( final PhylogenyNode n, - final boolean order, - final boolean order_ext_alphabetically, - final DESCENDANT_SORT_PRIORITY pri ) { - if ( n.isExternal() ) { - return; - } - else { - PhylogenyNode temp = null; - if ( ( n.getNumberOfDescendants() == 2 ) - && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) - && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { - temp = n.getChildNode1(); - n.setChild1( n.getChildNode2() ); - n.setChild2( temp ); - } - else if ( order_ext_alphabetically ) { - boolean all_ext = true; - for( final PhylogenyNode i : n.getDescendants() ) { - if ( !i.isExternal() ) { - all_ext = false; - break; - } - } - if ( all_ext ) { - PhylogenyMethods.sortNodeDescendents( n, pri ); - } - } - for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { - orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri ); - } - } - } - - public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) { - for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - double red = 0.0; - double green = 0.0; - double blue = 0.0; - int n = 0; - if ( node.isInternal() ) { - //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) { - for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { - final PhylogenyNode child_node = node.getChildNode( i ); - final Color child_color = getBranchColorValue( child_node ); - if ( child_color != null ) { - ++n; - red += child_color.getRed(); - green += child_color.getGreen(); - blue += child_color.getBlue(); - } - } - setBranchColorValue( node, - new Color( ForesterUtil.roundToInt( red / n ), - ForesterUtil.roundToInt( green / n ), - ForesterUtil.roundToInt( blue / n ) ) ); - } - } - } - - public static final void preOrderReId( final Phylogeny phy ) { - if ( phy.isEmpty() ) { - return; - } - phy.setIdToNodeMap( null ); - long i = PhylogenyNode.getNodeCount(); - for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { - it.next().setId( i++ ); - } - PhylogenyNode.setNodeCount( i ); - } - - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny[] trees = factory.create( file, parser ); - if ( ( trees == null ) || ( trees.length == 0 ) ) { - throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); - } - return trees; - } - - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List files ) - throws IOException { - final List tree_list = new ArrayList(); - for( final File file : files ) { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny[] trees = factory.create( file, parser ); - if ( ( trees == null ) || ( trees.length == 0 ) ) { - throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); - } - tree_list.addAll( Arrays.asList( trees ) ); - } - return tree_list.toArray( new Phylogeny[ tree_list.size() ] ); - } - - public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) { - if ( remove_me.isRoot() ) { - if ( remove_me.getNumberOfDescendants() == 1 ) { - final PhylogenyNode desc = remove_me.getDescendants().get( 0 ); - desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(), - desc.getDistanceToParent() ) ); - desc.setParent( null ); - phylogeny.setRoot( desc ); - phylogeny.clearHashIdToNodeMap(); - } - else { - throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" ); - } - } - else if ( remove_me.isExternal() ) { - phylogeny.deleteSubtree( remove_me, false ); - phylogeny.clearHashIdToNodeMap(); - phylogeny.externalNodesHaveChanged(); - } - else { - final PhylogenyNode parent = remove_me.getParent(); - final List descs = remove_me.getDescendants(); - parent.removeChildNode( remove_me ); - for( final PhylogenyNode desc : descs ) { - parent.addAsChild( desc ); - desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(), - desc.getDistanceToParent() ) ); - } - remove_me.setParent( null ); - phylogeny.clearHashIdToNodeMap(); - phylogeny.externalNodesHaveChanged(); - } - } - - private static enum NDF { - NodeName( "NN" ), - TaxonomyCode( "TC" ), - TaxonomyCommonName( "CN" ), - TaxonomyScientificName( "TS" ), - TaxonomyIdentifier( "TI" ), - TaxonomySynonym( "SY" ), - SequenceName( "SN" ), - GeneName( "GN" ), - SequenceSymbol( "SS" ), - SequenceAccession( "SA" ), - Domain( "DO" ), - Annotation( "AN" ), - CrossRef( "XR" ), - BinaryCharacter( "BC" ), - MolecularSequence( "MS" ); - - private final String _text; - - NDF( final String text ) { - _text = text; - } - - public static NDF fromString( final String text ) { - for( final NDF n : NDF.values() ) { - if ( text.startsWith( n._text ) ) { - return n; - } - } - return null; - } - } - - public static List searchData( final String query, - final Phylogeny phy, - final boolean case_sensitive, - final boolean partial, - final boolean regex, - final boolean search_domains, - final double domains_confidence_threshold ) { - final List nodes = new ArrayList(); - if ( phy.isEmpty() || ( query == null ) ) { - return nodes; - } - if ( ForesterUtil.isEmpty( query ) ) { - return nodes; - } - String my_query = query; - NDF ndf = null; - if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) { - ndf = NDF.fromString( my_query ); - if ( ndf != null ) { - my_query = my_query.substring( 3 ); - } - } - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - boolean match = false; - if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) ) - && match( node.getName(), my_query, case_sensitive, partial, regex ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) - && node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), - my_query, - case_sensitive, - partial, - regex ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) - && node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getCommonName(), - my_query, - case_sensitive, - partial, - regex ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) - && node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getScientificName(), - my_query, - case_sensitive, - partial, - regex ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) - && node.getNodeData().isHasTaxonomy() - && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) - && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), - my_query, - case_sensitive, - partial, - regex ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy() - && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { - final List syns = node.getNodeData().getTaxonomy().getSynonyms(); - I: for( final String syn : syns ) { - if ( match( syn, my_query, case_sensitive, partial, regex ) ) { - match = true; - break I; - } - } - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) { - match = true; - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getGeneName(), my_query, case_sensitive, partial, regex ) ) { - match = true; - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getSymbol(), my_query, case_sensitive, partial, regex ) ) { - match = true; - } - if ( !match - && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) - && node.getNodeData().isHasSequence() - && ( node.getNodeData().getSequence().getAccession() != null ) - && match( node.getNodeData().getSequence().getAccession().getValue(), - my_query, - case_sensitive, - partial, - regex ) ) { - match = true; - } - if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) ) - && node.getNodeData().isHasSequence() - && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { - final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); - I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { - if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold ) - && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) { - match = true; - break I; - } - } - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence() - && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { - for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { - if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) { - match = true; - break; - } - if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) { - match = true; - break; - } - } - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence() - && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { - for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { - if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) { - match = true; - break; - } - if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) { - match = true; - break; - } - if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) { - match = true; - break; - } - } - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) ) - && ( node.getNodeData().getBinaryCharacters() != null ) ) { - Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); - I: while ( it.hasNext() ) { - if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) { - match = true; - break I; - } - } - it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); - I: while ( it.hasNext() ) { - if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) { - match = true; - break I; - } - } - } - if ( !match - && ( ndf == NDF.MolecularSequence ) - && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getMolecularSequence(), - my_query, - case_sensitive, - true, - regex ) ) { - match = true; - } - if ( match ) { - nodes.add( node ); - } - } - return nodes; - } - - public static List searchDataLogicalAnd( final String[] queries, - final Phylogeny phy, - final boolean case_sensitive, - final boolean partial, - final boolean search_domains, - final double domains_confidence_threshold ) { - final List nodes = new ArrayList(); - if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) { - return nodes; - } - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - boolean all_matched = true; - for( String query : queries ) { - if ( query == null ) { - continue; - } - query = query.trim(); - NDF ndf = null; - if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) { - ndf = NDF.fromString( query ); - if ( ndf != null ) { - query = query.substring( 3 ); - } - } - boolean match = false; - if ( ForesterUtil.isEmpty( query ) ) { - continue; - } - if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) ) - && match( node.getName(), query, case_sensitive, partial, false ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) - && node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), - query, - case_sensitive, - partial, - false ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) - && node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getCommonName(), - query, - case_sensitive, - partial, - false ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) - && node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getScientificName(), - query, - case_sensitive, - partial, - false ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) - && node.getNodeData().isHasTaxonomy() - && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) - && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), - query, - case_sensitive, - partial, - false ) ) { - match = true; - } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy() - && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { - final List syns = node.getNodeData().getTaxonomy().getSynonyms(); - I: for( final String syn : syns ) { - if ( match( syn, query, case_sensitive, partial, false ) ) { - match = true; - break I; - } - } - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial, false ) ) { - match = true; - } - if ( !match - && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) - && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial, false ) ) { - match = true; - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) - && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial, false ) ) { - match = true; - } - if ( !match - && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) - && node.getNodeData().isHasSequence() - && ( node.getNodeData().getSequence().getAccession() != null ) - && match( node.getNodeData().getSequence().getAccession().getValue(), - query, - case_sensitive, - partial, - false ) ) { - match = true; - } - if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) ) - && node.getNodeData().isHasSequence() - && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { - final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); - I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { - if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold ) - && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) { - match = true; - break I; - } - } - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence() - && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { - for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { - if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) { - match = true; - break; - } - if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) { - match = true; - break; - } - } - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence() - && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { - for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { - if ( match( x.getComment(), query, case_sensitive, partial, false ) ) { - match = true; - break; - } - if ( match( x.getSource(), query, case_sensitive, partial, false ) ) { - match = true; - break; - } - if ( match( x.getValue(), query, case_sensitive, partial, false ) ) { - match = true; - break; - } - } - } - if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) ) - && ( node.getNodeData().getBinaryCharacters() != null ) ) { - Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); - I: while ( it.hasNext() ) { - if ( match( it.next(), query, case_sensitive, partial, false ) ) { - match = true; - break I; - } - } - it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); - I: while ( it.hasNext() ) { - if ( match( it.next(), query, case_sensitive, partial, false ) ) { - match = true; - break I; - } - } - } - if ( !match - && ( ndf == NDF.MolecularSequence ) - && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getMolecularSequence(), - query, - case_sensitive, - true, - false ) ) { - match = true; - } - if ( !match ) { - all_matched = false; - break; - } - } - if ( all_matched ) { - nodes.add( node ); - } - } - return nodes; - } - - public static void setAllIndicatorsToZero( final Phylogeny phy ) { - for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { - it.next().setIndicator( ( byte ) 0 ); - } - } - - /** - * Convenience method. - * Sets value for the first confidence value (created if not present, values overwritten otherwise). - */ - public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) { - setConfidence( node, bootstrap_confidence_value, "bootstrap" ); - } - - public static void setBranchColorValue( final PhylogenyNode node, final Color color ) { - if ( node.getBranchData().getBranchColor() == null ) { - node.getBranchData().setBranchColor( new BranchColor() ); - } - node.getBranchData().getBranchColor().setValue( color ); - } - - /** - * Convenience method - */ - public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) { - node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) ); - } - - /** - * Convenience method. - * Sets value for the first confidence value (created if not present, values overwritten otherwise). - */ - public static void setConfidence( final PhylogenyNode node, final double confidence_value ) { - setConfidence( node, confidence_value, "" ); - } - - /** - * Convenience method. - * Sets value for the first confidence value (created if not present, values overwritten otherwise). - */ - public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) { - Confidence c = null; - if ( node.getBranchData().getNumberOfConfidences() > 0 ) { - c = node.getBranchData().getConfidence( 0 ); - } - else { - c = new Confidence(); - node.getBranchData().addConfidence( c ); - } - c.setType( type ); - c.setValue( confidence_value ); - } - - public static void setScientificName( final PhylogenyNode node, final String scientific_name ) { - if ( !node.getNodeData().isHasTaxonomy() ) { - node.getNodeData().setTaxonomy( new Taxonomy() ); - } - node.getNodeData().getTaxonomy().setScientificName( scientific_name ); - } - - /** - * Convenience method to set the taxonomy code of a phylogeny node. - * - * - * @param node - * @param taxonomy_code - * @throws PhyloXmlDataFormatException - */ - public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code ) - throws PhyloXmlDataFormatException { - if ( !node.getNodeData().isHasTaxonomy() ) { - node.getNodeData().setTaxonomy( new Taxonomy() ); - } - node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code ); - } - - final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) { - Comparator c; - switch ( pri ) { - case SEQUENCE: - c = new PhylogenyNodeSortSequencePriority(); - break; - case NODE_NAME: - c = new PhylogenyNodeSortNodeNamePriority(); - break; - default: - c = new PhylogenyNodeSortTaxonomyPriority(); - } - final List descs = node.getDescendants(); - Collections.sort( descs, c ); - int i = 0; - for( final PhylogenyNode desc : descs ) { - node.setChildNode( i++, desc ); - } - } - - /** - * Removes from Phylogeny to_be_stripped all external Nodes which are - * associated with a species NOT found in Phylogeny reference. - * - * @param reference - * a reference Phylogeny - * @param to_be_stripped - * Phylogeny to be stripped - * @return nodes removed from to_be_stripped - */ - public static List taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, - final Phylogeny to_be_stripped ) { - final Set ref_ext_taxo = new HashSet(); - for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) { - final PhylogenyNode n = it.next(); - if ( !n.getNodeData().isHasTaxonomy() ) { - throw new IllegalArgumentException( "no taxonomic data in node: " + n ); - } - if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { - ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() ); - } - if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { - ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) - && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) { - ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ); - } - } - final ArrayList nodes_to_delete = new ArrayList(); - for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) { - final PhylogenyNode n = it.next(); - if ( !n.getNodeData().isHasTaxonomy() ) { - nodes_to_delete.add( n ); - } - else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) ) - && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n - .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) { - nodes_to_delete.add( n ); - } - } - for( final PhylogenyNode n : nodes_to_delete ) { - to_be_stripped.deleteSubtree( n, true ); - } - to_be_stripped.clearHashIdToNodeMap(); - to_be_stripped.externalNodesHaveChanged(); - return nodes_to_delete; - } - - final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { - double value = -1; - try { - value = Double.parseDouble( n.getName() ); - } - catch ( final NumberFormatException e ) { - throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " - + e.getLocalizedMessage() ); - } - if ( value >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); - n.setName( "" ); - } - } - } - } - - final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !n.isRoot() ) { - if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double value = -1; - try { - value = Double.parseDouble( n.getName() ); - } - catch ( final NumberFormatException e ) { - return false; - } - if ( ( value < 0.0 ) || ( value > 100 ) ) { - return false; - } - } - } - } - return true; - } - - final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - transferInternalNodeNameToConfidence( confidence_type, it.next() ); - } - } - - private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) { - if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { - if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double d = -1.0; - try { - d = Double.parseDouble( n.getName() ); - } - catch ( final Exception e ) { - d = -1.0; - } - if ( d >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( d, confidence_type ) ); - n.setName( "" ); - } - } - } - } - - final static public void transferNodeNameToField( final Phylogeny phy, - final PhylogenyNodeField field, - final boolean external_only ) throws PhyloXmlDataFormatException { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( external_only && n.isInternal() ) { - continue; - } - final String name = n.getName().trim(); - if ( !ForesterUtil.isEmpty( name ) ) { - switch ( field ) { - case TAXONOMY_CODE: - n.setName( "" ); - setTaxonomyCode( n, name ); - break; - case TAXONOMY_SCIENTIFIC_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setScientificName( name ); - break; - case TAXONOMY_COMMON_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setCommonName( name ); - break; - case SEQUENCE_SYMBOL: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setSymbol( name ); - break; - case SEQUENCE_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setName( name ); - break; - case TAXONOMY_ID_UNIPROT_1: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( 0, i ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - case TAXONOMY_ID_UNIPROT_2: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( i + 1, name.length() ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - case TAXONOMY_ID: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); - break; - } - } - } - } - } - - static double addPhylogenyDistances( final double a, final double b ) { - if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { - return a + b; - } - else if ( a >= 0.0 ) { - return a; - } - else if ( b >= 0.0 ) { - return b; - } - return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; - } - - static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) { - double d = 0; - boolean all_default = true; - while ( anc != desc ) { - if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { - d += desc.getDistanceToParent(); - if ( all_default ) { - all_default = false; - } - } - desc = desc.getParent(); - } - if ( all_default ) { - return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; - } - return d; - } - - /** - * Deep copies the phylogeny originating from this node. - */ - static PhylogenyNode copySubTree( final PhylogenyNode source ) { - if ( source == null ) { - return null; - } - else { - final PhylogenyNode newnode = source.copyNodeData(); - if ( !source.isExternal() ) { - for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { - newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) ); - } - } - return newnode; - } - } - - /** - * Shallow copies the phylogeny originating from this node. - */ - static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) { - if ( source == null ) { - return null; - } - else { - final PhylogenyNode newnode = source.copyNodeDataShallow(); - if ( !source.isExternal() ) { - for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { - newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) ); - } - } - return newnode; - } - } - - private final static List divideIntoSubTreesHelper( final PhylogenyNode node, - final double min_distance_to_root ) { - final List l = new ArrayList(); - final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root ); - for( final PhylogenyNode ext : r.getAllExternalDescendants() ) { - if ( ext.getIndicator() != 0 ) { - throw new RuntimeException( "this should not have happened" ); - } - ext.setIndicator( ( byte ) 1 ); - l.add( ext ); - } - return l; - } - - /** - * Calculates the distance between PhylogenyNodes n1 and n2. - * PRECONDITION: n1 is a descendant of n2. - * - * @param n1 - * a descendant of n2 - * @param n2 - * @return distance between n1 and n2 - */ - private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) { - double d = 0.0; - while ( n1 != n2 ) { - if ( n1.getDistanceToParent() > 0.0 ) { - d += n1.getDistanceToParent(); - } - n1 = n1.getParent(); - } - return d; - } - - private static boolean match( final String s, - final String query, - final boolean case_sensitive, - final boolean partial, - final boolean regex ) { - if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) { - return false; - } - String my_s = s.trim(); - String my_query = query.trim(); - if ( !case_sensitive && !regex ) { - my_s = my_s.toLowerCase(); - my_query = my_query.toLowerCase(); - } - if ( regex ) { - Pattern p = null; - try { - if ( case_sensitive ) { - p = Pattern.compile( my_query ); - } - else { - p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE ); - } - } - catch ( final PatternSyntaxException e ) { - return false; - } - if ( p != null ) { - return p.matcher( my_s ).find(); - } - else { - return false; - } - } - else if ( partial ) { - return my_s.indexOf( my_query ) >= 0; - } - else { - Pattern p = null; - try { - p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" ); - } - catch ( final PatternSyntaxException e ) { - return false; - } - if ( p != null ) { - return p.matcher( my_s ).find(); - } - else { - return false; - } - } - } - - private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) { - PhylogenyNode n = node; - PhylogenyNode prev = node; - while ( min_distance_to_root < n.calculateDistanceToRoot() ) { - prev = n; - n = n.getParent(); - } - return prev; - } - - public static enum DESCENDANT_SORT_PRIORITY { - NODE_NAME, SEQUENCE, TAXONOMY; - } - - public static enum PhylogenyNodeField { - CLADE_NAME, - SEQUENCE_NAME, - SEQUENCE_SYMBOL, - TAXONOMY_CODE, - TAXONOMY_COMMON_NAME, - TAXONOMY_ID, - TAXONOMY_ID_UNIPROT_1, - TAXONOMY_ID_UNIPROT_2, - TAXONOMY_SCIENTIFIC_NAME; - } - - public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) { - for( int s = 0; s < msa.getNumberOfSequences(); ++s ) { - final org.forester.sequence.MolecularSequence seq = msa.getSequence( s ); - final PhylogenyNode node = phy.getNode( seq.getIdentifier() ); - final org.forester.phylogeny.data.Sequence new_seq = new Sequence(); - new_seq.setMolecularSequenceAligned( true ); - new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() ); - new_seq.setName( seq.getIdentifier() ); - try { - new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN ); - } - catch ( final PhyloXmlDataFormatException ignore ) { - // do nothing - } - node.getNodeData().addSequence( new_seq ); - } - } - - final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - } - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { - return n1.getNodeData().getSequence().getGeneName() - .compareTo( n2.getNodeData().getSequence().getGeneName() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - } - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - return 0; - } - } - - final private static class PhylogenyNodeSortSequencePriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { - return n1.getNodeData().getSequence().getGeneName() - .compareTo( n2.getNodeData().getSequence().getGeneName() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - } - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - } - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - return 0; - } - } - - final private static class PhylogenyNodeSortNodeNamePriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - } - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { - return n1.getNodeData().getSequence().getGeneName() - .compareTo( n2.getNodeData().getSequence().getGeneName() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - } - return 0; - } - } -} +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester + +package org.forester.phylogeny; + +import java.awt.Color; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import org.forester.io.parsers.FastaParser; +import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.io.parsers.phyloxml.PhyloXmlUtil; +import org.forester.io.parsers.util.PhylogenyParserException; +import org.forester.msa.Msa; +import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Annotation; +import org.forester.phylogeny.data.BranchColor; +import org.forester.phylogeny.data.BranchWidth; +import org.forester.phylogeny.data.Confidence; +import org.forester.phylogeny.data.DomainArchitecture; +import org.forester.phylogeny.data.Event; +import org.forester.phylogeny.data.Identifier; +import org.forester.phylogeny.data.PhylogenyDataUtil; +import org.forester.phylogeny.data.Sequence; +import org.forester.phylogeny.data.Taxonomy; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.BasicDescriptiveStatistics; +import org.forester.util.DescriptiveStatistics; +import org.forester.util.ForesterUtil; + +public class PhylogenyMethods { + + private PhylogenyMethods() { + // Hidden constructor. + } + + @Override + public Object clone() throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } + + public static boolean extractFastaInformation( final Phylogeny phy ) { + boolean could_extract = false; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( !ForesterUtil.isEmpty( node.getName() ) ) { + final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() ); + if ( name_m.lookingAt() ) { + could_extract = true; + final String acc_source = name_m.group( 1 ); + final String acc = name_m.group( 2 ); + final String seq_name = name_m.group( 3 ); + final String tax_sn = name_m.group( 4 ); + if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { + ForesterUtil.ensurePresenceOfSequence( node ); + node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); + } + if ( !ForesterUtil.isEmpty( seq_name ) ) { + ForesterUtil.ensurePresenceOfSequence( node ); + node.getNodeData().getSequence( 0 ).setName( seq_name ); + } + if ( !ForesterUtil.isEmpty( tax_sn ) ) { + ForesterUtil.ensurePresenceOfTaxonomy( node ); + node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); + } + } + } + } + return could_extract; + } + + public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { + stats.addValue( n.getDistanceToParent() ); + } + } + return stats; + } + + public static List calculateConfidenceStatistics( final Phylogeny phy ) { + final List stats = new ArrayList(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && !n.isRoot() ) { + if ( n.getBranchData().isHasConfidences() ) { + for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { + final Confidence c = n.getBranchData().getConfidences().get( i ); + if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { + stats.add( i, new BasicDescriptiveStatistics() ); + } + if ( !ForesterUtil.isEmpty( c.getType() ) ) { + if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { + if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { + throw new IllegalArgumentException( "support values in node [" + n.toString() + + "] appear inconsistently ordered" ); + } + } + stats.get( i ).setDescription( c.getType() ); + } + stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); + } + } + } + } + return stats; + } + + /** + * Calculates the distance between PhylogenyNodes node1 and node2. + * + * + * @param node1 + * @param node2 + * @return distance between node1 and node2 + */ + public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) { + final PhylogenyNode lca = calculateLCA( node1, node2 ); + final PhylogenyNode n1 = node1; + final PhylogenyNode n2 = node2; + return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) ); + } + + /** + * Returns the LCA of PhylogenyNodes node1 and node2. + * + * + * @param node1 + * @param node2 + * @return LCA of node1 and node2 + */ + public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) { + if ( node1 == null ) { + throw new IllegalArgumentException( "first argument (node) is null" ); + } + if ( node2 == null ) { + throw new IllegalArgumentException( "second argument (node) is null" ); + } + if ( node1 == node2 ) { + return node1; + } + if ( ( node1.getParent() == node2.getParent() ) ) { + return node1.getParent(); + } + int depth1 = node1.calculateDepth(); + int depth2 = node2.calculateDepth(); + while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) { + if ( depth1 > depth2 ) { + node1 = node1.getParent(); + depth1--; + } + else if ( depth2 > depth1 ) { + node2 = node2.getParent(); + depth2--; + } + else { + if ( node1 == node2 ) { + return node1; + } + node1 = node1.getParent(); + node2 = node2.getParent(); + depth1--; + depth2--; + } + } + throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" ); + } + + /** + * Returns the LCA of PhylogenyNodes node1 and node2. + * Precondition: ids are in pre-order (or level-order). + * + * + * @param node1 + * @param node2 + * @return LCA of node1 and node2 + */ + public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) { + if ( node1 == null ) { + throw new IllegalArgumentException( "first argument (node) is null" ); + } + if ( node2 == null ) { + throw new IllegalArgumentException( "second argument (node) is null" ); + } + while ( node1 != node2 ) { + if ( node1.getId() > node2.getId() ) { + node1 = node1.getParent(); + } + else { + node2 = node2.getParent(); + } + } + return node1; + } + + public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { + if ( node.isExternal() ) { + return 0; + } + short max = 0; + for( PhylogenyNode d : node.getAllExternalDescendants() ) { + short steps = 0; + while ( d != node ) { + if ( d.isCollapse() ) { + steps = 0; + } + else { + steps++; + } + d = d.getParent(); + } + if ( max < steps ) { + max = steps; + } + } + return max; + } + + public static int calculateMaxDepth( final Phylogeny phy ) { + int max = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final int steps = node.calculateDepth(); + if ( steps > max ) { + max = steps; + } + } + return max; + } + + public static double calculateMaxDistanceToRoot( final Phylogeny phy ) { + double max = 0.0; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final double d = node.calculateDistanceToRoot(); + if ( d > max ) { + max = d; + } + } + return max; + } + + public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) { + double max = 0.0; + PhylogenyNode max_node = phy.getFirstExternalNode(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final double d = node.calculateDistanceToRoot(); + if ( d > max ) { + max = d; + max_node = node; + } + } + return max_node; + } + + public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + int x = 0; + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + x++; + } + } + return x; + } + + public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() ) { + stats.addValue( n.getNumberOfDescendants() ); + } + } + return stats; + } + + public final static void collapseSubtreeStructure( final PhylogenyNode n ) { + final List eds = n.getAllExternalDescendants(); + final List d = new ArrayList(); + for( final PhylogenyNode ed : eds ) { + d.add( calculateDistanceToAncestor( n, ed ) ); + } + for( int i = 0; i < eds.size(); ++i ) { + n.setChildNode( i, eds.get( i ) ); + eds.get( i ).setDistanceToParent( d.get( i ) ); + } + } + + public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) { + int count = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) { + count++; + } + } + return count; + } + + public static int countNumberOfPolytomies( final Phylogeny phy ) { + int count = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) { + count++; + } + } + return count; + } + + public static final HashMap createNameToExtNodeMap( final Phylogeny phy ) { + final HashMap nodes = new HashMap(); + final List ext = phy.getExternalNodes(); + for( final PhylogenyNode n : ext ) { + nodes.put( n.getName(), n ); + } + return nodes; + } + + public static void deleteExternalNodesNegativeSelection( final Set to_delete, final Phylogeny phy ) { + for( final Long id : to_delete ) { + phy.deleteSubtree( phy.getNode( id ), true ); + } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); + } + + public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) + throws IllegalArgumentException { + for( final String element : node_names_to_delete ) { + if ( ForesterUtil.isEmpty( element ) ) { + continue; + } + List nodes = null; + nodes = p.getNodes( element ); + final Iterator it = nodes.iterator(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() ) { + throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" ); + } + p.deleteSubtree( n, true ); + } + } + p.clearHashIdToNodeMap(); + p.externalNodesHaveChanged(); + } + + public static List deleteExternalNodesPositiveSelection( final String[] node_names_to_keep, + final Phylogeny p ) { + final PhylogenyNodeIterator it = p.iteratorExternalForward(); + final String[] to_delete = new String[ p.getNumberOfExternalNodes() ]; + int i = 0; + Arrays.sort( node_names_to_keep ); + while ( it.hasNext() ) { + final String curent_name = it.next().getName(); + if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) { + to_delete[ i++ ] = curent_name; + } + } + PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p ); + final List deleted = new ArrayList(); + for( final String n : to_delete ) { + if ( !ForesterUtil.isEmpty( n ) ) { + deleted.add( n ); + } + } + return deleted; + } + + public static void deleteExternalNodesPositiveSelectionT( final List species_to_keep, final Phylogeny phy ) { + final Set to_delete = new HashSet(); + for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode n = it.next(); + if ( n.getNodeData().isHasTaxonomy() ) { + if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) { + to_delete.add( n.getId() ); + } + } + else { + throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" ); + } + } + deleteExternalNodesNegativeSelection( to_delete, phy ); + } + + final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) { + final ArrayList to_delete = new ArrayList(); + for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) { + to_delete.add( n ); + } + } + for( final PhylogenyNode d : to_delete ) { + PhylogenyMethods.removeNode( d, phy ); + } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); + } + + final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { + if ( n.isInternal() ) { + throw new IllegalArgumentException( "node is not external" ); + } + final ArrayList to_delete = new ArrayList(); + for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode i = it.next(); + if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { + to_delete.add( i ); + } + } + for( final PhylogenyNode d : to_delete ) { + phy.deleteSubtree( d, true ); + } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); + } + + public final static List> divideIntoSubTrees( final Phylogeny phy, + final double min_distance_to_root ) { + if ( min_distance_to_root <= 0 ) { + throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root ); + } + final List> l = new ArrayList>(); + setAllIndicatorsToZero( phy ); + for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode n = it.next(); + if ( n.getIndicator() != 0 ) { + continue; + } + l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) ); + if ( l.isEmpty() ) { + throw new RuntimeException( "this should not have happened" ); + } + } + return l; + } + + public static List getAllDescendants( final PhylogenyNode node ) { + final List descs = new ArrayList(); + final Set encountered = new HashSet(); + if ( !node.isExternal() ) { + final List exts = node.getAllExternalDescendants(); + for( PhylogenyNode current : exts ) { + descs.add( current ); + while ( current != node ) { + current = current.getParent(); + if ( encountered.contains( current.getId() ) ) { + continue; + } + descs.add( current ); + encountered.add( current.getId() ); + } + } + } + return descs; + } + + /** + * + * Convenience method + * + * @param node + * @return + */ + public static Color getBranchColorValue( final PhylogenyNode node ) { + if ( node.getBranchData().getBranchColor() == null ) { + return null; + } + return node.getBranchData().getBranchColor().getValue(); + } + + /** + * Convenience method + */ + public static double getBranchWidthValue( final PhylogenyNode node ) { + if ( !node.getBranchData().isHasBranchWidth() ) { + return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE; + } + return node.getBranchData().getBranchWidth().getValue(); + } + + /** + * Convenience method + */ + public static double getConfidenceValue( final PhylogenyNode node ) { + if ( !node.getBranchData().isHasConfidences() ) { + return Confidence.CONFIDENCE_DEFAULT_VALUE; + } + return node.getBranchData().getConfidence( 0 ).getValue(); + } + + /** + * Convenience method + */ + public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) { + if ( !node.getBranchData().isHasConfidences() ) { + return new double[ 0 ]; + } + final double[] values = new double[ node.getBranchData().getConfidences().size() ]; + int i = 0; + for( final Confidence c : node.getBranchData().getConfidences() ) { + values[ i++ ] = c.getValue(); + } + return values; + } + + final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { + return calculateLCA( n1, n2 ).getNodeData().getEvent(); + } + + /** + * Returns taxonomy t if all external descendants have + * the same taxonomy t, null otherwise. + * + */ + public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + Taxonomy tax = null; + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + return null; + } + else if ( tax == null ) { + tax = n.getNodeData().getTaxonomy(); + } + else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) { + return null; + } + } + return tax; + } + + public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) { + final List children = node.getAllExternalDescendants(); + PhylogenyNode farthest = null; + double longest = -Double.MAX_VALUE; + for( final PhylogenyNode child : children ) { + if ( PhylogenyMethods.getDistance( child, node ) > longest ) { + farthest = child; + longest = PhylogenyMethods.getDistance( child, node ); + } + } + return farthest; + } + + // public static PhylogenyMethods getInstance() { + // if ( PhylogenyMethods._instance == null ) { + // PhylogenyMethods._instance = new PhylogenyMethods(); + // } + // return PhylogenyMethods._instance; + // } + /** + * Returns the largest confidence value found on phy. + */ + static public double getMaximumConfidenceValue( final Phylogeny phy ) { + double max = -Double.MAX_VALUE; + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final double s = PhylogenyMethods.getConfidenceValue( iter.next() ); + if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) { + max = s; + } + } + return max; + } + + static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) { + int min = Integer.MAX_VALUE; + int d = 0; + PhylogenyNode n; + for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { + n = it.next(); + if ( n.isInternal() ) { + d = n.getNumberOfDescendants(); + if ( d < min ) { + min = d; + } + } + } + return min; + } + + /** + * Convenience method for display purposes. + * Not intended for algorithms. + */ + public static String getSpecies( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasTaxonomy() ) { + return ""; + } + else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) { + return node.getNodeData().getTaxonomy().getScientificName(); + } + if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { + return node.getNodeData().getTaxonomy().getTaxonomyCode(); + } + else { + return node.getNodeData().getTaxonomy().getCommonName(); + } + } + + /** + * Convenience method for display purposes. + * Not intended for algorithms. + */ + public static String getTaxonomyIdentifier( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) { + return ""; + } + return node.getNodeData().getTaxonomy().getIdentifier().getValue(); + } + + public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) { + if ( n.isExternal() ) { + return true; + } + else { + if ( n.isDuplication() ) { + for( final PhylogenyNode desc : n.getDescendants() ) { + if ( !isAllDecendentsAreDuplications( desc ) ) { + return false; + } + } + return true; + } + else { + return false; + } + } + } + + public static boolean isHasExternalDescendant( final PhylogenyNode node ) { + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + if ( node.getChildNode( i ).isExternal() ) { + return true; + } + } + return false; + } + + /* + * This is case insensitive. + * + */ + public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax, + final String[] providers ) { + if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) { + final String my_tax_prov = tax.getIdentifier().getProvider(); + for( final String provider : providers ) { + if ( provider.equalsIgnoreCase( my_tax_prov ) ) { + return true; + } + } + return false; + } + else { + return false; + } + } + + public static void midpointRoot( final Phylogeny phylogeny ) { + if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) { + return; + } + int counter = 0; + final int total_nodes = phylogeny.getNodeCount(); + while ( true ) { + if ( ++counter > total_nodes ) { + throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" ); + } + PhylogenyNode a = null; + double da = 0; + double db = 0; + for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) { + final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) ); + final double df = getDistance( f, phylogeny.getRoot() ); + if ( df > 0 ) { + if ( df > da ) { + db = da; + da = df; + a = f; + } + else if ( df > db ) { + db = df; + } + } + } + final double diff = da - db; + if ( diff < 0.000001 ) { + break; + } + double x = da - ( diff / 2.0 ); + while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) { + x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 ); + a = a.getParent(); + } + phylogeny.reRoot( a, x ); + } + phylogeny.recalculateNumberOfExternalDescendants( true ); + } + + public static void normalizeBootstrapValues( final Phylogeny phylogeny, + final double max_bootstrap_value, + final double max_normalized_value ) { + for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.isInternal() ) { + final double confidence = getConfidenceValue( node ); + if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) { + if ( confidence >= max_bootstrap_value ) { + setBootstrapConfidence( node, max_normalized_value ); + } + else { + setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value ); + } + } + } + } + } + + public static List obtainAllNodesAsList( final Phylogeny phy ) { + final List nodes = new ArrayList(); + if ( phy.isEmpty() ) { + return nodes; + } + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + nodes.add( iter.next() ); + } + return nodes; + } + + /** + * Returns a map of distinct taxonomies of + * all external nodes of node. + * If at least one of the external nodes has no taxonomy, + * null is returned. + * + */ + public static Map obtainDistinctTaxonomyCounts( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + final Map tax_map = new HashMap(); + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + return null; + } + final Taxonomy t = n.getNodeData().getTaxonomy(); + if ( tax_map.containsKey( t ) ) { + tax_map.put( t, tax_map.get( t ) + 1 ); + } + else { + tax_map.put( t, 1 ); + } + } + return tax_map; + } + + /** + * Arranges the order of childern for each node of this Phylogeny in such a + * way that either the branch with more children is on top (right) or on + * bottom (left), dependent on the value of boolean order. + * + * @param order + * decides in which direction to order + * @param pri + */ + public static void orderAppearance( final PhylogenyNode n, + final boolean order, + final boolean order_ext_alphabetically, + final DESCENDANT_SORT_PRIORITY pri ) { + if ( n.isExternal() ) { + return; + } + else { + PhylogenyNode temp = null; + if ( ( n.getNumberOfDescendants() == 2 ) + && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) + && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { + temp = n.getChildNode1(); + n.setChild1( n.getChildNode2() ); + n.setChild2( temp ); + } + else if ( order_ext_alphabetically ) { + boolean all_ext = true; + for( final PhylogenyNode i : n.getDescendants() ) { + if ( !i.isExternal() ) { + all_ext = false; + break; + } + } + if ( all_ext ) { + PhylogenyMethods.sortNodeDescendents( n, pri ); + } + } + for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { + orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri ); + } + } + } + + public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) { + for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + double red = 0.0; + double green = 0.0; + double blue = 0.0; + int n = 0; + if ( node.isInternal() ) { + //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) { + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + final PhylogenyNode child_node = node.getChildNode( i ); + final Color child_color = getBranchColorValue( child_node ); + if ( child_color != null ) { + ++n; + red += child_color.getRed(); + green += child_color.getGreen(); + blue += child_color.getBlue(); + } + } + setBranchColorValue( node, + new Color( ForesterUtil.roundToInt( red / n ), + ForesterUtil.roundToInt( green / n ), + ForesterUtil.roundToInt( blue / n ) ) ); + } + } + } + + public static final void preOrderReId( final Phylogeny phy ) { + if ( phy.isEmpty() ) { + return; + } + phy.setIdToNodeMap( null ); + long i = PhylogenyNode.getNodeCount(); + for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { + it.next().setId( i++ ); + } + PhylogenyNode.setNodeCount( i ); + } + + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + return trees; + } + + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List files ) + throws IOException { + final List tree_list = new ArrayList(); + for( final File file : files ) { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + tree_list.addAll( Arrays.asList( trees ) ); + } + return tree_list.toArray( new Phylogeny[ tree_list.size() ] ); + } + + public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) { + if ( remove_me.isRoot() ) { + if ( remove_me.getNumberOfDescendants() == 1 ) { + final PhylogenyNode desc = remove_me.getDescendants().get( 0 ); + desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(), + desc.getDistanceToParent() ) ); + desc.setParent( null ); + phylogeny.setRoot( desc ); + phylogeny.clearHashIdToNodeMap(); + } + else { + throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" ); + } + } + else if ( remove_me.isExternal() ) { + phylogeny.deleteSubtree( remove_me, false ); + phylogeny.clearHashIdToNodeMap(); + phylogeny.externalNodesHaveChanged(); + } + else { + final PhylogenyNode parent = remove_me.getParent(); + final List descs = remove_me.getDescendants(); + parent.removeChildNode( remove_me ); + for( final PhylogenyNode desc : descs ) { + parent.addAsChild( desc ); + desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(), + desc.getDistanceToParent() ) ); + } + remove_me.setParent( null ); + phylogeny.clearHashIdToNodeMap(); + phylogeny.externalNodesHaveChanged(); + } + } + + private static enum NDF { + NodeName( "NN" ), + TaxonomyCode( "TC" ), + TaxonomyCommonName( "CN" ), + TaxonomyScientificName( "TS" ), + TaxonomyIdentifier( "TI" ), + TaxonomySynonym( "SY" ), + SequenceName( "SN" ), + GeneName( "GN" ), + SequenceSymbol( "SS" ), + SequenceAccession( "SA" ), + Domain( "DO" ), + Annotation( "AN" ), + CrossRef( "XR" ), + BinaryCharacter( "BC" ), + MolecularSequence( "MS" ); + + private final String _text; + + NDF( final String text ) { + _text = text; + } + + public static NDF fromString( final String text ) { + for( final NDF n : NDF.values() ) { + if ( text.startsWith( n._text ) ) { + return n; + } + } + return null; + } + } + + public static List searchData( final String query, + final Phylogeny phy, + final boolean case_sensitive, + final boolean partial, + final boolean regex, + final boolean search_domains, + final double domains_confidence_threshold ) { + final List nodes = new ArrayList(); + if ( phy.isEmpty() || ( query == null ) ) { + return nodes; + } + if ( ForesterUtil.isEmpty( query ) ) { + return nodes; + } + String my_query = query; + NDF ndf = null; + if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) { + ndf = NDF.fromString( my_query ); + if ( ndf != null ) { + my_query = my_query.substring( 3 ); + } + } + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + boolean match = false; + if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) ) + && match( node.getName(), my_query, case_sensitive, partial, regex ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) + && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), + my_query, + case_sensitive, + partial, + regex ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) + && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getCommonName(), + my_query, + case_sensitive, + partial, + regex ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) + && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getScientificName(), + my_query, + case_sensitive, + partial, + regex ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) + && node.getNodeData().isHasTaxonomy() + && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) + && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), + my_query, + case_sensitive, + partial, + regex ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy() + && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { + final List syns = node.getNodeData().getTaxonomy().getSynonyms(); + I: for( final String syn : syns ) { + if ( match( syn, my_query, case_sensitive, partial, regex ) ) { + match = true; + break I; + } + } + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) { + match = true; + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getGeneName(), my_query, case_sensitive, partial, regex ) ) { + match = true; + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getSymbol(), my_query, case_sensitive, partial, regex ) ) { + match = true; + } + if ( !match + && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) + && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAccession() != null ) + && match( node.getNodeData().getSequence().getAccession().getValue(), + my_query, + case_sensitive, + partial, + regex ) ) { + match = true; + } + if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) ) + && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { + final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); + I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { + if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold ) + && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) { + match = true; + break I; + } + } + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { + for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { + if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) { + match = true; + break; + } + if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) { + match = true; + break; + } + } + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { + for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { + if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) { + match = true; + break; + } + if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) { + match = true; + break; + } + if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) { + match = true; + break; + } + } + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) ) + && ( node.getNodeData().getBinaryCharacters() != null ) ) { + Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); + I: while ( it.hasNext() ) { + if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) { + match = true; + break I; + } + } + it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); + I: while ( it.hasNext() ) { + if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) { + match = true; + break I; + } + } + } + if ( !match + && ( ndf == NDF.MolecularSequence ) + && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getMolecularSequence(), + my_query, + case_sensitive, + true, + regex ) ) { + match = true; + } + if ( match ) { + nodes.add( node.getId() ); + } + } + return nodes; + } + + public static List searchDataLogicalAnd( final String[] queries, + final Phylogeny phy, + final boolean case_sensitive, + final boolean partial, + final boolean search_domains, + final double domains_confidence_threshold ) { + final List nodes = new ArrayList(); + if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) { + return nodes; + } + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + boolean all_matched = true; + for( String query : queries ) { + if ( query == null ) { + continue; + } + query = query.trim(); + NDF ndf = null; + if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) { + ndf = NDF.fromString( query ); + if ( ndf != null ) { + query = query.substring( 3 ); + } + } + boolean match = false; + if ( ForesterUtil.isEmpty( query ) ) { + continue; + } + if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) ) + && match( node.getName(), query, case_sensitive, partial, false ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) + && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), + query, + case_sensitive, + partial, + false ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) + && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getCommonName(), + query, + case_sensitive, + partial, + false ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) + && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getScientificName(), + query, + case_sensitive, + partial, + false ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) + && node.getNodeData().isHasTaxonomy() + && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) + && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), + query, + case_sensitive, + partial, + false ) ) { + match = true; + } + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy() + && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { + final List syns = node.getNodeData().getTaxonomy().getSynonyms(); + I: for( final String syn : syns ) { + if ( match( syn, query, case_sensitive, partial, false ) ) { + match = true; + break I; + } + } + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial, false ) ) { + match = true; + } + if ( !match + && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) + && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial, false ) ) { + match = true; + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) + && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial, false ) ) { + match = true; + } + if ( !match + && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) + && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAccession() != null ) + && match( node.getNodeData().getSequence().getAccession().getValue(), + query, + case_sensitive, + partial, + false ) ) { + match = true; + } + if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) ) + && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { + final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); + I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { + if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold ) + && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) { + match = true; + break I; + } + } + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { + for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { + if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) { + match = true; + break; + } + if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) { + match = true; + break; + } + } + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { + for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { + if ( match( x.getComment(), query, case_sensitive, partial, false ) ) { + match = true; + break; + } + if ( match( x.getSource(), query, case_sensitive, partial, false ) ) { + match = true; + break; + } + if ( match( x.getValue(), query, case_sensitive, partial, false ) ) { + match = true; + break; + } + } + } + if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) ) + && ( node.getNodeData().getBinaryCharacters() != null ) ) { + Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); + I: while ( it.hasNext() ) { + if ( match( it.next(), query, case_sensitive, partial, false ) ) { + match = true; + break I; + } + } + it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); + I: while ( it.hasNext() ) { + if ( match( it.next(), query, case_sensitive, partial, false ) ) { + match = true; + break I; + } + } + } + if ( !match + && ( ndf == NDF.MolecularSequence ) + && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getMolecularSequence(), + query, + case_sensitive, + true, + false ) ) { + match = true; + } + if ( !match ) { + all_matched = false; + break; + } + } + if ( all_matched ) { + nodes.add( node.getId() ); + } + } + return nodes; + } + + public static void setAllIndicatorsToZero( final Phylogeny phy ) { + for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { + it.next().setIndicator( ( byte ) 0 ); + } + } + + /** + * Convenience method. + * Sets value for the first confidence value (created if not present, values overwritten otherwise). + */ + public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) { + setConfidence( node, bootstrap_confidence_value, "bootstrap" ); + } + + public static void setBranchColorValue( final PhylogenyNode node, final Color color ) { + if ( node.getBranchData().getBranchColor() == null ) { + node.getBranchData().setBranchColor( new BranchColor() ); + } + node.getBranchData().getBranchColor().setValue( color ); + } + + /** + * Convenience method + */ + public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) { + node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) ); + } + + /** + * Convenience method. + * Sets value for the first confidence value (created if not present, values overwritten otherwise). + */ + public static void setConfidence( final PhylogenyNode node, final double confidence_value ) { + setConfidence( node, confidence_value, "" ); + } + + /** + * Convenience method. + * Sets value for the first confidence value (created if not present, values overwritten otherwise). + */ + public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) { + Confidence c = null; + if ( node.getBranchData().getNumberOfConfidences() > 0 ) { + c = node.getBranchData().getConfidence( 0 ); + } + else { + c = new Confidence(); + node.getBranchData().addConfidence( c ); + } + c.setType( type ); + c.setValue( confidence_value ); + } + + public static void setScientificName( final PhylogenyNode node, final String scientific_name ) { + if ( !node.getNodeData().isHasTaxonomy() ) { + node.getNodeData().setTaxonomy( new Taxonomy() ); + } + node.getNodeData().getTaxonomy().setScientificName( scientific_name ); + } + + /** + * Convenience method to set the taxonomy code of a phylogeny node. + * + * + * @param node + * @param taxonomy_code + * @throws PhyloXmlDataFormatException + */ + public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code ) + throws PhyloXmlDataFormatException { + if ( !node.getNodeData().isHasTaxonomy() ) { + node.getNodeData().setTaxonomy( new Taxonomy() ); + } + node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code ); + } + + final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) { + Comparator c; + switch ( pri ) { + case SEQUENCE: + c = new PhylogenyNodeSortSequencePriority(); + break; + case NODE_NAME: + c = new PhylogenyNodeSortNodeNamePriority(); + break; + default: + c = new PhylogenyNodeSortTaxonomyPriority(); + } + final List descs = node.getDescendants(); + Collections.sort( descs, c ); + int i = 0; + for( final PhylogenyNode desc : descs ) { + node.setChildNode( i++, desc ); + } + } + + /** + * Removes from Phylogeny to_be_stripped all external Nodes which are + * associated with a species NOT found in Phylogeny reference. + * + * @param reference + * a reference Phylogeny + * @param to_be_stripped + * Phylogeny to be stripped + * @return nodes removed from to_be_stripped + */ + public static List taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, + final Phylogeny to_be_stripped ) { + final Set ref_ext_taxo = new HashSet(); + for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode n = it.next(); + if ( !n.getNodeData().isHasTaxonomy() ) { + throw new IllegalArgumentException( "no taxonomic data in node: " + n ); + } + if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { + ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() ); + } + if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { + ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) { + ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ); + } + } + final ArrayList nodes_to_delete = new ArrayList(); + for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode n = it.next(); + if ( !n.getNodeData().isHasTaxonomy() ) { + nodes_to_delete.add( n ); + } + else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) ) + && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n + .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) { + nodes_to_delete.add( n ); + } + } + for( final PhylogenyNode n : nodes_to_delete ) { + to_be_stripped.deleteSubtree( n, true ); + } + to_be_stripped.clearHashIdToNodeMap(); + to_be_stripped.externalNodesHaveChanged(); + return nodes_to_delete; + } + + final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { + double value = -1; + try { + value = Double.parseDouble( n.getName() ); + } + catch ( final NumberFormatException e ) { + throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " + + e.getLocalizedMessage() ); + } + if ( value >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); + n.setName( "" ); + } + } + } + } + + final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !n.isRoot() ) { + if ( !ForesterUtil.isEmpty( n.getName() ) ) { + double value = -1; + try { + value = Double.parseDouble( n.getName() ); + } + catch ( final NumberFormatException e ) { + return false; + } + if ( ( value < 0.0 ) || ( value > 100 ) ) { + return false; + } + } + } + } + return true; + } + + final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + transferInternalNodeNameToConfidence( confidence_type, it.next() ); + } + } + + private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) { + if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { + if ( !ForesterUtil.isEmpty( n.getName() ) ) { + double d = -1.0; + try { + d = Double.parseDouble( n.getName() ); + } + catch ( final Exception e ) { + d = -1.0; + } + if ( d >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( d, confidence_type ) ); + n.setName( "" ); + } + } + } + } + + final static public void transferNodeNameToField( final Phylogeny phy, + final PhylogenyNodeField field, + final boolean external_only ) throws PhyloXmlDataFormatException { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( external_only && n.isInternal() ) { + continue; + } + final String name = n.getName().trim(); + if ( !ForesterUtil.isEmpty( name ) ) { + switch ( field ) { + case TAXONOMY_CODE: + n.setName( "" ); + setTaxonomyCode( n, name ); + break; + case TAXONOMY_SCIENTIFIC_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setScientificName( name ); + break; + case TAXONOMY_COMMON_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setCommonName( name ); + break; + case SEQUENCE_SYMBOL: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setSymbol( name ); + break; + case SEQUENCE_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setName( name ); + break; + case TAXONOMY_ID_UNIPROT_1: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( 0, i ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + case TAXONOMY_ID_UNIPROT_2: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( i + 1, name.length() ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + case TAXONOMY_ID: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); + break; + } + default: { + throw new IllegalArgumentException( "don't know what to do with " + field ); + } + } + } + } + } + + static double addPhylogenyDistances( final double a, final double b ) { + if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { + return a + b; + } + else if ( a >= 0.0 ) { + return a; + } + else if ( b >= 0.0 ) { + return b; + } + return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; + } + + static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) { + double d = 0; + boolean all_default = true; + while ( anc != desc ) { + if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { + d += desc.getDistanceToParent(); + if ( all_default ) { + all_default = false; + } + } + desc = desc.getParent(); + } + if ( all_default ) { + return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; + } + return d; + } + + /** + * Deep copies the phylogeny originating from this node. + */ + static PhylogenyNode copySubTree( final PhylogenyNode source ) { + if ( source == null ) { + return null; + } + else { + final PhylogenyNode newnode = source.copyNodeData(); + if ( !source.isExternal() ) { + for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { + newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) ); + } + } + return newnode; + } + } + + /** + * Shallow copies the phylogeny originating from this node. + */ + static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) { + if ( source == null ) { + return null; + } + else { + final PhylogenyNode newnode = source.copyNodeDataShallow(); + if ( !source.isExternal() ) { + for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { + newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) ); + } + } + return newnode; + } + } + + private final static List divideIntoSubTreesHelper( final PhylogenyNode node, + final double min_distance_to_root ) { + final List l = new ArrayList(); + final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root ); + for( final PhylogenyNode ext : r.getAllExternalDescendants() ) { + if ( ext.getIndicator() != 0 ) { + throw new RuntimeException( "this should not have happened" ); + } + ext.setIndicator( ( byte ) 1 ); + l.add( ext ); + } + return l; + } + + /** + * Calculates the distance between PhylogenyNodes n1 and n2. + * PRECONDITION: n1 is a descendant of n2. + * + * @param n1 + * a descendant of n2 + * @param n2 + * @return distance between n1 and n2 + */ + private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) { + double d = 0.0; + while ( n1 != n2 ) { + if ( n1.getDistanceToParent() > 0.0 ) { + d += n1.getDistanceToParent(); + } + n1 = n1.getParent(); + } + return d; + } + + private static boolean match( final String s, + final String query, + final boolean case_sensitive, + final boolean partial, + final boolean regex ) { + if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) { + return false; + } + String my_s = s.trim(); + String my_query = query.trim(); + if ( !case_sensitive && !regex ) { + my_s = my_s.toLowerCase(); + my_query = my_query.toLowerCase(); + } + if ( regex ) { + Pattern p = null; + try { + if ( case_sensitive ) { + p = Pattern.compile( my_query ); + } + else { + p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE ); + } + } + catch ( final PatternSyntaxException e ) { + return false; + } + if ( p != null ) { + return p.matcher( my_s ).find(); + } + else { + return false; + } + } + else if ( partial ) { + return my_s.indexOf( my_query ) >= 0; + } + else { + Pattern p = null; + try { + p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" ); + } + catch ( final PatternSyntaxException e ) { + return false; + } + if ( p != null ) { + return p.matcher( my_s ).find(); + } + else { + return false; + } + } + } + + private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) { + PhylogenyNode n = node; + PhylogenyNode prev = node; + while ( min_distance_to_root < n.calculateDistanceToRoot() ) { + prev = n; + n = n.getParent(); + } + return prev; + } + + public static enum DESCENDANT_SORT_PRIORITY { + NODE_NAME, SEQUENCE, TAXONOMY; + } + + public static enum PhylogenyNodeField { + CLADE_NAME, + SEQUENCE_NAME, + SEQUENCE_SYMBOL, + TAXONOMY_CODE, + TAXONOMY_COMMON_NAME, + TAXONOMY_ID, + TAXONOMY_ID_UNIPROT_1, + TAXONOMY_ID_UNIPROT_2, + TAXONOMY_SCIENTIFIC_NAME; + } + + public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) { + for( int s = 0; s < msa.getNumberOfSequences(); ++s ) { + final org.forester.sequence.MolecularSequence seq = msa.getSequence( s ); + final PhylogenyNode node = phy.getNode( seq.getIdentifier() ); + final org.forester.phylogeny.data.Sequence new_seq = new Sequence(); + new_seq.setMolecularSequenceAligned( true ); + new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() ); + new_seq.setName( seq.getIdentifier() ); + try { + new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN ); + } + catch ( final PhyloXmlDataFormatException ignore ) { + // do nothing + } + node.getNodeData().addSequence( new_seq ); + } + } + + final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + } + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + } + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + return 0; + } + } + + final private static class PhylogenyNodeSortSequencePriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + } + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + } + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + return 0; + } + } + + final private static class PhylogenyNodeSortNodeNamePriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + } + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + } + return 0; + } + } +} diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java index d37cc3b..3928bc3 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java @@ -515,6 +515,10 @@ public final class PhylogenyNode implements Comparable { } return _node_data; } + + public final boolean isHasNodeData() { + return ( !( _node_data == null || _node_data.isEmpty() ) ); + } final public int getNumberOfDescendants() { if ( _descendants == null ) { diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 1c76c8d..13d2f33 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -139,8 +139,8 @@ public final class Test { private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); - private final static boolean PERFORM_DB_TESTS = true; - private static final boolean PERFORM_WEB_TREE_ACCESS = true; + private final static boolean PERFORM_DB_TESTS = false; + private static final boolean PERFORM_WEB_TREE_ACCESS = false; private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/" + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; @@ -8916,6 +8916,34 @@ public final class Test { System.out.println( p61.toNewHampshire() ); return false; } + final String s62 = "(1[&type=\"X\",size=123,subtree=(1,2);]:0.003,2[&type=\"(X,Y:3)\"]:0.004)[&type=\"(X,Y)\"]:0.0;"; + final Phylogeny p62 = factory.create( s62, new NHXParser() )[ 0 ]; + if ( !p62.toNewHampshire() + .equals( "(1:0.003,2:0.004):0.0;" ) ) { + System.out.println( p62.toNewHampshire() ); + return false; + } + final String s63 = "(1:0.003[&type=\"X\",size=123,subtree=(1,2);],2:0.004[&type=\"(X,Y:3)\"]):0.0[&type=\"(X,Y)\"];"; + final Phylogeny p63 = factory.create( s63, new NHXParser() )[ 0 ]; + if ( !p63.toNewHampshire() + .equals( "(1:0.003,2:0.004):0.0;" ) ) { + System.out.println( p63.toNewHampshire() ); + return false; + } + final String s64 = "((1,2):[95.5],3);"; + final Phylogeny p64 = factory.create( s64, new NHXParser() )[ 0 ]; + if ( !p64.toNewHampshireX() + .equals( "((1,2)[&&NHX:B=95.5],3)" ) ) { + System.out.println( p64.toNewHampshireX() ); + return false; + } + final String s65 = "((1:0.1,2:0.2):0.3[10.2],3);"; + final Phylogeny p65 = factory.create( s65, new NHXParser() )[ 0 ]; + if ( !p65.toNewHampshireX() + .equals( "((1:0.1,2:0.2):0.3[&&NHX:B=10.2],3)" ) ) { + System.out.println( p65.toNewHampshireX() ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); -- 1.7.10.2