X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fphylogeny%2FPhylogenyMethods.java;h=09db386ca18e72d7ec9111453503d29413611e08;hb=bfd17a215d89bce322b5d77163f0d1d6cedb390c;hp=052b23f7da9619c89e32596c59f9d9defc8f3882;hpb=841c7135a24076a444b2f6652cde00ef0cf60239;p=jalview.git diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 052b23f..09db386 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -79,7 +80,7 @@ public class PhylogenyMethods { * @return distance between node1 and node2 */ public double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) { - final PhylogenyNode lca =calculateLCA( node1, node2 ); + final PhylogenyNode lca = calculateLCA( node1, node2 ); final PhylogenyNode n1 = node1; final PhylogenyNode n2 = node2; return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) ); @@ -114,10 +115,6 @@ public class PhylogenyMethods { return farthest_d; } - final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { - return calculateLCA( n1, n2 ).getNodeData().getEvent(); - } - @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); @@ -131,22 +128,43 @@ public class PhylogenyMethods { return _farthest_2; } - final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { - if ( n.isInternal() ) { - throw new IllegalArgumentException( "node is not external" ); - } - final ArrayList to_delete = new ArrayList(); - for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { - final PhylogenyNode i = it.next(); - if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { - to_delete.add( i ); + public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { + stats.addValue( n.getDistanceToParent() ); } } - for( final PhylogenyNode d : to_delete ) { - phy.deleteSubtree( d, true ); + return stats; + } + + public static List calculatConfidenceStatistics( final Phylogeny phy ) { + final List stats = new ArrayList(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && !n.isRoot() ) { + if ( n.getBranchData().isHasConfidences() ) { + for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { + final Confidence c = n.getBranchData().getConfidences().get( i ); + if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { + stats.add( i, new BasicDescriptiveStatistics() ); + } + if ( !ForesterUtil.isEmpty( c.getType() ) ) { + if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { + if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { + throw new IllegalArgumentException( "support values in node [" + n.toString() + + "] appear inconsistently ordered" ); + } + } + stats.get( i ).setDescription( c.getType() ); + } + stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); + } + } + } } - phy.clearHashIdToNodeMap(); - phy.externalNodesHaveChanged(); + return stats; } /** @@ -158,14 +176,20 @@ public class PhylogenyMethods { * @return LCA of node1 and node2 */ public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) { + if ( node1 == null ) { + throw new IllegalArgumentException( "first argument (node) is null" ); + } + if ( node2 == null ) { + throw new IllegalArgumentException( "second argument (node) is null" ); + } if ( node1 == node2 ) { return node1; } - if (( node1.getParent() == node2.getParent() ) /*&& node1.getParent() != null */ ) { + if ( ( node1.getParent() == node2.getParent() ) ) { return node1.getParent(); } - int depth1 = node1.calculateDepth() ; - int depth2 = node2.calculateDepth() ; + int depth1 = node1.calculateDepth(); + int depth2 = node2.calculateDepth(); while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) { if ( depth1 > depth2 ) { node1 = node1.getParent(); @@ -189,604 +213,145 @@ public class PhylogenyMethods { } /** - * Returns all orthologs of the external PhylogenyNode n of this Phylogeny. - * Orthologs are returned as List of node references. - *

- * PRECONDITION: This tree must be binary and rooted, and speciation - - * duplication need to be assigned for each of its internal Nodes. - *

- * Returns null if this Phylogeny is empty or if n is internal. - * @param n - * external PhylogenyNode whose orthologs are to be returned - * @return Vector of references to all orthologous Nodes of PhylogenyNode n - * of this Phylogeny, null if this Phylogeny is empty or if n is - * internal + * Returns the LCA of PhylogenyNodes node1 and node2. + * Precondition: ids are in pre-order (or level-order). + * + * + * @param node1 + * @param node2 + * @return LCA of node1 and node2 */ - public List getOrthologousNodes( final Phylogeny phy, final PhylogenyNode node ) { - final List nodes = new ArrayList(); - final PhylogenyNodeIterator it = phy.iteratorExternalForward(); - while ( it.hasNext() ) { - final PhylogenyNode temp_node = it.next(); - if ( ( temp_node != node ) && isAreOrthologous( node, temp_node ) ) { - nodes.add( temp_node ); + public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) { + if ( node1 == null ) { + throw new IllegalArgumentException( "first argument (node) is null" ); + } + if ( node2 == null ) { + throw new IllegalArgumentException( "second argument (node) is null" ); + } + while ( node1 != node2 ) { + if ( node1.getId() > node2.getId() ) { + node1 = node1.getParent(); + } + else { + node2 = node2.getParent(); } } - return nodes; + return node1; } - public static boolean isAreOrthologous( final PhylogenyNode node1, final PhylogenyNode node2 ) { - return !calculateLCA( node1, node2 ).isDuplication(); + public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { + if ( node.isExternal() ) { + return 0; + } + short max = 0; + for( PhylogenyNode d : node.getAllExternalDescendants() ) { + short steps = 0; + while ( d != node ) { + if ( d.isCollapse() ) { + steps = 0; + } + else { + steps++; + } + d = d.getParent(); + } + if ( max < steps ) { + max = steps; + } + } + return max; } - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny[] trees = factory.create( file, parser ); - if ( ( trees == null ) || ( trees.length == 0 ) ) { - throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + public static int calculateMaxDepth( final Phylogeny phy ) { + int max = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final int steps = node.calculateDepth(); + if ( steps > max ) { + max = steps; + } } - return trees; + return max; } - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List files ) - throws IOException { - final List tree_list = new ArrayList(); - for( final File file : files ) { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny[] trees = factory.create( file, parser ); - if ( ( trees == null ) || ( trees.length == 0 ) ) { - throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + public static double calculateMaxDistanceToRoot( final Phylogeny phy ) { + double max = 0.0; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final double d = node.calculateDistanceToRoot(); + if ( d > max ) { + max = d; } - tree_list.addAll( Arrays.asList( trees ) ); } - return tree_list.toArray( new Phylogeny[ tree_list.size() ] ); + return max; } - final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { - if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double d = -1.0; - try { - d = Double.parseDouble( n.getName() ); - } - catch ( final Exception e ) { - d = -1.0; - } - if ( d >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( d, "" ) ); - n.setName( "" ); - } - } + public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + int x = 0; + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + x++; } } + return x; } - final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { - double value = -1; - try { - value = Double.parseDouble( n.getName() ); - } - catch ( final NumberFormatException e ) { - throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " - + e.getLocalizedMessage() ); - } - if ( value >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); - n.setName( "" ); - } + public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() ) { + stats.addValue( n.getNumberOfDescendants() ); } } + return stats; } - final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) { - class PhylogenyNodeSortTaxonomyPriority implements Comparator { + public static int countNumberOfPolytomies( final Phylogeny phy ) { + int count = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) { + count++; + } + } + return count; + } - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + public static final HashMap createNameToExtNodeMap( final Phylogeny phy ) { + final HashMap nodes = new HashMap(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + nodes.put( n.getName(), n ); + } + return nodes; + } + + public static void deleteExternalNodesNegativeSelection( final Set to_delete, final Phylogeny phy ) { + phy.clearHashIdToNodeMap(); + for( final Integer id : to_delete ) { + phy.deleteSubtree( phy.getNode( id ), true ); + } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); + } + + public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) + throws IllegalArgumentException { + for( final String element : node_names_to_delete ) { + if ( ForesterUtil.isEmpty( element ) ) { + continue; + } + List nodes = null; + nodes = p.getNodes( element ); + final Iterator it = nodes.iterator(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() ) { + throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" ); } - return 0; - } - } - class PhylogenyNodeSortSequencePriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - return 0; - } - } - class PhylogenyNodeSortNodeNamePriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - return 0; - } - } - Comparator c; - switch ( pri ) { - case SEQUENCE: - c = new PhylogenyNodeSortSequencePriority(); - break; - case NODE_NAME: - c = new PhylogenyNodeSortNodeNamePriority(); - break; - default: - c = new PhylogenyNodeSortTaxonomyPriority(); - } - final List descs = node.getDescendants(); - Collections.sort( descs, c ); - int i = 0; - for( final PhylogenyNode desc : descs ) { - node.setChildNode( i++, desc ); - } - } - - final static public void transferNodeNameToField( final Phylogeny phy, - final PhylogenyMethods.PhylogenyNodeField field, - final boolean external_only ) throws PhyloXmlDataFormatException { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( external_only && n.isInternal() ) { - continue; - } - final String name = n.getName().trim(); - if ( !ForesterUtil.isEmpty( name ) ) { - switch ( field ) { - case TAXONOMY_CODE: - n.setName( "" ); - setTaxonomyCode( n, name ); - break; - case TAXONOMY_SCIENTIFIC_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setScientificName( name ); - break; - case TAXONOMY_COMMON_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setCommonName( name ); - break; - case SEQUENCE_SYMBOL: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setSymbol( name ); - break; - case SEQUENCE_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setName( name ); - break; - case TAXONOMY_ID_UNIPROT_1: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( 0, i ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - case TAXONOMY_ID_UNIPROT_2: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( i + 1, name.length() ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - case TAXONOMY_ID: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); - break; - } - } - } - } - } - - static double addPhylogenyDistances( final double a, final double b ) { - if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { - return a + b; - } - else if ( a >= 0.0 ) { - return a; - } - else if ( b >= 0.0 ) { - return b; - } - return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; - } - - // Helper for getUltraParalogousNodes( PhylogenyNode ). - public static boolean areAllChildrenDuplications( final PhylogenyNode n ) { - if ( n.isExternal() ) { - return false; - } - else { - if ( n.isDuplication() ) { - //FIXME test me! - for( final PhylogenyNode desc : n.getDescendants() ) { - if ( !areAllChildrenDuplications( desc ) ) { - return false; - } - } - return true; - } - else { - return false; - } - } - } - - public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { - if ( node.isExternal() ) { - return 0; - } - short max = 0; - for( PhylogenyNode d : node.getAllExternalDescendants() ) { - short steps = 0; - while ( d != node ) { - if ( d.isCollapse() ) { - steps = 0; - } - else { - steps++; - } - d = d.getParent(); - } - if ( max < steps ) { - max = steps; - } - } - return max; - } - - public static int calculateMaxDepth( final Phylogeny phy ) { - int max = 0; - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final int steps = node.calculateDepth(); - if ( steps > max ) { - max = steps; - } - } - return max; - } - - public static double calculateMaxDistanceToRoot( final Phylogeny phy ) { - double max = 0.0; - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final double d = node.calculateDistanceToRoot(); - if ( d > max ) { - max = d; - } - } - return max; - } - - public static int countNumberOfPolytomies( final Phylogeny phy ) { - int count = 0; - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) { - count++; - } - } - return count; - } - - public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() ) { - stats.addValue( n.getNumberOfDescendants() ); - } - } - return stats; - } - - public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { - stats.addValue( n.getDistanceToParent() ); - } - } - return stats; - } - - public static List calculatConfidenceStatistics( final Phylogeny phy ) { - final List stats = new ArrayList(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() && !n.isRoot() ) { - if ( n.getBranchData().isHasConfidences() ) { - for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { - final Confidence c = n.getBranchData().getConfidences().get( i ); - if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { - stats.add( i, new BasicDescriptiveStatistics() ); - } - if ( !ForesterUtil.isEmpty( c.getType() ) ) { - if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { - if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { - throw new IllegalArgumentException( "support values in node [" + n.toString() - + "] appear inconsistently ordered" ); - } - } - stats.get( i ).setDescription( c.getType() ); - } - stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); - } - } - } - } - return stats; - } - - /** - * Returns the set of distinct taxonomies of - * all external nodes of node. - * If at least one the external nodes has no taxonomy, - * null is returned. - * - */ - public static Set obtainDistinctTaxonomies( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - final Set tax_set = new HashSet(); - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - return null; - } - tax_set.add( n.getNodeData().getTaxonomy() ); - } - return tax_set; - } - - /** - * Returns a map of distinct taxonomies of - * all external nodes of node. - * If at least one of the external nodes has no taxonomy, - * null is returned. - * - */ - public static SortedMap obtainDistinctTaxonomyCounts( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - final SortedMap tax_map = new TreeMap(); - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - return null; - } - final Taxonomy t = n.getNodeData().getTaxonomy(); - if ( tax_map.containsKey( t ) ) { - tax_map.put( t, tax_map.get( t ) + 1 ); - } - else { - tax_map.put( t, 1 ); - } - } - return tax_map; - } - - public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - int x = 0; - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - x++; - } - } - return x; - } - - /** - * Deep copies the phylogeny originating from this node. - */ - static PhylogenyNode copySubTree( final PhylogenyNode source ) { - if ( source == null ) { - return null; - } - else { - final PhylogenyNode newnode = source.copyNodeData(); - if ( !source.isExternal() ) { - for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { - newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) ); - } - } - return newnode; - } - } - - /** - * Shallow copies the phylogeny originating from this node. - */ - static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) { - if ( source == null ) { - return null; - } - else { - final PhylogenyNode newnode = source.copyNodeDataShallow(); - if ( !source.isExternal() ) { - for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { - newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) ); - } - } - return newnode; - } - } - - public static void deleteExternalNodesNegativeSelection( final Set to_delete, final Phylogeny phy ) { - phy.clearHashIdToNodeMap(); - for( final Integer id : to_delete ) { - phy.deleteSubtree( phy.getNode( id ), true ); - } - phy.clearHashIdToNodeMap(); - phy.externalNodesHaveChanged(); - } - - public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) - throws IllegalArgumentException { - for( final String element : node_names_to_delete ) { - if ( ForesterUtil.isEmpty( element ) ) { - continue; - } - List nodes = null; - nodes = p.getNodes( element ); - final Iterator it = nodes.iterator(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() ) { - throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" ); - } - p.deleteSubtree( n, true ); + p.deleteSubtree( n, true ); } } p.clearHashIdToNodeMap(); @@ -833,6 +398,24 @@ public class PhylogenyMethods { return deleted; } + final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { + if ( n.isInternal() ) { + throw new IllegalArgumentException( "node is not external" ); + } + final ArrayList to_delete = new ArrayList(); + for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode i = it.next(); + if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { + to_delete.add( i ); + } + } + for( final PhylogenyNode d : to_delete ) { + phy.deleteSubtree( d, true ); + } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); + } + public static List getAllDescendants( final PhylogenyNode node ) { final List descs = new ArrayList(); final Set encountered = new HashSet(); @@ -902,24 +485,8 @@ public class PhylogenyMethods { return values; } - /** - * Calculates the distance between PhylogenyNodes n1 and n2. - * PRECONDITION: n1 is a descendant of n2. - * - * @param n1 - * a descendant of n2 - * @param n2 - * @return distance between n1 and n2 - */ - private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) { - double d = 0.0; - while ( n1 != n2 ) { - if ( n1.getDistanceToParent() > 0.0 ) { - d += n1.getDistanceToParent(); - } - n1 = n1.getParent(); - } - return d; + final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { + return calculateLCA( n1, n2 ).getNodeData().getEvent(); } /** @@ -1014,67 +581,6 @@ public class PhylogenyMethods { } /** - * Returns all Nodes which are connected to external PhylogenyNode n of this - * Phylogeny by a path containing only speciation events. We call these - * "super orthologs". Nodes are returned as Vector of references to Nodes. - *

- * PRECONDITION: This tree must be binary and rooted, and speciation - - * duplication need to be assigned for each of its internal Nodes. - *

- * Returns null if this Phylogeny is empty or if n is internal. - * @param n - * external PhylogenyNode whose strictly speciation related Nodes - * are to be returned - * @return Vector of references to all strictly speciation related Nodes of - * PhylogenyNode n of this Phylogeny, null if this Phylogeny is - * empty or if n is internal - */ - public static List getSuperOrthologousNodes( final PhylogenyNode n ) { - // FIXME - PhylogenyNode node = n, deepest = null; - final List v = new ArrayList(); - if ( !node.isExternal() ) { - return null; - } - while ( !node.isRoot() && !node.getParent().isDuplication() ) { - node = node.getParent(); - } - deepest = node; - deepest.setIndicatorsToZero(); - do { - if ( !node.isExternal() ) { - if ( node.getIndicator() == 0 ) { - node.setIndicator( ( byte ) 1 ); - if ( !node.isDuplication() ) { - node = node.getChildNode1(); - } - } - if ( node.getIndicator() == 1 ) { - node.setIndicator( ( byte ) 2 ); - if ( !node.isDuplication() ) { - node = node.getChildNode2(); - } - } - if ( ( node != deepest ) && ( node.getIndicator() == 2 ) ) { - node = node.getParent(); - } - } - else { - if ( node != n ) { - v.add( node ); - } - if ( node != deepest ) { - node = node.getParent(); - } - else { - node.setIndicator( ( byte ) 2 ); - } - } - } while ( ( node != deepest ) || ( deepest.getIndicator() != 2 ) ); - return v; - } - - /** * Convenience method for display purposes. * Not intended for algorithms. */ @@ -1085,73 +591,23 @@ public class PhylogenyMethods { return node.getNodeData().getTaxonomy().getIdentifier().getValue(); } - /** - * Returns all Nodes which are connected to external PhylogenyNode n of this - * Phylogeny by a path containing, and leading to, only duplication events. - * We call these "ultra paralogs". Nodes are returned as Vector of - * references to Nodes. - *

- * PRECONDITION: This tree must be binary and rooted, and speciation - - * duplication need to be assigned for each of its internal Nodes. - *

- * Returns null if this Phylogeny is empty or if n is internal. - *

- * (Last modified: 10/06/01) - * - * @param n - * external PhylogenyNode whose ultra paralogs are to be returned - * @return Vector of references to all ultra paralogs of PhylogenyNode n of - * this Phylogeny, null if this Phylogeny is empty or if n is - * internal - */ - public static List getUltraParalogousNodes( final PhylogenyNode n ) { - // FIXME test me - PhylogenyNode node = n; - if ( !node.isExternal() ) { - return null; - } - while ( !node.isRoot() && node.getParent().isDuplication() && areAllChildrenDuplications( node.getParent() ) ) { - node = node.getParent(); + public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) { + if ( n.isExternal() ) { + return true; } - final List nodes = node.getAllExternalDescendants(); - nodes.remove( n ); - return nodes; - } - - public static String inferCommonPartOfScientificNameOfDescendants( final PhylogenyNode node ) { - final List descs = node.getDescendants(); - String sn = null; - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() - || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { - return null; - } - else if ( sn == null ) { - sn = n.getNodeData().getTaxonomy().getScientificName().trim(); - } - else { - String sn_current = n.getNodeData().getTaxonomy().getScientificName().trim(); - if ( !sn.equals( sn_current ) ) { - boolean overlap = false; - while ( ( sn.indexOf( ' ' ) >= 0 ) || ( sn_current.indexOf( ' ' ) >= 0 ) ) { - if ( ForesterUtil.countChars( sn, ' ' ) > ForesterUtil.countChars( sn_current, ' ' ) ) { - sn = sn.substring( 0, sn.lastIndexOf( ' ' ) ).trim(); - } - else { - sn_current = sn_current.substring( 0, sn_current.lastIndexOf( ' ' ) ).trim(); - } - if ( sn.equals( sn_current ) ) { - overlap = true; - break; - } - } - if ( !overlap ) { - return null; + else { + if ( n.isDuplication() ) { + for( final PhylogenyNode desc : n.getDescendants() ) { + if ( !isAllDecendentsAreDuplications( desc ) ) { + return false; } } + return true; + } + else { + return false; } } - return sn; } public static boolean isHasExternalDescendant( final PhylogenyNode node ) { @@ -1183,27 +639,6 @@ public class PhylogenyMethods { } } - private static boolean match( final String s, - final String query, - final boolean case_sensitive, - final boolean partial ) { - if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) { - return false; - } - String my_s = s.trim(); - String my_query = query.trim(); - if ( !case_sensitive ) { - my_s = my_s.toLowerCase(); - my_query = my_query.toLowerCase(); - } - if ( partial ) { - return my_s.indexOf( my_query ) >= 0; - } - else { - return my_s.equals( my_query ); - } - } - public static void midpointRoot( final Phylogeny phylogeny ) { if ( phylogeny.getNumberOfExternalNodes() < 2 ) { return; @@ -1237,36 +672,123 @@ public class PhylogenyMethods { } } - public static void normalizeBootstrapValues( final Phylogeny phylogeny, - final double max_bootstrap_value, - final double max_normalized_value ) { - for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( node.isInternal() ) { - final double confidence = getConfidenceValue( node ); - if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) { - if ( confidence >= max_bootstrap_value ) { - setBootstrapConfidence( node, max_normalized_value ); - } - else { - setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value ); + public static void normalizeBootstrapValues( final Phylogeny phylogeny, + final double max_bootstrap_value, + final double max_normalized_value ) { + for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.isInternal() ) { + final double confidence = getConfidenceValue( node ); + if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) { + if ( confidence >= max_bootstrap_value ) { + setBootstrapConfidence( node, max_normalized_value ); + } + else { + setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value ); + } + } + } + } + } + + public static List obtainAllNodesAsList( final Phylogeny phy ) { + final List nodes = new ArrayList(); + if ( phy.isEmpty() ) { + return nodes; + } + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + nodes.add( iter.next() ); + } + return nodes; + } + + /** + * Returns the set of distinct taxonomies of + * all external nodes of node. + * If at least one the external nodes has no taxonomy, + * null is returned. + * + */ + public static Set obtainDistinctTaxonomies( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + final Set tax_set = new HashSet(); + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + return null; + } + tax_set.add( n.getNodeData().getTaxonomy() ); + } + return tax_set; + } + + /** + * Returns a map of distinct taxonomies of + * all external nodes of node. + * If at least one of the external nodes has no taxonomy, + * null is returned. + * + */ + public static SortedMap obtainDistinctTaxonomyCounts( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + final SortedMap tax_map = new TreeMap(); + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + return null; + } + final Taxonomy t = n.getNodeData().getTaxonomy(); + if ( tax_map.containsKey( t ) ) { + tax_map.put( t, tax_map.get( t ) + 1 ); + } + else { + tax_map.put( t, 1 ); + } + } + return tax_map; + } + + /** + * Arranges the order of childern for each node of this Phylogeny in such a + * way that either the branch with more children is on top (right) or on + * bottom (left), dependent on the value of boolean order. + * + * @param order + * decides in which direction to order + * @param pri + */ + public static void orderAppearance( final PhylogenyNode n, + final boolean order, + final boolean order_ext_alphabetically, + final DESCENDANT_SORT_PRIORITY pri ) { + if ( n.isExternal() ) { + return; + } + else { + PhylogenyNode temp = null; + if ( ( n.getNumberOfDescendants() == 2 ) + && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) + && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { + temp = n.getChildNode1(); + n.setChild1( n.getChildNode2() ); + n.setChild2( temp ); + } + else if ( order_ext_alphabetically ) { + boolean all_ext = true; + for( final PhylogenyNode i : n.getDescendants() ) { + if ( !i.isExternal() ) { + all_ext = false; + break; } } + if ( all_ext ) { + PhylogenyMethods.sortNodeDescendents( n, pri ); + } + } + for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { + orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri ); } } } - public static List obtainAllNodesAsList( final Phylogeny phy ) { - final List nodes = new ArrayList(); - if ( phy.isEmpty() ) { - return nodes; - } - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - nodes.add( iter.next() ); - } - return nodes; - } - public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) { for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); @@ -1294,6 +816,41 @@ public class PhylogenyMethods { } } + public static final void preOrderReId( final Phylogeny phy ) { + if ( phy.isEmpty() ) { + return; + } + phy.setIdToNodeMap( null ); + int i = PhylogenyNode.getNodeCount(); + for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { + it.next().setId( i++ ); + } + PhylogenyNode.setNodeCount( i ); + } + + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + return trees; + } + + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List files ) + throws IOException { + final List tree_list = new ArrayList(); + for( final File file : files ) { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + tree_list.addAll( Arrays.asList( trees ) ); + } + return tree_list.toArray( new Phylogeny[ tree_list.size() ] ); + } + public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) { if ( remove_me.isRoot() ) { throw new IllegalArgumentException( "ill advised attempt to remove root node" ); @@ -1587,7 +1144,165 @@ public class PhylogenyMethods { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); } - node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code ); + node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code ); + } + + final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) { + class PhylogenyNodeSortTaxonomyPriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { + return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); + } + } + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + if ( ( n1.getNodeData().getSequence().getAccession() != null ) + && ( n2.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) + && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { + return n1.getNodeData().getSequence().getAccession().getValue() + .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); + } + } + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + return 0; + } + } + class PhylogenyNodeSortSequencePriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + if ( ( n1.getNodeData().getSequence().getAccession() != null ) + && ( n2.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) + && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { + return n1.getNodeData().getSequence().getAccession().getValue() + .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); + } + } + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { + return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); + } + } + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + return 0; + } + } + class PhylogenyNodeSortNodeNamePriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { + return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); + } + } + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + if ( ( n1.getNodeData().getSequence().getAccession() != null ) + && ( n2.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) + && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { + return n1.getNodeData().getSequence().getAccession().getValue() + .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); + } + } + return 0; + } + } + Comparator c; + switch ( pri ) { + case SEQUENCE: + c = new PhylogenyNodeSortSequencePriority(); + break; + case NODE_NAME: + c = new PhylogenyNodeSortNodeNamePriority(); + break; + default: + c = new PhylogenyNodeSortTaxonomyPriority(); + } + final List descs = node.getDescendants(); + Collections.sort( descs, c ); + int i = 0; + for( final PhylogenyNode desc : descs ) { + node.setChildNode( i++, desc ); + } } /** @@ -1598,9 +1313,10 @@ public class PhylogenyMethods { * a reference Phylogeny * @param to_be_stripped * Phylogeny to be stripped - * @return number of external nodes removed from to_be_stripped + * @return nodes removed from to_be_stripped */ - public static int taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, final Phylogeny to_be_stripped ) { + public static List taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, + final Phylogeny to_be_stripped ) { final Set ref_ext_taxo = new HashSet(); for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); @@ -1615,10 +1331,6 @@ public class PhylogenyMethods { ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() ); } } - System.out.println( " ref_ext_tax:" ); - for( final String string : ref_ext_taxo ) { - System.out.println( string ); - } final ArrayList nodes_to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); @@ -1630,59 +1342,237 @@ public class PhylogenyMethods { nodes_to_delete.add( n ); } } - System.out.println( " to delete:" ); - for( final PhylogenyNode string : nodes_to_delete ) { - System.out.println( string.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - for( final PhylogenyNode phylogenyNode : nodes_to_delete ) { - to_be_stripped.deleteSubtree( phylogenyNode, true ); + for( final PhylogenyNode n : nodes_to_delete ) { + to_be_stripped.deleteSubtree( n, true ); } to_be_stripped.clearHashIdToNodeMap(); to_be_stripped.externalNodesHaveChanged(); - return nodes_to_delete.size(); + return nodes_to_delete; } - /** - * Arranges the order of childern for each node of this Phylogeny in such a - * way that either the branch with more children is on top (right) or on - * bottom (left), dependent on the value of boolean order. - * - * @param order - * decides in which direction to order - * @param pri - */ - public static void orderAppearance( final PhylogenyNode n, - final boolean order, - final boolean order_ext_alphabetically, - final DESCENDANT_SORT_PRIORITY pri ) { - if ( n.isExternal() ) { - return; + final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { + double value = -1; + try { + value = Double.parseDouble( n.getName() ); + } + catch ( final NumberFormatException e ) { + throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " + + e.getLocalizedMessage() ); + } + if ( value >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); + n.setName( "" ); + } + } } - else { - PhylogenyNode temp = null; - if ( ( n.getNumberOfDescendants() == 2 ) - && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) - && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { - temp = n.getChildNode1(); - n.setChild1( n.getChildNode2() ); - n.setChild2( temp ); + } + + final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { + if ( !ForesterUtil.isEmpty( n.getName() ) ) { + double d = -1.0; + try { + d = Double.parseDouble( n.getName() ); + } + catch ( final Exception e ) { + d = -1.0; + } + if ( d >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( d, "" ) ); + n.setName( "" ); + } + } } - else if ( order_ext_alphabetically ) { - boolean all_ext = true; - for( final PhylogenyNode i : n.getDescendants() ) { - if ( !i.isExternal() ) { - all_ext = false; + } + } + + final static public void transferNodeNameToField( final Phylogeny phy, + final PhylogenyNodeField field, + final boolean external_only ) throws PhyloXmlDataFormatException { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( external_only && n.isInternal() ) { + continue; + } + final String name = n.getName().trim(); + if ( !ForesterUtil.isEmpty( name ) ) { + switch ( field ) { + case TAXONOMY_CODE: + n.setName( "" ); + setTaxonomyCode( n, name ); + break; + case TAXONOMY_SCIENTIFIC_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setScientificName( name ); + break; + case TAXONOMY_COMMON_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setCommonName( name ); + break; + case SEQUENCE_SYMBOL: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setSymbol( name ); + break; + case SEQUENCE_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setName( name ); + break; + case TAXONOMY_ID_UNIPROT_1: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( 0, i ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + case TAXONOMY_ID_UNIPROT_2: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( i + 1, name.length() ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + case TAXONOMY_ID: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); break; } } - if ( all_ext ) { - PhylogenyMethods.sortNodeDescendents( n, pri ); + } + } + } + + static double addPhylogenyDistances( final double a, final double b ) { + if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { + return a + b; + } + else if ( a >= 0.0 ) { + return a; + } + else if ( b >= 0.0 ) { + return b; + } + return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; + } + + /** + * Deep copies the phylogeny originating from this node. + */ + static PhylogenyNode copySubTree( final PhylogenyNode source ) { + if ( source == null ) { + return null; + } + else { + final PhylogenyNode newnode = source.copyNodeData(); + if ( !source.isExternal() ) { + for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { + newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) ); } } - for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { - orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri ); + return newnode; + } + } + + /** + * Shallow copies the phylogeny originating from this node. + */ + static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) { + if ( source == null ) { + return null; + } + else { + final PhylogenyNode newnode = source.copyNodeDataShallow(); + if ( !source.isExternal() ) { + for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { + newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) ); + } + } + return newnode; + } + } + + /** + * Calculates the distance between PhylogenyNodes n1 and n2. + * PRECONDITION: n1 is a descendant of n2. + * + * @param n1 + * a descendant of n2 + * @param n2 + * @return distance between n1 and n2 + */ + private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) { + double d = 0.0; + while ( n1 != n2 ) { + if ( n1.getDistanceToParent() > 0.0 ) { + d += n1.getDistanceToParent(); } + n1 = n1.getParent(); } + return d; + } + + private static boolean match( final String s, + final String query, + final boolean case_sensitive, + final boolean partial ) { + if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) { + return false; + } + String my_s = s.trim(); + String my_query = query.trim(); + if ( !case_sensitive ) { + my_s = my_s.toLowerCase(); + my_query = my_query.toLowerCase(); + } + if ( partial ) { + return my_s.indexOf( my_query ) >= 0; + } + else { + return my_s.equals( my_query ); + } + } + + public static enum DESCENDANT_SORT_PRIORITY { + TAXONOMY, SEQUENCE, NODE_NAME; } public static enum PhylogenyNodeField { @@ -1696,12 +1586,4 @@ public class PhylogenyMethods { TAXONOMY_ID_UNIPROT_2, TAXONOMY_ID; } - - public static enum TAXONOMY_EXTRACTION { - NO, YES, PFAM_STYLE_ONLY; - } - - public static enum DESCENDANT_SORT_PRIORITY { - TAXONOMY, SEQUENCE, NODE_NAME; - } }