X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fphylogeny%2FPhylogenyMethods.java;h=f839fd6ce43d34017dfff00a7164d9f2a2843304;hb=5b11e68cf46c363bd5ae256f794871c194776bd7;hp=4c8616602766cf8016ca6c74f98b74529cd6591f;hpb=72c535142a5e6b0da9c7edb2f605eb835b43e6fb;p=jalview.git diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 4c86166..f839fd6 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -79,7 +80,7 @@ public class PhylogenyMethods { * @return distance between node1 and node2 */ public double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) { - final PhylogenyNode lca = obtainLCA( node1, node2 ); + final PhylogenyNode lca = calculateLCA( node1, node2 ); final PhylogenyNode n1 = node1; final PhylogenyNode n2 = node2; return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) ); @@ -115,7 +116,7 @@ public class PhylogenyMethods { } final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { - return obtainLCA( n1, n2 ).getNodeData().getEvent(); + return calculateLCA( n1, n2 ).getNodeData().getEvent(); } @Override @@ -157,22 +158,80 @@ public class PhylogenyMethods { * @param node2 * @return LCA of node1 and node2 */ - public final static PhylogenyNode obtainLCA( final PhylogenyNode node1, final PhylogenyNode node2 ) { - final HashSet ids_set = new HashSet(); - PhylogenyNode n1 = node1; - PhylogenyNode n2 = node2; - ids_set.add( n1.getId() ); - while ( !n1.isRoot() ) { - n1 = n1.getParent(); - ids_set.add( n1.getId() ); + public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) { + if ( node1 == null ) { + throw new IllegalArgumentException( "first argument (node) is null" ); + } + if ( node2 == null ) { + throw new IllegalArgumentException( "second argument (node) is null" ); + } + if ( node1 == node2 ) { + return node1; + } + if ( ( node1.getParent() == node2.getParent() ) ) { + return node1.getParent(); + } + int depth1 = node1.calculateDepth(); + int depth2 = node2.calculateDepth(); + while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) { + if ( depth1 > depth2 ) { + node1 = node1.getParent(); + depth1--; + } + else if ( depth2 > depth1 ) { + node2 = node2.getParent(); + depth2--; + } + else { + if ( node1 == node2 ) { + return node1; + } + node1 = node1.getParent(); + node2 = node2.getParent(); + depth1--; + depth2--; + } } - while ( !ids_set.contains( n2.getId() ) && !n2.isRoot() ) { - n2 = n2.getParent(); + throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" ); + } + + public static final void preOrderReId( final Phylogeny phy ) { + if ( phy.isEmpty() ) { + return; } - if ( !ids_set.contains( n2.getId() ) ) { - throw new IllegalArgumentException( "attempt to get LCA of two nodes which do not share a common root" ); + phy.setIdToNodeMap( null ); + int i = PhylogenyNode.getNodeCount(); + for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { + it.next().setId( i++ ); } - return n2; + PhylogenyNode.setNodeCount( i ); + } + + /** + * Returns the LCA of PhylogenyNodes node1 and node2. + * Precondition: ids are in pre-order (or level-order). + * + * + * @param node1 + * @param node2 + * @return LCA of node1 and node2 + */ + public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) { + if ( node1 == null ) { + throw new IllegalArgumentException( "first argument (node) is null" ); + } + if ( node2 == null ) { + throw new IllegalArgumentException( "second argument (node) is null" ); + } + while ( node1 != node2 ) { + if ( node1.getId() > node2.getId() ) { + node1 = node1.getParent(); + } + else { + node2 = node2.getParent(); + } + } + return node1; } /** @@ -189,20 +248,26 @@ public class PhylogenyMethods { * of this Phylogeny, null if this Phylogeny is empty or if n is * internal */ - public List getOrthologousNodes( final Phylogeny phy, final PhylogenyNode node ) { + public final static List getOrthologousNodes( final Phylogeny phy, final PhylogenyNode node ) { final List nodes = new ArrayList(); + PhylogenyMethods.preOrderReId( phy ); final PhylogenyNodeIterator it = phy.iteratorExternalForward(); while ( it.hasNext() ) { final PhylogenyNode temp_node = it.next(); - if ( ( temp_node != node ) && isAreOrthologous( node, temp_node ) ) { + if ( ( temp_node != node ) && !calculateLCAonTreeWithIdsInPreOrder( node, temp_node ).isDuplication() ) { nodes.add( temp_node ); } } return nodes; } - public boolean isAreOrthologous( final PhylogenyNode node1, final PhylogenyNode node2 ) { - return !obtainLCA( node1, node2 ).isDuplication(); + public static final HashMap createNameToExtNodeMap( final Phylogeny phy ) { + final HashMap nodes = new HashMap(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + nodes.put( n.getName(), n ); + } + return nodes; } public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { @@ -530,16 +595,14 @@ public class PhylogenyMethods { return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; } - // Helper for getUltraParalogousNodes( PhylogenyNode ). - public static boolean areAllChildrenDuplications( final PhylogenyNode n ) { + public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) { if ( n.isExternal() ) { - return false; + return true; } else { if ( n.isDuplication() ) { - //FIXME test me! for( final PhylogenyNode desc : n.getDescendants() ) { - if ( !areAllChildrenDuplications( desc ) ) { + if ( !isAllDecendentsAreDuplications( desc ) ) { return false; } } @@ -551,28 +614,6 @@ public class PhylogenyMethods { } } - public static int calculateDepth( final PhylogenyNode node ) { - PhylogenyNode n = node; - int steps = 0; - while ( !n.isRoot() ) { - steps++; - n = n.getParent(); - } - return steps; - } - - public static double calculateDistanceToRoot( final PhylogenyNode node ) { - PhylogenyNode n = node; - double d = 0.0; - while ( !n.isRoot() ) { - if ( n.getDistanceToParent() > 0.0 ) { - d += n.getDistanceToParent(); - } - n = n.getParent(); - } - return d; - } - public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { if ( node.isExternal() ) { return 0; @@ -600,7 +641,7 @@ public class PhylogenyMethods { int max = 0; for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); - final int steps = calculateDepth( node ); + final int steps = node.calculateDepth(); if ( steps > max ) { max = steps; } @@ -612,7 +653,7 @@ public class PhylogenyMethods { double max = 0.0; for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); - final double d = calculateDistanceToRoot( node ); + final double d = node.calculateDistanceToRoot(); if ( d > max ) { max = d; } @@ -783,18 +824,17 @@ public class PhylogenyMethods { public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) throws IllegalArgumentException { - for( int i = 0; i < node_names_to_delete.length; ++i ) { - if ( ForesterUtil.isEmpty( node_names_to_delete[ i ] ) ) { + for( final String element : node_names_to_delete ) { + if ( ForesterUtil.isEmpty( element ) ) { continue; } List nodes = null; - nodes = p.getNodes( node_names_to_delete[ i ] ); + nodes = p.getNodes( element ); final Iterator it = nodes.iterator(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() ) { - throw new IllegalArgumentException( "attempt to delete non-external node \"" - + node_names_to_delete[ i ] + "\"" ); + throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" ); } p.deleteSubtree( n, true ); } @@ -1035,13 +1075,14 @@ public class PhylogenyMethods { * @param n * external PhylogenyNode whose strictly speciation related Nodes * are to be returned - * @return Vector of references to all strictly speciation related Nodes of + * @return References to all strictly speciation related Nodes of * PhylogenyNode n of this Phylogeny, null if this Phylogeny is * empty or if n is internal */ public static List getSuperOrthologousNodes( final PhylogenyNode n ) { // FIXME - PhylogenyNode node = n, deepest = null; + PhylogenyNode node = n; + PhylogenyNode deepest = null; final List v = new ArrayList(); if ( !node.isExternal() ) { return null; @@ -1118,9 +1159,9 @@ public class PhylogenyMethods { // FIXME test me PhylogenyNode node = n; if ( !node.isExternal() ) { - return null; + throw new IllegalArgumentException( "attempt to get ultra-paralogous nodes of internal node" ); } - while ( !node.isRoot() && node.getParent().isDuplication() && areAllChildrenDuplications( node.getParent() ) ) { + while ( !node.isRoot() && node.getParent().isDuplication() && isAllDecendentsAreDuplications( node.getParent() ) ) { node = node.getParent(); } final List nodes = node.getAllExternalDescendants(); @@ -1128,42 +1169,6 @@ public class PhylogenyMethods { return nodes; } - public static String inferCommonPartOfScientificNameOfDescendants( final PhylogenyNode node ) { - final List descs = node.getDescendants(); - String sn = null; - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() - || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { - return null; - } - else if ( sn == null ) { - sn = n.getNodeData().getTaxonomy().getScientificName().trim(); - } - else { - String sn_current = n.getNodeData().getTaxonomy().getScientificName().trim(); - if ( !sn.equals( sn_current ) ) { - boolean overlap = false; - while ( ( sn.indexOf( ' ' ) >= 0 ) || ( sn_current.indexOf( ' ' ) >= 0 ) ) { - if ( ForesterUtil.countChars( sn, ' ' ) > ForesterUtil.countChars( sn_current, ' ' ) ) { - sn = sn.substring( 0, sn.lastIndexOf( ' ' ) ).trim(); - } - else { - sn_current = sn_current.substring( 0, sn_current.lastIndexOf( ' ' ) ).trim(); - } - if ( sn.equals( sn_current ) ) { - overlap = true; - break; - } - } - if ( !overlap ) { - return null; - } - } - } - } - return sn; - } - public static boolean isHasExternalDescendant( final PhylogenyNode node ) { for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { if ( node.getChildNode( i ).isExternal() ) { @@ -1331,7 +1336,8 @@ public class PhylogenyMethods { public static List searchData( final String query, final Phylogeny phy, final boolean case_sensitive, - final boolean partial ) { + final boolean partial, + final boolean search_domains ) { final List nodes = new ArrayList(); if ( phy.isEmpty() || ( query == null ) ) { return nodes; @@ -1391,7 +1397,7 @@ public class PhylogenyMethods { partial ) ) { match = true; } - if ( !match && node.getNodeData().isHasSequence() + if ( search_domains && !match && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { @@ -1427,7 +1433,8 @@ public class PhylogenyMethods { public static List searchDataLogicalAnd( final String[] queries, final Phylogeny phy, final boolean case_sensitive, - final boolean partial ) { + final boolean partial, + final boolean search_domains ) { final List nodes = new ArrayList(); if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) { return nodes; @@ -1490,7 +1497,7 @@ public class PhylogenyMethods { partial ) ) { match = true; } - if ( !match && node.getNodeData().isHasSequence() + if ( search_domains && !match && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { @@ -1515,22 +1522,6 @@ public class PhylogenyMethods { break I; } } - // final String[] bcp_ary = node.getNodeData().getBinaryCharacters() - // .getPresentCharactersAsStringArray(); - // I: for( final String bc : bcp_ary ) { - // if ( match( bc, query, case_sensitive, partial ) ) { - // match = true; - // break I; - // } - // } - // final String[] bcg_ary = node.getNodeData().getBinaryCharacters() - // .getGainedCharactersAsStringArray(); - // I: for( final String bc : bcg_ary ) { - // if ( match( bc, query, case_sensitive, partial ) ) { - // match = true; - // break I; - // } - // } } if ( !match ) { all_matched = false; @@ -1626,13 +1617,27 @@ public class PhylogenyMethods { */ public static int taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, final Phylogeny to_be_stripped ) { final Set ref_ext_taxo = new HashSet(); - final ArrayList nodes_to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) { - ref_ext_taxo.add( getSpecies( it.next() ) ); + final PhylogenyNode n = it.next(); + if ( !n.getNodeData().isHasTaxonomy() ) { + throw new IllegalArgumentException( "no taxonomic data in node: " + n ); + } + // ref_ext_taxo.add( getSpecies( n ) ); + if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { + ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() ); + } + if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { + ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() ); + } } + final ArrayList nodes_to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); - if ( !ref_ext_taxo.contains( getSpecies( n ) ) ) { + if ( !n.getNodeData().isHasTaxonomy() ) { + nodes_to_delete.add( n ); + } + else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) ) + && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { nodes_to_delete.add( n ); } } @@ -1699,10 +1704,6 @@ public class PhylogenyMethods { TAXONOMY_ID; } - public static enum TAXONOMY_EXTRACTION { - NO, YES, PFAM_STYLE_ONLY; - } - public static enum DESCENDANT_SORT_PRIORITY { TAXONOMY, SEQUENCE, NODE_NAME; }