X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fphylogeny%2FPhylogenyMethods.java;h=6678e702716c78fcd4dfc7123dc17d253d560708;hb=7ad84913635263ddc191fcbebd8a9bd46a9ac7e3;hp=c5d676dfbcc1f90b95845aad8049b925098f5076;hpb=5c10c6e6d65f086e4a021d358314f377bb97342d;p=jalview.git diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index c5d676d..6678e70 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -40,12 +40,14 @@ import java.util.SortedMap; import java.util.TreeMap; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.DomainArchitecture; +import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.data.Sequence; @@ -60,10 +62,9 @@ import org.forester.util.ForesterUtil; public class PhylogenyMethods { - private static PhylogenyMethods _instance = null; - private final Set _temp_hash_set = new HashSet(); - private PhylogenyNode _farthest_1 = null; - private PhylogenyNode _farthest_2 = null; + private static PhylogenyMethods _instance = null; + private PhylogenyNode _farthest_1 = null; + private PhylogenyNode _farthest_2 = null; private PhylogenyMethods() { // Hidden constructor. @@ -113,6 +114,10 @@ public class PhylogenyMethods { return farthest_d; } + final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { + return obtainLCA( n1, n2 ).getNodeData().getEvent(); + } + @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); @@ -126,6 +131,24 @@ public class PhylogenyMethods { return _farthest_2; } + final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { + if ( n.isInternal() ) { + throw new IllegalArgumentException( "node is not external" ); + } + final ArrayList to_delete = new ArrayList(); + for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode i = it.next(); + if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { + to_delete.add( i ); + } + } + for( final PhylogenyNode d : to_delete ) { + phy.deleteSubtree( d, true ); + } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); + } + /** * Returns the LCA of PhylogenyNodes node1 and node2. * @@ -134,19 +157,19 @@ public class PhylogenyMethods { * @param node2 * @return LCA of node1 and node2 */ - public PhylogenyNode obtainLCA( final PhylogenyNode node1, final PhylogenyNode node2 ) { - _temp_hash_set.clear(); + public final static PhylogenyNode obtainLCA( final PhylogenyNode node1, final PhylogenyNode node2 ) { + final HashSet ids_set = new HashSet(); PhylogenyNode n1 = node1; PhylogenyNode n2 = node2; - _temp_hash_set.add( n1.getId() ); + ids_set.add( n1.getId() ); while ( !n1.isRoot() ) { n1 = n1.getParent(); - _temp_hash_set.add( n1.getId() ); + ids_set.add( n1.getId() ); } - while ( !_temp_hash_set.contains( n2.getId() ) && !n2.isRoot() ) { + while ( !ids_set.contains( n2.getId() ) && !n2.isRoot() ) { n2 = n2.getParent(); } - if ( !_temp_hash_set.contains( n2.getId() ) ) { + if ( !ids_set.contains( n2.getId() ) ) { throw new IllegalArgumentException( "attempt to get LCA of two nodes which do not share a common root" ); } return n2; @@ -191,6 +214,20 @@ public class PhylogenyMethods { return trees; } + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List files ) + throws IOException { + final List tree_list = new ArrayList(); + for( final File file : files ) { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + tree_list.addAll( Arrays.asList( trees ) ); + } + return tree_list.toArray( new Phylogeny[ tree_list.size() ] ); + } + final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { @@ -393,24 +430,18 @@ public class PhylogenyMethods { } final static public void transferNodeNameToField( final Phylogeny phy, - final PhylogenyMethods.PhylogenyNodeField field ) { + final PhylogenyMethods.PhylogenyNodeField field, + final boolean external_only ) throws PhyloXmlDataFormatException { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); + if ( external_only && n.isInternal() ) { + continue; + } final String name = n.getName().trim(); if ( !ForesterUtil.isEmpty( name ) ) { switch ( field ) { case TAXONOMY_CODE: - //temp hack - // if ( name.length() > 5 ) { - // n.setName( "" ); - // if ( !n.getNodeData().isHasTaxonomy() ) { - // n.getNodeData().setTaxonomy( new Taxonomy() ); - // } - // n.getNodeData().getTaxonomy().setScientificName( name ); - // break; - // } - // n.setName( "" ); setTaxonomyCode( n, name ); break; @@ -474,6 +505,13 @@ public class PhylogenyMethods { .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); break; } + case TAXONOMY_ID: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); + break; + } } } } @@ -582,6 +620,17 @@ public class PhylogenyMethods { return max; } + public static int countNumberOfPolytomies( final Phylogeny phy ) { + int count = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) { + count++; + } + } + return count; + } + public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { @@ -593,13 +642,39 @@ public class PhylogenyMethods { return stats; } - public static DescriptiveStatistics calculatConfidenceStatistics( final Phylogeny phy ) { + public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); - if ( !n.isExternal() ) { + if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { + stats.addValue( n.getDistanceToParent() ); + } + } + return stats; + } + + public static List calculatConfidenceStatistics( final Phylogeny phy ) { + final List stats = new ArrayList(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && !n.isRoot() ) { if ( n.getBranchData().isHasConfidences() ) { - stats.addValue( n.getBranchData().getConfidence( 0 ).getValue() ); + for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { + final Confidence c = n.getBranchData().getConfidences().get( i ); + if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { + stats.add( i, new BasicDescriptiveStatistics() ); + } + if ( !ForesterUtil.isEmpty( c.getType() ) ) { + if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { + if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { + throw new IllegalArgumentException( "support values in node [" + n.toString() + + "] appear inconsistently ordered" ); + } + } + stats.get( i ).setDescription( c.getType() ); + } + stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); + } } } } @@ -698,11 +773,12 @@ public class PhylogenyMethods { } public static void deleteExternalNodesNegativeSelection( final Set to_delete, final Phylogeny phy ) { - phy.hashIDs(); + phy.clearHashIdToNodeMap(); for( final Integer id : to_delete ) { phy.deleteSubtree( phy.getNode( id ), true ); } - phy.hashIDs(); + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); } public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) @@ -723,6 +799,8 @@ public class PhylogenyMethods { p.deleteSubtree( n, true ); } } + p.clearHashIdToNodeMap(); + p.externalNodesHaveChanged(); } public static void deleteExternalNodesPositiveSelection( final Set species_to_keep, final Phylogeny phy ) { @@ -739,9 +817,8 @@ public class PhylogenyMethods { throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" ); } } - phy.hashIDs(); + phy.clearHashIdToNodeMap(); phy.externalNodesHaveChanged(); - // deleteExternalNodesNegativeSelection( to_delete, phy ); } public static List deleteExternalNodesPositiveSelection( final String[] node_names_to_keep, @@ -935,12 +1012,12 @@ public class PhylogenyMethods { if ( !node.getNodeData().isHasTaxonomy() ) { return ""; } - if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { - return node.getNodeData().getTaxonomy().getTaxonomyCode(); - } else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) { return node.getNodeData().getTaxonomy().getScientificName(); } + if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { + return node.getNodeData().getTaxonomy().getTaxonomyCode(); + } else { return node.getNodeData().getTaxonomy().getCommonName(); } @@ -1208,8 +1285,9 @@ public class PhylogenyMethods { double blue = 0.0; int n = 0; if ( node.isInternal() ) { - for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) { - final PhylogenyNode child_node = iterator.next(); + //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) { + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + final PhylogenyNode child_node = node.getChildNode( i ); final Color child_color = getBranchColorValue( child_node ); if ( child_color != null ) { ++n; @@ -1232,6 +1310,8 @@ public class PhylogenyMethods { } if ( remove_me.isExternal() ) { phylogeny.deleteSubtree( remove_me, false ); + phylogeny.clearHashIdToNodeMap(); + phylogeny.externalNodesHaveChanged(); } else { final PhylogenyNode parent = remove_me.getParent(); @@ -1243,7 +1323,7 @@ public class PhylogenyMethods { desc.getDistanceToParent() ) ); } remove_me.setParent( null ); - phylogeny.setIdHash( null ); + phylogeny.clearHashIdToNodeMap(); phylogeny.externalNodesHaveChanged(); } } @@ -1251,7 +1331,8 @@ public class PhylogenyMethods { public static List searchData( final String query, final Phylogeny phy, final boolean case_sensitive, - final boolean partial ) { + final boolean partial, + final boolean search_domains ) { final List nodes = new ArrayList(); if ( phy.isEmpty() || ( query == null ) ) { return nodes; @@ -1311,7 +1392,7 @@ public class PhylogenyMethods { partial ) ) { match = true; } - if ( !match && node.getNodeData().isHasSequence() + if ( search_domains && !match && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { @@ -1347,7 +1428,8 @@ public class PhylogenyMethods { public static List searchDataLogicalAnd( final String[] queries, final Phylogeny phy, final boolean case_sensitive, - final boolean partial ) { + final boolean partial, + final boolean search_domains ) { final List nodes = new ArrayList(); if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) { return nodes; @@ -1410,7 +1492,7 @@ public class PhylogenyMethods { partial ) ) { match = true; } - if ( !match && node.getNodeData().isHasSequence() + if ( search_domains && !match && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { @@ -1435,22 +1517,6 @@ public class PhylogenyMethods { break I; } } - // final String[] bcp_ary = node.getNodeData().getBinaryCharacters() - // .getPresentCharactersAsStringArray(); - // I: for( final String bc : bcp_ary ) { - // if ( match( bc, query, case_sensitive, partial ) ) { - // match = true; - // break I; - // } - // } - // final String[] bcg_ary = node.getNodeData().getBinaryCharacters() - // .getGainedCharactersAsStringArray(); - // I: for( final String bc : bcg_ary ) { - // if ( match( bc, query, case_sensitive, partial ) ) { - // match = true; - // break I; - // } - // } } if ( !match ) { all_matched = false; @@ -1524,8 +1590,10 @@ public class PhylogenyMethods { * * @param node * @param taxonomy_code + * @throws PhyloXmlDataFormatException */ - public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code ) { + public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code ) + throws PhyloXmlDataFormatException { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); } @@ -1557,6 +1625,8 @@ public class PhylogenyMethods { for( final PhylogenyNode phylogenyNode : nodes_to_delete ) { to_be_stripped.deleteSubtree( phylogenyNode, true ); } + to_be_stripped.clearHashIdToNodeMap(); + to_be_stripped.externalNodesHaveChanged(); return nodes_to_delete.size(); } @@ -1586,16 +1656,16 @@ public class PhylogenyMethods { n.setChild2( temp ); } else if ( order_ext_alphabetically ) { - // boolean all_ext = true; - // for( PhylogenyNode i : n.getDescendants() ) { - // if ( !i.isExternal() ) { - // all_ext = false; - // break; - // } - // } - // if ( all_ext ) { - PhylogenyMethods.sortNodeDescendents( n, pri ); - // } + boolean all_ext = true; + for( final PhylogenyNode i : n.getDescendants() ) { + if ( !i.isExternal() ) { + all_ext = false; + break; + } + } + if ( all_ext ) { + PhylogenyMethods.sortNodeDescendents( n, pri ); + } } for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri ); @@ -1611,7 +1681,8 @@ public class PhylogenyMethods { SEQUENCE_SYMBOL, SEQUENCE_NAME, TAXONOMY_ID_UNIPROT_1, - TAXONOMY_ID_UNIPROT_2; + TAXONOMY_ID_UNIPROT_2, + TAXONOMY_ID; } public static enum TAXONOMY_EXTRACTION {