X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fphylogeny%2FPhylogenyMethods.java;h=1532e41e0c0e28e2ac1cb182963a04ed710aa1be;hb=665e671efec73fcb36a9aac45f119330f290fa81;hp=09f9955834e3ca7985b154622f34113ccb5f8242;hpb=12fb7d9470cefe81e135eb79a17288ca055ec0ed;p=jalview.git diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 09f9955..1532e41 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -42,6 +42,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import org.forester.archaeopteryx.TreePanelUtil; import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; @@ -62,9 +63,12 @@ import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.phylogeny.iterators.PreorderTreeIterator; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; +import org.forester.util.FailedConditionCheckException; import org.forester.util.ForesterUtil; +import org.forester.util.TaxonomyUtil; public class PhylogenyMethods { @@ -269,6 +273,47 @@ public class PhylogenyMethods { } return max; } + + public static String[] obtainPresentRanksSorted( final Phylogeny phy ) { + final Set present_ranks = new HashSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null ) + && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) { + final String current_rank = node.getNodeData().getTaxonomy().getRank(); + if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) { + present_ranks.add( current_rank ); + } + } + } + final String ordered_ranks[] = new String[present_ranks.size() + 1]; + int c = 0; + for( final String rank : TaxonomyUtil.RANKS ) { + if ( present_ranks.contains( rank ) ) { + ordered_ranks[ c++ ] = rank; + } + } + ordered_ranks[ c ] = "off"; + return ordered_ranks; + } + + public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) { + int max = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + PhylogenyNode n = iter.next(); + int steps = 0; + while ( n.getParent() != null ) { + if ( !n.isCollapse() ) { + steps++; + } + n = n.getParent(); + } + if ( steps > max ) { + max = steps; + } + } + return max; + } public static double calculateMaxDistanceToRoot( final Phylogeny phy ) { double max = 0.0; @@ -412,7 +457,8 @@ public class PhylogenyMethods { return deleted; } - public static void deleteExternalNodesPositiveSelectionT( final List species_to_keep, final Phylogeny phy ) { + public static void deleteExternalNodesPositiveSelectionT( final List species_to_keep, + final Phylogeny phy ) { final Set to_delete = new HashSet(); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); @@ -818,7 +864,8 @@ public class PhylogenyMethods { else { if ( ( n.getNumberOfDescendants() == 2 ) && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) - && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { + && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2() + .getNumberOfExternalNodes() ) == order ) ) { final PhylogenyNode temp = n.getChildNode1(); n.setChild1( n.getChildNode2() ); n.setChild2( temp ); @@ -841,7 +888,7 @@ public class PhylogenyMethods { } } } - + public synchronized static void orderAppearanceX( final PhylogenyNode n, final boolean order_ext_alphabetically, final DESCENDANT_SORT_PRIORITY pri ) { @@ -849,17 +896,11 @@ public class PhylogenyMethods { return; } else { - _order_changed = false; - orderAppearance( n, - true, - order_ext_alphabetically, - pri ); - if (!_order_changed ) { - orderAppearance( n, - false, - order_ext_alphabetically, - pri ); - } + _order_changed = false; + orderAppearance( n, true, order_ext_alphabetically, pri ); + if ( !_order_changed ) { + orderAppearance( n, false, order_ext_alphabetically, pri ); + } } } @@ -902,7 +943,8 @@ public class PhylogenyMethods { PhylogenyNode.setNodeCount( i ); } - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) + throws IOException { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny[] trees = factory.create( file, parser ); if ( ( trees == null ) || ( trees.length == 0 ) ) { @@ -960,21 +1002,21 @@ public class PhylogenyMethods { } private static enum NDF { - NodeName( "NN" ), - TaxonomyCode( "TC" ), - TaxonomyCommonName( "CN" ), - TaxonomyScientificName( "TS" ), - TaxonomyIdentifier( "TI" ), - TaxonomySynonym( "SY" ), - SequenceName( "SN" ), - GeneName( "GN" ), - SequenceSymbol( "SS" ), - SequenceAccession( "SA" ), - Domain( "DO" ), - Annotation( "AN" ), - CrossRef( "XR" ), - BinaryCharacter( "BC" ), - MolecularSequence( "MS" ); + NodeName( "NN" ), + TaxonomyCode( "TC" ), + TaxonomyCommonName( "TN" ), + TaxonomyScientificName( "TS" ), + TaxonomyIdentifier( "TI" ), + TaxonomySynonym( "SY" ), + SequenceName( "SN" ), + GeneName( "GN" ), + SequenceSymbol( "SS" ), + SequenceAccession( "SA" ), + Domain( "DO" ), + Annotation( "AN" ), + CrossRef( "XR" ), + BinaryCharacter( "BC" ), + MolecularSequence( "MS" ); private final String _text; @@ -993,12 +1035,12 @@ public class PhylogenyMethods { } public static List searchData( final String query, - final Phylogeny phy, - final boolean case_sensitive, - final boolean partial, - final boolean regex, - final boolean search_domains, - final double domains_confidence_threshold ) { + final Phylogeny phy, + final boolean case_sensitive, + final boolean partial, + final boolean regex, + final boolean search_domains, + final double domains_confidence_threshold ) { final List nodes = new ArrayList(); if ( phy.isEmpty() || ( query == null ) ) { return nodes; @@ -1021,8 +1063,7 @@ public class PhylogenyMethods { && match( node.getName(), my_query, case_sensitive, partial, regex ) ) { match = true; } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) - && node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), my_query, case_sensitive, @@ -1030,8 +1071,7 @@ public class PhylogenyMethods { regex ) ) { match = true; } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) - && node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getCommonName(), my_query, case_sensitive, @@ -1039,8 +1079,7 @@ public class PhylogenyMethods { regex ) ) { match = true; } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) - && node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getScientificName(), my_query, case_sensitive, @@ -1048,8 +1087,7 @@ public class PhylogenyMethods { regex ) ) { match = true; } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) - && node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy() && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), my_query, @@ -1073,16 +1111,22 @@ public class PhylogenyMethods { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getGeneName(), my_query, case_sensitive, partial, regex ) ) { + && match( node.getNodeData().getSequence().getGeneName(), + my_query, + case_sensitive, + partial, + regex ) ) { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getSymbol(), my_query, case_sensitive, partial, regex ) ) { + && match( node.getNodeData().getSequence().getSymbol(), + my_query, + case_sensitive, + partial, + regex ) ) { match = true; } - if ( !match - && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) - && node.getNodeData().isHasSequence() + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) && match( node.getNodeData().getSequence().getAccession().getValue(), my_query, @@ -1150,9 +1194,7 @@ public class PhylogenyMethods { } } } - if ( !match - && ( ndf == NDF.MolecularSequence ) - && node.getNodeData().isHasSequence() + if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getMolecularSequence(), my_query, case_sensitive, @@ -1168,11 +1210,11 @@ public class PhylogenyMethods { } public static List searchDataLogicalAnd( final String[] queries, - final Phylogeny phy, - final boolean case_sensitive, - final boolean partial, - final boolean search_domains, - final double domains_confidence_threshold ) { + final Phylogeny phy, + final boolean case_sensitive, + final boolean partial, + final boolean search_domains, + final double domains_confidence_threshold ) { final List nodes = new ArrayList(); if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) { return nodes; @@ -1200,8 +1242,7 @@ public class PhylogenyMethods { && match( node.getName(), query, case_sensitive, partial, false ) ) { match = true; } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) - && node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, @@ -1209,8 +1250,7 @@ public class PhylogenyMethods { false ) ) { match = true; } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) - && node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, @@ -1227,8 +1267,7 @@ public class PhylogenyMethods { false ) ) { match = true; } - else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) - && node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy() && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), query, @@ -1248,22 +1287,30 @@ public class PhylogenyMethods { } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial, false ) ) { + && match( node.getNodeData().getSequence().getName(), + query, + case_sensitive, + partial, + false ) ) { match = true; } - if ( !match - && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) - && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial, false ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getGeneName(), + query, + case_sensitive, + partial, + false ) ) { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) - && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial, false ) ) { + && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(), + query, + case_sensitive, + partial, + false ) ) { match = true; } - if ( !match - && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) && match( node.getNodeData().getSequence().getAccession().getValue(), @@ -1332,9 +1379,7 @@ public class PhylogenyMethods { } } } - if ( !match - && ( ndf == NDF.MolecularSequence ) - && node.getNodeData().isHasSequence() + if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getMolecularSequence(), query, case_sensitive, @@ -1487,8 +1532,8 @@ public class PhylogenyMethods { } else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) ) && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n - .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) { + && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo + .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) { nodes_to_delete.add( n ); } } @@ -1543,7 +1588,8 @@ public class PhylogenyMethods { return true; } - final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) { + final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, + final String confidence_type ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { transferInternalNodeNameToConfidence( confidence_type, it.next() ); @@ -1570,7 +1616,8 @@ public class PhylogenyMethods { final static public void transferNodeNameToField( final Phylogeny phy, final PhylogenyNodeField field, - final boolean external_only ) throws PhyloXmlDataFormatException { + final boolean external_only ) + throws PhyloXmlDataFormatException { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); @@ -1690,6 +1737,20 @@ public class PhylogenyMethods { return d; } + public static double calculateAverageTreeHeight( final PhylogenyNode node ) { + final List ext = node.getAllExternalDescendants(); + double s = 0; + for( PhylogenyNode n : ext ) { + while ( n != node ) { + if ( n.getDistanceToParent() > 0 ) { + s += n.getDistanceToParent(); + } + n = n.getParent(); + } + } + return s / ext.size(); + } + /** * Deep copies the phylogeny originating from this node. */ @@ -1825,19 +1886,21 @@ public class PhylogenyMethods { } public static enum DESCENDANT_SORT_PRIORITY { - NODE_NAME, SEQUENCE, TAXONOMY; + NODE_NAME, + SEQUENCE, + TAXONOMY; } public static enum PhylogenyNodeField { - CLADE_NAME, - SEQUENCE_NAME, - SEQUENCE_SYMBOL, - TAXONOMY_CODE, - TAXONOMY_COMMON_NAME, - TAXONOMY_ID, - TAXONOMY_ID_UNIPROT_1, - TAXONOMY_ID_UNIPROT_2, - TAXONOMY_SCIENTIFIC_NAME; + CLADE_NAME, + SEQUENCE_NAME, + SEQUENCE_SYMBOL, + TAXONOMY_CODE, + TAXONOMY_COMMON_NAME, + TAXONOMY_ID, + TAXONOMY_ID_UNIPROT_1, + TAXONOMY_ID_UNIPROT_2, + TAXONOMY_SCIENTIFIC_NAME; } public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) { @@ -1977,4 +2040,117 @@ public class PhylogenyMethods { return 0; } } + + public final static Map calculateDepths( final Phylogeny phy ) { + final Map depths = new HashMap(); + calculateDepthsHelper( phy.getRoot(), 0, depths ); + return depths; + } + + private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map depths ) { + depths.put( n.getId(), d ); + ++d; + final List descs = n.getDescendants(); + for( final PhylogenyNode desc : descs ) { + calculateDepthsHelper( desc, d, depths ); + } + } + + public final static void collapseToDepth( final Phylogeny phy, final int depth ) { + if ( phy.getNumberOfExternalNodes() < 3 ) { + return; + } + collapseToDepthHelper( phy.getRoot(), 0, depth ); + } + + private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) { + if ( n.isExternal() ) { + n.setCollapse( false ); + return; + } + if ( d >= depth ) { + n.setCollapse( true ); + final PhylogenyNodeIterator it = new PreorderTreeIterator( n ); + while ( it.hasNext() ) { + it.next().setCollapse( true ); + } + } + else { + n.setCollapse( false ); + ++d; + final List descs = n.getDescendants(); + for( final PhylogenyNode desc : descs ) { + collapseToDepthHelper( desc, d, depth ); + } + } + } + + + + public final static void collapseToRank( final Phylogeny phy, final int rank ) { + if ( phy.getNumberOfExternalNodes() < 3 ) { + return; + } + if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) { + throw new IllegalArgumentException( "Rank " + rank + " is out of range" ); + } + collapseToRankHelper( phy.getRoot(), rank ); + } + + private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) { + if ( n.isExternal() ) { + n.setCollapse( false ); + return; + } + if ( ( n.getNodeData().getTaxonomy() != null ) + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) { + final String current_rank = n.getNodeData().getTaxonomy().getRank(); + if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) { + System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." ); + } + else { + if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) { + n.setCollapse( true ); + + final PhylogenyNodeIterator it = new PreorderTreeIterator( n ); + while ( it.hasNext() ) { + it.next().setCollapse( true ); + } + return; + } + } + } + n.setCollapse( false ); + final List descs = n.getDescendants(); + for( final PhylogenyNode desc : descs ) { + collapseToRankHelper( desc, target_rank ); + } + } + + public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) { + PhylogenyNode n = node; + while ( n.isInternal() ) { + n = n.getFirstChildNode(); + } + return n; + } + + public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) { + PhylogenyNode n = node; + while ( n.isInternal() ) { + n = n.getLastChildNode(); + } + return n; + } + + public final static boolean isHasCollapsedNodes( final Phylogeny phy ) { + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && ( n.isCollapse() ) ) { + return true; + } + } + return false; + } + }