X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fphylogeny%2FPhylogenyMethods.java;h=42fde76f3feeda24ca57c8bf39a7e09f47906d5b;hb=26d8d47ed32a0eb9bd43c409d0e26ff13570b246;hp=608653447857c987f966e481f46d9deda44d1837;hpb=1d144f35479f96cb36c5247d9711e22ed234651a;p=jalview.git diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 6086534..42fde76 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -38,11 +38,16 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; +import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.util.PhylogenyParserException; +import org.forester.msa.Msa; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.BranchColor; @@ -57,12 +62,16 @@ import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.phylogeny.iterators.PreorderTreeIterator; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; +import org.forester.util.TaxonomyUtil; public class PhylogenyMethods { + private static boolean _order_changed; + private PhylogenyMethods() { // Hidden constructor. } @@ -72,7 +81,37 @@ public class PhylogenyMethods { throw new CloneNotSupportedException(); } - public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { + public static boolean extractFastaInformation( final Phylogeny phy ) { + boolean could_extract = false; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( !ForesterUtil.isEmpty( node.getName() ) ) { + final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() ); + if ( name_m.lookingAt() ) { + could_extract = true; + final String acc_source = name_m.group( 1 ); + final String acc = name_m.group( 2 ); + final String seq_name = name_m.group( 3 ); + final String tax_sn = name_m.group( 4 ); + if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { + ForesterUtil.ensurePresenceOfSequence( node ); + node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); + } + if ( !ForesterUtil.isEmpty( seq_name ) ) { + ForesterUtil.ensurePresenceOfSequence( node ); + node.getNodeData().getSequence( 0 ).setName( seq_name ); + } + if ( !ForesterUtil.isEmpty( tax_sn ) ) { + ForesterUtil.ensurePresenceOfTaxonomy( node ); + node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); + } + } + } + } + return could_extract; + } + + public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); @@ -83,7 +122,7 @@ public class PhylogenyMethods { return stats; } - public static List calculatConfidenceStatistics( final Phylogeny phy ) { + public static List calculateConfidenceStatistics( final Phylogeny phy ) { final List stats = new ArrayList(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); @@ -112,9 +151,33 @@ public class PhylogenyMethods { } /** - * Calculates the distance between PhylogenyNodes node1 and node2. - * + * For external nodes the level is 0. * + * @param node + * @return + */ + public static int calculateLevel( final PhylogenyNode node ) { + if ( node.isExternal() ) { + return 0; + } + int level = 0; + for( PhylogenyNode ext : node.getAllExternalDescendants() ) { + int counter = 0; + while ( ext != node ) { + ext = ext.getParent(); + ++counter; + } + if ( counter > level ) { + level = counter; + } + } + return level; + } + + /** + * Calculates the distance between PhylogenyNodes node1 and node2. + * + * * @param node1 * @param node2 * @return distance between node1 and node2 @@ -128,8 +191,8 @@ public class PhylogenyMethods { /** * Returns the LCA of PhylogenyNodes node1 and node2. - * - * + * + * * @param node1 * @param node2 * @return LCA of node1 and node2 @@ -174,8 +237,8 @@ public class PhylogenyMethods { /** * Returns the LCA of PhylogenyNodes node1 and node2. * Precondition: ids are in pre-order (or level-order). - * - * + * + * * @param node1 * @param node2 * @return LCA of node1 and node2 @@ -198,34 +261,52 @@ public class PhylogenyMethods { return node1; } - public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { - if ( node.isExternal() ) { - return 0; + public static int calculateMaxDepth( final Phylogeny phy ) { + int max = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final int steps = node.calculateDepth(); + if ( steps > max ) { + max = steps; + } } - short max = 0; - for( PhylogenyNode d : node.getAllExternalDescendants() ) { - short steps = 0; - while ( d != node ) { - if ( d.isCollapse() ) { - steps = 0; - } - else { - steps++; + return max; + } + + public static String[] obtainPresentRanksSorted( final Phylogeny phy ) { + final Set present_ranks = new HashSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null ) + && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) { + final String current_rank = node.getNodeData().getTaxonomy().getRank(); + if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) { + present_ranks.add( current_rank ); } - d = d.getParent(); } - if ( max < steps ) { - max = steps; + } + final String ordered_ranks[] = new String[ present_ranks.size() + 1 ]; + int c = 0; + for( final String rank : TaxonomyUtil.RANKS ) { + if ( present_ranks.contains( rank ) ) { + ordered_ranks[ c++ ] = rank; } } - return max; + ordered_ranks[ c ] = "off"; + return ordered_ranks; } - public static int calculateMaxDepth( final Phylogeny phy ) { + public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) { int max = 0; for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final int steps = node.calculateDepth(); + PhylogenyNode n = iter.next(); + int steps = 0; + while ( n.getParent() != null ) { + if ( !n.isCollapse() ) { + steps++; + } + n = n.getParent(); + } if ( steps > max ) { max = steps; } @@ -245,6 +326,20 @@ public class PhylogenyMethods { return max; } + public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) { + double max = 0.0; + PhylogenyNode max_node = phy.getFirstExternalNode(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final double d = node.calculateDistanceToRoot(); + if ( d > max ) { + max = d; + max_node = node; + } + } + return max_node; + } + public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) { final List descs = node.getAllExternalDescendants(); int x = 0; @@ -256,7 +351,7 @@ public class PhylogenyMethods { return x; } - public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { + public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); @@ -361,7 +456,8 @@ public class PhylogenyMethods { return deleted; } - public static void deleteExternalNodesPositiveSelectionT( final List species_to_keep, final Phylogeny phy ) { + public static void deleteExternalNodesPositiveSelectionT( final List species_to_keep, + final Phylogeny phy ) { final Set to_delete = new HashSet(); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); @@ -450,10 +546,34 @@ public class PhylogenyMethods { return descs; } + public static List getAllDescendantsOfGivenLevel( final PhylogenyNode node, final int level ) { + final List descs = new ArrayList(); + final Set encountered = new HashSet(); + if ( !node.isExternal() ) { + final List exts = node.getAllExternalDescendants(); + for( PhylogenyNode current : exts ) { + if ( calculateLevel( current ) == level ) { + descs.add( current ); + } + while ( current != node ) { + current = current.getParent(); + if ( encountered.contains( current.getId() ) ) { + continue; + } + if ( calculateLevel( current ) == level ) { + descs.add( current ); + } + encountered.add( current.getId() ); + } + } + } + return descs; + } + /** - * + * * Convenience method - * + * * @param node * @return */ @@ -504,9 +624,9 @@ public class PhylogenyMethods { } /** - * Returns taxonomy t if all external descendants have + * Returns taxonomy t if all external descendants have * the same taxonomy t, null otherwise. - * + * */ public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) { final List descs = node.getAllExternalDescendants(); @@ -634,7 +754,7 @@ public class PhylogenyMethods { /* * This is case insensitive. - * + * */ public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax, final String[] providers ) { @@ -728,7 +848,7 @@ public class PhylogenyMethods { * all external nodes of node. * If at least one of the external nodes has no taxonomy, * null is returned. - * + * */ public static Map obtainDistinctTaxonomyCounts( final PhylogenyNode node ) { final List descs = node.getAllExternalDescendants(); @@ -752,10 +872,10 @@ public class PhylogenyMethods { * Arranges the order of childern for each node of this Phylogeny in such a * way that either the branch with more children is on top (right) or on * bottom (left), dependent on the value of boolean order. - * + * * @param order * decides in which direction to order - * @param pri + * @param pri */ public static void orderAppearance( final PhylogenyNode n, final boolean order, @@ -765,13 +885,14 @@ public class PhylogenyMethods { return; } else { - PhylogenyNode temp = null; if ( ( n.getNumberOfDescendants() == 2 ) && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) - && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { - temp = n.getChildNode1(); + && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2() + .getNumberOfExternalNodes() ) == order ) ) { + final PhylogenyNode temp = n.getChildNode1(); n.setChild1( n.getChildNode2() ); n.setChild2( temp ); + _order_changed = true; } else if ( order_ext_alphabetically ) { boolean all_ext = true; @@ -791,6 +912,21 @@ public class PhylogenyMethods { } } + public synchronized static void orderAppearanceX( final PhylogenyNode n, + final boolean order_ext_alphabetically, + final DESCENDANT_SORT_PRIORITY pri ) { + if ( n.isExternal() ) { + return; + } + else { + _order_changed = false; + orderAppearance( n, true, order_ext_alphabetically, pri ); + if ( !_order_changed ) { + orderAppearance( n, false, order_ext_alphabetically, pri ); + } + } + } + public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) { for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); @@ -830,7 +966,8 @@ public class PhylogenyMethods { PhylogenyNode.setNodeCount( i ); } - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) + throws IOException { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny[] trees = factory.create( file, parser ); if ( ( trees == null ) || ( trees.length == 0 ) ) { @@ -887,275 +1024,399 @@ public class PhylogenyMethods { } } - public static List searchData( final String query, - final Phylogeny phy, - final boolean case_sensitive, - final boolean partial, - final boolean search_domains ) { - final List nodes = new ArrayList(); + private static enum NDF { + NodeName( "NN" ), + TaxonomyCode( "TC" ), + TaxonomyCommonName( "TN" ), + TaxonomyScientificName( "TS" ), + TaxonomyIdentifier( "TI" ), + TaxonomySynonym( "SY" ), + SequenceName( "SN" ), + GeneName( "GN" ), + SequenceSymbol( "SS" ), + SequenceAccession( "SA" ), + Domain( "DO" ), + Annotation( "AN" ), + CrossRef( "XR" ), + BinaryCharacter( "BC" ), + MolecularSequence( "MS" ); + + private final String _text; + + NDF( final String text ) { + _text = text; + } + + public static NDF fromString( final String text ) { + for( final NDF n : NDF.values() ) { + if ( text.startsWith( n._text ) ) { + return n; + } + } + return null; + } + } + + public static List searchData( final String query, + final Phylogeny phy, + final boolean case_sensitive, + final boolean partial, + final boolean regex, + final boolean search_domains, + final double domains_confidence_threshold ) { + final List nodes = new ArrayList(); if ( phy.isEmpty() || ( query == null ) ) { return nodes; } if ( ForesterUtil.isEmpty( query ) ) { return nodes; } + String my_query = query; + NDF ndf = null; + if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) { + ndf = NDF.fromString( my_query ); + if ( ndf != null ) { + my_query = my_query.substring( 3 ); + } + } for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); boolean match = false; - if ( match( node.getName(), query, case_sensitive, partial ) ) { + if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) ) + && match( node.getName(), my_query, case_sensitive, partial, regex ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) { + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), + my_query, + case_sensitive, + partial, + regex ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) { + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getCommonName(), + my_query, + case_sensitive, + partial, + regex ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) { + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getScientificName(), + my_query, + case_sensitive, + partial, + regex ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy() && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), - query, + my_query, case_sensitive, - partial ) ) { + partial, + regex ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy() + && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { final List syns = node.getNodeData().getTaxonomy().getSynonyms(); I: for( final String syn : syns ) { - if ( match( syn, query, case_sensitive, partial ) ) { + if ( match( syn, my_query, case_sensitive, partial, regex ) ) { match = true; break I; } } } - if ( !match && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) { match = true; } - if ( !match && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getGeneName(), + my_query, + case_sensitive, + partial, + regex ) ) { match = true; } - if ( !match && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getSymbol(), + my_query, + case_sensitive, + partial, + regex ) ) { match = true; } - if ( !match - && node.getNodeData().isHasSequence() + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) && match( node.getNodeData().getSequence().getAccession().getValue(), - query, + my_query, case_sensitive, - partial ) ) { + partial, + regex ) ) { match = true; } - if ( search_domains && !match && node.getNodeData().isHasSequence() + if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) ) + && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { - if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) { + if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold ) + && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) { match = true; break I; } } } - // - if ( !match && node.getNodeData().isHasSequence() + if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { - if ( match( ann.getDesc(), query, case_sensitive, partial ) ) { + if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } - if ( match( ann.getRef(), query, case_sensitive, partial ) ) { + if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } } } - if ( !match && node.getNodeData().isHasSequence() + if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { - if ( match( x.getComment(), query, case_sensitive, partial ) ) { + if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } - if ( match( x.getSource(), query, case_sensitive, partial ) ) { + if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } - if ( match( x.getValue(), query, case_sensitive, partial ) ) { + if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } } } - // - if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) ) + && ( node.getNodeData().getBinaryCharacters() != null ) ) { Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); I: while ( it.hasNext() ) { - if ( match( it.next(), query, case_sensitive, partial ) ) { + if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) { match = true; break I; } } it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); I: while ( it.hasNext() ) { - if ( match( it.next(), query, case_sensitive, partial ) ) { + if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) { match = true; break I; } } } + if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getMolecularSequence(), + my_query, + case_sensitive, + true, + regex ) ) { + match = true; + } if ( match ) { - nodes.add( node ); + nodes.add( node.getId() ); } } return nodes; } - public static List searchDataLogicalAnd( final String[] queries, - final Phylogeny phy, - final boolean case_sensitive, - final boolean partial, - final boolean search_domains ) { - final List nodes = new ArrayList(); + public static List searchDataLogicalAnd( final String[] queries, + final Phylogeny phy, + final boolean case_sensitive, + final boolean partial, + final boolean search_domains, + final double domains_confidence_threshold ) { + final List nodes = new ArrayList(); if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) { return nodes; } for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); boolean all_matched = true; - for( final String query : queries ) { + for( String query : queries ) { + if ( query == null ) { + continue; + } + query = query.trim(); + NDF ndf = null; + if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) { + ndf = NDF.fromString( query ); + if ( ndf != null ) { + query = query.substring( 3 ); + } + } boolean match = false; if ( ForesterUtil.isEmpty( query ) ) { continue; } - if ( match( node.getName(), query, case_sensitive, partial ) ) { + if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) ) + && match( node.getName(), query, case_sensitive, partial, false ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) { + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), + query, + case_sensitive, + partial, + false ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) { + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getCommonName(), + query, + case_sensitive, + partial, + false ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() - && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) { + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) + && node.getNodeData().isHasTaxonomy() + && match( node.getNodeData().getTaxonomy().getScientificName(), + query, + case_sensitive, + partial, + false ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy() && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), query, case_sensitive, - partial ) ) { + partial, + false ) ) { match = true; } - else if ( node.getNodeData().isHasTaxonomy() + else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { final List syns = node.getNodeData().getTaxonomy().getSynonyms(); I: for( final String syn : syns ) { - if ( match( syn, query, case_sensitive, partial ) ) { + if ( match( syn, query, case_sensitive, partial, false ) ) { match = true; break I; } } } - if ( !match && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getName(), + query, + case_sensitive, + partial, + false ) ) { match = true; } - if ( !match && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getGeneName(), + query, + case_sensitive, + partial, + false ) ) { match = true; } - if ( !match && node.getNodeData().isHasSequence() - && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) + && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(), + query, + case_sensitive, + partial, + false ) ) { match = true; } - if ( !match + if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) && match( node.getNodeData().getSequence().getAccession().getValue(), query, case_sensitive, - partial ) ) { + partial, + false ) ) { match = true; } - if ( search_domains && !match && node.getNodeData().isHasSequence() + if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) ) + && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { - if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) { + if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold ) + && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) { match = true; break I; } } } - // - if ( !match && node.getNodeData().isHasSequence() + if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { - if ( match( ann.getDesc(), query, case_sensitive, partial ) ) { + if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) { match = true; break; } - if ( match( ann.getRef(), query, case_sensitive, partial ) ) { + if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) { match = true; break; } } } - if ( !match && node.getNodeData().isHasSequence() + if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { - if ( match( x.getComment(), query, case_sensitive, partial ) ) { + if ( match( x.getComment(), query, case_sensitive, partial, false ) ) { match = true; break; } - if ( match( x.getSource(), query, case_sensitive, partial ) ) { + if ( match( x.getSource(), query, case_sensitive, partial, false ) ) { match = true; break; } - if ( match( x.getValue(), query, case_sensitive, partial ) ) { + if ( match( x.getValue(), query, case_sensitive, partial, false ) ) { match = true; break; } } } - // - if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) { + if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) ) + && ( node.getNodeData().getBinaryCharacters() != null ) ) { Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); I: while ( it.hasNext() ) { - if ( match( it.next(), query, case_sensitive, partial ) ) { + if ( match( it.next(), query, case_sensitive, partial, false ) ) { match = true; break I; } } it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); I: while ( it.hasNext() ) { - if ( match( it.next(), query, case_sensitive, partial ) ) { + if ( match( it.next(), query, case_sensitive, partial, false ) ) { match = true; break I; } } } + if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getMolecularSequence(), + query, + case_sensitive, + true, + false ) ) { + match = true; + } if ( !match ) { all_matched = false; break; } } if ( all_matched ) { - nodes.add( node ); + nodes.add( node.getId() ); } } return nodes; @@ -1169,7 +1430,7 @@ public class PhylogenyMethods { /** * Convenience method. - * Sets value for the first confidence value (created if not present, values overwritten otherwise). + * Sets value for the first confidence value (created if not present, values overwritten otherwise). */ public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) { setConfidence( node, bootstrap_confidence_value, "bootstrap" ); @@ -1191,7 +1452,7 @@ public class PhylogenyMethods { /** * Convenience method. - * Sets value for the first confidence value (created if not present, values overwritten otherwise). + * Sets value for the first confidence value (created if not present, values overwritten otherwise). */ public static void setConfidence( final PhylogenyNode node, final double confidence_value ) { setConfidence( node, confidence_value, "" ); @@ -1199,7 +1460,7 @@ public class PhylogenyMethods { /** * Convenience method. - * Sets value for the first confidence value (created if not present, values overwritten otherwise). + * Sets value for the first confidence value (created if not present, values overwritten otherwise). */ public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) { Confidence c = null; @@ -1223,11 +1484,11 @@ public class PhylogenyMethods { /** * Convenience method to set the taxonomy code of a phylogeny node. - * - * + * + * * @param node * @param taxonomy_code - * @throws PhyloXmlDataFormatException + * @throws PhyloXmlDataFormatException */ public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code ) throws PhyloXmlDataFormatException { @@ -1238,159 +1499,6 @@ public class PhylogenyMethods { } final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) { - class PhylogenyNodeSortTaxonomyPriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { - return n1.getNodeData().getSequence().getGeneName() - .compareTo( n2.getNodeData().getSequence().getGeneName() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - return 0; - } - } - class PhylogenyNodeSortSequencePriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { - return n1.getNodeData().getSequence().getGeneName() - .compareTo( n2.getNodeData().getSequence().getGeneName() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - return 0; - } - } - class PhylogenyNodeSortNodeNamePriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { - return n1.getNodeData().getSequence().getGeneName() - .compareTo( n2.getNodeData().getSequence().getGeneName() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - return 0; - } - } Comparator c; switch ( pri ) { case SEQUENCE: @@ -1413,7 +1521,7 @@ public class PhylogenyMethods { /** * Removes from Phylogeny to_be_stripped all external Nodes which are * associated with a species NOT found in Phylogeny reference. - * + * * @param reference * a reference Phylogeny * @param to_be_stripped @@ -1447,8 +1555,8 @@ public class PhylogenyMethods { } else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) ) && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n - .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) { + && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo + .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) { nodes_to_delete.add( n ); } } @@ -1460,7 +1568,8 @@ public class PhylogenyMethods { return nodes_to_delete; } - final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { + final static public void transferInternalNamesToConfidenceValues( final Phylogeny phy, + final String confidence_type ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); @@ -1474,38 +1583,65 @@ public class PhylogenyMethods { + e.getLocalizedMessage() ); } if ( value >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); + n.getBranchData().addConfidence( new Confidence( value, confidence_type ) ); n.setName( "" ); } } } } - final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { + final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { + if ( !n.isExternal() && !n.isRoot() ) { if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double d = -1.0; + double value = -1; try { - d = Double.parseDouble( n.getName() ); + value = Double.parseDouble( n.getName() ); } - catch ( final Exception e ) { - d = -1.0; + catch ( final NumberFormatException e ) { + return false; } - if ( d >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( d, "" ) ); - n.setName( "" ); + if ( ( value < 0.0 ) || ( value > 100 ) ) { + return false; } } } } + return true; + } + + final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, + final String confidence_type ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + transferInternalNodeNameToConfidence( confidence_type, it.next() ); + } + } + + private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) { + if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { + if ( !ForesterUtil.isEmpty( n.getName() ) ) { + double d = -1.0; + try { + d = Double.parseDouble( n.getName() ); + } + catch ( final Exception e ) { + d = -1.0; + } + if ( d >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( d, confidence_type ) ); + n.setName( "" ); + } + } + } } final static public void transferNodeNameToField( final Phylogeny phy, final PhylogenyNodeField field, - final boolean external_only ) throws PhyloXmlDataFormatException { + final boolean external_only ) + throws PhyloXmlDataFormatException { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); @@ -1586,6 +1722,12 @@ public class PhylogenyMethods { n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); break; } + case CLADE_NAME: + n.setName( name ); + break; + default: { + throw new IllegalArgumentException( "don't know what to do with " + field ); + } } } } @@ -1622,6 +1764,20 @@ public class PhylogenyMethods { return d; } + public static double calculateAverageTreeHeight( final PhylogenyNode node ) { + final List ext = node.getAllExternalDescendants(); + double s = 0; + for( PhylogenyNode n : ext ) { + while ( n != node ) { + if ( n.getDistanceToParent() > 0 ) { + s += n.getDistanceToParent(); + } + n = n.getParent(); + } + } + return s / ext.size(); + } + /** * Deep copies the phylogeny originating from this node. */ @@ -1675,7 +1831,7 @@ public class PhylogenyMethods { /** * Calculates the distance between PhylogenyNodes n1 and n2. * PRECONDITION: n1 is a descendant of n2. - * + * * @param n1 * a descendant of n2 * @param n2 @@ -1695,21 +1851,54 @@ public class PhylogenyMethods { private static boolean match( final String s, final String query, final boolean case_sensitive, - final boolean partial ) { + final boolean partial, + final boolean regex ) { if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) { return false; } String my_s = s.trim(); String my_query = query.trim(); - if ( !case_sensitive ) { + if ( !case_sensitive && !regex ) { my_s = my_s.toLowerCase(); my_query = my_query.toLowerCase(); } - if ( partial ) { + if ( regex ) { + Pattern p = null; + try { + if ( case_sensitive ) { + p = Pattern.compile( my_query ); + } + else { + p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE ); + } + } + catch ( final PatternSyntaxException e ) { + return false; + } + if ( p != null ) { + return p.matcher( my_s ).find(); + } + else { + return false; + } + } + else if ( partial ) { return my_s.indexOf( my_query ) >= 0; } else { - return my_s.equals( my_query ); + Pattern p = null; + try { + p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" ); + } + catch ( final PatternSyntaxException e ) { + return false; + } + if ( p != null ) { + return p.matcher( my_s ).find(); + } + else { + return false; + } } } @@ -1724,18 +1913,267 @@ public class PhylogenyMethods { } public static enum DESCENDANT_SORT_PRIORITY { - NODE_NAME, SEQUENCE, TAXONOMY; + NODE_NAME, + SEQUENCE, + TAXONOMY; } public static enum PhylogenyNodeField { - CLADE_NAME, - SEQUENCE_NAME, - SEQUENCE_SYMBOL, - TAXONOMY_CODE, - TAXONOMY_COMMON_NAME, - TAXONOMY_ID, - TAXONOMY_ID_UNIPROT_1, - TAXONOMY_ID_UNIPROT_2, - TAXONOMY_SCIENTIFIC_NAME; + CLADE_NAME, + SEQUENCE_NAME, + SEQUENCE_SYMBOL, + TAXONOMY_CODE, + TAXONOMY_COMMON_NAME, + TAXONOMY_ID, + TAXONOMY_ID_UNIPROT_1, + TAXONOMY_ID_UNIPROT_2, + TAXONOMY_SCIENTIFIC_NAME; + } + + public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) { + for( int s = 0; s < msa.getNumberOfSequences(); ++s ) { + final org.forester.sequence.MolecularSequence seq = msa.getSequence( s ); + final PhylogenyNode node = phy.getNode( seq.getIdentifier() ); + final org.forester.phylogeny.data.Sequence new_seq = new Sequence(); + new_seq.setMolecularSequenceAligned( true ); + new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() ); + new_seq.setName( seq.getIdentifier() ); + try { + new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN ); + } + catch ( final PhyloXmlDataFormatException ignore ) { + // do nothing + } + node.getNodeData().addSequence( new_seq ); + } + } + + final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + } + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + } + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + return 0; + } + } + + final private static class PhylogenyNodeSortSequencePriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + } + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + } + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + return 0; + } + } + + final private static class PhylogenyNodeSortNodeNamePriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + } + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + } + return 0; + } + } + + public final static Map calculateDepths( final Phylogeny phy ) { + final Map depths = new HashMap(); + calculateDepthsHelper( phy.getRoot(), 0, depths ); + return depths; + } + + private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map depths ) { + depths.put( n.getId(), d ); + ++d; + final List descs = n.getDescendants(); + for( final PhylogenyNode desc : descs ) { + calculateDepthsHelper( desc, d, depths ); + } + } + + public final static void collapseToDepth( final Phylogeny phy, final int depth ) { + if ( phy.getNumberOfExternalNodes() < 3 ) { + return; + } + collapseToDepthHelper( phy.getRoot(), 0, depth ); + } + + private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) { + if ( n.isExternal() ) { + n.setCollapse( false ); + return; + } + if ( d >= depth ) { + n.setCollapse( true ); + final PhylogenyNodeIterator it = new PreorderTreeIterator( n ); + while ( it.hasNext() ) { + it.next().setCollapse( true ); + } + } + else { + n.setCollapse( false ); + ++d; + final List descs = n.getDescendants(); + for( final PhylogenyNode desc : descs ) { + collapseToDepthHelper( desc, d, depth ); + } + } + } + + public final static void collapseToRank( final Phylogeny phy, final int rank ) { + if ( phy.getNumberOfExternalNodes() < 3 ) { + return; + } + if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) { + throw new IllegalArgumentException( "Rank " + rank + " is out of range" ); + } + collapseToRankHelper( phy.getRoot(), rank ); + } + + private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) { + if ( n.isExternal() ) { + n.setCollapse( false ); + return; + } + if ( ( n.getNodeData().getTaxonomy() != null ) + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) { + final String current_rank = n.getNodeData().getTaxonomy().getRank(); + if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) { + System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." ); + } + else { + if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) { + n.setCollapse( true ); + final PhylogenyNodeIterator it = new PreorderTreeIterator( n ); + while ( it.hasNext() ) { + it.next().setCollapse( true ); + } + return; + } + } + } + n.setCollapse( false ); + final List descs = n.getDescendants(); + for( final PhylogenyNode desc : descs ) { + collapseToRankHelper( desc, target_rank ); + } + } + + public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) { + PhylogenyNode n = node; + while ( n.isInternal() ) { + n = n.getFirstChildNode(); + } + return n; + } + + public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) { + PhylogenyNode n = node; + while ( n.isInternal() ) { + n = n.getLastChildNode(); + } + return n; + } + + public final static boolean isHasCollapsedNodes( final Phylogeny phy ) { + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && ( n.isCollapse() ) ) { + return true; + } + } + return false; } }