import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+import org.forester.archaeopteryx.TreePanelUtil;
+import org.forester.io.parsers.FastaParser;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.msa.Msa;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.BranchColor;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.phylogeny.iterators.PreorderTreeIterator;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;
+import org.forester.util.FailedConditionCheckException;
import org.forester.util.ForesterUtil;
+import org.forester.util.TaxonomyUtil;
public class PhylogenyMethods {
+ private static boolean _order_changed;
+
private PhylogenyMethods() {
// Hidden constructor.
}
throw new CloneNotSupportedException();
}
- public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
+ public static boolean extractFastaInformation( final Phylogeny phy ) {
+ boolean could_extract = false;
+ for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+ final PhylogenyNode node = iter.next();
+ if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+ final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
+ if ( name_m.lookingAt() ) {
+ could_extract = true;
+ final String acc_source = name_m.group( 1 );
+ final String acc = name_m.group( 2 );
+ final String seq_name = name_m.group( 3 );
+ final String tax_sn = name_m.group( 4 );
+ if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
+ ForesterUtil.ensurePresenceOfSequence( node );
+ node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
+ }
+ if ( !ForesterUtil.isEmpty( seq_name ) ) {
+ ForesterUtil.ensurePresenceOfSequence( node );
+ node.getNodeData().getSequence( 0 ).setName( seq_name );
+ }
+ if ( !ForesterUtil.isEmpty( tax_sn ) ) {
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
+ node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
+ }
+ }
+ }
+ }
+ return could_extract;
+ }
+
+ public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
return stats;
}
- public static List<DescriptiveStatistics> calculatConfidenceStatistics( final Phylogeny phy ) {
+ public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {
final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
/**
* Calculates the distance between PhylogenyNodes node1 and node2.
- *
- *
+ *
+ *
* @param node1
* @param node2
* @return distance between node1 and node2
/**
* Returns the LCA of PhylogenyNodes node1 and node2.
- *
- *
+ *
+ *
* @param node1
* @param node2
* @return LCA of node1 and node2
/**
* Returns the LCA of PhylogenyNodes node1 and node2.
* Precondition: ids are in pre-order (or level-order).
- *
- *
+ *
+ *
* @param node1
* @param node2
* @return LCA of node1 and node2
}
return max;
}
+
+ public static String[] obtainPresentRanksSorted( final Phylogeny phy ) {
+ final Set<String> present_ranks = new HashSet<String>();
+ for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+ final PhylogenyNode node = iter.next();
+ if ( !node.isExternal() && !node.isRoot() && ( node.getNodeData().getTaxonomy() != null )
+ && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getRank() ) ) {
+ final String current_rank = node.getNodeData().getTaxonomy().getRank();
+ if ( TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
+ present_ranks.add( current_rank );
+ }
+ }
+ }
+ final String ordered_ranks[] = new String[present_ranks.size() + 1];
+ int c = 0;
+ for( final String rank : TaxonomyUtil.RANKS ) {
+ if ( present_ranks.contains( rank ) ) {
+ ordered_ranks[ c++ ] = rank;
+ }
+ }
+ ordered_ranks[ c ] = "off";
+ return ordered_ranks;
+ }
+
+ public static int calculateMaxDepthConsiderCollapsed( final Phylogeny phy ) {
+ int max = 0;
+ for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+ PhylogenyNode n = iter.next();
+ int steps = 0;
+ while ( n.getParent() != null ) {
+ if ( !n.isCollapse() ) {
+ steps++;
+ }
+ n = n.getParent();
+ }
+ if ( steps > max ) {
+ max = steps;
+ }
+ }
+ return max;
+ }
public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
double max = 0.0;
return max;
}
+ public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) {
+ double max = 0.0;
+ PhylogenyNode max_node = phy.getFirstExternalNode();
+ for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+ final PhylogenyNode node = iter.next();
+ final double d = node.calculateDistanceToRoot();
+ if ( d > max ) {
+ max = d;
+ max_node = node;
+ }
+ }
+ return max_node;
+ }
+
public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = node.getAllExternalDescendants();
int x = 0;
return x;
}
- public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
+ public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
return deleted;
}
- public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep, final Phylogeny phy ) {
+ public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep,
+ final Phylogeny phy ) {
final Set<Long> to_delete = new HashSet<Long>();
for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
final PhylogenyNode n = it.next();
phy.externalNodesHaveChanged();
}
+ public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
+ final double min_distance_to_root ) {
+ if ( min_distance_to_root <= 0 ) {
+ throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
+ }
+ final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
+ setAllIndicatorsToZero( phy );
+ for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+ final PhylogenyNode n = it.next();
+ if ( n.getIndicator() != 0 ) {
+ continue;
+ }
+ l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
+ if ( l.isEmpty() ) {
+ throw new RuntimeException( "this should not have happened" );
+ }
+ }
+ return l;
+ }
+
public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
final Set<Long> encountered = new HashSet<Long>();
}
/**
- *
+ *
* Convenience method
- *
+ *
* @param node
* @return
*/
}
/**
- * Returns taxonomy t if all external descendants have
+ * Returns taxonomy t if all external descendants have
* the same taxonomy t, null otherwise.
- *
+ *
*/
public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = node.getAllExternalDescendants();
/*
* This is case insensitive.
- *
+ *
*/
public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
final String[] providers ) {
* all external nodes of node.
* If at least one of the external nodes has no taxonomy,
* null is returned.
- *
+ *
*/
- public static SortedMap<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
+ public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final SortedMap<Taxonomy, Integer> tax_map = new TreeMap<Taxonomy, Integer>();
+ final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
for( final PhylogenyNode n : descs ) {
if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
return null;
* Arranges the order of childern for each node of this Phylogeny in such a
* way that either the branch with more children is on top (right) or on
* bottom (left), dependent on the value of boolean order.
- *
+ *
* @param order
* decides in which direction to order
- * @param pri
+ * @param pri
*/
public static void orderAppearance( final PhylogenyNode n,
final boolean order,
return;
}
else {
- PhylogenyNode temp = null;
if ( ( n.getNumberOfDescendants() == 2 )
&& ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
- && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) {
- temp = n.getChildNode1();
+ && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2()
+ .getNumberOfExternalNodes() ) == order ) ) {
+ final PhylogenyNode temp = n.getChildNode1();
n.setChild1( n.getChildNode2() );
n.setChild2( temp );
+ _order_changed = true;
}
else if ( order_ext_alphabetically ) {
boolean all_ext = true;
}
}
+ public synchronized static void orderAppearanceX( final PhylogenyNode n,
+ final boolean order_ext_alphabetically,
+ final DESCENDANT_SORT_PRIORITY pri ) {
+ if ( n.isExternal() ) {
+ return;
+ }
+ else {
+ _order_changed = false;
+ orderAppearance( n, true, order_ext_alphabetically, pri );
+ if ( !_order_changed ) {
+ orderAppearance( n, false, order_ext_alphabetically, pri );
+ }
+ }
+ }
+
public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
PhylogenyNode.setNodeCount( i );
}
- public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException {
+ public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file )
+ throws IOException {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
final Phylogeny[] trees = factory.create( file, parser );
if ( ( trees == null ) || ( trees.length == 0 ) ) {
}
}
- public static List<PhylogenyNode> searchData( final String query,
- final Phylogeny phy,
- final boolean case_sensitive,
- final boolean partial,
- final boolean search_domains ) {
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ private static enum NDF {
+ NodeName( "NN" ),
+ TaxonomyCode( "TC" ),
+ TaxonomyCommonName( "TN" ),
+ TaxonomyScientificName( "TS" ),
+ TaxonomyIdentifier( "TI" ),
+ TaxonomySynonym( "SY" ),
+ SequenceName( "SN" ),
+ GeneName( "GN" ),
+ SequenceSymbol( "SS" ),
+ SequenceAccession( "SA" ),
+ Domain( "DO" ),
+ Annotation( "AN" ),
+ CrossRef( "XR" ),
+ BinaryCharacter( "BC" ),
+ MolecularSequence( "MS" );
+
+ private final String _text;
+
+ NDF( final String text ) {
+ _text = text;
+ }
+
+ public static NDF fromString( final String text ) {
+ for( final NDF n : NDF.values() ) {
+ if ( text.startsWith( n._text ) ) {
+ return n;
+ }
+ }
+ return null;
+ }
+ }
+
+ public static List<Long> searchData( final String query,
+ final Phylogeny phy,
+ final boolean case_sensitive,
+ final boolean partial,
+ final boolean regex,
+ final boolean search_domains,
+ final double domains_confidence_threshold ) {
+ final List<Long> nodes = new ArrayList<Long>();
if ( phy.isEmpty() || ( query == null ) ) {
return nodes;
}
if ( ForesterUtil.isEmpty( query ) ) {
return nodes;
}
+ String my_query = query;
+ NDF ndf = null;
+ if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {
+ ndf = NDF.fromString( my_query );
+ if ( ndf != null ) {
+ my_query = my_query.substring( 3 );
+ }
+ }
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
boolean match = false;
- if ( match( node.getName(), query, case_sensitive, partial ) ) {
+ if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
+ && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
- && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) {
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
+ && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
+ my_query,
+ case_sensitive,
+ partial,
+ regex ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
- && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) {
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
+ && match( node.getNodeData().getTaxonomy().getCommonName(),
+ my_query,
+ case_sensitive,
+ partial,
+ regex ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
- && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) {
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy()
+ && match( node.getNodeData().getTaxonomy().getScientificName(),
+ my_query,
+ case_sensitive,
+ partial,
+ regex ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
&& ( node.getNodeData().getTaxonomy().getIdentifier() != null )
&& match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
- query,
+ my_query,
case_sensitive,
- partial ) ) {
+ partial,
+ regex ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
+ && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
I: for( final String syn : syns ) {
- if ( match( syn, query, case_sensitive, partial ) ) {
+ if ( match( syn, my_query, case_sensitive, partial, regex ) ) {
match = true;
break I;
}
}
}
- if ( !match && node.getNodeData().isHasSequence()
- && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) {
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {
match = true;
}
- if ( !match && node.getNodeData().isHasSequence()
- && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getGeneName(),
+ my_query,
+ case_sensitive,
+ partial,
+ regex ) ) {
match = true;
}
- if ( !match && node.getNodeData().isHasSequence()
- && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getSymbol(),
+ my_query,
+ case_sensitive,
+ partial,
+ regex ) ) {
match = true;
}
- if ( !match
- && node.getNodeData().isHasSequence()
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getAccession() != null )
&& match( node.getNodeData().getSequence().getAccession().getValue(),
- query,
+ my_query,
case_sensitive,
- partial ) ) {
+ partial,
+ regex ) ) {
match = true;
}
- if ( search_domains && !match && node.getNodeData().isHasSequence()
+ if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
+ && node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
- if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) {
+ if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
+ && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {
match = true;
break I;
}
}
}
- //
- if ( !match && node.getNodeData().isHasSequence()
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
- if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+ if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {
match = true;
break;
}
- if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+ if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {
match = true;
break;
}
}
}
- if ( !match && node.getNodeData().isHasSequence()
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
- if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+ if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {
match = true;
break;
}
- if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+ if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {
match = true;
break;
}
- if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+ if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {
match = true;
break;
}
}
}
- //
- if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
+ && ( node.getNodeData().getBinaryCharacters() != null ) ) {
Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
I: while ( it.hasNext() ) {
- if ( match( it.next(), query, case_sensitive, partial ) ) {
+ if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
match = true;
break I;
}
}
it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
I: while ( it.hasNext() ) {
- if ( match( it.next(), query, case_sensitive, partial ) ) {
+ if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {
match = true;
break I;
}
}
}
+ if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getMolecularSequence(),
+ my_query,
+ case_sensitive,
+ true,
+ regex ) ) {
+ match = true;
+ }
if ( match ) {
- nodes.add( node );
+ nodes.add( node.getId() );
}
}
return nodes;
}
- public static List<PhylogenyNode> searchDataLogicalAnd( final String[] queries,
- final Phylogeny phy,
- final boolean case_sensitive,
- final boolean partial,
- final boolean search_domains ) {
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ public static List<Long> searchDataLogicalAnd( final String[] queries,
+ final Phylogeny phy,
+ final boolean case_sensitive,
+ final boolean partial,
+ final boolean search_domains,
+ final double domains_confidence_threshold ) {
+ final List<Long> nodes = new ArrayList<Long>();
if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
return nodes;
}
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
boolean all_matched = true;
- for( final String query : queries ) {
+ for( String query : queries ) {
+ if ( query == null ) {
+ continue;
+ }
+ query = query.trim();
+ NDF ndf = null;
+ if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {
+ ndf = NDF.fromString( query );
+ if ( ndf != null ) {
+ query = query.substring( 3 );
+ }
+ }
boolean match = false;
if ( ForesterUtil.isEmpty( query ) ) {
continue;
}
- if ( match( node.getName(), query, case_sensitive, partial ) ) {
+ if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )
+ && match( node.getName(), query, case_sensitive, partial, false ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
- && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) {
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy()
+ && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),
+ query,
+ case_sensitive,
+ partial,
+ false ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
- && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) {
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy()
+ && match( node.getNodeData().getTaxonomy().getCommonName(),
+ query,
+ case_sensitive,
+ partial,
+ false ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
- && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) {
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )
+ && node.getNodeData().isHasTaxonomy()
+ && match( node.getNodeData().getTaxonomy().getScientificName(),
+ query,
+ case_sensitive,
+ partial,
+ false ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy()
&& ( node.getNodeData().getTaxonomy().getIdentifier() != null )
&& match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
query,
case_sensitive,
- partial ) ) {
+ partial,
+ false ) ) {
match = true;
}
- else if ( node.getNodeData().isHasTaxonomy()
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()
&& !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
I: for( final String syn : syns ) {
- if ( match( syn, query, case_sensitive, partial ) ) {
+ if ( match( syn, query, case_sensitive, partial, false ) ) {
match = true;
break I;
}
}
}
- if ( !match && node.getNodeData().isHasSequence()
- && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) {
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getName(),
+ query,
+ case_sensitive,
+ partial,
+ false ) ) {
match = true;
}
- if ( !match && node.getNodeData().isHasSequence()
- && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getGeneName(),
+ query,
+ case_sensitive,
+ partial,
+ false ) ) {
match = true;
}
- if ( !match && node.getNodeData().isHasSequence()
- && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )
+ && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(),
+ query,
+ case_sensitive,
+ partial,
+ false ) ) {
match = true;
}
- if ( !match
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )
&& node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getAccession() != null )
&& match( node.getNodeData().getSequence().getAccession().getValue(),
query,
case_sensitive,
- partial ) ) {
+ partial,
+ false ) ) {
match = true;
}
- if ( search_domains && !match && node.getNodeData().isHasSequence()
+ if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )
+ && node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
- if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) {
+ if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )
+ && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {
match = true;
break I;
}
}
}
- //
- if ( !match && node.getNodeData().isHasSequence()
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
- if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+ if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {
match = true;
break;
}
- if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+ if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) {
match = true;
break;
}
}
}
- if ( !match && node.getNodeData().isHasSequence()
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
- if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+ if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {
match = true;
break;
}
- if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+ if ( match( x.getSource(), query, case_sensitive, partial, false ) ) {
match = true;
break;
}
- if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+ if ( match( x.getValue(), query, case_sensitive, partial, false ) ) {
match = true;
break;
}
}
}
- //
- if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )
+ && ( node.getNodeData().getBinaryCharacters() != null ) ) {
Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
I: while ( it.hasNext() ) {
- if ( match( it.next(), query, case_sensitive, partial ) ) {
+ if ( match( it.next(), query, case_sensitive, partial, false ) ) {
match = true;
break I;
}
}
it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();
I: while ( it.hasNext() ) {
- if ( match( it.next(), query, case_sensitive, partial ) ) {
+ if ( match( it.next(), query, case_sensitive, partial, false ) ) {
match = true;
break I;
}
}
}
+ if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getMolecularSequence(),
+ query,
+ case_sensitive,
+ true,
+ false ) ) {
+ match = true;
+ }
if ( !match ) {
all_matched = false;
break;
}
}
if ( all_matched ) {
- nodes.add( node );
+ nodes.add( node.getId() );
}
}
return nodes;
}
+ public static void setAllIndicatorsToZero( final Phylogeny phy ) {
+ for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
+ it.next().setIndicator( ( byte ) 0 );
+ }
+ }
+
/**
* Convenience method.
- * Sets value for the first confidence value (created if not present, values overwritten otherwise).
+ * Sets value for the first confidence value (created if not present, values overwritten otherwise).
*/
public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
setConfidence( node, bootstrap_confidence_value, "bootstrap" );
/**
* Convenience method.
- * Sets value for the first confidence value (created if not present, values overwritten otherwise).
+ * Sets value for the first confidence value (created if not present, values overwritten otherwise).
*/
public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
setConfidence( node, confidence_value, "" );
/**
* Convenience method.
- * Sets value for the first confidence value (created if not present, values overwritten otherwise).
+ * Sets value for the first confidence value (created if not present, values overwritten otherwise).
*/
public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
Confidence c = null;
/**
* Convenience method to set the taxonomy code of a phylogeny node.
- *
- *
+ *
+ *
* @param node
* @param taxonomy_code
- * @throws PhyloXmlDataFormatException
+ * @throws PhyloXmlDataFormatException
*/
public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
throws PhyloXmlDataFormatException {
}
final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
- class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
-
- @Override
- public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
- if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
- return n1.getNodeData().getTaxonomy().getTaxonomyCode()
- .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
- }
- }
- if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
- return n1.getNodeData().getSequence().getName().toLowerCase()
- .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
- return n1.getNodeData().getSequence().getSymbol()
- .compareTo( n2.getNodeData().getSequence().getSymbol() );
- }
- if ( ( n1.getNodeData().getSequence().getAccession() != null )
- && ( n2.getNodeData().getSequence().getAccession() != null )
- && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
- && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
- return n1.getNodeData().getSequence().getAccession().getValue()
- .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
- }
- }
- if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
- return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
- }
- return 0;
- }
- }
- class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
-
- @Override
- public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
- if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
- return n1.getNodeData().getSequence().getName().toLowerCase()
- .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
- return n1.getNodeData().getSequence().getSymbol()
- .compareTo( n2.getNodeData().getSequence().getSymbol() );
- }
- if ( ( n1.getNodeData().getSequence().getAccession() != null )
- && ( n2.getNodeData().getSequence().getAccession() != null )
- && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
- && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
- return n1.getNodeData().getSequence().getAccession().getValue()
- .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
- }
- }
- if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
- return n1.getNodeData().getTaxonomy().getTaxonomyCode()
- .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
- }
- }
- if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
- return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
- }
- return 0;
- }
- }
- class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
-
- @Override
- public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
- if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
- return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
- }
- if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
- return n1.getNodeData().getTaxonomy().getTaxonomyCode()
- .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
- }
- }
- if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
- return n1.getNodeData().getSequence().getName().toLowerCase()
- .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
- return n1.getNodeData().getSequence().getSymbol()
- .compareTo( n2.getNodeData().getSequence().getSymbol() );
- }
- if ( ( n1.getNodeData().getSequence().getAccession() != null )
- && ( n2.getNodeData().getSequence().getAccession() != null )
- && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
- && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
- return n1.getNodeData().getSequence().getAccession().getValue()
- .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
- }
- }
- return 0;
- }
- }
Comparator<PhylogenyNode> c;
switch ( pri ) {
case SEQUENCE:
/**
* Removes from Phylogeny to_be_stripped all external Nodes which are
* associated with a species NOT found in Phylogeny reference.
- *
+ *
* @param reference
* a reference Phylogeny
* @param to_be_stripped
}
else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
&& !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
- && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n
- .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
+ && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo
+ .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
nodes_to_delete.add( n );
}
}
}
}
- final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) {
+ final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
final PhylogenyNodeIterator it = phy.iteratorPostorder();
while ( it.hasNext() ) {
final PhylogenyNode n = it.next();
- if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+ if ( !n.isExternal() && !n.isRoot() ) {
if ( !ForesterUtil.isEmpty( n.getName() ) ) {
- double d = -1.0;
+ double value = -1;
try {
- d = Double.parseDouble( n.getName() );
+ value = Double.parseDouble( n.getName() );
}
- catch ( final Exception e ) {
- d = -1.0;
+ catch ( final NumberFormatException e ) {
+ return false;
}
- if ( d >= 0.0 ) {
- n.getBranchData().addConfidence( new Confidence( d, "" ) );
- n.setName( "" );
+ if ( ( value < 0.0 ) || ( value > 100 ) ) {
+ return false;
}
}
}
}
+ return true;
+ }
+
+ final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy,
+ final String confidence_type ) {
+ final PhylogenyNodeIterator it = phy.iteratorPostorder();
+ while ( it.hasNext() ) {
+ transferInternalNodeNameToConfidence( confidence_type, it.next() );
+ }
+ }
+
+ private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
+ if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+ if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+ double d = -1.0;
+ try {
+ d = Double.parseDouble( n.getName() );
+ }
+ catch ( final Exception e ) {
+ d = -1.0;
+ }
+ if ( d >= 0.0 ) {
+ n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
+ n.setName( "" );
+ }
+ }
+ }
}
final static public void transferNodeNameToField( final Phylogeny phy,
final PhylogenyNodeField field,
- final boolean external_only ) throws PhyloXmlDataFormatException {
+ final boolean external_only )
+ throws PhyloXmlDataFormatException {
final PhylogenyNodeIterator it = phy.iteratorPostorder();
while ( it.hasNext() ) {
final PhylogenyNode n = it.next();
n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
break;
}
+ default: {
+ throw new IllegalArgumentException( "don't know what to do with " + field );
+ }
}
}
}
return d;
}
+ public static double calculateAverageTreeHeight( final PhylogenyNode node ) {
+ final List<PhylogenyNode> ext = node.getAllExternalDescendants();
+ double s = 0;
+ for( PhylogenyNode n : ext ) {
+ while ( n != node ) {
+ if ( n.getDistanceToParent() > 0 ) {
+ s += n.getDistanceToParent();
+ }
+ n = n.getParent();
+ }
+ }
+ return s / ext.size();
+ }
+
/**
* Deep copies the phylogeny originating from this node.
*/
}
}
+ private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
+ final double min_distance_to_root ) {
+ final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
+ final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
+ for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
+ if ( ext.getIndicator() != 0 ) {
+ throw new RuntimeException( "this should not have happened" );
+ }
+ ext.setIndicator( ( byte ) 1 );
+ l.add( ext );
+ }
+ return l;
+ }
+
/**
* Calculates the distance between PhylogenyNodes n1 and n2.
* PRECONDITION: n1 is a descendant of n2.
- *
+ *
* @param n1
* a descendant of n2
* @param n2
private static boolean match( final String s,
final String query,
final boolean case_sensitive,
- final boolean partial ) {
+ final boolean partial,
+ final boolean regex ) {
if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
return false;
}
String my_s = s.trim();
String my_query = query.trim();
- if ( !case_sensitive ) {
+ if ( !case_sensitive && !regex ) {
my_s = my_s.toLowerCase();
my_query = my_query.toLowerCase();
}
- if ( partial ) {
+ if ( regex ) {
+ Pattern p = null;
+ try {
+ if ( case_sensitive ) {
+ p = Pattern.compile( my_query );
+ }
+ else {
+ p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
+ }
+ }
+ catch ( final PatternSyntaxException e ) {
+ return false;
+ }
+ if ( p != null ) {
+ return p.matcher( my_s ).find();
+ }
+ else {
+ return false;
+ }
+ }
+ else if ( partial ) {
return my_s.indexOf( my_query ) >= 0;
}
else {
- return my_s.equals( my_query );
+ Pattern p = null;
+ try {
+ p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
+ }
+ catch ( final PatternSyntaxException e ) {
+ return false;
+ }
+ if ( p != null ) {
+ return p.matcher( my_s ).find();
+ }
+ else {
+ return false;
+ }
+ }
+ }
+
+ private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
+ PhylogenyNode n = node;
+ PhylogenyNode prev = node;
+ while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
+ prev = n;
+ n = n.getParent();
}
+ return prev;
}
public static enum DESCENDANT_SORT_PRIORITY {
- TAXONOMY, SEQUENCE, NODE_NAME;
+ NODE_NAME,
+ SEQUENCE,
+ TAXONOMY;
}
public static enum PhylogenyNodeField {
- CLADE_NAME,
- TAXONOMY_CODE,
- TAXONOMY_SCIENTIFIC_NAME,
- TAXONOMY_COMMON_NAME,
- SEQUENCE_SYMBOL,
- SEQUENCE_NAME,
- TAXONOMY_ID_UNIPROT_1,
- TAXONOMY_ID_UNIPROT_2,
- TAXONOMY_ID;
+ CLADE_NAME,
+ SEQUENCE_NAME,
+ SEQUENCE_SYMBOL,
+ TAXONOMY_CODE,
+ TAXONOMY_COMMON_NAME,
+ TAXONOMY_ID,
+ TAXONOMY_ID_UNIPROT_1,
+ TAXONOMY_ID_UNIPROT_2,
+ TAXONOMY_SCIENTIFIC_NAME;
+ }
+
+ public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
+ for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
+ final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
+ final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
+ final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
+ new_seq.setMolecularSequenceAligned( true );
+ new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
+ new_seq.setName( seq.getIdentifier() );
+ try {
+ new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
+ }
+ catch ( final PhyloXmlDataFormatException ignore ) {
+ // do nothing
+ }
+ node.getNodeData().addSequence( new_seq );
+ }
+ }
+
+ final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
+
+ @Override
+ public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
+ if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
+ .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getTaxonomyCode()
+ .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
+ }
+ }
+ if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
+ return n1.getNodeData().getSequence().getName().toLowerCase()
+ .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
+ return n1.getNodeData().getSequence().getSymbol()
+ .compareTo( n2.getNodeData().getSequence().getSymbol() );
+ }
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
+ return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
+ }
+ return 0;
+ }
+ }
+
+ final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
+
+ @Override
+ public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
+ if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
+ return n1.getNodeData().getSequence().getName().toLowerCase()
+ .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
+ return n1.getNodeData().getSequence().getSymbol()
+ .compareTo( n2.getNodeData().getSequence().getSymbol() );
+ }
+ }
+ if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
+ .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getTaxonomyCode()
+ .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
+ }
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
+ return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
+ }
+ return 0;
+ }
+ }
+
+ final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
+
+ @Override
+ public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
+ return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
+ }
+ if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
+ .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getTaxonomyCode()
+ .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
+ }
+ }
+ if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
+ return n1.getNodeData().getSequence().getName().toLowerCase()
+ .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
+ return n1.getNodeData().getSequence().getSymbol()
+ .compareTo( n2.getNodeData().getSequence().getSymbol() );
+ }
+ }
+ return 0;
+ }
+ }
+
+ public final static Map<Long, Integer> calculateDepths( final Phylogeny phy ) {
+ final Map<Long, Integer> depths = new HashMap<Long, Integer>();
+ calculateDepthsHelper( phy.getRoot(), 0, depths );
+ return depths;
+ }
+
+ private final static void calculateDepthsHelper( final PhylogenyNode n, int d, final Map<Long, Integer> depths ) {
+ depths.put( n.getId(), d );
+ ++d;
+ final List<PhylogenyNode> descs = n.getDescendants();
+ for( final PhylogenyNode desc : descs ) {
+ calculateDepthsHelper( desc, d, depths );
+ }
+ }
+
+ public final static void collapseToDepth( final Phylogeny phy, final int depth ) {
+ if ( phy.getNumberOfExternalNodes() < 3 ) {
+ return;
+ }
+ collapseToDepthHelper( phy.getRoot(), 0, depth );
+ }
+
+ private final static void collapseToDepthHelper( final PhylogenyNode n, int d, final int depth ) {
+ if ( n.isExternal() ) {
+ n.setCollapse( false );
+ return;
+ }
+ if ( d >= depth ) {
+ n.setCollapse( true );
+ final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
+ while ( it.hasNext() ) {
+ it.next().setCollapse( true );
+ }
+ }
+ else {
+ n.setCollapse( false );
+ ++d;
+ final List<PhylogenyNode> descs = n.getDescendants();
+ for( final PhylogenyNode desc : descs ) {
+ collapseToDepthHelper( desc, d, depth );
+ }
+ }
+ }
+
+
+
+ public final static void collapseToRank( final Phylogeny phy, final int rank ) {
+ if ( phy.getNumberOfExternalNodes() < 3 ) {
+ return;
+ }
+ if ( rank < 0 || rank >= TaxonomyUtil.RANKS.length ) {
+ throw new IllegalArgumentException( "Rank " + rank + " is out of range" );
+ }
+ collapseToRankHelper( phy.getRoot(), rank );
+ }
+
+ private final static void collapseToRankHelper( final PhylogenyNode n, final int target_rank ) {
+ if ( n.isExternal() ) {
+ n.setCollapse( false );
+ return;
+ }
+ if ( ( n.getNodeData().getTaxonomy() != null )
+ && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) {
+ final String current_rank = n.getNodeData().getTaxonomy().getRank();
+ if ( !TaxonomyUtil.RANK_TO_INT.containsKey( current_rank ) ) {
+ System.out.println( "Don't know rank \"" + current_rank + "\", ignoring." );
+ }
+ else {
+ if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) {
+ n.setCollapse( true );
+
+ final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
+ while ( it.hasNext() ) {
+ it.next().setCollapse( true );
+ }
+ return;
+ }
+ }
+ }
+ n.setCollapse( false );
+ final List<PhylogenyNode> descs = n.getDescendants();
+ for( final PhylogenyNode desc : descs ) {
+ collapseToRankHelper( desc, target_rank );
+ }
+ }
+
+ public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) {
+ PhylogenyNode n = node;
+ while ( n.isInternal() ) {
+ n = n.getFirstChildNode();
+ }
+ return n;
+ }
+
+ public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) {
+ PhylogenyNode n = node;
+ while ( n.isInternal() ) {
+ n = n.getLastChildNode();
+ }
+ return n;
+ }
+
+ public final static boolean isHasCollapsedNodes( final Phylogeny phy ) {
+ for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+ final PhylogenyNode n = iter.next();
+ if ( !n.isExternal() && ( n.isCollapse() ) ) {
+ return true;
+ }
+ }
+ return false;
}
+
}