import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.regex.Pattern;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
phy.externalNodesHaveChanged();
}
+ /**
+  * Partitions the external nodes of a phylogeny into groups ("sub-trees").
+  * <p>
+  * All node indicators are first reset to zero. Then, for every external node
+  * whose indicator is still zero, a group is obtained from
+  * divideIntoSubTreesHelper, which marks the members of the group by setting
+  * their indicators to one, so that already grouped external nodes are skipped.
+  * <p>
+  * Side effect: the indicators of the nodes of phy are overwritten.
+  *
+  * @param phy the phylogeny whose external nodes are to be grouped
+  * @param min_distance_to_root the distance-to-root threshold handed to the helper, must be larger than zero
+  * @return the groups of external nodes, in the order in which they were found
+  * @throws IllegalArgumentException if min_distance_to_root is zero or negative
+  * @throws RuntimeException if a group comes back empty, or if the helper
+  *         encounters an external node which has already been assigned
+  */
+ public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
+                                                                   final double min_distance_to_root ) {
+     if ( min_distance_to_root <= 0 ) {
+         throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
+     }
+     final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
+     setAllIndicatorsToZero( phy );
+     for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+         final PhylogenyNode n = it.next();
+         if ( n.getIndicator() != 0 ) {
+             // Already a member of a previously collected group.
+             continue;
+         }
+         final List<PhylogenyNode> sub_tree = divideIntoSubTreesHelper( n, min_distance_to_root );
+         // Sanity check on the group itself: testing the outer list right after
+         // add() could never fail, since it is non-empty by then.
+         if ( sub_tree.isEmpty() ) {
+             throw new RuntimeException( "this should not have happened" );
+         }
+         l.add( sub_tree );
+     }
+     return l;
+ }
+
public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
final Set<Long> encountered = new HashSet<Long>();
* null is returned.
*
*/
- public static SortedMap<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
+ public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final SortedMap<Taxonomy, Integer> tax_map = new TreeMap<Taxonomy, Integer>();
+ final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
for( final PhylogenyNode n : descs ) {
if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
return null;
return nodes;
}
+ /**
+  * Resets the indicator of every node reached by a postorder traversal
+  * of the given phylogeny to zero.
+  *
+  * @param phy the phylogeny whose node indicators are cleared
+  */
+ public static void setAllIndicatorsToZero( final Phylogeny phy ) {
+     final PhylogenyNodeIterator postorder = phy.iteratorPostorder();
+     while ( postorder.hasNext() ) {
+         final PhylogenyNode current = postorder.next();
+         current.setIndicator( ( byte ) 0 );
+     }
+ }
+
/**
* Convenience method.
* Sets value for the first confidence value (created if not present, values overwritten otherwise).
}
}
+ /**
+  * Collects one group of external nodes, starting from the given node.
+  * <p>
+  * The node returned by moveTowardsRoot for the given arguments is used as the
+  * top of the group; all of its external descendants are marked (indicator set
+  * to one) and returned.
+  *
+  * @param node the node from which the walk towards the root starts
+  * @param min_distance_to_root the distance-to-root threshold passed on to moveTowardsRoot
+  * @return the external descendants of the node reached, in the order delivered by getAllExternalDescendants
+  * @throws RuntimeException if one of these external nodes has a non-zero indicator already
+  */
+ private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
+                                                                    final double min_distance_to_root ) {
+     final PhylogenyNode group_top = moveTowardsRoot( node, min_distance_to_root );
+     final List<PhylogenyNode> externals = group_top.getAllExternalDescendants();
+     final List<PhylogenyNode> members = new ArrayList<PhylogenyNode>();
+     for( final PhylogenyNode candidate : externals ) {
+         if ( candidate.getIndicator() == 0 ) {
+             // Mark as assigned, then record as member of this group.
+             candidate.setIndicator( ( byte ) 1 );
+             members.add( candidate );
+         }
+         else {
+             throw new RuntimeException( "this should not have happened" );
+         }
+     }
+     return members;
+ }
+
/**
* Calculates the distance between PhylogenyNodes n1 and n2.
* PRECONDITION: n1 is a descendant of n2.
return my_s.indexOf( my_query ) >= 0;
}
else {
- return my_s.equals( my_query );
+ return Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" ).matcher( my_s ).find();
}
}
+ /**
+  * Walks from the given node towards the root, following parent links, for as
+  * long as the distance to the root of the node being visited is larger than
+  * min_distance_to_root.
+  *
+  * @param node the node at which the walk starts
+  * @param min_distance_to_root the distance-to-root threshold
+  * @return the last node visited whose distance to the root is larger than the
+  *         threshold, or the start node itself if its own distance is not larger
+  */
+ private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
+     PhylogenyNode last_beyond_threshold = node;
+     for( PhylogenyNode current = node; current.calculateDistanceToRoot() > min_distance_to_root; current = current
+             .getParent() ) {
+         last_beyond_threshold = current;
+     }
+     return last_beyond_threshold;
+ }
+
/**
 * Names the kinds of node data (node name, sequence, taxonomy) which can be given priority.
 * NOTE(review): presumably selects which attribute is compared first when
 * descendants are sorted -- the usage is not visible in this section; confirm.
 * NOTE(review): the declaration order of the constants changes in this revision;
 * check for code depending on ordinal() or on the order of values().
 */
public static enum DESCENDANT_SORT_PRIORITY {
- TAXONOMY, SEQUENCE, NODE_NAME;
+ NODE_NAME, SEQUENCE, TAXONOMY;
}
/**
 * Identifies a data field of a phylogeny node: the clade name, sequence name
 * and symbol, and the various taxonomy attributes (code, common name,
 * identifiers, scientific name).
 * NOTE(review): how these constants are consumed is not visible in this section; confirm at the call sites.
 * NOTE(review): the constants are reordered in this revision;
 * check for code depending on ordinal() or on the order of values().
 */
public static enum PhylogenyNodeField {
CLADE_NAME,
+ SEQUENCE_NAME,
+ SEQUENCE_SYMBOL,
TAXONOMY_CODE,
- TAXONOMY_SCIENTIFIC_NAME,
TAXONOMY_COMMON_NAME,
- SEQUENCE_SYMBOL,
- SEQUENCE_NAME,
+ TAXONOMY_ID,
TAXONOMY_ID_UNIPROT_1,
TAXONOMY_ID_UNIPROT_2,
- TAXONOMY_ID;
+ TAXONOMY_SCIENTIFIC_NAME;
}
}