import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.regex.Pattern;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.BranchColor;
import org.forester.phylogeny.data.BranchWidth;
import org.forester.phylogeny.data.Confidence;
public class PhylogenyMethods {
- //private static PhylogenyMethods _instance = null;
- //private final PhylogenyNode _farthest_1 = null;
- //private final PhylogenyNode _farthest_2 = null;
private PhylogenyMethods() {
// Hidden constructor.
}
- // public double calculateFurthestDistance( final Phylogeny phylogeny ) {
- // if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
- // return 0.0;
- // }
- // _farthest_1 = null;
- // _farthest_2 = null;
- // PhylogenyNode node_1 = null;
- // PhylogenyNode node_2 = null;
- // double farthest_d = -Double.MAX_VALUE;
- // final PhylogenyMethods methods = PhylogenyMethods.getInstance();
- // final List<PhylogenyNode> ext_nodes = phylogeny.getRoot().getAllExternalDescendants();
- // for( int i = 1; i < ext_nodes.size(); ++i ) {
- // for( int j = 0; j < i; ++j ) {
- // final double d = methods.calculateDistance( ext_nodes.get( i ), ext_nodes.get( j ) );
- // if ( d < 0.0 ) {
- // throw new RuntimeException( "distance cannot be negative" );
- // }
- // if ( d > farthest_d ) {
- // farthest_d = d;
- // node_1 = ext_nodes.get( i );
- // node_2 = ext_nodes.get( j );
- // }
- // }
- // }
- // _farthest_1 = node_1;
- // _farthest_2 = node_2;
- // return farthest_d;
- // }
@Override
public Object clone() throws CloneNotSupportedException {
throw new CloneNotSupportedException();
}
- // public PhylogenyNode getFarthestNode1() {
- // return _farthest_1;
- // }
- // public PhylogenyNode getFarthestNode2() {
- // return _farthest_2;
- // }
public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
return stats;
}
+ public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
+ final List<PhylogenyNode> eds = n.getAllExternalDescendants();
+ final List<Double> d = new ArrayList<Double>();
+ for( final PhylogenyNode ed : eds ) {
+ d.add( calculateDistanceToAncestor( n, ed ) );
+ }
+ for( int i = 0; i < eds.size(); ++i ) {
+ n.setChildNode( i, eds.get( i ) );
+ eds.get( i ).setDistanceToParent( d.get( i ) );
+ }
+ }
+
public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
int count = 0;
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
- for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
- final PhylogenyNode n = iter.next();
+ final List<PhylogenyNode> ext = phy.getExternalNodes();
+ for( final PhylogenyNode n : ext ) {
nodes.put( n.getName(), n );
}
return nodes;
}
- public static void deleteExternalNodesNegativeSelection( final Set<Integer> to_delete, final Phylogeny phy ) {
- phy.clearHashIdToNodeMap();
- for( final Integer id : to_delete ) {
+ public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
+ for( final Long id : to_delete ) {
phy.deleteSubtree( phy.getNode( id ), true );
}
phy.clearHashIdToNodeMap();
p.externalNodesHaveChanged();
}
- public static void deleteExternalNodesPositiveSelection( final Set<Taxonomy> species_to_keep, final Phylogeny phy ) {
- // final Set<Integer> to_delete = new HashSet<Integer>();
- for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
- final PhylogenyNode n = it.next();
- if ( n.getNodeData().isHasTaxonomy() ) {
- if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
- //to_delete.add( n.getNodeId() );
- phy.deleteSubtree( n, true );
- }
- }
- else {
- throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
- }
- }
- phy.clearHashIdToNodeMap();
- phy.externalNodesHaveChanged();
- }
-
public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
final Phylogeny p ) {
final PhylogenyNodeIterator it = p.iteratorExternalForward();
return deleted;
}
+ public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep, final Phylogeny phy ) {
+ final Set<Long> to_delete = new HashSet<Long>();
+ for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+ final PhylogenyNode n = it.next();
+ if ( n.getNodeData().isHasTaxonomy() ) {
+ if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
+ to_delete.add( n.getId() );
+ }
+ }
+ else {
+ throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
+ }
+ }
+ deleteExternalNodesNegativeSelection( to_delete, phy );
+ }
+
final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
phy.externalNodesHaveChanged();
}
+ public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
+ final double min_distance_to_root ) {
+ if ( min_distance_to_root <= 0 ) {
+ throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
+ }
+ final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
+ setAllIndicatorsToZero( phy );
+ for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+ final PhylogenyNode n = it.next();
+ if ( n.getIndicator() != 0 ) {
+ continue;
+ }
+ l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) );
+ if ( l.isEmpty() ) {
+ throw new RuntimeException( "this should not have happened" );
+ }
+ }
+ return l;
+ }
+
public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
- final Set<Integer> encountered = new HashSet<Integer>();
+ final Set<Long> encountered = new HashSet<Long>();
if ( !node.isExternal() ) {
final List<PhylogenyNode> exts = node.getAllExternalDescendants();
for( PhylogenyNode current : exts ) {
phylogeny.recalculateNumberOfExternalDescendants( true );
}
- public static void midpointRootOLD( final Phylogeny phylogeny ) {
- // if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
- // return;
- // }
- // final PhylogenyMethods methods = getInstance();
- //final double farthest_d = methods.calculateFurthestDistance( phylogeny );
- // final PhylogenyNode f1 = methods.getFarthestNode1();
- // final PhylogenyNode f2 = methods.getFarthestNode2();
- // if ( farthest_d <= 0.0 ) {
- // return;
- // }
- // double x = farthest_d / 2.0;
- // PhylogenyNode n = f1;
- // if ( PhylogenyMethods.getDistance( f1, phylogeny.getRoot() ) < PhylogenyMethods.getDistance( f2, phylogeny
- // .getRoot() ) ) {
- // n = f2;
- // }
- // while ( ( x > n.getDistanceToParent() ) && !n.isRoot() ) {
- // x -= ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 );
- // n = n.getParent();
- // }
- // phylogeny.reRoot( n, x );
- // phylogeny.recalculateNumberOfExternalDescendants( true );
- // final PhylogenyNode a = getFurthestDescendant( phylogeny.getRoot().getChildNode1() );
- // final PhylogenyNode b = getFurthestDescendant( phylogeny.getRoot().getChildNode2() );
- // final double da = getDistance( a, phylogeny.getRoot() );
- // final double db = getDistance( b, phylogeny.getRoot() );
- // if ( Math.abs( da - db ) > 0.000001 ) {
- // throw new FailedConditionCheckException( "this should not have happened: midpoint rooting failed: da="
- // + da + ", db=" + db + ", diff=" + Math.abs( da - db ) );
- // }
- }
-
public static void normalizeBootstrapValues( final Phylogeny phylogeny,
final double max_bootstrap_value,
final double max_normalized_value ) {
}
/**
- * Returns the set of distinct taxonomies of
- * all external nodes of node.
- * If at least one the external nodes has no taxonomy,
- * null is returned.
- *
- */
- public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
- final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final Set<Taxonomy> tax_set = new HashSet<Taxonomy>();
- for( final PhylogenyNode n : descs ) {
- if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
- return null;
- }
- tax_set.add( n.getNodeData().getTaxonomy() );
- }
- return tax_set;
- }
-
- /**
* Returns a map of distinct taxonomies of
* all external nodes of node.
* If at least one of the external nodes has no taxonomy,
* null is returned.
*
*/
- public static SortedMap<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
+ public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final SortedMap<Taxonomy, Integer> tax_map = new TreeMap<Taxonomy, Integer>();
+ final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
for( final PhylogenyNode n : descs ) {
if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
return null;
return;
}
phy.setIdToNodeMap( null );
- int i = PhylogenyNode.getNodeCount();
+ long i = PhylogenyNode.getNodeCount();
for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
it.next().setId( i++ );
}
match = true;
}
if ( !match && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+ match = true;
+ }
+ if ( !match && node.getNodeData().isHasSequence()
&& match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
match = true;
}
}
}
}
+ //
+ if ( !match && node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
+ for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
+ if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ }
+ }
+ if ( !match && node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
+ for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
+ if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ }
+ }
+ //
if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
I: while ( it.hasNext() ) {
match = true;
}
if ( !match && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+ match = true;
+ }
+ if ( !match && node.getNodeData().isHasSequence()
&& match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
match = true;
}
}
}
}
+ //
+ if ( !match && node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
+ for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
+ if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ }
+ }
+ if ( !match && node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
+ for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
+ if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ }
+ }
+ //
if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
I: while ( it.hasNext() ) {
return nodes;
}
+ public static void setAllIndicatorsToZero( final Phylogeny phy ) {
+ for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
+ it.next().setIndicator( ( byte ) 0 );
+ }
+ }
+
/**
* Convenience method.
* Sets value for the first confidence value (created if not present, values overwritten otherwise).
return n1.getNodeData().getSequence().getSymbol()
.compareTo( n2.getNodeData().getSequence().getSymbol() );
}
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
if ( ( n1.getNodeData().getSequence().getAccession() != null )
&& ( n2.getNodeData().getSequence().getAccession() != null )
&& !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
return n1.getNodeData().getSequence().getSymbol()
.compareTo( n2.getNodeData().getSequence().getSymbol() );
}
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
if ( ( n1.getNodeData().getSequence().getAccession() != null )
&& ( n2.getNodeData().getSequence().getAccession() != null )
&& !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
return n1.getNodeData().getSequence().getSymbol()
.compareTo( n2.getNodeData().getSequence().getSymbol() );
}
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
if ( ( n1.getNodeData().getSequence().getAccession() != null )
&& ( n2.getNodeData().getSequence().getAccession() != null )
&& !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
}
+ static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
+ double d = 0;
+ boolean all_default = true;
+ while ( anc != desc ) {
+ if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
+ d += desc.getDistanceToParent();
+ if ( all_default ) {
+ all_default = false;
+ }
+ }
+ desc = desc.getParent();
+ }
+ if ( all_default ) {
+ return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
+ }
+ return d;
+ }
+
/**
* Deep copies the phylogeny originating from this node.
*/
}
}
+ private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
+ final double min_distance_to_root ) {
+ final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
+ final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
+ for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
+ if ( ext.getIndicator() != 0 ) {
+ throw new RuntimeException( "this should not have happened" );
+ }
+ ext.setIndicator( ( byte ) 1 );
+ l.add( ext );
+ }
+ return l;
+ }
+
/**
* Calculates the distance between PhylogenyNodes n1 and n2.
* PRECONDITION: n1 is a descendant of n2.
return my_s.indexOf( my_query ) >= 0;
}
else {
- return my_s.equals( my_query );
+ return Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" ).matcher( my_s ).find();
}
}
+ private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
+ PhylogenyNode n = node;
+ PhylogenyNode prev = node;
+ while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
+ prev = n;
+ n = n.getParent();
+ }
+ return prev;
+ }
+
public static enum DESCENDANT_SORT_PRIORITY {
- TAXONOMY, SEQUENCE, NODE_NAME;
+ NODE_NAME, SEQUENCE, TAXONOMY;
}
public static enum PhylogenyNodeField {
CLADE_NAME,
+ SEQUENCE_NAME,
+ SEQUENCE_SYMBOL,
TAXONOMY_CODE,
- TAXONOMY_SCIENTIFIC_NAME,
TAXONOMY_COMMON_NAME,
- SEQUENCE_SYMBOL,
- SEQUENCE_NAME,
+ TAXONOMY_ID,
TAXONOMY_ID_UNIPROT_1,
TAXONOMY_ID_UNIPROT_2,
- TAXONOMY_ID;
+ TAXONOMY_SCIENTIFIC_NAME;
}
}