// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.phylogeny;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.BranchColor;
import org.forester.phylogeny.data.BranchWidth;
import org.forester.phylogeny.data.Confidence;
public class PhylogenyMethods {
- //private static PhylogenyMethods _instance = null;
- //private final PhylogenyNode _farthest_1 = null;
- //private final PhylogenyNode _farthest_2 = null;
private PhylogenyMethods() {
// Hidden constructor.
}
- // public double calculateFurthestDistance( final Phylogeny phylogeny ) {
- // if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
- // return 0.0;
- // }
- // _farthest_1 = null;
- // _farthest_2 = null;
- // PhylogenyNode node_1 = null;
- // PhylogenyNode node_2 = null;
- // double farthest_d = -Double.MAX_VALUE;
- // final PhylogenyMethods methods = PhylogenyMethods.getInstance();
- // final List<PhylogenyNode> ext_nodes = phylogeny.getRoot().getAllExternalDescendants();
- // for( int i = 1; i < ext_nodes.size(); ++i ) {
- // for( int j = 0; j < i; ++j ) {
- // final double d = methods.calculateDistance( ext_nodes.get( i ), ext_nodes.get( j ) );
- // if ( d < 0.0 ) {
- // throw new RuntimeException( "distance cannot be negative" );
- // }
- // if ( d > farthest_d ) {
- // farthest_d = d;
- // node_1 = ext_nodes.get( i );
- // node_2 = ext_nodes.get( j );
- // }
- // }
- // }
- // _farthest_1 = node_1;
- // _farthest_2 = node_2;
- // return farthest_d;
- // }
@Override
public Object clone() throws CloneNotSupportedException {
throw new CloneNotSupportedException();
}
- // public PhylogenyNode getFarthestNode1() {
- // return _farthest_1;
- // }
- // public PhylogenyNode getFarthestNode2() {
- // return _farthest_2;
- // }
public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
return stats;
}
+ public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
+ final List<PhylogenyNode> eds = n.getAllExternalDescendants();
+ final List<Double> d = new ArrayList<Double>();
+ for( final PhylogenyNode ed : eds ) {
+ d.add( calculateDistanceToAncestor( n, ed ) );
+ }
+ for( int i = 0; i < eds.size(); ++i ) {
+ n.setChildNode( i, eds.get( i ) );
+ eds.get( i ).setDistanceToParent( d.get( i ) );
+ }
+ }
+
public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
int count = 0;
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
- for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
- final PhylogenyNode n = iter.next();
+ final List<PhylogenyNode> ext = phy.getExternalNodes();
+ for( final PhylogenyNode n : ext ) {
nodes.put( n.getName(), n );
}
return nodes;
}
- public static void deleteExternalNodesNegativeSelection( final Set<Integer> to_delete, final Phylogeny phy ) {
- phy.clearHashIdToNodeMap();
- for( final Integer id : to_delete ) {
+ public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
+ for( final Long id : to_delete ) {
phy.deleteSubtree( phy.getNode( id ), true );
}
phy.clearHashIdToNodeMap();
p.externalNodesHaveChanged();
}
- public static void deleteExternalNodesPositiveSelection( final Set<Taxonomy> species_to_keep, final Phylogeny phy ) {
- // final Set<Integer> to_delete = new HashSet<Integer>();
- for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
- final PhylogenyNode n = it.next();
- if ( n.getNodeData().isHasTaxonomy() ) {
- if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
- //to_delete.add( n.getNodeId() );
- phy.deleteSubtree( n, true );
- }
- }
- else {
- throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
- }
- }
- phy.clearHashIdToNodeMap();
- phy.externalNodesHaveChanged();
- }
-
public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
final Phylogeny p ) {
final PhylogenyNodeIterator it = p.iteratorExternalForward();
return deleted;
}
+ public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep, final Phylogeny phy ) {
+ final Set<Long> to_delete = new HashSet<Long>();
+ for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+ final PhylogenyNode n = it.next();
+ if ( n.getNodeData().isHasTaxonomy() ) {
+ if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
+ to_delete.add( n.getId() );
+ }
+ }
+ else {
+ throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
+ }
+ }
+ deleteExternalNodesNegativeSelection( to_delete, phy );
+ }
+
final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
- if ( ( !n.isExternal() ) && ( !n.isRoot() ) && ( n.getNumberOfDescendants() == 1 ) ) {
+ if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
to_delete.add( n );
}
}
public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
- final Set<Integer> encountered = new HashSet<Integer>();
+ final Set<Long> encountered = new HashSet<Long>();
if ( !node.isExternal() ) {
final List<PhylogenyNode> exts = node.getAllExternalDescendants();
for( PhylogenyNode current : exts ) {
phylogeny.recalculateNumberOfExternalDescendants( true );
}
- public static void midpointRootOLD( final Phylogeny phylogeny ) {
- // if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
- // return;
- // }
- // final PhylogenyMethods methods = getInstance();
- //final double farthest_d = methods.calculateFurthestDistance( phylogeny );
- // final PhylogenyNode f1 = methods.getFarthestNode1();
- // final PhylogenyNode f2 = methods.getFarthestNode2();
- // if ( farthest_d <= 0.0 ) {
- // return;
- // }
- // double x = farthest_d / 2.0;
- // PhylogenyNode n = f1;
- // if ( PhylogenyMethods.getDistance( f1, phylogeny.getRoot() ) < PhylogenyMethods.getDistance( f2, phylogeny
- // .getRoot() ) ) {
- // n = f2;
- // }
- // while ( ( x > n.getDistanceToParent() ) && !n.isRoot() ) {
- // x -= ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 );
- // n = n.getParent();
- // }
- // phylogeny.reRoot( n, x );
- // phylogeny.recalculateNumberOfExternalDescendants( true );
- // final PhylogenyNode a = getFurthestDescendant( phylogeny.getRoot().getChildNode1() );
- // final PhylogenyNode b = getFurthestDescendant( phylogeny.getRoot().getChildNode2() );
- // final double da = getDistance( a, phylogeny.getRoot() );
- // final double db = getDistance( b, phylogeny.getRoot() );
- // if ( Math.abs( da - db ) > 0.000001 ) {
- // throw new FailedConditionCheckException( "this should not have happened: midpoint rooting failed: da="
- // + da + ", db=" + db + ", diff=" + Math.abs( da - db ) );
- // }
- }
-
public static void normalizeBootstrapValues( final Phylogeny phylogeny,
final double max_bootstrap_value,
final double max_normalized_value ) {
}
/**
- * Returns the set of distinct taxonomies of
- * all external nodes of node.
- * If at least one the external nodes has no taxonomy,
- * null is returned.
- *
- */
- public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
- final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final Set<Taxonomy> tax_set = new HashSet<Taxonomy>();
- for( final PhylogenyNode n : descs ) {
- if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
- return null;
- }
- tax_set.add( n.getNodeData().getTaxonomy() );
- }
- return tax_set;
- }
-
- /**
* Returns a map of distinct taxonomies of
* all external nodes of node.
* If at least one of the external nodes has no taxonomy,
return;
}
phy.setIdToNodeMap( null );
- int i = PhylogenyNode.getNodeCount();
+ long i = PhylogenyNode.getNodeCount();
for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
it.next().setId( i++ );
}
}
public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
- if ( remove_me.isRoot() && remove_me.getNumberOfDescendants() != 1 ) {
- throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
- }
-
- if ( remove_me.isRoot() && remove_me.getNumberOfDescendants() == 1 ) {
- final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
-
- desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
- desc.getDistanceToParent() ) );
- desc.setParent( null );
- phylogeny.setRoot( desc );
- phylogeny.clearHashIdToNodeMap();
+ if ( remove_me.isRoot() ) {
+ if ( remove_me.getNumberOfDescendants() == 1 ) {
+ final PhylogenyNode desc = remove_me.getDescendants().get( 0 );
+ desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(),
+ desc.getDistanceToParent() ) );
+ desc.setParent( null );
+ phylogeny.setRoot( desc );
+ phylogeny.clearHashIdToNodeMap();
+ }
+ else {
+ throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" );
+ }
}
-
else if ( remove_me.isExternal() ) {
phylogeny.deleteSubtree( remove_me, false );
phylogeny.clearHashIdToNodeMap();
match = true;
}
if ( !match && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+ match = true;
+ }
+ if ( !match && node.getNodeData().isHasSequence()
&& match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
match = true;
}
}
}
}
+ //
+ if ( !match && node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
+ for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
+ if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ }
+ }
+ if ( !match && node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
+ for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
+ if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ }
+ }
+ //
if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
I: while ( it.hasNext() ) {
match = true;
}
if ( !match && node.getNodeData().isHasSequence()
+ && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+ match = true;
+ }
+ if ( !match && node.getNodeData().isHasSequence()
&& match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
match = true;
}
}
}
}
+ //
+ if ( !match && node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
+ for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
+ if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ }
+ }
+ if ( !match && node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
+ for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
+ if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+ match = true;
+ break;
+ }
+ }
+ }
+ //
if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
I: while ( it.hasNext() ) {
return n1.getNodeData().getSequence().getSymbol()
.compareTo( n2.getNodeData().getSequence().getSymbol() );
}
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
if ( ( n1.getNodeData().getSequence().getAccession() != null )
&& ( n2.getNodeData().getSequence().getAccession() != null )
&& !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
return n1.getNodeData().getSequence().getSymbol()
.compareTo( n2.getNodeData().getSequence().getSymbol() );
}
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
if ( ( n1.getNodeData().getSequence().getAccession() != null )
&& ( n2.getNodeData().getSequence().getAccession() != null )
&& !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
return n1.getNodeData().getSequence().getSymbol()
.compareTo( n2.getNodeData().getSequence().getSymbol() );
}
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
if ( ( n1.getNodeData().getSequence().getAccession() != null )
&& ( n2.getNodeData().getSequence().getAccession() != null )
&& !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
if ( !n.getNodeData().isHasTaxonomy() ) {
throw new IllegalArgumentException( "no taxonomic data in node: " + n );
}
- // ref_ext_taxo.add( getSpecies( n ) );
if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
}
if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
}
+ if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
+ && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
+ ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
+ }
}
final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
nodes_to_delete.add( n );
}
else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
- && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n
+ .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
nodes_to_delete.add( n );
}
}
return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
}
+ static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
+ double d = 0;
+ boolean all_default = true;
+ while ( anc != desc ) {
+ if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
+ d += desc.getDistanceToParent();
+ if ( all_default ) {
+ all_default = false;
+ }
+ }
+ desc = desc.getParent();
+ }
+ if ( all_default ) {
+ return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
+ }
+ return d;
+ }
+
/**
* Deep copies the phylogeny originating from this node.
*/