X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsdi%2FSDI.java;h=535d99e0cebc680e25157aac3deb0e1ceba83868;hb=0b39293d1577083551ba528d0ac7afcacbf69dde;hp=59cc489f1bf70270d1897b2292e8f349d3cefc9f;hpb=0ee466206ea1e3ac3025df1110538e3815c160b4;p=jalview.git diff --git a/forester/java/src/org/forester/sdi/SDI.java b/forester/java/src/org/forester/sdi/SDI.java index 59cc489..535d99e 100644 --- a/forester/java/src/org/forester/sdi/SDI.java +++ b/forester/java/src/org/forester/sdi/SDI.java @@ -23,7 +23,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; @@ -31,12 +31,41 @@ import java.util.HashMap; import java.util.Map; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.ForesterUtil; -public abstract class SDI { +/* + * Implements our algorithm for speciation - duplication inference (SDI).
+ * Reference:
The initialization is accomplished by: + *
The recursion + * part is accomplished by this class' method + * "geneTreePostOrderTraversal(PhylogenyNode)".
Requires JDK 1.2 or greater. + * + * @see SDI#linkNodesOfG() + * + * @see Phylogeny#preorderReID(int) + * + * @see + * PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny) + * + * @see #geneTreePostOrderTraversal(PhylogenyNode) + * + * @author Christian M. Zmasek + * + * @version 1.102 -- last modified: 10/02/01 + */ +public class SDI { final Phylogeny _gene_tree; final Phylogeny _species_tree; @@ -46,22 +75,15 @@ public abstract class SDI { /** * Constructor which sets the gene tree and the species tree to be compared. * species_tree is the species tree to which the gene tree gene_tree will be - * compared to. - * Infers for each PhylogenyNode of gene_tree whether - * it represents a speciation or duplication event by calculating and - * interpreting the mapping function M. The most parsimonious sequence of - * speciation and duplication events is assumed. - * The mapping cost L can be - * calculated with method "computeMappingCost()". + * compared to - with method "infer(boolean)". Both Trees must be completely + * binary and rooted. The actual inference is accomplished with method + * "infer(boolean)". The mapping cost L can then be calculated with method + * "computeMappingCost()". *
- * Conditions: - *
- *+ * Reference. Zhang, L. (1997) On a Mirkin-Muchnik-Smith Conjecture for + * Comparing Molecular Phylogenies. Journal of Computational Biology 4 + * 177-187. + * + * @return the mapping cost "L" + */ + public int computeMappingCostL() { + _species_tree.levelOrderReID(); + _mapping_cost = 0; + computeMappingCostHelper( _gene_tree.getRoot() ); + return _mapping_cost; + } + + /** + * Returns the number of duplications. + * + * @return number of duplications + */ + public int getDuplicationsSum() { + return _duplications_sum; + } + + /** + * Returns the gene tree. + * + * @return gene tree + */ + public Phylogeny getGeneTree() { + return _gene_tree; + } + + /** + * Returns the species tree. + * + * @return species tree + */ + public Phylogeny getSpeciesTree() { + return _species_tree; + } + + @Override + public String toString() { + final StringBuffer sb = new StringBuffer(); + sb.append( getClass() ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Duplications sum : " + getDuplicationsSum() ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "mapping cost L : " + computeMappingCostL() ); + return sb.toString(); + } + + /** + * Traverses the subtree of PhylogenyNode g in postorder, calculating the + * mapping function M, and determines which nodes represent speciation + * events and which ones duplication events. + *
+ * Preconditions: Mapping M for external nodes must have been calculated and + * the species tree must be labelled in preorder. + *
+ * (Last modified: 01/11/01)
+ *
+ * @param g
+ * starting node of a gene tree - normally the root
+ */
+ void geneTreePostOrderTraversal( final PhylogenyNode g ) {
+ PhylogenyNode a, b;
+ if ( !g.isExternal() ) {
+ geneTreePostOrderTraversal( g.getChildNode( 0 ) );
+ geneTreePostOrderTraversal( g.getChildNode( 1 ) );
+ a = g.getChildNode( 0 ).getLink();
+ b = g.getChildNode( 1 ).getLink();
+ while ( a != b ) {
+ if ( a.getId() > b.getId() ) {
+ a = a.getParent();
+ }
+ else {
+ b = b.getParent();
+ }
+ }
+ g.setLink( a );
+ // Determines whether dup. or spec.
+ Event event = null;
+ if ( ( a == g.getChildNode( 0 ).getLink() ) || ( a == g.getChildNode( 1 ).getLink() ) ) {
+ event = Event.createSingleDuplicationEvent();
+ ++_duplications_sum;
+ }
+ else {
+ event = Event.createSingleSpeciationEvent();
+ }
+ g.getNodeData().setEvent( event );
+ }
+ } // geneTreePostOrderTraversal( PhylogenyNode )
+
+ /**
+ * Calculates the mapping function for the external nodes of the gene tree:
+ * links (sets the field "link" of PhylogenyNode) each external
+ * PhylogenyNode of gene_tree to the external PhylogenyNode of species_tree
+ * which has the same species name.
+ * @throws SDIException
+ */
+ final void linkNodesOfG() throws SDIException {
+ final Map
+ * To be used ONLY by method "SDIunrooted.fastInfer(Phylogeny,Phylogeny)".
+ *
+ * (Last modfied: 10/02/01)
+ *
+ * @param prev_root_was_dup
+ * true if the previous root was a duplication, false otherwise
+ * @param prev_root_c1
+ * child 1 of the previous root
+ * @param prev_root_c2
+ * child 2 of the previous root
+ * @return number of duplications which have been assigned in gene tree
+ */
+ int updateM( final boolean prev_root_was_dup, final PhylogenyNode prev_root_c1, final PhylogenyNode prev_root_c2 ) {
+ final PhylogenyNode root = getGeneTree().getRoot();
+ if ( ( root.getChildNode1() == prev_root_c1 ) || ( root.getChildNode2() == prev_root_c1 ) ) {
+ calculateMforNode( prev_root_c1 );
+ }
+ else {
+ calculateMforNode( prev_root_c2 );
+ }
+ Event event = null;
+ if ( prev_root_was_dup ) {
+ event = Event.createSingleDuplicationEvent();
+ }
+ else {
+ event = Event.createSingleSpeciationEvent();
+ }
+ root.getNodeData().setEvent( event );
+ calculateMforNode( root );
+ return getDuplicationsSum();
+ } // updateM( boolean, PhylogenyNode, PhylogenyNode )
+
+ // Helper method for updateM( boolean, PhylogenyNode, PhylogenyNode )
+ // Calculates M for PhylogenyNode n, given that M for the two children
+ // of n has been calculated.
+ // (Last modified: 10/02/01)
+ private void calculateMforNode( final PhylogenyNode n ) {
+ if ( !n.isExternal() ) {
+ final boolean was_duplication = n.isDuplication();
+ PhylogenyNode a = n.getChildNode1().getLink();
+ PhylogenyNode b = n.getChildNode2().getLink();
+ while ( a != b ) {
+ if ( a.getId() > b.getId() ) {
+ a = a.getParent();
+ }
+ else {
+ b = b.getParent();
+ }
+ }
+ n.setLink( a );
+ Event event = null;
+ if ( ( a == n.getChildNode1().getLink() ) || ( a == n.getChildNode2().getLink() ) ) {
+ event = Event.createSingleDuplicationEvent();
+ if ( !was_duplication ) {
+ ++_duplications_sum;
+ }
+ }
+ else {
+ event = Event.createSingleSpeciationEvent();
+ if ( was_duplication ) {
+ --_duplications_sum;
+ }
+ }
+ n.getNodeData().setEvent( event );
+ }
+ } // calculateMforNode( PhylogenyNode )
+
// Helper method for "computeMappingCost()".
private void computeMappingCostHelper( final PhylogenyNode g ) {
if ( !g.isExternal() ) {
computeMappingCostHelper( g.getChildNode1() );
computeMappingCostHelper( g.getChildNode2() );
if ( ( g.getLink() != g.getChildNode1().getLink() ) && ( g.getLink() != g.getChildNode2().getLink() ) ) {
- _mapping_cost += ( g.getChildNode1().getLink().getId() + g.getChildNode2().getLink().getId()
+ _mapping_cost += ( ( g.getChildNode1().getLink().getId() + g.getChildNode2().getLink().getId() )
- ( 2 * g.getLink().getId() ) - 2 );
}
else if ( ( g.getLink() != g.getChildNode1().getLink() ) && ( g.getLink() == g.getChildNode2().getLink() ) ) {
- _mapping_cost += ( g.getChildNode1().getLink().getId() - g.getLink().getId() + 1 );
+ _mapping_cost += ( ( g.getChildNode1().getLink().getId() - g.getLink().getId() ) + 1 );
}
else if ( ( g.getLink() == g.getChildNode1().getLink() ) && ( g.getLink() != g.getChildNode2().getLink() ) ) {
- _mapping_cost += ( g.getChildNode2().getLink().getId() - g.getLink().getId() + 1 );
+ _mapping_cost += ( ( g.getChildNode2().getLink().getId() - g.getLink().getId() ) + 1 );
}
else {
_mapping_cost++;
@@ -108,24 +335,6 @@ public abstract class SDI {
}
}
- /**
- * Computes the cost of mapping the gene tree gene_tree onto the species
- * tree species_tree. Before this method can be called, the mapping has to
- * be calculated with method "infer(boolean)".
- *
- * Reference. Zhang, L. (1997) On a Mirkin-Muchnik-Smith Conjecture for
- * Comparing Molecular Phylogenies. Journal of Computational Biology 4
- * 177-187.
- *
- * @return the mapping cost "L"
- */
- public int computeMappingCostL() {
- _species_tree.levelOrderReID();
- _mapping_cost = 0;
- computeMappingCostHelper( _gene_tree.getRoot() );
- return _mapping_cost;
- }
-
private TaxonomyComparisonBase determineTaxonomyComparisonBase() {
TaxonomyComparisonBase base = null;
boolean all_have_id = true;
@@ -183,67 +392,6 @@ public abstract class SDI {
}
/**
- * Returns the number of duplications.
- *
- * @return number of duplications
- */
- public int getDuplicationsSum() {
- return _duplications_sum;
- }
-
- /**
- * Returns the gene tree.
- *
- * @return gene tree
- */
- public Phylogeny getGeneTree() {
- return _gene_tree;
- }
-
- /**
- * Returns the species tree.
- *
- * @return species tree
- */
- public Phylogeny getSpeciesTree() {
- return _species_tree;
- }
-
- /**
- * Calculates the mapping function for the external nodes of the gene tree:
- * links (sets the field "link" of PhylogenyNode) each external
- * PhylogenyNode of gene_tree to the external PhylogenyNode of species_tree
- * which has the same species name.
- * @throws SDIException
- */
- void linkNodesOfG() throws SDIException {
- final Map