X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsdi%2FSDI.java;h=2c59efad25e8fbe2b6b5e3efb86397d9e5f831f5;hb=d15038d8870ccccd726ad82ac9a707afb6a7f31e;hp=59cc489f1bf70270d1897b2292e8f349d3cefc9f;hpb=0ee466206ea1e3ac3025df1110538e3815c160b4;p=jalview.git diff --git a/forester/java/src/org/forester/sdi/SDI.java b/forester/java/src/org/forester/sdi/SDI.java index 59cc489..2c59efa 100644 --- a/forester/java/src/org/forester/sdi/SDI.java +++ b/forester/java/src/org/forester/sdi/SDI.java @@ -23,7 +23,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; @@ -31,12 +31,41 @@ import java.util.HashMap; import java.util.Map; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.ForesterUtil; -public abstract class SDI { +/* + * Implements our algorithm for speciation - duplication inference (SDI).

+ * Reference:

The initialization is accomplished by: + *

The recursion + * part is accomplished by this class' method + * "geneTreePostOrderTraversal(PhylogenyNode)".

Requires JDK 1.2 or greater. + * + * @see SDI#linkNodesOfG() + * + * @see Phylogeny#preorderReID(int) + * + * @see + * PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny) + * + * @see #geneTreePostOrderTraversal(PhylogenyNode) + * + * @author Christian M. Zmasek + * + * @version 1.102 -- last modified: 10/02/01 + */ +public class SDI { final Phylogeny _gene_tree; final Phylogeny _species_tree; @@ -46,22 +75,15 @@ public abstract class SDI { /** * Constructor which sets the gene tree and the species tree to be compared. * species_tree is the species tree to which the gene tree gene_tree will be - * compared to. - * Infers for each PhylogenyNode of gene_tree whether - * it represents a speciation or duplication event by calculating and - * interpreting the mapping function M. The most parsimonious sequence of - * speciation and duplication events is assumed. - * The mapping cost L can be - * calculated with method "computeMappingCost()". + * compared to - with method "infer(boolean)". Both Trees must be completely + * binary and rooted. The actual inference is accomplished with method + * "infer(boolean)". The mapping cost L can then be calculated with method + * "computeMappingCost()". *

- * Conditions: - *

- * + * (Last modified: 01/11/01) * + * @see #infer(boolean) + * @see SDI#computeMappingCostL() * @param gene_tree * reference to a rooted binary gene Phylogeny to which assign * duplication vs speciation, must have species names in the @@ -70,42 +92,22 @@ public abstract class SDI { * reference to a rooted binary species Phylogeny which might get * stripped in the process, must have species names in the * species name fields for all external nodes + * @throws SDIException */ - public SDI( final Phylogeny gene_tree, final Phylogeny species_tree ) { + public SDI( final Phylogeny gene_tree, final Phylogeny species_tree ) throws SDIException { if ( species_tree.isEmpty() || gene_tree.isEmpty() ) { throw new IllegalArgumentException( "attempt to infer duplications using empty tree(s)" ); } - if ( !gene_tree.isRooted() ) { - throw new IllegalArgumentException( "attempt to infer duplications on unrooted gene tree" ); - } if ( !species_tree.isRooted() ) { throw new IllegalArgumentException( "attempt to infer duplications on unrooted species tree" ); } _gene_tree = gene_tree; _species_tree = species_tree; - _duplications_sum = 0; _mapping_cost = -1; - } - - // Helper method for "computeMappingCost()". - private void computeMappingCostHelper( final PhylogenyNode g ) { - if ( !g.isExternal() ) { - computeMappingCostHelper( g.getChildNode1() ); - computeMappingCostHelper( g.getChildNode2() ); - if ( ( g.getLink() != g.getChildNode1().getLink() ) && ( g.getLink() != g.getChildNode2().getLink() ) ) { - _mapping_cost += ( g.getChildNode1().getLink().getId() + g.getChildNode2().getLink().getId() - - ( 2 * g.getLink().getId() ) - 2 ); - } - else if ( ( g.getLink() != g.getChildNode1().getLink() ) && ( g.getLink() == g.getChildNode2().getLink() ) ) { - _mapping_cost += ( g.getChildNode1().getLink().getId() - g.getLink().getId() + 1 ); - } - else if ( ( g.getLink() == g.getChildNode1().getLink() ) && ( g.getLink() != g.getChildNode2().getLink() ) ) { - _mapping_cost += ( g.getChildNode2().getLink().getId() - g.getLink().getId() + 1 ); - } - else { - _mapping_cost++; - } - } + _duplications_sum = 0; + PhylogenyMethods.preOrderReId( getSpeciesTree() ); + linkNodesOfG(); + geneTreePostOrderTraversal( getGeneTree().getRoot() ); } /** @@ -126,62 +128,6 @@ public abstract class SDI { return _mapping_cost; } - private TaxonomyComparisonBase determineTaxonomyComparisonBase() { - TaxonomyComparisonBase base = null; - boolean all_have_id = true; - boolean all_have_code = true; - boolean all_have_sn = true; - for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( n.getNodeData().isHasTaxonomy() ) { - final Taxonomy tax = n.getNodeData().getTaxonomy(); - if ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { - all_have_id = false; - } - if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { - all_have_code = false; - } - if ( ForesterUtil.isEmpty( tax.getScientificName() ) ) { - all_have_sn = false; - } - } - else { - throw new IllegalArgumentException( "species tree node [" + n + "] has no taxonomic data" ); - } - } - for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( n.getNodeData().isHasTaxonomy() ) { - final Taxonomy tax = n.getNodeData().getTaxonomy(); - if ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { - all_have_id = false; - } - if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { - all_have_code = false; - } - if ( ForesterUtil.isEmpty( tax.getScientificName() ) ) { - all_have_sn = false; - } - } - else { - throw new IllegalArgumentException( "gene tree node [" + n + "] has no taxonomic data" ); - } - } - if ( all_have_id ) { - base = TaxonomyComparisonBase.ID; - } - else if ( all_have_code ) { - base = TaxonomyComparisonBase.CODE; - } - else if ( all_have_sn ) { - base = TaxonomyComparisonBase.SCIENTIFIC_NAME; - } - else { - throw new IllegalArgumentException( "gene tree and species tree have incomparable taxonomies" ); - } - return base; - } - /** * Returns the number of duplications. * @@ -209,6 +155,59 @@ public abstract class SDI { return _species_tree; } + @Override + public String toString() { + final StringBuffer sb = new StringBuffer(); + sb.append( getClass() ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Duplications sum : " + getDuplicationsSum() ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "mapping cost L : " + computeMappingCostL() ); + return sb.toString(); + } + + /** + * Traverses the subtree of PhylogenyNode g in postorder, calculating the + * mapping function M, and determines which nodes represent speciation + * events and which ones duplication events. + *

+ * Preconditions: Mapping M for external nodes must have been calculated and + * the species tree must be labelled in preorder. + *

+ * (Last modified: 01/11/01) + * + * @param g + * starting node of a gene tree - normally the root + */ + void geneTreePostOrderTraversal( final PhylogenyNode g ) { + PhylogenyNode a, b; + if ( !g.isExternal() ) { + geneTreePostOrderTraversal( g.getChildNode( 0 ) ); + geneTreePostOrderTraversal( g.getChildNode( 1 ) ); + a = g.getChildNode( 0 ).getLink(); + b = g.getChildNode( 1 ).getLink(); + while ( a != b ) { + if ( a.getId() > b.getId() ) { + a = a.getParent(); + } + else { + b = b.getParent(); + } + } + g.setLink( a ); + // Determines whether dup. or spec. + Event event = null; + if ( ( a == g.getChildNode( 0 ).getLink() ) || ( a == g.getChildNode( 1 ).getLink() ) ) { + event = Event.createSingleDuplicationEvent(); + ++_duplications_sum; + } + else { + event = Event.createSingleSpeciationEvent(); + } + g.getNodeData().setEvent( event ); + } + } // geneTreePostOrderTraversal( PhylogenyNode ) + /** * Calculates the mapping function for the external nodes of the gene tree: * links (sets the field "link" of PhylogenyNode) each external @@ -216,14 +215,14 @@ public abstract class SDI { * which has the same species name. * @throws SDIException */ - void linkNodesOfG() throws SDIException { + final void linkNodesOfG() throws SDIException { final Map speciestree_ext_nodes = new HashMap(); final TaxonomyComparisonBase tax_comp_base = determineTaxonomyComparisonBase(); // Put references to all external nodes of the species tree into a map. // Stringyfied taxonomy is the key, node is the value. for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode s = iter.next(); - final String tax_str = taxonomyToString( s, tax_comp_base ); + final String tax_str = SDIutil.taxonomyToString( s, tax_comp_base ); if ( speciestree_ext_nodes.containsKey( tax_str ) ) { throw new IllegalArgumentException( "taxonomy [" + s.getNodeData().getTaxonomy() + "] is not unique in species phylogeny" ); @@ -233,7 +232,7 @@ public abstract class SDI { // Retrieve the reference to the node with a matching stringyfied taxonomy. for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode g = iter.next(); - final String tax_str = taxonomyToString( g, tax_comp_base ); + final String tax_str = SDIutil.taxonomyToString( g, tax_comp_base ); final PhylogenyNode s = speciestree_ext_nodes.get( tax_str ); if ( s == null ) { throw new IllegalArgumentException( "taxonomy [" + g.getNodeData().getTaxonomy() @@ -244,6 +243,155 @@ public abstract class SDI { } /** + * Updates the mapping function M after the root of the gene tree has been + * moved by one branch. It calculates M for the root of the gene tree and + * one of its two children. + *

+ * To be used ONLY by method "SDIunrooted.fastInfer(Phylogeny,Phylogeny)". + *

+ * (Last modfied: 10/02/01) + * + * @param prev_root_was_dup + * true if the previous root was a duplication, false otherwise + * @param prev_root_c1 + * child 1 of the previous root + * @param prev_root_c2 + * child 2 of the previous root + * @return number of duplications which have been assigned in gene tree + */ + int updateM( final boolean prev_root_was_dup, final PhylogenyNode prev_root_c1, final PhylogenyNode prev_root_c2 ) { + final PhylogenyNode root = getGeneTree().getRoot(); + if ( ( root.getChildNode1() == prev_root_c1 ) || ( root.getChildNode2() == prev_root_c1 ) ) { + calculateMforNode( prev_root_c1 ); + } + else { + calculateMforNode( prev_root_c2 ); + } + Event event = null; + if ( prev_root_was_dup ) { + event = Event.createSingleDuplicationEvent(); + } + else { + event = Event.createSingleSpeciationEvent(); + } + root.getNodeData().setEvent( event ); + calculateMforNode( root ); + return getDuplicationsSum(); + } // updateM( boolean, PhylogenyNode, PhylogenyNode ) + + // Helper method for updateM( boolean, PhylogenyNode, PhylogenyNode ) + // Calculates M for PhylogenyNode n, given that M for the two children + // of n has been calculated. + // (Last modified: 10/02/01) + private void calculateMforNode( final PhylogenyNode n ) { + if ( !n.isExternal() ) { + final boolean was_duplication = n.isDuplication(); + PhylogenyNode a = n.getChildNode1().getLink(); + PhylogenyNode b = n.getChildNode2().getLink(); + while ( a != b ) { + if ( a.getId() > b.getId() ) { + a = a.getParent(); + } + else { + b = b.getParent(); + } + } + n.setLink( a ); + Event event = null; + if ( ( a == n.getChildNode1().getLink() ) || ( a == n.getChildNode2().getLink() ) ) { + event = Event.createSingleDuplicationEvent(); + if ( !was_duplication ) { + ++_duplications_sum; + } + } + else { + event = Event.createSingleSpeciationEvent(); + if ( was_duplication ) { + --_duplications_sum; + } + } + n.getNodeData().setEvent( event ); + } + } // calculateMforNode( PhylogenyNode ) + + // Helper method for "computeMappingCost()". + private void computeMappingCostHelper( final PhylogenyNode g ) { + if ( !g.isExternal() ) { + computeMappingCostHelper( g.getChildNode1() ); + computeMappingCostHelper( g.getChildNode2() ); + if ( ( g.getLink() != g.getChildNode1().getLink() ) && ( g.getLink() != g.getChildNode2().getLink() ) ) { + _mapping_cost += ( ( g.getChildNode1().getLink().getId() + g.getChildNode2().getLink().getId() ) + - ( 2 * g.getLink().getId() ) - 2 ); + } + else if ( ( g.getLink() != g.getChildNode1().getLink() ) && ( g.getLink() == g.getChildNode2().getLink() ) ) { + _mapping_cost += ( ( g.getChildNode1().getLink().getId() - g.getLink().getId() ) + 1 ); + } + else if ( ( g.getLink() == g.getChildNode1().getLink() ) && ( g.getLink() != g.getChildNode2().getLink() ) ) { + _mapping_cost += ( ( g.getChildNode2().getLink().getId() - g.getLink().getId() ) + 1 ); + } + else { + _mapping_cost++; + } + } + } + + private TaxonomyComparisonBase determineTaxonomyComparisonBase() { + TaxonomyComparisonBase base = null; + boolean all_have_id = true; + boolean all_have_code = true; + boolean all_have_sn = true; + for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( n.getNodeData().isHasTaxonomy() ) { + final Taxonomy tax = n.getNodeData().getTaxonomy(); + if ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { + all_have_id = false; + } + if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { + all_have_code = false; + } + if ( ForesterUtil.isEmpty( tax.getScientificName() ) ) { + all_have_sn = false; + } + } + else { + throw new IllegalArgumentException( "species tree node [" + n + "] has no taxonomic data" ); + } + } + for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( n.getNodeData().isHasTaxonomy() ) { + final Taxonomy tax = n.getNodeData().getTaxonomy(); + if ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { + all_have_id = false; + } + if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { + all_have_code = false; + } + if ( ForesterUtil.isEmpty( tax.getScientificName() ) ) { + all_have_sn = false; + } + } + else { + throw new IllegalArgumentException( "gene tree node [" + n + "] has no taxonomic data" ); + } + } + if ( all_have_id ) { + base = TaxonomyComparisonBase.ID; + } + else if ( all_have_code ) { + base = TaxonomyComparisonBase.CODE; + } + else if ( all_have_sn ) { + base = TaxonomyComparisonBase.SCIENTIFIC_NAME; + } + else { + throw new IllegalArgumentException( "gene tree and species tree have incomparable taxonomies" ); + } + return base; + } + + /** * Calculates the mapping function for the external nodes of the gene tree: * links (sets the field "link" of PhylogenyNode) each external by taxonomy * identifier @@ -251,7 +399,7 @@ public abstract class SDI { * which has the same species name. * Olivier CHABROL : olivier.chabrol@univ-provence.fr */ - void linkNodesOfGByTaxonomyIdentifier() { + private final void linkNodesOfGByTaxonomyIdentifier() { final HashMap speciestree_ext_nodes = new HashMap(); if ( _species_tree.getFirstExternalNode().isRoot() ) { speciestree_ext_nodes.put( _species_tree.getFirstExternalNode().getNodeData().getTaxonomy().getIdentifier() @@ -275,53 +423,4 @@ public abstract class SDI { g.setLink( s ); } } - - @Override - public String toString() { - final StringBuffer sb = new StringBuffer(); - sb.append( getClass() ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Duplications sum : " + getDuplicationsSum() ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "mapping cost L : " + computeMappingCostL() ); - return sb.toString(); - } - - static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) { - final Taxonomy tax = n.getNodeData().getTaxonomy(); - switch ( base ) { - case ID: - return tax.getIdentifier().getValue(); - case CODE: - return tax.getTaxonomyCode(); - case SCIENTIFIC_NAME: - return tax.getScientificName(); - default: - throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base ); - } - } - - public enum TaxonomyComparisonBase { - ID { - - @Override - public String toString() { - return "taxonomy id"; - } - }, - CODE { - - @Override - public String toString() { - return "taxonomy code/mnemonic"; - } - }, - SCIENTIFIC_NAME { - - @Override - public String toString() { - return "scientific name"; - } - } - } -} +} // End of class SDIse.