// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org package org.forester.sdi; import java.util.HashMap; import java.util.Map; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; public abstract class SDI { final Phylogeny _gene_tree; final Phylogeny _species_tree; int _duplications_sum; // Sum of duplications. int _mapping_cost; // Mapping cost "L". /** * Constructor which sets the gene tree and the species tree to be compared. * species_tree is the species tree to which the gene tree gene_tree will be * compared to. * Infers for each PhylogenyNode of gene_tree whether * it represents a speciation or duplication event by calculating and * interpreting the mapping function M. 
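/*
 * (Continuation of the constructor description that starts above.)
 * The most parsimonious sequence of speciation and duplication events is
 * assumed. The mapping cost L can be calculated with method
 * "computeMappingCostL()".
 *
 * Conditions: (the list of conditions is missing from this copy of the file)
 */
/**
 * Computes and returns the mapping cost "L".
 * <p>
 * Reference. Zhang, L. (1997) On a Mirkin-Muchnik-Smith Conjecture for
 * Comparing Molecular Phylogenies. Journal of Computational Biology 4
 * 177-187.
 *
 * @return the mapping cost "L"
 */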
public int computeMappingCostL() {
    // The species tree is re-IDed (levelOrderReID) before any cost is computed.
    _species_tree.levelOrderReID();
    // Every call starts from a zeroed cost field.
    this._mapping_cost = 0;
    final PhylogenyNode gene_root = _gene_tree.getRoot();
    // NOTE(review): the helper presumably accumulates into _mapping_cost
    // while walking down from the gene tree root -- confirm against its body.
    computeMappingCostHelper( gene_root );
    return this._mapping_cost;
}
/**
 * Picks the taxonomy field on which gene tree and species tree are compared.
 * <p>
 * Every external node of the species tree, and then of the gene tree, is
 * inspected. A field (identifier value, taxonomy code, scientific name)
 * qualifies only if ForesterUtil.isEmpty reports it as not empty on every
 * inspected node (for the identifier, the identifier object must also be
 * non-null).
 *
 * @return ID if the identifier qualifies, otherwise CODE if the taxonomy
 *         code qualifies, otherwise SCIENTIFIC_NAME if the scientific name
 *         qualifies
 * @throws IllegalArgumentException
 *             if an external node of either tree has no taxonomy data, or
 *             if none of the three fields qualifies
 */
private TaxonomyComparisonBase determineTaxonomyComparisonBase() {
    // Species tree is scanned before the gene tree; the prefixes keep the
    // two "no taxonomic data" messages distinguishable.
    final Phylogeny[] trees = { _species_tree, _gene_tree };
    final String[] message_prefixes = { "species tree node [", "gene tree node [" };
    boolean id_missing = false;
    boolean code_missing = false;
    boolean sn_missing = false;
    for( int t = 0; t < trees.length; ++t ) {
        final PhylogenyNodeIterator externals = trees[ t ].iteratorExternalForward();
        while ( externals.hasNext() ) {
            final PhylogenyNode ext = externals.next();
            if ( !ext.getNodeData().isHasTaxonomy() ) {
                throw new IllegalArgumentException( message_prefixes[ t ] + ext + "] has no taxonomic data" );
            }
            final Taxonomy taxonomy = ext.getNodeData().getTaxonomy();
            // "|=" always evaluates its right-hand side, so each check still
            // runs for every node even after a flag has already been raised.
            id_missing |= ( taxonomy.getIdentifier() == null )
                    || ForesterUtil.isEmpty( taxonomy.getIdentifier().getValue() );
            code_missing |= ForesterUtil.isEmpty( taxonomy.getTaxonomyCode() );
            sn_missing |= ForesterUtil.isEmpty( taxonomy.getScientificName() );
        }
    }
    // Order of preference: identifier, then taxonomy code, then scientific name.
    if ( !id_missing ) {
        return TaxonomyComparisonBase.ID;
    }
    if ( !code_missing ) {
        return TaxonomyComparisonBase.CODE;
    }
    if ( !sn_missing ) {
        return TaxonomyComparisonBase.SCIENTIFIC_NAME;
    }
    throw new IllegalArgumentException( "gene tree and species tree have incomparable taxonomies" );
}
/**
* Returns the number of duplications.
*
* @return number of duplications
*/
public int getDuplicationsSum() {
    // Plain accessor: reports the current value of the duplication counter field.
    return this._duplications_sum;
}
/**
* Returns the gene tree.
*
* @return gene tree
*/
public Phylogeny getGeneTree() {
    // Hands back the stored reference itself; no copy is made.
    return this._gene_tree;
}
/**
* Returns the species tree.
*
* @return species tree
*/
public Phylogeny getSpeciesTree() {
    // Hands back the stored reference itself; no copy is made.
    return this._species_tree;
}
/**
* Calculates the mapping function for the external nodes of the gene tree:
* links (sets the field "link" of PhylogenyNode) each external
* PhylogenyNode of gene_tree to the external PhylogenyNode of species_tree
* which has the same species name.
* @throws SDIException
*/
void linkNodesOfG() throws SDIException {
final Map