// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.sdi;
import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
import org.forester.util.ForesterUtil;
-public final class GSDI implements GSDII {
+public final class GSDI implements GSDII {
private final boolean _most_parsimonious_duplication_model;
private final int _speciation_or_duplication_events_sum;
final boolean most_parsimonious_duplication_model,
final boolean strip_gene_tree,
final boolean strip_species_tree ) throws SDIException {
+ this( gene_tree, species_tree, most_parsimonious_duplication_model, strip_gene_tree, strip_species_tree, true );
+ }
+
+ public GSDI( final Phylogeny gene_tree,
+ final Phylogeny species_tree,
+ final boolean most_parsimonious_duplication_model,
+ final boolean strip_gene_tree,
+ final boolean strip_species_tree,
+ final boolean transfer_taxonomy ) throws SDIException {
_most_parsimonious_duplication_model = most_parsimonious_duplication_model;
+ if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) {
+ gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) );
+ }
final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree,
species_tree,
- null,
strip_gene_tree,
strip_species_tree );
_stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();
_tax_comp_base = nodes_linking_result.getTaxCompBase();
PhylogenyMethods.preOrderReId( species_tree );
final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree,
- _most_parsimonious_duplication_model );
+ _most_parsimonious_duplication_model,
+ transfer_taxonomy );
_speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum();
_speciations_sum = gsdi_summary_result.getSpeciationsSum();
_duplications_sum = gsdi_summary_result.getDuplicationsSum();
return _duplications_sum;
}
+ @Override
public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {
return _mapped_species_tree_nodes;
}
+ @Override
public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {
return _scientific_names_mapped_to_reduced_specificity;
}
return _speciation_or_duplication_events_sum;
}
+ @Override
public final int getSpeciationsSum() {
return _speciations_sum;
}
+ @Override
public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {
return _stripped_gene_tree_nodes;
}
+ @Override
public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
return _stripped_species_tree_nodes;
}
+ @Override
public TaxonomyComparisonBase getTaxCompBase() {
return _tax_comp_base;
}
* Preconditions: Mapping M for external nodes must have been calculated and
* the species tree must be labeled in preorder.
* <p>
+ * @param transfer_taxonomy
* @return
+ * @throws SDIException
*
*/
final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
- final boolean most_parsimonious_duplication_model ) {
+ final boolean most_parsimonious_duplication_model,
+ final boolean transfer_taxonomy ) throws SDIException {
final GSDIsummaryResult res = new GSDIsummaryResult();
for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
final PhylogenyNode g = it.next();
if ( g.isInternal() ) {
+ if ( g.getNumberOfDescendants() != 2 ) {
+ throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants()
+ + " descendents" );
+ }
PhylogenyNode s1 = g.getChildNode1().getLink();
PhylogenyNode s2 = g.getChildNode2().getLink();
while ( s1 != s2 ) {
g.setLink( s1 );
determineEvent( s1, g, most_parsimonious_duplication_model, res );
}
+ if ( transfer_taxonomy ) {
+ transferTaxonomy( g );
+ }
}
return res;
}
+ final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
+ final boolean most_parsimonious_duplication_model,
+ final int min_duplications ) throws SDIException {
+ final GSDIsummaryResult res = new GSDIsummaryResult();
+ for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
+ final PhylogenyNode g = it.next();
+ if ( g.isInternal() ) {
+ if ( g.getNumberOfDescendants() != 2 ) {
+ throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants()
+ + " descendents" );
+ }
+ PhylogenyNode s1 = g.getChildNode1().getLink();
+ PhylogenyNode s2 = g.getChildNode2().getLink();
+ while ( s1 != s2 ) {
+ if ( s1.getId() > s2.getId() ) {
+ s1 = s1.getParent();
+ }
+ else {
+ s2 = s2.getParent();
+ }
+ }
+ g.setLink( s1 );
+ determineEvent( s1, g, most_parsimonious_duplication_model, res );
+ if ( res.getDuplicationsSum() > min_duplications ) {
+ return null;
+ }
+ }
+ }
+ return res;
+ }
+
+ final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree,
+ final Phylogeny species_tree,
+ final boolean strip_gene_tree,
+ final boolean strip_species_tree ) throws SDIException {
+ final TaxonomyComparisonBase tax_comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree );
+ if ( tax_comp_base == null ) {
+ throw new RuntimeException( "failed to establish taxonomy linking base (taxonomy linking base is null)" );
+ }
+ return linkNodesOfG( gene_tree, species_tree, tax_comp_base, strip_gene_tree, strip_species_tree );
+ }
+
/**
* This allows for linking of internal nodes of the species tree (as opposed
* to just external nodes, as in the method it overrides.
final TaxonomyComparisonBase tax_comp_base,
final boolean strip_gene_tree,
final boolean strip_species_tree ) throws SDIException {
+ if ( tax_comp_base == null ) {
+ throw new IllegalArgumentException( "taxonomy linking base is null" );
+ }
final Map<String, PhylogenyNode> species_to_node_map = new HashMap<String, PhylogenyNode>();
final List<PhylogenyNode> species_tree_ext_nodes = new ArrayList<PhylogenyNode>();
final NodesLinkingResult res = new NodesLinkingResult();
- if ( tax_comp_base == null ) {
- res.setTaxCompBase( SDIutil.determineTaxonomyComparisonBase( gene_tree ) );
- }
- else {
- res.setTaxCompBase( tax_comp_base );
- }
+ res.setTaxCompBase( tax_comp_base );
// Stringyfied taxonomy is the key, node is the value.
for( final PhylogenyNodeIterator iter = species_tree.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode s = iter.next();
final String tax_str = SDIutil.taxonomyToString( s, res.getTaxCompBase() );
if ( !ForesterUtil.isEmpty( tax_str ) ) {
if ( species_to_node_map.containsKey( tax_str ) ) {
- throw new SDIException( "taxonomy \"" + s + "\" is not unique in species tree" );
+ throw new SDIException( "taxonomy \"" + tax_str + "\" is not unique in species tree (using "
+ + res.getTaxCompBase() + " for linking to gene tree)" );
}
species_to_node_map.put( tax_str, s );
}
if ( strip_gene_tree ) {
stripTree( gene_tree, res.getStrippedGeneTreeNodes() );
if ( gene_tree.isEmpty() || ( gene_tree.getNumberOfExternalNodes() < 2 ) ) {
- throw new SDIException( "species could not be mapped between gene tree and species tree" );
+ throw new SDIException( "species could not be mapped between gene tree and species tree (based on "
+ + res.getTaxCompBase() + ")" );
}
}
if ( strip_species_tree ) {
return res;
}
+ static final void transferTaxonomy( final PhylogenyNode g ) {
+ if ( g == null ) {
+ throw new IllegalArgumentException( "gene tree node is null" );
+ }
+ final PhylogenyNode s = g.getLink();
+ if ( s == null ) {
+ throw new IllegalArgumentException( "mapped species tree node is null" );
+ }
+ if ( s.getNodeData().isHasTaxonomy() ) {
+ g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() );
+ if ( g.isInternal() ) {
+ if ( g.getChildNode1().isInternal() && g.getChildNode1().getNodeData().isHasTaxonomy()
+ && ( g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) {
+ g.getChildNode1().getNodeData().setTaxonomy( null );
+ }
+ if ( g.getChildNode2().isInternal() && g.getChildNode2().getNodeData().isHasTaxonomy()
+ && ( g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) {
+ g.getChildNode2().getNodeData().setTaxonomy( null );
+ }
+ }
+ }
+ else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) {
+ g.setName( s.getName() );
+ if ( g.isInternal() ) {
+ if ( g.getChildNode1().isInternal() && ( g.getChildNode1().getName() == s.getName() ) ) {
+ g.getChildNode1().setName( "" );
+ }
+ if ( g.getChildNode2().isInternal() && ( g.getChildNode2().getName() == s.getName() ) ) {
+ g.getChildNode2().setName( "" );
+ }
+ }
+ }
+ }
+
private final static void addScientificNamesMappedToReducedSpecificity( final String s1,
final String s2,
final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
final Set<PhylogenyNode> set = new HashSet<PhylogenyNode>();
for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) {
n = n.getLink();
- while ( n.getParent() != s ) {
+ while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) {
n = n.getParent();
if ( n.isRoot() ) {
break;
boolean multiple = false;
for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) {
n = n.getLink();
- while ( n.getParent() != s ) {
+ while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) {
n = n.getParent();
if ( n.isRoot() ) {
break;