import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.SortedSet;
import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Event;
-import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
import org.forester.util.ForesterUtil;
-/*
- * Implements our algorithm for speciation - duplication inference (SDI). <p>
- * The initialization is accomplished by: </p> <ul> <li>method
- * "linkExtNodesOfG()" of class SDI: setting the links for the external nodes of
- * the gene tree <li>"preorderReID(int)" from class Phylogeny: numbering of
- * nodes of the species tree in preorder <li>the optional stripping of the
- * species tree is accomplished by method "stripTree(Phylogeny,Phylogeny)" of
- * class Phylogeny </ul> <p> The recursion part is accomplished by this class'
- * method "geneTreePostOrderTraversal(PhylogenyNode)". <p> Requires JDK 1.5 or
- * greater.
- *
- * @see SDI#linkNodesOfG()
- *
- * @see Phylogeny#preorderReID(int)
- *
- * @see
- * PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny)
- *
- * @see #geneTreePostOrderTraversal(PhylogenyNode)
- *
- * @author Christian M. Zmasek
- */
-public final class GSDI extends SDI {
+public final class GSDI implements GSDII {
- private final HashMap<PhylogenyNode, Integer> _transversal_counts;
- private final boolean _most_parsimonious_duplication_model;
- private final boolean _strip_gene_tree;
- private final boolean _strip_species_tree;
- private int _speciation_or_duplication_events_sum;
- private int _speciations_sum;
- private final List<PhylogenyNode> _stripped_gene_tree_nodes;
- private final List<PhylogenyNode> _stripped_species_tree_nodes;
- private final Set<PhylogenyNode> _mapped_species_tree_nodes;
+ private final boolean _most_parsimonious_duplication_model;
+ private final int _speciation_or_duplication_events_sum;
+ private final int _speciations_sum;
+ private final int _duplications_sum;
+ private final List<PhylogenyNode> _stripped_gene_tree_nodes;
+ private final List<PhylogenyNode> _stripped_species_tree_nodes;
+ private final Set<PhylogenyNode> _mapped_species_tree_nodes;
+ private final TaxonomyComparisonBase _tax_comp_base;
+ private final SortedSet<String> _scientific_names_mapped_to_reduced_specificity;
- /**
- * Constructor which sets the gene tree and the species tree to be compared.
- * species_tree is the species tree to which the gene tree gene_tree will be
- * compared to - with method "infer(boolean)". Both Trees must be completely
- * binary and rooted. The actual inference is accomplished with method
- * "infer(boolean)". The mapping cost L can then be calculated with method
- * "computeMappingCost()".
- * <p>
- *
- * @see #infer(boolean)
- * @see SDI#computeMappingCostL()
- * @param gene_tree
- * reference to a rooted gene tree to which assign duplication vs
- * speciation, must have species names in the species name fields
- * for all external nodes
- * @param species_tree
- * reference to a rooted binary species tree which might get
- * stripped in the process, must have species names in the
- * species name fields for all external nodes
- *
- * @param most_parsimonious_duplication_model
- * set to true to assign nodes as speciations which would
- * otherwise be assiged as unknown because of polytomies in the
- * species tree.
- * @throws SdiException
- *
- */
public GSDI( final Phylogeny gene_tree,
final Phylogeny species_tree,
final boolean most_parsimonious_duplication_model,
final boolean strip_gene_tree,
- final boolean strip_species_tree ) throws SdiException {
- super( gene_tree, species_tree );
- _speciation_or_duplication_events_sum = 0;
- _speciations_sum = 0;
+ final boolean strip_species_tree ) throws SDIException {
_most_parsimonious_duplication_model = most_parsimonious_duplication_model;
- _transversal_counts = new HashMap<PhylogenyNode, Integer>();
- _duplications_sum = 0;
- _strip_gene_tree = strip_gene_tree;
- _strip_species_tree = strip_species_tree;
- _stripped_gene_tree_nodes = new ArrayList<PhylogenyNode>();
- _stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
- _mapped_species_tree_nodes = new HashSet<PhylogenyNode>();
- getSpeciesTree().preOrderReId();
- linkNodesOfG();
- geneTreePostOrderTraversal( getGeneTree().getRoot() );
+ if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) {
+ gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) );
+ }
+ final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree,
+ species_tree,
+ null,
+ strip_gene_tree,
+ strip_species_tree );
+ _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();
+ _stripped_species_tree_nodes = nodes_linking_result.getStrippedSpeciesTreeNodes();
+ _mapped_species_tree_nodes = nodes_linking_result.getMappedSpeciesTreeNodes();
+ _scientific_names_mapped_to_reduced_specificity = nodes_linking_result
+ .getScientificNamesMappedToReducedSpecificity();
+ _tax_comp_base = nodes_linking_result.getTaxCompBase();
+ PhylogenyMethods.preOrderReId( species_tree );
+ final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree,
+ _most_parsimonious_duplication_model );
+ _speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum();
+ _speciations_sum = gsdi_summary_result.getSpeciationsSum();
+ _duplications_sum = gsdi_summary_result.getDuplicationsSum();
}
- GSDI( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean most_parsimonious_duplication_model )
- throws SdiException {
- this( gene_tree, species_tree, most_parsimonious_duplication_model, false, false );
+ public int getDuplicationsSum() {
+ return _duplications_sum;
}
- private final Event createDuplicationEvent() {
- final Event event = Event.createSingleDuplicationEvent();
- ++_duplications_sum;
- return event;
+ @Override
+ public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {
+ return _mapped_species_tree_nodes;
}
- private final Event createSingleSpeciationOrDuplicationEvent() {
- final Event event = Event.createSingleSpeciationOrDuplicationEvent();
- ++_speciation_or_duplication_events_sum;
- return event;
+ @Override
+ public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {
+ return _scientific_names_mapped_to_reduced_specificity;
}
- private final Event createSpeciationEvent() {
- final Event event = Event.createSingleSpeciationEvent();
- ++_speciations_sum;
- return event;
+ public final int getSpeciationOrDuplicationEventsSum() {
+ return _speciation_or_duplication_events_sum;
}
- // s is the node on the species tree g maps to.
- private final void determineEvent( final PhylogenyNode s, final PhylogenyNode g ) {
- Event event = null;
- // Determine how many children map to same node as parent.
- int sum_g_childs_mapping_to_s = 0;
- for( int i = 0; i < g.getNumberOfDescendants(); ++i ) {
- final PhylogenyNode c = g.getChildNode( i );
- if ( c.getLink() == s ) {
- ++sum_g_childs_mapping_to_s;
- }
- }
- // Determine the sum of traversals.
- int traversals_sum = 0;
- int max_traversals = 0;
- PhylogenyNode max_traversals_node = null;
- if ( !s.isExternal() ) {
- for( int i = 0; i < s.getNumberOfDescendants(); ++i ) {
- final PhylogenyNode current_node = s.getChildNode( i );
- final int traversals = getTraversalCount( current_node );
- traversals_sum += traversals;
- if ( traversals > max_traversals ) {
- max_traversals = traversals;
- max_traversals_node = current_node;
- }
- }
- }
- // System.out.println( " sum=" + traversals_sum );
- // System.out.println( " max=" + max_traversals );
- // System.out.println( " m=" + sum_g_childs_mapping_to_s );
- if ( sum_g_childs_mapping_to_s > 0 ) {
- if ( traversals_sum == 2 ) {
- event = createDuplicationEvent();
- System.out.print( g.toString() );
- System.out.println( " : ==2" );
- // _transversal_counts.clear();
- }
- else if ( traversals_sum > 2 ) {
- if ( max_traversals <= 1 ) {
- if ( _most_parsimonious_duplication_model ) {
- event = createSpeciationEvent();
- }
- else {
- event = createSingleSpeciationOrDuplicationEvent();
- }
- }
- else {
- event = createDuplicationEvent();
- //System.out.println( g.toString() );
- _transversal_counts.put( max_traversals_node, 1 );
- // _transversal_counts.clear();
- }
- }
- else {
- event = createDuplicationEvent();
- // _transversal_counts.clear();
- }
- }
- else {
- event = createSpeciationEvent();
+ @Override
+ public final int getSpeciationsSum() {
+ return _speciations_sum;
+ }
+
+ @Override
+ public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {
+ return _stripped_gene_tree_nodes;
+ }
+
+ @Override
+ public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
+ return _stripped_species_tree_nodes;
+ }
+
+ @Override
+ public TaxonomyComparisonBase getTaxCompBase() {
+ return _tax_comp_base;
+ }
+
+ @Override
+ public final String toString() {
+ final StringBuffer sb = new StringBuffer();
+ sb.append( "Most parsimonious duplication model: " + _most_parsimonious_duplication_model );
+ sb.append( ForesterUtil.getLineSeparator() );
+ sb.append( "Speciations sum : " + getSpeciationsSum() );
+ sb.append( ForesterUtil.getLineSeparator() );
+ sb.append( "Duplications sum : " + getDuplicationsSum() );
+ sb.append( ForesterUtil.getLineSeparator() );
+ if ( !_most_parsimonious_duplication_model ) {
+ sb.append( "Speciation or duplications sum : " + getSpeciationOrDuplicationEventsSum() );
+ sb.append( ForesterUtil.getLineSeparator() );
}
- g.getNodeData().setEvent( event );
+ return sb.toString();
}
/**
* Preconditions: Mapping M for external nodes must have been calculated and
* the species tree must be labeled in preorder.
* <p>
+ * @return
+ * @throws SDIException
*
- * @param g
- * starting node of a gene tree - normally the root
*/
- final void geneTreePostOrderTraversal( final PhylogenyNode g ) {
- if ( !g.isExternal() ) {
- boolean all_ext = true;
- for( int i = 0; i < g.getNumberOfDescendants(); ++i ) {
- if ( g.getChildNode( i ).isInternal() ) {
- all_ext = false;
- break;
+ final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
+ final boolean most_parsimonious_duplication_model )
+ throws SDIException {
+ final GSDIsummaryResult res = new GSDIsummaryResult();
+ for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
+ final PhylogenyNode g = it.next();
+ if ( g.isInternal() ) {
+ if ( g.getNumberOfDescendants() != 2 ) {
+ throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants()
+ + " descendents" );
}
- }
- if ( all_ext ) {
- _transversal_counts.clear();
- }
- for( int i = 0; i < g.getNumberOfDescendants(); ++i ) {
- geneTreePostOrderTraversal( g.getChildNode( i ) );
- }
- final PhylogenyNode[] linked_nodes = new PhylogenyNode[ g.getNumberOfDescendants() ];
- for( int i = 0; i < linked_nodes.length; ++i ) {
- if ( g.getChildNode( i ).getLink() == null ) {
- System.out.println( "link is null for " + g.getChildNode( i ) );
- System.exit( -1 );
+ PhylogenyNode s1 = g.getChildNode1().getLink();
+ PhylogenyNode s2 = g.getChildNode2().getLink();
+ while ( s1 != s2 ) {
+ if ( s1.getId() > s2.getId() ) {
+ s1 = s1.getParent();
+ }
+ else {
+ s2 = s2.getParent();
+ }
}
- linked_nodes[ i ] = g.getChildNode( i ).getLink();
- }
- final int[] min_max = obtainMinMaxIdIndices( linked_nodes );
- int min_i = min_max[ 0 ];
- int max_i = min_max[ 1 ];
- // initTransversalCounts();
- while ( linked_nodes[ min_i ] != linked_nodes[ max_i ] ) {
- increaseTraversalCount( linked_nodes[ max_i ] );
- linked_nodes[ max_i ] = linked_nodes[ max_i ].getParent();
- final int[] min_max_ = obtainMinMaxIdIndices( linked_nodes );
- min_i = min_max_[ 0 ];
- max_i = min_max_[ 1 ];
+ g.setLink( s1 );
+ determineEvent( s1, g, most_parsimonious_duplication_model, res );
}
- final PhylogenyNode s = linked_nodes[ max_i ];
- g.setLink( s );
- // Determines whether dup. or spec.
- determineEvent( s, g );
}
- }
-
- public final int getSpeciationOrDuplicationEventsSum() {
- return _speciation_or_duplication_events_sum;
- }
-
- public final int getSpeciationsSum() {
- return _speciations_sum;
- }
-
- private final int getTraversalCount( final PhylogenyNode node ) {
- if ( _transversal_counts.containsKey( node ) ) {
- return _transversal_counts.get( node );
- }
- return 0;
- }
-
- private final void increaseTraversalCount( final PhylogenyNode node ) {
- if ( _transversal_counts.containsKey( node ) ) {
- _transversal_counts.put( node, _transversal_counts.get( node ) + 1 );
- }
- else {
- _transversal_counts.put( node, 1 );
- }
- // System.out.println( "count for node " + node.getID() + " is now "
- // + getTraversalCount( node ) );
+ return res;
}
/**
* This allows for linking of internal nodes of the species tree (as opposed
* to just external nodes, as in the method it overrides.
- * @throws SdiException
+ * If TaxonomyComparisonBase is null, it will try to determine it.
+ * @throws SDIException
*
*/
- @Override
- // final void linkNodesOfG() {
- // final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes = createTaxonomyToNodeMap();
- // if ( _strip_gene_tree ) {
- // stripGeneTree( speciestree_ext_nodes );
- // if ( ( _gene_tree == null ) || ( _gene_tree.getNumberOfExternalNodes() < 2 ) ) {
- // throw new IllegalArgumentException( "species tree does not contain any"
- // + " nodes matching species in the gene tree" );
- // }
- // }
- // // Retrieve the reference to the PhylogenyNode with a matching species.
- // for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
- // final PhylogenyNode g = iter.next();
- // if ( !g.getNodeData().isHasTaxonomy() ) {
- // throw new IllegalArgumentException( "gene tree node " + g + " has no taxonomic data" );
- // }
- // final PhylogenyNode s = speciestree_ext_nodes.get( g.getNodeData().getTaxonomy() );
- // if ( s == null ) {
- // throw new IllegalArgumentException( "species " + g.getNodeData().getTaxonomy()
- // + " not present in species tree" );
- // }
- // g.setLink( s );
- // }
- // }
- final void linkNodesOfG() throws SdiException {
+ final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree,
+ final Phylogeny species_tree,
+ final TaxonomyComparisonBase tax_comp_base,
+ final boolean strip_gene_tree,
+ final boolean strip_species_tree ) throws SDIException {
final Map<String, PhylogenyNode> species_to_node_map = new HashMap<String, PhylogenyNode>();
final List<PhylogenyNode> species_tree_ext_nodes = new ArrayList<PhylogenyNode>();
- final TaxonomyComparisonBase tax_comp_base = determineTaxonomyComparisonBase( _gene_tree );
- // System.out.println( "comp base is: " + tax_comp_base );
+ final NodesLinkingResult res = new NodesLinkingResult();
+ if ( tax_comp_base == null ) {
+ res.setTaxCompBase( SDIutil.determineTaxonomyComparisonBase( gene_tree ) );
+ }
+ else {
+ res.setTaxCompBase( tax_comp_base );
+ }
// Stringyfied taxonomy is the key, node is the value.
- for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
+ for( final PhylogenyNodeIterator iter = species_tree.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode s = iter.next();
species_tree_ext_nodes.add( s );
- final String tax_str = taxonomyToString( s, tax_comp_base );
- if ( !ForesterUtil.isEmpty( tax_str ) ) {
- if ( species_to_node_map.containsKey( tax_str ) ) {
- throw new SdiException( "taxonomy \"" + s + "\" is not unique in species tree" );
+ if ( s.getNodeData().isHasTaxonomy() ) {
+ final String tax_str = SDIutil.taxonomyToString( s, res.getTaxCompBase() );
+ if ( !ForesterUtil.isEmpty( tax_str ) ) {
+ if ( species_to_node_map.containsKey( tax_str ) ) {
+ throw new SDIException( "taxonomy \"" + tax_str + "\" is not unique in species tree (using "
+ + res.getTaxCompBase() + " for linking to gene tree)" );
+ }
+ species_to_node_map.put( tax_str, s );
}
- species_to_node_map.put( tax_str, s );
}
}
// Retrieve the reference to the node with a matching stringyfied taxonomy.
- for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
+ for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode g = iter.next();
if ( !g.getNodeData().isHasTaxonomy() ) {
- if ( _strip_gene_tree ) {
- _stripped_gene_tree_nodes.add( g );
+ if ( strip_gene_tree ) {
+ res.getStrippedGeneTreeNodes().add( g );
}
else {
- throw new SdiException( "gene tree node \"" + g + "\" has no taxonomic data" );
+ throw new SDIException( "gene tree node \"" + g + "\" has no taxonomic data" );
}
}
else {
- final String tax_str = taxonomyToString( g, tax_comp_base );
+ final String tax_str = SDIutil.taxonomyToString( g, res.getTaxCompBase() );
if ( ForesterUtil.isEmpty( tax_str ) ) {
- if ( _strip_gene_tree ) {
- _stripped_gene_tree_nodes.add( g );
+ if ( strip_gene_tree ) {
+ res.getStrippedGeneTreeNodes().add( g );
}
else {
- throw new SdiException( "gene tree node \"" + g + "\" has no appropriate taxonomic data" );
+ throw new SDIException( "gene tree node \"" + g + "\" has no appropriate taxonomic data" );
}
}
else {
- final PhylogenyNode s = species_to_node_map.get( tax_str );
+ PhylogenyNode s = species_to_node_map.get( tax_str );
+ if ( ( res.getTaxCompBase() == TaxonomyComparisonBase.SCIENTIFIC_NAME ) && ( s == null )
+ && ( ForesterUtil.countChars( tax_str, ' ' ) > 1 ) ) {
+ s = tryMapByRemovingOverlySpecificData( species_to_node_map,
+ tax_str,
+ res.getScientificNamesMappedToReducedSpecificity() );
+ }
if ( s == null ) {
- if ( _strip_gene_tree ) {
- _stripped_gene_tree_nodes.add( g );
+ if ( strip_gene_tree ) {
+ res.getStrippedGeneTreeNodes().add( g );
}
else {
- throw new SdiException( "taxonomy \"" + g.getNodeData().getTaxonomy()
+ throw new SDIException( "taxonomy \"" + g.getNodeData().getTaxonomy()
+ "\" not present in species tree" );
}
}
else {
g.setLink( s );
- _mapped_species_tree_nodes.add( s );
- // System.out.println( "setting link of " + g + " to " + s );
+ res.getMappedSpeciesTreeNodes().add( s );
}
}
}
} // for loop
- if ( _strip_gene_tree ) {
- for( final PhylogenyNode g : _stripped_gene_tree_nodes ) {
- _gene_tree.deleteSubtree( g, true );
+ if ( strip_gene_tree ) {
+ stripTree( gene_tree, res.getStrippedGeneTreeNodes() );
+ if ( gene_tree.isEmpty() || ( gene_tree.getNumberOfExternalNodes() < 2 ) ) {
+ throw new SDIException( "species could not be mapped between gene tree and species tree" );
}
}
- if ( _strip_species_tree ) {
- for( final PhylogenyNode s : species_tree_ext_nodes ) {
- if ( !_mapped_species_tree_nodes.contains( s ) ) {
- _species_tree.deleteSubtree( s, true );
- }
- }
+ if ( strip_species_tree ) {
+ stripSpeciesTree( species_tree, species_tree_ext_nodes, res );
}
+ return res;
}
- public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {
- return _mapped_species_tree_nodes;
+ private final static void addScientificNamesMappedToReducedSpecificity( final String s1,
+ final String s2,
+ final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
+ scientific_names_mapped_to_reduced_specificity.add( s1 + " -> " + s2 );
}
- // final private HashMap<Taxonomy, PhylogenyNode> createTaxonomyToNodeMap() {
- // final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes = new HashMap<Taxonomy, PhylogenyNode>();
- // for( final PhylogenyNodeIterator iter = _species_tree.iteratorLevelOrder(); iter.hasNext(); ) {
- // final PhylogenyNode n = iter.next();
- // if ( n.getNodeData().isHasTaxonomy() ) {
- // if ( speciestree_ext_nodes.containsKey( n.getNodeData().getTaxonomy() ) ) {
- // throw new IllegalArgumentException( "taxonomy [" + n.getNodeData().getTaxonomy()
- // + "] is not unique in species phylogeny" );
- // }
- // speciestree_ext_nodes.put( n.getNodeData().getTaxonomy(), n );
- // }
- // }
- // return speciestree_ext_nodes;
- // }
- // private final void stripGeneTree( final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes ) {
- // // final Set<PhylogenyNode> to_delete = new HashSet<PhylogenyNode>();
- // for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
- // final PhylogenyNode g = iter.next();
- // if ( !g.getNodeData().isHasTaxonomy() ) {
- // throw new IllegalArgumentException( "gene tree node " + g + " has no taxonomic data" );
- // }
- // if ( !speciestree_ext_nodes.containsKey( g.getNodeData().getTaxonomy() ) ) {
- // _stripped_gene_tree_nodes.add( g );
- // }
- // }
- // for( final PhylogenyNode n : _stripped_gene_tree_nodes ) {
- // _gene_tree.deleteSubtree( n, true );
- // }
- // }
- // private final void stripGeneTree2( final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes ) {
- // // final Set<PhylogenyNode> to_delete = new HashSet<PhylogenyNode>();
- // for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
- // final PhylogenyNode g = iter.next();
- // if ( !g.getNodeData().isHasTaxonomy() ) {
- // _stripped_gene_tree_nodes.add( g );
- // }
- // else {
- // if ( !speciestree_ext_nodes.containsKey( g.getNodeData().getTaxonomy() ) ) {
- // _stripped_gene_tree_nodes.add( g );
- // }
- // }
- // }
- // for( final PhylogenyNode n : _stripped_gene_tree_nodes ) {
- // _gene_tree.deleteSubtree( n, true );
- // }
- // }
- public static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree ) {
- int with_id_count = 0;
- int with_code_count = 0;
- int with_sn_count = 0;
- int max = 0;
- for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
- final PhylogenyNode g = iter.next();
- if ( g.getNodeData().isHasTaxonomy() ) {
- final Taxonomy tax = g.getNodeData().getTaxonomy();
- if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
- if ( ++with_id_count > max ) {
- max = with_id_count;
+ private final static void determineEvent( final PhylogenyNode s,
+ final PhylogenyNode g,
+ final boolean most_parsimonious_duplication_model,
+ final GSDIsummaryResult res ) {
+ boolean oyako = false;
+ if ( ( g.getChildNode1().getLink() == s ) || ( g.getChildNode2().getLink() == s ) ) {
+ oyako = true;
+ }
+ if ( g.getLink().getNumberOfDescendants() == 2 ) {
+ if ( oyako ) {
+ g.getNodeData().setEvent( Event.createSingleDuplicationEvent() );
+ res.increaseDuplicationsSum();
+ }
+ else {
+ g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
+ res.increaseSpeciationsSum();
+ }
+ }
+ else {
+ if ( oyako ) {
+ final Set<PhylogenyNode> set = new HashSet<PhylogenyNode>();
+ for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) {
+ n = n.getLink();
+ while ( n.getParent() != s ) {
+ n = n.getParent();
+ if ( n.isRoot() ) {
+ break;
+ }
}
+ set.add( n );
}
- if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
- if ( ++with_code_count > max ) {
- max = with_code_count;
+ boolean multiple = false;
+ for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) {
+ n = n.getLink();
+ while ( n.getParent() != s ) {
+ n = n.getParent();
+ if ( n.isRoot() ) {
+ break;
+ }
+ }
+ if ( set.contains( n ) ) {
+ multiple = true;
+ break;
}
}
- if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
- if ( ++with_sn_count > max ) {
- max = with_sn_count;
+ if ( multiple ) {
+ g.getNodeData().setEvent( Event.createSingleDuplicationEvent() );
+ res.increaseDuplicationsSum();
+ }
+ else {
+ if ( most_parsimonious_duplication_model ) {
+ g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
+ res.increaseSpeciationsSum();
+ }
+ else {
+ g.getNodeData().setEvent( Event.createSingleSpeciationOrDuplicationEvent() );
+ res.increaseSpeciationOrDuplicationEventsSum();
}
}
}
- }
- if ( max == 0 ) {
- throw new IllegalArgumentException( "gene tree has no taxonomic data" );
- }
- else if ( max == 1 ) {
- throw new IllegalArgumentException( "gene tree has only one node with taxonomic data" );
- }
- else if ( max == with_sn_count ) {
- return SDI.TaxonomyComparisonBase.SCIENTIFIC_NAME;
- }
- else if ( max == with_id_count ) {
- return SDI.TaxonomyComparisonBase.ID;
- }
- else {
- return SDI.TaxonomyComparisonBase.CODE;
+ else {
+ g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
+ res.increaseSpeciationsSum();
+ }
}
}
- public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {
- return _stripped_gene_tree_nodes;
+ private final static void stripSpeciesTree( final Phylogeny species_tree,
+ final List<PhylogenyNode> species_tree_ext_nodes,
+ final NodesLinkingResult res ) {
+ for( final PhylogenyNode s : species_tree_ext_nodes ) {
+ if ( !res.getMappedSpeciesTreeNodes().contains( s ) ) {
+ species_tree.deleteSubtree( s, true );
+ res.getStrippedSpeciesTreeNodes().add( s );
+ }
+ }
+ species_tree.clearHashIdToNodeMap();
+ species_tree.externalNodesHaveChanged();
}
- @Override
- public final String toString() {
- final StringBuffer sb = new StringBuffer();
- sb.append( "Most parsimonious duplication model: " + _most_parsimonious_duplication_model );
- sb.append( ForesterUtil.getLineSeparator() );
- sb.append( "Speciations sum : " + getSpeciationsSum() );
- sb.append( ForesterUtil.getLineSeparator() );
- sb.append( "Duplications sum : " + getDuplicationsSum() );
- sb.append( ForesterUtil.getLineSeparator() );
- if ( !_most_parsimonious_duplication_model ) {
- sb.append( "Speciation or duplications sum : " + getSpeciationOrDuplicationEventsSum() );
- sb.append( ForesterUtil.getLineSeparator() );
+ private final static void stripTree( final Phylogeny phy, final List<PhylogenyNode> strip_nodes ) {
+ for( final PhylogenyNode g : strip_nodes ) {
+ phy.deleteSubtree( g, true );
}
- sb.append( "mapping cost L : " + computeMappingCostL() );
- return sb.toString();
+ phy.clearHashIdToNodeMap();
+ phy.externalNodesHaveChanged();
}
- static final int[] obtainMinMaxIdIndices( final PhylogenyNode[] linked_nodes ) {
- int max_i = 0;
- int min_i = 0;
- int max_i_id = -Integer.MAX_VALUE;
- int min_i_id = Integer.MAX_VALUE;
- for( int i = 0; i < linked_nodes.length; ++i ) {
- final int id_i = linked_nodes[ i ].getId();
- if ( id_i > max_i_id ) {
- max_i = i;
- max_i_id = linked_nodes[ max_i ].getId();
+ private final static PhylogenyNode tryMapByRemovingOverlySpecificData( final Map<String, PhylogenyNode> species_to_node_map,
+ final String tax_str,
+ final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
+ PhylogenyNode s = tryMapByRemovingOverlySpecificData( species_to_node_map,
+ tax_str,
+ " (",
+ scientific_names_mapped_to_reduced_specificity );
+ if ( s == null ) {
+ if ( ForesterUtil.countChars( tax_str, ' ' ) == 2 ) {
+ final String new_tax_str = tax_str.substring( 0, tax_str.lastIndexOf( ' ' ) ).trim();
+ s = species_to_node_map.get( new_tax_str );
+ if ( s != null ) {
+ addScientificNamesMappedToReducedSpecificity( tax_str,
+ new_tax_str,
+ scientific_names_mapped_to_reduced_specificity );
+ }
+ }
+ }
+ if ( s == null ) {
+ for( final String t : new String[] { " subspecies ", " strain ", " variety ", " varietas ", " subvariety ",
+ " form ", " subform ", " cultivar ", " section ", " subsection " } ) {
+ s = tryMapByRemovingOverlySpecificData( species_to_node_map,
+ tax_str,
+ t,
+ scientific_names_mapped_to_reduced_specificity );
+ if ( s != null ) {
+ break;
+ }
}
- if ( id_i < min_i_id ) {
- min_i = i;
- min_i_id = linked_nodes[ min_i ].getId();
+ }
+ return s;
+ }
+
+ private final static PhylogenyNode tryMapByRemovingOverlySpecificData( final Map<String, PhylogenyNode> species_to_node_map,
+ final String tax_str,
+ final String term,
+ final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
+ final int i = tax_str.indexOf( term );
+ if ( i > 4 ) {
+ final String new_tax_str = tax_str.substring( 0, i ).trim();
+ final PhylogenyNode s = species_to_node_map.get( new_tax_str );
+ if ( s != null ) {
+ addScientificNamesMappedToReducedSpecificity( tax_str,
+ new_tax_str,
+ scientific_names_mapped_to_reduced_specificity );
}
+ return s;
}
- return new int[] { min_i, max_i };
+ return null;
}
- /**
- * Updates the mapping function M after the root of the gene tree has been
- * moved by one branch. It calculates M for the root of the gene tree and
- * one of its two children.
- * <p>
- * To be used ONLY by method "SDIunrooted.fastInfer(Phylogeny,Phylogeny)".
- * <p>
- * (Last modfied: )
- *
- * @param prev_root_was_dup
- * true if the previous root was a duplication, false otherwise
- * @param prev_root_c1
- * child 1 of the previous root
- * @param prev_root_c2
- * child 2 of the previous root
- * @return number of duplications which have been assigned in gene tree
- */
- // int updateM( final boolean prev_root_was_dup,
- // final PhylogenyNode prev_root_c1, final PhylogenyNode prev_root_c2 ) {
- // final PhylogenyNode root = getGeneTree().getRoot();
- // if ( ( root.getChildNode1() == prev_root_c1 )
- // || ( root.getChildNode2() == prev_root_c1 ) ) {
- // calculateMforNode( prev_root_c1 );
- // }
- // else {
- // calculateMforNode( prev_root_c2 );
- // }
- // Event event = null;
- // if ( prev_root_was_dup ) {
- // event = Event.createSingleDuplicationEvent();
- // }
- // else {
- // event = Event.createSingleSpeciationEvent();
- // }
- // root.getPhylogenyNodeData().setEvent( event );
- // calculateMforNode( root );
- // return getDuplications();
- // } // updateM( boolean, PhylogenyNode, PhylogenyNode )
- // Helper method for updateM( boolean, PhylogenyNode, PhylogenyNode )
- // Calculates M for PhylogenyNode n, given that M for the two children
- // of n has been calculated.
- // (Last modified: 10/02/01)
- // private void calculateMforNode( final PhylogenyNode n ) {
- // if ( !n.isExternal() ) {
- // boolean was_duplication = n.isDuplication();
- // PhylogenyNode a = n.getChildNode1().getLink(), b = n
- // .getChildNode2().getLink();
- // while ( a != b ) {
- // if ( a.getID() > b.getID() ) {
- // a = a.getParent();
- // }
- // else {
- // b = b.getParent();
- // }
- // }
- // n.setLink( a );
- // Event event = null;
- // if ( ( a == n.getChildNode1().getLink() )
- // || ( a == n.getChildNode2().getLink() ) ) {
- // event = Event.createSingleDuplicationEvent();
- // if ( !was_duplication ) {
- // increaseDuplications();
- // }
- // }
- // else {
- // event = Event.createSingleSpeciationEvent();
- // if ( was_duplication ) {
- // decreaseDuplications();
- // }
- // }
- // n.getPhylogenyNodeData().setEvent( event );
- // }
- // } // calculateMforNode( PhylogenyNode )
}