X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsdi%2FGSDIR.java;h=7ca8dca5e490411d96f5dddf454e310b4e8f8587;hb=b819fa043cac2722618af63f0d4752ffa1a40890;hp=5fd0a3b605575a2868a59c2aace0f24707af59e0;hpb=bb367671ad9b009584da301ad4591c2f1e8f0901;p=jalview.git diff --git a/forester/java/src/org/forester/sdi/GSDIR.java b/forester/java/src/org/forester/sdi/GSDIR.java index 5fd0a3b..7ca8dca 100644 --- a/forester/java/src/org/forester/sdi/GSDIR.java +++ b/forester/java/src/org/forester/sdi/GSDIR.java @@ -26,70 +26,198 @@ package org.forester.sdi; import java.util.ArrayList; import java.util.List; +import java.util.Set; +import java.util.SortedSet; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyBranch; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.BasicDescriptiveStatistics; -public class GSDIR extends GSDI { +public class GSDIR implements GSDII { - private int _min_duplications_sum; + private final int _min_duplications_sum; + private final int _speciations_sum; private final BasicDescriptiveStatistics _duplications_sum_stats; - private final List _min_duplications_sum_gene_trees; + private Phylogeny _min_duplications_sum_gene_tree; + private final List _stripped_gene_tree_nodes; + private final List _stripped_species_tree_nodes; + private final Set _mapped_species_tree_nodes; + private final TaxonomyComparisonBase _tax_comp_base; + private final SortedSet _scientific_names_mapped_to_reduced_specificity; public GSDIR( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree, - final boolean strip_species_tree ) throws SDIException { - super( gene_tree.copy(), species_tree, strip_gene_tree ); - linkNodesOfG( null, strip_gene_tree, strip_species_tree ); + final boolean strip_species_tree, + final boolean transfer_taxonomy ) throws SDIException { + final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree, + species_tree, + strip_gene_tree, + strip_species_tree ); + _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes(); + _stripped_species_tree_nodes = nodes_linking_result.getStrippedSpeciesTreeNodes(); + _mapped_species_tree_nodes = nodes_linking_result.getMappedSpeciesTreeNodes(); + _scientific_names_mapped_to_reduced_specificity = nodes_linking_result + .getScientificNamesMappedToReducedSpecificity(); + _tax_comp_base = nodes_linking_result.getTaxCompBase(); final List gene_tree_branches_post_order = new ArrayList(); - for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) { + for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); - if ( !n.isRoot() && !( n.getParent().isRoot() && n.isFirstChildNode() ) ) { + if ( !n.isRoot() && !( n.getParent().isRoot() && ( gene_tree.getRoot().getNumberOfDescendants() == 2 ) ) ) { gene_tree_branches_post_order.add( new PhylogenyBranch( n, n.getParent() ) ); } } - _min_duplications_sum = Integer.MAX_VALUE; - _min_duplications_sum_gene_trees = new ArrayList(); + if ( gene_tree.getRoot().getNumberOfDescendants() == 2 ) { + gene_tree_branches_post_order.add( new PhylogenyBranch( gene_tree.getRoot().getChildNode1(), gene_tree + .getRoot().getChildNode2() ) ); + } + int min_duplications_sum = Integer.MAX_VALUE; + int speciations_sum = 0; _duplications_sum_stats = new BasicDescriptiveStatistics(); for( final PhylogenyBranch branch : gene_tree_branches_post_order ) { - _duplications_sum = 0; - _speciations_sum = 0; - _gene_tree.reRoot( branch ); - PhylogenyMethods.preOrderReId( getSpeciesTree() ); - //TEST, remove later - // for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) { - // final PhylogenyNode g = it.next(); - // if ( g.isInternal() ) { - // g.setLink( null ); - // } - // } - geneTreePostOrderTraversal(); - if ( _duplications_sum < _min_duplications_sum ) { - _min_duplications_sum = _duplications_sum; - _min_duplications_sum_gene_trees.clear(); - _min_duplications_sum_gene_trees.add( getGeneTree().copy() ); + reRoot( branch, gene_tree ); + PhylogenyMethods.preOrderReId( species_tree ); + final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree, + true, + min_duplications_sum ); + if ( gsdi_result == null ) { + continue; + } + if ( gsdi_result.getDuplicationsSum() < min_duplications_sum ) { + min_duplications_sum = gsdi_result.getDuplicationsSum(); + speciations_sum = gsdi_result.getSpeciationsSum(); + if ( transfer_taxonomy ) { + transferTaxonomy( gene_tree ); + } + _min_duplications_sum_gene_tree = gene_tree.copy(); } - else if ( _duplications_sum == _min_duplications_sum ) { - _min_duplications_sum_gene_trees.add( getGeneTree().copy() ); + else if ( gsdi_result.getDuplicationsSum() == min_duplications_sum ) { + final List l = new ArrayList(); + l.add( _min_duplications_sum_gene_tree ); + l.add( gene_tree ); + final int index = getIndexesOfShortestTree( l ).get( 0 ); + if ( index == 1 ) { + if ( transfer_taxonomy ) { + transferTaxonomy( gene_tree ); + } + _min_duplications_sum_gene_tree = gene_tree.copy(); + } } - _duplications_sum_stats.addValue( _duplications_sum ); + _duplications_sum_stats.addValue( gsdi_result.getDuplicationsSum() ); } + _min_duplications_sum = min_duplications_sum; + _speciations_sum = speciations_sum; + } + + public BasicDescriptiveStatistics getDuplicationsSumStats() { + return _duplications_sum_stats; + } + + @Override + public Set getMappedExternalSpeciesTreeNodes() { + return _mapped_species_tree_nodes; } public int getMinDuplicationsSum() { return _min_duplications_sum; } - public List getMinDuplicationsSumGeneTrees() { - return _min_duplications_sum_gene_trees; + public Phylogeny getMinDuplicationsSumGeneTree() { + return _min_duplications_sum_gene_tree; } - public BasicDescriptiveStatistics getDuplicationsSumStats() { - return _duplications_sum_stats; + @Override + public final SortedSet getReMappedScientificNamesFromGeneTree() { + return _scientific_names_mapped_to_reduced_specificity; + } + + @Override + public int getSpeciationsSum() { + return _speciations_sum; + } + + @Override + public List getStrippedExternalGeneTreeNodes() { + return _stripped_gene_tree_nodes; + } + + @Override + public List getStrippedSpeciesTreeNodes() { + return _stripped_species_tree_nodes; + } + + @Override + public TaxonomyComparisonBase getTaxCompBase() { + return _tax_comp_base; + } + + public final static List getIndexesOfShortestTree( final List assigned_trees ) { + final List shortests = new ArrayList(); + boolean depth = true; + double x = Double.MAX_VALUE; + for( int i = 0; i < assigned_trees.size(); ++i ) { + final Phylogeny phy = assigned_trees.get( i ); + if ( i == 0 ) { + if ( PhylogenyMethods.calculateMaxDistanceToRoot( phy ) > 0 ) { + depth = false; + } + } + final double d; + if ( depth ) { + d = PhylogenyMethods.calculateMaxDepth( phy ); + } + else { + d = PhylogenyMethods.calculateMaxDistanceToRoot( phy ); + } + if ( d < x ) { + x = d; + shortests.clear(); + shortests.add( i ); + } + else if ( d == x ) { + shortests.add( i ); + } + } + return shortests; + } + + /** + * Places the root of this Phylogeny on Branch b. The new root is always + * placed on the middle of the branch b. + * + */ + static final void reRoot( final PhylogenyBranch b, final Phylogeny phy ) { + final PhylogenyNode n1 = b.getFirstNode(); + final PhylogenyNode n2 = b.getSecondNode(); + if ( n1.isExternal() ) { + phy.reRoot( n1 ); + } + else if ( n2.isExternal() ) { + phy.reRoot( n2 ); + } + else if ( ( n2 == n1.getChildNode1() ) || ( n2 == n1.getChildNode2() ) ) { + phy.reRoot( n2 ); + } + else if ( ( n1 == n2.getChildNode1() ) || ( n1 == n2.getChildNode2() ) ) { + phy.reRoot( n1 ); + } + // else if ( ( n1.getParent() != null ) && n1.getParent().isRoot() + // && ( ( n1.getParent().getChildNode1() == n2 ) || ( n1.getParent().getChildNode2() == n2 ) ) ) { + // phy.reRoot( n1 ); + // + // } + else { + throw new IllegalArgumentException( "reRoot( Branch b ): b is not a branch." ); + } + } + + private final static void transferTaxonomy( final Phylogeny gt ) { + for( final PhylogenyNodeIterator it = gt.iteratorPostorder(); it.hasNext(); ) { + GSDI.transferTaxonomy( it.next() ); + } } }