2 // FORESTER -- software libraries and applications
\r
3 // for evolutionary biology research and applications.
\r
5 // Copyright (C) 2008-2013 Christian M. Zmasek
\r
6 // All rights reserved
\r
8 // This library is free software; you can redistribute it and/or
\r
9 // modify it under the terms of the GNU Lesser General Public
\r
10 // License as published by the Free Software Foundation; either
\r
11 // version 2.1 of the License, or (at your option) any later version.
\r
13 // This library is distributed in the hope that it will be useful,
\r
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
16 // Lesser General Public License for more details.
\r
18 // You should have received a copy of the GNU Lesser General Public
\r
19 // License along with this library; if not, write to the Free Software
\r
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
22 // Contact: phylosoft @ gmail . com
\r
23 // WWW: www.phylosoft.org
\r
25 package org.forester.sdi;
\r
27 import java.util.ArrayList;
\r
28 import java.util.List;
\r
29 import java.util.Set;
\r
30 import java.util.SortedSet;
\r
32 import org.forester.phylogeny.Phylogeny;
\r
33 import org.forester.phylogeny.PhylogenyBranch;
\r
34 import org.forester.phylogeny.PhylogenyMethods;
\r
35 import org.forester.phylogeny.PhylogenyNode;
\r
36 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
\r
37 import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
\r
38 import org.forester.util.BasicDescriptiveStatistics;
\r
40 public class GSDIR implements GSDII {
\r
42 private final int _min_duplications_sum;
\r
43 private final int _speciations_sum;
\r
44 private final BasicDescriptiveStatistics _duplications_sum_stats;
\r
45 private Phylogeny _min_duplications_sum_gene_tree;
\r
46 private final List<PhylogenyNode> _stripped_gene_tree_nodes;
\r
47 private final List<PhylogenyNode> _stripped_species_tree_nodes;
\r
48 private final Set<PhylogenyNode> _mapped_species_tree_nodes;
\r
49 private final TaxonomyComparisonBase _tax_comp_base;
\r
50 private final SortedSet<String> _scientific_names_mapped_to_reduced_specificity;
\r
52 public GSDIR( final Phylogeny gene_tree,
\r
53 final Phylogeny species_tree,
\r
54 final boolean strip_gene_tree,
\r
55 final boolean strip_species_tree,
\r
56 final boolean transfer_taxonomy ) throws SDIException {
\r
57 final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree,
\r
60 strip_species_tree );
\r
61 _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();
\r
62 _stripped_species_tree_nodes = nodes_linking_result.getStrippedSpeciesTreeNodes();
\r
63 _mapped_species_tree_nodes = nodes_linking_result.getMappedSpeciesTreeNodes();
\r
64 _scientific_names_mapped_to_reduced_specificity = nodes_linking_result
\r
65 .getScientificNamesMappedToReducedSpecificity();
\r
66 _tax_comp_base = nodes_linking_result.getTaxCompBase();
\r
67 final List<PhylogenyBranch> gene_tree_branches_post_order = new ArrayList<PhylogenyBranch>();
\r
68 for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
\r
69 final PhylogenyNode n = it.next();
\r
70 if ( !n.isRoot() && !( n.getParent().isRoot() && ( gene_tree.getRoot().getNumberOfDescendants() == 2 ) ) ) {
\r
71 gene_tree_branches_post_order.add( new PhylogenyBranch( n, n.getParent() ) );
\r
74 if ( gene_tree.getRoot().getNumberOfDescendants() == 2 ) {
\r
75 gene_tree_branches_post_order.add( new PhylogenyBranch( gene_tree.getRoot().getChildNode1(), gene_tree
\r
76 .getRoot().getChildNode2() ) );
\r
78 int min_duplications_sum = Integer.MAX_VALUE;
\r
79 int speciations_sum = 0;
\r
80 _duplications_sum_stats = new BasicDescriptiveStatistics();
\r
81 for( final PhylogenyBranch branch : gene_tree_branches_post_order ) {
\r
82 reRoot( branch, gene_tree );
\r
83 PhylogenyMethods.preOrderReId( species_tree );
\r
84 final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree,
\r
86 min_duplications_sum );
\r
87 if ( gsdi_result == null ) {
\r
90 if ( gsdi_result.getDuplicationsSum() < min_duplications_sum ) {
\r
91 min_duplications_sum = gsdi_result.getDuplicationsSum();
\r
92 speciations_sum = gsdi_result.getSpeciationsSum();
\r
93 _min_duplications_sum_gene_tree = gene_tree.copy();
\r
94 if ( transfer_taxonomy ) {
\r
95 transferTaxonomy( _min_duplications_sum_gene_tree );
\r
98 else if ( gsdi_result.getDuplicationsSum() == min_duplications_sum ) {
\r
99 final List<Phylogeny> l = new ArrayList<Phylogeny>();
\r
100 l.add( _min_duplications_sum_gene_tree );
\r
101 l.add( gene_tree );
\r
102 final int index = getIndexesOfShortestTree( l ).get( 0 );
\r
103 if ( index == 1 ) {
\r
104 _min_duplications_sum_gene_tree = gene_tree.copy();
\r
105 if ( transfer_taxonomy ) {
\r
106 transferTaxonomy( _min_duplications_sum_gene_tree );
\r
110 _duplications_sum_stats.addValue( gsdi_result.getDuplicationsSum() );
\r
112 _min_duplications_sum = min_duplications_sum;
\r
113 _speciations_sum = speciations_sum;
\r
116 public BasicDescriptiveStatistics getDuplicationsSumStats() {
\r
117 return _duplications_sum_stats;
\r
121 public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {
\r
122 return _mapped_species_tree_nodes;
\r
125 public int getMinDuplicationsSum() {
\r
126 return _min_duplications_sum;
\r
129 public Phylogeny getMinDuplicationsSumGeneTree() {
\r
130 return _min_duplications_sum_gene_tree;
\r
134 public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {
\r
135 return _scientific_names_mapped_to_reduced_specificity;
\r
139 public int getSpeciationsSum() {
\r
140 return _speciations_sum;
\r
144 public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {
\r
145 return _stripped_gene_tree_nodes;
\r
149 public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
\r
150 return _stripped_species_tree_nodes;
\r
154 public TaxonomyComparisonBase getTaxCompBase() {
\r
155 return _tax_comp_base;
\r
158 public final static List<Integer> getIndexesOfShortestTree( final List<Phylogeny> assigned_trees ) {
\r
159 final List<Integer> shortests = new ArrayList<Integer>();
\r
160 boolean depth = true;
\r
161 double x = Double.MAX_VALUE;
\r
162 for( int i = 0; i < assigned_trees.size(); ++i ) {
\r
163 final Phylogeny phy = assigned_trees.get( i );
\r
165 if ( PhylogenyMethods.calculateMaxDistanceToRoot( phy ) > 0 ) {
\r
171 d = PhylogenyMethods.calculateMaxDepth( phy );
\r
174 d = PhylogenyMethods.calculateMaxDistanceToRoot( phy );
\r
179 shortests.add( i );
\r
181 else if ( d == x ) {
\r
182 shortests.add( i );
\r
189 * Places the root of this Phylogeny on Branch b. The new root is always
\r
190 * placed on the middle of the branch b.
\r
193 static final void reRoot( final PhylogenyBranch b, final Phylogeny phy ) {
\r
194 final PhylogenyNode n1 = b.getFirstNode();
\r
195 final PhylogenyNode n2 = b.getSecondNode();
\r
196 if ( n1.isExternal() ) {
\r
199 else if ( n2.isExternal() ) {
\r
202 else if ( ( n2 == n1.getChildNode1() ) || ( n2 == n1.getChildNode2() ) ) {
\r
205 else if ( ( n1 == n2.getChildNode1() ) || ( n1 == n2.getChildNode2() ) ) {
\r
208 // else if ( ( n1.getParent() != null ) && n1.getParent().isRoot()
\r
209 // && ( ( n1.getParent().getChildNode1() == n2 ) || ( n1.getParent().getChildNode2() == n2 ) ) ) {
\r
210 // phy.reRoot( n1 );
\r
214 throw new IllegalArgumentException( "reRoot( Branch b ): b is not a branch." );
\r
218 private final static void transferTaxonomy( final Phylogeny gt ) {
\r
219 for( final PhylogenyNodeIterator it = gt.iteratorPostorder(); it.hasNext(); ) {
\r
220 GSDI.transferTaxonomy( it.next() );
\r