transfer of taxonomy in GSDI and RIO
[jalview.git] / forester / java / src / org / forester / sdi / GSDIR.java
index 611ef4a..7ca8dca 100644 (file)
@@ -39,15 +39,10 @@ import org.forester.util.BasicDescriptiveStatistics;
 \r
 public class GSDIR implements GSDII {\r
 \r
-    private final int _min_duplications_sum;\r
-    private final int _speciations_sum;\r
-\r
-    @Override\r
-    public int getSpeciationsSum() {\r
-        return _speciations_sum;\r
-    }\r
+    private final int                        _min_duplications_sum;\r
+    private final int                        _speciations_sum;\r
     private final BasicDescriptiveStatistics _duplications_sum_stats;\r
-    private final List<Phylogeny>            _min_duplications_sum_gene_trees;\r
+    private Phylogeny                        _min_duplications_sum_gene_tree;\r
     private final List<PhylogenyNode>        _stripped_gene_tree_nodes;\r
     private final List<PhylogenyNode>        _stripped_species_tree_nodes;\r
     private final Set<PhylogenyNode>         _mapped_species_tree_nodes;\r
@@ -57,10 +52,10 @@ public class GSDIR implements GSDII {
     public GSDIR( final Phylogeny gene_tree,\r
                   final Phylogeny species_tree,\r
                   final boolean strip_gene_tree,\r
-                  final boolean strip_species_tree ) throws SDIException {\r
+                  final boolean strip_species_tree,\r
+                  final boolean transfer_taxonomy ) throws SDIException {\r
         final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree,\r
                                                                            species_tree,\r
-                                                                           null,\r
                                                                            strip_gene_tree,\r
                                                                            strip_species_tree );\r
         _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();\r
@@ -72,34 +67,45 @@ public class GSDIR implements GSDII {
         final List<PhylogenyBranch> gene_tree_branches_post_order = new ArrayList<PhylogenyBranch>();\r
         for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {\r
             final PhylogenyNode n = it.next();\r
-            if ( !n.isRoot() /*&& !( n.getParent().isRoot() && n.isFirstChildNode() )*/) {\r
+            if ( !n.isRoot() && !( n.getParent().isRoot() && ( gene_tree.getRoot().getNumberOfDescendants() == 2 ) ) ) {\r
                 gene_tree_branches_post_order.add( new PhylogenyBranch( n, n.getParent() ) );\r
             }\r
         }\r
+        if ( gene_tree.getRoot().getNumberOfDescendants() == 2 ) {\r
+            gene_tree_branches_post_order.add( new PhylogenyBranch( gene_tree.getRoot().getChildNode1(), gene_tree\r
+                    .getRoot().getChildNode2() ) );\r
+        }\r
         int min_duplications_sum = Integer.MAX_VALUE;\r
         int speciations_sum = 0;\r
-        _min_duplications_sum_gene_trees = new ArrayList<Phylogeny>();\r
         _duplications_sum_stats = new BasicDescriptiveStatistics();\r
         for( final PhylogenyBranch branch : gene_tree_branches_post_order ) {\r
-            gene_tree.reRoot( branch );\r
+            reRoot( branch, gene_tree );\r
             PhylogenyMethods.preOrderReId( species_tree );\r
-            //TEST, remove later\r
-            //            for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) {\r
-            //                final PhylogenyNode g = it.next();\r
-            //                if ( g.isInternal() ) {\r
-            //                    g.setLink( null );\r
-            //                }\r
-            //            }\r
-            final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree, true );\r
+            final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree,\r
+                                                                                   true,\r
+                                                                                   min_duplications_sum );\r
+            if ( gsdi_result == null ) {\r
+                continue;\r
+            }\r
             if ( gsdi_result.getDuplicationsSum() < min_duplications_sum ) {\r
                 min_duplications_sum = gsdi_result.getDuplicationsSum();\r
                 speciations_sum = gsdi_result.getSpeciationsSum();\r
-                _min_duplications_sum_gene_trees.clear();\r
-                _min_duplications_sum_gene_trees.add( gene_tree.copy() );\r
-                //_speciations_sum\r
+                if ( transfer_taxonomy ) {\r
+                    transferTaxonomy( gene_tree );\r
+                }\r
+                _min_duplications_sum_gene_tree = gene_tree.copy();\r
             }\r
             else if ( gsdi_result.getDuplicationsSum() == min_duplications_sum ) {\r
-                _min_duplications_sum_gene_trees.add( gene_tree.copy() );\r
+                final List<Phylogeny> l = new ArrayList<Phylogeny>();\r
+                l.add( _min_duplications_sum_gene_tree );\r
+                l.add( gene_tree );\r
+                final int index = getIndexesOfShortestTree( l ).get( 0 );\r
+                if ( index == 1 ) {\r
+                    if ( transfer_taxonomy ) {\r
+                        transferTaxonomy( gene_tree );\r
+                    }\r
+                    _min_duplications_sum_gene_tree = gene_tree.copy();\r
+                }\r
             }\r
             _duplications_sum_stats.addValue( gsdi_result.getDuplicationsSum() );\r
         }\r
@@ -107,14 +113,6 @@ public class GSDIR implements GSDII {
         _speciations_sum = speciations_sum;\r
     }\r
 \r
-    public int getMinDuplicationsSum() {\r
-        return _min_duplications_sum;\r
-    }\r
-\r
-    public List<Phylogeny> getMinDuplicationsSumGeneTrees() {\r
-        return _min_duplications_sum_gene_trees;\r
-    }\r
-\r
     /**\r
      * Returns the statistics object to which the constructor adds the\r
      * duplications sum of every gene tree rooting it obtained a result for.\r
      *\r
      * @return the duplications sum statistics\r
      */\r
     public BasicDescriptiveStatistics getDuplicationsSumStats() {\r
         return _duplications_sum_stats;\r
     }\r
@@ -124,12 +122,25 @@ public class GSDIR implements GSDII {
         return _mapped_species_tree_nodes;\r
     }\r
 \r
+    /**\r
+     * Returns the value of the _min_duplications_sum field.\r
+     * NOTE(review): presumably the smallest duplications sum found over all\r
+     * evaluated rootings -- the assignment is not visible here, confirm\r
+     * against the constructor.\r
+     *\r
+     * @return the stored minimal duplications sum\r
+     */\r
+    public int getMinDuplicationsSum() {\r
+        return _min_duplications_sum;\r
+    }\r
+\r
+    /**\r
+     * Returns the gene tree copy kept by the constructor: a copy of the\r
+     * rooting with the smallest duplications sum. When a later rooting ties\r
+     * that sum, it replaces the stored copy only if getIndexesOfShortestTree\r
+     * ranks it strictly ahead of the stored one.\r
+     *\r
+     * @return the stored gene tree copy, or null if the constructor never\r
+     *         stored one\r
+     */\r
+    public Phylogeny getMinDuplicationsSumGeneTree() {\r
+        return _min_duplications_sum_gene_tree;\r
+    }\r
+\r
     /**\r
      * Returns the sorted set held in\r
      * _scientific_names_mapped_to_reduced_specificity.\r
      * NOTE(review): the code that fills this set is not visible here --\r
      * confirm its contents against the constructor.\r
      *\r
      * @return the stored sorted set of names\r
      */\r
     @Override\r
     public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {\r
         return _scientific_names_mapped_to_reduced_specificity;\r
     }\r
 \r
     /**\r
      * Returns the speciations sum recorded by the constructor for the first\r
      * rooting that reached the minimal duplications sum. It is not updated\r
      * when a later rooting merely ties that minimum, even if that rooting\r
      * replaces the stored gene tree.\r
      *\r
      * @return the recorded speciations sum\r
      */\r
     @Override\r
+    public int getSpeciationsSum() {\r
+        return _speciations_sum;\r
+    }\r
+\r
+    /**\r
+     * Returns the list the constructor obtained from the node linking\r
+     * result via getStrippedGeneTreeNodes().\r
+     *\r
+     * @return the stripped gene tree nodes (the internal list itself, not a copy)\r
+     */\r
+    @Override\r
     public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {\r
         return _stripped_gene_tree_nodes;\r
     }\r
@@ -143,4 +154,70 @@ public class GSDIR implements GSDII {
     /**\r
      * Returns the value of the _tax_comp_base field.\r
      * NOTE(review): where this field is set is not visible here -- confirm\r
      * against the constructor.\r
      *\r
      * @return the stored taxonomy comparison base\r
      */\r
     public TaxonomyComparisonBase getTaxCompBase() {\r
         return _tax_comp_base;\r
     }\r
+\r
+    public final static List<Integer> getIndexesOfShortestTree( final List<Phylogeny> assigned_trees ) {\r
+        final List<Integer> shortests = new ArrayList<Integer>();\r
+        boolean depth = true;\r
+        double x = Double.MAX_VALUE;\r
+        for( int i = 0; i < assigned_trees.size(); ++i ) {\r
+            final Phylogeny phy = assigned_trees.get( i );\r
+            if ( i == 0 ) {\r
+                if ( PhylogenyMethods.calculateMaxDistanceToRoot( phy ) > 0 ) {\r
+                    depth = false;\r
+                }\r
+            }\r
+            final double d;\r
+            if ( depth ) {\r
+                d = PhylogenyMethods.calculateMaxDepth( phy );\r
+            }\r
+            else {\r
+                d = PhylogenyMethods.calculateMaxDistanceToRoot( phy );\r
+            }\r
+            if ( d < x ) {\r
+                x = d;\r
+                shortests.clear();\r
+                shortests.add( i );\r
+            }\r
+            else if ( d == x ) {\r
+                shortests.add( i );\r
+            }\r
+        }\r
+        return shortests;\r
+    }\r
+\r
+    /**\r
+     * Places the root of this Phylogeny on Branch b. The new root is always\r
+     * placed on the middle of the branch b.\r
+     * \r
+     */\r
+    static final void reRoot( final PhylogenyBranch b, final Phylogeny phy ) {\r
+        final PhylogenyNode n1 = b.getFirstNode();\r
+        final PhylogenyNode n2 = b.getSecondNode();\r
+        if ( n1.isExternal() ) {\r
+            phy.reRoot( n1 );\r
+        }\r
+        else if ( n2.isExternal() ) {\r
+            phy.reRoot( n2 );\r
+        }\r
+        else if ( ( n2 == n1.getChildNode1() ) || ( n2 == n1.getChildNode2() ) ) {\r
+            phy.reRoot( n2 );\r
+        }\r
+        else if ( ( n1 == n2.getChildNode1() ) || ( n1 == n2.getChildNode2() ) ) {\r
+            phy.reRoot( n1 );\r
+        }\r
+        //        else if ( ( n1.getParent() != null ) && n1.getParent().isRoot()\r
+        //                && ( ( n1.getParent().getChildNode1() == n2 ) || ( n1.getParent().getChildNode2() == n2 ) ) ) {\r
+        //            phy.reRoot( n1 );\r
+        //           \r
+        //        }\r
+        else {\r
+            throw new IllegalArgumentException( "reRoot( Branch b ): b is not a branch." );\r
+        }\r
+    }\r
+\r
+    private final static void transferTaxonomy( final Phylogeny gt ) {\r
+        for( final PhylogenyNodeIterator it = gt.iteratorPostorder(); it.hasNext(); ) {\r
+            GSDI.transferTaxonomy( it.next() );\r
+        }\r
+    }\r
 }\r