"rio" work
authorcmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 14 Dec 2012 05:02:39 +0000 (05:02 +0000)
committercmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 14 Dec 2012 05:02:39 +0000 (05:02 +0000)
15 files changed:
forester/java/src/org/forester/application/gsdi.java
forester/java/src/org/forester/application/rio.java
forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java
forester/java/src/org/forester/rio/RIO.java
forester/java/src/org/forester/rio/TestRIO.java
forester/java/src/org/forester/sdi/GSDI.java
forester/java/src/org/forester/sdi/GSDIR.java
forester/java/src/org/forester/sdi/GSDIsummaryResult.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/NodesLinkingResult.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/SDI.java
forester/java/src/org/forester/sdi/SDIR.java
forester/java/src/org/forester/sdi/SDIse.java [deleted file]
forester/java/src/org/forester/sdi/SDIutil.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/TestGSDI.java
forester/java/src/org/forester/test/Test.java

index 304d12d..871658c 100644 (file)
@@ -50,10 +50,10 @@ import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.sdi.GSDI;
 import org.forester.sdi.GSDIR;
 import org.forester.sdi.SDI;
-import org.forester.sdi.SDI.ALGORITHM;
-import org.forester.sdi.SDI.TaxonomyComparisonBase;
 import org.forester.sdi.SDIException;
-import org.forester.sdi.SDIse;
+import org.forester.sdi.SDIutil;
+import org.forester.sdi.SDIutil.ALGORITHM;
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.EasyWriter;
 import org.forester.util.ForesterConstants;
@@ -200,7 +200,7 @@ public final class gsdi {
                     ( ( NHXParser ) p ).setReplaceUnderscores( true );
                 }
                 species_tree = factory.create( species_tree_file, p )[ 0 ];
-                final TaxonomyComparisonBase comp_base = GSDI.determineTaxonomyComparisonBase( gene_tree );
+                final TaxonomyComparisonBase comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree );
                 switch ( comp_base ) {
                     case SCIENTIFIC_NAME:
                         try {
@@ -280,7 +280,7 @@ public final class gsdi {
                 + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
         System.out.println( "Species tree name                        : "
                 + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
-        SDI sdi = null;
+        Object sdi = null;
         final long start_time = new Date().getTime();
         try {
             if ( ( base_algorithm == ALGORITHM.GSDI ) || ( base_algorithm == ALGORITHM.GSDIR ) ) {
@@ -313,7 +313,7 @@ public final class gsdi {
                 System.out.println( "Algorithm       : SDI" );
                 log_writer.println( "Algorithm       : SDI" );
                 log_writer.flush();
-                sdi = new SDIse( gene_tree, species_tree );
+                sdi = new SDI( gene_tree, species_tree );
             }
         }
         catch ( final SDIException e ) {
@@ -363,6 +363,7 @@ public final class gsdi {
         System.out.println( "Wrote resulting gene tree to             : " + out_file.getCanonicalPath() );
         log_writer.println( "Wrote resulting gene tree to             : " + out_file.getCanonicalPath() );
         if ( base_algorithm == ALGORITHM.SDI ) {
+            sdi = sdi;
             sdi.computeMappingCostL();
             System.out.println( "Mapping cost                             : " + sdi.computeMappingCostL() );
             log_writer.println( "Mapping cost                             : " + sdi.computeMappingCostL() );
index 2715461..8ff6def 100644 (file)
@@ -43,8 +43,8 @@ import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.rio.RIO;
 import org.forester.rio.RIOException;
-import org.forester.sdi.SDI.ALGORITHM;
 import org.forester.sdi.SDIException;
+import org.forester.sdi.SDIutil.ALGORITHM;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.EasyWriter;
 import org.forester.util.ForesterUtil;
index aa78613..6a0846b 100644 (file)
@@ -1292,9 +1292,13 @@ public final class MainFrameApplication extends MainFrame {
         _mainpanel.getTabbedPane().setSelectedIndex( selected );
         showWhole();
         _mainpanel.getCurrentTreePanel().setEdited( true );
-        JOptionPane.showMessageDialog( this, "Duplications: " + gsdir.getDuplicationsSum() + "\n"
-                + "Potential duplications: " + gsdir.getSpeciationOrDuplicationEventsSum() + "\n" + "Speciations: "
-                + gsdir.getSpeciationsSum(), "GSDI successfully completed", JOptionPane.INFORMATION_MESSAGE );
+        JOptionPane.showMessageDialog( this,
+                                       "Duplications: " + gsdir.getMinDuplicationsSum() + "\n" + "Speciations: "
+                                               + gsdir.getSpeciationsSum() + "\n"
+                                               + "Number of root positions minimizing duplications sum: "
+                                               + gsdir.getMinDuplicationsSumGeneTrees().size(),
+                                       "GSDIR successfully completed",
+                                       JOptionPane.INFORMATION_MESSAGE );
     }
 
     void executeFunctionAnalysis() {
index 84d1d15..a42d1f5 100644 (file)
@@ -47,10 +47,10 @@ import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.sdi.GSDIR;
-import org.forester.sdi.SDI.ALGORITHM;
-import org.forester.sdi.SDI.TaxonomyComparisonBase;
 import org.forester.sdi.SDIException;
 import org.forester.sdi.SDIR;
+import org.forester.sdi.SDIutil.ALGORITHM;
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.ForesterUtil;
 
index 00eab1d..06468af 100644 (file)
@@ -8,8 +8,8 @@ import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyMethods.PhylogenyNodeField;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
-import org.forester.sdi.SDI.ALGORITHM;
-import org.forester.sdi.SDI.TaxonomyComparisonBase;
+import org.forester.sdi.SDIutil.ALGORITHM;
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 import org.forester.util.ForesterUtil;
 
 public final class TestRIO {
index fa5265d..d6e6fb6 100644 (file)
@@ -32,139 +32,98 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedSet;
-import java.util.TreeSet;
 
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Event;
-import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 import org.forester.util.ForesterUtil;
 
-/*
- * Implements our algorithm for speciation - duplication inference (SDI). <p>
- * The initialization is accomplished by: </p> <ul> <li>method
- * "linkExtNodesOfG()" of class SDI: setting the links for the external nodes of
- * the gene tree <li>"preorderReID(int)" from class Phylogeny: numbering of
- * nodes of the species tree in preorder <li>the optional stripping of the
- * species tree is accomplished by method "stripTree(Phylogeny,Phylogeny)" of
- * class Phylogeny </ul> <p> The recursion part is accomplished by this class'
- * method "geneTreePostOrderTraversal(PhylogenyNode)". <p> Requires JDK 1.5 or
- * greater.
- * 
- * @see SDI#linkNodesOfG()
- * 
- * @see Phylogeny#preorderReID(int)
- * 
- * @see
- * PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny)
- * 
- * @see #geneTreePostOrderTraversal(PhylogenyNode)
- * 
- * @author Christian M. Zmasek
- */
-public class GSDI extends SDI {
+public final class GSDI {
 
-    private final boolean             _most_parsimonious_duplication_model;
-    protected int                     _speciation_or_duplication_events_sum;
-    protected int                     _speciations_sum;
-    private final List<PhylogenyNode> _stripped_gene_tree_nodes;
-    private final List<PhylogenyNode> _stripped_species_tree_nodes;
-    private final Set<PhylogenyNode>  _mapped_species_tree_nodes;
-    private TaxonomyComparisonBase    _tax_comp_base;
-    private final SortedSet<String>   _scientific_names_mapped_to_reduced_specificity;
+    private final boolean                _most_parsimonious_duplication_model;
+    private final int                    _speciation_or_duplication_events_sum;
+    private final int                    _speciations_sum;
+    private final int                    _duplications_sum;
+    private final List<PhylogenyNode>    _stripped_gene_tree_nodes;
+    private final List<PhylogenyNode>    _stripped_species_tree_nodes;
+    private final Set<PhylogenyNode>     _mapped_species_tree_nodes;
+    private final TaxonomyComparisonBase _tax_comp_base;
+    private final SortedSet<String>      _scientific_names_mapped_to_reduced_specificity;
 
     public GSDI( final Phylogeny gene_tree,
                  final Phylogeny species_tree,
                  final boolean most_parsimonious_duplication_model,
                  final boolean strip_gene_tree,
                  final boolean strip_species_tree ) throws SDIException {
-        super( gene_tree, species_tree );
-        _speciation_or_duplication_events_sum = 0;
-        _speciations_sum = 0;
         _most_parsimonious_duplication_model = most_parsimonious_duplication_model;
-        _duplications_sum = 0;
-        _stripped_gene_tree_nodes = new ArrayList<PhylogenyNode>();
-        _stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
-        _mapped_species_tree_nodes = new HashSet<PhylogenyNode>();
-        _scientific_names_mapped_to_reduced_specificity = new TreeSet<String>();
-        linkNodesOfG( null, strip_gene_tree, strip_species_tree );
-        PhylogenyMethods.preOrderReId( getSpeciesTree() );
-        geneTreePostOrderTraversal();
+        final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree,
+                                                                      species_tree,
+                                                                      null,
+                                                                      strip_gene_tree,
+                                                                      strip_species_tree );
+        _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();
+        _stripped_species_tree_nodes = nodes_linking_result.getStrippedSpeciesTreeNodes();
+        _mapped_species_tree_nodes = nodes_linking_result.getMappedSpeciesTreeNodes();
+        _scientific_names_mapped_to_reduced_specificity = nodes_linking_result
+                .getScientificNamesMappedToReducedSpecificity();
+        _tax_comp_base = nodes_linking_result.getTaxCompBase();
+        PhylogenyMethods.preOrderReId( species_tree );
+        final GSDIsummaryResult gsdi_summary_result = new GSDIsummaryResult();
+        geneTreePostOrderTraversal( gene_tree, _most_parsimonious_duplication_model, gsdi_summary_result );
+        _speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum();
+        _speciations_sum = gsdi_summary_result.getSpeciationsSum();
+        _duplications_sum = gsdi_summary_result.getDuplicationsSum();
     }
 
-    // Used by GSDIR
-    protected GSDI( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree )
-            throws SDIException {
-        super( gene_tree, species_tree );
-        _speciation_or_duplication_events_sum = -1;
-        _speciations_sum = 0;
-        _most_parsimonious_duplication_model = true;
-        _duplications_sum = 0;
-        _stripped_gene_tree_nodes = new ArrayList<PhylogenyNode>();
-        _stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
-        _mapped_species_tree_nodes = new HashSet<PhylogenyNode>();
-        _scientific_names_mapped_to_reduced_specificity = new TreeSet<String>();
+    public int getDuplicationsSum() {
+        return _duplications_sum;
     }
 
-    // s is the node on the species tree g maps to.
-    private final void determineEvent( final PhylogenyNode s, final PhylogenyNode g ) {
-        boolean oyako = false;
-        if ( ( g.getChildNode1().getLink() == s ) || ( g.getChildNode2().getLink() == s ) ) {
-            oyako = true;
-        }
-        if ( g.getLink().getNumberOfDescendants() == 2 ) {
-            if ( oyako ) {
-                g.getNodeData().setEvent( createDuplicationEvent() );
-            }
-            else {
-                g.getNodeData().setEvent( createSpeciationEvent() );
-            }
-        }
-        else {
-            if ( oyako ) {
-                final Set<PhylogenyNode> set = new HashSet<PhylogenyNode>();
-                for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) {
-                    n = n.getLink();
-                    while ( n.getParent() != s ) {
-                        n = n.getParent();
-                        if ( n.isRoot() ) {
-                            break;
-                        }
-                    }
-                    set.add( n );
-                }
-                boolean multiple = false;
-                for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) {
-                    n = n.getLink();
-                    while ( n.getParent() != s ) {
-                        n = n.getParent();
-                        if ( n.isRoot() ) {
-                            break;
-                        }
-                    }
-                    if ( set.contains( n ) ) {
-                        multiple = true;
-                        break;
-                    }
-                }
-                if ( multiple ) {
-                    g.getNodeData().setEvent( createDuplicationEvent() );
-                }
-                else {
-                    if ( _most_parsimonious_duplication_model ) {
-                        g.getNodeData().setEvent( createSpeciationEvent() );
-                    }
-                    else {
-                        g.getNodeData().setEvent( createSingleSpeciationOrDuplicationEvent() );
-                    }
-                }
-            }
-            else {
-                g.getNodeData().setEvent( createSpeciationEvent() );
-            }
+    public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {
+        return _mapped_species_tree_nodes;
+    }
+
+    public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {
+        return _scientific_names_mapped_to_reduced_specificity;
+    }
+
+    public final int getSpeciationOrDuplicationEventsSum() {
+        return _speciation_or_duplication_events_sum;
+    }
+
+    public final int getSpeciationsSum() {
+        return _speciations_sum;
+    }
+
+    public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {
+        return _stripped_gene_tree_nodes;
+    }
+
+    public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
+        return _stripped_species_tree_nodes;
+    }
+
+    public TaxonomyComparisonBase getTaxCompBase() {
+        return _tax_comp_base;
+    }
+
+    @Override
+    public final String toString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "Most parsimonious duplication model: " + _most_parsimonious_duplication_model );
+        sb.append( ForesterUtil.getLineSeparator() );
+        sb.append( "Speciations sum                    : " + getSpeciationsSum() );
+        sb.append( ForesterUtil.getLineSeparator() );
+        sb.append( "Duplications sum                   : " + getDuplicationsSum() );
+        sb.append( ForesterUtil.getLineSeparator() );
+        if ( !_most_parsimonious_duplication_model ) {
+            sb.append( "Speciation or duplications sum     : " + getSpeciationOrDuplicationEventsSum() );
+            sb.append( ForesterUtil.getLineSeparator() );
         }
+        return sb.toString();
     }
 
     /**
@@ -177,8 +136,10 @@ public class GSDI extends SDI {
      * <p>
      * 
      */
-    final void geneTreePostOrderTraversal() {
-        for( final PhylogenyNodeIterator it = getGeneTree().iteratorPostorder(); it.hasNext(); ) {
+    final static void geneTreePostOrderTraversal( final Phylogeny gene_tree,
+                                                  final boolean most_parsimonious_duplication_model,
+                                                  final GSDIsummaryResult res ) {
+        for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
             final PhylogenyNode g = it.next();
             if ( g.isInternal() ) {
                 PhylogenyNode s1 = g.getChildNode1().getLink();
@@ -192,37 +153,11 @@ public class GSDI extends SDI {
                     }
                 }
                 g.setLink( s1 );
-                determineEvent( s1, g );
+                determineEvent( s1, g, most_parsimonious_duplication_model, res );
             }
         }
     }
 
-    private final Event createDuplicationEvent() {
-        final Event event = Event.createSingleDuplicationEvent();
-        ++_duplications_sum;
-        return event;
-    }
-
-    private final Event createSingleSpeciationOrDuplicationEvent() {
-        final Event event = Event.createSingleSpeciationOrDuplicationEvent();
-        ++_speciation_or_duplication_events_sum;
-        return event;
-    }
-
-    private final Event createSpeciationEvent() {
-        final Event event = Event.createSingleSpeciationEvent();
-        ++_speciations_sum;
-        return event;
-    }
-
-    public final int getSpeciationOrDuplicationEventsSum() {
-        return _speciation_or_duplication_events_sum;
-    }
-
-    public final int getSpeciationsSum() {
-        return _speciations_sum;
-    }
-
     /**
      * This allows for linking of internal nodes of the species tree (as opposed
      * to just external nodes, as in the method it overrides.
@@ -230,23 +165,26 @@ public class GSDI extends SDI {
      * @throws SDIException 
      * 
      */
-    final void linkNodesOfG( final TaxonomyComparisonBase tax_comp_base,
-                             final boolean strip_gene_tree,
-                             final boolean strip_species_tree ) throws SDIException {
+    final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree,
+                                                  final Phylogeny species_tree,
+                                                  final TaxonomyComparisonBase tax_comp_base,
+                                                  final boolean strip_gene_tree,
+                                                  final boolean strip_species_tree ) throws SDIException {
         final Map<String, PhylogenyNode> species_to_node_map = new HashMap<String, PhylogenyNode>();
         final List<PhylogenyNode> species_tree_ext_nodes = new ArrayList<PhylogenyNode>();
+        final NodesLinkingResult res = new NodesLinkingResult();
         if ( tax_comp_base == null ) {
-            _tax_comp_base = determineTaxonomyComparisonBase( _gene_tree );
+            res.setTaxCompBase( SDIutil.determineTaxonomyComparisonBase( gene_tree ) );
         }
         else {
-            _tax_comp_base = tax_comp_base;
+            res.setTaxCompBase( tax_comp_base );
         }
         // Stringyfied taxonomy is the key, node is the value.
-        for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
+        for( final PhylogenyNodeIterator iter = species_tree.iteratorExternalForward(); iter.hasNext(); ) {
             final PhylogenyNode s = iter.next();
             species_tree_ext_nodes.add( s );
             if ( s.getNodeData().isHasTaxonomy() ) {
-                final String tax_str = taxonomyToString( s, _tax_comp_base );
+                final String tax_str = SDIutil.taxonomyToString( s, res.getTaxCompBase() );
                 if ( !ForesterUtil.isEmpty( tax_str ) ) {
                     if ( species_to_node_map.containsKey( tax_str ) ) {
                         throw new SDIException( "taxonomy \"" + s + "\" is not unique in species tree" );
@@ -256,21 +194,21 @@ public class GSDI extends SDI {
             }
         }
         // Retrieve the reference to the node with a matching stringyfied taxonomy.
-        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
+        for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
             final PhylogenyNode g = iter.next();
             if ( !g.getNodeData().isHasTaxonomy() ) {
                 if ( strip_gene_tree ) {
-                    _stripped_gene_tree_nodes.add( g );
+                    res.getStrippedGeneTreeNodes().add( g );
                 }
                 else {
                     throw new SDIException( "gene tree node \"" + g + "\" has no taxonomic data" );
                 }
             }
             else {
-                final String tax_str = taxonomyToString( g, _tax_comp_base );
+                final String tax_str = SDIutil.taxonomyToString( g, res.getTaxCompBase() );
                 if ( ForesterUtil.isEmpty( tax_str ) ) {
                     if ( strip_gene_tree ) {
-                        _stripped_gene_tree_nodes.add( g );
+                        res.getStrippedGeneTreeNodes().add( g );
                     }
                     else {
                         throw new SDIException( "gene tree node \"" + g + "\" has no appropriate taxonomic data" );
@@ -278,13 +216,15 @@ public class GSDI extends SDI {
                 }
                 else {
                     PhylogenyNode s = species_to_node_map.get( tax_str );
-                    if ( ( _tax_comp_base == TaxonomyComparisonBase.SCIENTIFIC_NAME ) && ( s == null )
+                    if ( ( res.getTaxCompBase() == TaxonomyComparisonBase.SCIENTIFIC_NAME ) && ( s == null )
                             && ( ForesterUtil.countChars( tax_str, ' ' ) > 1 ) ) {
-                        s = tryMapByRemovingOverlySpecificData( species_to_node_map, tax_str );
+                        s = tryMapByRemovingOverlySpecificData( species_to_node_map,
+                                                                tax_str,
+                                                                res.getScientificNamesMappedToReducedSpecificity() );
                     }
                     if ( s == null ) {
                         if ( strip_gene_tree ) {
-                            _stripped_gene_tree_nodes.add( g );
+                            res.getStrippedGeneTreeNodes().add( g );
                         }
                         else {
                             throw new SDIException( "taxonomy \"" + g.getNodeData().getTaxonomy()
@@ -293,38 +233,158 @@ public class GSDI extends SDI {
                     }
                     else {
                         g.setLink( s );
-                        _mapped_species_tree_nodes.add( s );
+                        res.getMappedSpeciesTreeNodes().add( s );
                     }
                 }
             }
         } // for loop
         if ( strip_gene_tree ) {
-            stripGeneTree();
-            if ( getGeneTree().isEmpty() || ( getGeneTree().getNumberOfExternalNodes() < 2 ) ) {
+            stripTree( gene_tree, res.getStrippedGeneTreeNodes() );
+            if ( gene_tree.isEmpty() || ( gene_tree.getNumberOfExternalNodes() < 2 ) ) {
                 throw new SDIException( "species could not be mapped between gene tree and species tree" );
             }
         }
         if ( strip_species_tree ) {
-            stripSpeciesTree( species_tree_ext_nodes );
+            stripSpeciesTree( species_tree, species_tree_ext_nodes, res.getMappedSpeciesTreeNodes() );
         }
+        return res;
     }
 
-    private final PhylogenyNode tryMapByRemovingOverlySpecificData( final Map<String, PhylogenyNode> species_to_node_map,
-                                                                    final String tax_str ) {
-        PhylogenyNode s = tryMapByRemovingOverlySpecificData( species_to_node_map, tax_str, " (" );
+    private final static void addScientificNamesMappedToReducedSpecificity( final String s1,
+                                                                            final String s2,
+                                                                            final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
+        scientific_names_mapped_to_reduced_specificity.add( s1 + " -> " + s2 );
+    }
+
+    // Used by GSDIR
+    //    protected GSDI( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree )
+    //            throws SDIException {
+    //        super( gene_tree, species_tree );
+    //        _speciation_or_duplication_events_sum = -1;
+    //        _speciations_sum = 0;
+    //        _most_parsimonious_duplication_model = true;
+    //        _duplications_sum = 0;
+    //        _stripped_gene_tree_nodes = new ArrayList<PhylogenyNode>();
+    //        _stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
+    //        _mapped_species_tree_nodes = new HashSet<PhylogenyNode>();
+    //        _scientific_names_mapped_to_reduced_specificity = new TreeSet<String>();
+    //    }
+    // s is the node on the species tree g maps to.
+    private final static void determineEvent( final PhylogenyNode s,
+                                              final PhylogenyNode g,
+                                              final boolean most_parsimonious_duplication_model,
+                                              final GSDIsummaryResult res ) {
+        boolean oyako = false;
+        if ( ( g.getChildNode1().getLink() == s ) || ( g.getChildNode2().getLink() == s ) ) {
+            oyako = true;
+        }
+        if ( g.getLink().getNumberOfDescendants() == 2 ) {
+            if ( oyako ) {
+                g.getNodeData().setEvent( Event.createSingleDuplicationEvent() );
+                res.increaseDuplicationsSum();
+            }
+            else {
+                g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
+                res.increaseSpeciationsSum();
+            }
+        }
+        else {
+            if ( oyako ) {
+                final Set<PhylogenyNode> set = new HashSet<PhylogenyNode>();
+                for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) {
+                    n = n.getLink();
+                    while ( n.getParent() != s ) {
+                        n = n.getParent();
+                        if ( n.isRoot() ) {
+                            break;
+                        }
+                    }
+                    set.add( n );
+                }
+                boolean multiple = false;
+                for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) {
+                    n = n.getLink();
+                    while ( n.getParent() != s ) {
+                        n = n.getParent();
+                        if ( n.isRoot() ) {
+                            break;
+                        }
+                    }
+                    if ( set.contains( n ) ) {
+                        multiple = true;
+                        break;
+                    }
+                }
+                if ( multiple ) {
+                    g.getNodeData().setEvent( Event.createSingleDuplicationEvent() );
+                    res.increaseDuplicationsSum();
+                }
+                else {
+                    if ( most_parsimonious_duplication_model ) {
+                        g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
+                        res.increaseSpeciationsSum();
+                    }
+                    else {
+                        g.getNodeData().setEvent( Event.createSingleSpeciationOrDuplicationEvent() );
+                        res.increaseSpeciationOrDuplicationEventsSum();
+                    }
+                }
+            }
+            else {
+                g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
+                res.increaseSpeciationsSum();
+            }
+        }
+    }
+
+    private final static List<PhylogenyNode> stripSpeciesTree( final Phylogeny species_tree,
+                                                               final List<PhylogenyNode> species_tree_ext_nodes,
+                                                               final Set<PhylogenyNode> keep ) {
+        final List<PhylogenyNode> stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
+        for( final PhylogenyNode s : species_tree_ext_nodes ) {
+            if ( !keep.contains( s ) ) {
+                species_tree.deleteSubtree( s, true );
+                stripped_species_tree_nodes.add( s );
+            }
+        }
+        species_tree.clearHashIdToNodeMap();
+        species_tree.externalNodesHaveChanged();
+        return stripped_species_tree_nodes;
+    }
+
+    private final static void stripTree( final Phylogeny phy, final List<PhylogenyNode> strip_nodes ) {
+        for( final PhylogenyNode g : strip_nodes ) {
+            phy.deleteSubtree( g, true );
+        }
+        phy.clearHashIdToNodeMap();
+        phy.externalNodesHaveChanged();
+    }
+
+    private final static PhylogenyNode tryMapByRemovingOverlySpecificData( final Map<String, PhylogenyNode> species_to_node_map,
+                                                                           final String tax_str,
+                                                                           final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
+        PhylogenyNode s = tryMapByRemovingOverlySpecificData( species_to_node_map,
+                                                              tax_str,
+                                                              " (",
+                                                              scientific_names_mapped_to_reduced_specificity );
         if ( s == null ) {
             if ( ForesterUtil.countChars( tax_str, ' ' ) == 2 ) {
                 final String new_tax_str = tax_str.substring( 0, tax_str.lastIndexOf( ' ' ) ).trim();
                 s = species_to_node_map.get( new_tax_str );
                 if ( s != null ) {
-                    addScientificNamesMappedToReducedSpecificity( tax_str, new_tax_str );
+                    addScientificNamesMappedToReducedSpecificity( tax_str,
+                                                                  new_tax_str,
+                                                                  scientific_names_mapped_to_reduced_specificity );
                 }
             }
         }
         if ( s == null ) {
             for( final String t : new String[] { " subspecies ", " strain ", " variety ", " varietas ", " subvariety ",
                     " form ", " subform ", " cultivar ", " section ", " subsection " } ) {
-                s = tryMapByRemovingOverlySpecificData( species_to_node_map, tax_str, t );
+                s = tryMapByRemovingOverlySpecificData( species_to_node_map,
+                                                        tax_str,
+                                                        t,
+                                                        scientific_names_mapped_to_reduced_specificity );
                 if ( s != null ) {
                     break;
                 }
@@ -333,121 +393,21 @@ public class GSDI extends SDI {
         return s;
     }
 
-    private final PhylogenyNode tryMapByRemovingOverlySpecificData( final Map<String, PhylogenyNode> species_to_node_map,
-                                                                    final String tax_str,
-                                                                    final String term ) {
+    private final static PhylogenyNode tryMapByRemovingOverlySpecificData( final Map<String, PhylogenyNode> species_to_node_map,
+                                                                           final String tax_str,
+                                                                           final String term,
+                                                                           final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
         final int i = tax_str.indexOf( term );
         if ( i > 4 ) {
             final String new_tax_str = tax_str.substring( 0, i ).trim();
             final PhylogenyNode s = species_to_node_map.get( new_tax_str );
             if ( s != null ) {
-                addScientificNamesMappedToReducedSpecificity( tax_str, new_tax_str );
+                addScientificNamesMappedToReducedSpecificity( tax_str,
+                                                              new_tax_str,
+                                                              scientific_names_mapped_to_reduced_specificity );
             }
             return s;
         }
         return null;
     }
-
-    private final void addScientificNamesMappedToReducedSpecificity( final String s1, final String s2 ) {
-        _scientific_names_mapped_to_reduced_specificity.add( s1 + " -> " + s2 );
-    }
-
-    public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {
-        return _scientific_names_mapped_to_reduced_specificity;
-    }
-
-    public TaxonomyComparisonBase getTaxCompBase() {
-        return _tax_comp_base;
-    }
-
-    private void stripSpeciesTree( final List<PhylogenyNode> species_tree_ext_nodes ) {
-        for( final PhylogenyNode s : species_tree_ext_nodes ) {
-            if ( !_mapped_species_tree_nodes.contains( s ) ) {
-                _species_tree.deleteSubtree( s, true );
-                _stripped_species_tree_nodes.add( s );
-            }
-        }
-        _species_tree.clearHashIdToNodeMap();
-        _species_tree.externalNodesHaveChanged();
-    }
-
-    public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
-        return _stripped_species_tree_nodes;
-    }
-
-    private void stripGeneTree() {
-        for( final PhylogenyNode g : _stripped_gene_tree_nodes ) {
-            _gene_tree.deleteSubtree( g, true );
-        }
-        _gene_tree.clearHashIdToNodeMap();
-        _gene_tree.externalNodesHaveChanged();
-    }
-
-    public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {
-        return _mapped_species_tree_nodes;
-    }
-
-    public static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree ) {
-        int with_id_count = 0;
-        int with_code_count = 0;
-        int with_sn_count = 0;
-        int max = 0;
-        for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
-            final PhylogenyNode g = iter.next();
-            if ( g.getNodeData().isHasTaxonomy() ) {
-                final Taxonomy tax = g.getNodeData().getTaxonomy();
-                if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
-                    if ( ++with_id_count > max ) {
-                        max = with_id_count;
-                    }
-                }
-                if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
-                    if ( ++with_code_count > max ) {
-                        max = with_code_count;
-                    }
-                }
-                if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
-                    if ( ++with_sn_count > max ) {
-                        max = with_sn_count;
-                    }
-                }
-            }
-        }
-        if ( max == 0 ) {
-            throw new IllegalArgumentException( "gene tree has no taxonomic data" );
-        }
-        else if ( max == 1 ) {
-            throw new IllegalArgumentException( "gene tree has only one node with taxonomic data" );
-        }
-        else if ( max == with_id_count ) {
-            return SDI.TaxonomyComparisonBase.ID;
-        }
-        else if ( max == with_sn_count ) {
-            return SDI.TaxonomyComparisonBase.SCIENTIFIC_NAME;
-        }
-        else {
-            return SDI.TaxonomyComparisonBase.CODE;
-        }
-    }
-
-    public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {
-        return _stripped_gene_tree_nodes;
-    }
-
-    @Override
-    public final String toString() {
-        final StringBuffer sb = new StringBuffer();
-        sb.append( "Most parsimonious duplication model: " + _most_parsimonious_duplication_model );
-        sb.append( ForesterUtil.getLineSeparator() );
-        sb.append( "Speciations sum                    : " + getSpeciationsSum() );
-        sb.append( ForesterUtil.getLineSeparator() );
-        sb.append( "Duplications sum                   : " + getDuplicationsSum() );
-        sb.append( ForesterUtil.getLineSeparator() );
-        if ( !_most_parsimonious_duplication_model ) {
-            sb.append( "Speciation or duplications sum     : " + getSpeciationOrDuplicationEventsSum() );
-            sb.append( ForesterUtil.getLineSeparator() );
-        }
-        sb.append( "mapping cost L                     : " + computeMappingCostL() );
-        return sb.toString();
-    }
 }
index 5fd0a3b..6c4d267 100644 (file)
@@ -26,28 +26,49 @@ package org.forester.sdi;
 \r
 import java.util.ArrayList;\r
 import java.util.List;\r
+import java.util.Set;\r
+import java.util.SortedSet;\r
 \r
 import org.forester.phylogeny.Phylogeny;\r
 import org.forester.phylogeny.PhylogenyBranch;\r
 import org.forester.phylogeny.PhylogenyMethods;\r
 import org.forester.phylogeny.PhylogenyNode;\r
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;\r
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;\r
 import org.forester.util.BasicDescriptiveStatistics;\r
 \r
-public class GSDIR extends GSDI {\r
+public class GSDIR {\r
 \r
     private int                              _min_duplications_sum;\r
     private final BasicDescriptiveStatistics _duplications_sum_stats;\r
     private final List<Phylogeny>            _min_duplications_sum_gene_trees;\r
+    protected int                            _speciations_sum;\r
+    protected int                            _duplications_sum;\r
+    private final List<PhylogenyNode>        _stripped_gene_tree_nodes;\r
+    private final List<PhylogenyNode>        _stripped_species_tree_nodes;\r
+    private final Set<PhylogenyNode>         _mapped_species_tree_nodes;\r
+    private final TaxonomyComparisonBase     _tax_comp_base;\r
+    private final SortedSet<String>          _scientific_names_mapped_to_reduced_specificity;\r
 \r
     public GSDIR( final Phylogeny gene_tree,\r
                   final Phylogeny species_tree,\r
                   final boolean strip_gene_tree,\r
                   final boolean strip_species_tree ) throws SDIException {\r
-        super( gene_tree.copy(), species_tree, strip_gene_tree );\r
-        linkNodesOfG( null, strip_gene_tree, strip_species_tree );\r
+        _speciations_sum = 0;\r
+        _duplications_sum = 0;\r
+        final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree,\r
+                                                                           species_tree,\r
+                                                                           null,\r
+                                                                           strip_gene_tree,\r
+                                                                           strip_species_tree );\r
+        _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();\r
+        _stripped_species_tree_nodes = nodes_linking_result.getStrippedSpeciesTreeNodes();\r
+        _mapped_species_tree_nodes = nodes_linking_result.getMappedSpeciesTreeNodes();\r
+        _scientific_names_mapped_to_reduced_specificity = nodes_linking_result\r
+                .getScientificNamesMappedToReducedSpecificity();\r
+        _tax_comp_base = nodes_linking_result.getTaxCompBase();\r
         final List<PhylogenyBranch> gene_tree_branches_post_order = new ArrayList<PhylogenyBranch>();\r
-        for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) {\r
+        for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {\r
             final PhylogenyNode n = it.next();\r
             if ( !n.isRoot() && !( n.getParent().isRoot() && n.isFirstChildNode() ) ) {\r
                 gene_tree_branches_post_order.add( new PhylogenyBranch( n, n.getParent() ) );\r
@@ -59,8 +80,8 @@ public class GSDIR extends GSDI {
         for( final PhylogenyBranch branch : gene_tree_branches_post_order ) {\r
             _duplications_sum = 0;\r
             _speciations_sum = 0;\r
-            _gene_tree.reRoot( branch );\r
-            PhylogenyMethods.preOrderReId( getSpeciesTree() );\r
+            gene_tree.reRoot( branch );\r
+            PhylogenyMethods.preOrderReId( species_tree );\r
             //TEST, remove later\r
             //            for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) {\r
             //                final PhylogenyNode g = it.next();\r
@@ -68,14 +89,16 @@ public class GSDIR extends GSDI {
             //                    g.setLink( null );\r
             //                }\r
             //            }\r
-            geneTreePostOrderTraversal();\r
+            final GSDIsummaryResult gsdi_summary_result = new GSDIsummaryResult();\r
+            GSDI.geneTreePostOrderTraversal( gene_tree, true, gsdi_summary_result );\r
             if ( _duplications_sum < _min_duplications_sum ) {\r
                 _min_duplications_sum = _duplications_sum;\r
                 _min_duplications_sum_gene_trees.clear();\r
-                _min_duplications_sum_gene_trees.add( getGeneTree().copy() );\r
+                _min_duplications_sum_gene_trees.add( gene_tree.copy() );\r
+                //_speciations_sum\r
             }\r
             else if ( _duplications_sum == _min_duplications_sum ) {\r
-                _min_duplications_sum_gene_trees.add( getGeneTree().copy() );\r
+                _min_duplications_sum_gene_trees.add( gene_tree.copy() );\r
             }\r
             _duplications_sum_stats.addValue( _duplications_sum );\r
         }\r
@@ -92,4 +115,24 @@ public class GSDIR extends GSDI {
     public BasicDescriptiveStatistics getDuplicationsSumStats() {\r
         return _duplications_sum_stats;\r
     }\r
+\r
+    public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {\r
+        return _mapped_species_tree_nodes;\r
+    }\r
+\r
+    public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {\r
+        return _scientific_names_mapped_to_reduced_specificity;\r
+    }\r
+\r
+    public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {\r
+        return _stripped_gene_tree_nodes;\r
+    }\r
+\r
+    public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {\r
+        return _stripped_species_tree_nodes;\r
+    }\r
+\r
+    public TaxonomyComparisonBase getTaxCompBase() {\r
+        return _tax_comp_base;\r
+    }\r
 }\r
diff --git a/forester/java/src/org/forester/sdi/GSDIsummaryResult.java b/forester/java/src/org/forester/sdi/GSDIsummaryResult.java
new file mode 100644 (file)
index 0000000..2fdf251
--- /dev/null
@@ -0,0 +1,39 @@
+
+package org.forester.sdi;
+
+final class GSDIsummaryResult {
+
+    private int _speciation_or_duplication_events_sum;
+    private int _speciations_sum;
+    private int _duplications_sum;
+
+    GSDIsummaryResult() {
+        _speciation_or_duplication_events_sum = 0;
+        _speciations_sum = 0;
+        _duplications_sum = 0;
+    }
+
+    final int getDuplicationsSum() {
+        return _duplications_sum;
+    }
+
+    final int getSpeciationOrDuplicationEventsSum() {
+        return _speciation_or_duplication_events_sum;
+    }
+
+    final int getSpeciationsSum() {
+        return _speciations_sum;
+    }
+
+    final void increaseDuplicationsSum() {
+        ++_duplications_sum;
+    }
+
+    final void increaseSpeciationOrDuplicationEventsSum() {
+        ++_speciation_or_duplication_events_sum;
+    }
+
+    final void increaseSpeciationsSum() {
+        ++_speciations_sum;
+    }
+}
diff --git a/forester/java/src/org/forester/sdi/NodesLinkingResult.java b/forester/java/src/org/forester/sdi/NodesLinkingResult.java
new file mode 100644 (file)
index 0000000..dd5bc83
--- /dev/null
@@ -0,0 +1,53 @@
+
+package org.forester.sdi;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
+
+final class NodesLinkingResult {
+
+    private final List<PhylogenyNode> _stripped_gene_tree_nodes;
+    private final List<PhylogenyNode> _stripped_species_tree_nodes;
+    private final Set<PhylogenyNode>  _mapped_species_tree_nodes;
+    private TaxonomyComparisonBase    _tax_comp_base;
+    private final SortedSet<String>   _scientific_names_mapped_to_reduced_specificity;
+
+    NodesLinkingResult() {
+        _stripped_gene_tree_nodes = new ArrayList<PhylogenyNode>();
+        _stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
+        _mapped_species_tree_nodes = new HashSet<PhylogenyNode>();
+        _scientific_names_mapped_to_reduced_specificity = new TreeSet<String>();
+        _tax_comp_base = null;
+    }
+
+    final List<PhylogenyNode> getStrippedGeneTreeNodes() {
+        return _stripped_gene_tree_nodes;
+    }
+
+    final List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
+        return _stripped_species_tree_nodes;
+    }
+
+    final Set<PhylogenyNode> getMappedSpeciesTreeNodes() {
+        return _mapped_species_tree_nodes;
+    }
+
+    final TaxonomyComparisonBase getTaxCompBase() {
+        return _tax_comp_base;
+    }
+
+    final void setTaxCompBase( final TaxonomyComparisonBase tax_comp_base ) {
+        _tax_comp_base = tax_comp_base;
+    }
+
+    final SortedSet<String> getScientificNamesMappedToReducedSpecificity() {
+        return _scientific_names_mapped_to_reduced_specificity;
+    }
+}
index fbc3c65..0bb74d3 100644 (file)
@@ -23,7 +23,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org
+// WWW: www.phylosoft.org/forester
 
 package org.forester.sdi;
 
@@ -31,17 +31,42 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.Event;
 import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 import org.forester.util.ForesterUtil;
 
-public abstract class SDI {
+/*
+ * Implements our algorithm for speciation - duplication inference (SDI). <p>
+ * Reference: </p> <ul> <li>Zmasek, C.M. and Eddy, S.R. (2001) "A simple
+ * algorithm to infer gene duplication and speciation events on a gene tree".
+ * Bioinformatics, in press. </ul> <p> The initialization is accomplished by:
+ * </p> <ul> <li>method "linkExtNodesOfG()" of class SDI: setting the links for
+ * the external nodes of the gene tree <li>"preorderReID(int)" from class
+ * Phylogeny: numbering of nodes of the species tree in preorder <li>the
+ * optional stripping of the species tree is accomplished by method
+ * "stripTree(Phylogeny,Phylogeny)" of class Phylogeny </ul> <p> The recursion
+ * part is accomplished by this class' method
+ * "geneTreePostOrderTraversal(PhylogenyNode)". <p> Requires JDK 1.2 or greater.
+ * 
+ * @see SDI#linkNodesOfG()
+ * 
+ * @see Phylogeny#preorderReID(int)
+ * 
+ * @see
+ * PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny)
+ * 
+ * @see #geneTreePostOrderTraversal(PhylogenyNode)
+ * 
+ * @author Christian M. Zmasek
+ * 
+ * @version 1.102 -- last modified: 10/02/01
+ */
+public class SDI {
 
-    public enum ALGORITHM {
-        GSDIR, GSDI, SDI, SDIR
-    }
     final Phylogeny _gene_tree;
     final Phylogeny _species_tree;
     int             _duplications_sum; // Sum of duplications.
@@ -50,22 +75,15 @@ public abstract class SDI {
     /**
      * Constructor which sets the gene tree and the species tree to be compared.
      * species_tree is the species tree to which the gene tree gene_tree will be
-     * compared to.
-     * Infers for each PhylogenyNode of gene_tree whether
-     * it represents a speciation or duplication event by calculating and
-     * interpreting the mapping function M. The most parsimonious sequence of
-     * speciation and duplication events is assumed.
-     * The mapping cost L can be
-     * calculated with method "computeMappingCost()".
+     * compared to - with method "infer(boolean)". Both Trees must be completely
+     * binary and rooted. The actual inference is accomplished with method
+     * "infer(boolean)". The mapping cost L can then be calculated with method
+     * "computeMappingCost()".
      * <p>
-     * Conditions:
-     * </p>
-     * <ul>
-     * <li>Both Trees must be rooted
-     * <li>Both Trees must have species names in the species name fields of all
-     * their external nodes
-     * </ul>
+     * (Last modified: 01/11/01)
      * 
+     * @see #infer(boolean)
+     * @see SDI#computeMappingCostL()
      * @param gene_tree
      *            reference to a rooted binary gene Phylogeny to which assign
      *            duplication vs speciation, must have species names in the
@@ -74,8 +92,9 @@ public abstract class SDI {
      *            reference to a rooted binary species Phylogeny which might get
      *            stripped in the process, must have species names in the
      *            species name fields for all external nodes
+     * @throws SDIException 
      */
-    public SDI( final Phylogeny gene_tree, final Phylogeny species_tree ) {
+    public SDI( final Phylogeny gene_tree, final Phylogeny species_tree ) throws SDIException {
         if ( species_tree.isEmpty() || gene_tree.isEmpty() ) {
             throw new IllegalArgumentException( "attempt to infer duplications using empty tree(s)" );
         }
@@ -84,10 +103,217 @@ public abstract class SDI {
         }
         _gene_tree = gene_tree;
         _species_tree = species_tree;
-        _duplications_sum = 0;
         _mapping_cost = -1;
+        _duplications_sum = 0;
+        PhylogenyMethods.preOrderReId( getSpeciesTree() );
+        linkNodesOfG();
+        geneTreePostOrderTraversal( getGeneTree().getRoot() );
+    }
+
+    /**
+     * Computes the cost of mapping the gene tree gene_tree onto the species
+     * tree species_tree. Before this method can be called, the mapping has to
+     * be calculated with method "infer(boolean)".
+     * <p>
+     * Reference. Zhang, L. (1997) On a Mirkin-Muchnik-Smith Conjecture for
+     * Comparing Molecular Phylogenies. Journal of Computational Biology 4
+     * 177-187.
+     * 
+     * @return the mapping cost "L"
+     */
+    public int computeMappingCostL() {
+        _species_tree.levelOrderReID();
+        _mapping_cost = 0;
+        computeMappingCostHelper( _gene_tree.getRoot() );
+        return _mapping_cost;
+    }
+
+    /**
+     * Returns the number of duplications.
+     * 
+     * @return number of duplications
+     */
+    public int getDuplicationsSum() {
+        return _duplications_sum;
+    }
+
+    /**
+     * Returns the gene tree.
+     * 
+     * @return gene tree
+     */
+    public Phylogeny getGeneTree() {
+        return _gene_tree;
+    }
+
+    /**
+     * Returns the species tree.
+     * 
+     * @return species tree
+     */
+    public Phylogeny getSpeciesTree() {
+        return _species_tree;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( getClass() );
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        sb.append( "Duplications sum                   : " + getDuplicationsSum() );
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        sb.append( "mapping cost L                     : " + computeMappingCostL() );
+        return sb.toString();
     }
 
+    /**
+     * Traverses the subtree of PhylogenyNode g in postorder, calculating the
+     * mapping function M, and determines which nodes represent speciation
+     * events and which ones duplication events.
+     * <p>
+     * Preconditions: Mapping M for external nodes must have been calculated and
+     * the species tree must be labelled in preorder.
+     * <p>
+     * (Last modified: 01/11/01)
+     * 
+     * @param g
+     *            starting node of a gene tree - normally the root
+     */
+    void geneTreePostOrderTraversal( final PhylogenyNode g ) {
+        PhylogenyNode a, b;
+        if ( !g.isExternal() ) {
+            geneTreePostOrderTraversal( g.getChildNode( 0 ) );
+            geneTreePostOrderTraversal( g.getChildNode( 1 ) );
+            a = g.getChildNode( 0 ).getLink();
+            b = g.getChildNode( 1 ).getLink();
+            while ( a != b ) {
+                if ( a.getId() > b.getId() ) {
+                    a = a.getParent();
+                }
+                else {
+                    b = b.getParent();
+                }
+            }
+            g.setLink( a );
+            // Determines whether dup. or spec.
+            Event event = null;
+            if ( ( a == g.getChildNode( 0 ).getLink() ) || ( a == g.getChildNode( 1 ).getLink() ) ) {
+                event = Event.createSingleDuplicationEvent();
+                ++_duplications_sum;
+            }
+            else {
+                event = Event.createSingleSpeciationEvent();
+            }
+            g.getNodeData().setEvent( event );
+        }
+    } // geneTreePostOrderTraversal( PhylogenyNode )
+
+    /**
+     * Calculates the mapping function for the external nodes of the gene tree:
+     * links (sets the field "link" of PhylogenyNode) each external
+     * PhylogenyNode of gene_tree to the external PhylogenyNode of species_tree
+     * which has the same species name.
+     * @throws SDIException 
+     */
+    final void linkNodesOfG() throws SDIException {
+        final Map<String, PhylogenyNode> speciestree_ext_nodes = new HashMap<String, PhylogenyNode>();
+        final TaxonomyComparisonBase tax_comp_base = determineTaxonomyComparisonBase();
+        // Put references to all external nodes of the species tree into a map.
+        // Stringyfied taxonomy is the key, node is the value.
+        for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode s = iter.next();
+            final String tax_str = SDIutil.taxonomyToString( s, tax_comp_base );
+            if ( speciestree_ext_nodes.containsKey( tax_str ) ) {
+                throw new IllegalArgumentException( "taxonomy [" + s.getNodeData().getTaxonomy()
+                        + "] is not unique in species phylogeny" );
+            }
+            speciestree_ext_nodes.put( tax_str, s );
+        }
+        // Retrieve the reference to the node with a matching stringyfied taxonomy.
+        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode g = iter.next();
+            final String tax_str = SDIutil.taxonomyToString( g, tax_comp_base );
+            final PhylogenyNode s = speciestree_ext_nodes.get( tax_str );
+            if ( s == null ) {
+                throw new IllegalArgumentException( "taxonomy [" + g.getNodeData().getTaxonomy()
+                        + "] not present in species tree" );
+            }
+            g.setLink( s );
+        }
+    }
+
+    /**
+     * Updates the mapping function M after the root of the gene tree has been
+     * moved by one branch. It calculates M for the root of the gene tree and
+     * one of its two children.
+     * <p>
+     * To be used ONLY by method "SDIunrooted.fastInfer(Phylogeny,Phylogeny)".
+     * <p>
+     * (Last modfied: 10/02/01)
+     * 
+     * @param prev_root_was_dup
+     *            true if the previous root was a duplication, false otherwise
+     * @param prev_root_c1
+     *            child 1 of the previous root
+     * @param prev_root_c2
+     *            child 2 of the previous root
+     * @return number of duplications which have been assigned in gene tree
+     */
+    int updateM( final boolean prev_root_was_dup, final PhylogenyNode prev_root_c1, final PhylogenyNode prev_root_c2 ) {
+        final PhylogenyNode root = getGeneTree().getRoot();
+        if ( ( root.getChildNode1() == prev_root_c1 ) || ( root.getChildNode2() == prev_root_c1 ) ) {
+            calculateMforNode( prev_root_c1 );
+        }
+        else {
+            calculateMforNode( prev_root_c2 );
+        }
+        Event event = null;
+        if ( prev_root_was_dup ) {
+            event = Event.createSingleDuplicationEvent();
+        }
+        else {
+            event = Event.createSingleSpeciationEvent();
+        }
+        root.getNodeData().setEvent( event );
+        calculateMforNode( root );
+        return getDuplicationsSum();
+    } // updateM( boolean, PhylogenyNode, PhylogenyNode )
+
+    // Helper method for updateM( boolean, PhylogenyNode, PhylogenyNode )
+    // Calculates M for PhylogenyNode n, given that M for the two children
+    // of n has been calculated.
+    // (Last modified: 10/02/01)
+    private void calculateMforNode( final PhylogenyNode n ) {
+        if ( !n.isExternal() ) {
+            final boolean was_duplication = n.isDuplication();
+            PhylogenyNode a = n.getChildNode1().getLink();
+            PhylogenyNode b = n.getChildNode2().getLink();
+            while ( a != b ) {
+                if ( a.getId() > b.getId() ) {
+                    a = a.getParent();
+                }
+                else {
+                    b = b.getParent();
+                }
+            }
+            n.setLink( a );
+            Event event = null;
+            if ( ( a == n.getChildNode1().getLink() ) || ( a == n.getChildNode2().getLink() ) ) {
+                event = Event.createSingleDuplicationEvent();
+                if ( !was_duplication ) {
+                    ++_duplications_sum;
+                }
+            }
+            else {
+                event = Event.createSingleSpeciationEvent();
+                if ( was_duplication ) {
+                    --_duplications_sum;
+                }
+            }
+            n.getNodeData().setEvent( event );
+        }
+    } // calculateMforNode( PhylogenyNode )
+
     // Helper method for "computeMappingCost()".
     private void computeMappingCostHelper( final PhylogenyNode g ) {
         if ( !g.isExternal() ) {
@@ -109,24 +335,6 @@ public abstract class SDI {
         }
     }
 
-    /**
-     * Computes the cost of mapping the gene tree gene_tree onto the species
-     * tree species_tree. Before this method can be called, the mapping has to
-     * be calculated with method "infer(boolean)".
-     * <p>
-     * Reference. Zhang, L. (1997) On a Mirkin-Muchnik-Smith Conjecture for
-     * Comparing Molecular Phylogenies. Journal of Computational Biology 4
-     * 177-187.
-     * 
-     * @return the mapping cost "L"
-     */
-    public int computeMappingCostL() {
-        _species_tree.levelOrderReID();
-        _mapping_cost = 0;
-        computeMappingCostHelper( _gene_tree.getRoot() );
-        return _mapping_cost;
-    }
-
     private TaxonomyComparisonBase determineTaxonomyComparisonBase() {
         TaxonomyComparisonBase base = null;
         boolean all_have_id = true;
@@ -184,67 +392,6 @@ public abstract class SDI {
     }
 
     /**
-     * Returns the number of duplications.
-     * 
-     * @return number of duplications
-     */
-    public int getDuplicationsSum() {
-        return _duplications_sum;
-    }
-
-    /**
-     * Returns the gene tree.
-     * 
-     * @return gene tree
-     */
-    public Phylogeny getGeneTree() {
-        return _gene_tree;
-    }
-
-    /**
-     * Returns the species tree.
-     * 
-     * @return species tree
-     */
-    public Phylogeny getSpeciesTree() {
-        return _species_tree;
-    }
-
-    /**
-     * Calculates the mapping function for the external nodes of the gene tree:
-     * links (sets the field "link" of PhylogenyNode) each external
-     * PhylogenyNode of gene_tree to the external PhylogenyNode of species_tree
-     * which has the same species name.
-     * @throws SDIException 
-     */
-    void linkNodesOfG() throws SDIException {
-        final Map<String, PhylogenyNode> speciestree_ext_nodes = new HashMap<String, PhylogenyNode>();
-        final TaxonomyComparisonBase tax_comp_base = determineTaxonomyComparisonBase();
-        // Put references to all external nodes of the species tree into a map.
-        // Stringyfied taxonomy is the key, node is the value.
-        for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
-            final PhylogenyNode s = iter.next();
-            final String tax_str = taxonomyToString( s, tax_comp_base );
-            if ( speciestree_ext_nodes.containsKey( tax_str ) ) {
-                throw new IllegalArgumentException( "taxonomy [" + s.getNodeData().getTaxonomy()
-                        + "] is not unique in species phylogeny" );
-            }
-            speciestree_ext_nodes.put( tax_str, s );
-        }
-        // Retrieve the reference to the node with a matching stringyfied taxonomy.
-        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
-            final PhylogenyNode g = iter.next();
-            final String tax_str = taxonomyToString( g, tax_comp_base );
-            final PhylogenyNode s = speciestree_ext_nodes.get( tax_str );
-            if ( s == null ) {
-                throw new IllegalArgumentException( "taxonomy [" + g.getNodeData().getTaxonomy()
-                        + "] not present in species tree" );
-            }
-            g.setLink( s );
-        }
-    }
-
-    /**
      * Calculates the mapping function for the external nodes of the gene tree:
      * links (sets the field "link" of PhylogenyNode) each external by taxonomy
      * identifier
@@ -252,7 +399,7 @@ public abstract class SDI {
      * which has the same species name.
      * Olivier CHABROL : olivier.chabrol@univ-provence.fr
      */
-    void linkNodesOfGByTaxonomyIdentifier() {
+    private final void linkNodesOfGByTaxonomyIdentifier() {
         final HashMap<String, PhylogenyNode> speciestree_ext_nodes = new HashMap<String, PhylogenyNode>();
         if ( _species_tree.getFirstExternalNode().isRoot() ) {
             speciestree_ext_nodes.put( _species_tree.getFirstExternalNode().getNodeData().getTaxonomy().getIdentifier()
@@ -276,56 +423,4 @@ public abstract class SDI {
             g.setLink( s );
         }
     }
-
-    @Override
-    public String toString() {
-        final StringBuffer sb = new StringBuffer();
-        sb.append( getClass() );
-        sb.append( ForesterUtil.LINE_SEPARATOR );
-        sb.append( "Duplications sum                   : " + getDuplicationsSum() );
-        sb.append( ForesterUtil.LINE_SEPARATOR );
-        sb.append( "mapping cost L                     : " + computeMappingCostL() );
-        return sb.toString();
-    }
-
-    static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
-        switch ( base ) {
-            case ID:
-                final Identifier id = n.getNodeData().getTaxonomy().getIdentifier();
-                if ( id == null ) {
-                    return null;
-                }
-                return id.getValuePlusProvider();
-            case CODE:
-                return n.getNodeData().getTaxonomy().getTaxonomyCode();
-            case SCIENTIFIC_NAME:
-                return n.getNodeData().getTaxonomy().getScientificName();
-            default:
-                throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base );
-        }
-    }
-
-    public enum TaxonomyComparisonBase {
-        ID {
-
-            @Override
-            public String toString() {
-                return "taxonomy id";
-            }
-        },
-        CODE {
-
-            @Override
-            public String toString() {
-                return "taxonomy code/mnemonic";
-            }
-        },
-        SCIENTIFIC_NAME {
-
-            @Override
-            public String toString() {
-                return "scientific name";
-            }
-        }
-    }
-}
+} // End of class SDIse.
index ce7fc00..7af2359 100644 (file)
@@ -222,7 +222,7 @@ public class SDIR {
                               final boolean return_trees,
                               int max_trees_to_return ) throws SDIException {
         init();
-        SDIse sdise = null;
+        SDI sdise = null;
         final ArrayList<Phylogeny> trees = new ArrayList<Phylogeny>();
         Phylogeny[] tree_array = null;
         List<PhylogenyBranch> branches = null;
@@ -282,7 +282,7 @@ public class SDIR {
         g.reRoot( g.getFirstExternalNode() );
         branches = SDIR.getBranchesInPreorder( g );
         if ( minimize_mapping_cost || minimize_sum_of_dup ) {
-            sdise = new SDIse( g, species_tree );
+            sdise = new SDI( g, species_tree );
             duplications = sdise.getDuplicationsSum();
         }
         final Set<PhylogenyBranch> used_root_placements = new HashSet<PhylogenyBranch>();
@@ -418,7 +418,7 @@ public class SDIR {
                     height = height__diff[ 0 ];
                     diff = height__diff[ 1 ];
                     if ( Math.abs( diff ) < SDIR.ZERO_DIFF ) {
-                        sdise = new SDIse( g, species_tree );
+                        sdise = new SDI( g, species_tree );
                         min_duplications = sdise.getDuplicationsSum();
                         min_height = height;
                         min_diff = Math.abs( diff );
diff --git a/forester/java/src/org/forester/sdi/SDIse.java b/forester/java/src/org/forester/sdi/SDIse.java
deleted file mode 100644 (file)
index bf06a41..0000000
+++ /dev/null
@@ -1,206 +0,0 @@
-// $Id:
-// FORESTER -- software libraries and applications
-// for evolutionary biology research and applications.
-//
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
-// Copyright (C) 2000-2001 Washington University School of Medicine
-// and Howard Hughes Medical Institute
-// All rights reserved
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-//
-// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
-
-package org.forester.sdi;
-
-import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyMethods;
-import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Event;
-
-/*
- * Implements our algorithm for speciation - duplication inference (SDI). <p>
- * Reference: </p> <ul> <li>Zmasek, C.M. and Eddy, S.R. (2001) "A simple
- * algorithm to infer gene duplication and speciation events on a gene tree".
- * Bioinformatics, in press. </ul> <p> The initialization is accomplished by:
- * </p> <ul> <li>method "linkExtNodesOfG()" of class SDI: setting the links for
- * the external nodes of the gene tree <li>"preorderReID(int)" from class
- * Phylogeny: numbering of nodes of the species tree in preorder <li>the
- * optional stripping of the species tree is accomplished by method
- * "stripTree(Phylogeny,Phylogeny)" of class Phylogeny </ul> <p> The recursion
- * part is accomplished by this class' method
- * "geneTreePostOrderTraversal(PhylogenyNode)". <p> Requires JDK 1.2 or greater.
- * 
- * @see SDI#linkNodesOfG()
- * 
- * @see Phylogeny#preorderReID(int)
- * 
- * @see
- * PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny)
- * 
- * @see #geneTreePostOrderTraversal(PhylogenyNode)
- * 
- * @author Christian M. Zmasek
- * 
- * @version 1.102 -- last modified: 10/02/01
- */
-public class SDIse extends SDI {
-
-    /**
-     * Constructor which sets the gene tree and the species tree to be compared.
-     * species_tree is the species tree to which the gene tree gene_tree will be
-     * compared to - with method "infer(boolean)". Both Trees must be completely
-     * binary and rooted. The actual inference is accomplished with method
-     * "infer(boolean)". The mapping cost L can then be calculated with method
-     * "computeMappingCost()".
-     * <p>
-     * (Last modified: 01/11/01)
-     * 
-     * @see #infer(boolean)
-     * @see SDI#computeMappingCostL()
-     * @param gene_tree
-     *            reference to a rooted binary gene Phylogeny to which assign
-     *            duplication vs speciation, must have species names in the
-     *            species name fields for all external nodes
-     * @param species_tree
-     *            reference to a rooted binary species Phylogeny which might get
-     *            stripped in the process, must have species names in the
-     *            species name fields for all external nodes
-     * @throws SDIException 
-     */
-    public SDIse( final Phylogeny gene_tree, final Phylogeny species_tree ) throws SDIException {
-        super( gene_tree, species_tree );
-        _duplications_sum = 0;
-        PhylogenyMethods.preOrderReId( getSpeciesTree() );
-        linkNodesOfG();
-        geneTreePostOrderTraversal( getGeneTree().getRoot() );
-    }
-
-    // Helper method for updateM( boolean, PhylogenyNode, PhylogenyNode )
-    // Calculates M for PhylogenyNode n, given that M for the two children
-    // of n has been calculated.
-    // (Last modified: 10/02/01)
-    private void calculateMforNode( final PhylogenyNode n ) {
-        if ( !n.isExternal() ) {
-            final boolean was_duplication = n.isDuplication();
-            PhylogenyNode a = n.getChildNode1().getLink();
-            PhylogenyNode b = n.getChildNode2().getLink();
-            while ( a != b ) {
-                if ( a.getId() > b.getId() ) {
-                    a = a.getParent();
-                }
-                else {
-                    b = b.getParent();
-                }
-            }
-            n.setLink( a );
-            Event event = null;
-            if ( ( a == n.getChildNode1().getLink() ) || ( a == n.getChildNode2().getLink() ) ) {
-                event = Event.createSingleDuplicationEvent();
-                if ( !was_duplication ) {
-                    ++_duplications_sum;
-                }
-            }
-            else {
-                event = Event.createSingleSpeciationEvent();
-                if ( was_duplication ) {
-                    --_duplications_sum;
-                }
-            }
-            n.getNodeData().setEvent( event );
-        }
-    } // calculateMforNode( PhylogenyNode )
-
-    /**
-     * Traverses the subtree of PhylogenyNode g in postorder, calculating the
-     * mapping function M, and determines which nodes represent speciation
-     * events and which ones duplication events.
-     * <p>
-     * Preconditions: Mapping M for external nodes must have been calculated and
-     * the species tree must be labelled in preorder.
-     * <p>
-     * (Last modified: 01/11/01)
-     * 
-     * @param g
-     *            starting node of a gene tree - normally the root
-     */
-    void geneTreePostOrderTraversal( final PhylogenyNode g ) {
-        PhylogenyNode a, b;
-        if ( !g.isExternal() ) {
-            geneTreePostOrderTraversal( g.getChildNode( 0 ) );
-            geneTreePostOrderTraversal( g.getChildNode( 1 ) );
-            a = g.getChildNode( 0 ).getLink();
-            b = g.getChildNode( 1 ).getLink();
-            while ( a != b ) {
-                if ( a.getId() > b.getId() ) {
-                    a = a.getParent();
-                }
-                else {
-                    b = b.getParent();
-                }
-            }
-            g.setLink( a );
-            // Determines whether dup. or spec.
-            Event event = null;
-            if ( ( a == g.getChildNode( 0 ).getLink() ) || ( a == g.getChildNode( 1 ).getLink() ) ) {
-                event = Event.createSingleDuplicationEvent();
-                ++_duplications_sum;
-            }
-            else {
-                event = Event.createSingleSpeciationEvent();
-            }
-            g.getNodeData().setEvent( event );
-        }
-    } // geneTreePostOrderTraversal( PhylogenyNode )
-
-    /**
-     * Updates the mapping function M after the root of the gene tree has been
-     * moved by one branch. It calculates M for the root of the gene tree and
-     * one of its two children.
-     * <p>
-     * To be used ONLY by method "SDIunrooted.fastInfer(Phylogeny,Phylogeny)".
-     * <p>
-     * (Last modfied: 10/02/01)
-     * 
-     * @param prev_root_was_dup
-     *            true if the previous root was a duplication, false otherwise
-     * @param prev_root_c1
-     *            child 1 of the previous root
-     * @param prev_root_c2
-     *            child 2 of the previous root
-     * @return number of duplications which have been assigned in gene tree
-     */
-    int updateM( final boolean prev_root_was_dup, final PhylogenyNode prev_root_c1, final PhylogenyNode prev_root_c2 ) {
-        final PhylogenyNode root = getGeneTree().getRoot();
-        if ( ( root.getChildNode1() == prev_root_c1 ) || ( root.getChildNode2() == prev_root_c1 ) ) {
-            calculateMforNode( prev_root_c1 );
-        }
-        else {
-            calculateMforNode( prev_root_c2 );
-        }
-        Event event = null;
-        if ( prev_root_was_dup ) {
-            event = Event.createSingleDuplicationEvent();
-        }
-        else {
-            event = Event.createSingleSpeciationEvent();
-        }
-        root.getNodeData().setEvent( event );
-        calculateMforNode( root );
-        return getDuplicationsSum();
-    } // updateM( boolean, PhylogenyNode, PhylogenyNode )
-} // End of class SDIse.
diff --git a/forester/java/src/org/forester/sdi/SDIutil.java b/forester/java/src/org/forester/sdi/SDIutil.java
new file mode 100644 (file)
index 0000000..efe1830
--- /dev/null
@@ -0,0 +1,100 @@
+
+package org.forester.sdi;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+public class SDIutil {
+
+    static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
+        switch ( base ) {
+            case ID:
+                final Identifier id = n.getNodeData().getTaxonomy().getIdentifier();
+                if ( id == null ) {
+                    return null;
+                }
+                return id.getValuePlusProvider();
+            case CODE:
+                return n.getNodeData().getTaxonomy().getTaxonomyCode();
+            case SCIENTIFIC_NAME:
+                return n.getNodeData().getTaxonomy().getScientificName();
+            default:
+                throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base );
+        }
+    }
+
+    public enum ALGORITHM {
+        GSDIR, GSDI, SDI, SDIR
+    }
+
+    public enum TaxonomyComparisonBase {
+        ID {
+
+            @Override
+            public String toString() {
+                return "taxonomy id";
+            }
+        },
+        CODE {
+
+            @Override
+            public String toString() {
+                return "taxonomy code/mnemonic";
+            }
+        },
+        SCIENTIFIC_NAME {
+
+            @Override
+            public String toString() {
+                return "scientific name";
+            }
+        }
+    }
+
+    public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree ) {
+        int with_id_count = 0;
+        int with_code_count = 0;
+        int with_sn_count = 0;
+        int max = 0;
+        for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode g = iter.next();
+            if ( g.getNodeData().isHasTaxonomy() ) {
+                final Taxonomy tax = g.getNodeData().getTaxonomy();
+                if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
+                    if ( ++with_id_count > max ) {
+                        max = with_id_count;
+                    }
+                }
+                if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
+                    if ( ++with_code_count > max ) {
+                        max = with_code_count;
+                    }
+                }
+                if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+                    if ( ++with_sn_count > max ) {
+                        max = with_sn_count;
+                    }
+                }
+            }
+        }
+        if ( max == 0 ) {
+            throw new IllegalArgumentException( "gene tree has no taxonomic data" );
+        }
+        else if ( max == 1 ) {
+            throw new IllegalArgumentException( "gene tree has only one node with taxonomic data" );
+        }
+        else if ( max == with_id_count ) {
+            return TaxonomyComparisonBase.ID;
+        }
+        else if ( max == with_sn_count ) {
+            return TaxonomyComparisonBase.SCIENTIFIC_NAME;
+        }
+        else {
+            return TaxonomyComparisonBase.CODE;
+        }
+    }
+}
index a9dfa66..16f6655 100644 (file)
@@ -35,7 +35,7 @@ import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.data.Event;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
-import org.forester.sdi.SDI.TaxonomyComparisonBase;
+import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 import org.forester.util.ForesterUtil;
 
 public final class TestGSDI {
index 0ab8c91..f739330 100644 (file)
@@ -89,7 +89,7 @@ import org.forester.protein.Protein;
 import org.forester.rio.TestRIO;
 import org.forester.sdi.SDI;
 import org.forester.sdi.SDIR;
-import org.forester.sdi.SDIse;
+import org.forester.sdi.SDI;
 import org.forester.sdi.TestGSDI;
 import org.forester.sequence.BasicSequence;
 import org.forester.sequence.Sequence;
@@ -6255,7 +6255,7 @@ public final class Test {
             final Phylogeny gene1 = factory.create( "(A1[&&NHX:S=yeast],A2[&&NHX:S=yeast])", new NHXParser() )[ 0 ];
             gene1.setRooted( true );
             species1.setRooted( true );
-            final SDI sdi = new SDIse( gene1, species1 );
+            final SDI sdi = new SDI( gene1, species1 );
             if ( !gene1.getRoot().isDuplication() ) {
                 return false;
             }
@@ -6267,7 +6267,7 @@ public final class Test {
                              new NHXParser() )[ 0 ];
             species2.setRooted( true );
             gene2.setRooted( true );
-            final SDI sdi2 = new SDIse( gene2, species2 );
+            final SDI sdi2 = new SDI( gene2, species2 );
             if ( sdi2.getDuplicationsSum() != 0 ) {
                 return false;
             }
@@ -6297,7 +6297,7 @@ public final class Test {
                              new NHXParser() )[ 0 ];
             species3.setRooted( true );
             gene3.setRooted( true );
-            final SDI sdi3 = new SDIse( gene3, species3 );
+            final SDI sdi3 = new SDI( gene3, species3 );
             if ( sdi3.getDuplicationsSum() != 1 ) {
                 return false;
             }
@@ -6315,7 +6315,7 @@ public final class Test {
                              new NHXParser() )[ 0 ];
             species4.setRooted( true );
             gene4.setRooted( true );
-            final SDI sdi4 = new SDIse( gene4, species4 );
+            final SDI sdi4 = new SDI( gene4, species4 );
             if ( sdi4.getDuplicationsSum() != 1 ) {
                 return false;
             }
@@ -6342,7 +6342,7 @@ public final class Test {
                              new NHXParser() )[ 0 ];
             species5.setRooted( true );
             gene5.setRooted( true );
-            final SDI sdi5 = new SDIse( gene5, species5 );
+            final SDI sdi5 = new SDI( gene5, species5 );
             if ( sdi5.getDuplicationsSum() != 2 ) {
                 return false;
             }
@@ -6375,7 +6375,7 @@ public final class Test {
                              new NHXParser() )[ 0 ];
             species6.setRooted( true );
             gene6.setRooted( true );
-            final SDI sdi6 = new SDIse( gene6, species6 );
+            final SDI sdi6 = new SDI( gene6, species6 );
             if ( sdi6.getDuplicationsSum() != 3 ) {
                 return false;
             }
@@ -6422,7 +6422,7 @@ public final class Test {
             final Phylogeny gene7_1 = Test
                     .createPhylogeny( "((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),x[&&NHX:S=x]),m1[&&NHX:S=m1]),i1[&&NHX:S=i1]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" );
             gene7_1.setRooted( true );
-            final SDI sdi7 = new SDIse( gene7_1, species7 );
+            final SDI sdi7 = new SDI( gene7_1, species7 );
             if ( sdi7.getDuplicationsSum() != 0 ) {
                 return false;
             }
@@ -6453,7 +6453,7 @@ public final class Test {
             final Phylogeny gene7_2 = Test
                     .createPhylogeny( "(((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),x[&&NHX:S=x]),m1[&&NHX:S=m1]),i1[&&NHX:S=i1]),j2[&&NHX:S=j2]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" );
             gene7_2.setRooted( true );
-            final SDI sdi7_2 = new SDIse( gene7_2, species7 );
+            final SDI sdi7_2 = new SDI( gene7_2, species7 );
             if ( sdi7_2.getDuplicationsSum() != 1 ) {
                 return false;
             }