taxonomy extraction changed
[jalview.git] / forester / java / src / org / forester / archaeopteryx / MainFrame.java
index 36e691c..bc565ac 100644 (file)
@@ -34,7 +34,6 @@ import java.io.IOException;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Set;
 
@@ -60,11 +59,16 @@ import org.forester.archaeopteryx.tools.ProcessPool;
 import org.forester.archaeopteryx.tools.ProcessRunning;
 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
 import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
 import org.forester.phylogeny.data.Annotation;
 import org.forester.phylogeny.data.NodeVisualization.NodeFill;
 import org.forester.phylogeny.data.NodeVisualization.NodeShape;
+import org.forester.sdi.GSDI;
+import org.forester.sdi.GSDIR;
+import org.forester.sdi.SDIException;
 import org.forester.util.ForesterConstants;
 import org.forester.util.ForesterUtil;
 
@@ -116,6 +120,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
     static final String         SHOW_CONF_STDDEV_LABEL                  = "Show Confidence Standard Deviations";
     static final String         USE_BRACKETS_FOR_CONF_IN_NH_LABEL       = "Use Brackets for Confidence Values";
     static final String         USE_INTERNAL_NAMES_FOR_CONF_IN_NH_LABEL = "Use Internal Node Names for Confidence Values";
+    static final String         SHOW_BASIC_TREE_INFORMATION_LABEL       = "Show Basic Tree Information";
     JMenuBar                    _jmenubar;
     JMenu                       _file_jmenu;
     JMenu                       _tools_menu;
@@ -124,6 +129,13 @@ public abstract class MainFrame extends JFrame implements ActionListener {
     JMenu                       _font_size_menu;
     JMenu                       _help_jmenu;
     JMenuItem[]                 _load_phylogeny_from_webservice_menu_items;
+    // Analysis menu
+    JMenu                       _analysis_menu;
+    JMenuItem                   _load_species_tree_item;
+    JMenuItem                   _gsdi_item;
+    JMenuItem                   _gsdir_item;
+    JMenuItem                   _lineage_inference;
+    JMenuItem                   _function_analysis;
     // file menu:
     JMenuItem                   _open_item;
     JMenuItem                   _open_url_item;
@@ -189,8 +201,9 @@ public abstract class MainFrame extends JFrame implements ActionListener {
     // _  parsing
     JCheckBoxMenuItem           _internal_number_are_confidence_for_nh_parsing_cbmi;
     JRadioButtonMenuItem        _extract_taxonomy_no_rbmi;
-    JRadioButtonMenuItem        _extract_taxonomy_yes_rbmi;
-    JRadioButtonMenuItem        _extract_taxonomy_pfam_rbmi;
+    JRadioButtonMenuItem        _extract_taxonomy_agressive_rbmi;
+    JRadioButtonMenuItem        _extract_taxonomy_pfam_strict_rbmi;
+    JRadioButtonMenuItem        _extract_taxonomy_pfam_relaxed_rbmi;
     JCheckBoxMenuItem           _replace_underscores_cbmi;
     JCheckBoxMenuItem           _use_brackets_for_conf_in_nh_export_cbmi;
     JCheckBoxMenuItem           _use_internal_names_for_conf_in_nh_export_cbmi;
@@ -229,6 +242,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
     Configuration               _configuration;
     JMenuItem                   _remove_branch_color_item;
     Options                     _options;
+    private Phylogeny           _species_tree;
     InferenceManager            _inference_manager;
     final ProcessPool           _process_pool;
     private String              _previous_node_annotation_ref;
@@ -257,6 +271,18 @@ public abstract class MainFrame extends JFrame implements ActionListener {
         else if ( o == _exit_item ) {
             close();
         }
+        else if ( o == _gsdi_item ) {
+            if ( isSubtreeDisplayed() ) {
+                return;
+            }
+            executeGSDI();
+        }
+        else if ( o == _gsdir_item ) {
+            if ( isSubtreeDisplayed() ) {
+                return;
+            }
+            executeGSDIR();
+        }
         else if ( o == _taxcolor_item ) {
             taxColor();
         }
@@ -440,7 +466,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
             about();
         }
         else if ( o == _help_item ) {
-            help( getConfiguration().getWebLinks() );
+            help();
         }
         else if ( o == _website_item ) {
             try {
@@ -627,7 +653,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
 
     void buildViewMenu() {
         _view_jmenu = createMenu( "View", getConfiguration() );
-        _view_jmenu.add( _display_basic_information_item = new JMenuItem( "Display Basic Information" ) );
+        _view_jmenu.add( _display_basic_information_item = new JMenuItem( SHOW_BASIC_TREE_INFORMATION_LABEL ) );
         _view_jmenu.addSeparator();
         _view_jmenu.add( _view_as_XML_item = new JMenuItem( "View as phyloXML" ) );
         _view_jmenu.add( _view_as_NH_item = new JMenuItem( "View as Newick" ) );
@@ -759,6 +785,148 @@ public abstract class MainFrame extends JFrame implements ActionListener {
         JOptionPane.showMessageDialog( this, "Exception" + e, "Error during File|SaveAs", JOptionPane.ERROR_MESSAGE );
     }
 
+    void executeGSDI() {
+        if ( !isOKforSDI( false, true ) ) {
+            return;
+        }
+        if ( !_mainpanel.getCurrentPhylogeny().isRooted() ) {
+            JOptionPane.showMessageDialog( this,
+                                           "Gene tree is not rooted.",
+                                           "Cannot execute GSDI",
+                                           JOptionPane.ERROR_MESSAGE );
+            return;
+        }
+        final Phylogeny gene_tree = _mainpanel.getCurrentPhylogeny().copy();
+        gene_tree.setAllNodesToNotCollapse();
+        gene_tree.recalculateNumberOfExternalDescendants( false );
+        GSDI gsdi = null;
+        final Phylogeny species_tree = getSpeciesTree().copy();
+        try {
+            gsdi = new GSDI( gene_tree, species_tree, false, true, true, true );
+        }
+        catch ( final SDIException e ) {
+            JOptionPane.showMessageDialog( this,
+                                           e.getLocalizedMessage(),
+                                           "Error during GSDI",
+                                           JOptionPane.ERROR_MESSAGE );
+            return;
+        }
+        catch ( final Exception e ) {
+            AptxUtil.unexpectedException( e );
+            return;
+        }
+        gene_tree.setRerootable( false );
+        gene_tree.clearHashIdToNodeMap();
+        gene_tree.recalculateNumberOfExternalDescendants( true );
+        _mainpanel.addPhylogenyInNewTab( gene_tree, getConfiguration(), "gene tree", null );
+        getMainPanel().getControlPanel().setShowEvents( true );
+        showWhole();
+        final int selected = _mainpanel.getTabbedPane().getSelectedIndex();
+        _mainpanel.addPhylogenyInNewTab( species_tree, getConfiguration(), "species tree", null );
+        showWhole();
+        _mainpanel.getTabbedPane().setSelectedIndex( selected );
+        showWhole();
+        _mainpanel.getCurrentTreePanel().setEdited( true );
+        final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree );
+        if ( gsdi.getStrippedExternalGeneTreeNodes().size() > 0 ) {
+            JOptionPane.showMessageDialog( this,
+                                           "Duplications: " + gsdi.getDuplicationsSum() + "\n"
+                                                   + "Potential duplications: "
+                                                   + gsdi.getSpeciationOrDuplicationEventsSum() + "\n"
+                                                   + "Speciations: " + gsdi.getSpeciationsSum() + "\n"
+                                                   + "Stripped gene tree nodes: "
+                                                   + gsdi.getStrippedExternalGeneTreeNodes().size() + "\n"
+                                                   + "Taxonomy linkage based on: " + gsdi.getTaxCompBase() + "\n"
+                                                   + "Number of polytomies in species tree used: " + poly + "\n",
+                                           "GSDI successfully completed",
+                                           JOptionPane.WARNING_MESSAGE );
+        }
+        else {
+            JOptionPane.showMessageDialog( this,
+                                           "Duplications: " + gsdi.getDuplicationsSum() + "\n"
+                                                   + "Potential duplications: "
+                                                   + gsdi.getSpeciationOrDuplicationEventsSum() + "\n"
+                                                   + "Speciations: " + gsdi.getSpeciationsSum() + "\n"
+                                                   + "Stripped gene tree nodes: "
+                                                   + gsdi.getStrippedExternalGeneTreeNodes().size() + "\n"
+                                                   + "Taxonomy linkage based on: " + gsdi.getTaxCompBase() + "\n"
+                                                   + "Number of polytomies in species tree used: " + poly + "\n",
+                                           "GSDI successfully completed",
+                                           JOptionPane.INFORMATION_MESSAGE );
+        }
+    }
+
+    void executeGSDIR() {
+        if ( !isOKforSDI( false, false ) ) {
+            return;
+        }
+        final int p = PhylogenyMethods.countNumberOfPolytomies( _mainpanel.getCurrentPhylogeny() );
+        if ( ( p > 0 )
+                && !( ( p == 1 ) && ( _mainpanel.getCurrentPhylogeny().getRoot().getNumberOfDescendants() == 3 ) ) ) {
+            JOptionPane.showMessageDialog( this,
+                                           "Gene tree is not completely binary",
+                                           "Cannot execute GSDI",
+                                           JOptionPane.ERROR_MESSAGE );
+            return;
+        }
+        final Phylogeny gene_tree = _mainpanel.getCurrentPhylogeny().copy();
+        gene_tree.setAllNodesToNotCollapse();
+        gene_tree.recalculateNumberOfExternalDescendants( false );
+        GSDIR gsdir = null;
+        final Phylogeny species_tree = getSpeciesTree().copy();
+        try {
+            gsdir = new GSDIR( gene_tree, species_tree, true, true, true );
+        }
+        catch ( final SDIException e ) {
+            JOptionPane.showMessageDialog( this,
+                                           e.getLocalizedMessage(),
+                                           "Error during GSDIR",
+                                           JOptionPane.ERROR_MESSAGE );
+            return;
+        }
+        catch ( final Exception e ) {
+            AptxUtil.unexpectedException( e );
+            return;
+        }
+        final Phylogeny result_gene_tree = gsdir.getMinDuplicationsSumGeneTree();
+        result_gene_tree.setRerootable( false );
+        result_gene_tree.clearHashIdToNodeMap();
+        result_gene_tree.recalculateNumberOfExternalDescendants( true );
+        PhylogenyMethods.orderAppearance( result_gene_tree.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME );
+        _mainpanel.addPhylogenyInNewTab( result_gene_tree, getConfiguration(), "gene tree", null );
+        getMainPanel().getControlPanel().setShowEvents( true );
+        showWhole();
+        final int selected = _mainpanel.getTabbedPane().getSelectedIndex();
+        _mainpanel.addPhylogenyInNewTab( species_tree, getConfiguration(), "species tree", null );
+        showWhole();
+        _mainpanel.getTabbedPane().setSelectedIndex( selected );
+        showWhole();
+        _mainpanel.getCurrentTreePanel().setEdited( true );
+        final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree );
+        if ( gsdir.getStrippedExternalGeneTreeNodes().size() > 0 ) {
+            JOptionPane.showMessageDialog( this,
+                                           "Minimal duplications: " + gsdir.getMinDuplicationsSum() + "\n"
+                                                   + "Speciations: " + gsdir.getSpeciationsSum() + "\n"
+                                                   + "Stripped gene tree nodes: "
+                                                   + gsdir.getStrippedExternalGeneTreeNodes().size() + "\n"
+                                                   + "Taxonomy linkage based on: " + gsdir.getTaxCompBase() + "\n"
+                                                   + "Number of polytomies in species tree used: " + poly + "\n",
+                                           "GSDIR successfully completed",
+                                           JOptionPane.WARNING_MESSAGE );
+        }
+        else {
+            JOptionPane.showMessageDialog( this,
+                                           "Minimal duplications: " + gsdir.getMinDuplicationsSum() + "\n"
+                                                   + "Speciations: " + gsdir.getSpeciationsSum() + "\n"
+                                                   + "Stripped gene tree nodes: "
+                                                   + gsdir.getStrippedExternalGeneTreeNodes().size() + "\n"
+                                                   + "Taxonomy linkage based on: " + gsdir.getTaxCompBase() + "\n"
+                                                   + "Number of polytomies in species tree used: " + poly + "\n",
+                                           "GSDIR successfully completed",
+                                           JOptionPane.INFORMATION_MESSAGE );
+        }
+    }
+
     boolean GAndSDoHaveMoreThanOneSpeciesInComman( final Phylogeny gene_tree ) {
         if ( ( gene_tree == null ) || gene_tree.isEmpty() ) {
             JOptionPane.showMessageDialog( this,
@@ -795,7 +963,11 @@ public abstract class MainFrame extends JFrame implements ActionListener {
         return _jmenubar;
     }
 
-    void help( final Map<String, WebLink> weblinks ) {
+    final Phylogeny getSpeciesTree() {
+        return _species_tree;
+    }
+
+    void help() {
         final StringBuilder sb = new StringBuilder();
         sb.append( "Display options\n" );
         sb.append( "-------------------\n" );
@@ -833,14 +1005,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
         sb.append( "Since the Java default memory allocation is quite small, it might by necessary (for trees\n" );
         sb.append( "with more than approximately 5000 external nodes) to increase the memory which Java can use, with\n" );
         sb.append( "the '-Xmx' Java command line option. For example:\n" );
-        sb.append( "java -Xms32m -Xmx256m -cp path\\to\\forester.jar org.forester.archaeopteryx.Archaeopteryx\n\n" );
-        if ( ( weblinks != null ) && ( weblinks.size() > 0 ) ) {
-            sb.append( "Active web links\n" );
-            sb.append( "--------------------\n" );
-            for( final String key : weblinks.keySet() ) {
-                sb.append( " " + weblinks.get( key ).toString() + "\n" );
-            }
-        }
+        sb.append( "java -Xmx1024m -cp path\\to\\forester.jar org.forester.archaeopteryx.Archaeopteryx\n\n" );
         // + "General remarks\n"
         // + "---------------\n"
         // +
@@ -881,7 +1046,6 @@ public abstract class MainFrame extends JFrame implements ActionListener {
         // + "    incorrect and need to be inferred again\n"
         // +
         // "    with: \"SDI\"|\"SDI (Speciation Duplication Inference)\n\n"
-        sb.append( "\n" );
         sb.append( "phyloXML\n" );
         sb.append( "-------------------\n" );
         sb.append( "Reference: " + Constants.PHYLOXML_REFERENCE + "\n" );
@@ -923,6 +1087,36 @@ public abstract class MainFrame extends JFrame implements ActionListener {
         }
     }
 
+    boolean isOKforSDI( final boolean species_tree_has_to_binary, final boolean gene_tree_has_to_binary ) {
+        if ( ( _mainpanel.getCurrentPhylogeny() == null ) || _mainpanel.getCurrentPhylogeny().isEmpty() ) {
+            return false;
+        }
+        else if ( ( getSpeciesTree() == null ) || getSpeciesTree().isEmpty() ) {
+            JOptionPane.showMessageDialog( this,
+                                           "No species tree loaded",
+                                           "Cannot execute GSDI",
+                                           JOptionPane.ERROR_MESSAGE );
+            return false;
+        }
+        else if ( species_tree_has_to_binary && !getSpeciesTree().isCompletelyBinary() ) {
+            JOptionPane.showMessageDialog( this,
+                                           "Species tree is not completely binary",
+                                           "Cannot execute GSDI",
+                                           JOptionPane.ERROR_MESSAGE );
+            return false;
+        }
+        else if ( gene_tree_has_to_binary && !_mainpanel.getCurrentPhylogeny().isCompletelyBinary() ) {
+            JOptionPane.showMessageDialog( this,
+                                           "Gene tree is not completely binary",
+                                           "Cannot execute GSDI",
+                                           JOptionPane.ERROR_MESSAGE );
+            return false;
+        }
+        else {
+            return true;
+        }
+    }
+
     boolean isSubtreeDisplayed() {
         if ( getCurrentTreePanel() != null ) {
             if ( getCurrentTreePanel().isCurrentTreeIsSubtree() ) {
@@ -1003,6 +1197,10 @@ public abstract class MainFrame extends JFrame implements ActionListener {
         }
     }
 
+    final void setSpeciesTree( final Phylogeny species_tree ) {
+        _species_tree = species_tree;
+    }
+
     void setTypeMenuToAllUnselected() {
         _convex_type_cbmi.setSelected( false );
         _curved_type_cbmi.setSelected( false );
@@ -1120,14 +1318,17 @@ public abstract class MainFrame extends JFrame implements ActionListener {
                 && _print_black_and_white_cbmi.isSelected() );
         options.setInternalNumberAreConfidenceForNhParsing( ( _internal_number_are_confidence_for_nh_parsing_cbmi != null )
                 && _internal_number_are_confidence_for_nh_parsing_cbmi.isSelected() );
-        if ( ( _extract_taxonomy_yes_rbmi != null ) && _extract_taxonomy_yes_rbmi.isSelected() ) {
-            options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.YES );
+        if ( ( _extract_taxonomy_pfam_strict_rbmi != null ) && _extract_taxonomy_pfam_strict_rbmi.isSelected() ) {
+            options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+        }
+        else if ( ( _extract_taxonomy_pfam_relaxed_rbmi != null ) && _extract_taxonomy_pfam_relaxed_rbmi.isSelected() ) {
+            options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
         }
-        else if ( ( _extract_taxonomy_pfam_rbmi != null ) && _extract_taxonomy_pfam_rbmi.isSelected() ) {
-            options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+        else if ( ( _extract_taxonomy_agressive_rbmi != null ) && _extract_taxonomy_agressive_rbmi.isSelected() ) {
+            options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
         }
         else if ( ( _extract_taxonomy_no_rbmi != null ) && _extract_taxonomy_no_rbmi.isSelected() ) {
-            options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.NO );
+            options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
         }
         options.setReplaceUnderscoresInNhParsing( ( _replace_underscores_cbmi != null )
                 && _replace_underscores_cbmi.isSelected() );
@@ -1211,7 +1412,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
 
     private void annotateSequences() {
         if ( getCurrentTreePanel() != null ) {
-            final Set<Integer> nodes = getCurrentTreePanel().getFoundNodes();
+            final Set<Long> nodes = getCurrentTreePanel().getFoundNodes();
             if ( ( nodes == null ) || nodes.isEmpty() ) {
                 JOptionPane
                         .showMessageDialog( this,
@@ -1260,7 +1461,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
                         desc = desc.replaceAll( "\\s+", " " );
                     }
                     if ( !ForesterUtil.isEmpty( ref ) || !ForesterUtil.isEmpty( desc ) ) {
-                        for( final Integer id : nodes ) {
+                        for( final Long id : nodes ) {
                             final PhylogenyNode n = phy.getNode( id );
                             ForesterUtil.ensurePresenceOfSequence( n );
                             final Annotation ann = ForesterUtil.isEmpty( ref ) ? new Annotation()
@@ -1360,7 +1561,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
      */
     static void about() {
         final StringBuffer about = new StringBuffer( "Archaeopteryx\nVersion " + Constants.VERSION + "\n" );
-        about.append( "Copyright (C) 2007-2012 Christian M. Zmasek\n" );
+        about.append( "Copyright (C) 2013 Christian M. Zmasek\n" );
         about.append( "All Rights Reserved\n" );
         about.append( "License: GNU Lesser General Public License (LGPL)\n" );
         about.append( "Last modified: " + Constants.PRG_DATE + "\n" );