blast
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 10 Oct 2012 02:12:11 +0000 (02:12 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 10 Oct 2012 02:12:11 +0000 (02:12 +0000)
forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java
forester/java/src/org/forester/archaeopteryx/Constants.java
forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java
forester/java/src/org/forester/archaeopteryx/TreePanel.java
forester/java/src/org/forester/archaeopteryx/tools/Blast.java
forester/java/src/org/forester/phylogeny/data/Identifier.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/util/SequenceIdParser.java

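diff --git a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java
index b72d1b5..c388c5e 100644
--- a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java
+++ b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java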
@@ -53,75 +53,76 @@ import org.forester.util.ForesterUtil;
 // </applet>
 public class ArchaeopteryxE extends JApplet implements ActionListener {
 
-    private final static String  NAME             = "ArchaeopteryxE";
-    private static final long    serialVersionUID = -1220055577935759443L;
-    private Configuration        _configuration;
-    private MainPanelApplets     _main_panel;
-    private JMenuBar             _jmenubar;
-    private JMenu                _options_jmenu;
-    private JMenu                _font_size_menu;
-    private JMenuItem            _super_tiny_fonts_mi;
-    private JMenuItem            _tiny_fonts_mi;
-    private JMenuItem            _small_fonts_mi;
-    private JMenuItem            _medium_fonts_mi;
-    private JMenuItem            _large_fonts_mi;
-    private JMenu                _tools_menu;
-    private JMenuItem            _taxcolor_item;
-    private JMenuItem            _confcolor_item;
-    private JMenuItem            _midpoint_root_item;
-    private JMenu                _view_jmenu;
-    private JMenuItem            _view_as_XML_item;
-    private JMenuItem            _view_as_NH_item;
-    private JMenuItem            _view_as_NHX_item;
-    private JMenuItem            _view_as_nexus_item;
-    private JMenuItem            _display_basic_information_item;
-    private JMenu                _type_menu;
-    private JCheckBoxMenuItem    _rectangular_type_cbmi;
-    private JCheckBoxMenuItem    _triangular_type_cbmi;
-    private JCheckBoxMenuItem    _curved_type_cbmi;
-    private JCheckBoxMenuItem    _convex_type_cbmi;
-    private JCheckBoxMenuItem    _euro_type_cbmi;
-    private JCheckBoxMenuItem    _rounded_type_cbmi;
-    private JCheckBoxMenuItem    _unrooted_type_cbmi;
-    private JCheckBoxMenuItem    _circular_type_cbmi;
-    private JMenuItem            _help_item;
-    private JMenuItem            _about_item;
-    private JMenu                _help_jmenu;
-    private JMenuItem            _website_item;
-    private JMenuItem            _phyloxml_website_item;
-    private JMenuItem            _phyloxml_ref_item;
-    private JMenuItem            _aptx_ref_item;
-    private JMenuItem            _remove_branch_color_item;
-    private JMenuItem            _infer_common_sn_names_item;
-    private JCheckBoxMenuItem    _show_domain_labels;
-    private JCheckBoxMenuItem    _color_labels_same_as_parent_branch;
-    private JCheckBoxMenuItem    _abbreviate_scientific_names;
-    private JCheckBoxMenuItem    _screen_antialias_cbmi;
-    private JCheckBoxMenuItem    _background_gradient_cbmi;
-    private JRadioButtonMenuItem _non_lined_up_cladograms_rbmi;
-    private JRadioButtonMenuItem _uniform_cladograms_rbmi;
-    private JRadioButtonMenuItem _ext_node_dependent_cladogram_rbmi;
-    private Options              _options;
-    private JMenuItem            _choose_font_mi;
-    private JMenuItem            _switch_colors_mi;
-    JCheckBoxMenuItem            _label_direction_cbmi;
-    private JCheckBoxMenuItem    _show_scale_cbmi;
-    private JCheckBoxMenuItem    _search_case_senstive_cbmi;
-    private JCheckBoxMenuItem    _search_whole_words_only_cbmi;
-    private JCheckBoxMenuItem    _inverse_search_result_cbmi;
-    private JCheckBoxMenuItem    _show_overview_cbmi;
-    private JMenuItem            _choose_minimal_confidence_mi;
-    private JCheckBoxMenuItem    _show_branch_length_values_cbmi;
-    private JMenuItem            _collapse_species_specific_subtrees;
-    private JMenuItem            _overview_placment_mi;
-    private ButtonGroup          _radio_group_1;
-    private JCheckBoxMenuItem    _show_default_node_shapes_cbmi;
-    private JMenuItem            _cycle_node_shape_mi;
-    private JMenuItem            _cycle_node_fill_mi;
-    private JMenuItem            _choose_node_size_mi;
-    private JCheckBoxMenuItem    _taxonomy_colorize_node_shapes_cbmi;
-    private JCheckBoxMenuItem    _show_confidence_stddev_cbmi;
-    final LinkedList<TextFrame>  _textframes      = new LinkedList<TextFrame>(); ;
+    private final static String         NAME                  = "ArchaeopteryxE";
+    private static final long           serialVersionUID      = -1220055577935759443L;
+    private Configuration               _configuration;
+    private MainPanelApplets            _main_panel;
+    private JMenuBar                    _jmenubar;
+    private JMenu                       _options_jmenu;
+    private JMenu                       _font_size_menu;
+    private JMenuItem                   _super_tiny_fonts_mi;
+    private JMenuItem                   _tiny_fonts_mi;
+    private JMenuItem                   _small_fonts_mi;
+    private JMenuItem                   _medium_fonts_mi;
+    private JMenuItem                   _large_fonts_mi;
+    private JMenu                       _tools_menu;
+    private JMenuItem                   _taxcolor_item;
+    private JMenuItem                   _confcolor_item;
+    private JMenuItem                   _midpoint_root_item;
+    private JMenu                       _view_jmenu;
+    private JMenuItem                   _view_as_XML_item;
+    private JMenuItem                   _view_as_NH_item;
+    private JMenuItem                   _view_as_NHX_item;
+    private JMenuItem                   _view_as_nexus_item;
+    private JMenuItem                   _display_basic_information_item;
+    private JMenu                       _type_menu;
+    private JCheckBoxMenuItem           _rectangular_type_cbmi;
+    private JCheckBoxMenuItem           _triangular_type_cbmi;
+    private JCheckBoxMenuItem           _curved_type_cbmi;
+    private JCheckBoxMenuItem           _convex_type_cbmi;
+    private JCheckBoxMenuItem           _euro_type_cbmi;
+    private JCheckBoxMenuItem           _rounded_type_cbmi;
+    private JCheckBoxMenuItem           _unrooted_type_cbmi;
+    private JCheckBoxMenuItem           _circular_type_cbmi;
+    private JMenuItem                   _help_item;
+    private JMenuItem                   _about_item;
+    private JMenu                       _help_jmenu;
+    private JMenuItem                   _website_item;
+    private JMenuItem                   _phyloxml_website_item;
+    private JMenuItem                   _phyloxml_ref_item;
+    private JMenuItem                   _aptx_ref_item;
+    private JMenuItem                   _remove_branch_color_item;
+    private JMenuItem                   _infer_common_sn_names_item;
+    private JCheckBoxMenuItem           _show_domain_labels;
+    private JCheckBoxMenuItem           _color_labels_same_as_parent_branch;
+    private JCheckBoxMenuItem           _abbreviate_scientific_names;
+    private JCheckBoxMenuItem           _screen_antialias_cbmi;
+    private JCheckBoxMenuItem           _background_gradient_cbmi;
+    private JRadioButtonMenuItem        _non_lined_up_cladograms_rbmi;
+    private JRadioButtonMenuItem        _uniform_cladograms_rbmi;
+    private JRadioButtonMenuItem        _ext_node_dependent_cladogram_rbmi;
+    private Options                     _options;
+    private JMenuItem                   _choose_font_mi;
+    private JMenuItem                   _switch_colors_mi;
+    JCheckBoxMenuItem                   _label_direction_cbmi;
+    private JCheckBoxMenuItem           _show_scale_cbmi;
+    private JCheckBoxMenuItem           _search_case_senstive_cbmi;
+    private JCheckBoxMenuItem           _search_whole_words_only_cbmi;
+    private JCheckBoxMenuItem           _inverse_search_result_cbmi;
+    private JCheckBoxMenuItem           _show_overview_cbmi;
+    private JMenuItem                   _choose_minimal_confidence_mi;
+    private JCheckBoxMenuItem           _show_branch_length_values_cbmi;
+    private JMenuItem                   _collapse_species_specific_subtrees;
+    private JMenuItem                   _overview_placment_mi;
+    private ButtonGroup                 _radio_group_1;
+    private JCheckBoxMenuItem           _show_default_node_shapes_cbmi;
+    private JMenuItem                   _cycle_node_shape_mi;
+    private JMenuItem                   _cycle_node_fill_mi;
+    private JMenuItem                   _choose_node_size_mi;
+    private JCheckBoxMenuItem           _taxonomy_colorize_node_shapes_cbmi;
+    private JCheckBoxMenuItem           _show_confidence_stddev_cbmi;
+    private final LinkedList<TextFrame> _textframes           = new LinkedList<TextFrame>();
+    private String                      _ext_node_data_buffer = "";
 
     @Override
     public void actionPerformed( final ActionEvent e ) {
@@ -379,6 +380,27 @@ public class ArchaeopteryxE extends JApplet implements ActionListener {
         TextFrame.instantiate( sb.toString(), "Help", _textframes );
     }
 
+    void setCurrentExternalNodesDataBuffer( final String s ) {
+        if ( !ForesterUtil.isEmpty( s ) ) {
+            _ext_node_data_buffer = s.trim();
+        }
+        else {
+            _ext_node_data_buffer = "";
+        }
+    }
+
+    /**
+     * This method returns the current external node data which
+     * has been selected by the user by clicking the "Return ..."
+     * menu item. This method is expected to be called from Javascript or
+     * something like it.
+     * 
+     * @return current external node data as String
+     */
+    public String getCurrentExternalNodesDataBuffer() {
+        return _ext_node_data_buffer;
+    }
+
     /**
      * This method returns the current phylogeny as a string in the chosen format
      * 
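diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java
index 1b47fb9..cb14cef 100644
--- a/forester/java/src/org/forester/archaeopteryx/Constants.java
+++ b/forester/java/src/org/forester/archaeopteryx/Constants.java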
@@ -36,14 +36,14 @@ import org.forester.util.ForesterConstants;
 
 public final class Constants {
 
-    final static boolean        __ALLOW_PHYLOGENETIC_INFERENCE                                = true;
+    final static boolean        __ALLOW_PHYLOGENETIC_INFERENCE                                = false;
     public final static boolean __RELEASE                                                     = false;                                                    // TODO remove me
     public final static boolean __SNAPSHOT_RELEASE                                            = false;                                                    // TODO remove me
     public final static boolean __SYNTH_LF                                                    = false;                                                    // TODO remove me
     public final static boolean ALLOW_DDBJ_BLAST                                              = false;
     public final static String  PRG_NAME                                                      = "Archaeopteryx";
-    final static String         VERSION                                                       = "0.974";
-    final static String         PRG_DATE                                                      = "121005";
+    final static String         VERSION                                                       = "0.975";
+    final static String         PRG_DATE                                                      = "121009";
     final static String         DEFAULT_CONFIGURATION_FILE_NAME                               = "_aptx_configuration_file";
     final static String[]       DEFAULT_FONT_CHOICES                                          = { "Verdana", "Tahoma",
             "Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" };
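diff --git a/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java b/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java
index a7eb07d..2a6fdd3 100644
--- a/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java
+++ b/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java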
@@ -825,7 +825,7 @@ class NodeEditPanel extends JPanel {
                 break;
             case SEQ_MOL_SEQ:
                 AptxUtil.ensurePresenceOfSequence( getMyNode() );
-                getMyNode().getNodeData().getSequence().setMolecularSequence( value );
+                getMyNode().getNodeData().getSequence().setMolecularSequence( value.replaceAll( "[^a-zA-Z-]", "" ) );
                 break;
             case SEQ_NAME:
                 AptxUtil.ensurePresenceOfSequence( getMyNode() );
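diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java
index 47ca58d..0a3cf22 100644
--- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java
+++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java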
@@ -125,6 +125,7 @@ import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.DescriptiveStatistics;
 import org.forester.util.ForesterConstants;
 import org.forester.util.ForesterUtil;
+import org.forester.util.SequenceIdParser;
 
 public final class TreePanel extends JPanel implements ActionListener, MouseWheelListener, Printable {
 
@@ -459,26 +460,41 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
     final private void blast( final PhylogenyNode node ) {
         if ( !isCanBlast( node ) ) {
             JOptionPane.showMessageDialog( this,
-                                           "No sequence information present",
+                                           "Insufficient information present",
                                            "Cannot Blast",
-                                           JOptionPane.WARNING_MESSAGE );
+                                           JOptionPane.INFORMATION_MESSAGE );
             return;
         }
-        if ( node.getNodeData().isHasSequence() || !ForesterUtil.isEmpty( node.getName() ) ) {
+        else {
             final String query = Blast.obtainQueryForBlast( node );
             System.out.println( "query for BLAST is: " + query );
-            boolean nucleotide = false;
+            char type = '?';
             if ( !ForesterUtil.isEmpty( query ) ) {
                 if ( node.getNodeData().isHasSequence() ) {
                     if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getType() ) ) {
-                        if ( !node.getNodeData().getSequence().getType().toLowerCase()
+                        if ( node.getNodeData().getSequence().getType().toLowerCase()
                                 .equals( PhyloXmlUtil.SEQ_TYPE_PROTEIN ) ) {
-                            nucleotide = true;
+                            type = 'p';
+                        }
+                        else {
+                            type = 'n';
                         }
                     }
                     else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
-                        nucleotide = !ForesterUtil.seqIsLikelyToBeAa( node.getNodeData().getSequence()
-                                .getMolecularSequence() );
+                        if ( ForesterUtil.seqIsLikelyToBeAa( node.getNodeData().getSequence().getMolecularSequence() ) ) {
+                            type = 'p';
+                        }
+                        else {
+                            type = 'n';
+                        }
+                    }
+                }
+                if ( type == '?' ) {
+                    if ( SequenceIdParser.isProtein( query ) ) {
+                        type = 'p';
+                    }
+                    else {
+                        type = 'n';
                     }
                 }
                 JApplet applet = null;
@@ -486,7 +502,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
                     applet = obtainApplet();
                 }
                 try {
-                    Blast.openNcbiBlastWeb( query, nucleotide, applet, this );
+                    Blast.openNcbiBlastWeb( query, type == 'n', applet, this );
                 }
                 catch ( final Exception e ) {
                     e.printStackTrace();
@@ -1431,8 +1447,10 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
                         + node.getNumberOfExternalNodes() + ") For Node " + node;
                 if ( getMainPanel().getMainFrame() == null ) {
                     // Must be "E" applet version.
-                    ( ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet() ).showTextFrame( sb
-                            .toString(), title );
+                    final ArchaeopteryxE ae = ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet();
+                    final String s = sb.toString().trim();
+                    ae.showTextFrame( s, title );
+                    ae.setCurrentExternalNodesDataBuffer( s );
                 }
                 else {
                     getMainPanel().getMainFrame().showTextFrame( sb.toString(), title );
@@ -1580,11 +1598,10 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
     }
 
     final private boolean isCanBlast( final PhylogenyNode node ) {
-        return ( ( node.getNodeData().isHasSequence() && ( ( ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil
-                .isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) )
-                || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) || !ForesterUtil.isEmpty( node
-                .getNodeData().getSequence().getMolecularSequence() ) ) ) || ( ( !ForesterUtil.isEmpty( node.getName() ) ) && Blast
-                .isContainsQueryForBlast( node ) ) );
+        if ( !node.getNodeData().isHasSequence() && ForesterUtil.isEmpty( node.getName() ) ) {
+            return false;
+        }
+        return Blast.isContainsQueryForBlast( node );
     }
 
     final boolean isCanCollapse() {
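diff --git a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java
index 4d94450..dc99878 100644
--- a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java
+++ b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java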
@@ -38,7 +38,6 @@ import javax.swing.JApplet;
 import org.forester.archaeopteryx.AptxUtil;
 import org.forester.archaeopteryx.TreePanel;
 import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Identifier;
 import org.forester.util.ForesterUtil;
 import org.forester.util.SequenceIdParser;
@@ -80,33 +79,33 @@ public final class Blast {
             if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
                 query = node.getNodeData().getSequence().getMolecularSequence();
             }
-            else if ( ( node.getNodeData().getSequence().getAccession() != null )
+            if ( ForesterUtil.isEmpty( query ) && ( node.getNodeData().getSequence().getAccession() != null )
                     && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) ) {
-                if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) ) {
-                    query = node.getNodeData().getSequence().getAccession().getSource() + "%7C";
+                final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getAccession()
+                        .getValue() );
+                if ( id != null ) {
+                    query = id.getValue();
                 }
-                query += node.getNodeData().getSequence().getAccession().getValue();
             }
-            else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) {
-                final Accession acc = AptxUtil.obtainSequenceAccessionFromName( node.getNodeData().getSequence()
-                        .getName() );
-                if ( acc != null ) {
-                    query = acc.getSource() + "%7C" + acc.getValue();
+            if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) {
+                final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getName() );
+                if ( id != null ) {
+                    query = id.getValue();
                 }
             }
-        }
-        else if ( !ForesterUtil.isEmpty( node.getName() ) ) {
-            final Accession acc = AptxUtil.obtainSequenceAccessionFromName( node.getName() );
-            if ( acc != null ) {
-                query = acc.getSource() + "%7C" + acc.getValue();
-            }
-            else {
-                final Identifier id = SequenceIdParser.parse( node.getName() );
+            if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) {
+                final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getSymbol() );
                 if ( id != null ) {
                     query = id.getValue();
                 }
             }
         }
+        if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getName() ) ) {
+            final Identifier id = SequenceIdParser.parse( node.getName() );
+            if ( id != null ) {
+                query = id.getValue();
+            }
+        }
         return query;
     }
 
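diff --git a/forester/java/src/org/forester/phylogeny/data/Identifier.java b/forester/java/src/org/forester/phylogeny/data/Identifier.java
index 1314df4..97297f8 100644
--- a/forester/java/src/org/forester/phylogeny/data/Identifier.java
+++ b/forester/java/src/org/forester/phylogeny/data/Identifier.java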
@@ -36,6 +36,7 @@ public final class Identifier implements PhylogenyData {
 
     final public static String NCBI   = "ncbi";
     final public static String REFSEQ = "refseq";
+    final public static String SP     = "sp";
     final private String       _value;
     final private String       _provider;
     final private String       _value_provider;
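diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java
index 5961561..7c554d4 100644
--- a/forester/java/src/org/forester/test/Test.java
+++ b/forester/java/src/org/forester/test/Test.java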
@@ -8672,8 +8672,30 @@ public final class Test {
                 return false;
             }
             // 
+            id = SequenceIdParser.parse( "P4A123" );
+            if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
+                    || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) {
+                if ( id != null ) {
+                    System.out.println( "value   =" + id.getValue() );
+                    System.out.println( "provider=" + id.getProvider() );
+                }
+                return false;
+            }
+            // 
+            id = SequenceIdParser.parse( "pllf[pok P4A123_osdjfosnqo035-9233332904i000490 vf tmv x45" );
+            if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
+                    || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) {
+                if ( id != null ) {
+                    System.out.println( "value   =" + id.getValue() );
+                    System.out.println( "provider=" + id.getProvider() );
+                }
+                return false;
+            }
+            // 
             id = SequenceIdParser.parse( "XP_12345" );
             if ( id != null ) {
+                System.out.println( "value   =" + id.getValue() );
+                System.out.println( "provider=" + id.getProvider() );
                 return false;
             }
             // lcl_91970_unknown_
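diff --git a/forester/java/src/org/forester/util/SequenceIdParser.java b/forester/java/src/org/forester/util/SequenceIdParser.java
index ff9b91d..3b6bc5d 100644
--- a/forester/java/src/org/forester/util/SequenceIdParser.java
+++ b/forester/java/src/org/forester/util/SequenceIdParser.java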
@@ -59,6 +59,9 @@ public final class SequenceIdParser {
     // underscore character ('_'). For example, a RefSeq protein accession is NP_015325. \r
     private final static Pattern REFSEQ_PATTERN                  = Pattern\r
                                                                          .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}_\\d{6,})(?:[^a-zA-Z0-9]|\\Z)" );\r
+    // See: http://web.expasy.org/docs/userman.html#ID_line\r
+    private final static Pattern TREMBL_PATTERN                  = Pattern\r
+                                                                         .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z][0-9][A-Z0-9]{3}[0-9])(?:[^a-zA-Z0-9]|\\Z)" );\r
 \r
     /**\r
      * Returns null if no match.\r
@@ -73,10 +76,22 @@ public final class SequenceIdParser {
         if ( !ForesterUtil.isEmpty( v ) ) {\r
             return new Identifier( v, Identifier.REFSEQ );\r
         }\r
+        v = parseTrEMBLAccessor( s );\r
+        if ( !ForesterUtil.isEmpty( v ) ) {\r
+            return new Identifier( v, Identifier.SP );\r
+        }\r
         return null;\r
     }\r
 \r
-    public static boolean isProtein( final String query ) {\r
+    public final static boolean isProtein( final String query ) {\r
+        final String r1 = parseRefSeqAccessor( query );\r
+        if ( !ForesterUtil.isEmpty( r1 ) && ( r1.charAt( 1 ) == 'P' ) ) {\r
+            return true;\r
+        }\r
+        final String r2 = parseTrEMBLAccessor( query );\r
+        if ( !ForesterUtil.isEmpty( r2 ) ) {\r
+            return true;\r
+        }\r
         return GENBANK_PROTEIN_AC_PATTERN.matcher( query ).lookingAt();\r
     }\r
 \r
@@ -118,6 +133,18 @@ public final class SequenceIdParser {
         return null;\r
     }\r
 \r
+    /**\r
+     * Returns null if no match.\r
+     * \r
+     */\r
+    private final static String parseTrEMBLAccessor( final String query ) {\r
+        final Matcher m = TREMBL_PATTERN.matcher( query );\r
+        if ( m.lookingAt() ) {\r
+            return m.group( 1 );\r
+        }\r
+        return null;\r
+    }\r
+\r
     private SequenceIdParser() {\r
         // Hiding the constructor.\r
     }\r