From 9a612b9eac1a3bc8dfaf2798ee732b52ff6e34e7 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 10 Oct 2012 02:12:11 +0000 Subject: [PATCH] blast --- .../org/forester/archaeopteryx/ArchaeopteryxE.java | 160 +++++++++++--------- .../src/org/forester/archaeopteryx/Constants.java | 6 +- .../org/forester/archaeopteryx/NodeEditPanel.java | 2 +- .../src/org/forester/archaeopteryx/TreePanel.java | 49 ++++-- .../org/forester/archaeopteryx/tools/Blast.java | 35 +++-- .../org/forester/phylogeny/data/Identifier.java | 1 + forester/java/src/org/forester/test/Test.java | 22 +++ .../src/org/forester/util/SequenceIdParser.java | 29 +++- 8 files changed, 196 insertions(+), 108 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java index b72d1b5..c388c5e 100644 --- a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java +++ b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java @@ -53,75 +53,76 @@ import org.forester.util.ForesterUtil; // public class ArchaeopteryxE extends JApplet implements ActionListener { - private final static String NAME = "ArchaeopteryxE"; - private static final long serialVersionUID = -1220055577935759443L; - private Configuration _configuration; - private MainPanelApplets _main_panel; - private JMenuBar _jmenubar; - private JMenu _options_jmenu; - private JMenu _font_size_menu; - private JMenuItem _super_tiny_fonts_mi; - private JMenuItem _tiny_fonts_mi; - private JMenuItem _small_fonts_mi; - private JMenuItem _medium_fonts_mi; - private JMenuItem _large_fonts_mi; - private JMenu _tools_menu; - private JMenuItem _taxcolor_item; - private JMenuItem _confcolor_item; - private JMenuItem _midpoint_root_item; - private JMenu _view_jmenu; - private JMenuItem _view_as_XML_item; - private JMenuItem _view_as_NH_item; - private JMenuItem _view_as_NHX_item; - private JMenuItem _view_as_nexus_item; - private JMenuItem _display_basic_information_item; - private JMenu _type_menu; - private JCheckBoxMenuItem _rectangular_type_cbmi; - private JCheckBoxMenuItem _triangular_type_cbmi; - private JCheckBoxMenuItem _curved_type_cbmi; - private JCheckBoxMenuItem _convex_type_cbmi; - private JCheckBoxMenuItem _euro_type_cbmi; - private JCheckBoxMenuItem _rounded_type_cbmi; - private JCheckBoxMenuItem _unrooted_type_cbmi; - private JCheckBoxMenuItem _circular_type_cbmi; - private JMenuItem _help_item; - private JMenuItem _about_item; - private JMenu _help_jmenu; - private JMenuItem _website_item; - private JMenuItem _phyloxml_website_item; - private JMenuItem _phyloxml_ref_item; - private JMenuItem _aptx_ref_item; - private JMenuItem _remove_branch_color_item; - private JMenuItem _infer_common_sn_names_item; - private JCheckBoxMenuItem _show_domain_labels; - private JCheckBoxMenuItem _color_labels_same_as_parent_branch; - private JCheckBoxMenuItem _abbreviate_scientific_names; - private JCheckBoxMenuItem _screen_antialias_cbmi; - private JCheckBoxMenuItem _background_gradient_cbmi; - private JRadioButtonMenuItem _non_lined_up_cladograms_rbmi; - private JRadioButtonMenuItem _uniform_cladograms_rbmi; - private JRadioButtonMenuItem _ext_node_dependent_cladogram_rbmi; - private Options _options; - private JMenuItem _choose_font_mi; - private JMenuItem _switch_colors_mi; - JCheckBoxMenuItem _label_direction_cbmi; - private JCheckBoxMenuItem _show_scale_cbmi; - private JCheckBoxMenuItem _search_case_senstive_cbmi; - private JCheckBoxMenuItem _search_whole_words_only_cbmi; - private JCheckBoxMenuItem _inverse_search_result_cbmi; - private JCheckBoxMenuItem _show_overview_cbmi; - private JMenuItem _choose_minimal_confidence_mi; - private JCheckBoxMenuItem _show_branch_length_values_cbmi; - private JMenuItem _collapse_species_specific_subtrees; - private JMenuItem _overview_placment_mi; - private ButtonGroup _radio_group_1; - private JCheckBoxMenuItem _show_default_node_shapes_cbmi; - private JMenuItem _cycle_node_shape_mi; - private JMenuItem _cycle_node_fill_mi; - private JMenuItem _choose_node_size_mi; - private JCheckBoxMenuItem _taxonomy_colorize_node_shapes_cbmi; - private JCheckBoxMenuItem _show_confidence_stddev_cbmi; - final LinkedList _textframes = new LinkedList(); ; + private final static String NAME = "ArchaeopteryxE"; + private static final long serialVersionUID = -1220055577935759443L; + private Configuration _configuration; + private MainPanelApplets _main_panel; + private JMenuBar _jmenubar; + private JMenu _options_jmenu; + private JMenu _font_size_menu; + private JMenuItem _super_tiny_fonts_mi; + private JMenuItem _tiny_fonts_mi; + private JMenuItem _small_fonts_mi; + private JMenuItem _medium_fonts_mi; + private JMenuItem _large_fonts_mi; + private JMenu _tools_menu; + private JMenuItem _taxcolor_item; + private JMenuItem _confcolor_item; + private JMenuItem _midpoint_root_item; + private JMenu _view_jmenu; + private JMenuItem _view_as_XML_item; + private JMenuItem _view_as_NH_item; + private JMenuItem _view_as_NHX_item; + private JMenuItem _view_as_nexus_item; + private JMenuItem _display_basic_information_item; + private JMenu _type_menu; + private JCheckBoxMenuItem _rectangular_type_cbmi; + private JCheckBoxMenuItem _triangular_type_cbmi; + private JCheckBoxMenuItem _curved_type_cbmi; + private JCheckBoxMenuItem _convex_type_cbmi; + private JCheckBoxMenuItem _euro_type_cbmi; + private JCheckBoxMenuItem _rounded_type_cbmi; + private JCheckBoxMenuItem _unrooted_type_cbmi; + private JCheckBoxMenuItem _circular_type_cbmi; + private JMenuItem _help_item; + private JMenuItem _about_item; + private JMenu _help_jmenu; + private JMenuItem _website_item; + private JMenuItem _phyloxml_website_item; + private JMenuItem _phyloxml_ref_item; + private JMenuItem _aptx_ref_item; + private JMenuItem _remove_branch_color_item; + private JMenuItem _infer_common_sn_names_item; + private JCheckBoxMenuItem _show_domain_labels; + private JCheckBoxMenuItem _color_labels_same_as_parent_branch; + private JCheckBoxMenuItem _abbreviate_scientific_names; + private JCheckBoxMenuItem _screen_antialias_cbmi; + private JCheckBoxMenuItem _background_gradient_cbmi; + private JRadioButtonMenuItem _non_lined_up_cladograms_rbmi; + private JRadioButtonMenuItem _uniform_cladograms_rbmi; + private JRadioButtonMenuItem _ext_node_dependent_cladogram_rbmi; + private Options _options; + private JMenuItem _choose_font_mi; + private JMenuItem _switch_colors_mi; + JCheckBoxMenuItem _label_direction_cbmi; + private JCheckBoxMenuItem _show_scale_cbmi; + private JCheckBoxMenuItem _search_case_senstive_cbmi; + private JCheckBoxMenuItem _search_whole_words_only_cbmi; + private JCheckBoxMenuItem _inverse_search_result_cbmi; + private JCheckBoxMenuItem _show_overview_cbmi; + private JMenuItem _choose_minimal_confidence_mi; + private JCheckBoxMenuItem _show_branch_length_values_cbmi; + private JMenuItem _collapse_species_specific_subtrees; + private JMenuItem _overview_placment_mi; + private ButtonGroup _radio_group_1; + private JCheckBoxMenuItem _show_default_node_shapes_cbmi; + private JMenuItem _cycle_node_shape_mi; + private JMenuItem _cycle_node_fill_mi; + private JMenuItem _choose_node_size_mi; + private JCheckBoxMenuItem _taxonomy_colorize_node_shapes_cbmi; + private JCheckBoxMenuItem _show_confidence_stddev_cbmi; + private final LinkedList _textframes = new LinkedList(); + private String _ext_node_data_buffer = ""; @Override public void actionPerformed( final ActionEvent e ) { @@ -379,6 +380,27 @@ public class ArchaeopteryxE extends JApplet implements ActionListener { TextFrame.instantiate( sb.toString(), "Help", _textframes ); } + void setCurrentExternalNodesDataBuffer( final String s ) { + if ( !ForesterUtil.isEmpty( s ) ) { + _ext_node_data_buffer = s.trim(); + } + else { + _ext_node_data_buffer = ""; + } + } + + /** + * This method returns the current external node data which + * has been selected by the user by clicking the "Return ..." + * menu item. This method is expected to be called from Javascript or + * something like it. + * + * @return current external node data as String + */ + public String getCurrentExternalNodesDataBuffer() { + return _ext_node_data_buffer; + } + /** * This method returns the current phylogeny as a string in the chosen format * diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java index 1b47fb9..cb14cef 100644 --- a/forester/java/src/org/forester/archaeopteryx/Constants.java +++ b/forester/java/src/org/forester/archaeopteryx/Constants.java @@ -36,14 +36,14 @@ import org.forester.util.ForesterConstants; public final class Constants { - final static boolean __ALLOW_PHYLOGENETIC_INFERENCE = true; + final static boolean __ALLOW_PHYLOGENETIC_INFERENCE = false; public final static boolean __RELEASE = false; // TODO remove me public final static boolean __SNAPSHOT_RELEASE = false; // TODO remove me public final static boolean __SYNTH_LF = false; // TODO remove me public final static boolean ALLOW_DDBJ_BLAST = false; public final static String PRG_NAME = "Archaeopteryx"; - final static String VERSION = "0.974"; - final static String PRG_DATE = "121005"; + final static String VERSION = "0.975"; + final static String PRG_DATE = "121009"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Verdana", "Tahoma", "Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" }; diff --git a/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java b/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java index a7eb07d..2a6fdd39 100644 --- a/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java @@ -825,7 +825,7 @@ class NodeEditPanel extends JPanel { break; case SEQ_MOL_SEQ: AptxUtil.ensurePresenceOfSequence( getMyNode() ); - getMyNode().getNodeData().getSequence().setMolecularSequence( value ); + getMyNode().getNodeData().getSequence().setMolecularSequence( value.replaceAll( "[^a-zA-Z-]", "" ) ); break; case SEQ_NAME: AptxUtil.ensurePresenceOfSequence( getMyNode() ); diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index 47ca58d..0a3cf22 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -125,6 +125,7 @@ import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; +import org.forester.util.SequenceIdParser; public final class TreePanel extends JPanel implements ActionListener, MouseWheelListener, Printable { @@ -459,26 +460,41 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee final private void blast( final PhylogenyNode node ) { if ( !isCanBlast( node ) ) { JOptionPane.showMessageDialog( this, - "No sequence information present", + "Insufficient information present", "Cannot Blast", - JOptionPane.WARNING_MESSAGE ); + JOptionPane.INFORMATION_MESSAGE ); return; } - if ( node.getNodeData().isHasSequence() || !ForesterUtil.isEmpty( node.getName() ) ) { + else { final String query = Blast.obtainQueryForBlast( node ); System.out.println( "query for BLAST is: " + query ); - boolean nucleotide = false; + char type = '?'; if ( !ForesterUtil.isEmpty( query ) ) { if ( node.getNodeData().isHasSequence() ) { if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getType() ) ) { - if ( !node.getNodeData().getSequence().getType().toLowerCase() + if ( node.getNodeData().getSequence().getType().toLowerCase() .equals( PhyloXmlUtil.SEQ_TYPE_PROTEIN ) ) { - nucleotide = true; + type = 'p'; + } + else { + type = 'n'; } } else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) { - nucleotide = !ForesterUtil.seqIsLikelyToBeAa( node.getNodeData().getSequence() - .getMolecularSequence() ); + if ( ForesterUtil.seqIsLikelyToBeAa( node.getNodeData().getSequence().getMolecularSequence() ) ) { + type = 'p'; + } + else { + type = 'n'; + } + } + } + if ( type == '?' ) { + if ( SequenceIdParser.isProtein( query ) ) { + type = 'p'; + } + else { + type = 'n'; } } JApplet applet = null; @@ -486,7 +502,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee applet = obtainApplet(); } try { - Blast.openNcbiBlastWeb( query, nucleotide, applet, this ); + Blast.openNcbiBlastWeb( query, type == 'n', applet, this ); } catch ( final Exception e ) { e.printStackTrace(); @@ -1431,8 +1447,10 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee + node.getNumberOfExternalNodes() + ") For Node " + node; if ( getMainPanel().getMainFrame() == null ) { // Must be "E" applet version. - ( ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet() ).showTextFrame( sb - .toString(), title ); + final ArchaeopteryxE ae = ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet(); + final String s = sb.toString().trim(); + ae.showTextFrame( s, title ); + ae.setCurrentExternalNodesDataBuffer( s ); } else { getMainPanel().getMainFrame().showTextFrame( sb.toString(), title ); @@ -1580,11 +1598,10 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } final private boolean isCanBlast( final PhylogenyNode node ) { - return ( ( node.getNodeData().isHasSequence() && ( ( ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil - .isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) ) - || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) || !ForesterUtil.isEmpty( node - .getNodeData().getSequence().getMolecularSequence() ) ) ) || ( ( !ForesterUtil.isEmpty( node.getName() ) ) && Blast - .isContainsQueryForBlast( node ) ) ); + if ( !node.getNodeData().isHasSequence() && ForesterUtil.isEmpty( node.getName() ) ) { + return false; + } + return Blast.isContainsQueryForBlast( node ); } final boolean isCanCollapse() { diff --git a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java index 4d94450..dc99878 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java @@ -38,7 +38,6 @@ import javax.swing.JApplet; import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.TreePanel; import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Identifier; import org.forester.util.ForesterUtil; import org.forester.util.SequenceIdParser; @@ -80,33 +79,33 @@ public final class Blast { if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) { query = node.getNodeData().getSequence().getMolecularSequence(); } - else if ( ( node.getNodeData().getSequence().getAccession() != null ) + if ( ForesterUtil.isEmpty( query ) && ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) ) { - if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) ) { - query = node.getNodeData().getSequence().getAccession().getSource() + "%7C"; + final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getAccession() + .getValue() ); + if ( id != null ) { + query = id.getValue(); } - query += node.getNodeData().getSequence().getAccession().getValue(); } - else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { - final Accession acc = AptxUtil.obtainSequenceAccessionFromName( node.getNodeData().getSequence() - .getName() ); - if ( acc != null ) { - query = acc.getSource() + "%7C" + acc.getValue(); + if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { + final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getName() ); + if ( id != null ) { + query = id.getValue(); } } - } - else if ( !ForesterUtil.isEmpty( node.getName() ) ) { - final Accession acc = AptxUtil.obtainSequenceAccessionFromName( node.getName() ); - if ( acc != null ) { - query = acc.getSource() + "%7C" + acc.getValue(); - } - else { - final Identifier id = SequenceIdParser.parse( node.getName() ); + if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) { + final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getSymbol() ); if ( id != null ) { query = id.getValue(); } } } + if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getName() ) ) { + final Identifier id = SequenceIdParser.parse( node.getName() ); + if ( id != null ) { + query = id.getValue(); + } + } return query; } diff --git a/forester/java/src/org/forester/phylogeny/data/Identifier.java b/forester/java/src/org/forester/phylogeny/data/Identifier.java index 1314df4..97297f8 100644 --- a/forester/java/src/org/forester/phylogeny/data/Identifier.java +++ b/forester/java/src/org/forester/phylogeny/data/Identifier.java @@ -36,6 +36,7 @@ public final class Identifier implements PhylogenyData { final public static String NCBI = "ncbi"; final public static String REFSEQ = "refseq"; + final public static String SP = "sp"; final private String _value; final private String _provider; final private String _value_provider; diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 5961561..7c554d4 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -8672,8 +8672,30 @@ public final class Test { return false; } // + id = SequenceIdParser.parse( "P4A123" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + // + id = SequenceIdParser.parse( "pllf[pok P4A123_osdjfosnqo035-9233332904i000490 vf tmv x45" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + // id = SequenceIdParser.parse( "XP_12345" ); if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); return false; } // lcl_91970_unknown_ diff --git a/forester/java/src/org/forester/util/SequenceIdParser.java b/forester/java/src/org/forester/util/SequenceIdParser.java index ff9b91d..3b6bc5d 100644 --- a/forester/java/src/org/forester/util/SequenceIdParser.java +++ b/forester/java/src/org/forester/util/SequenceIdParser.java @@ -59,6 +59,9 @@ public final class SequenceIdParser { // underscore character ('_'). For example, a RefSeq protein accession is NP_015325. private final static Pattern REFSEQ_PATTERN = Pattern .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}_\\d{6,})(?:[^a-zA-Z0-9]|\\Z)" ); + // See: http://web.expasy.org/docs/userman.html#ID_line + private final static Pattern TREMBL_PATTERN = Pattern + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z][0-9][A-Z0-9]{3}[0-9])(?:[^a-zA-Z0-9]|\\Z)" ); /** * Returns null if no match. @@ -73,10 +76,22 @@ public final class SequenceIdParser { if ( !ForesterUtil.isEmpty( v ) ) { return new Identifier( v, Identifier.REFSEQ ); } + v = parseTrEMBLAccessor( s ); + if ( !ForesterUtil.isEmpty( v ) ) { + return new Identifier( v, Identifier.SP ); + } return null; } - public static boolean isProtein( final String query ) { + public final static boolean isProtein( final String query ) { + final String r1 = parseRefSeqAccessor( query ); + if ( !ForesterUtil.isEmpty( r1 ) && ( r1.charAt( 1 ) == 'P' ) ) { + return true; + } + final String r2 = parseTrEMBLAccessor( query ); + if ( !ForesterUtil.isEmpty( r2 ) ) { + return true; + } return GENBANK_PROTEIN_AC_PATTERN.matcher( query ).lookingAt(); } @@ -118,6 +133,18 @@ public final class SequenceIdParser { return null; } + /** + * Returns null if no match. + * + */ + private final static String parseTrEMBLAccessor( final String query ) { + final Matcher m = TREMBL_PATTERN.matcher( query ); + if ( m.lookingAt() ) { + return m.group( 1 ); + } + return null; + } + private SequenceIdParser() { // Hiding the constructor. } -- 1.7.10.2