From f8ecfc1d254f7f413ee5c47fbb012b609f7f4fd1 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 2 Oct 2013 03:04:51 +0000 Subject: [PATCH] inprogress --- .../org/forester/archaeopteryx/Configuration.java | 73 +++-------- .../org/forester/archaeopteryx/ControlPanel.java | 67 ++++++---- .../archaeopteryx/MainFrameApplication.java | 3 + .../org/forester/archaeopteryx/NodeEditPanel.java | 6 + .../src/org/forester/archaeopteryx/NodePanel.java | 102 +++++++-------- .../src/org/forester/archaeopteryx/TreePanel.java | 58 +++++++-- .../org/forester/archaeopteryx/TreePanelUtil.java | 13 +- .../org/forester/archaeopteryx/tools/Blast.java | 7 ++ .../java/src/org/forester/phylogeny/Phylogeny.java | 14 +++ .../org/forester/phylogeny/PhylogenyMethods.java | 15 +++ .../src/org/forester/phylogeny/PhylogenyNode.java | 4 + .../src/org/forester/phylogeny/data/NodeData.java | 1 + .../src/org/forester/phylogeny/data/Sequence.java | 131 ++++++++++++-------- forester/java/src/org/forester/rio/RIO.java | 4 + .../java/src/org/forester/util/ForesterUtil.java | 18 +++ .../src/org/forester/util/SequenceIdParser.java | 24 ++-- .../org/forester/ws/seqdb/SequenceDbWsTools.java | 96 +++++++++++--- 17 files changed, 415 insertions(+), 221 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/Configuration.java b/forester/java/src/org/forester/archaeopteryx/Configuration.java index e68b220..5cfceac 100644 --- a/forester/java/src/org/forester/archaeopteryx/Configuration.java +++ b/forester/java/src/org/forester/archaeopteryx/Configuration.java @@ -115,19 +115,20 @@ public final class Configuration { final static int show_domain_architectures = 10; final static int show_binary_characters = 11; final static int show_binary_character_counts = 12; - final static int show_gene_names = 13; + final static int show_seq_names = 13; final static int show_sequence_acc = 14; final static int display_internal_data = 15; final static int dynamically_hide_data = 16; final static int show_taxonomy_scientific_names = 17; final static int show_taxonomy_common_names = 18; final static int color_according_to_annotation = 19; - final static int show_gene_symbols = 20; + final static int show_seq_symbols = 20; final static int node_data_popup = 21; final static int show_relation_confidence = 22; final static int show_vector_data = 23; final static int show_taxonomy_images = 24; final static int show_properties = 25; + final static int show_gene_names = 26; // ------------------ // Click-to options // ------------------ @@ -165,13 +166,13 @@ public final class Configuration { { "Use Branch Colors", "display", "no" }, { "Use Branch Widths", "display", "no" }, { "Show Custom Nodes", "display", "yes" }, { "Protein Domains", "nodisplay", "no" }, { "Binary Characters", "nodisplay", "no" }, { "Binary Char Counts", "nodisplay", "no" }, - { "Seq Name", "display", "yes" }, { "Seq Acc", "display", "no" }, + { "Seq Name", "display", "yes" }, { "Seq Accession", "display", "no" }, { "Show Internal Data", "display", "yes" }, { "Dyna Hide", "display", "yes" }, { "Taxonomy Scientific", "display", "yes" }, { "Taxonomy Common", "display", "no" }, { "Colorize by Annotation", "nodisplay", "no" }, { "Seq Symbol", "display", "yes" }, { "Rollover", "display", "yes" }, { "Relation Confidence", "nodisplay", "no" }, { "Vector Data", "nodisplay", "no" }, { "Taxonomy Images", "display", "no" }, - { "Properties", "nodisplay", "no" } }; + { "Properties", "nodisplay", "no" }, { "Gene Name", "display", "yes" } }; final static String clickto_options[][] = { { "Display Node Data", "display" }, { "Collapse/Uncollapse", "display" }, { "Root/Reroot", "display" }, { "Sub/Super Tree", "display" }, { "Swap Descendants", "display" }, @@ -458,7 +459,7 @@ public final class Configuration { } public void setDisplaySequenceNames( final boolean b ) { - display_options[ show_gene_names ][ 2 ] = b ? "yes" : "no"; + display_options[ show_seq_names ][ 2 ] = b ? "yes" : "no"; } public void setDisplaySequenceRelations( final boolean display_sequence_relations ) { @@ -466,7 +467,7 @@ public final class Configuration { } public void setDisplaySequenceSymbols( final boolean b ) { - display_options[ show_gene_symbols ][ 2 ] = b ? "yes" : "no"; + display_options[ show_seq_symbols ][ 2 ] = b ? "yes" : "no"; } public void setDisplayTaxonomyCode( final boolean b ) { @@ -1437,6 +1438,9 @@ public final class Configuration { else if ( s.equalsIgnoreCase( "sequence_name" ) ) { setExtDescNodeDataToReturn( NODE_DATA.SEQUENCE_NAME ); } + else if ( s.equalsIgnoreCase( "gene_name" ) ) { + setExtDescNodeDataToReturn( NODE_DATA.GENE_NAME ); + } else if ( s.equalsIgnoreCase( "sequence_symbol" ) ) { setExtDescNodeDataToReturn( NODE_DATA.SEQUENCE_SYMBOL ); } @@ -1486,13 +1490,8 @@ public final class Configuration { else if ( st.countTokens() >= 2 ) { // counts the tokens that are not // yet retrieved! int key_index = -1; - if ( key.equals( "use_real_br_lengths" ) || key.equals( "phylogram" ) ) { + if ( key.equals( "phylogram" ) ) { key_index = Configuration.display_as_phylogram; - if ( key.equals( "use_real_br_lengths" ) ) { - ForesterUtil - .printWarningMessage( Constants.PRG_NAME, - "configuration key [use_real_br_lengths] is deprecated, use [phylogram] instead" ); - } } else if ( key.equals( "rollover" ) ) { key_index = Configuration.node_data_popup; @@ -1503,34 +1502,14 @@ public final class Configuration { else if ( key.equals( "show_node_names" ) ) { key_index = Configuration.show_node_names; } - else if ( key.equals( "show_taxonomy" ) || key.equals( "show_taxonomy_code" ) ) { + else if ( key.equals( "show_taxonomy_code" ) ) { key_index = Configuration.show_tax_code; - if ( key.equals( "show_taxonomy" ) ) { - ForesterUtil - .printWarningMessage( Constants.PRG_NAME, - "configuration key [show_taxonomy] is deprecated, use [show_taxonomy_code] instead" ); - } } - else if ( key.equals( "write_br_length_values" ) ) { - ForesterUtil.printWarningMessage( Constants.PRG_NAME, - "configuration key [write_br_length_values] is deprecated" ); - key_index = DEPRECATED; - } - else if ( key.equals( "write_bootstrap_values" ) || key.equals( "write_confidence_values" ) ) { + else if ( key.equals( "write_confidence_values" ) ) { key_index = Configuration.write_confidence_values; - if ( key.equals( "write_bootstrap_values" ) ) { - ForesterUtil - .printWarningMessage( Constants.PRG_NAME, - "configuration key [write_bootstrap_values] is deprecated, use [write_confidence_values] instead" ); - } } - else if ( key.equals( "write_events" ) || key.equals( "write_dup_spec" ) ) { + else if ( key.equals( "write_events" ) ) { key_index = Configuration.write_events; - if ( key.equals( "write_dup_spec" ) ) { - ForesterUtil - .printWarningMessage( Constants.PRG_NAME, - "configuration key [write_dup_spec] is deprecated, use [write_events] instead" ); - } } else if ( key.equals( "color_branches" ) ) { key_index = Configuration.color_branches; @@ -1538,11 +1517,6 @@ public final class Configuration { else if ( key.equals( "width_branches" ) ) { key_index = Configuration.width_branches; } - else if ( key.equals( "mark_nodes_with_box" ) ) { - ForesterUtil.printWarningMessage( Constants.PRG_NAME, - "configuration key [mark_nodes_with_box] is deprecated" ); - key_index = DEPRECATED; - } else if ( key.equals( "show_domain_architectures" ) ) { key_index = Configuration.show_domain_architectures; } @@ -1555,31 +1529,24 @@ public final class Configuration { else if ( key.equals( "show_binary_character_counts" ) ) { key_index = Configuration.show_binary_character_counts; } + else if ( key.equals( "show_seq_names" ) ) { + key_index = Configuration.show_seq_names; + } else if ( key.equals( "show_gene_names" ) ) { key_index = Configuration.show_gene_names; } - else if ( key.equals( "show_gene_symbols" ) ) { - key_index = Configuration.show_gene_symbols; + else if ( key.equals( "show_seq_symbols" ) ) { + key_index = Configuration.show_seq_symbols; } - else if ( key.equals( "show_sequence_acc" ) ) { + else if ( key.equals( "show_seq_acc" ) ) { key_index = Configuration.show_sequence_acc; } - else if ( key.equals( "show_node_ids" ) ) { - ForesterUtil - .printWarningMessage( Constants.PRG_NAME, "configuration key [show_node_ids] is deprecated" ); - key_index = DEPRECATED; - } else if ( key.equals( "display_internal_data" ) ) { key_index = Configuration.display_internal_data; } else if ( key.equals( "dynamically_hide_data" ) ) { key_index = Configuration.dynamically_hide_data; } - else if ( key.equals( "show_taxonomy_names" ) ) { - ForesterUtil.printWarningMessage( Constants.PRG_NAME, - "configuration key [show_taxonomy_names] is deprecated" ); - key_index = DEPRECATED; - } else if ( key.equals( "show_taxonomy_scientific_names" ) ) { key_index = Configuration.show_taxonomy_scientific_names; } diff --git a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java index 656cb75..7586722 100644 --- a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java @@ -97,8 +97,9 @@ final class ControlPanel extends JPanel implements ActionListener { private JCheckBox _show_annotation; private JCheckBox _show_binary_characters; private JCheckBox _show_binary_character_counts; + private JCheckBox _show_seq_names; + private JCheckBox _show_seq_symbols; private JCheckBox _show_gene_names; - private JCheckBox _show_gene_symbols; private JCheckBox _show_sequence_acc; private JCheckBox _node_desc_popup_cb; private JCheckBox _dynamically_hide_data; @@ -237,7 +238,7 @@ final class ControlPanel extends JPanel implements ActionListener { else if ( e.getSource() == _order ) { DESCENDANT_SORT_PRIORITY pri = DESCENDANT_SORT_PRIORITY.TAXONOMY; if ( ( !isShowTaxonomyScientificNames() && !isShowTaxonomyCode() && !isShowTaxonomyCommonNames() ) ) { - if ( ( isShowSequenceAcc() || isShowGeneNames() || isShowGeneSymbols() ) ) { + if ( ( isShowSequenceAcc() || isShowSeqNames() || isShowSeqSymbols() ) ) { pri = DESCENDANT_SORT_PRIORITY.SEQUENCE; } else if ( isShowNodeNames() ) { @@ -610,14 +611,19 @@ final class ControlPanel extends JPanel implements ActionListener { addJCheckBox( _show_domain_architectures, ch_panel ); add( ch_panel ); break; + case Configuration.show_seq_names: + _show_seq_names = new JCheckBox( title ); + addJCheckBox( _show_seq_names, ch_panel ); + add( ch_panel ); + break; case Configuration.show_gene_names: _show_gene_names = new JCheckBox( title ); addJCheckBox( _show_gene_names, ch_panel ); add( ch_panel ); break; - case Configuration.show_gene_symbols: - _show_gene_symbols = new JCheckBox( title ); - addJCheckBox( _show_gene_symbols, ch_panel ); + case Configuration.show_seq_symbols: + _show_seq_symbols = new JCheckBox( title ); + addJCheckBox( _show_seq_symbols, ch_panel ); add( ch_panel ); break; case Configuration.show_sequence_acc: @@ -817,12 +823,12 @@ final class ControlPanel extends JPanel implements ActionListener { return ( ( _show_domain_architectures != null ) && _show_domain_architectures.isSelected() ); } - boolean isShowGeneNames() { - return ( ( _show_gene_names != null ) && _show_gene_names.isSelected() ); + boolean isShowSeqNames() { + return ( ( _show_seq_names != null ) && _show_seq_names.isSelected() ); } - boolean isShowGeneSymbols() { - return ( ( _show_gene_symbols != null ) && _show_gene_symbols.isSelected() ); + boolean isShowSeqSymbols() { + return ( ( _show_seq_symbols != null ) && _show_seq_symbols.isSelected() ); } boolean isShowInternalData() { @@ -837,6 +843,10 @@ final class ControlPanel extends JPanel implements ActionListener { return ( ( _show_sequence_acc != null ) && _show_sequence_acc.isSelected() ); } + boolean isShowGeneNames() { + return ( ( _show_gene_names != null ) && _show_gene_names.isSelected() ); + } + boolean isShowSequenceRelationConfidence() { return ( ( _seq_relation_confidence_switch != null ) && ( _seq_relation_confidence_switch.isSelected() ) ); } @@ -991,14 +1001,19 @@ final class ControlPanel extends JPanel implements ActionListener { _show_domain_architectures.setSelected( state ); } break; + case Configuration.show_seq_names: + if ( _show_seq_names != null ) { + _show_seq_names.setSelected( state ); + } + break; case Configuration.show_gene_names: if ( _show_gene_names != null ) { _show_gene_names.setSelected( state ); } break; - case Configuration.show_gene_symbols: - if ( _show_gene_symbols != null ) { - _show_gene_symbols.setSelected( state ); + case Configuration.show_seq_symbols: + if ( _show_seq_symbols != null ) { + _show_seq_symbols.setSelected( state ); } break; case Configuration.show_vector_data: @@ -1766,6 +1781,9 @@ final class ControlPanel extends JPanel implements ActionListener { case SEQUENCE_NAME: s = "Sequence Names"; break; + case GENE_NAME: + s = "Gene Names"; + break; case SEQUENCE_SYMBOL: s = "Sequence Symbols"; break; @@ -1919,22 +1937,19 @@ final class ControlPanel extends JPanel implements ActionListener { setCheckbox( Configuration.show_taxonomy_common_names, _configuration.doCheckOption( Configuration.show_taxonomy_common_names ) ); } - if ( _configuration.doDisplayOption( Configuration.show_taxonomy_images ) ) { - addCheckbox( Configuration.show_taxonomy_images, - _configuration.getDisplayTitle( Configuration.show_taxonomy_images ) ); - setCheckbox( Configuration.show_taxonomy_images, - _configuration.doCheckOption( Configuration.show_taxonomy_images ) ); - } - if ( _configuration.doDisplayOption( Configuration.show_gene_symbols ) ) { - addCheckbox( Configuration.show_gene_symbols, - _configuration.getDisplayTitle( Configuration.show_gene_symbols ) ); - setCheckbox( Configuration.show_gene_symbols, - _configuration.doCheckOption( Configuration.show_gene_symbols ) ); + if ( _configuration.doDisplayOption( Configuration.show_seq_names ) ) { + addCheckbox( Configuration.show_seq_names, _configuration.getDisplayTitle( Configuration.show_seq_names ) ); + setCheckbox( Configuration.show_seq_names, _configuration.doCheckOption( Configuration.show_seq_names ) ); } if ( _configuration.doDisplayOption( Configuration.show_gene_names ) ) { addCheckbox( Configuration.show_gene_names, _configuration.getDisplayTitle( Configuration.show_gene_names ) ); setCheckbox( Configuration.show_gene_names, _configuration.doCheckOption( Configuration.show_gene_names ) ); } + if ( _configuration.doDisplayOption( Configuration.show_seq_symbols ) ) { + addCheckbox( Configuration.show_seq_symbols, + _configuration.getDisplayTitle( Configuration.show_seq_symbols ) ); + setCheckbox( Configuration.show_seq_symbols, _configuration.doCheckOption( Configuration.show_seq_symbols ) ); + } if ( _configuration.doDisplayOption( Configuration.show_sequence_acc ) ) { addCheckbox( Configuration.show_sequence_acc, _configuration.getDisplayTitle( Configuration.show_sequence_acc ) ); @@ -1982,6 +1997,12 @@ final class ControlPanel extends JPanel implements ActionListener { addCheckbox( Configuration.show_properties, _configuration.getDisplayTitle( Configuration.show_properties ) ); setCheckbox( Configuration.show_properties, _configuration.doCheckOption( Configuration.show_properties ) ); } + if ( _configuration.doDisplayOption( Configuration.show_taxonomy_images ) ) { + addCheckbox( Configuration.show_taxonomy_images, + _configuration.getDisplayTitle( Configuration.show_taxonomy_images ) ); + setCheckbox( Configuration.show_taxonomy_images, + _configuration.doCheckOption( Configuration.show_taxonomy_images ) ); + } } private void setVisibilityOfDomainStrucureControls() { diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index 248a5a4..8f65e7b 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -1473,6 +1473,9 @@ public final class MainFrameApplication extends MainFrame { nodes = phy.getNodesViaSequenceSymbol( seq_name ); } if ( nodes.isEmpty() ) { + nodes = phy.getNodesViaGeneName( seq_name ); + } + if ( nodes.isEmpty() ) { nodes = phy.getNodes( seq_name ); } if ( nodes.size() > 1 ) { diff --git a/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java b/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java index 1892411..f593a78 100644 --- a/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java @@ -337,6 +337,7 @@ class NodeEditPanel extends JPanel { } addSubelementEditable( category, NodePanel.SEQ_NAME, seq.getName(), PHYLOXML_TAG.SEQ_NAME ); addSubelementEditable( category, NodePanel.SEQ_SYMBOL, seq.getSymbol(), PHYLOXML_TAG.SEQ_SYMBOL ); + addSubelementEditable( category, NodePanel.SEQ_GENE_NAME, seq.getGeneName(), PHYLOXML_TAG.SEQ_GENE_NAME ); addSubelementEditable( category, NodePanel.SEQ_ACCESSION, acc.getValue(), @@ -841,6 +842,10 @@ class NodeEditPanel extends JPanel { break; } break; + case SEQ_GENE_NAME: + ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + getMyNode().getNodeData().getSequence().setGeneName( value ); + break; case SEQ_TYPE: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); try { @@ -1062,6 +1067,7 @@ class NodeEditPanel extends JPanel { TAXONOMY_URI, SEQ_SYMBOL, SEQ_NAME, + SEQ_GENE_NAME, SEQ_LOCATION, SEQ_TYPE, SEQ_MOL_SEQ, diff --git a/forester/java/src/org/forester/archaeopteryx/NodePanel.java b/forester/java/src/org/forester/archaeopteryx/NodePanel.java index 90c7010..c861354 100644 --- a/forester/java/src/org/forester/archaeopteryx/NodePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/NodePanel.java @@ -63,57 +63,57 @@ import org.forester.util.ForesterUtil; class NodePanel extends JPanel implements TreeSelectionListener { - static final String BASIC = "Basic"; - static final String BINARY_CHARACTERS = "Binary characters"; - static final String CONFIDENCE = "Confidence"; - static final String CONFIDENCE_TYPE = "type"; - static final String DATE = "Date"; - static final String DATE_DESCRIPTION = "Description"; - static final String DATE_MAX = "Max"; - static final String DATE_MIN = "Min"; - static final String DATE_UNIT = "Unit"; - static final String DATE_VALUE = "Value"; - static final String DIST_ALT_UNIT = "Altitude unit"; - static final String DIST_ALTITUDE = "Altitude"; - static final String DIST_DESCRIPTION = "Description"; - static final String DIST_GEODETIC_DATUM = "Geodetic datum"; - static final String DIST_LATITUDE = "Latitude"; - static final String DIST_LONGITUDE = "Longitude"; - static final String DISTRIBUTION = "Distribution"; - static final String EVENTS = "Events"; - static final String EVENTS_DUPLICATIONS = "Duplications"; - static final String EVENTS_GENE_LOSSES = "Gene losses"; - static final String EVENTS_SPECIATIONS = "Speciations"; - static final String LIT_REFERENCE = "Reference"; - static final String LIT_REFERENCE_DESC = "Description"; - static final String LIT_REFERENCE_DOI = "DOI"; - static final String NODE_BRANCH_COLOR = "Branch color"; - static final String NODE_BRANCH_LENGTH = "Branch length"; - static final String NODE_BRANCH_WIDTH = "Branch width"; - static final String NODE_NAME = "Name"; - static final String PROP = "Properties"; - static final String REFERENCE = "Reference"; - static final String SEQ_ACCESSION = "Accession"; - static final String SEQ_LOCATION = "Location"; - static final String SEQ_MOL_SEQ = "Mol seq"; - static final String SEQ_NAME = "Name"; - static final String SEQ_SYMBOL = "Symbol"; - static final String SEQ_TYPE = "Type"; - static final String SEQ_URI = "URI"; - static final String SEQUENCE = "Sequence"; - static final String TAXONOMY = "Taxonomy"; - static final String TAXONOMY_AUTHORITY = "Authority"; - static final String TAXONOMY_CODE = "Code"; - static final String TAXONOMY_COMMON_NAME = "Common name"; - static final String TAXONOMY_IDENTIFIER = "Identifier"; - static final String TAXONOMY_RANK = "Rank"; - static final String TAXONOMY_SCIENTIFIC_NAME = "Scientific name"; - static final String TAXONOMY_SYNONYM = "Synonym"; - static final String TAXONOMY_URI = "URI"; - private static final String SEQ_GENE_NAME = "Gene name"; - private static final long serialVersionUID = 5120159904388100771L; - private final JEditorPane _pane; - private final JTree _tree; + static final String BASIC = "Basic"; + static final String BINARY_CHARACTERS = "Binary characters"; + static final String CONFIDENCE = "Confidence"; + static final String CONFIDENCE_TYPE = "type"; + static final String DATE = "Date"; + static final String DATE_DESCRIPTION = "Description"; + static final String DATE_MAX = "Max"; + static final String DATE_MIN = "Min"; + static final String DATE_UNIT = "Unit"; + static final String DATE_VALUE = "Value"; + static final String DIST_ALT_UNIT = "Altitude unit"; + static final String DIST_ALTITUDE = "Altitude"; + static final String DIST_DESCRIPTION = "Description"; + static final String DIST_GEODETIC_DATUM = "Geodetic datum"; + static final String DIST_LATITUDE = "Latitude"; + static final String DIST_LONGITUDE = "Longitude"; + static final String DISTRIBUTION = "Distribution"; + static final String EVENTS = "Events"; + static final String EVENTS_DUPLICATIONS = "Duplications"; + static final String EVENTS_GENE_LOSSES = "Gene losses"; + static final String EVENTS_SPECIATIONS = "Speciations"; + static final String LIT_REFERENCE = "Reference"; + static final String LIT_REFERENCE_DESC = "Description"; + static final String LIT_REFERENCE_DOI = "DOI"; + static final String NODE_BRANCH_COLOR = "Branch color"; + static final String NODE_BRANCH_LENGTH = "Branch length"; + static final String NODE_BRANCH_WIDTH = "Branch width"; + static final String NODE_NAME = "Name"; + static final String PROP = "Properties"; + static final String REFERENCE = "Reference"; + static final String SEQ_ACCESSION = "Accession"; + static final String SEQ_LOCATION = "Location"; + static final String SEQ_MOL_SEQ = "Mol seq"; + static final String SEQ_NAME = "Name"; + static final String SEQ_SYMBOL = "Symbol"; + static final String SEQ_GENE_NAME = "Gene name"; + static final String SEQ_TYPE = "Type"; + static final String SEQ_URI = "URI"; + static final String SEQUENCE = "Sequence"; + static final String TAXONOMY = "Taxonomy"; + static final String TAXONOMY_AUTHORITY = "Authority"; + static final String TAXONOMY_CODE = "Code"; + static final String TAXONOMY_COMMON_NAME = "Common name"; + static final String TAXONOMY_IDENTIFIER = "Identifier"; + static final String TAXONOMY_RANK = "Rank"; + static final String TAXONOMY_SCIENTIFIC_NAME = "Scientific name"; + static final String TAXONOMY_SYNONYM = "Synonym"; + static final String TAXONOMY_URI = "URI"; + private static final long serialVersionUID = 5120159904388100771L; + private final JEditorPane _pane; + private final JTree _tree; public NodePanel( final PhylogenyNode phylogeny_node ) { String node_name = ""; diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index 32dd030..6d793c8 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -607,13 +607,18 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee .getValue() + " " ); } - if ( getControlPanel().isShowGeneNames() && ( node.getNodeData().getSequence().getName().length() > 0 ) ) { + if ( getControlPanel().isShowSeqNames() && ( node.getNodeData().getSequence().getName().length() > 0 ) ) { sum += getTreeFontSet()._fm_large.stringWidth( node.getNodeData().getSequence().getName() + " " ); } - if ( getControlPanel().isShowGeneSymbols() + if ( getControlPanel().isShowSeqSymbols() && ( node.getNodeData().getSequence().getSymbol().length() > 0 ) ) { sum += getTreeFontSet()._fm_large.stringWidth( node.getNodeData().getSequence().getSymbol() + " " ); } + if ( getControlPanel().isShowGeneNames() + && ( node.getNodeData().getSequence().getGeneName().length() > 0 ) ) { + sum += getTreeFontSet()._fm_large + .stringWidth( node.getNodeData().getSequence().getGeneName() + " " ); + } if ( getControlPanel().isShowAnnotation() && ( node.getNodeData().getSequence().getAnnotations() != null ) && !node.getNodeData().getSequence().getAnnotations().isEmpty() ) { @@ -1925,8 +1930,8 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee DESCENDANT_SORT_PRIORITY pri = DESCENDANT_SORT_PRIORITY.TAXONOMY; if ( ( !getControlPanel().isShowTaxonomyScientificNames() && !getControlPanel().isShowTaxonomyCode() && !getControlPanel() .isShowTaxonomyCommonNames() ) ) { - if ( ( getControlPanel().isShowSequenceAcc() || getControlPanel().isShowGeneNames() || getControlPanel() - .isShowGeneSymbols() ) ) { + if ( ( getControlPanel().isShowSequenceAcc() || getControlPanel().isShowSeqNames() || getControlPanel() + .isShowSeqSymbols() ) ) { pri = DESCENDANT_SORT_PRIORITY.SEQUENCE; } else if ( getControlPanel().isShowNodeNames() ) { @@ -3224,6 +3229,8 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee switch ( getOptions().getExtDescNodeDataToReturn() ) { case NODE_NAME: return "Node Names"; + case GENE_NAME: + return "Gene Names"; case SEQUENCE_NAME: return "Sequence Names"; case SEQUENCE_SYMBOL: @@ -3984,13 +3991,19 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee _sb.append( node.getName() ); } if ( node.getNodeData().isHasSequence() ) { - if ( getControlPanel().isShowGeneSymbols() && ( node.getNodeData().getSequence().getSymbol().length() > 0 ) ) { + if ( getControlPanel().isShowSeqSymbols() && ( node.getNodeData().getSequence().getSymbol().length() > 0 ) ) { if ( _sb.length() > 0 ) { _sb.append( " " ); } _sb.append( node.getNodeData().getSequence().getSymbol() ); } - if ( getControlPanel().isShowGeneNames() && ( node.getNodeData().getSequence().getName().length() > 0 ) ) { + if ( getControlPanel().isShowGeneNames() && ( node.getNodeData().getSequence().getGeneName().length() > 0 ) ) { + if ( _sb.length() > 0 ) { + _sb.append( " " ); + } + _sb.append( node.getNodeData().getSequence().getGeneName() ); + } + if ( getControlPanel().isShowSeqNames() && ( node.getNodeData().getSequence().getName().length() > 0 ) ) { if ( _sb.length() > 0 ) { _sb.append( " " ); } @@ -4234,7 +4247,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } _sb.append( node.getNodeData().getSequence().getAccession().getValue() ); } - if ( getControlPanel().isShowGeneNames() && ( node.getNodeData().getSequence().getName().length() > 0 ) ) { + if ( getControlPanel().isShowSeqNames() && ( node.getNodeData().getSequence().getName().length() > 0 ) ) { if ( _sb.length() > 0 ) { _sb.append( " " ); } @@ -4477,16 +4490,21 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } } if ( node.getNodeData().isHasSequence() ) { - if ( getControlPanel().isShowGeneNames() + if ( getControlPanel().isShowSeqNames() && ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) ) { x += getTreeFontSet()._fm_large.stringWidth( node.getNodeData().getSequence().getName() + " " ); } - if ( getControlPanel().isShowGeneSymbols() + if ( getControlPanel().isShowSeqSymbols() && ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) ) { x += getTreeFontSet()._fm_large.stringWidth( node.getNodeData().getSequence().getSymbol() + " " ); } + if ( getControlPanel().isShowGeneNames() + && ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) ) { + x += getTreeFontSet()._fm_large.stringWidth( node.getNodeData().getSequence().getGeneName() + + " " ); + } if ( getControlPanel().isShowSequenceAcc() && ( node.getNodeData().getSequence().getAccession() != null ) ) { x += getTreeFontSet()._fm_large.stringWidth( node.getNodeData().getSequence() @@ -5093,6 +5111,12 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee data.add( n.getNodeData().getSequence().getName() ); } break; + case GENE_NAME: + if ( n.getNodeData().isHasSequence() + && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) { + data.add( n.getNodeData().getSequence().getGeneName() ); + } + break; case SEQUENCE_SYMBOL: if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { @@ -5124,6 +5148,11 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee ann.append( n.getNodeData().getSequence().getName() ); ann.append( "|" ); } + if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) { + ann.append( "GN=" ); + ann.append( n.getNodeData().getSequence().getGeneName() ); + ann.append( "|" ); + } if ( n.getNodeData().getSequence().getAccession() != null ) { ann.append( "ACC=" ); ann.append( n.getNodeData().getSequence().getAccession().asText() ); @@ -5366,6 +5395,17 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee _popup_buffer.append( "]" ); enc_data = true; } + if ( !ForesterUtil.isEmpty( seq.getGeneName() ) ) { + if ( enc_data ) { + _popup_buffer.append( " [" ); + } + else { + _popup_buffer.append( "[" ); + } + _popup_buffer.append( seq.getGeneName() ); + _popup_buffer.append( "]" ); + enc_data = true; + } if ( !ForesterUtil.isEmpty( seq.getName() ) ) { if ( enc_data ) { _popup_buffer.append( " " ); diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanelUtil.java b/forester/java/src/org/forester/archaeopteryx/TreePanelUtil.java index 322199e..7c1cbaf 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanelUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanelUtil.java @@ -165,15 +165,20 @@ public class TreePanelUtil { if ( cp.isShowNodeNames() && !ForesterUtil.isEmpty( node.getName() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getName(), sb ); } - if ( cp.isShowGeneNames() && node.getNodeData().isHasSequence() + if ( cp.isShowSeqNames() && node.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getName(), sb ); } - if ( cp.isShowGeneSymbols() && node.getNodeData().isHasSequence() + if ( cp.isShowSeqSymbols() && node.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) { TreePanelUtil .showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getSymbol(), sb ); } + if ( cp.isShowGeneNames() && node.getNodeData().isHasSequence() + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) { + TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getGeneName(), + sb ); + } if ( cp.isShowSequenceAcc() && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().toString() ) ) { @@ -195,7 +200,7 @@ public class TreePanelUtil { TreePanelUtil .showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy().getCommonName(), sb ); } - if ( ( cp.isShowGeneNames() || cp.isShowGeneSymbols() || cp.isShowSequenceAcc() ) + if ( ( cp.isShowSeqNames() || cp.isShowSeqSymbols() || cp.isShowSequenceAcc() ) && node.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence() @@ -485,7 +490,7 @@ public class TreePanelUtil { final static boolean isSequenceEmpty( final Sequence seq ) { return ( seq.getAccession() == null ) && ForesterUtil.isEmpty( seq.getName() ) - && ForesterUtil.isEmpty( seq.getSymbol() ); + && ForesterUtil.isEmpty( seq.getGeneName() ) && ForesterUtil.isEmpty( seq.getSymbol() ); } final static boolean isTaxonomyEmpty( final Taxonomy tax ) { diff --git a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java index 839a328..49e2841 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java @@ -99,6 +99,13 @@ public final class Blast { query = id.getValue(); } } + if ( ForesterUtil.isEmpty( query ) + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) { + final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getGeneName() ); + if ( id != null ) { + query = id.getValue(); + } + } } if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getName() ) ) { final Identifier id = SequenceIdParser.parse( node.getName() ); diff --git a/forester/java/src/org/forester/phylogeny/Phylogeny.java b/forester/java/src/org/forester/phylogeny/Phylogeny.java index 723fab8..3679cd5 100644 --- a/forester/java/src/org/forester/phylogeny/Phylogeny.java +++ b/forester/java/src/org/forester/phylogeny/Phylogeny.java @@ -489,6 +489,20 @@ public class Phylogeny { return nodes; } + public List getNodesViaGeneName( final String seq_name ) { + if ( isEmpty() ) { + return null; + } + final List nodes = new ArrayList(); + for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getGeneName().equals( seq_name ) ) { + nodes.add( n ); + } + } + return nodes; + } + public List getNodesViaTaxonomyCode( final String taxonomy_code ) { if ( isEmpty() ) { return null; diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 9c83603..08fe7f0 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -1245,6 +1245,11 @@ public class PhylogenyMethods { return n1.getNodeData().getSequence().getSymbol() .compareTo( n2.getNodeData().getSequence().getSymbol() ); } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } if ( ( n1.getNodeData().getSequence().getAccession() != null ) && ( n2.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) @@ -1274,6 +1279,11 @@ public class PhylogenyMethods { return n1.getNodeData().getSequence().getSymbol() .compareTo( n2.getNodeData().getSequence().getSymbol() ); } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } if ( ( n1.getNodeData().getSequence().getAccession() != null ) && ( n2.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) @@ -1340,6 +1350,11 @@ public class PhylogenyMethods { return n1.getNodeData().getSequence().getSymbol() .compareTo( n2.getNodeData().getSequence().getSymbol() ); } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) { + return n1.getNodeData().getSequence().getGeneName() + .compareTo( n2.getNodeData().getSequence().getGeneName() ); + } if ( ( n1.getNodeData().getSequence().getAccession() != null ) && ( n2.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java index 3994605..05876e7 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java @@ -1097,6 +1097,10 @@ public final class PhylogenyNode implements Comparable { sb.append( getNodeData().getSequence().getSymbol() ); sb.append( " " ); } + if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getGeneName() ) ) { + sb.append( getNodeData().getSequence().getGeneName() ); + sb.append( " " ); + } if ( getNodeData().getSequence().getAccession() != null ) { sb.append( getNodeData().getSequence().getAccession().toString() ); sb.append( " " ); diff --git a/forester/java/src/org/forester/phylogeny/data/NodeData.java b/forester/java/src/org/forester/phylogeny/data/NodeData.java index 4591155..da6d557 100644 --- a/forester/java/src/org/forester/phylogeny/data/NodeData.java +++ b/forester/java/src/org/forester/phylogeny/data/NodeData.java @@ -42,6 +42,7 @@ public class NodeData implements PhylogenyData { NODE_NAME, EVENT, SEQUENCE_NAME, + GENE_NAME, SEQUENCE_SYMBOL, SEQUENCE_MOL_SEQ, SEQUENCE_MOL_SEQ_FASTA, diff --git a/forester/java/src/org/forester/phylogeny/data/Sequence.java b/forester/java/src/org/forester/phylogeny/data/Sequence.java index 25f7e70..afc342b 100644 --- a/forester/java/src/org/forester/phylogeny/data/Sequence.java +++ b/forester/java/src/org/forester/phylogeny/data/Sequence.java @@ -39,36 +39,27 @@ import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.writers.PhylogenyWriter; import org.forester.util.ForesterUtil; -public class Sequence implements PhylogenyData, MultipleUris { +public class Sequence implements PhylogenyData, MultipleUris, Comparable { + private Accession _accession; + private SortedSet _annotations; + private DomainArchitecture _da; + private String _gene_name; + private String _location; private String _mol_sequence; private boolean _mol_sequence_is_aligned; private String _name; - private String _gene_name; + private List _seq_relations; private String _source_id; - private Accession _accession; private String _symbol; - private String _location; private String _type; - private SortedSet _annotations; - private DomainArchitecture _da; private List _uris; - private List _seq_relations; private SortedSet _xrefs; public Sequence() { init(); } - public boolean isEmpty() { - return ( getAccession() == null ) && ForesterUtil.isEmpty( getName() ) && ForesterUtil.isEmpty( getSymbol() ) - && ForesterUtil.isEmpty( getType() ) && ForesterUtil.isEmpty( getLocation() ) - && ForesterUtil.isEmpty( getSourceId() ) && ForesterUtil.isEmpty( getMolecularSequence() ) - && ( getDomainArchitecture() == null ) && ForesterUtil.isEmpty( _annotations ) - && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations ) - && ( ( getCrossReferences() == null ) || getCrossReferences().isEmpty() ); - } - public void addAnnotation( final Annotation annotation ) { getAnnotations().add( annotation ); } @@ -80,12 +71,8 @@ public class Sequence implements PhylogenyData, MultipleUris { getCrossReferences().add( cross_reference ); } - public SortedSet getCrossReferences() { - return _xrefs; - } - - private void setCrossReferences( final TreeSet cross_references ) { - _xrefs = cross_references; + public void addSequenceRelation( final SequenceRelation sr ) { + getSequenceRelations().add( sr ); } @Override @@ -96,10 +83,6 @@ public class Sequence implements PhylogenyData, MultipleUris { getUris().add( uri ); } - public void addSequenceRelation( final SequenceRelation sr ) { - _seq_relations.add( sr ); - } - @Override public StringBuffer asSimpleText() { final StringBuffer sb = new StringBuffer(); @@ -123,6 +106,29 @@ public class Sequence implements PhylogenyData, MultipleUris { return asSimpleText(); } + @Override + public int compareTo( final Sequence o ) { + if ( ( !ForesterUtil.isEmpty( getName() ) ) && ( !ForesterUtil.isEmpty( o.getName() ) ) ) { + return getName().compareTo( o.getName() ); + } + if ( ( !ForesterUtil.isEmpty( getSymbol() ) ) && ( !ForesterUtil.isEmpty( o.getSymbol() ) ) ) { + return getSymbol().compareTo( o.getSymbol() ); + } + if ( ( !ForesterUtil.isEmpty( getGeneName() ) ) && ( !ForesterUtil.isEmpty( o.getGeneName() ) ) ) { + return getGeneName().compareTo( o.getGeneName() ); + } + if ( ( getAccession() != null ) && ( o.getAccession() != null ) + && !ForesterUtil.isEmpty( getAccession().getValue() ) + && !ForesterUtil.isEmpty( o.getAccession().getValue() ) ) { + return getAccession().getValue().compareTo( o.getAccession().getValue() ); + } + if ( ( !ForesterUtil.isEmpty( getMolecularSequence() ) ) + && ( !ForesterUtil.isEmpty( o.getMolecularSequence() ) ) ) { + return getMolecularSequence().compareTo( o.getMolecularSequence() ); + } + return 0; + } + /** * Not a deep copy. * @@ -211,10 +217,18 @@ public class Sequence implements PhylogenyData, MultipleUris { return _annotations; } + public SortedSet getCrossReferences() { + return _xrefs; + } + public DomainArchitecture getDomainArchitecture() { return _da; } + public String getGeneName() { + return _gene_name; + } + public String getLocation() { return _location; } @@ -223,18 +237,10 @@ public class Sequence implements PhylogenyData, MultipleUris { return _mol_sequence; } - public boolean isMolecularSequenceAligned() { - return _mol_sequence_is_aligned; - } - public String getName() { return _name; } - public String getGeneName() { - return _gene_name; - } - public List getSequenceRelations() { if ( _seq_relations == null ) { _seq_relations = new ArrayList(); @@ -242,10 +248,6 @@ public class Sequence implements PhylogenyData, MultipleUris { return _seq_relations; } - private void setSequenceRelations( final List seq_relations ) { - _seq_relations = seq_relations; - } - public String getSourceId() { return _source_id; } @@ -259,13 +261,13 @@ public class Sequence implements PhylogenyData, MultipleUris { } @Override - public List getUris() { - return _uris; + public Uri getUri( final int index ) { + return getUris().get( index ); } @Override - public Uri getUri( final int index ) { - return getUris().get( index ); + public List getUris() { + return _uris; } @Override @@ -273,10 +275,13 @@ public class Sequence implements PhylogenyData, MultipleUris { if ( getAccession() != null ) { return getAccession().hashCode(); } - int result = getSymbol().hashCode(); - if ( getName().length() > 0 ) { + int result = getName().hashCode(); + if ( getSymbol().length() > 0 ) { result ^= getName().hashCode(); } + if ( getGeneName().length() > 0 ) { + result ^= getGeneName().hashCode(); + } if ( getMolecularSequence().length() > 0 ) { result ^= getMolecularSequence().hashCode(); } @@ -314,6 +319,16 @@ public class Sequence implements PhylogenyData, MultipleUris { setAnnotations( null ); } + public boolean isEmpty() { + return ( getAccession() == null ) && ForesterUtil.isEmpty( getName() ) && ForesterUtil.isEmpty( getSymbol() ) + && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getType() ) + && ForesterUtil.isEmpty( getLocation() ) && ForesterUtil.isEmpty( getSourceId() ) + && ForesterUtil.isEmpty( getMolecularSequence() ) && ( getDomainArchitecture() == null ) + && ForesterUtil.isEmpty( _annotations ) && ForesterUtil.isEmpty( _uris ) + && ForesterUtil.isEmpty( _seq_relations ) + && ( ( getCrossReferences() == null ) || getCrossReferences().isEmpty() ); + } + @Override public boolean isEqual( final PhylogenyData data ) { if ( this == data ) { @@ -327,18 +342,22 @@ public class Sequence implements PhylogenyData, MultipleUris { && s.getSymbol().equals( getSymbol() ) && s.getGeneName().equals( getGeneName() ); } - public void setAccession( final Accession accession ) { - _accession = accession; + public boolean isMolecularSequenceAligned() { + return _mol_sequence_is_aligned; } - private void setAnnotations( final SortedSet annotations ) { - _annotations = annotations; + public void setAccession( final Accession accession ) { + _accession = accession; } public void setDomainArchitecture( final DomainArchitecture ds ) { _da = ds; } + public void setGeneName( final String gene_name ) { + _gene_name = gene_name; + } + public void setLocation( final String description ) { _location = description; } @@ -355,10 +374,6 @@ public class Sequence implements PhylogenyData, MultipleUris { _name = name; } - public void setGeneName( final String gene_name ) { - _gene_name = gene_name; - } - public void setSourceId( final String source_id ) { _source_id = source_id; } @@ -463,4 +478,16 @@ public class Sequence implements PhylogenyData, MultipleUris { public String toString() { return asText().toString(); } + + private void setAnnotations( final SortedSet annotations ) { + _annotations = annotations; + } + + private void setCrossReferences( final TreeSet cross_references ) { + _xrefs = cross_references; + } + + private void setSequenceRelations( final List seq_relations ) { + _seq_relations = seq_relations; + } } diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index aca0f7a..0de8e9a 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -881,6 +881,10 @@ public final class RIO { else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { label = n.getNodeData().getSequence().getSymbol(); } + else if ( n.getNodeData().isHasSequence() + && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) { + label = n.getNodeData().getSequence().getGeneName(); + } else if ( !ForesterUtil.isEmpty( n.getName() ) ) { label = n.getName(); } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 46eceac..aed217b 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -112,6 +112,9 @@ public final class ForesterUtil { if ( !isEmpty( seq.getSymbol() ) ) { v = SequenceIdParser.parseRefSeqAccessor( seq.getSymbol() ); } + if ( !isEmpty( seq.getGeneName() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( seq.getGeneName() ); + } if ( isEmpty( v ) && !isEmpty( seq.getName() ) ) { v = SequenceIdParser.parseRefSeqAccessor( seq.getName() ); } @@ -133,6 +136,9 @@ public final class ForesterUtil { if ( !isEmpty( seq.getSymbol() ) ) { v = SequenceIdParser.parseGenbankAccessor( seq.getSymbol() ); } + if ( !isEmpty( seq.getGeneName() ) ) { + v = SequenceIdParser.parseGenbankAccessor( seq.getGeneName() ); + } if ( isEmpty( v ) && !isEmpty( seq.getName() ) ) { v = SequenceIdParser.parseGenbankAccessor( seq.getName() ); } @@ -209,6 +215,18 @@ public final class ForesterUtil { } } } + if ( isEmpty( upkb ) && !isEmpty( seq.getGeneName() ) ) { + m = UNIPROT_KB_PATTERN_1.matcher( seq.getGeneName() ); + if ( m.find() ) { + upkb = m.group( 1 ); + } + else { + m = UNIPROT_KB_PATTERN_2.matcher( seq.getGeneName() ); + if ( m.find() ) { + upkb = m.group(); + } + } + } if ( isEmpty( upkb ) && ( node.getNodeData().getSequence().getAccession() != null ) && !isEmpty( seq.getAccession().getValue() ) ) { m = UNIPROT_KB_PATTERN_1.matcher( seq.getAccession().getValue() ); diff --git a/forester/java/src/org/forester/util/SequenceIdParser.java b/forester/java/src/org/forester/util/SequenceIdParser.java index 7b400d2..8fcf6ee 100644 --- a/forester/java/src/org/forester/util/SequenceIdParser.java +++ b/forester/java/src/org/forester/util/SequenceIdParser.java @@ -70,17 +70,19 @@ public final class SequenceIdParser { * */ public final static Identifier parse( final String s ) { - String v = parseGenbankAccessor( s ); - if ( !ForesterUtil.isEmpty( v ) ) { - return new Identifier( v, Identifier.NCBI ); - } - v = parseRefSeqAccessor( s ); - if ( !ForesterUtil.isEmpty( v ) ) { - return new Identifier( v, Identifier.REFSEQ ); - } - v = parseTrEMBLAccessor( s ); - if ( !ForesterUtil.isEmpty( v ) ) { - return new Identifier( v, Identifier.SP ); + if ( !ForesterUtil.isEmpty( s ) ) { + String v = parseGenbankAccessor( s ); + if ( !ForesterUtil.isEmpty( v ) ) { + return new Identifier( v, Identifier.NCBI ); + } + v = parseRefSeqAccessor( s ); + if ( !ForesterUtil.isEmpty( v ) ) { + return new Identifier( v, Identifier.REFSEQ ); + } + v = parseTrEMBLAccessor( s ); + if ( !ForesterUtil.isEmpty( v ) ) { + return new Identifier( v, Identifier.SP ); + } } return null; } diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index 111fa68..f5f83e4 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -162,10 +162,19 @@ public final class SequenceDbWsTools { String query = null; Identifier id = null; Db db = Db.NONE; - if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) + if ( node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) - && node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "uniprot" ) ) { + && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase() + .startsWith( "uniprot" ) + || node.getNodeData().getSequence().getAccession().getValue().toLowerCase() + .startsWith( "swissprot" ) + || node.getNodeData().getSequence().getAccession().getValue().toLowerCase() + .startsWith( "trembl" ) + || node.getNodeData().getSequence().getAccession().getValue().toLowerCase() + .startsWith( "sp" ) || node.getNodeData().getSequence().getAccession().getValue() + .toLowerCase().startsWith( "uniprotkb" ) ) ) { query = node.getNodeData().getSequence().getAccession().getValue(); db = Db.UNIPROT; } @@ -178,21 +187,63 @@ public final class SequenceDbWsTools { query = node.getNodeData().getSequence().getAccession().getValue(); db = Db.EMBL; } - else if ( !ForesterUtil.isEmpty( node.getName() ) ) { + else if ( node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) + && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ncbi" ) || node + .getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "genbank" ) ) ) { + query = node.getNodeData().getSequence().getAccession().getValue(); + // db = Db.NCBI; + } + else if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) + && node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "refseq" ) ) { + query = node.getNodeData().getSequence().getAccession().getValue(); + db = Db.REFSEQ; + } + else { if ( ( query = ForesterUtil.extractUniProtKbProteinSeqIdentifier( node ) ) != null ) { db = Db.UNIPROT; } - else if ( ( id = SequenceIdParser.parse( node.getName() ) ) != null ) { - if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) { - db = Db.NCBI; + else if ( node.getNodeData().isHasSequence() ) { + if ( ( id = SequenceIdParser.parse( node.getName() ) ) != null ) { + if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) { + // db = Db.NCBI; + } + else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) { + db = Db.REFSEQ; + } + } + else if ( ( id = SequenceIdParser.parse( node.getNodeData().getSequence().getName() ) ) != null ) { + if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) { + // = Db.NCBI; + } + else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) { + db = Db.REFSEQ; + } } - else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) { - db = Db.REFSEQ; + else if ( ( id = SequenceIdParser.parse( node.getNodeData().getSequence().getGeneName() ) ) != null ) { + if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) { + // db = Db.NCBI; + } + else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) { + db = Db.REFSEQ; + } + } + else if ( ( id = SequenceIdParser.parse( node.getNodeData().getSequence().getSymbol() ) ) != null ) { + if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) { + // db = Db.NCBI; + } + else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) { + db = Db.REFSEQ; + } } } } if ( db == Db.NONE ) { - not_found.add( node.getName() ); + not_found.add( node.toString() ); } SequenceDatabaseEntry db_entry = null; if ( !ForesterUtil.isEmpty( query ) ) { @@ -202,22 +253,31 @@ public final class SequenceDbWsTools { } db_entry = obtainUniProtEntry( query, lines_to_return ); } - if ( ( db == Db.EMBL ) || ( ( db == Db.UNIPROT ) && ( db_entry == null ) ) ) { + else if ( db == Db.EMBL ) { if ( DEBUG ) { System.out.println( "embl: " + query ); } db_entry = obtainEmblEntry( new Identifier( query ), lines_to_return ); - if ( ( db == Db.UNIPROT ) && ( db_entry != null ) ) { - db = Db.EMBL; + } + else if ( db == Db.REFSEQ ) { + if ( DEBUG ) { + System.out.println( "refseq: " + query ); } + db_entry = obtainRefSeqEntryFromEmbl( new Identifier( query ), lines_to_return ); } + // else if ( db == Db.NCBI ) { + // if ( DEBUG ) { + // System.out.println( "ncbi: " + query ); + // } + // db_entry = obtainNcbiEntry( new Identifier( query ), lines_to_return ); + // } } else if ( ( db == Db.REFSEQ ) && ( id != null ) ) { db_entry = obtainRefSeqEntryFromEmbl( id, lines_to_return ); } - else if ( ( db == Db.NCBI ) && ( id != null ) ) { - db_entry = obtainEmblEntry( id, lines_to_return ); //TODO ? - } + //else if ( ( db == Db.NCBI ) && ( id != null ) ) { + // db_entry = obtainNcbiEntry( id, lines_to_return ); + //} if ( ( db_entry != null ) && !db_entry.isEmpty() ) { final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence(); @@ -229,9 +289,9 @@ public final class SequenceDbWsTools { else if ( db == Db.UNIPROT ) { type = "uniprot"; } - else if ( db == Db.NCBI ) { - type = "ncbi"; - } + // else if ( db == Db.NCBI ) { + // type = "ncbi"; + // } else if ( db == Db.REFSEQ ) { type = "refseq"; } -- 1.7.10.2