From 74b46d4227fcc7eedf7c3eeefa521191cf22fa30 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 19 Apr 2011 01:36:48 +0000 Subject: [PATCH] in progress --- .../src/org/forester/archaeopteryx/MainFrame.java | 20 +- .../archaeopteryx/MainFrameApplication.java | 19 ++ .../archaeopteryx/TaxonomyDataObtainer.java | 2 +- .../src/org/forester/archaeopteryx/TreePanel.java | 1 + .../archaeopteryx/UniProtSequenceObtainer.java | 197 ++++++++++++++++++++ ...UniProtTaxonomyEntry.java => UniProtEntry.java} | 68 +++---- 6 files changed, 265 insertions(+), 42 deletions(-) create mode 100644 forester/java/src/org/forester/archaeopteryx/UniProtSequenceObtainer.java rename forester/java/src/org/forester/ws/uniprot/{UniProtTaxonomyEntry.java => UniProtEntry.java} (71%) diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index 4127698..6cb8d5b 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -113,6 +113,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { JMenuItem _collapse_species_specific_subtrees; JMenuItem _collapse_below_threshold; //TODO implememt me JMenuItem _obtain_detailed_taxonomic_information_jmi; + JMenuItem _obtain_uniprot_seq_information_jmi; JMenuItem _move_node_names_to_tax_sn_jmi; JMenuItem _move_node_names_to_seq_names_jmi; JMenuItem _extract_tax_code_from_node_names_jmi; @@ -417,13 +418,15 @@ public abstract class MainFrame extends JFrame implements ActionListener { } boolean isSubtreeDisplayed() { - if ( getCurrentTreePanel().isCurrentTreeIsSubtree() ) { - JOptionPane - .showMessageDialog( this, - "This operation can only be performed on a complete tree, not on the currently displayed sub-tree only.", - "Operation can not be exectuted on a sub-tree", - JOptionPane.WARNING_MESSAGE ); - return true; + if ( getCurrentTreePanel() != null ) { + if ( getCurrentTreePanel().isCurrentTreeIsSubtree() ) { + JOptionPane + .showMessageDialog( this, + "This operation can only be performed on a complete tree, not on the currently displayed sub-tree only.", + "Operation can not be exectuted on a sub-tree", + JOptionPane.WARNING_MESSAGE ); + return true; + } } return false; } @@ -508,11 +511,12 @@ public abstract class MainFrame extends JFrame implements ActionListener { void buildViewMenu() { _view_jmenu = createMenu( "View", getConfiguration() ); + _view_jmenu.add( _display_basic_information_item = new JMenuItem( "Display Basic Information" ) ); + _view_jmenu.addSeparator(); _view_jmenu.add( _view_as_XML_item = new JMenuItem( "View as phyloXML" ) ); _view_jmenu.add( _view_as_NH_item = new JMenuItem( "View as Newick" ) ); _view_jmenu.add( _view_as_NHX_item = new JMenuItem( "View as NHX" ) ); _view_jmenu.add( _view_as_nexus_item = new JMenuItem( "View as Nexus" ) ); - _view_jmenu.add( _display_basic_information_item = new JMenuItem( "Display Basic Information" ) ); customizeJMenuItem( _display_basic_information_item ); customizeJMenuItem( _view_as_NH_item ); customizeJMenuItem( _view_as_NHX_item ); diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index 071863d..e5195c4 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -471,6 +471,9 @@ public final class MainFrameApplication extends MainFrame { } obtainDetailedTaxonomicInformation(); } + else if ( o == _obtain_uniprot_seq_information_jmi ) { + obtainUniProtSequenceInformation(); + } else if ( o == _read_values_jmi ) { if ( isSubtreeDisplayed() ) { return; @@ -829,6 +832,10 @@ public final class MainFrameApplication extends MainFrame { customizeJMenuItem( _obtain_detailed_taxonomic_information_jmi ); _obtain_detailed_taxonomic_information_jmi .setToolTipText( "To add additional taxonomic information (from UniProt Taxonomy)" ); + _tools_menu + .add( _obtain_uniprot_seq_information_jmi = new JMenuItem( "Obtain Sequence Information (from UniProt)" ) ); + customizeJMenuItem( _obtain_uniprot_seq_information_jmi ); + _obtain_uniprot_seq_information_jmi.setToolTipText( "To add additional sequence information (from UniProt)" ); _tools_menu.addSeparator(); if ( !Constants.__RELEASE ) { _tools_menu.add( _function_analysis = new JMenuItem( "Add UniProtKB Annotations" ) ); @@ -1383,6 +1390,18 @@ public final class MainFrameApplication extends MainFrame { } } + private void obtainUniProtSequenceInformation() { + if ( getCurrentTreePanel() != null ) { + final Phylogeny phy = getCurrentTreePanel().getPhylogeny(); + if ( ( phy != null ) && !phy.isEmpty() ) { + final UniProtSequenceObtainer u = new UniProtSequenceObtainer( this, + _mainpanel.getCurrentTreePanel(), + phy.copy() ); + new Thread( u ).start(); + } + } + } + private void print() { if ( ( getCurrentTreePanel() == null ) || ( getCurrentTreePanel().getPhylogeny() == null ) || getCurrentTreePanel().getPhylogeny().isEmpty() ) { diff --git a/forester/java/src/org/forester/archaeopteryx/TaxonomyDataObtainer.java b/forester/java/src/org/forester/archaeopteryx/TaxonomyDataObtainer.java index 841f38c..cab948a 100644 --- a/forester/java/src/org/forester/archaeopteryx/TaxonomyDataObtainer.java +++ b/forester/java/src/org/forester/archaeopteryx/TaxonomyDataObtainer.java @@ -135,4 +135,4 @@ public class TaxonomyDataObtainer implements Runnable { public void run() { execute(); } -} +} \ No newline at end of file diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index 1609344..c01fb48 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -4313,6 +4313,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee * an instance of a Phylogeny */ final void setTree( final Phylogeny t ) { + _nodes_in_preorder = null; _phylogeny = t; } diff --git a/forester/java/src/org/forester/archaeopteryx/UniProtSequenceObtainer.java b/forester/java/src/org/forester/archaeopteryx/UniProtSequenceObtainer.java new file mode 100644 index 0000000..4751335 --- /dev/null +++ b/forester/java/src/org/forester/archaeopteryx/UniProtSequenceObtainer.java @@ -0,0 +1,197 @@ +// Exp $ +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.archaeopteryx; + +import java.io.IOException; +import java.net.UnknownHostException; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import javax.swing.JOptionPane; + +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Identifier; +import org.forester.phylogeny.data.Sequence; +import org.forester.phylogeny.data.Taxonomy; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.ForesterUtil; +import org.forester.ws.uniprot.UniProtEntry; +import org.forester.ws.uniprot.UniProtWsTools; + +public class UniProtSequenceObtainer implements Runnable { + + private final Phylogeny _phy; + private final MainFrameApplication _mf; + private final TreePanel _treepanel; + + UniProtSequenceObtainer( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) { + _phy = phy; + _mf = mf; + _treepanel = treepanel; + } + + private String getBaseUrl() { + return UniProtWsTools.BASE_URL; + } + + private void execute() { + _mf.getMainPanel().getCurrentTreePanel().setWaitCursor(); + SortedSet not_found = null; + try { + not_found = obtainSeqInformation( _phy ); + } + catch ( final UnknownHostException e ) { + _mf.getMainPanel().getCurrentTreePanel().setArrowCursor(); + JOptionPane.showMessageDialog( _mf, + "Could not connect to \"" + getBaseUrl() + "\"", + "Network error during taxonomic information gathering", + JOptionPane.ERROR_MESSAGE ); + return; + } + catch ( final IOException e ) { + _mf.getMainPanel().getCurrentTreePanel().setArrowCursor(); + e.printStackTrace(); + JOptionPane.showMessageDialog( _mf, + e.toString(), + "Failed to obtain taxonomic information", + JOptionPane.ERROR_MESSAGE ); + return; + } + finally { + _mf.getMainPanel().getCurrentTreePanel().setArrowCursor(); + } + _treepanel.setTree( _phy ); + _mf.showWhole(); + _treepanel.setEdited( true ); + if ( ( not_found != null ) && ( not_found.size() > 0 ) ) { + int max = not_found.size(); + boolean more = false; + if ( max > 20 ) { + more = true; + max = 20; + } + final StringBuffer sb = new StringBuffer(); + sb.append( "Not all identifiers could be resolved.\n" ); + if ( not_found.size() == 1 ) { + sb.append( "The following identifier was not found:\n" ); + } + else { + sb.append( "The following identifiers were not found (total: " + not_found.size() + "):\n" ); + } + int i = 0; + for( final String string : not_found ) { + if ( i > 19 ) { + break; + } + sb.append( string ); + sb.append( "\n" ); + ++i; + } + if ( more ) { + sb.append( "..." ); + } + try { + JOptionPane.showMessageDialog( _mf, + sb.toString(), + "UniProt Sequence Tool Completed", + JOptionPane.WARNING_MESSAGE ); + } + catch ( final Exception e ) { + // Not important if this fails, do nothing. + } + } + else { + try { + JOptionPane.showMessageDialog( _mf, + "UniProt sequence tool successfully completed", + "UniProt Sequence Tool Completed", + JOptionPane.INFORMATION_MESSAGE ); + } + catch ( final Exception e ) { + // Not important if this fails, do nothing. + } + } + } + + synchronized public static SortedSet obtainSeqInformation( final Phylogeny phy ) throws IOException { + final SortedSet not_found = new TreeSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.getNodeData().isHasSequence() ) { + //TODO Do something + } + // else if ( !ForesterUtil.isEmpty( node.getName() ) ) { + // not_found.add( node.getName() ); + // } + else if ( !ForesterUtil.isEmpty( node.getName() ) ) { + String query = node.getName(); + if ( query.indexOf( '/' ) > 0 ) { + query = query.substring( 0, query.indexOf( '/' ) ); + } + final UniProtEntry upe = obtainUniProtEntry( query ); + if ( upe != null ) { + final Sequence seq = new Sequence(); + final Taxonomy tax = new Taxonomy(); + if ( !ForesterUtil.isEmpty( upe.getAc() ) ) { + seq.setAccession( new Accession( upe.getAc(), "uniprot" ) ); + } + if ( !ForesterUtil.isEmpty( upe.getRecName() ) ) { + seq.setName( upe.getRecName() ); + } + if ( !ForesterUtil.isEmpty( upe.getSymbol() ) ) { + seq.setSymbol( upe.getSymbol() ); + } + if ( !ForesterUtil.isEmpty( upe.getOsScientificName() ) ) { + tax.setScientificName( upe.getOsScientificName() ); + } + if ( !ForesterUtil.isEmpty( upe.getTaxId() ) ) { + tax.setIdentifier( new Identifier( upe.getTaxId(), "uniprot" ) ); + } + node.getNodeData().setTaxonomy( tax ); + node.getNodeData().setSequence( seq ); + } + else { + not_found.add( node.getName() ); + } + //} + } + } + return not_found; + } + + static UniProtEntry obtainUniProtEntry( final String query ) throws IOException { + final List lines = UniProtWsTools.queryUniprot( "uniprot/" + query + ".txt", 200 ); + return UniProtEntry.createInstanceFromPlainText( lines ); + } + + @Override + public void run() { + execute(); + } +} diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomyEntry.java b/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java similarity index 71% rename from forester/java/src/org/forester/ws/uniprot/UniProtTaxonomyEntry.java rename to forester/java/src/org/forester/ws/uniprot/UniProtEntry.java index 1a0cbfa..906473c 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomyEntry.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java @@ -27,33 +27,31 @@ package org.forester.ws.uniprot; import java.util.List; -public final class UniProtTaxonomyEntry { +public final class UniProtEntry { - private String _id; private String _ac; private String _rec_name; private String _os_scientific_name; - private String _os_common_name; private String _tax_id; + private String _symbol; - private UniProtTaxonomyEntry() { + private UniProtEntry() { } - public static UniProtTaxonomyEntry createInstanceFromPlainText( final List lines ) { - final UniProtTaxonomyEntry e = new UniProtTaxonomyEntry(); + public static UniProtEntry createInstanceFromPlainText( final List lines ) { + final UniProtEntry e = new UniProtEntry(); for( final String line : lines ) { - if ( line.startsWith( "ID" ) ) { - e.setId( line.split( "\\s+" )[ 1 ] ); - } - else if ( line.startsWith( "AC" ) ) { + if ( line.startsWith( "AC" ) ) { e.setAc( extract( line, "AC", ";" ) ); } else if ( line.startsWith( "DE" ) ) { if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) { e.setRecName( extract( line, "Full=", ";" ) ); } - if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) { - e.setRecName( extract( line, "Full=", ";" ) ); + } + else if ( line.startsWith( "GN" ) ) { + if ( ( line.indexOf( "Name=" ) > 0 ) ) { + e.setSymbol( extract( line, "Name=", ";" ) ); } } else if ( line.startsWith( "OS" ) ) { @@ -65,7 +63,9 @@ public final class UniProtTaxonomyEntry { } } else if ( line.startsWith( "OX" ) ) { - e.setTaxId( extract( line, "OX", ";" ) ); + if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) { + e.setTaxId( extract( line, "NCBI_TaxID=", ";" ) ); + } } } return e; @@ -78,15 +78,7 @@ public final class UniProtTaxonomyEntry { throw new IllegalArgumentException( "attempt to extract from [" + target + "] between [" + a + "] and [" + b + "]" ); } - return target.substring( i_a + a.length() + 1, i_b - 1 ).trim(); - } - - public String getId() { - return _id; - } - - private void setId( final String id ) { - _id = id; + return target.substring( i_a + a.length(), i_b ).trim(); } public String getAc() { @@ -94,7 +86,9 @@ public final class UniProtTaxonomyEntry { } private void setAc( final String ac ) { - _ac = ac; + if ( _ac == null ) { + _ac = ac; + } } public String getRecName() { @@ -102,7 +96,9 @@ public final class UniProtTaxonomyEntry { } private void setRecName( final String rec_name ) { - _rec_name = rec_name; + if ( _rec_name == null ) { + _rec_name = rec_name; + } } public String getOsScientificName() { @@ -110,22 +106,28 @@ public final class UniProtTaxonomyEntry { } private void setOsScientificName( final String os_scientific_name ) { - _os_scientific_name = os_scientific_name; + if ( _os_scientific_name == null ) { + _os_scientific_name = os_scientific_name; + } } - public String getOsCommonName() { - return _os_common_name; + public String getTaxId() { + return _tax_id; } - private void setOsCommonName( final String os_common_name ) { - _os_common_name = os_common_name; + private void setTaxId( final String tax_id ) { + if ( _tax_id == null ) { + _tax_id = tax_id; + } } - public String getTaxId() { - return _tax_id; + public String getSymbol() { + return _symbol; } - private void setTaxId( final String tax_id ) { - _tax_id = tax_id; + private void setSymbol( final String symbol ) { + if ( _symbol == null ) { + _symbol = symbol; + } } } -- 1.7.10.2