From: cmzmasek@gmail.com Date: Fri, 27 Sep 2013 22:31:21 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=caeb9639011c5792bb9e5c06928454f3f8bf6867;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index ada05c2..cc2c9eb 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -136,7 +136,6 @@ public abstract class MainFrame extends JFrame implements ActionListener { JMenuItem _gsdi_item; JMenuItem _gsdir_item; JMenuItem _lineage_inference; - JMenuItem _function_analysis; // file menu: JMenuItem _open_item; JMenuItem _open_url_item; diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index 774190c..248a5a4 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -66,7 +66,6 @@ import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE; import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION; import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE; import org.forester.archaeopteryx.tools.AncestralTaxonomyInferrer; -import org.forester.archaeopteryx.tools.GoAnnotation; import org.forester.archaeopteryx.tools.InferenceManager; import org.forester.archaeopteryx.tools.PhyloInferenceDialog; import org.forester.archaeopteryx.tools.PhylogeneticInferenceOptions; @@ -466,9 +465,6 @@ public final class MainFrameApplication extends MainFrame { } executeLineageInference(); } - else if ( o == _function_analysis ) { - executeFunctionAnalysis(); - } else if ( o == _obtain_detailed_taxonomic_information_jmi ) { if ( isSubtreeDisplayed() ) { return; @@ -1087,6 +1083,9 @@ public final class MainFrameApplication extends MainFrame { customizeJMenuItem( _move_node_names_to_seq_names_jmi ); _move_node_names_to_seq_names_jmi.setToolTipText( "To interpret node names as sequence (protein, gene) names" ); _tools_menu.addSeparator(); + _tools_menu.add( _obtain_seq_information_jmi = new JMenuItem( "Obtain Sequence Information" ) ); + customizeJMenuItem( _obtain_seq_information_jmi ); + _obtain_seq_information_jmi.setToolTipText( "To add additional sequence information" ); _tools_menu .add( _obtain_detailed_taxonomic_information_jmi = new JMenuItem( OBTAIN_DETAILED_TAXONOMIC_INFORMATION ) ); customizeJMenuItem( _obtain_detailed_taxonomic_information_jmi ); @@ -1097,17 +1096,7 @@ public final class MainFrameApplication extends MainFrame { customizeJMenuItem( _obtain_detailed_taxonomic_information_deleting_jmi ); _obtain_detailed_taxonomic_information_deleting_jmi .setToolTipText( "To add additional taxonomic information, deletes nodes for which taxonomy cannot found (from UniProt Taxonomy)" ); - _tools_menu.add( _obtain_seq_information_jmi = new JMenuItem( "Obtain Sequence Information" ) ); - customizeJMenuItem( _obtain_seq_information_jmi ); - _obtain_seq_information_jmi.setToolTipText( "To add additional sequence information" ); _tools_menu.addSeparator(); - if ( !Constants.__RELEASE ) { - _tools_menu.add( _function_analysis = new JMenuItem( "Add UniProtKB Annotations" ) ); - customizeJMenuItem( _function_analysis ); - _function_analysis - .setToolTipText( "To add UniProtKB annotations for sequences with appropriate identifiers" ); - _tools_menu.addSeparator(); - } _tools_menu.add( _read_values_jmi = new JMenuItem( "Attach Vector/Expression Values" ) ); customizeJMenuItem( _read_values_jmi ); _read_values_jmi.setToolTipText( "To attach vector (e.g. gene expression) values to tree nodes (beta)" ); @@ -1133,16 +1122,6 @@ public final class MainFrameApplication extends MainFrame { exit(); } - void executeFunctionAnalysis() { - if ( ( _mainpanel.getCurrentPhylogeny() == null ) || ( _mainpanel.getCurrentPhylogeny().isEmpty() ) ) { - return; - } - final GoAnnotation a = new GoAnnotation( this, - _mainpanel.getCurrentTreePanel(), - _mainpanel.getCurrentPhylogeny() ); - new Thread( a ).start(); - } - void executeLineageInference() { if ( ( _mainpanel.getCurrentPhylogeny() == null ) || ( _mainpanel.getCurrentPhylogeny().isEmpty() ) ) { return; diff --git a/forester/java/src/org/forester/archaeopteryx/tools/GoAnnotation.java b/forester/java/src/org/forester/archaeopteryx/tools/GoAnnotation.java deleted file mode 100644 index 702f30c..0000000 --- a/forester/java/src/org/forester/archaeopteryx/tools/GoAnnotation.java +++ /dev/null @@ -1,137 +0,0 @@ -// $Id: -// forester -- software libraries and applications -// for genomics and evolutionary biology research. -// -// Copyright (C) 2010 Christian M Zmasek -// Copyright (C) 2010 Sanford-Burnham Medical Research Institute -// All rights reserved -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -// -// Contact: phylosoft @ gmail . com -// WWW: https://sites.google.com/site/cmzmasek/home/software/forester - -package org.forester.archaeopteryx.tools; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.Arrays; -import java.util.List; - -import org.forester.archaeopteryx.MainFrameApplication; -import org.forester.archaeopteryx.TreePanel; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Accession; -import org.forester.phylogeny.data.Annotation; -import org.forester.phylogeny.data.Sequence; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; -import org.forester.util.ForesterUtil; - -public class GoAnnotation extends RunnableProcess { - - private static final String SYMBOL = "Symbol"; - private static final String ASPECT = "Aspect"; - private static final String DB = "DB"; - private static final String EVIDENCE = "Evidence"; - private static final String GO_NAME = "GO Name"; - private static final String GO_ID = "GO ID"; - private final Phylogeny _phy; - private final MainFrameApplication _mf; - private final TreePanel _treepanel; - - public GoAnnotation( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) { - _phy = phy; - _mf = mf; - _treepanel = treepanel; - } - - private void annotate() { - start( _mf, "GO annotate" ); - for( final PhylogenyNodeIterator iter = _phy.iteratorPostorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( ( node.getNodeData().getSequences() != null ) && !node.getNodeData().getSequences().isEmpty() ) { - for( final Sequence seq : node.getNodeData().getSequences() ) { - if ( ( ( seq.getAccession() != null ) && !ForesterUtil.isEmpty( seq.getAccession().getValue() ) && ( seq - .getAnnotations() == null ) ) || seq.getAnnotations().isEmpty() ) { - final Accession acc = seq.getAccession(); - try { - final URL url = new URL( "http://www.ebi.ac.uk/QuickGO/GAnnotation?protein=" - + acc.getValue() + "&format=tsv" ); - final HttpURLConnection url_connection = ( HttpURLConnection ) url.openConnection(); - final BufferedReader br = new BufferedReader( new InputStreamReader( url_connection.getInputStream() ) ); - final List columns = Arrays.asList( br.readLine().split( "\t" ) ); - System.out.println( columns ); - final int db_index = columns.indexOf( DB ); - final int goid_index = columns.indexOf( GO_ID ); - final int name_index = columns.indexOf( GO_NAME ); - final int evidence_index = columns.indexOf( EVIDENCE ); - final int taxon_index = columns.indexOf( "Taxon" ); - final int qualifier_index = columns.indexOf( "Qualifier" ); - final int reference_index = columns.indexOf( "Reference" ); - final int symbol_index = columns.indexOf( SYMBOL ); - final int splice_index = columns.indexOf( "Splice" ); - final int with_index = columns.indexOf( "With" ); - final int aspect_index = columns.indexOf( ASPECT ); - final int source_index = columns.indexOf( "Source" ); - String line; - while ( ( line = br.readLine() ) != null ) { - final String[] fields = line.split( "\t" ); - final Annotation a = new Annotation( fields[ goid_index ] ); - a.setDesc( name_index >= 0 ? fields[ name_index ] : "" ); - a.setSource( db_index >= 0 ? fields[ db_index ] : "" ); - a.setEvidence( evidence_index >= 0 ? fields[ evidence_index ] : "" ); - a.setType( aspect_index >= 0 ? fields[ aspect_index ] : "" ); - seq.addAnnotation( a ); - if ( ForesterUtil.isEmpty( seq.getSymbol() ) && ( symbol_index >= 0 ) - && !ForesterUtil.isEmpty( fields[ symbol_index ] ) ) { - seq.setSymbol( fields[ symbol_index ] ); - } - System.out.println( DB + ": " + fields[ db_index ] ); - System.out.println( GO_ID + ": " + fields[ goid_index ] ); - System.out.println( GO_NAME + ": " + fields[ name_index ] ); - System.out.println( EVIDENCE + ": " + fields[ evidence_index ] ); - System.out.println( " taxon" + ": " + fields[ taxon_index ] ); - System.out.println( " qualifier" + ": " + fields[ qualifier_index ] ); - System.out.println( " reference" + ": " + fields[ reference_index ] ); - System.out.println( SYMBOL + ": " + fields[ symbol_index ] ); - System.out.println( " splice" + ": " + fields[ splice_index ] ); - System.out.println( " with" + ": " + fields[ with_index ] ); - System.out.println( ASPECT + ": " + fields[ aspect_index ] ); - System.out.println( " source" + ": " + fields[ source_index ] ); - } - br.close(); - } - catch ( final IOException e ) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - } - } - } - end( _mf ); - _treepanel.repaint(); - _treepanel.setEdited( true ); - } - - @Override - public void run() { - annotate(); - } -} diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 796acd0..609b7df 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -10818,6 +10818,12 @@ public final class Test { if ( !entry.getSequenceName().equals( "Aspartate aminotransferase, mitochondrial" ) ) { return false; } + if ( !entry.getSequenceSymbol().equals( "mAspAT" ) ) { + return false; + } + if ( !entry.getGeneName().equals( "GOT2" ) ) { + return false; + } if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) { return false; } diff --git a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java index c8131ad..091ec83 100644 --- a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java @@ -81,15 +81,8 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { e.setPA( DatabaseTools.extract( line, "PA" ) ); } else if ( line.startsWith( "DE" ) ) { - // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) { e.setDe( DatabaseTools.extract( line, "DE" ) ); - //} } - // else if ( line.startsWith( "GN" ) ) { - // if ( ( line.indexOf( "Name=" ) > 0 ) ) { - // e.setSymbol( extract( line, "Name=", ";" ) ); - // } - // } else if ( line.startsWith( "OS" ) ) { if ( line.indexOf( "(" ) > 0 ) { e.setOs( DatabaseTools.extract( line, "OS", "(" ) ); @@ -156,12 +149,6 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { return _symbol; } - private void setSymbol( final String symbol ) { - if ( _symbol == null ) { - _symbol = symbol; - } - } - @Override public boolean isEmpty() { return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) @@ -177,4 +164,9 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { public void setProvider( final String provider ) { _provider = provider; } + + @Override + public String getGeneName() { + return null; + } } diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java index 46c5fdf..da3a5c2 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java @@ -27,6 +27,8 @@ package org.forester.ws.seqdb; public interface SequenceDatabaseEntry { + public String getGeneName(); + public boolean isEmpty(); public String getAccession(); diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index 1bb2e98..eb80141 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -37,9 +37,11 @@ import java.util.List; import java.util.SortedSet; import java.util.TreeSet; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; @@ -237,8 +239,16 @@ public final class SequenceDbWsTools { if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) { seq.setName( db_entry.getSequenceName() ); } - if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) { - seq.setSymbol( db_entry.getSequenceSymbol() ); + if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { + try { + seq.setSymbol( db_entry.getGeneName() ); + } + catch ( PhyloXmlDataFormatException e ) { + // Eat this exception. + } + } + if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { + seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) ); } final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy(); diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 05e2e59..c30dc4c 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -33,6 +33,8 @@ public final class UniProtEntry implements SequenceDatabaseEntry { private String _ac; private String _name; + private String _symbol; + private String _gene_name; private String _os_scientific_name; private String _tax_id; @@ -47,6 +49,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public static SequenceDatabaseEntry createInstanceFromPlainText( final List lines ) { final UniProtEntry e = new UniProtEntry(); for( final String line : lines ) { + System.out.println( line ); if ( line.startsWith( "AC" ) ) { e.setAc( DatabaseTools.extract( line, "AC", ";" ) ); } @@ -58,6 +61,16 @@ public final class UniProtEntry implements SequenceDatabaseEntry { e.setSequenceName( DatabaseTools.extract( line, "Full=", ";" ) ); } } + else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) { + if ( line.indexOf( "Short=" ) > 0 ) { + e.setSequenceSymbol( DatabaseTools.extract( line, "Short=", ";" ) ); + } + } + else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) { + if ( line.indexOf( "Name=" ) > 0 ) { + e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) ); + } + } else if ( line.startsWith( "OS" ) ) { if ( line.indexOf( "(" ) > 0 ) { e.setOsScientificName( DatabaseTools.extract( line, "OS", "(" ) ); @@ -75,6 +88,10 @@ public final class UniProtEntry implements SequenceDatabaseEntry { return e; } + private void setSequenceSymbol( String symbol ) { + _symbol = symbol; + } + @Override public String getAccession() { return _ac; @@ -119,20 +136,32 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } } + private void setGeneName( final String gene_name ) { + if ( _gene_name == null ) { + _gene_name = gene_name; + } + } + @Override public String getSequenceSymbol() { - return ""; + return _symbol; } @Override public boolean isEmpty() { return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) - && ForesterUtil.isEmpty( getTaxonomyScientificName() ) - && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) ); + && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) + && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil + .isEmpty( getSequenceSymbol() ) ); } @Override public String getProvider() { return "uniprot"; } + + @Override + public String getGeneName() { + return _gene_name; + } }