inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 27 Sep 2013 22:31:21 +0000 (22:31 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 27 Sep 2013 22:31:21 +0000 (22:31 +0000)
forester/java/src/org/forester/archaeopteryx/MainFrame.java
forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java
forester/java/src/org/forester/archaeopteryx/tools/GoAnnotation.java [deleted file]
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java
forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java
forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java
forester/java/src/org/forester/ws/seqdb/UniProtEntry.java

index ada05c2..cc2c9eb 100644 (file)
@@ -136,7 +136,6 @@ public abstract class MainFrame extends JFrame implements ActionListener {
     JMenuItem                   _gsdi_item;
     JMenuItem                   _gsdir_item;
     JMenuItem                   _lineage_inference;
-    JMenuItem                   _function_analysis;
     // file menu:
     JMenuItem                   _open_item;
     JMenuItem                   _open_url_item;
index 774190c..248a5a4 100644 (file)
@@ -66,7 +66,6 @@ import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE;
 import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION;
 import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE;
 import org.forester.archaeopteryx.tools.AncestralTaxonomyInferrer;
-import org.forester.archaeopteryx.tools.GoAnnotation;
 import org.forester.archaeopteryx.tools.InferenceManager;
 import org.forester.archaeopteryx.tools.PhyloInferenceDialog;
 import org.forester.archaeopteryx.tools.PhylogeneticInferenceOptions;
@@ -466,9 +465,6 @@ public final class MainFrameApplication extends MainFrame {
                 }
                 executeLineageInference();
             }
-            else if ( o == _function_analysis ) {
-                executeFunctionAnalysis();
-            }
             else if ( o == _obtain_detailed_taxonomic_information_jmi ) {
                 if ( isSubtreeDisplayed() ) {
                     return;
@@ -1087,6 +1083,9 @@ public final class MainFrameApplication extends MainFrame {
         customizeJMenuItem( _move_node_names_to_seq_names_jmi );
         _move_node_names_to_seq_names_jmi.setToolTipText( "To interpret node names as sequence (protein, gene) names" );
         _tools_menu.addSeparator();
+        _tools_menu.add( _obtain_seq_information_jmi = new JMenuItem( "Obtain Sequence Information" ) );
+        customizeJMenuItem( _obtain_seq_information_jmi );
+        _obtain_seq_information_jmi.setToolTipText( "To add additional sequence information" );
         _tools_menu
                 .add( _obtain_detailed_taxonomic_information_jmi = new JMenuItem( OBTAIN_DETAILED_TAXONOMIC_INFORMATION ) );
         customizeJMenuItem( _obtain_detailed_taxonomic_information_jmi );
@@ -1097,17 +1096,7 @@ public final class MainFrameApplication extends MainFrame {
         customizeJMenuItem( _obtain_detailed_taxonomic_information_deleting_jmi );
         _obtain_detailed_taxonomic_information_deleting_jmi
                 .setToolTipText( "To add additional taxonomic information, deletes nodes for which taxonomy cannot found (from UniProt Taxonomy)" );
-        _tools_menu.add( _obtain_seq_information_jmi = new JMenuItem( "Obtain Sequence Information" ) );
-        customizeJMenuItem( _obtain_seq_information_jmi );
-        _obtain_seq_information_jmi.setToolTipText( "To add additional sequence information" );
         _tools_menu.addSeparator();
-        if ( !Constants.__RELEASE ) {
-            _tools_menu.add( _function_analysis = new JMenuItem( "Add UniProtKB Annotations" ) );
-            customizeJMenuItem( _function_analysis );
-            _function_analysis
-                    .setToolTipText( "To add UniProtKB annotations for sequences with appropriate identifiers" );
-            _tools_menu.addSeparator();
-        }
         _tools_menu.add( _read_values_jmi = new JMenuItem( "Attach Vector/Expression Values" ) );
         customizeJMenuItem( _read_values_jmi );
         _read_values_jmi.setToolTipText( "To attach vector (e.g. gene expression) values to tree nodes (beta)" );
@@ -1133,16 +1122,6 @@ public final class MainFrameApplication extends MainFrame {
         exit();
     }
 
-    void executeFunctionAnalysis() {
-        if ( ( _mainpanel.getCurrentPhylogeny() == null ) || ( _mainpanel.getCurrentPhylogeny().isEmpty() ) ) {
-            return;
-        }
-        final GoAnnotation a = new GoAnnotation( this,
-                                                 _mainpanel.getCurrentTreePanel(),
-                                                 _mainpanel.getCurrentPhylogeny() );
-        new Thread( a ).start();
-    }
-
     void executeLineageInference() {
         if ( ( _mainpanel.getCurrentPhylogeny() == null ) || ( _mainpanel.getCurrentPhylogeny().isEmpty() ) ) {
             return;
diff --git a/forester/java/src/org/forester/archaeopteryx/tools/GoAnnotation.java b/forester/java/src/org/forester/archaeopteryx/tools/GoAnnotation.java
deleted file mode 100644 (file)
index 702f30c..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-// $Id:
-// forester -- software libraries and applications
-// for genomics and evolutionary biology research.
-//
-// Copyright (C) 2010 Christian M Zmasek
-// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
-// All rights reserved
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-//
-// Contact: phylosoft @ gmail . com
-// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
-
-package org.forester.archaeopteryx.tools;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.Arrays;
-import java.util.List;
-
-import org.forester.archaeopteryx.MainFrameApplication;
-import org.forester.archaeopteryx.TreePanel;
-import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Accession;
-import org.forester.phylogeny.data.Annotation;
-import org.forester.phylogeny.data.Sequence;
-import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
-import org.forester.util.ForesterUtil;
-
-public class GoAnnotation extends RunnableProcess {
-
-    private static final String        SYMBOL   = "Symbol";
-    private static final String        ASPECT   = "Aspect";
-    private static final String        DB       = "DB";
-    private static final String        EVIDENCE = "Evidence";
-    private static final String        GO_NAME  = "GO Name";
-    private static final String        GO_ID    = "GO ID";
-    private final Phylogeny            _phy;
-    private final MainFrameApplication _mf;
-    private final TreePanel            _treepanel;
-
-    public GoAnnotation( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) {
-        _phy = phy;
-        _mf = mf;
-        _treepanel = treepanel;
-    }
-
-    private void annotate() {
-        start( _mf, "GO annotate" );
-        for( final PhylogenyNodeIterator iter = _phy.iteratorPostorder(); iter.hasNext(); ) {
-            final PhylogenyNode node = iter.next();
-            if ( ( node.getNodeData().getSequences() != null ) && !node.getNodeData().getSequences().isEmpty() ) {
-                for( final Sequence seq : node.getNodeData().getSequences() ) {
-                    if ( ( ( seq.getAccession() != null ) && !ForesterUtil.isEmpty( seq.getAccession().getValue() ) && ( seq
-                            .getAnnotations() == null ) ) || seq.getAnnotations().isEmpty() ) {
-                        final Accession acc = seq.getAccession();
-                        try {
-                            final URL url = new URL( "http://www.ebi.ac.uk/QuickGO/GAnnotation?protein="
-                                    + acc.getValue() + "&format=tsv" );
-                            final HttpURLConnection url_connection = ( HttpURLConnection ) url.openConnection();
-                            final BufferedReader br = new BufferedReader( new InputStreamReader( url_connection.getInputStream() ) );
-                            final List<String> columns = Arrays.asList( br.readLine().split( "\t" ) );
-                            System.out.println( columns );
-                            final int db_index = columns.indexOf( DB );
-                            final int goid_index = columns.indexOf( GO_ID );
-                            final int name_index = columns.indexOf( GO_NAME );
-                            final int evidence_index = columns.indexOf( EVIDENCE );
-                            final int taxon_index = columns.indexOf( "Taxon" );
-                            final int qualifier_index = columns.indexOf( "Qualifier" );
-                            final int reference_index = columns.indexOf( "Reference" );
-                            final int symbol_index = columns.indexOf( SYMBOL );
-                            final int splice_index = columns.indexOf( "Splice" );
-                            final int with_index = columns.indexOf( "With" );
-                            final int aspect_index = columns.indexOf( ASPECT );
-                            final int source_index = columns.indexOf( "Source" );
-                            String line;
-                            while ( ( line = br.readLine() ) != null ) {
-                                final String[] fields = line.split( "\t" );
-                                final Annotation a = new Annotation( fields[ goid_index ] );
-                                a.setDesc( name_index >= 0 ? fields[ name_index ] : "" );
-                                a.setSource( db_index >= 0 ? fields[ db_index ] : "" );
-                                a.setEvidence( evidence_index >= 0 ? fields[ evidence_index ] : "" );
-                                a.setType( aspect_index >= 0 ? fields[ aspect_index ] : "" );
-                                seq.addAnnotation( a );
-                                if ( ForesterUtil.isEmpty( seq.getSymbol() ) && ( symbol_index >= 0 )
-                                        && !ForesterUtil.isEmpty( fields[ symbol_index ] ) ) {
-                                    seq.setSymbol( fields[ symbol_index ] );
-                                }
-                                System.out.println( DB + ": " + fields[ db_index ] );
-                                System.out.println( GO_ID + ": " + fields[ goid_index ] );
-                                System.out.println( GO_NAME + ": " + fields[ name_index ] );
-                                System.out.println( EVIDENCE + ": " + fields[ evidence_index ] );
-                                System.out.println( " taxon" + ": " + fields[ taxon_index ] );
-                                System.out.println( " qualifier" + ": " + fields[ qualifier_index ] );
-                                System.out.println( " reference" + ": " + fields[ reference_index ] );
-                                System.out.println( SYMBOL + ": " + fields[ symbol_index ] );
-                                System.out.println( " splice" + ": " + fields[ splice_index ] );
-                                System.out.println( " with" + ": " + fields[ with_index ] );
-                                System.out.println( ASPECT + ": " + fields[ aspect_index ] );
-                                System.out.println( " source" + ": " + fields[ source_index ] );
-                            }
-                            br.close();
-                        }
-                        catch ( final IOException e ) {
-                            // TODO Auto-generated catch block
-                            e.printStackTrace();
-                        }
-                    }
-                }
-            }
-        }
-        end( _mf );
-        _treepanel.repaint();
-        _treepanel.setEdited( true );
-    }
-
-    @Override
-    public void run() {
-        annotate();
-    }
-}
index 796acd0..609b7df 100644 (file)
@@ -10818,6 +10818,12 @@ public final class Test {
             if ( !entry.getSequenceName().equals( "Aspartate aminotransferase, mitochondrial" ) ) {
                 return false;
             }
+            if ( !entry.getSequenceSymbol().equals( "mAspAT" ) ) {
+                return false;
+            }
+            if ( !entry.getGeneName().equals( "GOT2" ) ) {
+                return false;
+            }
             if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) {
                 return false;
             }
index c8131ad..091ec83 100644 (file)
@@ -81,15 +81,8 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
                 e.setPA( DatabaseTools.extract( line, "PA" ) );
             }
             else if ( line.startsWith( "DE" ) ) {
-                // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
                 e.setDe( DatabaseTools.extract( line, "DE" ) );
-                //}
             }
-            //  else if ( line.startsWith( "GN" ) ) {
-            //      if ( ( line.indexOf( "Name=" ) > 0 ) ) {
-            //          e.setSymbol( extract( line, "Name=", ";" ) );
-            //      }
-            //  }
             else if ( line.startsWith( "OS" ) ) {
                 if ( line.indexOf( "(" ) > 0 ) {
                     e.setOs( DatabaseTools.extract( line, "OS", "(" ) );
@@ -156,12 +149,6 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
         return _symbol;
     }
 
-    private void setSymbol( final String symbol ) {
-        if ( _symbol == null ) {
-            _symbol = symbol;
-        }
-    }
-
     @Override
     public boolean isEmpty() {
         return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
@@ -177,4 +164,9 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
     public void setProvider( final String provider ) {
         _provider = provider;
     }
+
+    @Override
+    public String getGeneName() {
+        return null;
+    }
 }
index 46c5fdf..da3a5c2 100644 (file)
@@ -27,6 +27,8 @@ package org.forester.ws.seqdb;
 
 public interface SequenceDatabaseEntry {
 
+    public String getGeneName();
+
     public boolean isEmpty();
 
     public String getAccession();
index 1bb2e98..eb80141 100644 (file)
@@ -37,9 +37,11 @@ import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
 import org.forester.phylogeny.data.Identifier;
 import org.forester.phylogeny.data.Sequence;
 import org.forester.phylogeny.data.Taxonomy;
@@ -237,8 +239,16 @@ public final class SequenceDbWsTools {
                 if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) {
                     seq.setName( db_entry.getSequenceName() );
                 }
-                if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) {
-                    seq.setSymbol( db_entry.getSequenceSymbol() );
+                if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
+                    try {
+                        seq.setSymbol( db_entry.getGeneName() );
+                    }
+                    catch ( PhyloXmlDataFormatException e ) {
+                        // Eat this exception.
+                    }
+                }
+                if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
+                    seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) );
                 }
                 final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy()
                         : new Taxonomy();
index 05e2e59..c30dc4c 100644 (file)
@@ -33,6 +33,8 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
 
     private String _ac;
     private String _name;
+    private String _symbol;
+    private String _gene_name;
     private String _os_scientific_name;
     private String _tax_id;
 
@@ -47,6 +49,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
     public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
         final UniProtEntry e = new UniProtEntry();
         for( final String line : lines ) {
+            System.out.println( line );
             if ( line.startsWith( "AC" ) ) {
                 e.setAc( DatabaseTools.extract( line, "AC", ";" ) );
             }
@@ -58,6 +61,16 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
                     e.setSequenceName( DatabaseTools.extract( line, "Full=", ";" ) );
                 }
             }
+            else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) {
+                if ( line.indexOf( "Short=" ) > 0 ) {
+                    e.setSequenceSymbol( DatabaseTools.extract( line, "Short=", ";" ) );
+                }
+            }
+            else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) {
+                if ( line.indexOf( "Name=" ) > 0 ) {
+                    e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) );
+                }
+            }
             else if ( line.startsWith( "OS" ) ) {
                 if ( line.indexOf( "(" ) > 0 ) {
                     e.setOsScientificName( DatabaseTools.extract( line, "OS", "(" ) );
@@ -75,6 +88,10 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
         return e;
     }
 
+    private void setSequenceSymbol( String symbol ) {
+        _symbol = symbol;
+    }
+
     @Override
     public String getAccession() {
         return _ac;
@@ -119,20 +136,32 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
         }
     }
 
+    private void setGeneName( final String gene_name ) {
+        if ( _gene_name == null ) {
+            _gene_name = gene_name;
+        }
+    }
+
     @Override
     public String getSequenceSymbol() {
-        return "";
+        return _symbol;
     }
 
     @Override
     public boolean isEmpty() {
         return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
-                && ForesterUtil.isEmpty( getTaxonomyScientificName() )
-                && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
+                && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() )
+                && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil
+                .isEmpty( getSequenceSymbol() ) );
     }
 
     @Override
     public String getProvider() {
         return "uniprot";
     }
+
+    @Override
+    public String getGeneName() {
+        return _gene_name;
+    }
 }