in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 29 Apr 2011 01:19:50 +0000 (01:19 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 29 Apr 2011 01:19:50 +0000 (01:19 +0000)
forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java
forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java
forester/java/src/org/forester/ws/uniprot/SequenceDatabaseEntry.java
forester/java/src/org/forester/ws/uniprot/UniProtEntry.java
forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java

index 5b454e3..7aed148 100644 (file)
@@ -52,7 +52,7 @@ public final class SequenceDataRetriver implements Runnable {
     private final Phylogeny            _phy;
     private final MainFrameApplication _mf;
     private final TreePanel            _treepanel;
-    private final static boolean       DEBUG = true;
+    private final static boolean       DEBUG = false;
 
     private enum Db {
         UNKNOWN, UNIPROT, EMBL;
@@ -105,12 +105,12 @@ public final class SequenceDataRetriver implements Runnable {
                 max = 20;
             }
             final StringBuffer sb = new StringBuffer();
-            sb.append( "Not all identifiers could be resolved.\n" );
             if ( not_found.size() == 1 ) {
-                sb.append( "The following identifier was not found:\n" );
+                sb.append( "Data for the following sequence identifier was not found:\n" );
             }
             else {
-                sb.append( "The following identifiers were not found (total: " + not_found.size() + "):\n" );
+                sb.append( "Data for the following sequence identifiers was not found (total: " + not_found.size()
+                        + "):\n" );
             }
             int i = 0;
             for( final String string : not_found ) {
@@ -127,7 +127,7 @@ public final class SequenceDataRetriver implements Runnable {
             try {
                 JOptionPane.showMessageDialog( _mf,
                                                sb.toString(),
-                                               "UniProt Sequence Tool Completed",
+                                               "Sequence Tool Completed",
                                                JOptionPane.WARNING_MESSAGE );
             }
             catch ( final Exception e ) {
@@ -204,7 +204,7 @@ public final class SequenceDataRetriver implements Runnable {
                         // Ignore.
                     }
                 }
-                else if ( db == Db.EMBL ) {
+                if ( ( db == Db.EMBL ) || ( ( db == Db.UNIPROT ) && ( db_entry == null ) ) ) {
                     if ( DEBUG ) {
                         System.out.println( "embl: " + query );
                     }
@@ -214,10 +214,20 @@ public final class SequenceDataRetriver implements Runnable {
                     catch ( final FileNotFoundException e ) {
                         // Ignore.
                     }
+                    if ( ( db == Db.UNIPROT ) && ( db_entry != null ) ) {
+                        db = Db.EMBL;
+                    }
                 }
-                if ( db_entry != null ) {
+                if ( ( db_entry != null ) && !db_entry.isEmpty() ) {
                     if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
-                        seq.setAccession( new Accession( db_entry.getAccession(), "uniprot" ) );
+                        String type = null;
+                        if ( db == Db.EMBL ) {
+                            type = "embl";
+                        }
+                        else if ( db == Db.UNIPROT ) {
+                            type = "uniprot";
+                        }
+                        seq.setAccession( new Accession( db_entry.getAccession(), type ) );
                     }
                     if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) {
                         seq.setName( db_entry.getSequenceName() );
index 98fe38d..5f0d599 100644 (file)
@@ -27,6 +27,8 @@ package org.forester.ws.uniprot;
 
 import java.util.List;
 
+import org.forester.util.ForesterUtil;
+
 public final class EbiDbEntry implements SequenceDatabaseEntry {
 
     //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
@@ -48,7 +50,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
         final EbiDbEntry e = new EbiDbEntry();
         for( final String line : lines ) {
             if ( line.startsWith( "PA" ) ) {
-                e.setPA( DatabaseTools.extract( line, "PA", ";" ) );
+                e.setPA( DatabaseTools.extract( line, "PA" ) );
             }
             else if ( line.startsWith( "DE" ) ) {
                 // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
@@ -131,4 +133,11 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
             _symbol = symbol;
         }
     }
+
+    @Override
+    public boolean isEmpty() {
+        return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
+                && ForesterUtil.isEmpty( getTaxonomyScientificName() )
+                && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
+    }
 }
index e62ee5f..9089737 100644 (file)
@@ -27,6 +27,8 @@ package org.forester.ws.uniprot;
 
 public interface SequenceDatabaseEntry {
 
+    public boolean isEmpty();
+
     public String getAccession();
 
     public String getSequenceName();
index 5a5568a..cd5d541 100644 (file)
@@ -27,6 +27,8 @@ package org.forester.ws.uniprot;
 
 import java.util.List;
 
+import org.forester.util.ForesterUtil;
+
 public final class UniProtEntry implements SequenceDatabaseEntry {
 
     private String _ac;
@@ -130,4 +132,11 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
             _symbol = symbol;
         }
     }
+
+    @Override
+    public boolean isEmpty() {
+        return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
+                && ForesterUtil.isEmpty( getTaxonomyScientificName() )
+                && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
+    }
 }
index 8bef111..d24b171 100644 (file)
@@ -54,7 +54,7 @@ public final class UniProtWsTools {
     // \Z => end of String
     private final static Pattern UNIPROT_AC_PATTERN = Pattern
                                                             .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d[A-Z0-9]{3}\\d)(?:[^a-zA-Z0-9]|\\Z)" );
-    private final static boolean DEBUG              = false;
+    private final static boolean DEBUG              = true;
 
     private static String encode( final String str ) throws UnsupportedEncodingException {
         return URLEncoder.encode( str.trim(), URL_ENC );
@@ -237,6 +237,7 @@ public final class UniProtWsTools {
         String line;
         final List<String> result = new ArrayList<String>();
         while ( ( line = in.readLine() ) != null ) {
+            System.out.println( line );
             result.add( line );
             if ( result.size() > max_lines_to_return ) {
                 break;
@@ -254,7 +255,7 @@ public final class UniProtWsTools {
 
     public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return )
             throws IOException {
-        final List<String> lines = queryEmblDb( "query", max_lines_to_return );
+        final List<String> lines = queryEmblDb( query, max_lines_to_return );
         return EbiDbEntry.createInstanceFromPlainText( lines );
     }
 }