JAL-2051 better checking of retrieved / duplicated accession ids
[jalview.git] / src / jalview / gui / SequenceFetcher.java
index ab78ad3..812bf76 100755 (executable)
@@ -380,11 +380,12 @@ public class SequenceFetcher extends JPanel implements Runnable
         {
           pdbSourceAction();
         }
-        else if (currentSelection.equalsIgnoreCase("uniprot")
-                && (database.action == KeyEvent.VK_ENTER || ((debounceTrap % 2) == 0)))
-        {
-          uniprotSourceAction();
-        }
+        // else if (currentSelection.equalsIgnoreCase("uniprot")
+        // && (database.action == KeyEvent.VK_ENTER || ((debounceTrap % 2) ==
+        // 0)))
+        // {
+        // uniprotSourceAction();
+        // }
         else
         {
           otherSourceAction();
@@ -538,30 +539,31 @@ public class SequenceFetcher extends JPanel implements Runnable
     // TODO: Refactor to GUI independent code and write tests.
     // indicate if successive sources should be merged into one alignment.
     boolean addToLast = false;
-    ArrayList<String> aresultq = new ArrayList<String>(), presultTitle = new ArrayList<String>();
-    ArrayList<AlignmentI> presult = new ArrayList<AlignmentI>(), aresult = new ArrayList<AlignmentI>();
+    List<String> aresultq = new ArrayList<String>();
+    List<String> presultTitle = new ArrayList<String>();
+    List<AlignmentI> presult = new ArrayList<AlignmentI>();
+    List<AlignmentI> aresult = new ArrayList<AlignmentI>();
     Iterator<DbSourceProxy> proxies = database.getSelectedSources()
             .iterator();
     String[] qries;
-    List<String> nextfetch = Arrays.asList(qries = textArea.getText()
+    List<String> nextFetch = Arrays.asList(qries = textArea.getText()
             .split(";"));
     Iterator<String> en = Arrays.asList(new String[0]).iterator();
     int nqueries = qries.length;
 
     FeatureSettingsModelI preferredFeatureColours = null;
-    while (proxies.hasNext() && (en.hasNext() || nextfetch.size() > 0))
+    while (proxies.hasNext() && (en.hasNext() || nextFetch.size() > 0))
     {
-      if (!en.hasNext() && nextfetch.size() > 0)
+      if (!en.hasNext() && nextFetch.size() > 0)
       {
-        en = nextfetch.iterator();
-        nqueries = nextfetch.size();
+        en = nextFetch.iterator();
+        nqueries = nextFetch.size();
         // save the remaining queries in the original array
-        qries = nextfetch.toArray(new String[nqueries]);
-        nextfetch = new ArrayList<String>();
+        qries = nextFetch.toArray(new String[nqueries]);
+        nextFetch = new ArrayList<String>();
       }
 
       DbSourceProxy proxy = proxies.next();
-      boolean isAliSource = false;
       try
       {
         // update status
@@ -572,122 +574,27 @@ public class SequenceFetcher extends JPanel implements Runnable
                             Integer.valueOf(nqueries).toString(),
                             proxy.getDbName() }), Thread.currentThread()
                         .hashCode());
-        isAliSource = proxy.isAlignmentSource();
         if (proxy.getMaximumQueryCount() == 1)
         {
+          /*
+           * proxy only handles one accession id at a time
+           */
           while (en.hasNext())
           {
-            String item = en.next();
-            try
-            {
-              if (aresult != null)
-              {
-                try
-                {
-                  // give the server a chance to breathe
-                  Thread.sleep(5);
-                } catch (Exception e)
-                {
-                  //
-                }
-
-              }
-
-              AlignmentI indres = null;
-              try
-              {
-                indres = proxy.getSequenceRecords(item);
-              } catch (OutOfMemoryError oome)
-              {
-                new OOMWarning("fetching " + item + " from "
-                        + proxy.getDbName(), oome, this);
-              }
-              if (indres != null)
-              {
-                aresultq.add(item);
-                aresult.add(indres);
-              }
-              else
-              {
-                nextfetch.add(item);
-              }
-            } catch (Exception e)
+            String acc = en.next();
+            if (!fetchSingleAccession(proxy, acc, aresultq, aresult))
             {
-              Cache.log.info(
-                      "Error retrieving " + item
-                      + " from " + proxy.getDbName(), e);
-              nextfetch.add(item);
+              nextFetch.add(acc);
             }
           }
         }
         else
         {
-          StringBuffer multiacc = new StringBuffer();
-          ArrayList<String> tosend = new ArrayList<String>();
-          while (en.hasNext())
-          {
-            String nel = en.next();
-            tosend.add(nel);
-            multiacc.append(nel);
-            if (en.hasNext())
-            {
-              multiacc.append(proxy.getAccessionSeparator());
-            }
-          }
-          try
-          {
-            AlignmentI rslt;
-            SequenceI[] rs;
-            List<String> nores = new ArrayList<String>();
-            rslt = proxy.getSequenceRecords(multiacc.toString());
-            if (rslt == null || rslt.getHeight() == 0)
-            {
-              // no results - pass on all queries to next source
-              nextfetch.addAll(tosend);
-            }
-            else
-            {
-              aresultq.add(multiacc.toString());
-              aresult.add(rslt);
-
-              rs = rslt.getSequencesArray();
-              // search for each query in the dbrefs associated with each
-              // sequence
-              // returned.
-              // ones we do not find will be used to query next source (if any)
-              for (String q : tosend)
-              {
-                DBRefEntry dbr = new DBRefEntry(), found[] = null;
-                dbr.setSource(proxy.getDbSource());
-                dbr.setVersion(null);
-                String accId = proxy.getAccessionIdFromQuery(q);
-                dbr.setAccessionId(accId);
-                boolean rfound = false;
-                for (int r = 0; r < rs.length; r++)
-                {
-                  if (rs[r] != null)
-                  {
-                    found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
-                    if (found != null && found.length > 0)
-                    {
-                      rfound = true;
-                      rs[r] = null;
-                    }
-                  }
-                }
-                if (!rfound)
-                {
-                  nextfetch.add(q);
-                }
-              }
-            }
-          } catch (OutOfMemoryError oome)
-          {
-            new OOMWarning("fetching " + multiacc + " from "
-                    + database.getSelectedItem(), oome, this);
-          }
+          /*
+           * proxy can fetch multiple accessions at one time
+           */
+          fetchMultipleAccessions(proxy, en, aresultq, aresult, nextFetch);
         }
-
       } catch (Exception e)
       {
         showErrorMessage("Error retrieving " + textArea.getText()
@@ -700,7 +607,6 @@ public class SequenceFetcher extends JPanel implements Runnable
         e.printStackTrace();
       } catch (OutOfMemoryError e)
       {
-        // resets dialog box - so we don't use OOMwarning here.
         showErrorMessage("Out of Memory when retrieving "
                 + textArea.getText()
                 + " from "
@@ -713,6 +619,7 @@ public class SequenceFetcher extends JPanel implements Runnable
                 + " from " + database.getSelectedItem());
         e.printStackTrace();
       }
+
       // Stack results ready for opening in alignment windows
       if (aresult != null && aresult.size() > 0)
       {
@@ -724,7 +631,7 @@ public class SequenceFetcher extends JPanel implements Runnable
         }
 
         AlignmentI ar = null;
-        if (isAliSource)
+        if (proxy.isAlignmentSource())
         {
           addToLast = false;
           // new window for each result
@@ -754,7 +661,6 @@ public class SequenceFetcher extends JPanel implements Runnable
             {
               ar.append(aresult.remove(0));
             }
-            ;
           }
           addToLast = true;
           presult.add(ar);
@@ -778,14 +684,14 @@ public class SequenceFetcher extends JPanel implements Runnable
     }
     // only remove visual delay after we finished parsing.
     guiWindow.setProgressBar(null, Thread.currentThread().hashCode());
-    if (nextfetch.size() > 0)
+    if (nextFetch.size() > 0)
     {
       StringBuffer sb = new StringBuffer();
       sb.append("Didn't retrieve the following "
-              + (nextfetch.size() == 1 ? "query" : nextfetch.size()
+              + (nextFetch.size() == 1 ? "query" : nextFetch.size()
                       + " queries") + ": \n");
       int l = sb.length(), lr = 0;
-      for (String s : nextfetch)
+      for (String s : nextFetch)
       {
         if (l != sb.length())
         {
@@ -803,6 +709,161 @@ public class SequenceFetcher extends JPanel implements Runnable
   }
 
   /**
+   * Tries to fetch all the accession ids from the proxy in one combined query
+   * 
+   * @param proxy the database source proxy that the combined query is sent to
+   * @param accessions
+   *          the queries to fetch
+   * @param aresultq
+   *          a list of successful queries to add to
+   * @param aresult
+   *          a list of retrieved alignments to add to
+   * @param nextFetch
+   *          failed queries are added to this list
+   * @throws Exception any exception raised while querying is not caught here
+   */
+  void fetchMultipleAccessions(DbSourceProxy proxy,
+          Iterator<String> accessions, List<String> aresultq,
+          List<AlignmentI> aresult, List<String> nextFetch)
+          throws Exception
+  {
+    StringBuilder multiacc = new StringBuilder();
+    List<String> tosend = new ArrayList<String>();
+    while (accessions.hasNext())
+    {
+      String nel = accessions.next();
+      tosend.add(nel);
+      multiacc.append(nel);
+      if (accessions.hasNext())
+      {
+        multiacc.append(proxy.getAccessionSeparator());
+      }
+    }
+    // one request for the whole batch; only OutOfMemoryError is handled below
+    try
+    {
+      String query = multiacc.toString();
+      AlignmentI rslt = proxy.getSequenceRecords(query);
+      if (rslt == null || rslt.getHeight() == 0)
+      {
+        // no results - pass on all queries to next source
+        nextFetch.addAll(tosend);
+      }
+      else
+      {
+        aresultq.add(query);
+        aresult.add(rslt);
+        if (tosend.size() > 1)
+        {
+          checkResultForQueries(rslt, tosend, nextFetch, proxy);
+        }
+      }
+    } catch (OutOfMemoryError oome)
+    {
+      new OOMWarning("fetching " + multiacc + " from "
+              + database.getSelectedItem(), oome, this);
+    }
+  }
+
+  /**
+   * Query for a single accession id via the database proxy
+   * 
+   * @param proxy the database source proxy to query
+   * @param accession the accession id (query) to fetch
+   * @param aresultq
+   *          a list of successful queries to add to
+   * @param aresult
+   *          a list of retrieved alignments to add to
+   * @return true if an alignment was retrieved, false if not or on failure
+   */
+  boolean fetchSingleAccession(DbSourceProxy proxy, String accession,
+          List<String> aresultq, List<AlignmentI> aresult)
+  {
+    boolean success = false;
+    try
+    {
+      if (aresult != null)
+      {
+        try
+        {
+          // give the server a chance to breathe
+          Thread.sleep(5);
+        } catch (Exception e)
+        {
+          // a cut-short pause is harmless - carry on with the fetch
+        }
+      }
+
+      AlignmentI indres = null;
+      try
+      {
+        indres = proxy.getSequenceRecords(accession);
+      } catch (OutOfMemoryError oome)
+      {
+        new OOMWarning("fetching " + accession + " from "
+                + proxy.getDbName(), oome, this);
+      }
+      if (indres != null)
+      {
+        aresultq.add(accession);
+        aresult.add(indres);
+        success = true;
+      }
+    } catch (Exception e)
+    {
+      Cache.log.info(
+              "Error retrieving " + accession
+              + " from " + proxy.getDbName(), e);
+    }
+
+    // not returned from a 'finally' block, as that would discard any Error
+    return success;
+  }
+
+  /**
+   * Checks which of the queries were successfully retrieved by searching the
+   * DBRefs of the retrieved sequences for a match. Any not found are added to
+   * the 'nextFetch' list.
+   * 
+   * @param rslt the alignment holding the retrieved sequences
+   * @param queries the queries to look for
+   * @param nextFetch the list that unmatched queries are added to
+   * @param proxy provides the accession id to search for, for each query
+   */
+  void checkResultForQueries(AlignmentI rslt, List<String> queries,
+          List<String> nextFetch, DbSourceProxy proxy)
+  {
+    SequenceI[] rs = rslt.getSequencesArray();
+
+    for (String q : queries)
+    {
+      /*
+       * search on the accession id that the proxy derives from the query;
+       * the first sequence with a matching DBRef is enough to count as found
+       */
+      String accId = proxy.getAccessionIdFromQuery(q);
+      DBRefEntry[] found = null;
+      boolean rfound = false;
+      for (int r = 0; r < rs.length; r++)
+      {
+        if (rs[r] != null)
+        {
+          found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
+          if (found != null && found.length > 0)
+          {
+            rfound = true;
+            break;
+          }
+        }
+      }
+      if (!rfound)
+      {
+        nextFetch.add(q);
+      }
+    }
+  }
+
+  /**
    * 
    * @return a standard title for any results retrieved using the currently
    *         selected source and settings