JAL-2213 removed PDB from db sources list; also code tidy (typed
author gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 29 Sep 2016 12:18:42 +0000 (13:18 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 29 Sep 2016 12:18:42 +0000 (13:18 +0100)
collections,

src/jalview/ws/DBRefFetcher.java

index ca403c5..748cb72 100644 (file)
@@ -29,15 +29,19 @@ import jalview.datamodel.Mapping;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.gui.CutAndPasteTransfer;
+import jalview.gui.DasSourceBrowser;
 import jalview.gui.Desktop;
 import jalview.gui.FeatureSettings;
 import jalview.gui.IProgressIndicator;
 import jalview.gui.OOMWarning;
+import jalview.util.DBRefUtils;
 import jalview.util.MessageManager;
 import jalview.ws.dbsources.das.api.jalviewSourceI;
+import jalview.ws.dbsources.das.datamodel.DasSequenceSource;
 import jalview.ws.seqfetcher.DbSourceProxy;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.Hashtable;
 import java.util.List;
@@ -45,6 +49,7 @@ import java.util.StringTokenizer;
 import java.util.Vector;
 
 import uk.ac.ebi.picr.model.UPEntry;
+import uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator;
 
 /**
  * Implements a runnable for validating a sequence against external databases
@@ -60,8 +65,6 @@ public class DBRefFetcher implements Runnable
     void finished();
   }
 
-  private List<FetchFinishedListenerI> listeners;
-
   SequenceI[] dataset;
 
   IProgressIndicator progressWindow;
@@ -77,17 +80,19 @@ public class DBRefFetcher implements Runnable
    */
   uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperInterface picrClient = null;
 
-  // /This will be a collection of Vectors of sequenceI refs.
+  // This will be a collection of Vectors of sequenceI refs.
   // The key will be the seq name or accession id of the seq
-  Hashtable seqRefs;
+  Hashtable<String, Vector<SequenceI>> seqRefs;
 
   DbSourceProxy[] dbSources;
 
   SequenceFetcher sfetcher;
 
+  private List<FetchFinishedListenerI> listeners;
+
   private SequenceI[] alseqs;
 
-  /**
+  /*
    * when true - retrieved sequences will be trimmed to cover longest derived
    * alignment sequence
    */
@@ -137,59 +142,74 @@ public class DBRefFetcher implements Runnable
     trimDsSeqs = Cache.getDefault("TRIM_FETCHED_DATASET_SEQS", true);
     if (sources == null)
     {
-      // af.featureSettings_actionPerformed(null);
-      String[] defdb = null, otherdb = sfetcher
-              .getDbInstances(jalview.ws.dbsources.das.datamodel.DasSequenceSource.class);
-      List<DbSourceProxy> selsources = new ArrayList<DbSourceProxy>();
-      Vector<jalviewSourceI> dasselsrc = (featureSettings != null) ? featureSettings
-              .getSelectedSources() : new jalview.gui.DasSourceBrowser()
-              .getSelectedSources();
-      Enumeration<jalviewSourceI> en = dasselsrc.elements();
-      while (en.hasMoreElements())
+      setDatabaseSources(featureSettings, isNucleotide);
+    }
+    else
+    {
+      // we assume the caller knows what they're doing and ensured that all the
+      // db source names are valid
+      dbSources = sources;
+    }
+  }
+
+  /**
+   * Helper method to configure the list of database sources to query: the
+   * default sources for the sequence type, followed by selected DAS sources
+   * @param featureSettings provides selected DAS sources; may be null
+   * @param forNucleotide true for DNACODINGDBS defaults, false for PROTEINDBS
+   */
+  void setDatabaseSources(FeatureSettings featureSettings,
+          boolean forNucleotide)
+  {
+    // af.featureSettings_actionPerformed(null);
+    String[] defdb = null;
+    List<DbSourceProxy> selsources = new ArrayList<DbSourceProxy>();
+    Vector<jalviewSourceI> dasselsrc = (featureSettings != null) ? featureSettings
+            .getSelectedSources() : new DasSourceBrowser()
+            .getSelectedSources();
+
+    for (jalviewSourceI src : dasselsrc)
+    {
+      List<DbSourceProxy> sp = src.getSequenceSourceProxies();
+      if (sp != null)
       {
-        jalviewSourceI src = en.nextElement();
-        List<DbSourceProxy> sp = src.getSequenceSourceProxies();
-        if (sp != null)
+        selsources.addAll(sp);
+        if (sp.size() > 1)
         {
-          selsources.addAll(sp);
-          if (sp.size() > 1)
-          {
-            Cache.log.debug("Added many Db Sources for :" + src.getTitle());
-          }
+          Cache.log.debug("Added many Db Sources for :" + src.getTitle());
         }
       }
-      // select appropriate databases based on alignFrame context.
-      if (isNucleotide)
-      {
-        defdb = DBRefSource.DNACODINGDBS;
-      }
-      else
-      {
-        defdb = DBRefSource.PROTEINDBS;
-      }
-      List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
-      for (String ddb : defdb)
+    }
+    // select appropriate databases based on alignFrame context.
+    if (forNucleotide)
+    {
+      defdb = DBRefSource.DNACODINGDBS;
+    }
+    else
+    {
+      defdb = DBRefSource.PROTEINDBS;
+    }
+    List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
+    for (String ddb : defdb)
+    {
+      List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb);
+      if (srcesfordb != null)
       {
-        List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb);
-        if (srcesfordb != null)
+        for (DbSourceProxy src : srcesfordb)
         {
-          srces.addAll(srcesfordb);
+          if (!srces.contains(src))
+          {
+            srces.add(src); // add only the proxy that is not yet listed
+          }
         }
       }
-      // append the PDB data source, since it is 'special', catering for both
-      // nucleotide and protein
-      srces.addAll(sfetcher.getSourceProxy(DBRefSource.PDB));
-
-      // append the selected sequence sources to the default dbs
-      srces.addAll(selsources);
-      dbSources = srces.toArray(new DbSourceProxy[0]);
-    }
-    else
-    {
-      // we assume the caller knows what they're doing and ensured that all the
-      // db source names are valid
-      dbSources = sources;
     }
+    // JAL-2213: the PDB data source (which caters for both nucleotide and
+    // protein) is no longer appended to the default sources:
+    // srces.addAll(sfetcher.getSourceProxy(DBRefSource.PDB));
+    // append the selected DAS sequence sources to the default dbs
+    srces.addAll(selsources);
+    dbSources = srces.toArray(new DbSourceProxy[srces.size()]);
   }
 
   /**
@@ -224,7 +244,7 @@ public class DBRefFetcher implements Runnable
     }
     // append additional sources
     DbSourceProxy[] otherdb = sfetcher
-            .getDbSourceProxyInstances(jalview.ws.dbsources.das.datamodel.DasSequenceSource.class);
+            .getDbSourceProxyInstances(DasSequenceSource.class);
     if (otherdb != null && otherdb.length > 0)
     {
       DbSourceProxy[] newsrc = new DbSourceProxy[dbSources.length
@@ -243,6 +263,9 @@ public class DBRefFetcher implements Runnable
    */
   public void fetchDBRefs(boolean waitTillFinished)
   {
+    // TODO can we not simply write
+    // if (waitTillFinished) { run(); } else { new Thread(this).start(); }
+
     Thread thread = new Thread(this);
     thread.start();
     running = true;
@@ -274,10 +297,10 @@ public class DBRefFetcher implements Runnable
   {
     key = key.toUpperCase();
 
-    Vector seqs;
+    Vector<SequenceI> seqs;
     if (seqRefs.containsKey(key))
     {
-      seqs = (Vector) seqRefs.get(key);
+      seqs = seqRefs.get(key);
 
       if (seqs != null && !seqs.contains(seq))
       {
@@ -285,14 +308,14 @@ public class DBRefFetcher implements Runnable
       }
       else if (seqs == null)
       {
-        seqs = new Vector();
+        seqs = new Vector<SequenceI>();
         seqs.addElement(seq);
       }
 
     }
     else
     {
-      seqs = new Vector();
+      seqs = new Vector<SequenceI>();
       seqs.addElement(seq);
     }
 
@@ -323,7 +346,7 @@ public class DBRefFetcher implements Runnable
     {
       if (Cache.getDefault("DBREFFETCH_USEPICR", false))
       {
-        picrClient = new uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator()
+        picrClient = new AccessionMapperServiceLocator()
                 .getAccessionMapperPort();
       }
     } catch (Exception e)
@@ -331,149 +354,144 @@ public class DBRefFetcher implements Runnable
       System.err.println("Couldn't locate PICR service instance.\n");
       e.printStackTrace();
     }
+
+    Vector<SequenceI> sdataset = new Vector<SequenceI>(
+            Arrays.asList(dataset));
+
     int db = 0;
-    Vector sdataset = new Vector();
-    for (int s = 0; s < dataset.length; s++)
-    {
-      sdataset.addElement(dataset[s]);
-    }
     while (sdataset.size() > 0 && db < dbSources.length)
     {
-      int maxqlen = 1; // default number of queries made to at one time
-      System.err.println("Verifying against " + dbSources[db].getDbName());
-      boolean dn = false;
+      int maxqlen = 1; // default number of queries made at one time
+      System.out.println("Verifying against " + dbSources[db].getDbName());
 
       // iterate through db for each remaining un-verified sequence
       SequenceI[] currSeqs = new SequenceI[sdataset.size()];
       sdataset.copyInto(currSeqs);// seqs that are to be validated against
       // dbSources[db]
-      Vector queries = new Vector(); // generated queries curSeq
-      seqRefs = new Hashtable();
+      Vector<String> queries = new Vector<String>(); // generated queries curSeq
+      seqRefs = new Hashtable<String, Vector<SequenceI>>();
 
       int seqIndex = 0;
 
-      jalview.ws.seqfetcher.DbSourceProxy dbsource = dbSources[db];
+      DbSourceProxy dbsource = dbSources[db];
+      // for moment, we dumbly iterate over all retrieval sources for a
+      // particular database
+      // TODO: introduce multithread multisource queries and logic to remove a
+      // query from other sources if any source for a database returns a
+      // record
+      maxqlen = dbsource.getMaximumQueryCount();
+
+      while (queries.size() > 0 || seqIndex < currSeqs.length)
       {
-        // for moment, we dumbly iterate over all retrieval sources for a
-        // particular database
-        // TODO: introduce multithread multisource queries and logic to remove a
-        // query from other sources if any source for a database returns a
-        // record
-        maxqlen = dbsource.getMaximumQueryCount();
-
-        while (queries.size() > 0 || seqIndex < currSeqs.length)
+        if (queries.size() > 0)
         {
-          if (queries.size() > 0)
-          {
-            // Still queries to make for current seqIndex
-            StringBuffer queryString = new StringBuffer("");
-            int numq = 0, nqSize = (maxqlen > queries.size()) ? queries
-                    .size() : maxqlen;
+          // Still queries to make for current seqIndex
+          StringBuffer queryString = new StringBuffer("");
+          int numq = 0;
+          int nqSize = (maxqlen > queries.size()) ? queries
+                  .size() : maxqlen;
 
-            while (queries.size() > 0 && numq < nqSize)
-            {
-              String query = (String) queries.elementAt(0);
-              if (dbsource.isValidReference(query))
-              {
-                queryString.append((numq == 0) ? "" : dbsource
-                        .getAccessionSeparator());
-                queryString.append(query);
-                numq++;
-              }
-              // remove the extracted query string
-              queries.removeElementAt(0);
-            }
-            // make the queries and process the response
-            AlignmentI retrieved = null;
-            try
-            {
-              if (jalview.bin.Cache.log.isDebugEnabled())
-              {
-                jalview.bin.Cache.log.debug("Querying "
-                        + dbsource.getDbName() + " with : '"
-                        + queryString.toString() + "'");
-              }
-              retrieved = dbsource.getSequenceRecords(queryString
-                      .toString());
-            } catch (Exception ex)
-            {
-              ex.printStackTrace();
-            } catch (OutOfMemoryError err)
+          while (queries.size() > 0 && numq < nqSize)
+          {
+            String query = queries.elementAt(0);
+            if (dbsource.isValidReference(query))
             {
-              new OOMWarning("retrieving database references ("
-                      + queryString.toString() + ")", err);
+              queryString.append((numq == 0) ? "" : dbsource
+                      .getAccessionSeparator());
+              queryString.append(query);
+              numq++;
             }
-            if (retrieved != null)
+            // remove the extracted query string
+            queries.removeElementAt(0);
+          }
+          // make the queries and process the response
+          AlignmentI retrieved = null;
+          try
+          {
+            if (Cache.log.isDebugEnabled())
             {
-              transferReferences(sdataset, dbsource.getDbSource(),
-                      retrieved, trimDsSeqs);
+              Cache.log.debug("Querying " + dbsource.getDbName()
+                      + " with : '" + queryString.toString() + "'");
             }
+            retrieved = dbsource.getSequenceRecords(queryString.toString());
+          } catch (Exception ex)
+          {
+            ex.printStackTrace();
+          } catch (OutOfMemoryError err)
+          {
+            new OOMWarning("retrieving database references ("
+                    + queryString.toString() + ")", err);
+          }
+          if (retrieved != null)
+          {
+            transferReferences(sdataset, dbsource.getDbSource(), retrieved,
+                    trimDsSeqs);
           }
-          else
+        }
+        else
+        {
+          // make some more strings for use as queries
+          for (int i = 0; (seqIndex < dataset.length) && (i < 50); seqIndex++, i++)
           {
-            // make some more strings for use as queries
-            for (int i = 0; (seqIndex < dataset.length) && (i < 50); seqIndex++, i++)
+            SequenceI sequence = dataset[seqIndex];
+            DBRefEntry[] uprefs = DBRefUtils.selectRefs(
+                    sequence.getDBRefs(),
+                    new String[] { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
+            // });
+            // check for existing dbrefs to use
+            if (uprefs != null && uprefs.length > 0)
             {
-              SequenceI sequence = dataset[seqIndex];
-              DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs(
-                      sequence.getDBRefs(),
-                      new String[] { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
-              // });
-              // check for existing dbrefs to use
-              if (uprefs != null && uprefs.length > 0)
+              for (int j = 0; j < uprefs.length; j++)
               {
-                for (int j = 0; j < uprefs.length; j++)
-                {
-                  addSeqId(sequence, uprefs[j].getAccessionId());
-                  queries.addElement(uprefs[j].getAccessionId()
-                          .toUpperCase());
-                }
+                addSeqId(sequence, uprefs[j].getAccessionId());
+                queries.addElement(uprefs[j].getAccessionId().toUpperCase());
               }
-              else
+            }
+            else
+            {
+              // generate queries from sequence ID string
+              StringTokenizer st = new StringTokenizer(sequence.getName(),
+                      "|");
+              while (st.hasMoreTokens())
               {
-                // generate queries from sequence ID string
-                StringTokenizer st = new StringTokenizer(
-                        sequence.getName(), "|");
-                while (st.hasMoreTokens())
+                String token = st.nextToken();
+                UPEntry[] presp = null;
+                if (picrClient != null)
                 {
-                  String token = st.nextToken();
-                  UPEntry[] presp = null;
-                  if (picrClient != null)
+                  // resolve the string against PICR to recover valid IDs
+                  try
                   {
-                    // resolve the string against PICR to recover valid IDs
-                    try
-                    {
-                      presp = picrClient.getUPIForAccession(token, null,
-                              picrClient.getMappedDatabaseNames(), null,
-                              true);
-                    } catch (Exception e)
-                    {
-                      System.err.println("Exception with Picr for '"
-                              + token + "'\n");
-                      e.printStackTrace();
-                    }
-                  }
-                  if (presp != null && presp.length > 0)
+                    presp = picrClient
+                            .getUPIForAccession(token, null,
+                                    picrClient.getMappedDatabaseNames(),
+                                    null, true);
+                  } catch (Exception e)
                   {
-                    for (int id = 0; id < presp.length; id++)
-                    {
-                      // construct sequences from response if sequences are
-                      // present, and do a transferReferences
-                      // otherwise transfer non sequence x-references directly.
-                    }
-                    System.out
-                            .println("Validated ID against PICR... (for what its worth):"
-                                    + token);
-                    addSeqId(sequence, token);
-                    queries.addElement(token.toUpperCase());
+                    System.err.println("Exception with Picr for '" + token
+                            + "'\n");
+                    e.printStackTrace();
                   }
-                  else
+                }
+                if (presp != null && presp.length > 0)
+                {
+                  for (int id = 0; id < presp.length; id++)
                   {
-                    // if ()
-                    // System.out.println("Not querying source with token="+token+"\n");
-                    addSeqId(sequence, token);
-                    queries.addElement(token.toUpperCase());
+                    // construct sequences from response if sequences are
+                    // present, and do a transferReferences
+                    // otherwise transfer non sequence x-references directly.
                   }
+                  System.out
+                          .println("Validated ID against PICR... (for what its worth):"
+                                  + token);
+                  addSeqId(sequence, token);
+                  queries.addElement(token.toUpperCase());
+                }
+                else
+                {
+                  // if ()
+                  // System.out.println("Not querying source with token="+token+"\n");
+                  addSeqId(sequence, token);
+                  queries.addElement(token.toUpperCase());
                 }
               }
             }
@@ -482,7 +500,7 @@ public class DBRefFetcher implements Runnable
       }
       // advance to next database
       db++;
-    } // all databases have been queries.
+    } // all databases have been queried
     if (sbuffer.length() > 0)
     {
       output.setText(MessageManager
@@ -516,11 +534,10 @@ public class DBRefFetcher implements Runnable
    * @param trimDatasetSeqs
    * 
    */
-  void transferReferences(Vector sdataset, String dbSource,
-          AlignmentI retrievedAl, boolean trimDatasetSeqs) // File
-  // file)
+  void transferReferences(Vector<SequenceI> sdataset, String dbSource,
+          AlignmentI retrievedAl, boolean trimDatasetSeqs)
   {
-    System.out.println("trimming ? " + trimDatasetSeqs);
+    // System.out.println("trimming ? " + trimDatasetSeqs);
     if (retrievedAl == null || retrievedAl.getHeight() == 0)
     {
       return;
@@ -529,19 +546,13 @@ public class DBRefFetcher implements Runnable
             .getSequencesArray());
     SequenceI sequence = null;
     boolean transferred = false;
-    StringBuffer messages = new StringBuffer();
+    StringBuilder messages = new StringBuilder(64);
 
-    // Vector entries = new Uniprot().getUniprotEntries(file);
-
-    int i, iSize = retrieved.length; // entries == null ? 0 : entries.size();
-    // UniprotEntry entry;
-    for (i = 0; i < iSize; i++)
+    for (SequenceI entry : retrieved)
     {
-      SequenceI entry = retrieved[i]; // (UniprotEntry) entries.elementAt(i);
-
       // Work out which sequences this sequence matches,
       // taking into account all accessionIds and names in the file
-      Vector sequenceMatches = new Vector();
+      Vector<SequenceI> sequenceMatches = new Vector<SequenceI>();
       // look for corresponding accession ids
       DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs(
               entry.getDBRefs(), new String[] { dbSource });
@@ -554,14 +565,14 @@ public class DBRefFetcher implements Runnable
       }
       for (int j = 0; j < entryRefs.length; j++)
       {
-        String accessionId = entryRefs[j].getAccessionId(); // .getAccession().elementAt(j).toString();
+        String accessionId = entryRefs[j].getAccessionId();
         // match up on accessionId
         if (seqRefs.containsKey(accessionId.toUpperCase()))
         {
-          Vector seqs = (Vector) seqRefs.get(accessionId);
+          Vector<SequenceI> seqs = seqRefs.get(accessionId);
           for (int jj = 0; jj < seqs.size(); jj++)
           {
-            sequence = (SequenceI) seqs.elementAt(jj);
+            sequence = seqs.elementAt(jj);
             if (!sequenceMatches.contains(sequence))
             {
               sequenceMatches.addElement(sequence);
@@ -569,17 +580,17 @@ public class DBRefFetcher implements Runnable
           }
         }
       }
-      if (sequenceMatches.size() == 0)
+      if (sequenceMatches.isEmpty())
       {
         // failed to match directly on accessionId==query so just compare all
         // sequences to entry
-        Enumeration e = seqRefs.keys();
+        Enumeration<String> e = seqRefs.keys();
         while (e.hasMoreElements())
         {
-          Vector sqs = (Vector) seqRefs.get(e.nextElement());
+          Vector<SequenceI> sqs = seqRefs.get(e.nextElement());
           if (sqs != null && sqs.size() > 0)
           {
-            Enumeration sqe = sqs.elements();
+            Enumeration<SequenceI> sqe = sqs.elements();
             while (sqe.hasMoreElements())
             {
               sequenceMatches.addElement(sqe.nextElement());
@@ -606,7 +617,7 @@ public class DBRefFetcher implements Runnable
       String entrySeq = entry.getSequenceAsString().toUpperCase();
       for (int m = 0; m < sequenceMatches.size(); m++)
       {
-        sequence = (SequenceI) sequenceMatches.elementAt(m);
+        sequence = sequenceMatches.elementAt(m);
         // only update start and end positions and shift features if there are
         // no existing references
         // TODO: test for legacy where uniprot or EMBL refs exist but no
@@ -633,7 +644,8 @@ public class DBRefFetcher implements Runnable
             // verification failed. couldn't find any relationship between
             // entrySeq and local sequence
             messages.append(sequence.getName()
-                    + " SEQUENCE NOT %100 MATCH \n");
+                    + " Sequence not 100% match with " + entry.getName()
+                    + "\n");
             continue;
           }
           /*
@@ -641,8 +653,9 @@ public class DBRefFetcher implements Runnable
            * sequence's reference frame. 
            */
           transferred = true;
-          sbuffer.append(sequence.getName() + " HAS " + absStart
-                  + " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n");
+          sbuffer.append(sequence.getName() + " has " + absStart
+                  + " prefixed residues compared to " + entry.getName()
+                  + "\n");
 
           /*
            * So create a mapping to the external entry from the matching region of 
@@ -767,12 +780,12 @@ public class DBRefFetcher implements Runnable
    */
   private SequenceI[] recoverDbSequences(SequenceI[] sequencesArray)
   {
-    Vector nseq = new Vector();
+    Vector<SequenceI> nseq = new Vector<SequenceI>();
     for (int i = 0; sequencesArray != null && i < sequencesArray.length; i++)
     {
       nseq.addElement(sequencesArray[i]);
-      DBRefEntry dbr[] = sequencesArray[i].getDBRefs();
-      jalview.datamodel.Mapping map = null;
+      DBRefEntry[] dbr = sequencesArray[i].getDBRefs();
+      Mapping map = null;
       for (int r = 0; (dbr != null) && r < dbr.length; r++)
       {
         if ((map = dbr[r].getMap()) != null)