JAL-1953 2.11.2 with Archeopteryx!
[jalview.git] / src / jalview / ws / DBRefFetcher.java
index c01b99c..00bd074 100644 (file)
  */
 package jalview.ws;
 
+import java.util.Locale;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+import java.util.Vector;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 import jalview.analysis.AlignSeq;
+import jalview.api.FeatureSettingsModelI;
 import jalview.bin.Cache;
+import jalview.bin.Console;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.SequenceI;
 import jalview.gui.CutAndPasteTransfer;
-import jalview.gui.DasSourceBrowser;
 import jalview.gui.Desktop;
 import jalview.gui.FeatureSettings;
 import jalview.gui.IProgressIndicator;
 import jalview.gui.OOMWarning;
 import jalview.util.DBRefUtils;
 import jalview.util.MessageManager;
-import jalview.ws.dbsources.das.api.jalviewSourceI;
-import jalview.ws.dbsources.das.datamodel.DasSequenceSource;
 import jalview.ws.seqfetcher.DbSourceProxy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Enumeration;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.StringTokenizer;
-import java.util.Vector;
-
 import uk.ac.ebi.picr.model.UPEntry;
 import uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator;
 
@@ -61,6 +65,8 @@ public class DBRefFetcher implements Runnable
 {
   private static final String NEWLINE = System.lineSeparator();
 
+  public static final String TRIM_RETRIEVED_SEQUENCES = "TRIM_FETCHED_DATASET_SEQS";
+
   public interface FetchFinishedListenerI
   {
     void finished();
@@ -72,8 +78,6 @@ public class DBRefFetcher implements Runnable
 
   CutAndPasteTransfer output = new CutAndPasteTransfer();
 
-  boolean running = false;
-
   /**
    * picr client instance
    */
@@ -135,11 +139,10 @@ public class DBRefFetcher implements Runnable
     }
     this.dataset = ds;
     // TODO Jalview 2.5 lots of this code should be in the gui package!
-    sfetcher = jalview.gui.SequenceFetcher
-            .getSequenceFetcherSingleton(progressIndicatorFrame);
+    sfetcher = jalview.gui.SequenceFetcher.getSequenceFetcherSingleton();
     // set default behaviour for transferring excess sequence data to the
     // dataset
-    trimDsSeqs = Cache.getDefault("TRIM_FETCHED_DATASET_SEQS", true);
+    trimDsSeqs = Cache.getDefault(TRIM_RETRIEVED_SEQUENCES, true);
     if (sources == null)
     {
       setDatabaseSources(featureSettings, isNucleotide);
@@ -164,22 +167,6 @@ public class DBRefFetcher implements Runnable
     // af.featureSettings_actionPerformed(null);
     String[] defdb = null;
     List<DbSourceProxy> selsources = new ArrayList<>();
-    Vector<jalviewSourceI> dasselsrc = (featureSettings != null)
-            ? featureSettings.getSelectedSources()
-            : new DasSourceBrowser().getSelectedSources();
-
-    for (jalviewSourceI src : dasselsrc)
-    {
-      List<DbSourceProxy> sp = src.getSequenceSourceProxies();
-      if (sp != null)
-      {
-        selsources.addAll(sp);
-        if (sp.size() > 1)
-        {
-          Cache.log.debug("Added many Db Sources for :" + src.getTitle());
-        }
-      }
-    }
     // select appropriate databases based on alignFrame context.
     if (forNucleotide)
     {
@@ -233,30 +220,6 @@ public class DBRefFetcher implements Runnable
   }
 
   /**
-   * retrieve all the das sequence sources and add them to the list of db
-   * sources to retrieve from
-   */
-  public void appendAllDasSources()
-  {
-    if (dbSources == null)
-    {
-      dbSources = new DbSourceProxy[0];
-    }
-    // append additional sources
-    DbSourceProxy[] otherdb = sfetcher
-            .getDbSourceProxyInstances(DasSequenceSource.class);
-    if (otherdb != null && otherdb.length > 0)
-    {
-      DbSourceProxy[] newsrc = new DbSourceProxy[dbSources.length
-              + otherdb.length];
-      System.arraycopy(dbSources, 0, newsrc, 0, dbSources.length);
-      System.arraycopy(otherdb, 0, newsrc, dbSources.length,
-              otherdb.length);
-      dbSources = newsrc;
-    }
-  }
-
-  /**
    * start the fetcher thread
    * 
    * @param waitTillFinished
@@ -264,24 +227,13 @@ public class DBRefFetcher implements Runnable
    */
   public void fetchDBRefs(boolean waitTillFinished)
   {
-    // TODO can we not simply write
-    // if (waitTillFinished) { run(); } else { new Thread(this).start(); }
-
-    Thread thread = new Thread(this, "FetchDBRef");
-    thread.start();
-    running = true;
-
     if (waitTillFinished)
     {
-      while (running)
-      {
-        try
-        {
-          Thread.sleep(500);
-        } catch (Exception ex)
-        {
-        }
-      }
+      // blocking mode: do the fetch on the caller's own thread
+      run();
+    }
+    else
+    {
+      // background mode: hand the fetch to a new thread named "FetchDBRef"
+      new Thread(this,"FetchDBRef").start();
     }
   }
 
@@ -296,7 +248,7 @@ public class DBRefFetcher implements Runnable
    */
   void addSeqId(SequenceI seq, String key)
   {
-    key = key.toUpperCase();
+    key = key.toUpperCase(Locale.ROOT);
 
     Vector<SequenceI> seqs;
     if (seqRefs.containsKey(key))
@@ -334,7 +286,6 @@ public class DBRefFetcher implements Runnable
       throw new Error(MessageManager
               .getString("error.implementation_error_must_init_dbsources"));
     }
-    running = true;
     long startTime = System.currentTimeMillis();
     if (progressWindow != null)
     {
@@ -355,10 +306,12 @@ public class DBRefFetcher implements Runnable
       e.printStackTrace();
     }
 
-    Vector<SequenceI> sdataset = new Vector<>(
-            Arrays.asList(dataset));
+    Vector<SequenceI> sdataset = new Vector<>(Arrays.asList(dataset));
     List<String> warningMessages = new ArrayList<>();
 
+    // clear any old feature display settings recorded from past sessions
+    featureDisplaySettings = null;
+
     int db = 0;
     while (sdataset.size() > 0 && db < dbSources.length)
     {
@@ -409,10 +362,10 @@ public class DBRefFetcher implements Runnable
           AlignmentI retrieved = null;
           try
           {
-            if (Cache.log.isDebugEnabled())
+            if (Console.isDebugEnabled())
             {
-              Cache.log.debug("Querying " + dbsource.getDbName()
-                      + " with : '" + queryString.toString() + "'");
+              Console.debug("Querying " + dbsource.getDbName() + " with : '"
+                      + queryString.toString() + "'");
             }
             retrieved = dbsource.getSequenceRecords(queryString.toString());
           } catch (Exception ex)
@@ -425,8 +378,8 @@ public class DBRefFetcher implements Runnable
           }
           if (retrieved != null)
           {
-            transferReferences(sdataset, dbsource.getDbSource(), retrieved,
-                    trimDsSeqs, warningMessages);
+            transferReferences(sdataset, dbsource, retrieved, trimDsSeqs,
+                    warningMessages);
           }
         }
         else
@@ -436,28 +389,31 @@ public class DBRefFetcher implements Runnable
                   && (i < 50); seqIndex++, i++)
           {
             SequenceI sequence = dataset[seqIndex];
-            DBRefEntry[] uprefs = DBRefUtils
+            List<DBRefEntry> uprefs = DBRefUtils
                     .selectRefs(sequence.getDBRefs(), new String[]
                     { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
             // });
             // check for existing dbrefs to use
-            if (uprefs != null && uprefs.length > 0)
+            if (uprefs != null && uprefs.size() > 0)
             {
-              for (int j = 0; j < uprefs.length; j++)
+              for (int j = 0, n = uprefs.size(); j < n; j++)
               {
-                addSeqId(sequence, uprefs[j].getAccessionId());
+                DBRefEntry upref = uprefs.get(j);
+                addSeqId(sequence, upref.getAccessionId());
                 queries.addElement(
-                        uprefs[j].getAccessionId().toUpperCase());
+                        upref.getAccessionId().toUpperCase(Locale.ROOT));
               }
             }
             else
             {
+              Pattern possibleIds = Pattern.compile("[A-Za-z0-9_]+");
               // generate queries from sequence ID string
-              StringTokenizer st = new StringTokenizer(sequence.getName(),
-                      "|");
-              while (st.hasMoreTokens())
+              Matcher tokens = possibleIds.matcher(sequence.getName());
+              int p = 0;
+              while (tokens.find(p))
               {
-                String token = st.nextToken();
+                String token = tokens.group();
+                p = tokens.end();
                 UPEntry[] presp = null;
                 if (picrClient != null)
                 {
@@ -486,7 +442,7 @@ public class DBRefFetcher implements Runnable
                           "Validated ID against PICR... (for what its worth):"
                                   + token);
                   addSeqId(sequence, token);
-                  queries.addElement(token.toUpperCase());
+                  queries.addElement(token.toUpperCase(Locale.ROOT));
                 }
                 else
                 {
@@ -494,7 +450,7 @@ public class DBRefFetcher implements Runnable
                   // System.out.println("Not querying source with
                   // token="+token+"\n");
                   addSeqId(sequence, token);
-                  queries.addElement(token.toUpperCase());
+                  queries.addElement(token.toUpperCase(Locale.ROOT));
                 }
               }
             }
@@ -533,7 +489,6 @@ public class DBRefFetcher implements Runnable
     {
       listener.finished();
     }
-    running = false;
   }
 
   /**
@@ -553,9 +508,9 @@ public class DBRefFetcher implements Runnable
    * @param warningMessages
    *          a list of messages to add to
    */
-  boolean transferReferences(Vector<SequenceI> sdataset, String dbSource,
-          AlignmentI retrievedAl, boolean trimDatasetSeqs,
-          List<String> warningMessages)
+  boolean transferReferences(Vector<SequenceI> sdataset,
+          DbSourceProxy dbSourceProxy, AlignmentI retrievedAl,
+          boolean trimDatasetSeqs, List<String> warningMessages)
   {
     // System.out.println("trimming ? " + trimDatasetSeqs);
     if (retrievedAl == null || retrievedAl.getHeight() == 0)
@@ -563,6 +518,7 @@ public class DBRefFetcher implements Runnable
       return false;
     }
 
+    String dbSource = dbSourceProxy.getDbName();
     boolean modified = false;
     SequenceI[] retrieved = recoverDbSequences(
             retrievedAl.getSequencesArray());
@@ -574,7 +530,7 @@ public class DBRefFetcher implements Runnable
       // taking into account all accessionIds and names in the file
       Vector<SequenceI> sequenceMatches = new Vector<>();
       // look for corresponding accession ids
-      DBRefEntry[] entryRefs = DBRefUtils
+      List<DBRefEntry> entryRefs = DBRefUtils
               .selectRefs(retrievedSeq.getDBRefs(), new String[]
               { dbSource });
       if (entryRefs == null)
@@ -584,11 +540,12 @@ public class DBRefFetcher implements Runnable
                         + dbSource + " on " + retrievedSeq.getName());
         continue;
       }
-      for (int j = 0; j < entryRefs.length; j++)
+      for (int j = 0, n = entryRefs.size(); j < n; j++)
       {
-        String accessionId = entryRefs[j].getAccessionId();
+        DBRefEntry ref = entryRefs.get(j);
+        String accessionId = ref.getAccessionId();
         // match up on accessionId
-        if (seqRefs.containsKey(accessionId.toUpperCase()))
+        if (seqRefs.containsKey(accessionId.toUpperCase(Locale.ROOT)))
         {
           Vector<SequenceI> seqs = seqRefs.get(accessionId);
           for (int jj = 0; jj < seqs.size(); jj++)
@@ -624,7 +581,7 @@ public class DBRefFetcher implements Runnable
       // could be useful to extend this so we try to find any 'significant'
       // information in common between two sequence objects.
       /*
-       * DBRefEntry[] entryRefs =
+       * List<DBRefEntry> entryRefs =
        * jalview.util.DBRefUtils.selectRefs(entry.getDBRef(), new String[] {
        * dbSource }); for (int j = 0; j < entry.getName().size(); j++) { String
        * name = entry.getName().elementAt(j).toString(); if
@@ -633,10 +590,14 @@ public class DBRefFetcher implements Runnable
        * seqs.elementAt(jj); if (!sequenceMatches.contains(sequence)) {
        * sequenceMatches.addElement(sequence); } } } }
        */
+      if (sequenceMatches.size() > 0)
+      {
+        addFeatureSettings(dbSourceProxy);
+      }
       // sequenceMatches now contains the set of all sequences associated with
       // the returned db record
       final String retrievedSeqString = retrievedSeq.getSequenceAsString();
-      String entrySeq = retrievedSeqString.toUpperCase();
+      String entrySeq = retrievedSeqString.toUpperCase(Locale.ROOT);
       for (int m = 0; m < sequenceMatches.size(); m++)
       {
         sequence = sequenceMatches.elementAt(m);
@@ -645,7 +606,7 @@ public class DBRefFetcher implements Runnable
         // TODO: test for legacy where uniprot or EMBL refs exist but no
         // mappings are made (but content matches retrieved set)
         boolean updateRefFrame = sequence.getDBRefs() == null
-                || sequence.getDBRefs().length == 0;
+                || sequence.getDBRefs().size() == 0;
         // TODO:
         // verify sequence against the entry sequence
 
@@ -655,7 +616,7 @@ public class DBRefFetcher implements Runnable
         boolean remoteEnclosesLocal = false;
         String nonGapped = AlignSeq
                 .extractGaps("-. ", sequence.getSequenceAsString())
-                .toUpperCase();
+                .toUpperCase(Locale.ROOT);
         int absStart = entrySeq.indexOf(nonGapped);
         if (absStart == -1)
         {
@@ -711,7 +672,8 @@ public class DBRefFetcher implements Runnable
             int startShift = absStart - sequenceStart + 1;
             if (startShift != 0)
             {
-              modified |= sequence.getFeatures().shiftFeatures(startShift);
+              modified |= sequence.getFeatures().shiftFeatures(1,
+                      startShift);
             }
           }
         }
@@ -774,10 +736,11 @@ public class DBRefFetcher implements Runnable
               String ngAlsq = AlignSeq
                       .extractGaps("-. ",
                               alseqs[alsq].getSequenceAsString())
-                      .toUpperCase();
+                      .toUpperCase(Locale.ROOT);
               int oldstrt = alseqs[alsq].getStart();
               alseqs[alsq].setStart(sequence.getSequenceAsString()
-                      .toUpperCase().indexOf(ngAlsq) + sequence.getStart());
+                      .toUpperCase(Locale.ROOT).indexOf(ngAlsq)
+                      + sequence.getStart());
               if (oldstrt != alseqs[alsq].getStart())
               {
                 alseqs[alsq].setEnd(
@@ -795,13 +758,40 @@ public class DBRefFetcher implements Runnable
         // and remove it from the rest
         // TODO: decide if we should remove annotated sequence from set
         sdataset.remove(sequence);
-        // TODO: should we make a note of sequences that have received new DB
-        // ids, so we can query all enabled DAS servers for them ?
       }
     }
     return modified;
   }
 
+  Map<String, FeatureSettingsModelI> featureDisplaySettings = null;
+
+  /**
+   * Records the feature settings model offered by the given source, keyed by
+   * the source's database name. Sources that offer no model are ignored. The
+   * backing map is created the first time a model is recorded.
+   * 
+   * @param dbSourceProxy
+   *          the source whose feature settings should be recorded
+   */
+  private void addFeatureSettings(DbSourceProxy dbSourceProxy)
+  {
+    FeatureSettingsModelI sourceSettings = dbSourceProxy
+            .getFeatureColourScheme();
+    if (sourceSettings == null)
+    {
+      // this source has no feature settings to contribute
+      return;
+    }
+    if (featureDisplaySettings == null)
+    {
+      featureDisplaySettings = new HashMap<>();
+    }
+    featureDisplaySettings.put(dbSourceProxy.getDbName(), sourceSettings);
+  }
+
+  /**
+   * Returns the feature settings recorded for the sources that have provided
+   * sequences.
+   * 
+   * @return a new list of the recorded feature settings models; empty (never
+   *         null) if none have been recorded
+   */
+  public List<FeatureSettingsModelI> getFeatureSettingsModels()
+  {
+    if (featureDisplaySettings == null)
+    {
+      return new ArrayList<>();
+    }
+    // copy the values directly: toArray(new FeatureSettingsModelI[1]) would
+    // pad the result for an empty map with a single null element
+    return new ArrayList<>(featureDisplaySettings.values());
+  }
+
   /**
    * Adds the message to the list unless it already contains it
    * 
@@ -824,28 +814,36 @@ public class DBRefFetcher implements Runnable
    */
   private SequenceI[] recoverDbSequences(SequenceI[] sequencesArray)
   {
-    Vector<SequenceI> nseq = new Vector<>();
-    for (int i = 0; sequencesArray != null
-            && i < sequencesArray.length; i++)
+    // a null or empty array has nothing to expand, so return it unchanged
+    int n;
+    if (sequencesArray == null || (n = sequencesArray.length) == 0)
     {
-      nseq.addElement(sequencesArray[i]);
-      DBRefEntry[] dbr = sequencesArray[i].getDBRefs();
+      return sequencesArray;
+    }
+    ArrayList<SequenceI> nseq = new ArrayList<>();
+    for (int i = 0; i < n; i++)
+    {
+      // keep the retrieved sequence, then append each sequence that one of
+      // its DBRef mappings points to (map.getTo()) if not already listed
+      nseq.add(sequencesArray[i]);
+      List<DBRefEntry> dbr = sequencesArray[i].getDBRefs();
       Mapping map = null;
-      for (int r = 0; (dbr != null) && r < dbr.length; r++)
+      if (dbr != null)
       {
-        if ((map = dbr[r].getMap()) != null)
+        for (int r = 0, rn = dbr.size(); r < rn; r++)
         {
-          if (map.getTo() != null && !nseq.contains(map.getTo()))
+          if ((map = dbr.get(r).getMap()) != null)
           {
-            nseq.addElement(map.getTo());
+            if (map.getTo() != null && !nseq.contains(map.getTo()))
+            {
+              nseq.add(map.getTo());
+            }
           }
         }
       }
     }
+    // BH 2019.01.25 question here if this is the right logic. Return the
+    // original if nothing found?
+    // NOTE(review): n > 0 here and every input sequence was added above, so
+    // nseq cannot be empty and the fall-through return looks unreachable
     if (nseq.size() > 0)
     {
-      sequencesArray = new SequenceI[nseq.size()];
-      nseq.toArray(sequencesArray);
+      return nseq.toArray(new SequenceI[nseq.size()]);
     }
     return sequencesArray;
   }