JAL-3949 Complete new abstracted logging framework in jalview.log. Updated log calls...
[jalview.git] / src / jalview / ws / DBRefFetcher.java
index 5f03ec7..899fffc 100644 (file)
  */
 package jalview.ws;
 
+import java.util.Locale;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+import java.util.Vector;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 import jalview.analysis.AlignSeq;
+import jalview.api.FeatureSettingsModelI;
 import jalview.bin.Cache;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Mapping;
-import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
-import jalview.gui.AlignFrame;
 import jalview.gui.CutAndPasteTransfer;
 import jalview.gui.Desktop;
+import jalview.gui.FeatureSettings;
 import jalview.gui.IProgressIndicator;
 import jalview.gui.OOMWarning;
+import jalview.util.DBRefUtils;
 import jalview.util.MessageManager;
-import jalview.ws.dbsources.das.api.jalviewSourceI;
 import jalview.ws.seqfetcher.DbSourceProxy;
-
-import java.util.ArrayList;
-import java.util.Enumeration;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.StringTokenizer;
-import java.util.Vector;
-
 import uk.ac.ebi.picr.model.UPEntry;
+import uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator;
 
 /**
  * Implements a runnable for validating a sequence against external databases
@@ -55,131 +62,89 @@ import uk.ac.ebi.picr.model.UPEntry;
  */
 public class DBRefFetcher implements Runnable
 {
-  SequenceI[] dataset;
+  private static final String NEWLINE = System.lineSeparator();
 
-  IProgressIndicator af;
+  public static final String TRIM_RETRIEVED_SEQUENCES = "TRIM_FETCHED_DATASET_SEQS";
 
-  CutAndPasteTransfer output = new CutAndPasteTransfer();
+  public interface FetchFinishedListenerI
+  {
+    void finished();
+  }
 
-  StringBuffer sbuffer = new StringBuffer();
+  SequenceI[] dataset;
 
-  boolean running = false;
+  IProgressIndicator progressWindow;
+
+  CutAndPasteTransfer output = new CutAndPasteTransfer();
 
   /**
    * picr client instance
    */
   uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperInterface picrClient = null;
 
-  // /This will be a collection of Vectors of sequenceI refs.
+  // This will be a collection of Vectors of sequenceI refs.
   // The key will be the seq name or accession id of the seq
-  Hashtable seqRefs;
+  Hashtable<String, Vector<SequenceI>> seqRefs;
 
   DbSourceProxy[] dbSources;
 
   SequenceFetcher sfetcher;
 
+  private List<FetchFinishedListenerI> listeners;
+
   private SequenceI[] alseqs;
 
-  /**
+  /*
    * when true - retrieved sequences will be trimmed to cover longest derived
    * alignment sequence
    */
   private boolean trimDsSeqs = true;
 
-  public DBRefFetcher()
-  {
-  }
-
-  /**
-   * Creates a new SequenceFeatureFetcher object and fetches from the currently
-   * selected set of databases.
-   * 
-   * @param seqs
-   *          fetch references for these sequences
-   * @param af
-   *          the parent alignframe for progress bar monitoring.
-   */
-  public DBRefFetcher(SequenceI[] seqs, AlignFrame af)
-  {
-    this(seqs, af, null);
-  }
-
   /**
-   * Creates a new SequenceFeatureFetcher object and fetches from the currently
-   * selected set of databases.
+   * Creates a new DBRefFetcher object and fetches from the currently selected
+   * set of databases, if this is null then it fetches based on feature settings
    * 
    * @param seqs
-   *          fetch references for these sequences
-   * @param af
-   *          the parent alignframe for progress bar monitoring.
+   *          fetch references for these SequenceI array
+   * @param progressIndicatorFrame
+   *          the frame for progress bar monitoring
    * @param sources
-   *          array of database source strings to query references from
+   *          array of DbSourceProxy to query references form
+   * @param featureSettings
+   *          FeatureSettings to get alternative DbSourceProxy from
+   * @param isNucleotide
+   *          indicates if the array of SequenceI are Nucleotides or not
    */
-  public DBRefFetcher(SequenceI[] seqs, AlignFrame af,
-          DbSourceProxy[] sources)
+  public DBRefFetcher(SequenceI[] seqs,
+          IProgressIndicator progressIndicatorFrame,
+          DbSourceProxy[] sources, FeatureSettings featureSettings,
+          boolean isNucleotide)
   {
-    this.af = af;
+    listeners = new ArrayList<>();
+    this.progressWindow = progressIndicatorFrame;
     alseqs = new SequenceI[seqs.length];
     SequenceI[] ds = new SequenceI[seqs.length];
     for (int i = 0; i < seqs.length; i++)
     {
       alseqs[i] = seqs[i];
       if (seqs[i].getDatasetSequence() != null)
+      {
         ds[i] = seqs[i].getDatasetSequence();
+      }
       else
+      {
         ds[i] = seqs[i];
+      }
     }
     this.dataset = ds;
     // TODO Jalview 2.5 lots of this code should be in the gui package!
-    sfetcher = jalview.gui.SequenceFetcher.getSequenceFetcherSingleton(af);
+    sfetcher = jalview.gui.SequenceFetcher.getSequenceFetcherSingleton();
     // set default behaviour for transferring excess sequence data to the
     // dataset
-    trimDsSeqs = Cache.getDefault("TRIM_FETCHED_DATASET_SEQS", true);
+    trimDsSeqs = Cache.getDefault(TRIM_RETRIEVED_SEQUENCES, true);
     if (sources == null)
     {
-      // af.featureSettings_actionPerformed(null);
-      String[] defdb = null, otherdb = sfetcher
-              .getDbInstances(jalview.ws.dbsources.das.datamodel.DasSequenceSource.class);
-      List<DbSourceProxy> selsources = new ArrayList<DbSourceProxy>();
-      Vector dasselsrc = (af.featureSettings != null) ? af.featureSettings
-              .getSelectedSources() : new jalview.gui.DasSourceBrowser()
-              .getSelectedSources();
-      Enumeration<jalviewSourceI> en = dasselsrc.elements();
-      while (en.hasMoreElements())
-      {
-        jalviewSourceI src = en.nextElement();
-        List<DbSourceProxy> sp = src.getSequenceSourceProxies();
-        if (sp != null)
-        {
-          selsources.addAll(sp);
-          if (sp.size() > 1)
-          {
-            Cache.log.debug("Added many Db Sources for :" + src.getTitle());
-          }
-        }
-      }
-      // select appropriate databases based on alignFrame context.
-      if (af.getViewport().getAlignment().isNucleotide())
-      {
-        defdb = DBRefSource.DNACODINGDBS;
-      }
-      else
-      {
-        defdb = DBRefSource.PROTEINDBS;
-      }
-      List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
-      for (String ddb : defdb)
-      {
-        List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb);
-        if (srcesfordb != null)
-        {
-          srces.addAll(srcesfordb);
-        }
-      }
-
-      // append the selected sequence sources to the default dbs
-      srces.addAll(selsources);
-      dbSources = srces.toArray(new DbSourceProxy[0]);
+      setDatabaseSources(featureSettings, isNucleotide);
     }
     else
     {
@@ -190,26 +155,67 @@ public class DBRefFetcher implements Runnable
   }
 
   /**
-   * retrieve all the das sequence sources and add them to the list of db
-   * sources to retrieve from
+   * Helper method to configure the list of database sources to query
+   * 
+   * @param featureSettings
+   * @param forNucleotide
    */
-  public void appendAllDasSources()
+  void setDatabaseSources(FeatureSettings featureSettings,
+          boolean forNucleotide)
   {
-    if (dbSources == null)
+    // af.featureSettings_actionPerformed(null);
+    String[] defdb = null;
+    List<DbSourceProxy> selsources = new ArrayList<>();
+    // select appropriate databases based on alignFrame context.
+    if (forNucleotide)
     {
-      dbSources = new DbSourceProxy[0];
+      defdb = DBRefSource.DNACODINGDBS;
     }
-    // append additional sources
-    DbSourceProxy[] otherdb = sfetcher
-            .getDbSourceProxyInstances(jalview.ws.dbsources.das.datamodel.DasSequenceSource.class);
-    if (otherdb != null && otherdb.length > 0)
+    else
     {
-      DbSourceProxy[] newsrc = new DbSourceProxy[dbSources.length
-              + otherdb.length];
-      System.arraycopy(dbSources, 0, newsrc, 0, dbSources.length);
-      System.arraycopy(otherdb, 0, newsrc, dbSources.length, otherdb.length);
-      dbSources = newsrc;
+      defdb = DBRefSource.PROTEINDBS;
     }
+    List<DbSourceProxy> srces = new ArrayList<>();
+    for (String ddb : defdb)
+    {
+      List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb);
+      if (srcesfordb != null)
+      {
+        for (DbSourceProxy src : srcesfordb)
+        {
+          if (!srces.contains(src))
+          {
+            srces.addAll(srcesfordb);
+          }
+        }
+      }
+    }
+    // append the PDB data source, since it is 'special', catering for both
+    // nucleotide and protein
+    // srces.addAll(sfetcher.getSourceProxy(DBRefSource.PDB));
+
+    srces.addAll(selsources);
+    dbSources = srces.toArray(new DbSourceProxy[srces.size()]);
+  }
+
+  /**
+   * Constructor with only sequences provided
+   * 
+   * @param sequences
+   */
+  public DBRefFetcher(SequenceI[] sequences)
+  {
+    this(sequences, null, null, null, false);
+  }
+
+  /**
+   * Add a listener to be notified when sequence fetching is complete
+   * 
+   * @param l
+   */
+  public void addListener(FetchFinishedListenerI l)
+  {
+    listeners.add(l);
   }
 
   /**
@@ -220,21 +226,13 @@ public class DBRefFetcher implements Runnable
    */
   public void fetchDBRefs(boolean waitTillFinished)
   {
-    Thread thread = new Thread(this);
-    thread.start();
-    running = true;
-
     if (waitTillFinished)
     {
-      while (running)
-      {
-        try
-        {
-          Thread.sleep(500);
-        } catch (Exception ex)
-        {
-        }
-      }
+      run();
+    }
+    else
+    {
+      new Thread(this).start();
     }
   }
 
@@ -249,12 +247,12 @@ public class DBRefFetcher implements Runnable
    */
   void addSeqId(SequenceI seq, String key)
   {
-    key = key.toUpperCase();
+    key = key.toUpperCase(Locale.ROOT);
 
-    Vector seqs;
+    Vector<SequenceI> seqs;
     if (seqRefs.containsKey(key))
     {
-      seqs = (Vector) seqRefs.get(key);
+      seqs = seqRefs.get(key);
 
       if (seqs != null && !seqs.contains(seq))
       {
@@ -262,14 +260,14 @@ public class DBRefFetcher implements Runnable
       }
       else if (seqs == null)
       {
-        seqs = new Vector();
+        seqs = new Vector<>();
         seqs.addElement(seq);
       }
 
     }
     else
     {
-      seqs = new Vector();
+      seqs = new Vector<>();
       seqs.addElement(seq);
     }
 
@@ -279,23 +277,26 @@ public class DBRefFetcher implements Runnable
   /**
    * DOCUMENT ME!
    */
+  @Override
   public void run()
   {
     if (dbSources == null)
     {
-      throw new Error(
-              MessageManager
-                      .getString("error.implementation_error_must_init_dbsources"));
+      throw new Error(MessageManager
+              .getString("error.implementation_error_must_init_dbsources"));
     }
-    running = true;
     long startTime = System.currentTimeMillis();
-    af.setProgressBar(MessageManager.getString("status.fetching_db_refs"),
-            startTime);
+    if (progressWindow != null)
+    {
+      progressWindow.setProgressBar(
+              MessageManager.getString("status.fetching_db_refs"),
+              startTime);
+    }
     try
     {
       if (Cache.getDefault("DBREFFETCH_USEPICR", false))
       {
-        picrClient = new uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator()
+        picrClient = new AccessionMapperServiceLocator()
                 .getAccessionMapperPort();
       }
     } catch (Exception e)
@@ -303,157 +304,153 @@ public class DBRefFetcher implements Runnable
       System.err.println("Couldn't locate PICR service instance.\n");
       e.printStackTrace();
     }
+
+    Vector<SequenceI> sdataset = new Vector<>(
+            Arrays.asList(dataset));
+    List<String> warningMessages = new ArrayList<>();
+
+    // clear any old feature display settings recorded from past sessions
+    featureDisplaySettings = null;
+
     int db = 0;
-    Vector sdataset = new Vector();
-    for (int s = 0; s < dataset.length; s++)
-    {
-      sdataset.addElement(dataset[s]);
-    }
     while (sdataset.size() > 0 && db < dbSources.length)
     {
-      int maxqlen = 1; // default number of queries made to at one time
-      System.err.println("Verifying against " + dbSources[db].getDbName());
-      boolean dn = false;
+      int maxqlen = 1; // default number of queries made at one time
+      System.out.println("Verifying against " + dbSources[db].getDbName());
 
       // iterate through db for each remaining un-verified sequence
       SequenceI[] currSeqs = new SequenceI[sdataset.size()];
       sdataset.copyInto(currSeqs);// seqs that are to be validated against
       // dbSources[db]
-      Vector queries = new Vector(); // generated queries curSeq
-      seqRefs = new Hashtable();
+      Vector<String> queries = new Vector<>(); // generated queries curSeq
+      seqRefs = new Hashtable<>();
 
       int seqIndex = 0;
 
-      jalview.ws.seqfetcher.DbSourceProxy dbsource = dbSources[db];
+      DbSourceProxy dbsource = dbSources[db];
+      // for moment, we dumbly iterate over all retrieval sources for a
+      // particular database
+      // TODO: introduce multithread multisource queries and logic to remove a
+      // query from other sources if any source for a database returns a
+      // record
+      maxqlen = dbsource.getMaximumQueryCount();
+
+      while (queries.size() > 0 || seqIndex < currSeqs.length)
       {
-        // for moment, we dumbly iterate over all retrieval sources for a
-        // particular database
-        // TODO: introduce multithread multisource queries and logic to remove a
-        // query from other sources if any source for a database returns a
-        // record
-        if (dbsource.getDbSourceProperties().containsKey(
-                DBRefSource.MULTIACC))
-        {
-          maxqlen = ((Integer) dbsource.getDbSourceProperties().get(
-                  DBRefSource.MULTIACC)).intValue();
-        }
-        else
-        {
-          maxqlen = 1;
-        }
-        while (queries.size() > 0 || seqIndex < currSeqs.length)
+        if (queries.size() > 0)
         {
-          if (queries.size() > 0)
-          {
-            // Still queries to make for current seqIndex
-            StringBuffer queryString = new StringBuffer("");
-            int numq = 0, nqSize = (maxqlen > queries.size()) ? queries
-                    .size() : maxqlen;
+          // Still queries to make for current seqIndex
+          StringBuffer queryString = new StringBuffer("");
+          int numq = 0;
+          int nqSize = (maxqlen > queries.size()) ? queries.size()
+                  : maxqlen;
 
-            while (queries.size() > 0 && numq < nqSize)
-            {
-              String query = (String) queries.elementAt(0);
-              if (dbsource.isValidReference(query))
-              {
-                queryString.append((numq == 0) ? "" : dbsource
-                        .getAccessionSeparator());
-                queryString.append(query);
-                numq++;
-              }
-              // remove the extracted query string
-              queries.removeElementAt(0);
-            }
-            // make the queries and process the response
-            AlignmentI retrieved = null;
-            try
-            {
-              if (jalview.bin.Cache.log.isDebugEnabled())
-              {
-                jalview.bin.Cache.log.debug("Querying "
-                        + dbsource.getDbName() + " with : '"
-                        + queryString.toString() + "'");
-              }
-              retrieved = dbsource.getSequenceRecords(queryString
-                      .toString());
-            } catch (Exception ex)
-            {
-              ex.printStackTrace();
-            } catch (OutOfMemoryError err)
+          while (queries.size() > 0 && numq < nqSize)
+          {
+            String query = queries.elementAt(0);
+            if (dbsource.isValidReference(query))
             {
-              new OOMWarning("retrieving database references ("
-                      + queryString.toString() + ")", err);
+              queryString.append(
+                      (numq == 0) ? "" : dbsource.getAccessionSeparator());
+              queryString.append(query);
+              numq++;
             }
-            if (retrieved != null)
+            // remove the extracted query string
+            queries.removeElementAt(0);
+          }
+          // make the queries and process the response
+          AlignmentI retrieved = null;
+          try
+          {
+            if (Cache.isDebugEnabled())
             {
-              transferReferences(sdataset, dbsource.getDbSource(),
-                      retrieved, trimDsSeqs);
+              Cache.debug("Querying " + dbsource.getDbName()
+                      + " with : '" + queryString.toString() + "'");
             }
+            retrieved = dbsource.getSequenceRecords(queryString.toString());
+          } catch (Exception ex)
+          {
+            ex.printStackTrace();
+          } catch (OutOfMemoryError err)
+          {
+            new OOMWarning("retrieving database references ("
+                    + queryString.toString() + ")", err);
+          }
+          if (retrieved != null)
+          {
+            transferReferences(sdataset, dbsource, retrieved,
+                    trimDsSeqs, warningMessages);
           }
-          else
+        }
+        else
+        {
+          // make some more strings for use as queries
+          for (int i = 0; (seqIndex < dataset.length)
+                  && (i < 50); seqIndex++, i++)
           {
-            // make some more strings for use as queries
-            for (int i = 0; (seqIndex < dataset.length) && (i < 50); seqIndex++, i++)
+            SequenceI sequence = dataset[seqIndex];
+            List<DBRefEntry> uprefs = DBRefUtils
+                    .selectRefs(sequence.getDBRefs(), new String[]
+                    { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
+            // });
+            // check for existing dbrefs to use
+            if (uprefs != null && uprefs.size() > 0)
             {
-              SequenceI sequence = dataset[seqIndex];
-              DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs(
-                      sequence.getDBRef(),
-                      new String[] { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
-              // });
-              // check for existing dbrefs to use
-              if (uprefs != null && uprefs.length > 0)
+              for (int j = 0, n = uprefs.size(); j < n; j++)
               {
-                for (int j = 0; j < uprefs.length; j++)
-                {
-                  addSeqId(sequence, uprefs[j].getAccessionId());
-                  queries.addElement(uprefs[j].getAccessionId()
-                          .toUpperCase());
-                }
+               DBRefEntry upref = uprefs.get(j);
+                addSeqId(sequence, upref.getAccessionId());
+                queries.addElement(
+                        upref.getAccessionId().toUpperCase(Locale.ROOT));
               }
-              else
+            }
+            else
+            {
+              Pattern possibleIds = Pattern.compile("[A-Za-z0-9_]+"); 
+              // generate queries from sequence ID string
+              Matcher tokens = possibleIds.matcher(sequence.getName());
+              int p=0;
+              while (tokens.find(p))
               {
-                // generate queries from sequence ID string
-                StringTokenizer st = new StringTokenizer(
-                        sequence.getName(), "|");
-                while (st.hasMoreTokens())
+                String token = tokens.group();
+                p = tokens.end();
+                UPEntry[] presp = null;
+                if (picrClient != null)
                 {
-                  String token = st.nextToken();
-                  UPEntry[] presp = null;
-                  if (picrClient != null)
+                  // resolve the string against PICR to recover valid IDs
+                  try
                   {
-                    // resolve the string against PICR to recover valid IDs
-                    try
-                    {
-                      presp = picrClient.getUPIForAccession(token, null,
-                              picrClient.getMappedDatabaseNames(), null,
-                              true);
-                    } catch (Exception e)
-                    {
-                      System.err.println("Exception with Picr for '"
-                              + token + "'\n");
-                      e.printStackTrace();
-                    }
-                  }
-                  if (presp != null && presp.length > 0)
+                    presp = picrClient.getUPIForAccession(token, null,
+                            picrClient.getMappedDatabaseNames(), null,
+                            true);
+                  } catch (Exception e)
                   {
-                    for (int id = 0; id < presp.length; id++)
-                    {
-                      // construct sequences from response if sequences are
-                      // present, and do a transferReferences
-                      // otherwise transfer non sequence x-references directly.
-                    }
-                    System.out
-                            .println("Validated ID against PICR... (for what its worth):"
-                                    + token);
-                    addSeqId(sequence, token);
-                    queries.addElement(token.toUpperCase());
+                    System.err.println(
+                            "Exception with Picr for '" + token + "'\n");
+                    e.printStackTrace();
                   }
-                  else
+                }
+                if (presp != null && presp.length > 0)
+                {
+                  for (int id = 0; id < presp.length; id++)
                   {
-                    // if ()
-                    // System.out.println("Not querying source with token="+token+"\n");
-                    addSeqId(sequence, token);
-                    queries.addElement(token.toUpperCase());
+                    // construct sequences from response if sequences are
+                    // present, and do a transferReferences
+                    // otherwise transfer non sequence x-references directly.
                   }
+                  System.out.println(
+                          "Validated ID against PICR... (for what its worth):"
+                                  + token);
+                  addSeqId(sequence, token);
+                  queries.addElement(token.toUpperCase(Locale.ROOT));
+                }
+                else
+                {
+                  // if ()
+                  // System.out.println("Not querying source with
+                  // token="+token+"\n");
+                  addSeqId(sequence, token);
+                  queries.addElement(token.toUpperCase(Locale.ROOT));
                 }
               }
             }
@@ -462,82 +459,99 @@ public class DBRefFetcher implements Runnable
       }
       // advance to next database
       db++;
-    } // all databases have been queries.
-    if (sbuffer.length() > 0)
+    } // all databases have been queried
+    if (!warningMessages.isEmpty())
     {
-      output.setText(MessageManager
-              .getString("label.your_sequences_have_been_verified")
-              + sbuffer.toString());
+      StringBuilder sb = new StringBuilder(warningMessages.size() * 30);
+      sb.append(MessageManager
+              .getString("label.your_sequences_have_been_verified"));
+      for (String msg : warningMessages)
+      {
+        sb.append(msg).append(NEWLINE);
+      }
+      output.setText(sb.toString());
+
       Desktop.addInternalFrame(output,
-              MessageManager.getString("label.sequence_names_updated"),
-              600, 300);
+              MessageManager.getString("label.sequences_updated"), 600,
+              300);
       // The above is the dataset, we must now find out the index
       // of the viewed sequence
 
     }
+    if (progressWindow != null)
+    {
+      progressWindow.setProgressBar(
+              MessageManager.getString("label.dbref_search_completed"),
+              startTime);
+    }
 
-    af.setProgressBar(
-            MessageManager.getString("label.dbref_search_completed"),
-            startTime);
-    // promptBeforeBlast();
-
-    running = false;
-
+    for (FetchFinishedListenerI listener : listeners)
+    {
+      listener.finished();
+    }
   }
 
   /**
    * Verify local sequences in seqRefs against the retrieved sequence database
-   * records.
+   * records. Returns true if any sequence was modified as a result (start/end
+   * changed and/or sequence enlarged), else false.
    * 
+   * @param sdataset
+   *          dataset sequences we are retrieving for
+   * @param dbSource
+   *          database source we are retrieving from
+   * @param retrievedAl
+   *          retrieved sequences as alignment
    * @param trimDatasetSeqs
-   * 
+   *          if true, sequences will not be enlarged to match longer retrieved
+   *          sequences, only their start/end adjusted
+   * @param warningMessages
+   *          a list of messages to add to
    */
-  void transferReferences(Vector sdataset, String dbSource,
-          AlignmentI retrievedAl, boolean trimDatasetSeqs) // File
-  // file)
+  boolean transferReferences(Vector<SequenceI> sdataset,
+          DbSourceProxy dbSourceProxy,
+          AlignmentI retrievedAl, boolean trimDatasetSeqs,
+          List<String> warningMessages)
   {
-    System.out.println("trimming ? " + trimDatasetSeqs);
+    // System.out.println("trimming ? " + trimDatasetSeqs);
     if (retrievedAl == null || retrievedAl.getHeight() == 0)
     {
-      return;
+      return false;
     }
-    SequenceI[] retrieved = recoverDbSequences(retrievedAl
-            .getSequencesArray());
-    SequenceI sequence = null;
-    boolean transferred = false;
-    StringBuffer messages = new StringBuffer();
 
-    // Vector entries = new Uniprot().getUniprotEntries(file);
+    String dbSource = dbSourceProxy.getDbName();
+    boolean modified = false;
+    SequenceI[] retrieved = recoverDbSequences(
+            retrievedAl.getSequencesArray());
+    SequenceI sequence = null;
 
-    int i, iSize = retrieved.length; // entries == null ? 0 : entries.size();
-    // UniprotEntry entry;
-    for (i = 0; i < iSize; i++)
+    for (SequenceI retrievedSeq : retrieved)
     {
-      SequenceI entry = retrieved[i]; // (UniprotEntry) entries.elementAt(i);
-
       // Work out which sequences this sequence matches,
       // taking into account all accessionIds and names in the file
-      Vector sequenceMatches = new Vector();
+      Vector<SequenceI> sequenceMatches = new Vector<>();
       // look for corresponding accession ids
-      DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs(
-              entry.getDBRef(), new String[] { dbSource });
+      List<DBRefEntry> entryRefs = DBRefUtils
+              .selectRefs(retrievedSeq.getDBRefs(), new String[]
+              { dbSource });
       if (entryRefs == null)
       {
         System.err
                 .println("Dud dbSource string ? no entryrefs selected for "
-                        + dbSource + " on " + entry.getName());
+                        + dbSource + " on " + retrievedSeq.getName());
         continue;
       }
-      for (int j = 0; j < entryRefs.length; j++)
+      for (int j = 0, n = entryRefs.size(); j < n; j++)
       {
-        String accessionId = entryRefs[j].getAccessionId(); // .getAccession().elementAt(j).toString();
+       DBRefEntry ref = entryRefs.get(j);
+        String accessionId = ref.getAccessionId();
         // match up on accessionId
-        if (seqRefs.containsKey(accessionId.toUpperCase()))
+        if (seqRefs.containsKey(accessionId.toUpperCase(Locale.ROOT)))
         {
-          Vector seqs = (Vector) seqRefs.get(accessionId);
+          Vector<SequenceI> seqs = seqRefs.get(accessionId);
           for (int jj = 0; jj < seqs.size(); jj++)
           {
-            sequence = (SequenceI) seqs.elementAt(jj);
+            sequence = seqs.elementAt(jj);
             if (!sequenceMatches.contains(sequence))
             {
               sequenceMatches.addElement(sequence);
@@ -545,17 +559,17 @@ public class DBRefFetcher implements Runnable
           }
         }
       }
-      if (sequenceMatches.size() == 0)
+      if (sequenceMatches.isEmpty())
       {
         // failed to match directly on accessionId==query so just compare all
         // sequences to entry
-        Enumeration e = seqRefs.keys();
+        Enumeration<String> e = seqRefs.keys();
         while (e.hasMoreElements())
         {
-          Vector sqs = (Vector) seqRefs.get(e.nextElement());
+          Vector<SequenceI> sqs = seqRefs.get(e.nextElement());
           if (sqs != null && sqs.size() > 0)
           {
-            Enumeration sqe = sqs.elements();
+            Enumeration<SequenceI> sqe = sqs.elements();
             while (sqe.hasMoreElements())
             {
               sequenceMatches.addElement(sqe.nextElement());
@@ -568,7 +582,7 @@ public class DBRefFetcher implements Runnable
       // could be useful to extend this so we try to find any 'significant'
       // information in common between two sequence objects.
       /*
-       * DBRefEntry[] entryRefs =
+       * List<DBRefEntry> entryRefs =
        * jalview.util.DBRefUtils.selectRefs(entry.getDBRef(), new String[] {
        * dbSource }); for (int j = 0; j < entry.getName().size(); j++) { String
        * name = entry.getName().elementAt(j).toString(); if
@@ -577,103 +591,126 @@ public class DBRefFetcher implements Runnable
        * seqs.elementAt(jj); if (!sequenceMatches.contains(sequence)) {
        * sequenceMatches.addElement(sequence); } } } }
        */
+      if (sequenceMatches.size() > 0)
+      {
+        addFeatureSettings(dbSourceProxy);
+      }
       // sequenceMatches now contains the set of all sequences associated with
       // the returned db record
-      String entrySeq = entry.getSequenceAsString().toUpperCase();
+      final String retrievedSeqString = retrievedSeq.getSequenceAsString();
+      String entrySeq = retrievedSeqString.toUpperCase(Locale.ROOT);
       for (int m = 0; m < sequenceMatches.size(); m++)
       {
-        sequence = (SequenceI) sequenceMatches.elementAt(m);
+        sequence = sequenceMatches.elementAt(m);
         // only update start and end positions and shift features if there are
         // no existing references
         // TODO: test for legacy where uniprot or EMBL refs exist but no
         // mappings are made (but content matches retrieved set)
-        boolean updateRefFrame = sequence.getDBRef() == null
-                || sequence.getDBRef().length == 0;
+        boolean updateRefFrame = sequence.getDBRefs() == null
+                || sequence.getDBRefs().size() == 0;
         // TODO:
         // verify sequence against the entry sequence
 
-        String nonGapped = AlignSeq.extractGaps("-. ",
-                sequence.getSequenceAsString()).toUpperCase();
+        Mapping mp;
+        final int sequenceStart = sequence.getStart();
 
+        boolean remoteEnclosesLocal = false;
+        String nonGapped = AlignSeq
+                .extractGaps("-. ", sequence.getSequenceAsString())
+                .toUpperCase(Locale.ROOT);
         int absStart = entrySeq.indexOf(nonGapped);
-        int mapStart = entry.getStart();
-        jalview.datamodel.Mapping mp;
-
         if (absStart == -1)
         {
-          // Is local sequence contained in dataset sequence?
+          // couldn't find local sequence in sequence from database, so check if
+          // the database sequence is a subsequence of local sequence
           absStart = nonGapped.indexOf(entrySeq);
           if (absStart == -1)
-          { // verification failed.
-            messages.append(sequence.getName()
-                    + " SEQUENCE NOT %100 MATCH \n");
+          {
+            // verification failed. couldn't find any relationship between
+            // entrySeq and local sequence
+            // messages suppressed as many-to-many matches are confusing
+            // String msg = sequence.getName()
+            // + " Sequence not 100% match with "
+            // + retrievedSeq.getName();
+            // addWarningMessage(warningMessages, msg);
             continue;
           }
-          transferred = true;
-          sbuffer.append(sequence.getName() + " HAS " + absStart
-                  + " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n");
-          //
-          // + " - ANY SEQUENCE FEATURES"
-          // + " HAVE BEEN ADJUSTED ACCORDINGLY \n");
-          // absStart = 0;
-          // create valid mapping between matching region of local sequence and
-          // the mapped sequence
-          mp = new Mapping(null, new int[] {
-              sequence.getStart() + absStart,
-              sequence.getStart() + absStart + entrySeq.length() - 1 },
-                  new int[] { entry.getStart(),
-                      entry.getStart() + entrySeq.length() - 1 }, 1, 1);
-          updateRefFrame = false; // mapping is based on current start/end so
-          // don't modify start and end
+          /*
+           * retrieved sequence is a proper subsequence of local sequence
+           */
+          String msg = sequence.getName() + " has " + absStart
+                  + " prefixed residues compared to "
+                  + retrievedSeq.getName();
+          addWarningMessage(warningMessages, msg);
+
+          /*
+           * So create a mapping to the external entry from the matching region of 
+           * the local sequence, and leave local start/end untouched. 
+           */
+          mp = new Mapping(null,
+                  new int[]
+                  { sequenceStart + absStart,
+                      sequenceStart + absStart + entrySeq.length() - 1 },
+                  new int[]
+                  { retrievedSeq.getStart(),
+                      retrievedSeq.getStart() + entrySeq.length() - 1 },
+                  1, 1);
+          updateRefFrame = false;
         }
         else
         {
-          transferred = true;
-          // update start and end of local sequence to place it in entry's
-          // reference frame.
-          // apply identity map map from whole of local sequence to matching
-          // region of database
-          // sequence
-          mp = null; // Mapping.getIdentityMap();
-          // new Mapping(null,
-          // new int[] { absStart+sequence.getStart(),
-          // absStart+sequence.getStart()+entrySeq.length()-1},
-          // new int[] { entry.getStart(), entry.getEnd() }, 1, 1);
-          // relocate local features for updated start
+          /*
+           * local sequence is a subsequence of (or matches) retrieved sequence
+           */
+          remoteEnclosesLocal = true;
+          mp = null;
+
           if (updateRefFrame)
           {
-            if (sequence.getSequenceFeatures() != null)
+            /*
+             * relocate existing sequence features by offset
+             */
+            int startShift = absStart - sequenceStart + 1;
+            if (startShift != 0)
             {
-              SequenceFeature[] sf = sequence.getSequenceFeatures();
-              int start = sequence.getStart();
-              int end = sequence.getEnd();
-              int startShift = 1 - absStart - start; // how much the features
-                                                     // are
-              // to be shifted by
-              for (int sfi = 0; sfi < sf.length; sfi++)
-              {
-                if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end)
-                {
-                  // shift feature along by absstart
-                  sf[sfi].setBegin(sf[sfi].getBegin() + startShift);
-                  sf[sfi].setEnd(sf[sfi].getEnd() + startShift);
-                }
-              }
+              modified |= sequence.getFeatures().shiftFeatures(1,
+                      startShift);
             }
           }
         }
 
         System.out.println("Adding dbrefs to " + sequence.getName()
-                + " from " + dbSource + " sequence : " + entry.getName());
-        sequence.transferAnnotation(entry, mp);
-        // unknownSequences.remove(sequence);
-        int absEnd = absStart + nonGapped.length();
-        absStart += 1;
+                + " from " + dbSource + " sequence : "
+                + retrievedSeq.getName());
+        sequence.transferAnnotation(retrievedSeq, mp);
+
+        absStart += retrievedSeq.getStart();
+        int absEnd = absStart + nonGapped.length() - 1;
         if (!trimDatasetSeqs)
         {
-          // insert full length sequence from record
-          sequence.setSequence(entry.getSequenceAsString());
-          sequence.setStart(entry.getStart());
+          /*
+           * update start position and/or expand to longer retrieved sequence
+           */
+          if (!retrievedSeqString.equals(sequence.getSequenceAsString())
+                  && remoteEnclosesLocal)
+          {
+            sequence.setSequence(retrievedSeqString);
+            modified = true;
+            addWarningMessage(warningMessages,
+                    "Sequence for " + sequence.getName() + " expanded from "
+                            + retrievedSeq.getName());
+          }
+          if (sequence.getStart() != retrievedSeq.getStart())
+          {
+            sequence.setStart(retrievedSeq.getStart());
+            modified = true;
+            if (absStart != sequenceStart)
+            {
+              addWarningMessage(warningMessages,
+                      "Start/end position for " + sequence.getName()
+                              + " updated from " + retrievedSeq.getName());
+            }
+          }
         }
         if (updateRefFrame)
         {
@@ -681,24 +718,34 @@ public class DBRefFetcher implements Runnable
           if (trimDatasetSeqs)
           {
             // just fix start/end
-            sequence.setStart(absStart);
-            sequence.setEnd(absEnd);
+            if (sequence.getStart() != absStart
+                    || sequence.getEnd() != absEnd)
+            {
+              sequence.setStart(absStart);
+              sequence.setEnd(absEnd);
+              modified = true;
+              addWarningMessage(warningMessages,
+                      "Start/end for " + sequence.getName()
+                              + " updated from " + retrievedSeq.getName());
+            }
           }
           // search for alignment sequences to update coordinate frame for
           for (int alsq = 0; alsq < alseqs.length; alsq++)
           {
             if (alseqs[alsq].getDatasetSequence() == sequence)
             {
-              String ngAlsq = AlignSeq.extractGaps("-. ",
-                      alseqs[alsq].getSequenceAsString()).toUpperCase();
+              String ngAlsq = AlignSeq
+                      .extractGaps("-. ",
+                              alseqs[alsq].getSequenceAsString())
+                      .toUpperCase(Locale.ROOT);
               int oldstrt = alseqs[alsq].getStart();
               alseqs[alsq].setStart(sequence.getSequenceAsString()
-                      .toUpperCase().indexOf(ngAlsq)
-                      + sequence.getStart());
+                      .toUpperCase(Locale.ROOT).indexOf(ngAlsq) + sequence.getStart());
               if (oldstrt != alseqs[alsq].getStart())
               {
-                alseqs[alsq].setEnd(ngAlsq.length()
-                        + alseqs[alsq].getStart() - 1);
+                alseqs[alsq].setEnd(
+                        ngAlsq.length() + alseqs[alsq].getStart() - 1);
+                modified = true;
               }
             }
           }
@@ -711,14 +758,49 @@ public class DBRefFetcher implements Runnable
         // and remove it from the rest
         // TODO: decide if we should remove annotated sequence from set
         sdataset.remove(sequence);
-        // TODO: should we make a note of sequences that have received new DB
-        // ids, so we can query all enabled DAS servers for them ?
       }
     }
-    if (!transferred)
+    return modified;
+  }
+
+  Map<String, FeatureSettingsModelI> featureDisplaySettings = null;
+
+  private void addFeatureSettings(DbSourceProxy dbSourceProxy)
+  {
+    FeatureSettingsModelI fsettings = dbSourceProxy
+            .getFeatureColourScheme();
+    if (fsettings != null)
+    {
+      if (featureDisplaySettings == null)
+      {
+        featureDisplaySettings = new HashMap<>();
+      }
+      featureDisplaySettings.put(dbSourceProxy.getDbName(), fsettings);
+    }
+  }
+
+  /**
+   * 
+   * @return any feature settings associated with sources that have provided sequences
+   */
+  public List<FeatureSettingsModelI>getFeatureSettingsModels()
+  {
+    return featureDisplaySettings == null
+            ? Arrays.asList(new FeatureSettingsModelI[0])
+            : Arrays.asList(featureDisplaySettings.values()
+                    .toArray(new FeatureSettingsModelI[1]));
+  }
+  /**
+   * Adds the message to the list unless it already contains it
+   * 
+   * @param messageList
+   * @param msg
+   */
+  void addWarningMessage(List<String> messageList, String msg)
+  {
+    if (!messageList.contains(msg))
     {
-      // report the ID/sequence mismatches
-      sbuffer.append(messages);
+      messageList.add(msg);
     }
   }
 
@@ -730,27 +812,33 @@ public class DBRefFetcher implements Runnable
    */
   private SequenceI[] recoverDbSequences(SequenceI[] sequencesArray)
   {
-    Vector nseq = new Vector();
-    for (int i = 0; sequencesArray != null && i < sequencesArray.length; i++)
+       int n;
+       if (sequencesArray == null || (n = sequencesArray.length) == 0)
+         return sequencesArray;
+    ArrayList<SequenceI> nseq = new ArrayList<>();
+    for (int i = 0;i < n; i++)
     {
-      nseq.addElement(sequencesArray[i]);
-      DBRefEntry dbr[] = sequencesArray[i].getDBRef();
-      jalview.datamodel.Mapping map = null;
-      for (int r = 0; (dbr != null) && r < dbr.length; r++)
+      nseq.add(sequencesArray[i]);
+      List<DBRefEntry> dbr = sequencesArray[i].getDBRefs();
+      Mapping map = null;
+      if (dbr != null)
       {
-        if ((map = dbr[r].getMap()) != null)
+        for (int r = 0, rn = dbr.size(); r < rn; r++)
         {
-          if (map.getTo() != null && !nseq.contains(map.getTo()))
+          if ((map = dbr.get(r).getMap()) != null)
           {
-            nseq.addElement(map.getTo());
-          }
+            if (map.getTo() != null && !nseq.contains(map.getTo()))
+            {
+              nseq.add(map.getTo());
+            }
+          }  
         }
       }
     }
+    // BH 2019.01.25 question here if this is the right logic. Return the original if nothing found?
     if (nseq.size() > 0)
     {
-      sequencesArray = new SequenceI[nseq.size()];
-      nseq.toArray(sequencesArray);
+      return nseq.toArray(new SequenceI[nseq.size()]);
     }
     return sequencesArray;
   }