JAL-2089 patch broken merge to master for Release 2.10.0b1
[jalview.git] / src / jalview / ws / DBRefFetcher.java
index 2e0197c..fd511dc 100644 (file)
@@ -29,15 +29,19 @@ import jalview.datamodel.Mapping;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.gui.CutAndPasteTransfer;
+import jalview.gui.DasSourceBrowser;
 import jalview.gui.Desktop;
 import jalview.gui.FeatureSettings;
 import jalview.gui.IProgressIndicator;
 import jalview.gui.OOMWarning;
+import jalview.util.DBRefUtils;
 import jalview.util.MessageManager;
 import jalview.ws.dbsources.das.api.jalviewSourceI;
+import jalview.ws.dbsources.das.datamodel.DasSequenceSource;
 import jalview.ws.seqfetcher.DbSourceProxy;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.Hashtable;
 import java.util.List;
@@ -45,6 +49,7 @@ import java.util.StringTokenizer;
 import java.util.Vector;
 
 import uk.ac.ebi.picr.model.UPEntry;
+import uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator;
 
 /**
  * Implements a runnable for validating a sequence against external databases
@@ -55,14 +60,19 @@ import uk.ac.ebi.picr.model.UPEntry;
  */
 public class DBRefFetcher implements Runnable
 {
+  private static final String NEWLINE = System.lineSeparator();
+
+  public interface FetchFinishedListenerI
+  {
+    void finished();
+  }
+
   SequenceI[] dataset;
 
-  IProgressIndicator af;
+  IProgressIndicator progressWindow;
 
   CutAndPasteTransfer output = new CutAndPasteTransfer();
 
-  StringBuffer sbuffer = new StringBuffer();
-
   boolean running = false;
 
   /**
@@ -70,26 +80,24 @@ public class DBRefFetcher implements Runnable
    */
   uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperInterface picrClient = null;
 
-  // /This will be a collection of Vectors of sequenceI refs.
+  // This will be a collection of Vectors of sequenceI refs.
   // The key will be the seq name or accession id of the seq
-  Hashtable seqRefs;
+  Hashtable<String, Vector<SequenceI>> seqRefs;
 
   DbSourceProxy[] dbSources;
 
   SequenceFetcher sfetcher;
 
+  private List<FetchFinishedListenerI> listeners;
+
   private SequenceI[] alseqs;
 
-  /**
+  /*
    * when true - retrieved sequences will be trimmed to cover longest derived
    * alignment sequence
    */
   private boolean trimDsSeqs = true;
 
-  public DBRefFetcher()
-  {
-  }
-
   /**
    * Creates a new DBRefFetcher object and fetches from the currently selected
    * set of databases, if this is null then it fetches based on feature settings
@@ -107,9 +115,11 @@ public class DBRefFetcher implements Runnable
    */
   public DBRefFetcher(SequenceI[] seqs,
           IProgressIndicator progressIndicatorFrame,
-          DbSourceProxy[] sources, FeatureSettings featureSettings, boolean isNucleotide)
+          DbSourceProxy[] sources, FeatureSettings featureSettings,
+          boolean isNucleotide)
   {
-    this.af = progressIndicatorFrame;
+    listeners = new ArrayList<FetchFinishedListenerI>();
+    this.progressWindow = progressIndicatorFrame;
     alseqs = new SequenceI[seqs.length];
     SequenceI[] ds = new SequenceI[seqs.length];
     for (int i = 0; i < seqs.length; i++)
@@ -133,56 +143,94 @@ public class DBRefFetcher implements Runnable
     trimDsSeqs = Cache.getDefault("TRIM_FETCHED_DATASET_SEQS", true);
     if (sources == null)
     {
-      // af.featureSettings_actionPerformed(null);
-      String[] defdb = null, otherdb = sfetcher
-              .getDbInstances(jalview.ws.dbsources.das.datamodel.DasSequenceSource.class);
-      List<DbSourceProxy> selsources = new ArrayList<DbSourceProxy>();
-      Vector dasselsrc = (featureSettings != null) ? featureSettings
-              .getSelectedSources() : new jalview.gui.DasSourceBrowser()
-              .getSelectedSources();
-      Enumeration<jalviewSourceI> en = dasselsrc.elements();
-      while (en.hasMoreElements())
+      setDatabaseSources(featureSettings, isNucleotide);
+    }
+    else
+    {
+      // we assume the caller knows what they're doing and ensured that all the
+      // db source names are valid
+      dbSources = sources;
+    }
+  }
+
+  /**
+   * Helper method to configure the list of database sources to query
+   * 
+   * @param featureSettings
+   * @param forNucleotide
+   */
+  void setDatabaseSources(FeatureSettings featureSettings,
+          boolean forNucleotide)
+  {
+    // af.featureSettings_actionPerformed(null);
+    String[] defdb = null;
+    List<DbSourceProxy> selsources = new ArrayList<DbSourceProxy>();
+    Vector<jalviewSourceI> dasselsrc = (featureSettings != null) ? featureSettings
+            .getSelectedSources() : new DasSourceBrowser()
+            .getSelectedSources();
+
+    for (jalviewSourceI src : dasselsrc)
+    {
+      List<DbSourceProxy> sp = src.getSequenceSourceProxies();
+      if (sp != null)
       {
-        jalviewSourceI src = en.nextElement();
-        List<DbSourceProxy> sp = src.getSequenceSourceProxies();
-        if (sp != null)
+        selsources.addAll(sp);
+        if (sp.size() > 1)
         {
-          selsources.addAll(sp);
-          if (sp.size() > 1)
-          {
-            Cache.log.debug("Added many Db Sources for :" + src.getTitle());
-          }
+          Cache.log.debug("Added many Db Sources for :" + src.getTitle());
         }
       }
-      // select appropriate databases based on alignFrame context.
-      if (isNucleotide)
-      {
-        defdb = DBRefSource.DNACODINGDBS;
-      }
-      else
-      {
-        defdb = DBRefSource.PROTEINDBS;
-      }
-      List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
-      for (String ddb : defdb)
+    }
+    // select appropriate databases based on alignFrame context.
+    if (forNucleotide)
+    {
+      defdb = DBRefSource.DNACODINGDBS;
+    }
+    else
+    {
+      defdb = DBRefSource.PROTEINDBS;
+    }
+    List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
+    for (String ddb : defdb)
+    {
+      List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb);
+      if (srcesfordb != null)
       {
-        List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb);
-        if (srcesfordb != null)
+        for (DbSourceProxy src : srcesfordb)
         {
-          srces.addAll(srcesfordb);
+          if (!srces.contains(src))
+          {
+            srces.addAll(srcesfordb);
+          }
         }
       }
-
-      // append the selected sequence sources to the default dbs
-      srces.addAll(selsources);
-      dbSources = srces.toArray(new DbSourceProxy[0]);
-    }
-    else
-    {
-      // we assume the caller knows what they're doing and ensured that all the
-      // db source names are valid
-      dbSources = sources;
     }
+    // append the PDB data source, since it is 'special', catering for both
+    // nucleotide and protein
+    // srces.addAll(sfetcher.getSourceProxy(DBRefSource.PDB));
+
+    srces.addAll(selsources);
+    dbSources = srces.toArray(new DbSourceProxy[srces.size()]);
+  }
+
+  /**
+   * Constructor with only sequences provided
+   * 
+   * @param sequences
+   */
+  public DBRefFetcher(SequenceI[] sequences)
+  {
+    this(sequences, null, null, null, false);
+  }
+
+  /**
+   * Add a listener to be notified when sequence fetching is complete
+   * 
+   * @param l
+   */
+  public void addListener(FetchFinishedListenerI l)
+  {
+    listeners.add(l);
   }
 
   /**
@@ -197,7 +245,7 @@ public class DBRefFetcher implements Runnable
     }
     // append additional sources
     DbSourceProxy[] otherdb = sfetcher
-            .getDbSourceProxyInstances(jalview.ws.dbsources.das.datamodel.DasSequenceSource.class);
+            .getDbSourceProxyInstances(DasSequenceSource.class);
     if (otherdb != null && otherdb.length > 0)
     {
       DbSourceProxy[] newsrc = new DbSourceProxy[dbSources.length
@@ -216,6 +264,9 @@ public class DBRefFetcher implements Runnable
    */
   public void fetchDBRefs(boolean waitTillFinished)
   {
+    // TODO can we not simply write
+    // if (waitTillFinished) { run(); } else { new Thread(this).start(); }
+
     Thread thread = new Thread(this);
     thread.start();
     running = true;
@@ -247,10 +298,10 @@ public class DBRefFetcher implements Runnable
   {
     key = key.toUpperCase();
 
-    Vector seqs;
+    Vector<SequenceI> seqs;
     if (seqRefs.containsKey(key))
     {
-      seqs = (Vector) seqRefs.get(key);
+      seqs = seqRefs.get(key);
 
       if (seqs != null && !seqs.contains(seq))
       {
@@ -258,14 +309,14 @@ public class DBRefFetcher implements Runnable
       }
       else if (seqs == null)
       {
-        seqs = new Vector();
+        seqs = new Vector<SequenceI>();
         seqs.addElement(seq);
       }
 
     }
     else
     {
-      seqs = new Vector();
+      seqs = new Vector<SequenceI>();
       seqs.addElement(seq);
     }
 
@@ -286,13 +337,17 @@ public class DBRefFetcher implements Runnable
     }
     running = true;
     long startTime = System.currentTimeMillis();
-    af.setProgressBar(MessageManager.getString("status.fetching_db_refs"),
-            startTime);
+    if (progressWindow != null)
+    {
+      progressWindow.setProgressBar(
+              MessageManager.getString("status.fetching_db_refs"),
+              startTime);
+    }
     try
     {
       if (Cache.getDefault("DBREFFETCH_USEPICR", false))
       {
-        picrClient = new uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator()
+        picrClient = new AccessionMapperServiceLocator()
                 .getAccessionMapperPort();
       }
     } catch (Exception e)
@@ -300,157 +355,145 @@ public class DBRefFetcher implements Runnable
       System.err.println("Couldn't locate PICR service instance.\n");
       e.printStackTrace();
     }
+
+    Vector<SequenceI> sdataset = new Vector<SequenceI>(
+            Arrays.asList(dataset));
+    List<String> warningMessages = new ArrayList<String>();
+
     int db = 0;
-    Vector sdataset = new Vector();
-    for (int s = 0; s < dataset.length; s++)
-    {
-      sdataset.addElement(dataset[s]);
-    }
     while (sdataset.size() > 0 && db < dbSources.length)
     {
-      int maxqlen = 1; // default number of queries made to at one time
-      System.err.println("Verifying against " + dbSources[db].getDbName());
-      boolean dn = false;
+      int maxqlen = 1; // default number of queries made at one time
+      System.out.println("Verifying against " + dbSources[db].getDbName());
 
       // iterate through db for each remaining un-verified sequence
       SequenceI[] currSeqs = new SequenceI[sdataset.size()];
       sdataset.copyInto(currSeqs);// seqs that are to be validated against
       // dbSources[db]
-      Vector queries = new Vector(); // generated queries curSeq
-      seqRefs = new Hashtable();
+      Vector<String> queries = new Vector<String>(); // generated queries curSeq
+      seqRefs = new Hashtable<String, Vector<SequenceI>>();
 
       int seqIndex = 0;
 
-      jalview.ws.seqfetcher.DbSourceProxy dbsource = dbSources[db];
+      DbSourceProxy dbsource = dbSources[db];
+      // for moment, we dumbly iterate over all retrieval sources for a
+      // particular database
+      // TODO: introduce multithread multisource queries and logic to remove a
+      // query from other sources if any source for a database returns a
+      // record
+      maxqlen = dbsource.getMaximumQueryCount();
+
+      while (queries.size() > 0 || seqIndex < currSeqs.length)
       {
-        // for moment, we dumbly iterate over all retrieval sources for a
-        // particular database
-        // TODO: introduce multithread multisource queries and logic to remove a
-        // query from other sources if any source for a database returns a
-        // record
-        if (dbsource.getDbSourceProperties().containsKey(
-                DBRefSource.MULTIACC))
-        {
-          maxqlen = ((Integer) dbsource.getDbSourceProperties().get(
-                  DBRefSource.MULTIACC)).intValue();
-        }
-        else
-        {
-          maxqlen = 1;
-        }
-        while (queries.size() > 0 || seqIndex < currSeqs.length)
+        if (queries.size() > 0)
         {
-          if (queries.size() > 0)
-          {
-            // Still queries to make for current seqIndex
-            StringBuffer queryString = new StringBuffer("");
-            int numq = 0, nqSize = (maxqlen > queries.size()) ? queries
-                    .size() : maxqlen;
+          // Still queries to make for current seqIndex
+          StringBuffer queryString = new StringBuffer("");
+          int numq = 0;
+          int nqSize = (maxqlen > queries.size()) ? queries.size()
+                  : maxqlen;
 
-            while (queries.size() > 0 && numq < nqSize)
-            {
-              String query = (String) queries.elementAt(0);
-              if (dbsource.isValidReference(query))
-              {
-                queryString.append((numq == 0) ? "" : dbsource
-                        .getAccessionSeparator());
-                queryString.append(query);
-                numq++;
-              }
-              // remove the extracted query string
-              queries.removeElementAt(0);
-            }
-            // make the queries and process the response
-            AlignmentI retrieved = null;
-            try
-            {
-              if (jalview.bin.Cache.log.isDebugEnabled())
-              {
-                jalview.bin.Cache.log.debug("Querying "
-                        + dbsource.getDbName() + " with : '"
-                        + queryString.toString() + "'");
-              }
-              retrieved = dbsource.getSequenceRecords(queryString
-                      .toString());
-            } catch (Exception ex)
-            {
-              ex.printStackTrace();
-            } catch (OutOfMemoryError err)
+          while (queries.size() > 0 && numq < nqSize)
+          {
+            String query = queries.elementAt(0);
+            if (dbsource.isValidReference(query))
             {
-              new OOMWarning("retrieving database references ("
-                      + queryString.toString() + ")", err);
+              queryString.append((numq == 0) ? "" : dbsource
+                      .getAccessionSeparator());
+              queryString.append(query);
+              numq++;
             }
-            if (retrieved != null)
+            // remove the extracted query string
+            queries.removeElementAt(0);
+          }
+          // make the queries and process the response
+          AlignmentI retrieved = null;
+          try
+          {
+            if (Cache.log.isDebugEnabled())
             {
-              transferReferences(sdataset, dbsource.getDbSource(),
-                      retrieved, trimDsSeqs);
+              Cache.log.debug("Querying " + dbsource.getDbName()
+                      + " with : '" + queryString.toString() + "'");
             }
+            retrieved = dbsource.getSequenceRecords(queryString.toString());
+          } catch (Exception ex)
+          {
+            ex.printStackTrace();
+          } catch (OutOfMemoryError err)
+          {
+            new OOMWarning("retrieving database references ("
+                    + queryString.toString() + ")", err);
+          }
+          if (retrieved != null)
+          {
+            transferReferences(sdataset, dbsource.getDbSource(), retrieved,
+                    trimDsSeqs, warningMessages);
           }
-          else
+        }
+        else
+        {
+          // make some more strings for use as queries
+          for (int i = 0; (seqIndex < dataset.length) && (i < 50); seqIndex++, i++)
           {
-            // make some more strings for use as queries
-            for (int i = 0; (seqIndex < dataset.length) && (i < 50); seqIndex++, i++)
+            SequenceI sequence = dataset[seqIndex];
+            DBRefEntry[] uprefs = DBRefUtils.selectRefs(
+                    sequence.getDBRefs(),
+                    new String[] { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
+            // });
+            // check for existing dbrefs to use
+            if (uprefs != null && uprefs.length > 0)
             {
-              SequenceI sequence = dataset[seqIndex];
-              DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs(
-                      sequence.getDBRefs(),
-                      new String[] { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
-              // });
-              // check for existing dbrefs to use
-              if (uprefs != null && uprefs.length > 0)
+              for (int j = 0; j < uprefs.length; j++)
               {
-                for (int j = 0; j < uprefs.length; j++)
-                {
-                  addSeqId(sequence, uprefs[j].getAccessionId());
-                  queries.addElement(uprefs[j].getAccessionId()
-                          .toUpperCase());
-                }
+                addSeqId(sequence, uprefs[j].getAccessionId());
+                queries.addElement(uprefs[j].getAccessionId().toUpperCase());
               }
-              else
+            }
+            else
+            {
+              // generate queries from sequence ID string
+              StringTokenizer st = new StringTokenizer(sequence.getName(),
+                      "|");
+              while (st.hasMoreTokens())
               {
-                // generate queries from sequence ID string
-                StringTokenizer st = new StringTokenizer(
-                        sequence.getName(), "|");
-                while (st.hasMoreTokens())
+                String token = st.nextToken();
+                UPEntry[] presp = null;
+                if (picrClient != null)
                 {
-                  String token = st.nextToken();
-                  UPEntry[] presp = null;
-                  if (picrClient != null)
+                  // resolve the string against PICR to recover valid IDs
+                  try
                   {
-                    // resolve the string against PICR to recover valid IDs
-                    try
-                    {
-                      presp = picrClient.getUPIForAccession(token, null,
-                              picrClient.getMappedDatabaseNames(), null,
-                              true);
-                    } catch (Exception e)
-                    {
-                      System.err.println("Exception with Picr for '"
-                              + token + "'\n");
-                      e.printStackTrace();
-                    }
-                  }
-                  if (presp != null && presp.length > 0)
+                    presp = picrClient
+                            .getUPIForAccession(token, null,
+                                    picrClient.getMappedDatabaseNames(),
+                                    null, true);
+                  } catch (Exception e)
                   {
-                    for (int id = 0; id < presp.length; id++)
-                    {
-                      // construct sequences from response if sequences are
-                      // present, and do a transferReferences
-                      // otherwise transfer non sequence x-references directly.
-                    }
-                    System.out
-                            .println("Validated ID against PICR... (for what its worth):"
-                                    + token);
-                    addSeqId(sequence, token);
-                    queries.addElement(token.toUpperCase());
+                    System.err.println("Exception with Picr for '" + token
+                            + "'\n");
+                    e.printStackTrace();
                   }
-                  else
+                }
+                if (presp != null && presp.length > 0)
+                {
+                  for (int id = 0; id < presp.length; id++)
                   {
-                    // if ()
-                    // System.out.println("Not querying source with token="+token+"\n");
-                    addSeqId(sequence, token);
-                    queries.addElement(token.toUpperCase());
+                    // construct sequences from response if sequences are
+                    // present, and do a transferReferences
+                    // otherwise transfer non sequence x-references directly.
                   }
+                  System.out
+                          .println("Validated ID against PICR... (for what its worth):"
+                                  + token);
+                  addSeqId(sequence, token);
+                  queries.addElement(token.toUpperCase());
+                }
+                else
+                {
+                  // if ()
+                  // System.out.println("Not querying source with token="+token+"\n");
+                  addSeqId(sequence, token);
+                  queries.addElement(token.toUpperCase());
                 }
               }
             }
@@ -459,82 +502,95 @@ public class DBRefFetcher implements Runnable
       }
       // advance to next database
       db++;
-    } // all databases have been queries.
-    if (sbuffer.length() > 0)
+    } // all databases have been queried
+    if (!warningMessages.isEmpty())
     {
-      output.setText(MessageManager
-              .getString("label.your_sequences_have_been_verified")
-              + sbuffer.toString());
+      StringBuilder sb = new StringBuilder(warningMessages.size() * 30);
+      sb.append(MessageManager
+              .getString("label.your_sequences_have_been_verified"));
+      for (String msg : warningMessages)
+      {
+        sb.append(msg).append(NEWLINE);
+      }
+      output.setText(sb.toString());
+
       Desktop.addInternalFrame(output,
-              MessageManager.getString("label.sequence_names_updated"),
-              600, 300);
+              MessageManager.getString("label.sequences_updated"), 600, 300);
       // The above is the dataset, we must now find out the index
       // of the viewed sequence
 
     }
+    if (progressWindow != null)
+    {
+      progressWindow.setProgressBar(
+              MessageManager.getString("label.dbref_search_completed"),
+              startTime);
+    }
 
-    af.setProgressBar(
-            MessageManager.getString("label.dbref_search_completed"),
-            startTime);
-    // promptBeforeBlast();
-
+    for (FetchFinishedListenerI listener : listeners)
+    {
+      listener.finished();
+    }
     running = false;
-
   }
 
   /**
    * Verify local sequences in seqRefs against the retrieved sequence database
-   * records.
+   * records. Returns true if any sequence was modified as a result (start/end
+   * changed and/or sequence enlarged), else false.
    * 
+   * @param sdataset
+   *          dataset sequences we are retrieving for
+   * @param dbSource
+   *          database source we are retrieving from
+   * @param retrievedAl
+   *          retrieved sequences as alignment
    * @param trimDatasetSeqs
-   * 
+   *          if true, sequences will not be enlarged to match longer retrieved
+   *          sequences, only their start/end adjusted
+   * @param warningMessages
+   *          a list of messages to add to
    */
-  void transferReferences(Vector sdataset, String dbSource,
-          AlignmentI retrievedAl, boolean trimDatasetSeqs) // File
-  // file)
+  boolean transferReferences(Vector<SequenceI> sdataset, String dbSource,
+          AlignmentI retrievedAl, boolean trimDatasetSeqs,
+          List<String> warningMessages)
   {
-    System.out.println("trimming ? " + trimDatasetSeqs);
+    // System.out.println("trimming ? " + trimDatasetSeqs);
     if (retrievedAl == null || retrievedAl.getHeight() == 0)
     {
-      return;
+      return false;
     }
+
+    boolean modified = false;
     SequenceI[] retrieved = recoverDbSequences(retrievedAl
             .getSequencesArray());
     SequenceI sequence = null;
-    boolean transferred = false;
-    StringBuffer messages = new StringBuffer();
-
-    // Vector entries = new Uniprot().getUniprotEntries(file);
 
-    int i, iSize = retrieved.length; // entries == null ? 0 : entries.size();
-    // UniprotEntry entry;
-    for (i = 0; i < iSize; i++)
+    for (SequenceI retrievedSeq : retrieved)
     {
-      SequenceI entry = retrieved[i]; // (UniprotEntry) entries.elementAt(i);
-
       // Work out which sequences this sequence matches,
       // taking into account all accessionIds and names in the file
-      Vector sequenceMatches = new Vector();
+      Vector<SequenceI> sequenceMatches = new Vector<SequenceI>();
       // look for corresponding accession ids
-      DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs(
-              entry.getDBRefs(), new String[] { dbSource });
+      DBRefEntry[] entryRefs = DBRefUtils.selectRefs(
+              retrievedSeq.getDBRefs(), new String[] { dbSource });
       if (entryRefs == null)
       {
         System.err
                 .println("Dud dbSource string ? no entryrefs selected for "
-                        + dbSource + " on " + entry.getName());
+                        + dbSource + " on " + retrievedSeq.getName());
         continue;
       }
       for (int j = 0; j < entryRefs.length; j++)
       {
-        String accessionId = entryRefs[j].getAccessionId(); // .getAccession().elementAt(j).toString();
+        String accessionId = entryRefs[j].getAccessionId();
         // match up on accessionId
         if (seqRefs.containsKey(accessionId.toUpperCase()))
         {
-          Vector seqs = (Vector) seqRefs.get(accessionId);
+          Vector<SequenceI> seqs = seqRefs.get(accessionId);
           for (int jj = 0; jj < seqs.size(); jj++)
           {
-            sequence = (SequenceI) seqs.elementAt(jj);
+            sequence = seqs.elementAt(jj);
             if (!sequenceMatches.contains(sequence))
             {
               sequenceMatches.addElement(sequence);
@@ -542,17 +598,17 @@ public class DBRefFetcher implements Runnable
           }
         }
       }
-      if (sequenceMatches.size() == 0)
+      if (sequenceMatches.isEmpty())
       {
         // failed to match directly on accessionId==query so just compare all
         // sequences to entry
-        Enumeration e = seqRefs.keys();
+        Enumeration<String> e = seqRefs.keys();
         while (e.hasMoreElements())
         {
-          Vector sqs = (Vector) seqRefs.get(e.nextElement());
+          Vector<SequenceI> sqs = seqRefs.get(e.nextElement());
           if (sqs != null && sqs.size() > 0)
           {
-            Enumeration sqe = sqs.elements();
+            Enumeration<SequenceI> sqe = sqs.elements();
             while (sqe.hasMoreElements())
             {
               sequenceMatches.addElement(sqe.nextElement());
@@ -576,10 +632,11 @@ public class DBRefFetcher implements Runnable
        */
       // sequenceMatches now contains the set of all sequences associated with
       // the returned db record
-      String entrySeq = entry.getSequenceAsString().toUpperCase();
+      final String retrievedSeqString = retrievedSeq.getSequenceAsString();
+      String entrySeq = retrievedSeqString.toUpperCase();
       for (int m = 0; m < sequenceMatches.size(); m++)
       {
-        sequence = (SequenceI) sequenceMatches.elementAt(m);
+        sequence = sequenceMatches.elementAt(m);
         // only update start and end positions and shift features if there are
         // no existing references
         // TODO: test for legacy where uniprot or EMBL refs exist but no
@@ -589,70 +646,77 @@ public class DBRefFetcher implements Runnable
         // TODO:
         // verify sequence against the entry sequence
 
+        Mapping mp;
+        final int sequenceStart = sequence.getStart();
+
+        boolean remoteEnclosesLocal = false;
         String nonGapped = AlignSeq.extractGaps("-. ",
                 sequence.getSequenceAsString()).toUpperCase();
-
         int absStart = entrySeq.indexOf(nonGapped);
-        Mapping mp;
-
-        final int sequenceStart = sequence.getStart();
         if (absStart == -1)
         {
-          // Is local sequence contained in dataset sequence?
+          // couldn't find local sequence in sequence from database, so check if
+          // the database sequence is a subsequence of local sequence
           absStart = nonGapped.indexOf(entrySeq);
           if (absStart == -1)
-          { // verification failed.
-            messages.append(sequence.getName()
-                    + " SEQUENCE NOT %100 MATCH \n");
+          {
+            // verification failed. couldn't find any relationship between
+            // entrySeq and local sequence
+            // messages suppressed as many-to-many matches are confusing
+            // String msg = sequence.getName()
+            // + " Sequence not 100% match with "
+            // + retrievedSeq.getName();
+            // addWarningMessage(warningMessages, msg);
             continue;
           }
-          transferred = true;
-          sbuffer.append(sequence.getName() + " HAS " + absStart
-                  + " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n");
-          //
-          // + " - ANY SEQUENCE FEATURES"
-          // + " HAVE BEEN ADJUSTED ACCORDINGLY \n");
-          // absStart = 0;
-          // create valid mapping between matching region of local sequence and
-          // the mapped sequence
+          /*
+           * retrieved sequence is a proper subsequence of local sequence
+           */
+          String msg = sequence.getName() + " has " + absStart
+                  + " prefixed residues compared to "
+                  + retrievedSeq.getName();
+          addWarningMessage(warningMessages, msg);
+
+          /*
+           * So create a mapping to the external entry from the matching region of 
+           * the local sequence, and leave local start/end untouched. 
+           */
           mp = new Mapping(null, new int[] { sequenceStart + absStart,
               sequenceStart + absStart + entrySeq.length() - 1 }, new int[]
-          { entry.getStart(), entry.getStart() + entrySeq.length() - 1 },
-                  1, 1);
-          updateRefFrame = false; // mapping is based on current start/end so
-          // don't modify start and end
+          { retrievedSeq.getStart(),
+              retrievedSeq.getStart() + entrySeq.length() - 1 }, 1, 1);
+          updateRefFrame = false;
         }
         else
         {
-          transferred = true;
-          // update start and end of local sequence to place it in entry's
-          // reference frame.
-          // apply identity map map from whole of local sequence to matching
-          // region of database
-          // sequence
-          mp = null; // Mapping.getIdentityMap();
-          // new Mapping(null,
-          // new int[] { absStart+sequence.getStart(),
-          // absStart+sequence.getStart()+entrySeq.length()-1},
-          // new int[] { entry.getStart(), entry.getEnd() }, 1, 1);
-          // relocate local features for updated start
+          /*
+           * local sequence is a subsequence of (or matches) retrieved sequence
+           */
+          remoteEnclosesLocal = true;
+          mp = null;
+
           if (updateRefFrame)
           {
-            if (sequence.getSequenceFeatures() != null)
+            SequenceFeature[] sfs = sequence.getSequenceFeatures();
+            if (sfs != null)
             {
-              SequenceFeature[] sf = sequence.getSequenceFeatures();
+              /*
+               * relocate existing sequence features by offset
+               */
               int start = sequenceStart;
               int end = sequence.getEnd();
-              int startShift = 1 - absStart - start; // how much the features
-                                                     // are
-              // to be shifted by
-              for (int sfi = 0; sfi < sf.length; sfi++)
+              int startShift = 1 - absStart - start;
+
+              if (startShift != 0)
               {
-                if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end)
+                for (SequenceFeature sf : sfs)
                 {
-                  // shift feature along by absstart
-                  sf[sfi].setBegin(sf[sfi].getBegin() + startShift);
-                  sf[sfi].setEnd(sf[sfi].getEnd() + startShift);
+                  if (sf.getBegin() >= start && sf.getEnd() <= end)
+                  {
+                    sf.setBegin(sf.getBegin() + startShift);
+                    sf.setEnd(sf.getEnd() + startShift);
+                    modified = true;
+                  }
                 }
               }
             }
@@ -660,16 +724,37 @@ public class DBRefFetcher implements Runnable
         }
 
         System.out.println("Adding dbrefs to " + sequence.getName()
-                + " from " + dbSource + " sequence : " + entry.getName());
-        sequence.transferAnnotation(entry, mp);
-        // unknownSequences.remove(sequence);
-        absStart += entry.getStart();
+                + " from " + dbSource + " sequence : "
+                + retrievedSeq.getName());
+        sequence.transferAnnotation(retrievedSeq, mp);
+
+        absStart += retrievedSeq.getStart();
         int absEnd = absStart + nonGapped.length() - 1;
         if (!trimDatasetSeqs)
         {
-          // insert full length sequence from record
-          sequence.setSequence(entry.getSequenceAsString());
-          sequence.setStart(entry.getStart());
+          /*
+           * update start position and/or expand to longer retrieved sequence
+           */
+          if (!retrievedSeqString.equals(sequence.getSequenceAsString())
+                  && remoteEnclosesLocal)
+          {
+            sequence.setSequence(retrievedSeqString);
+            modified = true;
+            addWarningMessage(warningMessages,
+                    "Sequence for " + sequence.getName()
+                            + " expanded from " + retrievedSeq.getName());
+          }
+          if (sequence.getStart() != retrievedSeq.getStart())
+          {
+            sequence.setStart(retrievedSeq.getStart());
+            modified = true;
+            if (absStart != sequenceStart)
+            {
+              addWarningMessage(warningMessages, "Start/end position for "
+                      + sequence.getName() + " updated from "
+                      + retrievedSeq.getName());
+            }
+          }
         }
         if (updateRefFrame)
         {
@@ -677,8 +762,16 @@ public class DBRefFetcher implements Runnable
           if (trimDatasetSeqs)
           {
             // just fix start/end
-            sequence.setStart(absStart);
-            sequence.setEnd(absEnd);
+            if (sequence.getStart() != absStart
+                    || sequence.getEnd() != absEnd)
+            {
+              sequence.setStart(absStart);
+              sequence.setEnd(absEnd);
+              modified = true;
+              addWarningMessage(warningMessages, "Start/end for "
+                      + sequence.getName() + " updated from "
+                      + retrievedSeq.getName());
+            }
           }
           // search for alignment sequences to update coordinate frame for
           for (int alsq = 0; alsq < alseqs.length; alsq++)
@@ -695,6 +788,7 @@ public class DBRefFetcher implements Runnable
               {
                 alseqs[alsq].setEnd(ngAlsq.length()
                         + alseqs[alsq].getStart() - 1);
+                modified = true;
               }
             }
           }
@@ -711,10 +805,20 @@ public class DBRefFetcher implements Runnable
         // ids, so we can query all enabled DAS servers for them ?
       }
     }
-    if (!transferred)
+    return modified;
+  }
+
+  /**
+   * Adds the message to the list unless it already contains it
+   * 
+   * @param messageList
+   * @param msg
+   */
+  void addWarningMessage(List<String> messageList, String msg)
+  {
+    if (!messageList.contains(msg))
     {
-      // report the ID/sequence mismatches
-      sbuffer.append(messages);
+      messageList.add(msg);
     }
   }
 
@@ -726,12 +830,12 @@ public class DBRefFetcher implements Runnable
    */
   private SequenceI[] recoverDbSequences(SequenceI[] sequencesArray)
   {
-    Vector nseq = new Vector();
+    Vector<SequenceI> nseq = new Vector<SequenceI>();
     for (int i = 0; sequencesArray != null && i < sequencesArray.length; i++)
     {
       nseq.addElement(sequencesArray[i]);
-      DBRefEntry dbr[] = sequencesArray[i].getDBRefs();
-      jalview.datamodel.Mapping map = null;
+      DBRefEntry[] dbr = sequencesArray[i].getDBRefs();
+      Mapping map = null;
       for (int r = 0; (dbr != null) && r < dbr.length; r++)
       {
         if ((map = dbr[r].getMap()) != null)