import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
-import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.gui.CutAndPasteTransfer;
-import jalview.gui.DasSourceBrowser;
import jalview.gui.Desktop;
import jalview.gui.FeatureSettings;
import jalview.gui.IProgressIndicator;
import jalview.gui.OOMWarning;
import jalview.util.DBRefUtils;
import jalview.util.MessageManager;
-import jalview.ws.dbsources.das.api.jalviewSourceI;
-import jalview.ws.dbsources.das.datamodel.DasSequenceSource;
import jalview.ws.seqfetcher.DbSourceProxy;
import java.util.ArrayList;
*/
public class DBRefFetcher implements Runnable
{
+ private static final String NEWLINE = System.lineSeparator();
+
+ public static final String TRIM_RETRIEVED_SEQUENCES = "TRIM_FETCHED_DATASET_SEQS";
+
public interface FetchFinishedListenerI
{
void finished();
CutAndPasteTransfer output = new CutAndPasteTransfer();
- StringBuffer sbuffer = new StringBuffer();
-
boolean running = false;
/**
*/
public DBRefFetcher(SequenceI[] seqs,
IProgressIndicator progressIndicatorFrame,
- DbSourceProxy[] sources, FeatureSettings featureSettings, boolean isNucleotide)
+ DbSourceProxy[] sources, FeatureSettings featureSettings,
+ boolean isNucleotide)
{
- listeners = new ArrayList<FetchFinishedListenerI>();
+ listeners = new ArrayList<>();
this.progressWindow = progressIndicatorFrame;
alseqs = new SequenceI[seqs.length];
SequenceI[] ds = new SequenceI[seqs.length];
}
this.dataset = ds;
// TODO Jalview 2.5 lots of this code should be in the gui package!
- sfetcher = jalview.gui.SequenceFetcher
- .getSequenceFetcherSingleton(progressIndicatorFrame);
+ sfetcher = jalview.gui.SequenceFetcher.getSequenceFetcherSingleton();
// set default behaviour for transferring excess sequence data to the
// dataset
- trimDsSeqs = Cache.getDefault("TRIM_FETCHED_DATASET_SEQS", true);
+ trimDsSeqs = Cache.getDefault(TRIM_RETRIEVED_SEQUENCES, true);
if (sources == null)
{
setDatabaseSources(featureSettings, isNucleotide);
{
// af.featureSettings_actionPerformed(null);
String[] defdb = null;
- List<DbSourceProxy> selsources = new ArrayList<DbSourceProxy>();
- Vector<jalviewSourceI> dasselsrc = (featureSettings != null) ? featureSettings
- .getSelectedSources() : new DasSourceBrowser()
- .getSelectedSources();
-
- for (jalviewSourceI src : dasselsrc)
- {
- List<DbSourceProxy> sp = src.getSequenceSourceProxies();
- if (sp != null)
- {
- selsources.addAll(sp);
- if (sp.size() > 1)
- {
- Cache.log.debug("Added many Db Sources for :" + src.getTitle());
- }
- }
- }
+ List<DbSourceProxy> selsources = new ArrayList<>();
// select appropriate databases based on alignFrame context.
if (forNucleotide)
{
{
defdb = DBRefSource.PROTEINDBS;
}
- List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
+ List<DbSourceProxy> srces = new ArrayList<>();
for (String ddb : defdb)
{
List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb);
}
/**
- * retrieve all the das sequence sources and add them to the list of db
- * sources to retrieve from
- */
- public void appendAllDasSources()
- {
- if (dbSources == null)
- {
- dbSources = new DbSourceProxy[0];
- }
- // append additional sources
- DbSourceProxy[] otherdb = sfetcher
- .getDbSourceProxyInstances(DasSequenceSource.class);
- if (otherdb != null && otherdb.length > 0)
- {
- DbSourceProxy[] newsrc = new DbSourceProxy[dbSources.length
- + otherdb.length];
- System.arraycopy(dbSources, 0, newsrc, 0, dbSources.length);
- System.arraycopy(otherdb, 0, newsrc, dbSources.length, otherdb.length);
- dbSources = newsrc;
- }
- }
-
- /**
* start the fetcher thread
*
* @param waitTillFinished
}
else if (seqs == null)
{
- seqs = new Vector<SequenceI>();
+ seqs = new Vector<>();
seqs.addElement(seq);
}
}
else
{
- seqs = new Vector<SequenceI>();
+ seqs = new Vector<>();
seqs.addElement(seq);
}
{
if (dbSources == null)
{
- throw new Error(
- MessageManager
- .getString("error.implementation_error_must_init_dbsources"));
+ throw new Error(MessageManager
+ .getString("error.implementation_error_must_init_dbsources"));
}
running = true;
long startTime = System.currentTimeMillis();
{
progressWindow.setProgressBar(
MessageManager.getString("status.fetching_db_refs"),
- startTime);
+ startTime);
}
try
{
e.printStackTrace();
}
- Vector<SequenceI> sdataset = new Vector<SequenceI>(
+ Vector<SequenceI> sdataset = new Vector<>(
Arrays.asList(dataset));
+ List<String> warningMessages = new ArrayList<>();
int db = 0;
while (sdataset.size() > 0 && db < dbSources.length)
SequenceI[] currSeqs = new SequenceI[sdataset.size()];
sdataset.copyInto(currSeqs);// seqs that are to be validated against
// dbSources[db]
- Vector<String> queries = new Vector<String>(); // generated queries curSeq
- seqRefs = new Hashtable<String, Vector<SequenceI>>();
+ Vector<String> queries = new Vector<>(); // generated queries curSeq
+ seqRefs = new Hashtable<>();
int seqIndex = 0;
// Still queries to make for current seqIndex
StringBuffer queryString = new StringBuffer("");
int numq = 0;
- int nqSize = (maxqlen > queries.size()) ? queries
- .size() : maxqlen;
+ int nqSize = (maxqlen > queries.size()) ? queries.size()
+ : maxqlen;
while (queries.size() > 0 && numq < nqSize)
{
String query = queries.elementAt(0);
if (dbsource.isValidReference(query))
{
- queryString.append((numq == 0) ? "" : dbsource
- .getAccessionSeparator());
+ queryString.append(
+ (numq == 0) ? "" : dbsource.getAccessionSeparator());
queryString.append(query);
numq++;
}
if (retrieved != null)
{
transferReferences(sdataset, dbsource.getDbSource(), retrieved,
- trimDsSeqs);
+ trimDsSeqs, warningMessages);
}
}
else
{
// make some more strings for use as queries
- for (int i = 0; (seqIndex < dataset.length) && (i < 50); seqIndex++, i++)
+ for (int i = 0; (seqIndex < dataset.length)
+ && (i < 50); seqIndex++, i++)
{
SequenceI sequence = dataset[seqIndex];
- DBRefEntry[] uprefs = DBRefUtils.selectRefs(
- sequence.getDBRefs(),
- new String[] { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
+ List<DBRefEntry> uprefs = DBRefUtils
+ .selectRefs(sequence.getDBRefs(), new String[]
+ { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
// });
// check for existing dbrefs to use
- if (uprefs != null && uprefs.length > 0)
+ if (uprefs != null && uprefs.size() > 0)
{
- for (int j = 0; j < uprefs.length; j++)
+ for (int j = 0, n = uprefs.size(); j < n; j++)
{
- addSeqId(sequence, uprefs[j].getAccessionId());
- queries.addElement(uprefs[j].getAccessionId().toUpperCase());
+ DBRefEntry upref = uprefs.get(j);
+ addSeqId(sequence, upref.getAccessionId());
+ queries.addElement(
+ upref.getAccessionId().toUpperCase());
}
}
else
// resolve the string against PICR to recover valid IDs
try
{
- presp = picrClient
- .getUPIForAccession(token, null,
- picrClient.getMappedDatabaseNames(),
- null, true);
+ presp = picrClient.getUPIForAccession(token, null,
+ picrClient.getMappedDatabaseNames(), null,
+ true);
} catch (Exception e)
{
- System.err.println("Exception with Picr for '" + token
- + "'\n");
+ System.err.println(
+ "Exception with Picr for '" + token + "'\n");
e.printStackTrace();
}
}
// present, and do a transferReferences
// otherwise transfer non sequence x-references directly.
}
- System.out
- .println("Validated ID against PICR... (for what its worth):"
+ System.out.println(
+ "Validated ID against PICR... (for what its worth):"
+ token);
addSeqId(sequence, token);
queries.addElement(token.toUpperCase());
else
{
// if ()
- // System.out.println("Not querying source with token="+token+"\n");
+ // System.out.println("Not querying source with
+ // token="+token+"\n");
addSeqId(sequence, token);
queries.addElement(token.toUpperCase());
}
// advance to next database
db++;
} // all databases have been queried
- if (sbuffer.length() > 0)
+ if (!warningMessages.isEmpty())
{
- output.setText(MessageManager
- .getString("label.your_sequences_have_been_verified")
- + sbuffer.toString());
+ StringBuilder sb = new StringBuilder(warningMessages.size() * 30);
+ sb.append(MessageManager
+ .getString("label.your_sequences_have_been_verified"));
+ for (String msg : warningMessages)
+ {
+ sb.append(msg).append(NEWLINE);
+ }
+ output.setText(sb.toString());
+
Desktop.addInternalFrame(output,
- MessageManager.getString("label.sequence_names_updated"),
- 600, 300);
+ MessageManager.getString("label.sequences_updated"), 600,
+ 300);
// The above is the dataset, we must now find out the index
// of the viewed sequence
/**
* Verify local sequences in seqRefs against the retrieved sequence database
- * records.
+ * records. Returns true if any sequence was modified as a result (start/end
+ * changed and/or sequence enlarged), else false.
*
+ * @param sdataset
+ * dataset sequences we are retrieving for
+ * @param dbSource
+ * database source we are retrieving from
+ * @param retrievedAl
+ * retrieved sequences as alignment
* @param trimDatasetSeqs
- *
+ * if true, sequences will not be enlarged to match longer retrieved
+ * sequences, only their start/end adjusted
+ * @param warningMessages
+ * list that warning messages are appended to (duplicates are skipped)
+ * @return true if any sequence was modified, else false
*/
- void transferReferences(Vector<SequenceI> sdataset, String dbSource,
- AlignmentI retrievedAl, boolean trimDatasetSeqs)
+ boolean transferReferences(Vector<SequenceI> sdataset, String dbSource,
+ AlignmentI retrievedAl, boolean trimDatasetSeqs,
+ List<String> warningMessages)
{
// System.out.println("trimming ? " + trimDatasetSeqs);
if (retrievedAl == null || retrievedAl.getHeight() == 0)
{
- return;
+ return false;
}
- SequenceI[] retrieved = recoverDbSequences(retrievedAl
- .getSequencesArray());
+
+ boolean modified = false;
+ SequenceI[] retrieved = recoverDbSequences(
+ retrievedAl.getSequencesArray());
SequenceI sequence = null;
- boolean transferred = false;
- StringBuilder messages = new StringBuilder(64);
- for (SequenceI entry : retrieved)
+ for (SequenceI retrievedSeq : retrieved)
{
// Work out which sequences this sequence matches,
// taking into account all accessionIds and names in the file
- Vector<SequenceI> sequenceMatches = new Vector<SequenceI>();
+ Vector<SequenceI> sequenceMatches = new Vector<>();
// look for corresponding accession ids
- DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs(
- entry.getDBRefs(), new String[] { dbSource });
+ List<DBRefEntry> entryRefs = DBRefUtils
+ .selectRefs(retrievedSeq.getDBRefs(), new String[]
+ { dbSource });
if (entryRefs == null)
{
System.err
.println("Dud dbSource string ? no entryrefs selected for "
- + dbSource + " on " + entry.getName());
+ + dbSource + " on " + retrievedSeq.getName());
continue;
}
- for (int j = 0; j < entryRefs.length; j++)
+ for (int j = 0, n = entryRefs.size(); j < n; j++)
{
- String accessionId = entryRefs[j].getAccessionId();
+ DBRefEntry ref = entryRefs.get(j);
+ String accessionId = ref.getAccessionId();
// match up on accessionId
if (seqRefs.containsKey(accessionId.toUpperCase()))
{
// could be useful to extend this so we try to find any 'significant'
// information in common between two sequence objects.
/*
- * DBRefEntry[] entryRefs =
+ * List<DBRefEntry> entryRefs =
* jalview.util.DBRefUtils.selectRefs(entry.getDBRef(), new String[] {
* dbSource }); for (int j = 0; j < entry.getName().size(); j++) { String
* name = entry.getName().elementAt(j).toString(); if
*/
// sequenceMatches now contains the set of all sequences associated with
// the returned db record
- String entrySeq = entry.getSequenceAsString().toUpperCase();
+ final String retrievedSeqString = retrievedSeq.getSequenceAsString();
+ String entrySeq = retrievedSeqString.toUpperCase();
for (int m = 0; m < sequenceMatches.size(); m++)
{
sequence = sequenceMatches.elementAt(m);
// TODO: test for legacy where uniprot or EMBL refs exist but no
// mappings are made (but content matches retrieved set)
boolean updateRefFrame = sequence.getDBRefs() == null
- || sequence.getDBRefs().length == 0;
+ || sequence.getDBRefs().size() == 0;
// TODO:
// verify sequence against the entry sequence
- String nonGapped = AlignSeq.extractGaps("-. ",
- sequence.getSequenceAsString()).toUpperCase();
-
- int absStart = entrySeq.indexOf(nonGapped);
Mapping mp;
-
final int sequenceStart = sequence.getStart();
+
+ boolean remoteEnclosesLocal = false;
+ String nonGapped = AlignSeq
+ .extractGaps("-. ", sequence.getSequenceAsString())
+ .toUpperCase();
+ int absStart = entrySeq.indexOf(nonGapped);
if (absStart == -1)
{
// couldn't find local sequence in sequence from database, so check if
{
// verification failed. couldn't find any relationship between
// entrySeq and local sequence
- messages.append(sequence.getName()
- + " Sequence not 100% match with " + entry.getName()
- + "\n");
+ // messages suppressed as many-to-many matches are confusing
+ // String msg = sequence.getName()
+ // + " Sequence not 100% match with "
+ // + retrievedSeq.getName();
+ // addWarningMessage(warningMessages, msg);
continue;
}
/*
- * found match for the whole of the database sequence within the local
- * sequence's reference frame.
+ * retrieved sequence is a proper subsequence of local sequence
*/
- transferred = true;
- sbuffer.append(sequence.getName() + " has " + absStart
- + " prefixed residues compared to " + entry.getName()
- + "\n");
+ String msg = sequence.getName() + " has " + absStart
+ + " prefixed residues compared to "
+ + retrievedSeq.getName();
+ addWarningMessage(warningMessages, msg);
/*
* So create a mapping to the external entry from the matching region of
* the local sequence, and leave local start/end untouched.
*/
- mp = new Mapping(null, new int[] { sequenceStart + absStart,
- sequenceStart + absStart + entrySeq.length() - 1 }, new int[]
- { entry.getStart(), entry.getStart() + entrySeq.length() - 1 },
+ mp = new Mapping(null,
+ new int[]
+ { sequenceStart + absStart,
+ sequenceStart + absStart + entrySeq.length() - 1 },
+ new int[]
+ { retrievedSeq.getStart(),
+ retrievedSeq.getStart() + entrySeq.length() - 1 },
1, 1);
updateRefFrame = false;
}
else
{
/*
- * found a match for the local sequence within sequence from
- * the external database
+ * local sequence is a subsequence of (or matches) retrieved sequence
*/
- transferred = true;
-
- // update start and end of local sequence to place it in entry's
- // reference frame.
- // apply identity map map from whole of local sequence to matching
- // region of database
- // sequence
- mp = null; // Mapping.getIdentityMap();
- // new Mapping(null,
- // new int[] { absStart+sequence.getStart(),
- // absStart+sequence.getStart()+entrySeq.length()-1},
- // new int[] { entry.getStart(), entry.getEnd() }, 1, 1);
- // relocate local features for updated start
+ remoteEnclosesLocal = true;
+ mp = null;
if (updateRefFrame)
{
- if (sequence.getSequenceFeatures() != null)
+ /*
+ * relocate existing sequence features by offset
+ */
+ int startShift = absStart - sequenceStart + 1;
+ if (startShift != 0)
{
- /*
- * relocate existing sequence features by offset
- */
- SequenceFeature[] sf = sequence.getSequenceFeatures();
- int start = sequenceStart;
- int end = sequence.getEnd();
- int startShift = 1 - absStart - start; // how much the features
- // are
- // to be shifted by
- for (int sfi = 0; sfi < sf.length; sfi++)
- {
- if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end)
- {
- // shift feature along by absstart
- sf[sfi].setBegin(sf[sfi].getBegin() + startShift);
- sf[sfi].setEnd(sf[sfi].getEnd() + startShift);
- }
- }
+ modified |= sequence.getFeatures().shiftFeatures(1,
+ startShift);
}
}
}
System.out.println("Adding dbrefs to " + sequence.getName()
- + " from " + dbSource + " sequence : " + entry.getName());
- sequence.transferAnnotation(entry, mp);
+ + " from " + dbSource + " sequence : "
+ + retrievedSeq.getName());
+ sequence.transferAnnotation(retrievedSeq, mp);
- absStart += entry.getStart();
+ absStart += retrievedSeq.getStart();
int absEnd = absStart + nonGapped.length() - 1;
if (!trimDatasetSeqs)
{
- // insert full length sequence from record
- sequence.setSequence(entry.getSequenceAsString());
- sequence.setStart(entry.getStart());
+ /*
+ * update start position and/or expand to longer retrieved sequence
+ */
+ if (!retrievedSeqString.equals(sequence.getSequenceAsString())
+ && remoteEnclosesLocal)
+ {
+ sequence.setSequence(retrievedSeqString);
+ modified = true;
+ addWarningMessage(warningMessages,
+ "Sequence for " + sequence.getName() + " expanded from "
+ + retrievedSeq.getName());
+ }
+ if (sequence.getStart() != retrievedSeq.getStart())
+ {
+ sequence.setStart(retrievedSeq.getStart());
+ modified = true;
+ if (absStart != sequenceStart)
+ {
+ addWarningMessage(warningMessages,
+ "Start/end position for " + sequence.getName()
+ + " updated from " + retrievedSeq.getName());
+ }
+ }
}
if (updateRefFrame)
{
if (trimDatasetSeqs)
{
// just fix start/end
- sequence.setStart(absStart);
- sequence.setEnd(absEnd);
+ if (sequence.getStart() != absStart
+ || sequence.getEnd() != absEnd)
+ {
+ sequence.setStart(absStart);
+ sequence.setEnd(absEnd);
+ modified = true;
+ addWarningMessage(warningMessages,
+ "Start/end for " + sequence.getName()
+ + " updated from " + retrievedSeq.getName());
+ }
}
// search for alignment sequences to update coordinate frame for
for (int alsq = 0; alsq < alseqs.length; alsq++)
{
if (alseqs[alsq].getDatasetSequence() == sequence)
{
- String ngAlsq = AlignSeq.extractGaps("-. ",
- alseqs[alsq].getSequenceAsString()).toUpperCase();
+ String ngAlsq = AlignSeq
+ .extractGaps("-. ",
+ alseqs[alsq].getSequenceAsString())
+ .toUpperCase();
int oldstrt = alseqs[alsq].getStart();
alseqs[alsq].setStart(sequence.getSequenceAsString()
- .toUpperCase().indexOf(ngAlsq)
- + sequence.getStart());
+ .toUpperCase().indexOf(ngAlsq) + sequence.getStart());
if (oldstrt != alseqs[alsq].getStart())
{
- alseqs[alsq].setEnd(ngAlsq.length()
- + alseqs[alsq].getStart() - 1);
+ alseqs[alsq].setEnd(
+ ngAlsq.length() + alseqs[alsq].getStart() - 1);
+ modified = true;
}
}
}
// and remove it from the rest
// TODO: decide if we should remove annotated sequence from set
sdataset.remove(sequence);
- // TODO: should we make a note of sequences that have received new DB
- // ids, so we can query all enabled DAS servers for them ?
}
}
- if (!transferred)
+ return modified;
+ }
+
+ /**
+ * Records a warning, keeping the list free of repeats: msg is appended to
+ * messageList only when the list does not already contain an equal message
+ *
+ * @param messageList
+ * the warnings collected so far (modified in place)
+ * @param msg
+ * the warning text to record
+ */
+ void addWarningMessage(List<String> messageList, String msg)
+ {
+ if (messageList.contains(msg))
{
- // report the ID/sequence mismatches
- sbuffer.append(messages);
+ // already reported once, nothing to add
+ return;
}
+ messageList.add(msg);
}
*/
private SequenceI[] recoverDbSequences(SequenceI[] sequencesArray)
{
- Vector<SequenceI> nseq = new Vector<SequenceI>();
- for (int i = 0; sequencesArray != null && i < sequencesArray.length; i++)
+ // nothing to expand for a null or empty input: hand it back unchanged
+ if (sequencesArray == null || sequencesArray.length == 0)
+ {
+ return sequencesArray;
+ }
+ final int n = sequencesArray.length;
+ // collects each input sequence followed by any not-yet-listed sequence
+ // that one of its dbref mappings points to
+ List<SequenceI> nseq = new ArrayList<>();
+ for (int i = 0; i < n; i++)
{
- nseq.addElement(sequencesArray[i]);
- DBRefEntry[] dbr = sequencesArray[i].getDBRefs();
+ nseq.add(sequencesArray[i]);
+ List<DBRefEntry> dbr = sequencesArray[i].getDBRefs();
Mapping map = null;
- for (int r = 0; (dbr != null) && r < dbr.length; r++)
+ if (dbr != null)
{
- if ((map = dbr[r].getMap()) != null)
+ for (int r = 0, rn = dbr.size(); r < rn; r++)
{
- if (map.getTo() != null && !nseq.contains(map.getTo()))
+ if ((map = dbr.get(r).getMap()) != null)
{
- nseq.addElement(map.getTo());
- }
+ // only add a mapped-to sequence that is not already in the list
+ if (map.getTo() != null && !nseq.contains(map.getTo()))
+ {
+ nseq.add(map.getTo());
+ }
+ }
}
}
}
+ // BH 2019.01.25 asked whether the original should be returned if nothing
+ // was found: past the guard above nseq always holds the n >= 1 input
+ // sequences, so this check always succeeds and the final return is not
+ // reached
if (nseq.size() > 0)
{
- sequencesArray = new SequenceI[nseq.size()];
- nseq.toArray(sequencesArray);
+ return nseq.toArray(new SequenceI[nseq.size()]);
}
return sequencesArray;
}