import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
-import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.gui.CutAndPasteTransfer;
import jalview.gui.DasSourceBrowser;
*/
public class DBRefFetcher implements Runnable
{
+ private static final String NEWLINE = System.lineSeparator();
+
public interface FetchFinishedListenerI
{
void finished();
CutAndPasteTransfer output = new CutAndPasteTransfer();
- StringBuffer sbuffer = new StringBuffer();
-
boolean running = false;
/**
*/
public DBRefFetcher(SequenceI[] seqs,
IProgressIndicator progressIndicatorFrame,
- DbSourceProxy[] sources, FeatureSettings featureSettings, boolean isNucleotide)
+ DbSourceProxy[] sources, FeatureSettings featureSettings,
+ boolean isNucleotide)
{
listeners = new ArrayList<FetchFinishedListenerI>();
this.progressWindow = progressIndicatorFrame;
// af.featureSettings_actionPerformed(null);
String[] defdb = null;
List<DbSourceProxy> selsources = new ArrayList<DbSourceProxy>();
- Vector<jalviewSourceI> dasselsrc = (featureSettings != null) ? featureSettings
- .getSelectedSources() : new DasSourceBrowser()
- .getSelectedSources();
+ Vector<jalviewSourceI> dasselsrc = (featureSettings != null)
+ ? featureSettings.getSelectedSources()
+ : new DasSourceBrowser().getSelectedSources();
for (jalviewSourceI src : dasselsrc)
{
DbSourceProxy[] newsrc = new DbSourceProxy[dbSources.length
+ otherdb.length];
System.arraycopy(dbSources, 0, newsrc, 0, dbSources.length);
- System.arraycopy(otherdb, 0, newsrc, dbSources.length, otherdb.length);
+ System.arraycopy(otherdb, 0, newsrc, dbSources.length,
+ otherdb.length);
dbSources = newsrc;
}
}
{
if (dbSources == null)
{
- throw new Error(
- MessageManager
- .getString("error.implementation_error_must_init_dbsources"));
+ throw new Error(MessageManager
+ .getString("error.implementation_error_must_init_dbsources"));
}
running = true;
long startTime = System.currentTimeMillis();
{
progressWindow.setProgressBar(
MessageManager.getString("status.fetching_db_refs"),
- startTime);
+ startTime);
}
try
{
Vector<SequenceI> sdataset = new Vector<SequenceI>(
Arrays.asList(dataset));
+ List<String> warningMessages = new ArrayList<String>();
int db = 0;
while (sdataset.size() > 0 && db < dbSources.length)
// Still queries to make for current seqIndex
StringBuffer queryString = new StringBuffer("");
int numq = 0;
- int nqSize = (maxqlen > queries.size()) ? queries
- .size() : maxqlen;
+ int nqSize = (maxqlen > queries.size()) ? queries.size()
+ : maxqlen;
while (queries.size() > 0 && numq < nqSize)
{
String query = queries.elementAt(0);
if (dbsource.isValidReference(query))
{
- queryString.append((numq == 0) ? "" : dbsource
- .getAccessionSeparator());
+ queryString.append(
+ (numq == 0) ? "" : dbsource.getAccessionSeparator());
queryString.append(query);
numq++;
}
if (retrieved != null)
{
transferReferences(sdataset, dbsource.getDbSource(), retrieved,
- trimDsSeqs);
+ trimDsSeqs, warningMessages);
}
}
else
{
// make some more strings for use as queries
- for (int i = 0; (seqIndex < dataset.length) && (i < 50); seqIndex++, i++)
+ for (int i = 0; (seqIndex < dataset.length)
+ && (i < 50); seqIndex++, i++)
{
SequenceI sequence = dataset[seqIndex];
- DBRefEntry[] uprefs = DBRefUtils.selectRefs(
- sequence.getDBRefs(),
- new String[] { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
+ DBRefEntry[] uprefs = DBRefUtils
+ .selectRefs(sequence.getDBRefs(), new String[]
+ { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT
// });
// check for existing dbrefs to use
if (uprefs != null && uprefs.length > 0)
for (int j = 0; j < uprefs.length; j++)
{
addSeqId(sequence, uprefs[j].getAccessionId());
- queries.addElement(uprefs[j].getAccessionId().toUpperCase());
+ queries.addElement(
+ uprefs[j].getAccessionId().toUpperCase());
}
}
else
// resolve the string against PICR to recover valid IDs
try
{
- presp = picrClient
- .getUPIForAccession(token, null,
- picrClient.getMappedDatabaseNames(),
- null, true);
+ presp = picrClient.getUPIForAccession(token, null,
+ picrClient.getMappedDatabaseNames(), null,
+ true);
} catch (Exception e)
{
- System.err.println("Exception with Picr for '" + token
- + "'\n");
+ System.err.println(
+ "Exception with Picr for '" + token + "'\n");
e.printStackTrace();
}
}
// present, and do a transferReferences
// otherwise transfer non sequence x-references directly.
}
- System.out
- .println("Validated ID against PICR... (for what its worth):"
+ System.out.println(
+ "Validated ID against PICR... (for what its worth):"
+ token);
addSeqId(sequence, token);
queries.addElement(token.toUpperCase());
else
{
// if ()
- // System.out.println("Not querying source with token="+token+"\n");
+ // System.out.println("Not querying source with
+ // token="+token+"\n");
addSeqId(sequence, token);
queries.addElement(token.toUpperCase());
}
// advance to next database
db++;
} // all databases have been queried
- if (sbuffer.length() > 0)
+ if (!warningMessages.isEmpty())
{
- output.setText(MessageManager
- .getString("label.your_sequences_have_been_verified")
- + sbuffer.toString());
+ StringBuilder sb = new StringBuilder(warningMessages.size() * 30);
+ sb.append(MessageManager
+ .getString("label.your_sequences_have_been_verified"));
+ for (String msg : warningMessages)
+ {
+ sb.append(msg).append(NEWLINE);
+ }
+ output.setText(sb.toString());
+
Desktop.addInternalFrame(output,
- MessageManager.getString("label.sequence_names_updated"),
- 600, 300);
+ MessageManager.getString("label.sequences_updated"), 600,
+ 300);
// The above is the dataset, we must now find out the index
// of the viewed sequence
/**
* Verify local sequences in seqRefs against the retrieved sequence database
- * records.
+ * records. Returns true if any sequence was modified as a result (start/end
+ * changed and/or sequence enlarged), else false.
*
+ * @param sdataset
+ * dataset sequences we are retrieving for
+ * @param dbSource
+ * database source we are retrieving from
+ * @param retrievedAl
+ * retrieved sequences as alignment
* @param trimDatasetSeqs
- *
+ * if true, sequences will not be enlarged to match longer retrieved
+ * sequences, only their start/end adjusted
+ * @param warningMessages
+ * list to which warning messages are appended (duplicates are skipped)
*/
- void transferReferences(Vector<SequenceI> sdataset, String dbSource,
- AlignmentI retrievedAl, boolean trimDatasetSeqs)
+ boolean transferReferences(Vector<SequenceI> sdataset, String dbSource,
+ AlignmentI retrievedAl, boolean trimDatasetSeqs,
+ List<String> warningMessages)
{
// System.out.println("trimming ? " + trimDatasetSeqs);
if (retrievedAl == null || retrievedAl.getHeight() == 0)
{
- return;
+ return false;
}
- SequenceI[] retrieved = recoverDbSequences(retrievedAl
- .getSequencesArray());
+
+ boolean modified = false;
+ SequenceI[] retrieved = recoverDbSequences(
+ retrievedAl.getSequencesArray());
SequenceI sequence = null;
- boolean transferred = false;
- StringBuilder messages = new StringBuilder(64);
- for (SequenceI entry : retrieved)
+ for (SequenceI retrievedSeq : retrieved)
{
// Work out which sequences this sequence matches,
// taking into account all accessionIds and names in the file
Vector<SequenceI> sequenceMatches = new Vector<SequenceI>();
// look for corresponding accession ids
- DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs(
- entry.getDBRefs(), new String[] { dbSource });
+ DBRefEntry[] entryRefs = DBRefUtils
+ .selectRefs(retrievedSeq.getDBRefs(), new String[]
+ { dbSource });
if (entryRefs == null)
{
System.err
.println("Dud dbSource string ? no entryrefs selected for "
- + dbSource + " on " + entry.getName());
+ + dbSource + " on " + retrievedSeq.getName());
continue;
}
for (int j = 0; j < entryRefs.length; j++)
*/
// sequenceMatches now contains the set of all sequences associated with
// the returned db record
- String entrySeq = entry.getSequenceAsString().toUpperCase();
+ final String retrievedSeqString = retrievedSeq.getSequenceAsString();
+ String entrySeq = retrievedSeqString.toUpperCase();
for (int m = 0; m < sequenceMatches.size(); m++)
{
sequence = sequenceMatches.elementAt(m);
// TODO:
// verify sequence against the entry sequence
- String nonGapped = AlignSeq.extractGaps("-. ",
- sequence.getSequenceAsString()).toUpperCase();
-
- int absStart = entrySeq.indexOf(nonGapped);
Mapping mp;
-
final int sequenceStart = sequence.getStart();
+
+ boolean remoteEnclosesLocal = false;
+ String nonGapped = AlignSeq
+ .extractGaps("-. ", sequence.getSequenceAsString())
+ .toUpperCase();
+ int absStart = entrySeq.indexOf(nonGapped);
if (absStart == -1)
{
// couldn't find local sequence in sequence from database, so check if
{
// verification failed. couldn't find any relationship between
// entrySeq and local sequence
- messages.append(sequence.getName()
- + " Sequence not 100% match with " + entry.getName()
- + "\n");
+ // messages suppressed as many-to-many matches are confusing
+ // String msg = sequence.getName()
+ // + " Sequence not 100% match with "
+ // + retrievedSeq.getName();
+ // addWarningMessage(warningMessages, msg);
continue;
}
/*
- * found match for the whole of the database sequence within the local
- * sequence's reference frame.
+ * retrieved sequence is a proper subsequence of local sequence
*/
- transferred = true;
- sbuffer.append(sequence.getName() + " has " + absStart
- + " prefixed residues compared to " + entry.getName()
- + "\n");
+ String msg = sequence.getName() + " has " + absStart
+ + " prefixed residues compared to "
+ + retrievedSeq.getName();
+ addWarningMessage(warningMessages, msg);
/*
* So create a mapping to the external entry from the matching region of
* the local sequence, and leave local start/end untouched.
*/
- mp = new Mapping(null, new int[] { sequenceStart + absStart,
- sequenceStart + absStart + entrySeq.length() - 1 }, new int[]
- { entry.getStart(), entry.getStart() + entrySeq.length() - 1 },
+ mp = new Mapping(null,
+ new int[]
+ { sequenceStart + absStart,
+ sequenceStart + absStart + entrySeq.length() - 1 },
+ new int[]
+ { retrievedSeq.getStart(),
+ retrievedSeq.getStart() + entrySeq.length() - 1 },
1, 1);
updateRefFrame = false;
}
else
{
/*
- * found a match for the local sequence within sequence from
- * the external database
+ * local sequence is a subsequence of (or matches) retrieved sequence
*/
- transferred = true;
-
- // update start and end of local sequence to place it in entry's
- // reference frame.
- // apply identity map map from whole of local sequence to matching
- // region of database
- // sequence
- mp = null; // Mapping.getIdentityMap();
- // new Mapping(null,
- // new int[] { absStart+sequence.getStart(),
- // absStart+sequence.getStart()+entrySeq.length()-1},
- // new int[] { entry.getStart(), entry.getEnd() }, 1, 1);
- // relocate local features for updated start
+ remoteEnclosesLocal = true;
+ mp = null;
if (updateRefFrame)
{
- if (sequence.getSequenceFeatures() != null)
+ /*
+ * relocate existing sequence features by offset
+ */
+ int startShift = absStart - sequenceStart + 1;
+ if (startShift != 0)
{
- /*
- * relocate existing sequence features by offset
- */
- SequenceFeature[] sf = sequence.getSequenceFeatures();
- int start = sequenceStart;
- int end = sequence.getEnd();
- int startShift = 1 - absStart - start; // how much the features
- // are
- // to be shifted by
- for (int sfi = 0; sfi < sf.length; sfi++)
- {
- if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end)
- {
- // shift feature along by absstart
- sf[sfi].setBegin(sf[sfi].getBegin() + startShift);
- sf[sfi].setEnd(sf[sfi].getEnd() + startShift);
- }
- }
+ modified |= sequence.getFeatures().shiftFeatures(startShift);
}
}
}
System.out.println("Adding dbrefs to " + sequence.getName()
- + " from " + dbSource + " sequence : " + entry.getName());
- sequence.transferAnnotation(entry, mp);
+ + " from " + dbSource + " sequence : "
+ + retrievedSeq.getName());
+ sequence.transferAnnotation(retrievedSeq, mp);
- absStart += entry.getStart();
+ absStart += retrievedSeq.getStart();
int absEnd = absStart + nonGapped.length() - 1;
if (!trimDatasetSeqs)
{
- // insert full length sequence from record
- sequence.setSequence(entry.getSequenceAsString());
- sequence.setStart(entry.getStart());
+ /*
+ * update start position and/or expand to longer retrieved sequence
+ */
+ if (!retrievedSeqString.equals(sequence.getSequenceAsString())
+ && remoteEnclosesLocal)
+ {
+ sequence.setSequence(retrievedSeqString);
+ modified = true;
+ addWarningMessage(warningMessages,
+ "Sequence for " + sequence.getName() + " expanded from "
+ + retrievedSeq.getName());
+ }
+ if (sequence.getStart() != retrievedSeq.getStart())
+ {
+ sequence.setStart(retrievedSeq.getStart());
+ modified = true;
+ if (absStart != sequenceStart)
+ {
+ addWarningMessage(warningMessages,
+ "Start/end position for " + sequence.getName()
+ + " updated from " + retrievedSeq.getName());
+ }
+ }
}
if (updateRefFrame)
{
if (trimDatasetSeqs)
{
// just fix start/end
- sequence.setStart(absStart);
- sequence.setEnd(absEnd);
+ if (sequence.getStart() != absStart
+ || sequence.getEnd() != absEnd)
+ {
+ sequence.setStart(absStart);
+ sequence.setEnd(absEnd);
+ modified = true;
+ addWarningMessage(warningMessages,
+ "Start/end for " + sequence.getName()
+ + " updated from " + retrievedSeq.getName());
+ }
}
// search for alignment sequences to update coordinate frame for
for (int alsq = 0; alsq < alseqs.length; alsq++)
{
if (alseqs[alsq].getDatasetSequence() == sequence)
{
- String ngAlsq = AlignSeq.extractGaps("-. ",
- alseqs[alsq].getSequenceAsString()).toUpperCase();
+ String ngAlsq = AlignSeq
+ .extractGaps("-. ",
+ alseqs[alsq].getSequenceAsString())
+ .toUpperCase();
int oldstrt = alseqs[alsq].getStart();
alseqs[alsq].setStart(sequence.getSequenceAsString()
- .toUpperCase().indexOf(ngAlsq)
- + sequence.getStart());
+ .toUpperCase().indexOf(ngAlsq) + sequence.getStart());
if (oldstrt != alseqs[alsq].getStart())
{
- alseqs[alsq].setEnd(ngAlsq.length()
- + alseqs[alsq].getStart() - 1);
+ alseqs[alsq].setEnd(
+ ngAlsq.length() + alseqs[alsq].getStart() - 1);
+ modified = true;
}
}
}
// ids, so we can query all enabled DAS servers for them ?
}
}
- if (!transferred)
+ return modified;
+ }
+
+ /**
+ * Adds the message to the list unless the list already contains an equal
+ * message, so each distinct message appears in the list at most once. The
+ * list is modified in place and nothing is returned.
+ *
+ * @param messageList
+ * the list of messages to append to
+ * @param msg
+ * the message text to add if not already present
+ */
+ void addWarningMessage(List<String> messageList, String msg)
+ {
+ if (!messageList.contains(msg))
{
- // report the ID/sequence mismatches
- sbuffer.append(messages);
+ messageList.add(msg);
}
}
private SequenceI[] recoverDbSequences(SequenceI[] sequencesArray)
{
Vector<SequenceI> nseq = new Vector<SequenceI>();
- for (int i = 0; sequencesArray != null && i < sequencesArray.length; i++)
+ for (int i = 0; sequencesArray != null
+ && i < sequencesArray.length; i++)
{
nseq.addElement(sequencesArray[i]);
DBRefEntry[] dbr = sequencesArray[i].getDBRefs();