*/
public class DBRefFetcher implements Runnable
{
+ private static final String NEWLINE = System.lineSeparator();
+
public interface FetchFinishedListenerI
{
void finished();
CutAndPasteTransfer output = new CutAndPasteTransfer();
- StringBuffer sbuffer = new StringBuffer();
-
boolean running = false;
/**
Vector<SequenceI> sdataset = new Vector<SequenceI>(
Arrays.asList(dataset));
+ List<String> warningMessages = new ArrayList<String>();
int db = 0;
while (sdataset.size() > 0 && db < dbSources.length)
if (retrieved != null)
{
transferReferences(sdataset, dbsource.getDbSource(), retrieved,
- trimDsSeqs);
+ trimDsSeqs, warningMessages);
}
}
else
// advance to next database
db++;
} // all databases have been queried
- if (sbuffer.length() > 0)
+ if (!warningMessages.isEmpty())
{
- output.setText(MessageManager
- .getString("label.your_sequences_have_been_verified")
- + sbuffer.toString());
+ StringBuilder sb = new StringBuilder(warningMessages.size() * 30);
+ sb.append(MessageManager
+ .getString("label.your_sequences_have_been_verified"));
+ for (String msg : warningMessages)
+ {
+ sb.append(msg).append(NEWLINE);
+ }
+ output.setText(sb.toString());
+
Desktop.addInternalFrame(output,
- MessageManager.getString("label.sequence_names_updated"),
+ MessageManager.getString("label.sequences_updated"),
600, 300);
// The above is the dataset, we must now find out the index
// of the viewed sequence
/**
* Verify local sequences in seqRefs against the retrieved sequence database
- * records.
+ * records. Returns true if any sequence was modified as a result (start/end
+ * changed and/or sequence enlarged), else false.
*
+ * @param sdataset
+ * dataset sequences we are retrieving for
+ * @param dbSource
+ * database source we are retrieving from
+ * @param retrievedAl
+ * retrieved sequences as alignment
* @param trimDatasetSeqs
- *
+ * if true, sequences will not be enlarged to match longer retrieved
+ * sequences, only their start/end adjusted
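+ * @param warningMessages
+ * list that collects warning messages raised during verification; a
+ * message already in the list is not added again
+ * @return true if any sequence was modified, else false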
*/
- void transferReferences(Vector<SequenceI> sdataset, String dbSource,
- AlignmentI retrievedAl, boolean trimDatasetSeqs)
+ boolean transferReferences(Vector<SequenceI> sdataset,
+ String dbSource,
+ AlignmentI retrievedAl, boolean trimDatasetSeqs,
+ List<String> warningMessages)
{
// System.out.println("trimming ? " + trimDatasetSeqs);
if (retrievedAl == null || retrievedAl.getHeight() == 0)
{
- return;
+ return false;
}
+
+ boolean modified = false;
SequenceI[] retrieved = recoverDbSequences(retrievedAl
.getSequencesArray());
SequenceI sequence = null;
- boolean transferred = false;
- StringBuilder messages = new StringBuilder(64);
- for (SequenceI entry : retrieved)
+ for (SequenceI retrievedSeq : retrieved)
{
// Work out which sequences this sequence matches,
// taking into account all accessionIds and names in the file
Vector<SequenceI> sequenceMatches = new Vector<SequenceI>();
// look for corresponding accession ids
- DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs(
- entry.getDBRefs(), new String[] { dbSource });
+ DBRefEntry[] entryRefs = DBRefUtils.selectRefs(retrievedSeq.getDBRefs(),
+ new String[] { dbSource });
if (entryRefs == null)
{
System.err
.println("Dud dbSource string ? no entryrefs selected for "
- + dbSource + " on " + entry.getName());
+ + dbSource + " on " + retrievedSeq.getName());
continue;
}
for (int j = 0; j < entryRefs.length; j++)
*/
// sequenceMatches now contains the set of all sequences associated with
// the returned db record
- String entrySeq = entry.getSequenceAsString().toUpperCase();
+ final String retrievedSeqString = retrievedSeq.getSequenceAsString();
+ String entrySeq = retrievedSeqString.toUpperCase();
for (int m = 0; m < sequenceMatches.size(); m++)
{
sequence = sequenceMatches.elementAt(m);
// TODO:
// verify sequence against the entry sequence
+ Mapping mp;
+ final int sequenceStart = sequence.getStart();
+
+ boolean remoteEnclosesLocal = false;
String nonGapped = AlignSeq.extractGaps("-. ",
sequence.getSequenceAsString()).toUpperCase();
-
int absStart = entrySeq.indexOf(nonGapped);
- Mapping mp;
-
- final int sequenceStart = sequence.getStart();
if (absStart == -1)
{
// couldn't find local sequence in sequence from database, so check if
{
// verification failed. couldn't find any relationship between
// entrySeq and local sequence
- messages.append(sequence.getName()
- + " Sequence not 100% match with " + entry.getName()
- + "\n");
+ // messages suppressed as many-to-many matches are confusing
+ // String msg = sequence.getName()
+ // + " Sequence not 100% match with "
+ // + retrievedSeq.getName();
+ // addWarningMessage(warningMessages, msg);
continue;
}
/*
- * found match for the whole of the database sequence within the local
- * sequence's reference frame.
+ * retrieved sequence is a proper subsequence of local sequence
*/
- transferred = true;
- sbuffer.append(sequence.getName() + " has " + absStart
- + " prefixed residues compared to " + entry.getName()
- + "\n");
+ String msg = sequence.getName() + " has " + absStart
+ + " prefixed residues compared to "
+ + retrievedSeq.getName();
+ addWarningMessage(warningMessages, msg);
/*
* So create a mapping to the external entry from the matching region of
*/
mp = new Mapping(null, new int[] { sequenceStart + absStart,
sequenceStart + absStart + entrySeq.length() - 1 }, new int[]
- { entry.getStart(), entry.getStart() + entrySeq.length() - 1 },
+ { retrievedSeq.getStart(), retrievedSeq.getStart() + entrySeq.length() - 1 },
1, 1);
updateRefFrame = false;
}
else
{
/*
- * found a match for the local sequence within sequence from
- * the external database
+ * local sequence is a subsequence of (or matches) retrieved sequence
*/
- transferred = true;
-
- // update start and end of local sequence to place it in entry's
- // reference frame.
- // apply identity map map from whole of local sequence to matching
- // region of database
- // sequence
- mp = null; // Mapping.getIdentityMap();
- // new Mapping(null,
- // new int[] { absStart+sequence.getStart(),
- // absStart+sequence.getStart()+entrySeq.length()-1},
- // new int[] { entry.getStart(), entry.getEnd() }, 1, 1);
- // relocate local features for updated start
+ remoteEnclosesLocal = true;
+ mp = null;
if (updateRefFrame)
{
- if (sequence.getSequenceFeatures() != null)
+ SequenceFeature[] sfs = sequence.getSequenceFeatures();
+ if (sfs != null)
{
/*
* relocate existing sequence features by offset
*/
- SequenceFeature[] sf = sequence.getSequenceFeatures();
int start = sequenceStart;
int end = sequence.getEnd();
- int startShift = 1 - absStart - start; // how much the features
- // are
- // to be shifted by
- for (int sfi = 0; sfi < sf.length; sfi++)
+ int startShift = 1 - absStart - start;
+
+ if (startShift != 0)
{
- if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end)
+ for (SequenceFeature sf : sfs)
{
- // shift feature along by absstart
- sf[sfi].setBegin(sf[sfi].getBegin() + startShift);
- sf[sfi].setEnd(sf[sfi].getEnd() + startShift);
+ if (sf.getBegin() >= start && sf.getEnd() <= end)
+ {
+ sf.setBegin(sf.getBegin() + startShift);
+ sf.setEnd(sf.getEnd() + startShift);
+ modified = true;
+ }
}
}
}
}
System.out.println("Adding dbrefs to " + sequence.getName()
- + " from " + dbSource + " sequence : " + entry.getName());
- sequence.transferAnnotation(entry, mp);
+ + " from " + dbSource + " sequence : " + retrievedSeq.getName());
+ sequence.transferAnnotation(retrievedSeq, mp);
- absStart += entry.getStart();
+ absStart += retrievedSeq.getStart();
int absEnd = absStart + nonGapped.length() - 1;
if (!trimDatasetSeqs)
{
- // insert full length sequence from record
- sequence.setSequence(entry.getSequenceAsString());
- sequence.setStart(entry.getStart());
+ /*
+ * update start position and/or expand to longer retrieved sequence
+ */
+ if (!retrievedSeqString.equals(sequence.getSequenceAsString())
+ && remoteEnclosesLocal)
+ {
+ sequence.setSequence(retrievedSeqString);
+ modified = true;
+ addWarningMessage(warningMessages,
+ "Sequence for " + sequence.getName()
+ + " expanded from " + retrievedSeq.getName());
+ }
+ if (sequence.getStart() != retrievedSeq.getStart())
+ {
+ sequence.setStart(retrievedSeq.getStart());
+ modified = true;
+ if (absStart != sequenceStart)
+ {
+ addWarningMessage(warningMessages, "Start/end position for "
+ + sequence.getName() + " updated from "
+ + retrievedSeq.getName());
+ }
+ }
}
if (updateRefFrame)
{
if (trimDatasetSeqs)
{
// just fix start/end
- sequence.setStart(absStart);
- sequence.setEnd(absEnd);
+ if (sequence.getStart() != absStart
+ || sequence.getEnd() != absEnd)
+ {
+ sequence.setStart(absStart);
+ sequence.setEnd(absEnd);
+ modified = true;
+ addWarningMessage(warningMessages, "Start/end for "
+ + sequence.getName() + " updated from "
+ + retrievedSeq.getName());
+ }
}
// search for alignment sequences to update coordinate frame for
for (int alsq = 0; alsq < alseqs.length; alsq++)
{
alseqs[alsq].setEnd(ngAlsq.length()
+ alseqs[alsq].getStart() - 1);
+ modified = true;
}
}
}
// ids, so we can query all enabled DAS servers for them ?
}
}
- if (!transferred)
+ return modified;
+ }
+
+ /**
+ * Adds the message to the list unless the list already contains an equal
+ * message, so that a repeated warning is only recorded once.
+ *
+ * @param messageList
+ * the list of messages to add to (modified in place)
+ * @param msg
+ * the message to add if it is not already present
+ */
+ void addWarningMessage(List<String> messageList, String msg)
+ {
+ if (!messageList.contains(msg))
{
- // report the ID/sequence mismatches
- sbuffer.append(messages);
+ messageList.add(msg);
}
}