X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2FDBRefFetcher.java;h=08178642832bc415c1be86f446db77fc8f43e61b;hb=1580169b8b5b244d380ec7b99a732dd34b0d454a;hp=748cb72daa301d050856855600c0ff8a0fc9efbf;hpb=b5f2dfda36c463b93f34db95fe5bc5ff2a1516bf;p=jalview.git diff --git a/src/jalview/ws/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java index 748cb72..0817864 100644 --- a/src/jalview/ws/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -26,7 +26,6 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; -import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.gui.CutAndPasteTransfer; import jalview.gui.DasSourceBrowser; @@ -60,6 +59,8 @@ import uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator; */ public class DBRefFetcher implements Runnable { + private static final String NEWLINE = System.lineSeparator(); + public interface FetchFinishedListenerI { void finished(); @@ -71,8 +72,6 @@ public class DBRefFetcher implements Runnable CutAndPasteTransfer output = new CutAndPasteTransfer(); - StringBuffer sbuffer = new StringBuffer(); - boolean running = false; /** @@ -115,7 +114,8 @@ public class DBRefFetcher implements Runnable */ public DBRefFetcher(SequenceI[] seqs, IProgressIndicator progressIndicatorFrame, - DbSourceProxy[] sources, FeatureSettings featureSettings, boolean isNucleotide) + DbSourceProxy[] sources, FeatureSettings featureSettings, + boolean isNucleotide) { listeners = new ArrayList(); this.progressWindow = progressIndicatorFrame; @@ -340,7 +340,7 @@ public class DBRefFetcher implements Runnable { progressWindow.setProgressBar( MessageManager.getString("status.fetching_db_refs"), - startTime); + startTime); } try { @@ -357,6 +357,7 @@ public class DBRefFetcher implements Runnable Vector sdataset = new Vector( Arrays.asList(dataset)); + List warningMessages = new ArrayList(); int db = 0; while (sdataset.size() > 0 && db < dbSources.length) @@ -388,8 +389,8 @@ public class DBRefFetcher implements Runnable // Still queries to make for current seqIndex StringBuffer queryString = new StringBuffer(""); int numq = 0; - int nqSize = (maxqlen > queries.size()) ? queries - .size() : maxqlen; + int nqSize = (maxqlen > queries.size()) ? queries.size() + : maxqlen; while (queries.size() > 0 && numq < nqSize) { @@ -425,7 +426,7 @@ public class DBRefFetcher implements Runnable if (retrieved != null) { transferReferences(sdataset, dbsource.getDbSource(), retrieved, - trimDsSeqs); + trimDsSeqs, warningMessages); } } else @@ -501,14 +502,19 @@ public class DBRefFetcher implements Runnable // advance to next database db++; } // all databases have been queried - if (sbuffer.length() > 0) + if (!warningMessages.isEmpty()) { - output.setText(MessageManager - .getString("label.your_sequences_have_been_verified") - + sbuffer.toString()); + StringBuilder sb = new StringBuilder(warningMessages.size() * 30); + sb.append(MessageManager + .getString("label.your_sequences_have_been_verified")); + for (String msg : warningMessages) + { + sb.append(msg).append(NEWLINE); + } + output.setText(sb.toString()); + Desktop.addInternalFrame(output, - MessageManager.getString("label.sequence_names_updated"), - 600, 300); + MessageManager.getString("label.sequences_updated"), 600, 300); // The above is the dataset, we must now find out the index // of the viewed sequence @@ -529,38 +535,49 @@ public class DBRefFetcher implements Runnable /** * Verify local sequences in seqRefs against the retrieved sequence database - * records. + * records. Returns true if any sequence was modified as a result (start/end + * changed and/or sequence enlarged), else false. * + * @param sdataset + * dataset sequences we are retrieving for + * @param dbSource + * database source we are retrieving from + * @param retrievedAl + * retrieved sequences as alignment * @param trimDatasetSeqs - * + * if true, sequences will not be enlarged to match longer retrieved + * sequences, only their start/end adjusted + * @param warningMessages + * a list of messages to add to */ - void transferReferences(Vector sdataset, String dbSource, - AlignmentI retrievedAl, boolean trimDatasetSeqs) + boolean transferReferences(Vector sdataset, String dbSource, + AlignmentI retrievedAl, boolean trimDatasetSeqs, + List warningMessages) { // System.out.println("trimming ? " + trimDatasetSeqs); if (retrievedAl == null || retrievedAl.getHeight() == 0) { - return; + return false; } + + boolean modified = false; SequenceI[] retrieved = recoverDbSequences(retrievedAl .getSequencesArray()); SequenceI sequence = null; - boolean transferred = false; - StringBuilder messages = new StringBuilder(64); - for (SequenceI entry : retrieved) + for (SequenceI retrievedSeq : retrieved) { // Work out which sequences this sequence matches, // taking into account all accessionIds and names in the file Vector sequenceMatches = new Vector(); // look for corresponding accession ids - DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs( - entry.getDBRefs(), new String[] { dbSource }); + DBRefEntry[] entryRefs = DBRefUtils.selectRefs( + retrievedSeq.getDBRefs(), new String[] { dbSource }); if (entryRefs == null) { System.err .println("Dud dbSource string ? no entryrefs selected for " - + dbSource + " on " + entry.getName()); + + dbSource + " on " + retrievedSeq.getName()); continue; } for (int j = 0; j < entryRefs.length; j++) @@ -614,7 +631,8 @@ public class DBRefFetcher implements Runnable */ // sequenceMatches now contains the set of all sequences associated with // the returned db record - String entrySeq = entry.getSequenceAsString().toUpperCase(); + final String retrievedSeqString = retrievedSeq.getSequenceAsString(); + String entrySeq = retrievedSeqString.toUpperCase(); for (int m = 0; m < sequenceMatches.size(); m++) { sequence = sequenceMatches.elementAt(m); @@ -627,13 +645,13 @@ public class DBRefFetcher implements Runnable // TODO: // verify sequence against the entry sequence + Mapping mp; + final int sequenceStart = sequence.getStart(); + + boolean remoteEnclosesLocal = false; String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequenceAsString()).toUpperCase(); - int absStart = entrySeq.indexOf(nonGapped); - Mapping mp; - - final int sequenceStart = sequence.getStart(); if (absStart == -1) { // couldn't find local sequence in sequence from database, so check if @@ -643,19 +661,20 @@ public class DBRefFetcher implements Runnable { // verification failed. couldn't find any relationship between // entrySeq and local sequence - messages.append(sequence.getName() - + " Sequence not 100% match with " + entry.getName() - + "\n"); + // messages suppressed as many-to-many matches are confusing + // String msg = sequence.getName() + // + " Sequence not 100% match with " + // + retrievedSeq.getName(); + // addWarningMessage(warningMessages, msg); continue; } /* - * found match for the whole of the database sequence within the local - * sequence's reference frame. + * retrieved sequence is a proper subsequence of local sequence */ - transferred = true; - sbuffer.append(sequence.getName() + " has " + absStart - + " prefixed residues compared to " + entry.getName() - + "\n"); + String msg = sequence.getName() + " has " + absStart + + " prefixed residues compared to " + + retrievedSeq.getName(); + addWarningMessage(warningMessages, msg); /* * So create a mapping to the external entry from the matching region of @@ -663,67 +682,64 @@ public class DBRefFetcher implements Runnable */ mp = new Mapping(null, new int[] { sequenceStart + absStart, sequenceStart + absStart + entrySeq.length() - 1 }, new int[] - { entry.getStart(), entry.getStart() + entrySeq.length() - 1 }, - 1, 1); + { retrievedSeq.getStart(), + retrievedSeq.getStart() + entrySeq.length() - 1 }, 1, 1); updateRefFrame = false; } else { /* - * found a match for the local sequence within sequence from - * the external database + * local sequence is a subsequence of (or matches) retrieved sequence */ - transferred = true; - - // update start and end of local sequence to place it in entry's - // reference frame. - // apply identity map map from whole of local sequence to matching - // region of database - // sequence - mp = null; // Mapping.getIdentityMap(); - // new Mapping(null, - // new int[] { absStart+sequence.getStart(), - // absStart+sequence.getStart()+entrySeq.length()-1}, - // new int[] { entry.getStart(), entry.getEnd() }, 1, 1); - // relocate local features for updated start + remoteEnclosesLocal = true; + mp = null; if (updateRefFrame) { - if (sequence.getSequenceFeatures() != null) + /* + * relocate existing sequence features by offset + */ + int startShift = absStart - sequenceStart + 1; + if (startShift != 0) { - /* - * relocate existing sequence features by offset - */ - SequenceFeature[] sf = sequence.getSequenceFeatures(); - int start = sequenceStart; - int end = sequence.getEnd(); - int startShift = 1 - absStart - start; // how much the features - // are - // to be shifted by - for (int sfi = 0; sfi < sf.length; sfi++) - { - if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end) - { - // shift feature along by absstart - sf[sfi].setBegin(sf[sfi].getBegin() + startShift); - sf[sfi].setEnd(sf[sfi].getEnd() + startShift); - } - } + modified |= sequence.getFeatures().shiftFeatures(1, + startShift); } } } System.out.println("Adding dbrefs to " + sequence.getName() - + " from " + dbSource + " sequence : " + entry.getName()); - sequence.transferAnnotation(entry, mp); + + " from " + dbSource + " sequence : " + + retrievedSeq.getName()); + sequence.transferAnnotation(retrievedSeq, mp); - absStart += entry.getStart(); + absStart += retrievedSeq.getStart(); int absEnd = absStart + nonGapped.length() - 1; if (!trimDatasetSeqs) { - // insert full length sequence from record - sequence.setSequence(entry.getSequenceAsString()); - sequence.setStart(entry.getStart()); + /* + * update start position and/or expand to longer retrieved sequence + */ + if (!retrievedSeqString.equals(sequence.getSequenceAsString()) + && remoteEnclosesLocal) + { + sequence.setSequence(retrievedSeqString); + modified = true; + addWarningMessage(warningMessages, + "Sequence for " + sequence.getName() + + " expanded from " + retrievedSeq.getName()); + } + if (sequence.getStart() != retrievedSeq.getStart()) + { + sequence.setStart(retrievedSeq.getStart()); + modified = true; + if (absStart != sequenceStart) + { + addWarningMessage(warningMessages, "Start/end position for " + + sequence.getName() + " updated from " + + retrievedSeq.getName()); + } + } } if (updateRefFrame) { @@ -731,8 +747,16 @@ public class DBRefFetcher implements Runnable if (trimDatasetSeqs) { // just fix start/end - sequence.setStart(absStart); - sequence.setEnd(absEnd); + if (sequence.getStart() != absStart + || sequence.getEnd() != absEnd) + { + sequence.setStart(absStart); + sequence.setEnd(absEnd); + modified = true; + addWarningMessage(warningMessages, "Start/end for " + + sequence.getName() + " updated from " + + retrievedSeq.getName()); + } } // search for alignment sequences to update coordinate frame for for (int alsq = 0; alsq < alseqs.length; alsq++) @@ -749,6 +773,7 @@ public class DBRefFetcher implements Runnable { alseqs[alsq].setEnd(ngAlsq.length() + alseqs[alsq].getStart() - 1); + modified = true; } } } @@ -765,10 +790,20 @@ public class DBRefFetcher implements Runnable // ids, so we can query all enabled DAS servers for them ? } } - if (!transferred) + return modified; + } + + /** + * Adds the message to the list unless it already contains it + * + * @param messageList + * @param msg + */ + void addWarningMessage(List messageList, String msg) + { + if (!messageList.contains(msg)) { - // report the ID/sequence mismatches - sbuffer.append(messages); + messageList.add(msg); } }