From: gmungoc Date: Thu, 29 Sep 2016 15:45:28 +0000 (+0100) Subject: JAL-1856 better warning messages for sequence matching in DBRefFetcher X-Git-Tag: Release_2_10_0~24 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=167ce32fb49494a4d6ae03ffdb430f05f4d93f76;p=jalview.git JAL-1856 better warning messages for sequence matching in DBRefFetcher --- diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index f5b4f7a..378fb64 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -729,8 +729,8 @@ label.move_url_down = Move URL Down label.add_sbrs_definition = Add a SBRS Definition label.edit_sbrs_definition = Edit SBRS Definition label.delete_sbrs_definition = Delete SBRS Definition -label.your_sequences_have_been_verified = Your sequences have been verified against known sequence databases. Some of the ids have been\n altered, most likely the start/end residue will have been updated.\n Save your alignment to maintain the updated id.\n\n -label.sequence_names_updated = Sequence names updated +label.your_sequences_have_been_verified = Your sequences have been verified against known sequence databases.\n(Use Calculate | Show flanking regions to show enclosing sequence.)\nTo preserve data changes, save your alignment.\n\n +label.sequences_updated = Sequences updated label.dbref_search_completed = DBRef search completed label.show_all_chains = Show all chains label.fetch_all_param = Fetch all {0} diff --git a/resources/lang/Messages_es.properties b/resources/lang/Messages_es.properties index 87538be..7bea515 100644 --- a/resources/lang/Messages_es.properties +++ b/resources/lang/Messages_es.properties @@ -674,8 +674,8 @@ label.move_url_down = Mover la URL hacia abajo label.add_sbrs_definition = Añadir una definición SBRS label.edit_sbrs_definition = Editar una definición SBRS label.delete_sbrs_definition = Borrar una definición SBRS -label.your_sequences_have_been_verified = Sus secuencias has sido verificadas en una base de datos de secuencias conocidas. Algunos de sus ID se han alterado y\n, probablemente, el residuo de inicio/fin se haya actualizado.\nGuarde su alineamiento para mantener el ID actualizado.\n\n -label.sequence_names_updated = Nombres de secuencia actualizados +label.your_sequences_have_been_verified = Sus secuencias has sido verificadas en una base de datos de secuencias conocidas.\n(Usar Calcular | Mostrar flancos para ver ampliación.)\nPara mantener los datos actualizados, guarde su alineamiento.\n\n +label.sequences_updated = Secuencias actualizadas label.dbref_search_completed = Búsqueda de DBRef terminada label.show_all_chains = Mostrar todas las cadenas label.fetch_all_param = Recuperar todas {0} diff --git a/src/jalview/ws/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java index 748cb72..453cd00 100644 --- a/src/jalview/ws/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -60,6 +60,8 @@ import uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator; */ public class DBRefFetcher implements Runnable { + private static final String NEWLINE = System.lineSeparator(); + public interface FetchFinishedListenerI { void finished(); @@ -71,8 +73,6 @@ public class DBRefFetcher implements Runnable CutAndPasteTransfer output = new CutAndPasteTransfer(); - StringBuffer sbuffer = new StringBuffer(); - boolean running = false; /** @@ -357,6 +357,7 @@ public class DBRefFetcher implements Runnable Vector sdataset = new Vector( Arrays.asList(dataset)); + List warningMessages = new ArrayList(); int db = 0; while (sdataset.size() > 0 && db < dbSources.length) @@ -425,7 +426,7 @@ public class DBRefFetcher implements Runnable if (retrieved != null) { transferReferences(sdataset, dbsource.getDbSource(), retrieved, - trimDsSeqs); + trimDsSeqs, warningMessages); } } else @@ -501,13 +502,19 @@ public class DBRefFetcher implements Runnable // advance to next database db++; } // all databases have been queried - if (sbuffer.length() > 0) + if (!warningMessages.isEmpty()) { - output.setText(MessageManager - .getString("label.your_sequences_have_been_verified") - + sbuffer.toString()); + StringBuilder sb = new StringBuilder(warningMessages.size() * 30); + sb.append(MessageManager + .getString("label.your_sequences_have_been_verified")); + for (String msg : warningMessages) + { + sb.append(msg).append(NEWLINE); + } + output.setText(sb.toString()); + Desktop.addInternalFrame(output, - MessageManager.getString("label.sequence_names_updated"), + MessageManager.getString("label.sequences_updated"), 600, 300); // The above is the dataset, we must now find out the index // of the viewed sequence @@ -529,38 +536,50 @@ public class DBRefFetcher implements Runnable /** * Verify local sequences in seqRefs against the retrieved sequence database - * records. + * records. Returns true if any sequence was modified as a result (start/end + * changed and/or sequence enlarged), else false. * + * @param sdataset + * dataset sequences we are retrieving for + * @param dbSource + * database source we are retrieving from + * @param retrievedAl + * retrieved sequences as alignment * @param trimDatasetSeqs - * + * if true, sequences will not be enlarged to match longer retrieved + * sequences, only their start/end adjusted + * @param warningMessages + * a list of messages to add to */ - void transferReferences(Vector sdataset, String dbSource, - AlignmentI retrievedAl, boolean trimDatasetSeqs) + boolean transferReferences(Vector sdataset, + String dbSource, + AlignmentI retrievedAl, boolean trimDatasetSeqs, + List warningMessages) { // System.out.println("trimming ? " + trimDatasetSeqs); if (retrievedAl == null || retrievedAl.getHeight() == 0) { - return; + return false; } + + boolean modified = false; SequenceI[] retrieved = recoverDbSequences(retrievedAl .getSequencesArray()); SequenceI sequence = null; - boolean transferred = false; - StringBuilder messages = new StringBuilder(64); - for (SequenceI entry : retrieved) + for (SequenceI retrievedSeq : retrieved) { // Work out which sequences this sequence matches, // taking into account all accessionIds and names in the file Vector sequenceMatches = new Vector(); // look for corresponding accession ids - DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs( - entry.getDBRefs(), new String[] { dbSource }); + DBRefEntry[] entryRefs = DBRefUtils.selectRefs(retrievedSeq.getDBRefs(), + new String[] { dbSource }); if (entryRefs == null) { System.err .println("Dud dbSource string ? no entryrefs selected for " - + dbSource + " on " + entry.getName()); + + dbSource + " on " + retrievedSeq.getName()); continue; } for (int j = 0; j < entryRefs.length; j++) @@ -614,7 +633,8 @@ public class DBRefFetcher implements Runnable */ // sequenceMatches now contains the set of all sequences associated with // the returned db record - String entrySeq = entry.getSequenceAsString().toUpperCase(); + final String retrievedSeqString = retrievedSeq.getSequenceAsString(); + String entrySeq = retrievedSeqString.toUpperCase(); for (int m = 0; m < sequenceMatches.size(); m++) { sequence = sequenceMatches.elementAt(m); @@ -627,13 +647,13 @@ public class DBRefFetcher implements Runnable // TODO: // verify sequence against the entry sequence + Mapping mp; + final int sequenceStart = sequence.getStart(); + + boolean remoteEnclosesLocal = false; String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequenceAsString()).toUpperCase(); - int absStart = entrySeq.indexOf(nonGapped); - Mapping mp; - - final int sequenceStart = sequence.getStart(); if (absStart == -1) { // couldn't find local sequence in sequence from database, so check if @@ -643,19 +663,20 @@ public class DBRefFetcher implements Runnable { // verification failed. couldn't find any relationship between // entrySeq and local sequence - messages.append(sequence.getName() - + " Sequence not 100% match with " + entry.getName() - + "\n"); + // messages suppressed as many-to-many matches are confusing + // String msg = sequence.getName() + // + " Sequence not 100% match with " + // + retrievedSeq.getName(); + // addWarningMessage(warningMessages, msg); continue; } /* - * found match for the whole of the database sequence within the local - * sequence's reference frame. + * retrieved sequence is a proper subsequence of local sequence */ - transferred = true; - sbuffer.append(sequence.getName() + " has " + absStart - + " prefixed residues compared to " + entry.getName() - + "\n"); + String msg = sequence.getName() + " has " + absStart + + " prefixed residues compared to " + + retrievedSeq.getName(); + addWarningMessage(warningMessages, msg); /* * So create a mapping to the external entry from the matching region of @@ -663,50 +684,40 @@ public class DBRefFetcher implements Runnable */ mp = new Mapping(null, new int[] { sequenceStart + absStart, sequenceStart + absStart + entrySeq.length() - 1 }, new int[] - { entry.getStart(), entry.getStart() + entrySeq.length() - 1 }, + { retrievedSeq.getStart(), retrievedSeq.getStart() + entrySeq.length() - 1 }, 1, 1); updateRefFrame = false; } else { /* - * found a match for the local sequence within sequence from - * the external database + * local sequence is a subsequence of (or matches) retrieved sequence */ - transferred = true; - - // update start and end of local sequence to place it in entry's - // reference frame. - // apply identity map map from whole of local sequence to matching - // region of database - // sequence - mp = null; // Mapping.getIdentityMap(); - // new Mapping(null, - // new int[] { absStart+sequence.getStart(), - // absStart+sequence.getStart()+entrySeq.length()-1}, - // new int[] { entry.getStart(), entry.getEnd() }, 1, 1); - // relocate local features for updated start + remoteEnclosesLocal = true; + mp = null; if (updateRefFrame) { - if (sequence.getSequenceFeatures() != null) + SequenceFeature[] sfs = sequence.getSequenceFeatures(); + if (sfs != null) { /* * relocate existing sequence features by offset */ - SequenceFeature[] sf = sequence.getSequenceFeatures(); int start = sequenceStart; int end = sequence.getEnd(); - int startShift = 1 - absStart - start; // how much the features - // are - // to be shifted by - for (int sfi = 0; sfi < sf.length; sfi++) + int startShift = 1 - absStart - start; + + if (startShift != 0) { - if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end) + for (SequenceFeature sf : sfs) { - // shift feature along by absstart - sf[sfi].setBegin(sf[sfi].getBegin() + startShift); - sf[sfi].setEnd(sf[sfi].getEnd() + startShift); + if (sf.getBegin() >= start && sf.getEnd() <= end) + { + sf.setBegin(sf.getBegin() + startShift); + sf.setEnd(sf.getEnd() + startShift); + modified = true; + } } } } @@ -714,16 +725,36 @@ public class DBRefFetcher implements Runnable } System.out.println("Adding dbrefs to " + sequence.getName() - + " from " + dbSource + " sequence : " + entry.getName()); - sequence.transferAnnotation(entry, mp); + + " from " + dbSource + " sequence : " + retrievedSeq.getName()); + sequence.transferAnnotation(retrievedSeq, mp); - absStart += entry.getStart(); + absStart += retrievedSeq.getStart(); int absEnd = absStart + nonGapped.length() - 1; if (!trimDatasetSeqs) { - // insert full length sequence from record - sequence.setSequence(entry.getSequenceAsString()); - sequence.setStart(entry.getStart()); + /* + * update start position and/or expand to longer retrieved sequence + */ + if (!retrievedSeqString.equals(sequence.getSequenceAsString()) + && remoteEnclosesLocal) + { + sequence.setSequence(retrievedSeqString); + modified = true; + addWarningMessage(warningMessages, + "Sequence for " + sequence.getName() + + " expanded from " + retrievedSeq.getName()); + } + if (sequence.getStart() != retrievedSeq.getStart()) + { + sequence.setStart(retrievedSeq.getStart()); + modified = true; + if (absStart != sequenceStart) + { + addWarningMessage(warningMessages, "Start/end position for " + + sequence.getName() + " updated from " + + retrievedSeq.getName()); + } + } } if (updateRefFrame) { @@ -731,8 +762,16 @@ public class DBRefFetcher implements Runnable if (trimDatasetSeqs) { // just fix start/end - sequence.setStart(absStart); - sequence.setEnd(absEnd); + if (sequence.getStart() != absStart + || sequence.getEnd() != absEnd) + { + sequence.setStart(absStart); + sequence.setEnd(absEnd); + modified = true; + addWarningMessage(warningMessages, "Start/end for " + + sequence.getName() + " updated from " + + retrievedSeq.getName()); + } } // search for alignment sequences to update coordinate frame for for (int alsq = 0; alsq < alseqs.length; alsq++) @@ -749,6 +788,7 @@ public class DBRefFetcher implements Runnable { alseqs[alsq].setEnd(ngAlsq.length() + alseqs[alsq].getStart() - 1); + modified = true; } } } @@ -765,10 +805,20 @@ public class DBRefFetcher implements Runnable // ids, so we can query all enabled DAS servers for them ? } } - if (!transferred) + return modified; + } + + /** + * Adds the message to the list unless it already contains it + * + * @param messageList + * @param msg + */ + void addWarningMessage(List messageList, String msg) + { + if (!messageList.contains(msg)) { - // report the ID/sequence mismatches - sbuffer.append(messages); + messageList.add(msg); } }