X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2FDBRefFetcher.java;h=ae4207b0e6b0e7e9367d2c8f3f83140b416fd709;hb=383fd91269f510b189450fbe174e9c051103c7eb;hp=748cb72daa301d050856855600c0ff8a0fc9efbf;hpb=b5f2dfda36c463b93f34db95fe5bc5ff2a1516bf;p=jalview.git diff --git a/src/jalview/ws/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java index 748cb72..ae4207b 100644 --- a/src/jalview/ws/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -26,18 +26,14 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; -import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.gui.CutAndPasteTransfer; -import jalview.gui.DasSourceBrowser; import jalview.gui.Desktop; import jalview.gui.FeatureSettings; import jalview.gui.IProgressIndicator; import jalview.gui.OOMWarning; import jalview.util.DBRefUtils; import jalview.util.MessageManager; -import jalview.ws.dbsources.das.api.jalviewSourceI; -import jalview.ws.dbsources.das.datamodel.DasSequenceSource; import jalview.ws.seqfetcher.DbSourceProxy; import java.util.ArrayList; @@ -60,6 +56,10 @@ import uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator; */ public class DBRefFetcher implements Runnable { + private static final String NEWLINE = System.lineSeparator(); + + public static final String TRIM_RETRIEVED_SEQUENCES = "TRIM_FETCHED_DATASET_SEQS"; + public interface FetchFinishedListenerI { void finished(); @@ -71,8 +71,6 @@ public class DBRefFetcher implements Runnable CutAndPasteTransfer output = new CutAndPasteTransfer(); - StringBuffer sbuffer = new StringBuffer(); - boolean running = false; /** @@ -115,9 +113,10 @@ public class DBRefFetcher implements Runnable */ public DBRefFetcher(SequenceI[] seqs, IProgressIndicator progressIndicatorFrame, - DbSourceProxy[] sources, FeatureSettings featureSettings, boolean isNucleotide) + DbSourceProxy[] sources, FeatureSettings featureSettings, + boolean isNucleotide) { - listeners = new ArrayList(); + listeners = new ArrayList<>(); this.progressWindow = progressIndicatorFrame; alseqs = new SequenceI[seqs.length]; SequenceI[] ds = new SequenceI[seqs.length]; @@ -139,7 +138,7 @@ public class DBRefFetcher implements Runnable .getSequenceFetcherSingleton(progressIndicatorFrame); // set default behaviour for transferring excess sequence data to the // dataset - trimDsSeqs = Cache.getDefault("TRIM_FETCHED_DATASET_SEQS", true); + trimDsSeqs = Cache.getDefault(TRIM_RETRIEVED_SEQUENCES, true); if (sources == null) { setDatabaseSources(featureSettings, isNucleotide); @@ -163,23 +162,7 @@ public class DBRefFetcher implements Runnable { // af.featureSettings_actionPerformed(null); String[] defdb = null; - List selsources = new ArrayList(); - Vector dasselsrc = (featureSettings != null) ? featureSettings - .getSelectedSources() : new DasSourceBrowser() - .getSelectedSources(); - - for (jalviewSourceI src : dasselsrc) - { - List sp = src.getSequenceSourceProxies(); - if (sp != null) - { - selsources.addAll(sp); - if (sp.size() > 1) - { - Cache.log.debug("Added many Db Sources for :" + src.getTitle()); - } - } - } + List selsources = new ArrayList<>(); // select appropriate databases based on alignFrame context. if (forNucleotide) { @@ -189,7 +172,7 @@ public class DBRefFetcher implements Runnable { defdb = DBRefSource.PROTEINDBS; } - List srces = new ArrayList(); + List srces = new ArrayList<>(); for (String ddb : defdb) { List srcesfordb = sfetcher.getSourceProxy(ddb); @@ -233,29 +216,6 @@ public class DBRefFetcher implements Runnable } /** - * retrieve all the das sequence sources and add them to the list of db - * sources to retrieve from - */ - public void appendAllDasSources() - { - if (dbSources == null) - { - dbSources = new DbSourceProxy[0]; - } - // append additional sources - DbSourceProxy[] otherdb = sfetcher - .getDbSourceProxyInstances(DasSequenceSource.class); - if (otherdb != null && otherdb.length > 0) - { - DbSourceProxy[] newsrc = new DbSourceProxy[dbSources.length - + otherdb.length]; - System.arraycopy(dbSources, 0, newsrc, 0, dbSources.length); - System.arraycopy(otherdb, 0, newsrc, dbSources.length, otherdb.length); - dbSources = newsrc; - } - } - - /** * start the fetcher thread * * @param waitTillFinished @@ -308,14 +268,14 @@ public class DBRefFetcher implements Runnable } else if (seqs == null) { - seqs = new Vector(); + seqs = new Vector<>(); seqs.addElement(seq); } } else { - seqs = new Vector(); + seqs = new Vector<>(); seqs.addElement(seq); } @@ -330,9 +290,8 @@ public class DBRefFetcher implements Runnable { if (dbSources == null) { - throw new Error( - MessageManager - .getString("error.implementation_error_must_init_dbsources")); + throw new Error(MessageManager + .getString("error.implementation_error_must_init_dbsources")); } running = true; long startTime = System.currentTimeMillis(); @@ -340,7 +299,7 @@ public class DBRefFetcher implements Runnable { progressWindow.setProgressBar( MessageManager.getString("status.fetching_db_refs"), - startTime); + startTime); } try { @@ -355,8 +314,9 @@ public class DBRefFetcher implements Runnable e.printStackTrace(); } - Vector sdataset = new Vector( + Vector sdataset = new Vector<>( Arrays.asList(dataset)); + List warningMessages = new ArrayList<>(); int db = 0; while (sdataset.size() > 0 && db < dbSources.length) @@ -368,8 +328,8 @@ public class DBRefFetcher implements Runnable SequenceI[] currSeqs = new SequenceI[sdataset.size()]; sdataset.copyInto(currSeqs);// seqs that are to be validated against // dbSources[db] - Vector queries = new Vector(); // generated queries curSeq - seqRefs = new Hashtable>(); + Vector queries = new Vector<>(); // generated queries curSeq + seqRefs = new Hashtable<>(); int seqIndex = 0; @@ -388,16 +348,16 @@ public class DBRefFetcher implements Runnable // Still queries to make for current seqIndex StringBuffer queryString = new StringBuffer(""); int numq = 0; - int nqSize = (maxqlen > queries.size()) ? queries - .size() : maxqlen; + int nqSize = (maxqlen > queries.size()) ? queries.size() + : maxqlen; while (queries.size() > 0 && numq < nqSize) { String query = queries.elementAt(0); if (dbsource.isValidReference(query)) { - queryString.append((numq == 0) ? "" : dbsource - .getAccessionSeparator()); + queryString.append( + (numq == 0) ? "" : dbsource.getAccessionSeparator()); queryString.append(query); numq++; } @@ -425,18 +385,19 @@ public class DBRefFetcher implements Runnable if (retrieved != null) { transferReferences(sdataset, dbsource.getDbSource(), retrieved, - trimDsSeqs); + trimDsSeqs, warningMessages); } } else { // make some more strings for use as queries - for (int i = 0; (seqIndex < dataset.length) && (i < 50); seqIndex++, i++) + for (int i = 0; (seqIndex < dataset.length) + && (i < 50); seqIndex++, i++) { SequenceI sequence = dataset[seqIndex]; - DBRefEntry[] uprefs = DBRefUtils.selectRefs( - sequence.getDBRefs(), - new String[] { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT + DBRefEntry[] uprefs = DBRefUtils + .selectRefs(sequence.getDBRefs(), new String[] + { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT // }); // check for existing dbrefs to use if (uprefs != null && uprefs.length > 0) @@ -444,7 +405,8 @@ public class DBRefFetcher implements Runnable for (int j = 0; j < uprefs.length; j++) { addSeqId(sequence, uprefs[j].getAccessionId()); - queries.addElement(uprefs[j].getAccessionId().toUpperCase()); + queries.addElement( + uprefs[j].getAccessionId().toUpperCase()); } } else @@ -461,14 +423,13 @@ public class DBRefFetcher implements Runnable // resolve the string against PICR to recover valid IDs try { - presp = picrClient - .getUPIForAccession(token, null, - picrClient.getMappedDatabaseNames(), - null, true); + presp = picrClient.getUPIForAccession(token, null, + picrClient.getMappedDatabaseNames(), null, + true); } catch (Exception e) { - System.err.println("Exception with Picr for '" + token - + "'\n"); + System.err.println( + "Exception with Picr for '" + token + "'\n"); e.printStackTrace(); } } @@ -480,8 +441,8 @@ public class DBRefFetcher implements Runnable // present, and do a transferReferences // otherwise transfer non sequence x-references directly. } - System.out - .println("Validated ID against PICR... (for what its worth):" + System.out.println( + "Validated ID against PICR... (for what its worth):" + token); addSeqId(sequence, token); queries.addElement(token.toUpperCase()); @@ -489,7 +450,8 @@ public class DBRefFetcher implements Runnable else { // if () - // System.out.println("Not querying source with token="+token+"\n"); + // System.out.println("Not querying source with + // token="+token+"\n"); addSeqId(sequence, token); queries.addElement(token.toUpperCase()); } @@ -501,14 +463,20 @@ public class DBRefFetcher implements Runnable // advance to next database db++; } // all databases have been queried - if (sbuffer.length() > 0) + if (!warningMessages.isEmpty()) { - output.setText(MessageManager - .getString("label.your_sequences_have_been_verified") - + sbuffer.toString()); + StringBuilder sb = new StringBuilder(warningMessages.size() * 30); + sb.append(MessageManager + .getString("label.your_sequences_have_been_verified")); + for (String msg : warningMessages) + { + sb.append(msg).append(NEWLINE); + } + output.setText(sb.toString()); + Desktop.addInternalFrame(output, - MessageManager.getString("label.sequence_names_updated"), - 600, 300); + MessageManager.getString("label.sequences_updated"), 600, + 300); // The above is the dataset, we must now find out the index // of the viewed sequence @@ -529,38 +497,50 @@ public class DBRefFetcher implements Runnable /** * Verify local sequences in seqRefs against the retrieved sequence database - * records. + * records. Returns true if any sequence was modified as a result (start/end + * changed and/or sequence enlarged), else false. * + * @param sdataset + * dataset sequences we are retrieving for + * @param dbSource + * database source we are retrieving from + * @param retrievedAl + * retrieved sequences as alignment * @param trimDatasetSeqs - * + * if true, sequences will not be enlarged to match longer retrieved + * sequences, only their start/end adjusted + * @param warningMessages + * a list of messages to add to */ - void transferReferences(Vector sdataset, String dbSource, - AlignmentI retrievedAl, boolean trimDatasetSeqs) + boolean transferReferences(Vector sdataset, String dbSource, + AlignmentI retrievedAl, boolean trimDatasetSeqs, + List warningMessages) { // System.out.println("trimming ? " + trimDatasetSeqs); if (retrievedAl == null || retrievedAl.getHeight() == 0) { - return; + return false; } - SequenceI[] retrieved = recoverDbSequences(retrievedAl - .getSequencesArray()); + + boolean modified = false; + SequenceI[] retrieved = recoverDbSequences( + retrievedAl.getSequencesArray()); SequenceI sequence = null; - boolean transferred = false; - StringBuilder messages = new StringBuilder(64); - for (SequenceI entry : retrieved) + for (SequenceI retrievedSeq : retrieved) { // Work out which sequences this sequence matches, // taking into account all accessionIds and names in the file - Vector sequenceMatches = new Vector(); + Vector sequenceMatches = new Vector<>(); // look for corresponding accession ids - DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs( - entry.getDBRefs(), new String[] { dbSource }); + DBRefEntry[] entryRefs = DBRefUtils + .selectRefs(retrievedSeq.getDBRefs(), new String[] + { dbSource }); if (entryRefs == null) { System.err .println("Dud dbSource string ? no entryrefs selected for " - + dbSource + " on " + entry.getName()); + + dbSource + " on " + retrievedSeq.getName()); continue; } for (int j = 0; j < entryRefs.length; j++) @@ -614,7 +594,8 @@ public class DBRefFetcher implements Runnable */ // sequenceMatches now contains the set of all sequences associated with // the returned db record - String entrySeq = entry.getSequenceAsString().toUpperCase(); + final String retrievedSeqString = retrievedSeq.getSequenceAsString(); + String entrySeq = retrievedSeqString.toUpperCase(); for (int m = 0; m < sequenceMatches.size(); m++) { sequence = sequenceMatches.elementAt(m); @@ -627,13 +608,14 @@ public class DBRefFetcher implements Runnable // TODO: // verify sequence against the entry sequence - String nonGapped = AlignSeq.extractGaps("-. ", - sequence.getSequenceAsString()).toUpperCase(); - - int absStart = entrySeq.indexOf(nonGapped); Mapping mp; - final int sequenceStart = sequence.getStart(); + + boolean remoteEnclosesLocal = false; + String nonGapped = AlignSeq + .extractGaps("-. ", sequence.getSequenceAsString()) + .toUpperCase(); + int absStart = entrySeq.indexOf(nonGapped); if (absStart == -1) { // couldn't find local sequence in sequence from database, so check if @@ -643,87 +625,89 @@ public class DBRefFetcher implements Runnable { // verification failed. couldn't find any relationship between // entrySeq and local sequence - messages.append(sequence.getName() - + " Sequence not 100% match with " + entry.getName() - + "\n"); + // messages suppressed as many-to-many matches are confusing + // String msg = sequence.getName() + // + " Sequence not 100% match with " + // + retrievedSeq.getName(); + // addWarningMessage(warningMessages, msg); continue; } /* - * found match for the whole of the database sequence within the local - * sequence's reference frame. + * retrieved sequence is a proper subsequence of local sequence */ - transferred = true; - sbuffer.append(sequence.getName() + " has " + absStart - + " prefixed residues compared to " + entry.getName() - + "\n"); + String msg = sequence.getName() + " has " + absStart + + " prefixed residues compared to " + + retrievedSeq.getName(); + addWarningMessage(warningMessages, msg); /* * So create a mapping to the external entry from the matching region of * the local sequence, and leave local start/end untouched. */ - mp = new Mapping(null, new int[] { sequenceStart + absStart, - sequenceStart + absStart + entrySeq.length() - 1 }, new int[] - { entry.getStart(), entry.getStart() + entrySeq.length() - 1 }, + mp = new Mapping(null, + new int[] + { sequenceStart + absStart, + sequenceStart + absStart + entrySeq.length() - 1 }, + new int[] + { retrievedSeq.getStart(), + retrievedSeq.getStart() + entrySeq.length() - 1 }, 1, 1); updateRefFrame = false; } else { /* - * found a match for the local sequence within sequence from - * the external database + * local sequence is a subsequence of (or matches) retrieved sequence */ - transferred = true; - - // update start and end of local sequence to place it in entry's - // reference frame. - // apply identity map map from whole of local sequence to matching - // region of database - // sequence - mp = null; // Mapping.getIdentityMap(); - // new Mapping(null, - // new int[] { absStart+sequence.getStart(), - // absStart+sequence.getStart()+entrySeq.length()-1}, - // new int[] { entry.getStart(), entry.getEnd() }, 1, 1); - // relocate local features for updated start + remoteEnclosesLocal = true; + mp = null; if (updateRefFrame) { - if (sequence.getSequenceFeatures() != null) + /* + * relocate existing sequence features by offset + */ + int startShift = absStart - sequenceStart + 1; + if (startShift != 0) { - /* - * relocate existing sequence features by offset - */ - SequenceFeature[] sf = sequence.getSequenceFeatures(); - int start = sequenceStart; - int end = sequence.getEnd(); - int startShift = 1 - absStart - start; // how much the features - // are - // to be shifted by - for (int sfi = 0; sfi < sf.length; sfi++) - { - if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end) - { - // shift feature along by absstart - sf[sfi].setBegin(sf[sfi].getBegin() + startShift); - sf[sfi].setEnd(sf[sfi].getEnd() + startShift); - } - } + modified |= sequence.getFeatures().shiftFeatures(1, + startShift); } } } System.out.println("Adding dbrefs to " + sequence.getName() - + " from " + dbSource + " sequence : " + entry.getName()); - sequence.transferAnnotation(entry, mp); + + " from " + dbSource + " sequence : " + + retrievedSeq.getName()); + sequence.transferAnnotation(retrievedSeq, mp); - absStart += entry.getStart(); + absStart += retrievedSeq.getStart(); int absEnd = absStart + nonGapped.length() - 1; if (!trimDatasetSeqs) { - // insert full length sequence from record - sequence.setSequence(entry.getSequenceAsString()); - sequence.setStart(entry.getStart()); + /* + * update start position and/or expand to longer retrieved sequence + */ + if (!retrievedSeqString.equals(sequence.getSequenceAsString()) + && remoteEnclosesLocal) + { + sequence.setSequence(retrievedSeqString); + modified = true; + addWarningMessage(warningMessages, + "Sequence for " + sequence.getName() + " expanded from " + + retrievedSeq.getName()); + } + if (sequence.getStart() != retrievedSeq.getStart()) + { + sequence.setStart(retrievedSeq.getStart()); + modified = true; + if (absStart != sequenceStart) + { + addWarningMessage(warningMessages, + "Start/end position for " + sequence.getName() + + " updated from " + retrievedSeq.getName()); + } + } } if (updateRefFrame) { @@ -731,24 +715,34 @@ public class DBRefFetcher implements Runnable if (trimDatasetSeqs) { // just fix start/end - sequence.setStart(absStart); - sequence.setEnd(absEnd); + if (sequence.getStart() != absStart + || sequence.getEnd() != absEnd) + { + sequence.setStart(absStart); + sequence.setEnd(absEnd); + modified = true; + addWarningMessage(warningMessages, + "Start/end for " + sequence.getName() + + " updated from " + retrievedSeq.getName()); + } } // search for alignment sequences to update coordinate frame for for (int alsq = 0; alsq < alseqs.length; alsq++) { if (alseqs[alsq].getDatasetSequence() == sequence) { - String ngAlsq = AlignSeq.extractGaps("-. ", - alseqs[alsq].getSequenceAsString()).toUpperCase(); + String ngAlsq = AlignSeq + .extractGaps("-. ", + alseqs[alsq].getSequenceAsString()) + .toUpperCase(); int oldstrt = alseqs[alsq].getStart(); alseqs[alsq].setStart(sequence.getSequenceAsString() - .toUpperCase().indexOf(ngAlsq) - + sequence.getStart()); + .toUpperCase().indexOf(ngAlsq) + sequence.getStart()); if (oldstrt != alseqs[alsq].getStart()) { - alseqs[alsq].setEnd(ngAlsq.length() - + alseqs[alsq].getStart() - 1); + alseqs[alsq].setEnd( + ngAlsq.length() + alseqs[alsq].getStart() - 1); + modified = true; } } } @@ -761,14 +755,22 @@ public class DBRefFetcher implements Runnable // and remove it from the rest // TODO: decide if we should remove annotated sequence from set sdataset.remove(sequence); - // TODO: should we make a note of sequences that have received new DB - // ids, so we can query all enabled DAS servers for them ? } } - if (!transferred) + return modified; + } + + /** + * Adds the message to the list unless it already contains it + * + * @param messageList + * @param msg + */ + void addWarningMessage(List messageList, String msg) + { + if (!messageList.contains(msg)) { - // report the ID/sequence mismatches - sbuffer.append(messages); + messageList.add(msg); } } @@ -780,8 +782,9 @@ public class DBRefFetcher implements Runnable */ private SequenceI[] recoverDbSequences(SequenceI[] sequencesArray) { - Vector nseq = new Vector(); - for (int i = 0; sequencesArray != null && i < sequencesArray.length; i++) + Vector nseq = new Vector<>(); + for (int i = 0; sequencesArray != null + && i < sequencesArray.length; i++) { nseq.addElement(sequencesArray[i]); DBRefEntry[] dbr = sequencesArray[i].getDBRefs();