X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2FDBRefFetcher.java;h=581901a5450a2f9655ea2bf278ab694c988f27db;hb=2763845dada91cdfe266200cf852146c70392ad7;hp=82b411a3fa888f991d435cae8edce1d7f6fd3544;hpb=bc9b9a3dc799adc3d14ef23115074c8871d2746d;p=jalview.git diff --git a/src/jalview/ws/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java index 82b411a..581901a 100644 --- a/src/jalview/ws/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -1,39 +1,50 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * + * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1) + * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.ws; -import java.io.*; -import java.util.*; - -import org.exolab.castor.mapping.*; -import org.exolab.castor.xml.*; -import jalview.analysis.*; -import jalview.datamodel.*; +import jalview.analysis.AlignSeq; +import jalview.bin.Cache; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; -import jalview.gui.*; -import jalview.ws.dbsources.Uniprot; -import jalview.ws.ebi.EBIFetchClient; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.gui.AlignFrame; +import jalview.gui.CutAndPasteTransfer; +import jalview.gui.Desktop; +import jalview.gui.IProgressIndicator; +import jalview.gui.OOMWarning; + +import java.lang.reflect.Array; +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.StringTokenizer; +import java.util.Vector; + +import org.biojava.dasobert.dasregistry.DasSource; + +import uk.ac.ebi.picr.model.UPEntry; /** - * Implements a runnable for validating a sequence - * against external databases and then propagating - * references and features onto the sequence(s) + * Implements a runnable for validating a sequence against external databases + * and then propagating references and features onto the sequence(s) * * @author $author$ * @version $Revision$ @@ -50,6 +61,11 @@ public class DBRefFetcher implements Runnable boolean running = false; + /** + * picr client instance + */ + uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperInterface picrClient = null; + // /This will be a collection of Vectors of sequenceI refs. // The key will be the seq name or accession id of the seq Hashtable seqRefs; @@ -58,24 +74,45 @@ public class DBRefFetcher implements Runnable SequenceFetcher sfetcher; + private SequenceI[] alseqs; + public DBRefFetcher() { } /** - * Creates a new SequenceFeatureFetcher object. + * Creates a new SequenceFeatureFetcher object and fetches from the currently + * selected set of databases. * * @param seqs - * fetch references for these sequences + * fetch references for these sequences * @param af - * the parent alignframe for progress bar monitoring. + * the parent alignframe for progress bar monitoring. */ public DBRefFetcher(SequenceI[] seqs, AlignFrame af) { + this(seqs, af, null); + } + + /** + * Creates a new SequenceFeatureFetcher object and fetches from the currently + * selected set of databases. + * + * @param seqs + * fetch references for these sequences + * @param af + * the parent alignframe for progress bar monitoring. + * @param sources + * array of database source strings to query references from + */ + public DBRefFetcher(SequenceI[] seqs, AlignFrame af, String[] sources) + { this.af = af; + alseqs = new SequenceI[seqs.length]; SequenceI[] ds = new SequenceI[seqs.length]; for (int i = 0; i < seqs.length; i++) { + alseqs[i] = seqs[i]; if (seqs[i].getDatasetSequence() != null) ds[i] = seqs[i].getDatasetSequence(); else @@ -83,15 +120,84 @@ public class DBRefFetcher implements Runnable } this.dataset = ds; // TODO Jalview 2.5 lots of this code should be in the gui package! - sfetcher = jalview.gui.SequenceFetcher.getSequenceFetcherSingleton(af); - // select appropriate databases based on alignFrame context. - if (af.getViewport().getAlignment().isNucleotide()) + sfetcher = jalview.gui.SequenceFetcher.getSequenceFetcherSingleton(af); + if (sources == null) { - dbSources = DBRefSource.DNACODINGDBS; + // af.featureSettings_actionPerformed(null); + String[] defdb = null, otherdb = sfetcher + .getDbInstances(jalview.ws.dbsources.DasSequenceSource.class); + Vector selsources = new Vector(), dasselsrc = (af.featureSettings != null) ? af.featureSettings + .getSelectedSources() + : new jalview.gui.DasSourceBrowser().getSelectedSources(); + Enumeration en = dasselsrc.elements(); + while (en.hasMoreElements()) + { + DasSource src = (DasSource) en.nextElement(); + selsources.addElement(src.getNickname()); + } + int osel = 0; + for (int o = 0; otherdb != null && o < otherdb.length; o++) + { + if (!selsources.contains(otherdb[o])) + { + otherdb[o] = null; + } + else + { + osel++; + } + } + // select appropriate databases based on alignFrame context. + if (af.getViewport().getAlignment().isNucleotide()) + { + defdb = DBRefSource.DNACODINGDBS; + } + else + { + defdb = DBRefSource.PROTEINDBS; + } + // append the selected sequence sources to the default dbs + dbSources = new String[defdb.length + osel]; + System.arraycopy(defdb, 0, dbSources, 0, defdb.length); + for (int o = 0, op = defdb.length; otherdb != null + && o < otherdb.length; o++) + { + if (otherdb[o] != null) + { + dbSources[op++] = otherdb[o]; + } + } } else { - dbSources = DBRefSource.PROTEINDBS; + // we assume the caller knows what they're doing and ensured that all the + // db source names are valid + dbSources = sources; + } + } + + /** + * retrieve all the das sequence sources and add them to the list of db + * sources to retrieve from + */ + public void appendAllDasSources() + { + if (dbSources == null) + { + dbSources = new String[] + {}; + } + // append additional sources + String[] otherdb = sfetcher + .getDbInstances(jalview.ws.dbsources.DasSequenceSource.class); + if (otherdb != null && otherdb.length > 0) + { + String[] newsrc = new String[dbSources.length + otherdb.length]; + System.arraycopy(dbSources, 0, newsrc, 0, dbSources.length); + System + .arraycopy(otherdb, 0, newsrc, dbSources.length, + otherdb.length); + dbSources = newsrc; } } @@ -99,7 +205,7 @@ public class DBRefFetcher implements Runnable * start the fetcher thread * * @param waitTillFinished - * true to block until the fetcher has finished + * true to block until the fetcher has finished */ public void fetchDBRefs(boolean waitTillFinished) { @@ -126,9 +232,9 @@ public class DBRefFetcher implements Runnable * could be either seq name or dbref id * * @param seq - * SequenceI + * SequenceI * @param key - * String + * String */ void addSeqId(SequenceI seq, String key) { @@ -168,9 +274,21 @@ public class DBRefFetcher implements Runnable { throw new Error("Implementation error. Must initialise dbSources"); } + running = true; long startTime = System.currentTimeMillis(); af.setProgressBar("Fetching db refs", startTime); - running = true; + try + { + if (Cache.getDefault("DBREFFETCH_USEPICR", false)) + { + picrClient = new uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator() + .getAccessionMapperPort(); + } + } catch (Exception e) + { + System.err.println("Couldn't locate PICR service instance.\n"); + e.printStackTrace(); + } int db = 0; Vector sdataset = new Vector(); for (int s = 0; s < dataset.length; s++) @@ -194,8 +312,10 @@ public class DBRefFetcher implements Runnable { maxqlen = ((Integer) dbsource.getDbSourceProperties().get( DBRefSource.MULTIACC)).intValue(); - } else { - maxqlen=1; + } + else + { + maxqlen = 1; } // iterate through db for each remaining un-verified sequence SequenceI[] currSeqs = new SequenceI[sdataset.size()]; @@ -212,31 +332,40 @@ public class DBRefFetcher implements Runnable { // Still queries to make for current seqIndex StringBuffer queryString = new StringBuffer(""); - int nqSize = (maxqlen > queries.size()) ? queries.size() - : maxqlen; - for (int nq = 0, numq = 0; nq < nqSize; nq++) + int numq = 0, nqSize = (maxqlen > queries.size()) ? queries + .size() : maxqlen; + + while (queries.size() > 0 && numq < nqSize) { - String query = (String) queries.elementAt(nq); + String query = (String) queries.elementAt(0); if (dbsource.isValidReference(query)) { - queryString.append((nq == 0) ? "" : dbsource + queryString.append((numq == 0) ? "" : dbsource .getAccessionSeparator()); queryString.append(query); numq++; } - } - for (int nq = 0; nq < nqSize; nq++) - { + // remove the extracted query string queries.removeElementAt(0); } // make the queries and process the response AlignmentI retrieved = null; try { + if (jalview.bin.Cache.log.isDebugEnabled()) + { + jalview.bin.Cache.log.debug("Querying " + + dbsource.getDbName() + " with : '" + + queryString.toString() + "'"); + } retrieved = dbsource.getSequenceRecords(queryString.toString()); } catch (Exception ex) { ex.printStackTrace(); + } catch (OutOfMemoryError err) + { + new OOMWarning("retrieving database references (" + + queryString.toString() + ")", err); } if (retrieved != null) { @@ -254,7 +383,7 @@ public class DBRefFetcher implements Runnable { dbSources[db] }); // jalview.datamodel.DBRefSource.UNIPROT // }); // check for existing dbrefs to use - if (uprefs != null) + if (uprefs != null && uprefs.length > 0) { for (int j = 0; j < uprefs.length; j++) { @@ -272,8 +401,43 @@ public class DBRefFetcher implements Runnable while (st.hasMoreTokens()) { String token = st.nextToken(); - addSeqId(sequence, token); - queries.addElement(token.toUpperCase()); + UPEntry[] presp = null; + if (picrClient != null) + { + // resolve the string against PICR to recover valid IDs + try + { + presp = picrClient + .getUPIForAccession(token, null, picrClient + .getMappedDatabaseNames(), null, true); + } catch (Exception e) + { + System.err.println("Exception with Picr for '" + token + + "'\n"); + e.printStackTrace(); + } + } + if (presp != null && presp.length > 0) + { + for (int id = 0; id < presp.length; id++) + { + // construct sequences from response if sequences are + // present, and do a transferReferences + // otherwise transfer non sequence x-references directly. + } + System.out + .println("Validated ID against PICR... (for what its worth):" + + token); + addSeqId(sequence, token); + queries.addElement(token.toUpperCase()); + } + else + { + // if () + // System.out.println("Not querying source with token="+token+"\n"); + addSeqId(sequence, token); + queries.addElement(token.toUpperCase()); + } } } } @@ -311,13 +475,15 @@ public class DBRefFetcher implements Runnable AlignmentI retrievedAl) // File // file) { - if (retrievedAl == null || retrievedAl.getHeight() == 0) { return; } - SequenceI[] retrieved = retrievedAl.getSequencesArray(); + SequenceI[] retrieved = recoverDbSequences(retrievedAl + .getSequencesArray()); SequenceI sequence = null; + boolean transferred = false; + StringBuffer messages = new StringBuffer(); // Vector entries = new Uniprot().getUniprotEntries(file); @@ -334,31 +500,36 @@ public class DBRefFetcher implements Runnable DBRefEntry[] entryRefs = jalview.util.DBRefUtils.selectRefs(entry .getDBRef(), new String[] { dbSource }); + if (entryRefs==null) { + System.err.println("Dud dbSource string ? no entryrefs selected for "+dbSource+ " on "+entry.getName()); + continue; + } for (int j = 0; j < entryRefs.length; j++) - { - String accessionId = entryRefs[j].getAccessionId(); // .getAccession().elementAt(j).toString(); - // match up on accessionId - if (seqRefs.containsKey(accessionId.toUpperCase())) { - Vector seqs = (Vector) seqRefs.get(accessionId); - for (int jj = 0; jj < seqs.size(); jj++) + String accessionId = entryRefs[j].getAccessionId(); // .getAccession().elementAt(j).toString(); + // match up on accessionId + if (seqRefs.containsKey(accessionId.toUpperCase())) { - sequence = (SequenceI) seqs.elementAt(jj); - if (!sequenceMatches.contains(sequence)) + Vector seqs = (Vector) seqRefs.get(accessionId); + for (int jj = 0; jj < seqs.size(); jj++) { - sequenceMatches.addElement(sequence); + sequence = (SequenceI) seqs.elementAt(jj); + if (!sequenceMatches.contains(sequence)) + { + sequenceMatches.addElement(sequence); + } } } } - } - if (sequenceMatches.size()==0) - { - // failed to match directly on accessionId==query so just compare all sequences to entry + if (sequenceMatches.size() == 0) + { + // failed to match directly on accessionId==query so just compare all + // sequences to entry Enumeration e = seqRefs.keys(); while (e.hasMoreElements()) { Vector sqs = (Vector) seqRefs.get(e.nextElement()); - if (sqs!=null && sqs.size()>0) + if (sqs != null && sqs.size() > 0) { Enumeration sqe = sqs.elements(); while (sqe.hasMoreElements()) @@ -388,9 +559,12 @@ public class DBRefFetcher implements Runnable for (int m = 0; m < sequenceMatches.size(); m++) { sequence = (SequenceI) sequenceMatches.elementAt(m); - // only update start and end positions and shift features if there are no existing references - // TODO: test for legacy where uniprot or EMBL refs exist but no mappings are made (but content matches retrieved set) - boolean updateRefFrame = sequence.getDBRef()==null || sequence.getDBRef().length==0; + // only update start and end positions and shift features if there are + // no existing references + // TODO: test for legacy where uniprot or EMBL refs exist but no + // mappings are made (but content matches retrieved set) + boolean updateRefFrame = sequence.getDBRef() == null + || sequence.getDBRef().length == 0; // verify sequence against the entry sequence String nonGapped = AlignSeq.extractGaps("-. ", @@ -406,27 +580,31 @@ public class DBRefFetcher implements Runnable absStart = nonGapped.indexOf(entrySeq); if (absStart == -1) { // verification failed. - sbuffer.append(sequence.getName() + messages.append(sequence.getName() + " SEQUENCE NOT %100 MATCH \n"); continue; } - + transferred = true; sbuffer.append(sequence.getName() + " HAS " + absStart - + " PREFIXED RESIDUES COMPARED TO " + dbSource+"\n"); + + " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n"); // - // + " - ANY SEQUENCE FEATURES" - // + " HAVE BEEN ADJUSTED ACCORDINGLY \n"); + // + " - ANY SEQUENCE FEATURES" + // + " HAVE BEEN ADJUSTED ACCORDINGLY \n"); // absStart = 0; // create valid mapping between matching region of local sequence and // the mapped sequence mp = new Mapping(null, new int[] - { sequence.getStart()+absStart, sequence.getStart()+absStart+entrySeq.length()-1 }, new int[] - { entry.getStart(), - entry.getStart() + entrySeq.length() - 1 }, 1, 1); - updateRefFrame=false; // mapping is based on current start/end so don't modify start and end + { sequence.getStart() + absStart, + sequence.getStart() + absStart + entrySeq.length() - 1 }, + new int[] + { entry.getStart(), + entry.getStart() + entrySeq.length() - 1 }, 1, 1); + updateRefFrame = false; // mapping is based on current start/end so + // don't modify start and end } else { + transferred = true; // update start and end of local sequence to place it in entry's // reference frame. // apply identity map map from whole of local sequence to matching @@ -438,12 +616,14 @@ public class DBRefFetcher implements Runnable // absStart+sequence.getStart()+entrySeq.length()-1}, // new int[] { entry.getStart(), entry.getEnd() }, 1, 1); // relocate local features for updated start - if (updateRefFrame && sequence.getSequenceFeatures() != null) - { + if (updateRefFrame) { + if (sequence.getSequenceFeatures() != null) + { SequenceFeature[] sf = sequence.getSequenceFeatures(); int start = sequence.getStart(); int end = sequence.getEnd(); - int startShift = 1-absStart-start; // how much the features are to be shifted by + int startShift = 1 - absStart - start; // how much the features are + // to be shifted by for (int sfi = 0; sfi < sf.length; sfi++) { if (sf[sfi].getBegin() >= start && sf[sfi].getEnd() <= end) @@ -453,6 +633,7 @@ public class DBRefFetcher implements Runnable sf[sfi].setEnd(sf[sfi].getEnd() + startShift); } } + } } } @@ -467,11 +648,67 @@ public class DBRefFetcher implements Runnable // finally, update local sequence reference frame if we're allowed sequence.setStart(absStart); sequence.setEnd(absEnd); + // search for alignment sequences to update coordinate frame for + for (int alsq = 0; alsq 0) + { + sequencesArray = new SequenceI[nseq.size()]; + nseq.toArray(sequencesArray); + } + return sequencesArray; } }