From 8504d9c48d03acee51397e5fefa3df660ad0a6cc Mon Sep 17 00:00:00 2001
From: Jim Procter
Date: Sun, 28 Jun 2015 17:10:57 +0100
Subject: [PATCH] JAL-1705 renamed SeqFetcherTest to EnsemblSeqProxyTest

---
 src/jalview/ext/ensembl/EnsemblSeqProxy.java       |  252 ++++++++++++++++++++
 ...eqFetcherTest.java => EnsemblSeqProxyTest.java} |    0
 2 files changed, 252 insertions(+)
 create mode 100644 src/jalview/ext/ensembl/EnsemblSeqProxy.java
 rename test/jalview/ext/ensembl/{SeqFetcherTest.java => EnsemblSeqProxyTest.java} (100%)

diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java
new file mode 100644
index 0000000..137c9b0
--- /dev/null
+++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java
@@ -0,0 +1,252 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+import jalview.exceptions.JalviewException;
+import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+import jalview.util.DBRefUtils;
+import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.ws.seqfetcher.DbSourceProxyImpl;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.stevesoft.pat.Regex;
+
+/**
+ * Base class for sequence database proxies that retrieve records through a
+ * {@link SeqFetcher}. Subclasses choose which kind of sequence is requested by
+ * implementing {@link #getSourceEnsemblType()}.
+ */
+public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
+        DbSourceProxy
+{
+  /** Maximum number of accessions sent in a single fetch request. */
+  private static final int CHUNK_SIZE = 50;
+
+  /** Client used to check availability and to fetch sequence data. */
+  SeqFetcher sf;
+
+  /** True while {@link #getSequenceRecords(String)} is executing. */
+  private volatile boolean inProgress = false;
+
+  /**
+   * Creates the fetcher client and registers the properties of this source.
+   *
+   * @throws Exception
+   *           if setting up the fetcher fails
+   */
+  public EnsemblSeqProxy() throws Exception
+  {
+    sf = new SeqFetcher();
+    addDbSourceProperty(DBRefSource.MULTIACC);
+    addDbSourceProperty(DBRefSource.SEQDB);
+    // decide whether these need to be filtered according to return type
+    addDbSourceProperty(DBRefSource.PROTSEQDB);
+    addDbSourceProperty(DBRefSource.DNACODINGSEQDB);
+    addDbSourceProperty(DBRefSource.DNASEQDB);
+  }
+
+  /**
+   * @return the database source name, always "ENSEMBL"
+   */
+  @Override
+  public String getDbSource()
+  {
+    return "ENSEMBL";
+  }
+
+  /**
+   * @return a fixed placeholder version, "0"
+   */
+  @Override
+  public String getDbVersion()
+  {
+    return "0"; // sf.getVersion();
+  }
+
+  /**
+   * @return the accession separator, a single space
+   */
+  @Override
+  public String getAccessionSeparator()
+  {
+    return " ";
+  }
+
+  /**
+   * @return a new pattern matching an ENSP, ENST, ENSG or CCDS prefix followed
+   *         by at least three digits or dots
+   */
+  @Override
+  public Regex getAccessionValidator()
+  {
+    return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
+  }
+
+  /**
+   * @return an accession that can be used to test this source
+   */
+  @Override
+  public String getTestQuery()
+  {
+    return "ENSP00000288602";
+  }
+
+  /**
+   * Tests an accession against {@link #getAccessionValidator()}.
+   *
+   * @param accession
+   *          the identifier to check
+   * @return the result of searching the accession with the validator pattern
+   */
+  @Override
+  public boolean isValidReference(String accession)
+  {
+    return getAccessionValidator().search(accession);
+  }
+
+  /**
+   * Fetches the sequences for a list of accessions, requesting them in chunks
+   * of at most {@value #CHUNK_SIZE} ids.
+   *
+   * @param queries
+   *          accessions separated by one or more spaces
+   * @return an alignment holding every sequence retrieved, or null if no
+   *         sequences were retrieved
+   * @throws Exception
+   *           a {@link JalviewException} if a chunk fails before anything has
+   *           been retrieved; a later failure is logged, retrieval stops and
+   *           the sequences gathered so far are returned
+   */
+  @Override
+  public AlignmentI getSequenceRecords(String queries) throws Exception
+  {
+    inProgress = true;
+    try
+    {
+      List<String> allIds = Arrays.asList(queries.split(" +"));
+      int total = allIds.size();
+      AlignmentI result = null;
+      for (int from = 0; from < total; from += CHUNK_SIZE)
+      {
+        List<String> ids = allIds.subList(from,
+                Math.min(from + CHUNK_SIZE, total));
+        try
+        {
+          AlignmentI chunk = fetchChunk(ids);
+          if (result == null)
+          {
+            result = chunk;
+          }
+          else if (chunk != null)
+          {
+            result.append(chunk);
+          }
+        } catch (Throwable r)
+        {
+          // 'from' counts ids, so divide to report the number of chunks
+          // processed before the failure
+          String msg = "Aborting ID retrieval after " + (from / CHUNK_SIZE)
+                  + " chunks.";
+          if (result == null)
+          {
+            throw new JalviewException(msg + " Unexpected problem ("
+                    + r.getLocalizedMessage() + ")", r);
+          }
+          System.err.println(msg);
+          r.printStackTrace();
+          // stop here, as the message says, and return the partial result
+          break;
+        }
+      }
+      return result;
+    } finally
+    {
+      // clear the flag however the method exits
+      inProgress = false;
+    }
+  }
+
+  /**
+   * Retrieves one chunk of accessions and adds a database reference to each
+   * returned sequence whose name is one of the requested ids.
+   *
+   * @param ids
+   *          the accessions to request
+   * @return the sequences parsed from the response, or null if there were none
+   * @throws Exception
+   *           if the service is unavailable or the response cannot be read
+   */
+  private AlignmentI fetchChunk(List<String> ids) throws Exception
+  {
+    if (!sf.isEnsemblAvailable())
+    {
+      throw new JalviewException("ENSEMBL Rest API not available.");
+    }
+    FileParse fp = new FileParse(sf.getSequenceReader(
+            getSourceEnsemblType(), ids));
+    FastaFile fr = new FastaFile(fp);
+    int found = fr.getSeqs().size();
+    if (fr.hasWarningMessage())
+    {
+      System.out.println("Warning when retrieving " + ids.size() + " ids"
+              + ids.toString() + "\n" + fr.getWarningMessage());
+    }
+    else if (found != ids.size())
+    {
+      System.out.println("Only retrieved " + found + " sequences for "
+              + ids.size() + " query strings.");
+    }
+    if (found == 0)
+    {
+      return null;
+    }
+    AlignmentI seqal = new jalview.datamodel.Alignment(fr.getSeqsAsArray());
+    for (SequenceI sq : seqal.getSequences())
+    {
+      if (ids.contains(sq.getName()))
+      {
+        DBRefUtils.parseToDbRef(sq, "ENSEMBL", "0", sq.getName());
+      }
+    }
+    return seqal;
+  }
+
+  /**
+   *
+   * @return the configured sequence return type for this source
+   */
+  protected abstract EnsemblSeqType getSourceEnsemblType();
+
+  /**
+   * @return true while a call to {@link #getSequenceRecords(String)} is running
+   */
+  @Override
+  public boolean queryInProgress()
+  {
+    return inProgress;
+  }
+
+  /**
+   * @return always null
+   */
+  @Override
+  public StringBuffer getRawRecords()
+  {
+    return null;
+  }
+
+  /**
+   * @return the tier of this source, always 0
+   */
+  @Override
+  public int getTier()
+  {
+    return 0;
+  }
+}
diff --git a/test/jalview/ext/ensembl/SeqFetcherTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
similarity index 100%
rename from test/jalview/ext/ensembl/SeqFetcherTest.java
rename to test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
-- 
1.7.10.2