From 409fd993c6e32e999b24082aae107a043a590f8f Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 8 Jan 2016 10:03:40 +0000 Subject: [PATCH] JAL-1705 DbSourceProxy properties converted to methods, tidy/format code --- src/jalview/datamodel/DBRefSource.java | 43 --- src/jalview/ext/ensembl/EnsemblCdna.java | 2 +- src/jalview/ext/ensembl/EnsemblCds.java | 2 +- src/jalview/ext/ensembl/EnsemblGenome.java | 2 +- src/jalview/ext/ensembl/EnsemblProtein.java | 17 +- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 53 ++-- src/jalview/ext/ensembl/EnsemblTranscript.java | 3 +- src/jalview/ext/ensembl/SeqFetcher.java | 137 ++++---- src/jalview/gui/SequenceFetcher.java | 6 +- src/jalview/util/DBRefUtils.java | 32 -- src/jalview/ws/DBRefFetcher.java | 13 +- src/jalview/ws/SequenceFetcher.java | 333 ++------------------ .../{EmblCdsSouce.java => EmblCdsSource.java} | 16 +- src/jalview/ws/dbsources/EmblSource.java | 16 +- src/jalview/ws/dbsources/EmblXmlSource.java | 6 + src/jalview/ws/dbsources/GeneDbSource.java | 14 +- src/jalview/ws/dbsources/Pdb.java | 12 +- src/jalview/ws/dbsources/Pfam.java | 21 +- src/jalview/ws/dbsources/PfamFull.java | 8 +- src/jalview/ws/dbsources/PfamSeed.java | 7 +- src/jalview/ws/dbsources/Rfam.java | 13 +- src/jalview/ws/dbsources/RfamFull.java | 8 +- src/jalview/ws/dbsources/RfamSeed.java | 8 +- src/jalview/ws/dbsources/Uniprot.java | 32 +- .../{UnprotName.java => UniprotName.java} | 8 +- src/jalview/ws/dbsources/Xfam.java | 17 +- src/jalview/ws/seqfetcher/ASequenceFetcher.java | 76 ++--- src/jalview/ws/seqfetcher/DbSourceProxy.java | 53 ++-- src/jalview/ws/seqfetcher/DbSourceProxyImpl.java | 84 ++--- test/jalview/ext/ensembl/EnsemblSeqProxyTest.java | 188 +---------- test/jalview/ext/ensembl/SeqFetcherTest.java | 175 ++++++++++ test/jalview/ws/SequenceFetcherTest.java | 232 ++++++++++++++ 32 files changed, 795 insertions(+), 842 deletions(-) rename src/jalview/ws/dbsources/{EmblCdsSouce.java => EmblCdsSource.java} (89%) rename src/jalview/ws/dbsources/{UnprotName.java => UniprotName.java} (88%) create mode 100644 test/jalview/ext/ensembl/SeqFetcherTest.java create mode 100644 test/jalview/ws/SequenceFetcherTest.java diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index 6a676cf..99c68ba 100755 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -94,47 +94,4 @@ public class DBRefSource public static final String[] PROTEINSTR = { PDB }; public static final String[] DOMAINDBS = { PFAM, RFAM }; - - /** - * set of unique DBRefSource property constants. These could be used to - * reconstruct the above groupings - */ - public static final Object SEQDB = "SQ"; - - /** - * database of nucleic acid sequences - */ - public static final Object DNASEQDB = "NASQ"; - - /** - * database of amino acid sequences - */ - public static final Object PROTSEQDB = "PROTSQ"; - - /** - * database of cDNA sequences - */ - public static final Object CODINGSEQDB = "CODING"; - - /** - * database of na sequences with exon annotation - */ - public static final Object DNACODINGSEQDB = "XONCODING"; - - /** - * DB returns several sequences associated with a protein/nucleotide domain - */ - public static final Object DOMAINDB = "DOMAIN"; - - /** - * DB query can take multiple accession codes concatenated by a separator. - * Value of property indicates maximum number of accession codes to send at a - * time. - */ - public static final Object MULTIACC = "MULTIACC"; - - /** - * DB query returns an alignment for each accession provided. - */ - public static final Object ALIGNMENTDB = "ALIGNMENTS"; } diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java index 757b3c8..9c88b7c 100644 --- a/src/jalview/ext/ensembl/EnsemblCdna.java +++ b/src/jalview/ext/ensembl/EnsemblCdna.java @@ -7,7 +7,7 @@ import com.stevesoft.pat.Regex; public class EnsemblCdna extends EnsemblSeqProxy { - public EnsemblCdna() throws Exception + public EnsemblCdna() { super(); } diff --git a/src/jalview/ext/ensembl/EnsemblCds.java b/src/jalview/ext/ensembl/EnsemblCds.java index 1f63e05..dc92348 100644 --- a/src/jalview/ext/ensembl/EnsemblCds.java +++ b/src/jalview/ext/ensembl/EnsemblCds.java @@ -5,7 +5,7 @@ import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; public class EnsemblCds extends EnsemblSeqProxy { - public EnsemblCds() throws Exception + public EnsemblCds() { super(); } diff --git a/src/jalview/ext/ensembl/EnsemblGenome.java b/src/jalview/ext/ensembl/EnsemblGenome.java index 37e8e2b..39dfac0 100644 --- a/src/jalview/ext/ensembl/EnsemblGenome.java +++ b/src/jalview/ext/ensembl/EnsemblGenome.java @@ -5,7 +5,7 @@ import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; public class EnsemblGenome extends EnsemblSeqProxy { - public EnsemblGenome() throws Exception + public EnsemblGenome() { super(); } diff --git a/src/jalview/ext/ensembl/EnsemblProtein.java b/src/jalview/ext/ensembl/EnsemblProtein.java index db8d9d5..4cc43ab 100644 --- a/src/jalview/ext/ensembl/EnsemblProtein.java +++ b/src/jalview/ext/ensembl/EnsemblProtein.java @@ -5,7 +5,7 @@ import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; public class EnsemblProtein extends EnsemblSeqProxy { - public EnsemblProtein() throws Exception + public EnsemblProtein() { super(); } @@ -22,4 +22,19 @@ public class EnsemblProtein extends EnsemblSeqProxy return EnsemblSeqType.PROTEIN; } + @Override + public boolean isDnaCoding() + { + return false; + } + + /** + * Test query is to the protein translation of transcript ENST00000288602 + */ + @Override + public String getTestQuery() + { + return "ENSP00000288602"; + } + } diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 137c9b0..4f85bd0 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -1,14 +1,13 @@ package jalview.ext.ensembl; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; -import jalview.datamodel.DBRefSource; import jalview.datamodel.SequenceI; import jalview.exceptions.JalviewException; import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; import jalview.io.FastaFile; import jalview.io.FileParse; import jalview.util.DBRefUtils; -import jalview.ws.seqfetcher.DbSourceProxy; import jalview.ws.seqfetcher.DbSourceProxyImpl; import java.util.ArrayList; @@ -17,20 +16,13 @@ import java.util.List; import com.stevesoft.pat.Regex; -public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements - DbSourceProxy +public abstract class EnsemblSeqProxy extends DbSourceProxyImpl { SeqFetcher sf; - public EnsemblSeqProxy() throws Exception + public EnsemblSeqProxy() { sf = new SeqFetcher(); - addDbSourceProperty(DBRefSource.MULTIACC); - addDbSourceProperty(DBRefSource.SEQDB); - // decide whether these need to be filtered according to return type - addDbSourceProperty(DBRefSource.PROTSEQDB); - addDbSourceProperty(DBRefSource.DNACODINGSEQDB); - addDbSourceProperty(DBRefSource.DNASEQDB); } @Override @@ -58,10 +50,13 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})"); } + /** + * Default test query is a transcript + */ @Override public String getTestQuery() { - return "ENSP00000288602"; + return "ENST00000288602"; } @Override @@ -79,14 +74,15 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements List tids, ids = new ArrayList(); tids = Arrays.asList(queries.split(" +")); AlignmentI rtn = null; - for (int v = 0, vSize = tids.size(); v < vSize; v += 50) + + /* + * execute queries, if necessary in batches of the + * maximum allowed number of ids + */ + int maxQueryCount = getMaximumQueryCount(); + for (int v = 0, vSize = tids.size(); v < vSize; v += maxQueryCount) { - int p = v + 50; - if (p > vSize) - { - p = vSize; - } - ; + int p = Math.min(vSize, v + maxQueryCount); ids = tids.subList(v, p); try { @@ -111,7 +107,7 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements } if (fr.getSeqs().size() > 0) { - AlignmentI seqal = new jalview.datamodel.Alignment( + AlignmentI seqal = new Alignment( fr.getSeqsAsArray()); for (SequenceI sq:seqal.getSequences()) { @@ -175,4 +171,21 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements { return 0; } + + /** + * A sequence/id POST request currently allows up to 50 queries + * + * @see http://rest.ensembl.org/documentation/info/sequence_id_post + */ + @Override + public int getMaximumQueryCount() + { + return 50; + } + + @Override + public boolean isDnaCoding() + { + return true; + } } diff --git a/src/jalview/ext/ensembl/EnsemblTranscript.java b/src/jalview/ext/ensembl/EnsemblTranscript.java index 68ed310..c2d0b6e 100644 --- a/src/jalview/ext/ensembl/EnsemblTranscript.java +++ b/src/jalview/ext/ensembl/EnsemblTranscript.java @@ -5,7 +5,8 @@ import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; public class EnsemblTranscript extends EnsemblSeqProxy { - public EnsemblTranscript() throws Exception + // TODO is this class needed? it seems to duplicate EnsemblProtein + public EnsemblTranscript() { super(); } diff --git a/src/jalview/ext/ensembl/SeqFetcher.java b/src/jalview/ext/ensembl/SeqFetcher.java index 7c913bf..57f000f 100644 --- a/src/jalview/ext/ensembl/SeqFetcher.java +++ b/src/jalview/ext/ensembl/SeqFetcher.java @@ -10,7 +10,7 @@ import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; -import java.util.Collections; +import java.util.ArrayList; import java.util.List; import org.apache.http.NameValuePair; @@ -20,81 +20,60 @@ public class SeqFetcher { private final static String ENSEMBL_REST = "rest.ensembl.org"; - private static boolean ensemblRestavailable = false; + private static final String SEQUENCE_ID_URL = "http://" + ENSEMBL_REST + "/sequence/id"; - private static long lastCheck = -1; + private static final String PING_URL = "http://" + ENSEMBL_REST + "/info/ping"; - public boolean isEnsemblAvailable() - { - if (isTesting || !ensemblRestavailable - || System.currentTimeMillis() - lastCheck > 10000) - { - checkEnsembl(); - lastCheck = System.currentTimeMillis(); - } - return ensemblRestavailable; - } - - private boolean isTesting, testEnsemblStatus; + private final static long RETEST_INTERVAL = 10000L; // 10 seconds - /** - * @return the isTesting - */ - public boolean isTesting() - { - return isTesting; - } + private static boolean ensemblRestAvailable = false; - /** - * @param isTesting - * the isTesting to set - */ - public void setTesting(boolean isTesting) - { - this.isTesting = isTesting; - } + private static long lastCheck = -1; /** - * @return the testEnsemblStatus + * Rechecks if Ensembl is responding, unless the last check was successful and + * the retest interval has not yet elapsed. Returns true if Ensembl is up, + * else false. + * + * @return */ - public boolean isTestEnsemblStatus() + public boolean isEnsemblAvailable() { - return testEnsemblStatus; + long now = System.currentTimeMillis(); + boolean retest = now - lastCheck > RETEST_INTERVAL; + if (ensemblRestAvailable && !retest) + { + return true; + } + ensemblRestAvailable = checkEnsembl(); + lastCheck = now; + return ensemblRestAvailable; } /** - * @param testEnsemblStatus - * the testEnsemblStatus to set + * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if + * successful, else false + * + * @return */ - public void setTestEnsemblStatus(boolean testEnsemblStatus) + private boolean checkEnsembl() { - this.testEnsemblStatus = testEnsemblStatus; - } - - private void checkEnsembl() - { - if (isTesting) - { - ensemblRestavailable = testEnsemblStatus; - return; - } try { - URL ping = new URL("http://" + ENSEMBL_REST + "/info/ping"); - HttpURLConnection conn = (HttpURLConnection) (ping.openConnection()); - if (conn.getResponseCode() >= 200 && conn.getResponseCode() < 300) + URL ping = new URL(PING_URL); + HttpURLConnection conn = (HttpURLConnection) ping.openConnection(); + int rc = conn.getResponseCode(); + conn.disconnect(); + if (rc >= 200 && rc < 300) { - ensemblRestavailable = true; - return; + return true; } - } catch (Error err) - { - err.printStackTrace(); - } catch (Exception exx) + } catch (Throwable t) { - exx.printStackTrace(); + System.err.println("Error connecting to " + PING_URL + ": " + + t.getMessage()); } - ensemblRestavailable = false; + return false; } public SeqFetcher() @@ -120,14 +99,17 @@ public class SeqFetcher } /** - * reolve request type as an argument for sequence and features queries + * Returns a list of additional URL query parameters to specify the desired + * sequence type (genomic/cds/protein etc), and data format Fasta * * @param type */ - public List getObjectTypeArg(EnsemblSeqType type) + public List getAdditionalParameters(EnsemblSeqType type) { - NameValuePair nameValue = new BasicNameValuePair("type", type.getType()); - return Collections.singletonList(nameValue); + List params = new ArrayList(); + params.add(new BasicNameValuePair("type", type.getType())); + params.add(new BasicNameValuePair("content-type", "text/x-fasta")); + return params; } /** @@ -141,27 +123,19 @@ public class SeqFetcher public FileParse getSequenceReader(EnsemblSeqType returnType, List ids) throws IOException { + // see http://rest.ensembl.org/documentation/info/sequence_id - // adapted From the rest.ensembl.org documentation for sequence_id - - String urls = "http://" + ENSEMBL_REST + "/sequence/id"; - List vals = getObjectTypeArg(returnType); - boolean f = true; + String urlstring = SEQUENCE_ID_URL; + List vals = getAdditionalParameters(returnType); + boolean first = true; for (NameValuePair nvp : vals) { - if (f) - { - f = false; - urls += "?"; - } - else - { - urls += "&"; - } - urls += nvp.getName() + "=" + nvp.getValue(); + urlstring += first ? "?" : "&"; + first = false; + urlstring += nvp.getName() + "=" + nvp.getValue(); } - URL url = new URL(urls); + URL url = new URL(urlstring); URLConnection connection = url.openConnection(); HttpURLConnection httpConnection = (HttpURLConnection) connection; @@ -173,17 +147,14 @@ public class SeqFetcher { StringBuilder postBody = new StringBuilder(); postBody.append("{\"ids\":["); - boolean first = true; + first = true; for (String id : ids) { - if (first) - { - first = false; - } - else + if (!first) { postBody.append(","); } + first = false; postBody.append("\""); postBody.append(id.trim()); postBody.append("\""); diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index 09d33c8..ad3fcc9 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -22,7 +22,6 @@ package jalview.gui; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.FormatAdapter; @@ -551,7 +550,7 @@ public class SequenceFetcher extends JPanel implements Runnable Integer.valueOf(nqueries).toString(), proxy.getDbName() }), Thread.currentThread() .hashCode()); - isAliSource = proxy.isA(DBRefSource.ALIGNMENTDB); + isAliSource = proxy.isAlignmentSource(); if (proxy.getAccessionSeparator() == null) { while (en.hasNext()) @@ -638,8 +637,7 @@ public class SequenceFetcher extends JPanel implements Runnable DBRefEntry dbr = new DBRefEntry(), found[] = null; dbr.setSource(proxy.getDbSource()); dbr.setVersion(null); - String accId = DBRefUtils.processQueryToAccessionFor(proxy, - q); + String accId = proxy.getAccessionIdFromQuery(q); dbr.setAccessionId(accId); boolean rfound = false; for (int r = 0; r < rs.length; r++) diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index b8f1dd5..cdf2325 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -473,36 +473,4 @@ public class DBRefUtils return ref; } - /** - * Extract valid accession strings from a query string. Used by the - * SequenceFetcher and DBRefFetcher to create valid accession strings from an - * ID string for database sources with a Regex validation field. - * - * @param proxy - * @param q - * @return q if proxy.getAccessionValidator()==null, otherwise the matched - * region or the first subgroup match from the matched region - */ - public static String processQueryToAccessionFor(DbSourceProxy proxy, - String q) - { - if (proxy.getAccessionValidator() != null) - { - Regex vgr = proxy.getAccessionValidator(); - vgr.search(q); - if (vgr.numSubs() > 0) - { - return (vgr.stringMatched(1)); - } - else - { - return (vgr.stringMatched()); - } - } - else - { - return (q); - } - } - } diff --git a/src/jalview/ws/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java index d531fea..9d91e31 100644 --- a/src/jalview/ws/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -283,6 +283,7 @@ public class DBRefFetcher implements Runnable /** * DOCUMENT ME! */ + @Override public void run() { if (dbSources == null) @@ -335,16 +336,8 @@ public class DBRefFetcher implements Runnable // TODO: introduce multithread multisource queries and logic to remove a // query from other sources if any source for a database returns a // record - if (dbsource.getDbSourceProperties().containsKey( - DBRefSource.MULTIACC)) - { - maxqlen = ((Integer) dbsource.getDbSourceProperties().get( - DBRefSource.MULTIACC)).intValue(); - } - else - { - maxqlen = 1; - } + maxqlen = dbsource.getMaximumQueryCount(); + while (queries.size() > 0 || seqIndex < currSeqs.length) { if (queries.size() > 0) diff --git a/src/jalview/ws/SequenceFetcher.java b/src/jalview/ws/SequenceFetcher.java index fcc4457..74bf56f 100644 --- a/src/jalview/ws/SequenceFetcher.java +++ b/src/jalview/ws/SequenceFetcher.java @@ -20,24 +20,30 @@ */ package jalview.ws; -import jalview.datamodel.Alignment; -import jalview.datamodel.AlignmentI; -import jalview.datamodel.DBRefSource; -import jalview.datamodel.SequenceI; +import jalview.ext.ensembl.EnsemblCdna; +import jalview.ext.ensembl.EnsemblCds; +import jalview.ext.ensembl.EnsemblGenome; +import jalview.ext.ensembl.EnsemblProtein; +import jalview.ws.dbsources.EmblCdsSource; +import jalview.ws.dbsources.EmblSource; +import jalview.ws.dbsources.Pdb; +import jalview.ws.dbsources.PfamFull; +import jalview.ws.dbsources.PfamSeed; +import jalview.ws.dbsources.RfamFull; +import jalview.ws.dbsources.RfamSeed; +import jalview.ws.dbsources.Uniprot; +import jalview.ws.dbsources.UniprotName; import jalview.ws.dbsources.das.api.jalviewSourceI; import jalview.ws.seqfetcher.ASequenceFetcher; import jalview.ws.seqfetcher.DbSourceProxy; import java.util.ArrayList; -import java.util.Enumeration; import java.util.List; -import java.util.Vector; /** * This is the the concrete implementation of the sequence retrieval interface * and abstract class in jalview.ws.seqfetcher. This implements the run-time - * discovery of sequence database clients, and provides a hardwired main for - * testing all registered handlers. + * discovery of sequence database clientss. * */ public class SequenceFetcher extends ASequenceFetcher @@ -55,22 +61,24 @@ public class SequenceFetcher extends ASequenceFetcher public SequenceFetcher(boolean addDas) { - addDBRefSourceImpl(jalview.ext.ensembl.EnsemblProtein.class); - addDBRefSourceImpl(jalview.ext.ensembl.EnsemblTranscript.class); - addDBRefSourceImpl(jalview.ext.ensembl.EnsemblCds.class); - addDBRefSourceImpl(jalview.ext.ensembl.EnsemblGenome.class); - addDBRefSourceImpl(jalview.ext.ensembl.EnsemblCdna.class); - - addDBRefSourceImpl(jalview.ws.dbsources.EmblSource.class); - addDBRefSourceImpl(jalview.ws.dbsources.EmblCdsSouce.class); - addDBRefSourceImpl(jalview.ws.dbsources.Uniprot.class); - addDBRefSourceImpl(jalview.ws.dbsources.UnprotName.class); - addDBRefSourceImpl(jalview.ws.dbsources.Pdb.class); - addDBRefSourceImpl(jalview.ws.dbsources.PfamFull.class); - addDBRefSourceImpl(jalview.ws.dbsources.PfamSeed.class); + addDBRefSourceImpl(EnsemblProtein.class); + // EnsemblTranscript would just replace EnsemblProtein as the proxy for + // { DbSource="ENSEMBL", DbName="ENSEMBL (Protein)" } + // addDBRefSourceImpl(EnsemblTranscript.class); + addDBRefSourceImpl(EnsemblCds.class); + addDBRefSourceImpl(EnsemblGenome.class); + addDBRefSourceImpl(EnsemblCdna.class); + + addDBRefSourceImpl(EmblSource.class); + addDBRefSourceImpl(EmblCdsSource.class); + addDBRefSourceImpl(Uniprot.class); + addDBRefSourceImpl(UniprotName.class); + addDBRefSourceImpl(Pdb.class); + addDBRefSourceImpl(PfamFull.class); + addDBRefSourceImpl(PfamSeed.class); // ensures Seed alignment is 'default' for PFAM - addDBRefSourceImpl(jalview.ws.dbsources.RfamFull.class); - addDBRefSourceImpl(jalview.ws.dbsources.RfamSeed.class); + addDBRefSourceImpl(RfamFull.class); + addDBRefSourceImpl(RfamSeed.class); if (addDas) { registerDasSequenceSources(); @@ -93,7 +101,7 @@ public class SequenceFetcher extends ASequenceFetcher { // Skip the alignment databases for the moment - they're not useful for // verifying a single sequence against its reference source - if (dbs.isA(DBRefSource.ALIGNMENTDB)) + if (dbs.isAlignmentSource()) { skip = true; } @@ -156,283 +164,6 @@ public class SequenceFetcher extends ASequenceFetcher } /** - * return plaintext databse list suitable for using in a GUI element - */ - public String[] _getOrderedSupportedSources() - { - String[] srcs = this.getSupportedDb(); - ArrayList dassrc = new ArrayList(), nondas = new ArrayList(); - for (int i = 0; i < srcs.length; i++) - { - for (DbSourceProxy dbs : getSourceProxy(srcs[i])) - { - String nm = dbs.getDbName(); - if (getSourceProxy(srcs[i]) instanceof jalview.ws.dbsources.das.datamodel.DasSequenceSource) - { - if (nm.startsWith("das:")) - { - nm = nm.substring(4); - } - dassrc.add(new String[] { srcs[i], nm.toUpperCase() }); - } - else - { - nondas.add(new String[] { srcs[i], nm.toUpperCase() }); - } - } - } - Object[] sorted = nondas.toArray(); - String[] tosort = new String[sorted.length]; - nondas.clear(); - for (int j = 0; j < sorted.length; j++) - { - tosort[j] = ((String[]) sorted[j])[1]; - } - jalview.util.QuickSort.sort(tosort, sorted); - int i = 0; - // construct array with all sources listed - srcs = new String[sorted.length + dassrc.size()]; - for (int j = sorted.length - 1; j >= 0; j--, i++) - { - srcs[i] = ((String[]) sorted[j])[0]; - sorted[j] = null; - } - - sorted = dassrc.toArray(); - tosort = new String[sorted.length]; - dassrc.clear(); - for (int j = 0; j < sorted.length; j++) - { - tosort[j] = ((String[]) sorted[j])[1]; - } - jalview.util.QuickSort.sort(tosort, sorted); - for (int j = sorted.length - 1; j >= 0; j--, i++) - { - srcs[i] = ((String[]) sorted[j])[0]; - sorted[j] = null; - } - return srcs; - } - - /** - * simple run method to test dbsources. - * - * @param argv - */ - public static void main(String[] argv) - { - AlignmentI ds = null; - Vector noProds = new Vector(); - String usage = "SequenceFetcher.main [-nodas] [ []]\n" - + "With no arguments, all DbSources will be queried with their test Accession number.\n" - + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n" - + "If given two arguments, SequenceFetcher will try to find the DbFetcher corresponding to and retrieve from it.\n" - + "The -nodas option will exclude DAS sources from the database fetchers Jalview will try to use."; - boolean withDas = true; - if (argv != null && argv.length > 0 - && argv[0].toLowerCase().startsWith("-nodas")) - { - withDas = false; - String targs[] = new String[argv.length - 1]; - System.arraycopy(argv, 1, targs, 0, targs.length); - argv = targs; - } - if (argv != null && argv.length > 0) - { - List sps = new SequenceFetcher(withDas) - .getSourceProxy(argv[0]); - - if (sps != null) - { - for (DbSourceProxy sp : sps) - { - AlignmentI al = null; - try - { - al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp - .getTestQuery()); - } catch (Exception e) - { - e.printStackTrace(); - System.err.println("Error when retrieving " - + (argv.length > 1 ? argv[1] : sp.getTestQuery()) - + " from " + argv[0] + "\nUsage: " + usage); - } - SequenceI[] prod = al.getSequencesArray(); - if (al != null) - { - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true) + " : " - + prod[p].getDescription()); - } - } - } - return; - } - else - { - System.err.println("Can't resolve " + argv[0] - + " as a database name. Allowed values are :\n" - + new SequenceFetcher().getSupportedDb()); - } - System.out.println(usage); - return; - } - ASequenceFetcher sfetcher = new SequenceFetcher(withDas); - String[] dbSources = sfetcher.getSupportedDb(); - for (int dbsource = 0; dbsource < dbSources.length; dbsource++) - { - String db = dbSources[dbsource]; - // skip me - if (db.equals(DBRefSource.PDB)) - { - continue; - } - for (DbSourceProxy sp : sfetcher.getSourceProxy(db)) - { - System.out.println("Source: " + sp.getDbName() + " (" + db - + "): retrieving test:" + sp.getTestQuery()); - AlignmentI al = null; - try - { - al = sp.getSequenceRecords(sp.getTestQuery()); - if (al != null && al.getHeight() > 0 - && sp.getDbSourceProperties() != null) - { - boolean dna = sp.getDbSourceProperties().containsKey( - DBRefSource.DNACODINGSEQDB) - || sp.getDbSourceProperties().containsKey( - DBRefSource.DNASEQDB) - || sp.getDbSourceProperties().containsKey( - DBRefSource.CODINGSEQDB); - // try and find products - String types[] = jalview.analysis.CrossRef - .findSequenceXrefTypes(dna, al.getSequencesArray()); - if (types != null) - { - System.out.println("Xref Types for: " - + (dna ? "dna" : "prot")); - for (int t = 0; t < types.length; t++) - { - System.out.println("Type: " + types[t]); - SequenceI[] prod = jalview.analysis.CrossRef - .findXrefSequences(al.getSequencesArray(), dna, - types[t]).getSequencesArray(); - System.out.println("Found " - + ((prod == null) ? "no" : "" + prod.length) - + " products"); - if (prod != null) - { - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true)); - } - } - } - } - else - { - noProds.addElement((dna ? new Object[] { al, al } - : new Object[] { al })); - } - - } - } catch (Exception ex) - { - System.out.println("ERROR:Failed to retrieve test query."); - ex.printStackTrace(System.out); - } - - if (al == null) - { - System.out.println("ERROR:No alignment retrieved."); - StringBuffer raw = sp.getRawRecords(); - if (raw != null) - { - System.out.println(raw.toString()); - } - else - { - System.out.println("ERROR:No Raw results."); - } - } - else - { - System.out.println("Retrieved " + al.getHeight() + " sequences."); - for (int s = 0; s < al.getHeight(); s++) - { - SequenceI sq = al.getSequenceAt(s); - while (sq.getDatasetSequence() != null) - { - sq = sq.getDatasetSequence(); - - } - if (ds == null) - { - ds = new Alignment(new SequenceI[] { sq }); - - } - else - { - ds.addSequence(sq); - } - } - } - System.out.flush(); - System.err.flush(); - - } - if (noProds.size() > 0) - { - Enumeration ts = noProds.elements(); - while (ts.hasMoreElements()) - - { - Object[] typeSq = (Object[]) ts.nextElement(); - boolean dna = (typeSq.length > 1); - AlignmentI al = (AlignmentI) typeSq[0]; - System.out.println("Trying getProducts for " - + al.getSequenceAt(0).getDisplayId(true)); - System.out.println("Search DS Xref for: " - + (dna ? "dna" : "prot")); - // have a bash at finding the products amongst all the retrieved - // sequences. - SequenceI[] seqs = al.getSequencesArray(); - Alignment prodal = jalview.analysis.CrossRef.findXrefSequences( - seqs, dna, null, ds); - System.out.println("Found " - + ((prodal == null) ? "no" : "" + prodal.getHeight()) - + " products"); - if (prodal != null) - { - SequenceI[] prod = prodal.getSequencesArray(); // note - // should - // test - // rather - // than - // throw - // away - // codon - // mapping - // (if - // present) - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true)); - } - } - } - - } - - } - } - - /** * query the currently defined DAS source registry for sequence sources and * add a DasSequenceSource instance for each source to the SequenceFetcher * source list. diff --git a/src/jalview/ws/dbsources/EmblCdsSouce.java b/src/jalview/ws/dbsources/EmblCdsSource.java similarity index 89% rename from src/jalview/ws/dbsources/EmblCdsSouce.java rename to src/jalview/ws/dbsources/EmblCdsSource.java index e5fbd6c..a73af61 100644 --- a/src/jalview/ws/dbsources/EmblCdsSouce.java +++ b/src/jalview/ws/dbsources/EmblCdsSource.java @@ -22,40 +22,43 @@ package jalview.ws.dbsources; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; -import jalview.ws.seqfetcher.DbSourceProxy; import com.stevesoft.pat.Regex; -public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy +public class EmblCdsSource extends EmblXmlSource { - public EmblCdsSouce() + public EmblCdsSource() { super(); - addDbSourceProperty(DBRefSource.CODINGSEQDB); } + @Override public String getAccessionSeparator() { return null; } + @Override public Regex getAccessionValidator() { - return new com.stevesoft.pat.Regex("^[A-Z]+[0-9]+"); + return new Regex("^[A-Z]+[0-9]+"); } + @Override public String getDbSource() { return DBRefSource.EMBLCDS; } + @Override public String getDbVersion() { return "0"; // TODO : this is dynamically set for a returned record - not // tied to proxy } + @Override public AlignmentI getSequenceRecords(String queries) throws Exception { if (queries.indexOf(".") > -1) @@ -65,6 +68,7 @@ public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy return getEmblSequenceRecords(DBRefSource.EMBLCDS, queries); } + @Override public boolean isValidReference(String accession) { // most embl CDS refs look like .. @@ -76,11 +80,13 @@ public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy /** * cDNA for LDHA_CHICK swissprot sequence */ + @Override public String getTestQuery() { return "CAA37824"; } + @Override public String getDbName() { return "EMBL (CDS)"; diff --git a/src/jalview/ws/dbsources/EmblSource.java b/src/jalview/ws/dbsources/EmblSource.java index 893ab41..6bbe2e1 100644 --- a/src/jalview/ws/dbsources/EmblSource.java +++ b/src/jalview/ws/dbsources/EmblSource.java @@ -22,7 +22,6 @@ package jalview.ws.dbsources; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; -import jalview.ws.seqfetcher.DbSourceProxy; import com.stevesoft.pat.Regex; @@ -30,13 +29,12 @@ import com.stevesoft.pat.Regex; * @author JimP * */ -public class EmblSource extends EmblXmlSource implements DbSourceProxy +public class EmblSource extends EmblXmlSource { public EmblSource() { - addDbSourceProperty(DBRefSource.DNASEQDB); - addDbSourceProperty(DBRefSource.CODINGSEQDB); + super(); } /* @@ -44,6 +42,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionSeparator() */ + @Override public String getAccessionSeparator() { // TODO Auto-generated method stub @@ -55,9 +54,10 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionValidator() */ + @Override public Regex getAccessionValidator() { - return new com.stevesoft.pat.Regex("^[A-Z]+[0-9]+"); + return new Regex("^[A-Z]+[0-9]+"); } /* @@ -65,6 +65,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getDbSource() */ + @Override public String getDbSource() { return DBRefSource.EMBL; @@ -75,6 +76,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getDbVersion() */ + @Override public String getDbVersion() { // TODO Auto-generated method stub @@ -86,6 +88,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[]) */ + @Override public AlignmentI getSequenceRecords(String queries) throws Exception { return getEmblSequenceRecords(DBRefSource.EMBL, queries); @@ -96,6 +99,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String) */ + @Override public boolean isValidReference(String accession) { // most embl refs look like .. @@ -108,11 +112,13 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy /** * return LHD_CHICK coding gene */ + @Override public String getTestQuery() { return "X53828"; } + @Override public String getDbName() { return "EMBL"; // getDbSource(); diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 20da45c..66ebe1b 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -155,4 +155,10 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy return al; } + @Override + public boolean isDnaCoding() + { + return true; + } + } diff --git a/src/jalview/ws/dbsources/GeneDbSource.java b/src/jalview/ws/dbsources/GeneDbSource.java index 116962b..ce21ad0 100644 --- a/src/jalview/ws/dbsources/GeneDbSource.java +++ b/src/jalview/ws/dbsources/GeneDbSource.java @@ -22,7 +22,6 @@ package jalview.ws.dbsources; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; -import jalview.ws.seqfetcher.DbSourceProxy; import com.stevesoft.pat.Regex; @@ -32,13 +31,12 @@ import com.stevesoft.pat.Regex; * @author JimP * */ -public class GeneDbSource extends EmblXmlSource implements DbSourceProxy +public class GeneDbSource extends EmblXmlSource { public GeneDbSource() { - addDbSourceProperty(DBRefSource.DNASEQDB); - addDbSourceProperty(DBRefSource.CODINGSEQDB); + super(); } /* @@ -46,6 +44,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionSeparator() */ + @Override public String getAccessionSeparator() { // TODO Auto-generated method stub @@ -57,6 +56,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionValidator() */ + @Override public Regex getAccessionValidator() { // TODO Auto-generated method stub @@ -68,6 +68,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getDbSource() */ + @Override public String getDbSource() { return DBRefSource.GENEDB; @@ -78,6 +79,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getDbVersion() */ + @Override public String getDbVersion() { // TODO Auto-generated method stub @@ -89,6 +91,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[]) */ + @Override public AlignmentI getSequenceRecords(String queries) throws Exception { // query of form @@ -102,6 +105,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String) */ + @Override public boolean isValidReference(String accession) { // TODO Auto-generated method stub @@ -111,11 +115,13 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy /** * return T.Brucei Mannosyl-Transferase TbPIG-M */ + @Override public String getTestQuery() { return "Tb927.6.3300"; } + @Override public String getDbName() { return "GeneDB"; // getDbSource(); diff --git a/src/jalview/ws/dbsources/Pdb.java b/src/jalview/ws/dbsources/Pdb.java index 4a50196..b9fb8f3 100644 --- a/src/jalview/ws/dbsources/Pdb.java +++ b/src/jalview/ws/dbsources/Pdb.java @@ -29,7 +29,6 @@ import jalview.datamodel.SequenceI; import jalview.io.FormatAdapter; import jalview.util.MessageManager; import jalview.ws.ebi.EBIFetchClient; -import jalview.ws.seqfetcher.DbSourceProxy; import java.util.ArrayList; import java.util.List; @@ -41,12 +40,11 @@ import com.stevesoft.pat.Regex; * @author JimP * */ -public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy +public class Pdb extends EbiFileRetrievedProxy { public Pdb() { super(); - addDbSourceProperty(DBRefSource.PROTSEQDB); } /* @@ -54,6 +52,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionSeparator() */ + @Override public String getAccessionSeparator() { // TODO Auto-generated method stub @@ -65,6 +64,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionValidator() */ + @Override public Regex getAccessionValidator() { return new Regex("([1-9][0-9A-Za-z]{3}):?([ _A-Za-z0-9]?)"); @@ -75,6 +75,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getDbSource() */ + @Override public String getDbSource() { return DBRefSource.PDB; @@ -85,6 +86,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getDbVersion() */ + @Override public String getDbVersion() { return "0"; @@ -95,6 +97,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[]) */ + @Override public AlignmentI getSequenceRecords(String queries) throws Exception { AlignmentI pdbfile = null; @@ -219,6 +222,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String) */ + @Override public boolean isValidReference(String accession) { Regex r = getAccessionValidator(); @@ -228,11 +232,13 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy /** * obtain human glyoxalase chain A sequence */ + @Override public String getTestQuery() { return "1QIPA"; } + @Override public String getDbName() { return "PDB"; // getDbSource(); diff --git a/src/jalview/ws/dbsources/Pfam.java b/src/jalview/ws/dbsources/Pfam.java index 0211bb1..4f081ee 100644 --- a/src/jalview/ws/dbsources/Pfam.java +++ b/src/jalview/ws/dbsources/Pfam.java @@ -22,7 +22,8 @@ package jalview.ws.dbsources; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; -import jalview.ws.seqfetcher.DbSourceProxy; +import jalview.datamodel.DBRefSource; +import jalview.io.FormatAdapter; import com.stevesoft.pat.Regex; @@ -34,15 +35,12 @@ import com.stevesoft.pat.Regex; * @author JimP * */ -abstract public class Pfam extends Xfam implements DbSourceProxy +abstract public class Pfam extends Xfam { public Pfam() { super(); - // all extensions of this PFAM source base class are DOMAINDB sources - addDbSourceProperty(jalview.datamodel.DBRefSource.DOMAINDB); - addDbSourceProperty(jalview.datamodel.DBRefSource.ALIGNMENTDB); } /* @@ -50,6 +48,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionSeparator() */ + @Override public String getAccessionSeparator() { // TODO Auto-generated method stub @@ -61,6 +60,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionValidator() */ + @Override public Regex getAccessionValidator() { // TODO Auto-generated method stub @@ -111,22 +111,23 @@ abstract public class Pfam extends Xfam implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[]) */ + @Override public AlignmentI getSequenceRecords(String queries) throws Exception { // TODO: this is not a perfect implementation. We need to be able to add // individual references to each sequence in each family alignment that's // retrieved. startQuery(); - AlignmentI rcds = new jalview.io.FormatAdapter().readFile(getXFAMURL() + AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL() + queries.trim().toUpperCase(), jalview.io.FormatAdapter.URL, "STH"); for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++) { rcds.getSequenceAt(s).addDBRef( - new DBRefEntry(jalview.datamodel.DBRefSource.PFAM, +new DBRefEntry(DBRefSource.PFAM, // getDbSource(), getDbVersion(), queries.trim().toUpperCase())); - if (!getDbSource().equals(jalview.datamodel.DBRefSource.PFAM)) + if (!getDbSource().equals(DBRefSource.PFAM)) { // add the specific ref too rcds.getSequenceAt(s).addDBRef( new DBRefEntry(getDbSource(), getDbVersion(), queries @@ -142,6 +143,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String) */ + @Override public boolean isValidReference(String accession) { return accession.indexOf("PF") == 0; @@ -151,9 +153,10 @@ abstract public class Pfam extends Xfam implements DbSourceProxy * public String getDbName() { return "PFAM"; // getDbSource(); } */ + @Override public String getXfamSource() { - return jalview.datamodel.DBRefSource.PFAM; + return DBRefSource.PFAM; } } diff --git a/src/jalview/ws/dbsources/PfamFull.java b/src/jalview/ws/dbsources/PfamFull.java index 0490291..4f5b8f5 100644 --- a/src/jalview/ws/dbsources/PfamFull.java +++ b/src/jalview/ws/dbsources/PfamFull.java @@ -20,13 +20,12 @@ */ package jalview.ws.dbsources; -import jalview.ws.seqfetcher.DbSourceProxy; /** * flyweight class specifying retrieval of Full family alignments from PFAM * */ -public class PfamFull extends Pfam implements DbSourceProxy +public class PfamFull extends Pfam { public PfamFull() { @@ -38,6 +37,7 @@ public class PfamFull extends Pfam implements DbSourceProxy * * @see jalview.ws.dbsources.Pfam#getPFAMURL() */ + @Override protected String getXFAMURL() { return "http://pfam.sanger.ac.uk/family/alignment/download/format?alnType=full&format=stockholm&order=t&case=l&gaps=default&entry="; @@ -48,21 +48,25 @@ public class PfamFull extends Pfam implements DbSourceProxy * * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName() */ + @Override public String getDbName() { return "PFAM (Full)"; } + @Override public String getDbSource() { return getDbName(); // so we have unique DbSource string. } + @Override public String getTestQuery() { return "PF03760"; } + @Override public String getDbVersion() { return null; diff --git a/src/jalview/ws/dbsources/PfamSeed.java b/src/jalview/ws/dbsources/PfamSeed.java index 2ea75af..be8f044 100644 --- a/src/jalview/ws/dbsources/PfamSeed.java +++ b/src/jalview/ws/dbsources/PfamSeed.java @@ -20,7 +20,6 @@ */ package jalview.ws.dbsources; -import jalview.ws.seqfetcher.DbSourceProxy; /** * flyweight class specifying retrieval of Seed alignments from PFAM @@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy; * @author JimP * */ -public class PfamSeed extends Pfam implements DbSourceProxy +public class PfamSeed extends Pfam { public PfamSeed() { @@ -40,6 +39,7 @@ public class PfamSeed extends Pfam implements DbSourceProxy * * @see jalview.ws.dbsources.Pfam#getPFAMURL() */ + @Override protected String getXFAMURL() { return "http://pfam.sanger.ac.uk/family/alignment/download/format?alnType=seed&format=stockholm&order=t&case=l&gaps=default&entry="; @@ -50,16 +50,19 @@ public class PfamSeed extends Pfam implements DbSourceProxy * * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName() */ + @Override public String getDbName() { return "PFAM (Seed)"; } + @Override public String getDbSource() { return jalview.datamodel.DBRefSource.PFAM; // archetype source } + @Override public String getTestQuery() { return "PF03760"; diff --git a/src/jalview/ws/dbsources/Rfam.java b/src/jalview/ws/dbsources/Rfam.java index b07b8ea..97f73d0 100644 --- a/src/jalview/ws/dbsources/Rfam.java +++ b/src/jalview/ws/dbsources/Rfam.java @@ -20,7 +20,7 @@ */ package jalview.ws.dbsources; -import jalview.ws.seqfetcher.DbSourceProxy; +import jalview.datamodel.DBRefSource; import com.stevesoft.pat.Regex; @@ -29,15 +29,12 @@ import com.stevesoft.pat.Regex; * * @author Lauren Michelle Lui */ -abstract public class Rfam extends Xfam implements DbSourceProxy +abstract public class Rfam extends Xfam { public Rfam() { super(); - // all extensions of this RFAM source base class are DOMAINDB sources - addDbSourceProperty(jalview.datamodel.DBRefSource.DOMAINDB); - addDbSourceProperty(jalview.datamodel.DBRefSource.ALIGNMENTDB); } /* @@ -46,6 +43,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy * @see jalview.ws.DbSourceProxy#getAccessionSeparator() Left here for * consistency with Pfam class */ + @Override public String getAccessionSeparator() { // TODO Auto-generated method stub @@ -57,6 +55,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionValidator() * Left here for */ + @Override public Regex getAccessionValidator() { // TODO Auto-generated method stub @@ -100,6 +99,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String) */ + @Override public boolean isValidReference(String accession) { return accession.indexOf("RF") == 0; @@ -110,9 +110,10 @@ abstract public class Rfam extends Xfam implements DbSourceProxy * * @see jalview.ws.dbsources.Xfam#getXfamSource() */ + @Override public String getXfamSource() { - return jalview.datamodel.DBRefSource.RFAM; + return DBRefSource.RFAM; } } diff --git a/src/jalview/ws/dbsources/RfamFull.java b/src/jalview/ws/dbsources/RfamFull.java index 74f4ec6..e1e9e9a 100644 --- a/src/jalview/ws/dbsources/RfamFull.java +++ b/src/jalview/ws/dbsources/RfamFull.java @@ -20,7 +20,6 @@ */ package jalview.ws.dbsources; -import jalview.ws.seqfetcher.DbSourceProxy; /** * Flyweight class specifying retrieval of Full family alignments from RFAM @@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy; * @author Lauren Michelle Lui * */ -public class RfamFull extends Rfam implements DbSourceProxy +public class RfamFull extends Rfam { public RfamFull() { @@ -40,6 +39,7 @@ public class RfamFull extends Rfam implements DbSourceProxy * * @see jalview.ws.dbsources.Rfam#getXFAMURL() */ + @Override protected String getXFAMURL() { return "http://rfam.sanger.ac.uk/family/alignment/download/format?alnType=full&nseLabels=0&format=stockholm&acc="; @@ -50,16 +50,19 @@ public class RfamFull extends Rfam implements DbSourceProxy * * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName() */ + @Override public String getDbName() { return "RFAM (Full)"; } + @Override public String getDbSource() { return getDbName(); // so we have unique DbSource string. } + @Override public String getTestQuery() { // Can be retrieved from http://rfam.janelia.org/cgi-bin/getdesc?acc=RF00014 @@ -68,6 +71,7 @@ public class RfamFull extends Rfam implements DbSourceProxy return "RF00014"; } + @Override public String getDbVersion() { return null; diff --git a/src/jalview/ws/dbsources/RfamSeed.java b/src/jalview/ws/dbsources/RfamSeed.java index dd2b12f..2850fd5 100644 --- a/src/jalview/ws/dbsources/RfamSeed.java +++ b/src/jalview/ws/dbsources/RfamSeed.java @@ -20,7 +20,6 @@ */ package jalview.ws.dbsources; -import jalview.ws.seqfetcher.DbSourceProxy; /** * Flyweight class specifying retrieval of Seed family alignments from RFAM @@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy; * @author Lauren Michelle Lui * */ -public class RfamSeed extends Rfam implements DbSourceProxy +public class RfamSeed extends Rfam { public RfamSeed() { @@ -40,6 +39,7 @@ public class RfamSeed extends Rfam implements DbSourceProxy * * @see jalview.ws.dbsources.Rfam#getRFAMURL() */ + @Override protected String getXFAMURL() { return "http://rfam.sanger.ac.uk/family/alignment/download/format?alnType=seed&nseLabels=0&format=stockholm&acc="; @@ -52,21 +52,25 @@ public class RfamSeed extends Rfam implements DbSourceProxy * * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName() */ + @Override public String getDbName() { return "RFAM (Seed)"; } + @Override public String getDbSource() { return getDbName(); // so we have unique DbSource string. } + @Override public String getTestQuery() { return "RF00014"; } // http://rfam.janelia.org/cgi-bin/getdesc?acc=RF00014 + @Override public String getDbVersion() { return null; diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 1e8eadb..0a252b1 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -29,14 +29,15 @@ import jalview.datamodel.SequenceI; import jalview.datamodel.UniprotEntry; import jalview.datamodel.UniprotFile; import jalview.ws.ebi.EBIFetchClient; -import jalview.ws.seqfetcher.DbSourceProxy; import jalview.ws.seqfetcher.DbSourceProxyImpl; import java.io.File; import java.io.FileReader; import java.io.Reader; +import java.net.URL; import java.util.Vector; +import org.exolab.castor.mapping.Mapping; import org.exolab.castor.xml.Unmarshaller; import com.stevesoft.pat.Regex; @@ -45,14 +46,14 @@ import com.stevesoft.pat.Regex; * @author JimP * */ -public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy +public class Uniprot extends DbSourceProxyImpl { private static final String BAR_DELIMITER = "|"; private static final String NEWLINE = "\n"; - private static org.exolab.castor.mapping.Mapping map; + private static Mapping map; /** * Constructor @@ -60,9 +61,6 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy public Uniprot() { super(); - addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB); - addDbSourceProperty(DBRefSource.PROTSEQDB); - // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50)); } /* @@ -70,6 +68,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionSeparator() */ + @Override public String getAccessionSeparator() { return null; // ";"; @@ -80,6 +79,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getAccessionValidator() */ + @Override public Regex getAccessionValidator() { return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)"); @@ -90,6 +90,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getDbSource() */ + @Override public String getDbSource() { return DBRefSource.UNIPROT; @@ -100,6 +101,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getDbVersion() */ + @Override public String getDbVersion() { return "0"; // we really don't know what version we're on. @@ -121,9 +123,8 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy if (map == null) { // 1. Load the mapping information from the file - map = new org.exolab.castor.mapping.Mapping(uni.getClass() - .getClassLoader()); - java.net.URL url = getClass().getResource("/uniprot_mapping.xml"); + map = new Mapping(uni.getClass().getClassLoader()); + URL url = getClass().getResource("/uniprot_mapping.xml"); map.loadMapping(url); } @@ -148,6 +149,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[]) */ + @Override public AlignmentI getSequenceRecords(String queries) throws Exception { startQuery(); @@ -302,6 +304,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String) */ + @Override public boolean isValidReference(String accession) { // TODO: make the following a standard validator @@ -312,11 +315,13 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy /** * return LDHA_CHICK uniprot entry */ + @Override public String getTestQuery() { return "P00340"; } + @Override public String getDbName() { return "Uniprot"; // getDbSource(); @@ -327,4 +332,13 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy { return 0; } + + @Override + public int getMaximumQueryCount() + { + // relocated this commented out code... + // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50)); + // return 50; + return super.getMaximumQueryCount(); + } } diff --git a/src/jalview/ws/dbsources/UnprotName.java b/src/jalview/ws/dbsources/UniprotName.java similarity index 88% rename from src/jalview/ws/dbsources/UnprotName.java rename to src/jalview/ws/dbsources/UniprotName.java index 5dbc960..fa693c0 100644 --- a/src/jalview/ws/dbsources/UnprotName.java +++ b/src/jalview/ws/dbsources/UniprotName.java @@ -20,6 +20,8 @@ */ package jalview.ws.dbsources; +import jalview.datamodel.DBRefSource; + /** * Canonical Uniprot fetcher instance specifically retrieving UP_NAME * references. @@ -27,8 +29,7 @@ package jalview.ws.dbsources; * @author JimP * */ -public class UnprotName extends Uniprot implements - jalview.ws.seqfetcher.DbSourceProxy +public class UniprotName extends Uniprot { /* @@ -36,9 +37,10 @@ public class UnprotName extends Uniprot implements * * @see jalview.ws.dbsources.Uniprot#getDbSource() */ + @Override public String getDbSource() { - return jalview.datamodel.DBRefSource.UP_NAME; + return DBRefSource.UP_NAME; } } diff --git a/src/jalview/ws/dbsources/Xfam.java b/src/jalview/ws/dbsources/Xfam.java index c392ce6..26c9997 100644 --- a/src/jalview/ws/dbsources/Xfam.java +++ b/src/jalview/ws/dbsources/Xfam.java @@ -22,6 +22,7 @@ package jalview.ws.dbsources; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; +import jalview.io.FormatAdapter; import jalview.ws.seqfetcher.DbSourceProxyImpl; /** @@ -40,10 +41,12 @@ public abstract class Xfam extends DbSourceProxyImpl protected abstract String getXFAMURL(); + @Override public abstract String getDbVersion(); abstract String getXfamSource(); + @Override public AlignmentI getSequenceRecords(String queries) throws Exception { // TODO: this is not a perfect implementation. We need to be able to add @@ -51,9 +54,8 @@ public abstract class Xfam extends DbSourceProxyImpl // retrieved. startQuery(); // TODO: trap HTTP 404 exceptions and return null - AlignmentI rcds = new jalview.io.FormatAdapter().readFile(getXFAMURL() - + queries.trim().toUpperCase(), jalview.io.FormatAdapter.URL, - "STH"); + AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL() + + queries.trim().toUpperCase(), FormatAdapter.URL, "STH"); for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++) { rcds.getSequenceAt(s).addDBRef(new DBRefEntry(getXfamSource(), @@ -70,4 +72,13 @@ public abstract class Xfam extends DbSourceProxyImpl return rcds; } + /** + * Pfam and Rfam provide alignments + */ + @Override + public boolean isAlignmentSource() + { + return true; + } + } diff --git a/src/jalview/ws/seqfetcher/ASequenceFetcher.java b/src/jalview/ws/seqfetcher/ASequenceFetcher.java index 8656c25..1e3ae7a 100644 --- a/src/jalview/ws/seqfetcher/ASequenceFetcher.java +++ b/src/jalview/ws/seqfetcher/ASequenceFetcher.java @@ -20,11 +20,13 @@ */ package jalview.ws.seqfetcher; +import jalview.bin.Cache; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; import jalview.util.MessageManager; +import jalview.util.QuickSort; import java.util.ArrayList; import java.util.Enumeration; @@ -62,11 +64,11 @@ public class ASequenceFetcher return null; } String[] sf = new String[FETCHABLEDBS.size()]; - Enumeration e = FETCHABLEDBS.keys(); + Enumeration e = FETCHABLEDBS.keys(); int i = 0; while (e.hasMoreElements()) { - sf[i++] = (String) e.nextElement(); + sf[i++] = e.nextElement(); } ; return sf; @@ -74,25 +76,25 @@ public class ASequenceFetcher public boolean isFetchable(String source) { - Enumeration e = FETCHABLEDBS.keys(); + Enumeration e = FETCHABLEDBS.keys(); while (e.hasMoreElements()) { - String db = (String) e.nextElement(); + String db = e.nextElement(); if (source.compareToIgnoreCase(db) == 0) { return true; } } - jalview.bin.Cache.log.warn("isFetchable doesn't know about '" + source + Cache.log.warn("isFetchable doesn't know about '" + source + "'"); return false; } - public SequenceI[] getSequences(jalview.datamodel.DBRefEntry[] refs) + public SequenceI[] getSequences(DBRefEntry[] refs) { SequenceI[] ret = null; - Vector rseqs = new Vector(); - Hashtable> queries = new Hashtable(); + Vector rseqs = new Vector(); + Hashtable> queries = new Hashtable>(); for (int r = 0; r < refs.length; r++) { if (!queries.containsKey(refs[r].getSource())) @@ -125,15 +127,12 @@ public class ASequenceFetcher while (fetchers.hasNext()) { List queriesMade = new ArrayList(); - HashSet queriesFound = new HashSet(); + HashSet queriesFound = new HashSet(); try { DbSourceProxy fetcher = fetchers.next(); - boolean doMultiple = fetcher.getAccessionSeparator() != null; // No - // separator - // - no - // Multiple - // Queries + boolean doMultiple = fetcher.getAccessionSeparator() != null; + // No separator - no Multiple Queries while (!queriesLeft.isEmpty()) { StringBuffer qsb = new StringBuffer(); @@ -228,11 +227,11 @@ public class ASequenceFetcher if (rseqs.size() > 0) { ret = new SequenceI[rseqs.size()]; - Enumeration sqs = rseqs.elements(); + Enumeration sqs = rseqs.elements(); int si = 0; while (sqs.hasMoreElements()) { - SequenceI s = (SequenceI) sqs.nextElement(); + SequenceI s = sqs.nextElement(); ret[si++] = s; s.updatePDBIds(); } @@ -288,7 +287,7 @@ public class ASequenceFetcher { nm[i++] = "" + s.getTier() + s.getDbName().toLowerCase(); } - jalview.util.QuickSort.sort(nm, l); + QuickSort.sort(nm, l); dbs = new ArrayList(); for (i = l.length - 1; i >= 0; i--) { @@ -303,8 +302,7 @@ public class ASequenceFetcher } /** - * constructs and instance of the proxy and registers it as a valid - * dbrefsource + * constructs an instance of the proxy and registers it as a valid dbrefsource * * @param dbSourceProxy * reference for class implementing @@ -312,7 +310,7 @@ public class ASequenceFetcher */ protected void addDBRefSourceImpl( Class dbSourceProxy) - throws java.lang.IllegalArgumentException + throws IllegalArgumentException { DbSourceProxy proxy = null; try @@ -359,34 +357,6 @@ public class ASequenceFetcher } /** - * test if the database handler for dbName contains the given dbProperty when - * a dbName resolves to a set of proxies - this method will return the result - * of the test for the first instance. TODO implement additional method to - * query all sources for a db to find one with a particular property - * - * @param dbName - * @param dbProperty - * @return true if proxy has the given property - */ - public boolean hasDbSourceProperty(String dbName, String dbProperty) - { - // TODO: decide if invalidDbName exception is thrown here. - - List proxies = getSourceProxy(dbName); - if (proxies != null) - { - for (DbSourceProxy proxy : proxies) - { - if (proxy.getDbSourceProperties() != null) - { - return proxy.getDbSourceProperties().containsKey(dbProperty); - } - } - } - return false; - } - - /** * select sources which are implemented by instances of the given class * * @param class that implements DbSourceProxy @@ -394,7 +364,7 @@ public class ASequenceFetcher */ public String[] getDbInstances(Class class1) { - if (!jalview.ws.seqfetcher.DbSourceProxy.class.isAssignableFrom(class1)) + if (!DbSourceProxy.class.isAssignableFrom(class1)) { throw new Error( MessageManager @@ -407,11 +377,11 @@ public class ASequenceFetcher return null; } String[] sources = null; - Vector src = new Vector(); - Enumeration dbs = FETCHABLEDBS.keys(); + Vector src = new Vector(); + Enumeration dbs = FETCHABLEDBS.keys(); while (dbs.hasMoreElements()) { - String dbn = (String) dbs.nextElement(); + String dbn = dbs.nextElement(); for (DbSourceProxy dbp : FETCHABLEDBS.get(dbn).values()) { if (class1.isAssignableFrom(dbp.getClass())) @@ -429,7 +399,7 @@ public class ASequenceFetcher public DbSourceProxy[] getDbSourceProxyInstances(Class class1) { - ArrayList prlist = new ArrayList(); + List prlist = new ArrayList(); for (String fetchable : getSupportedDb()) { for (DbSourceProxy pr : getSourceProxy(fetchable)) diff --git a/src/jalview/ws/seqfetcher/DbSourceProxy.java b/src/jalview/ws/seqfetcher/DbSourceProxy.java index 33f62b6..1e62d13 100644 --- a/src/jalview/ws/seqfetcher/DbSourceProxy.java +++ b/src/jalview/ws/seqfetcher/DbSourceProxy.java @@ -78,15 +78,6 @@ public interface DbSourceProxy public Regex getAccessionValidator(); /** - * DbSource properties hash - define the capabilities of this source Property - * hash methods defined in DbSourceProxyImpl. See constants in - * jalview.datamodel.DBRefSource for definition of properties. - * - * @return - */ - public Hashtable getDbSourceProperties(); - - /** * * @return a test/example query that can be used to validate retrieval and * parsing mechanisms @@ -133,19 +124,43 @@ public interface DbSourceProxy public StringBuffer getRawRecords(); /** - * Find out more info about the source. + * Tier for this data source * - * @param dbsourceproperty - * - one of the database reference source properties in - * jalview.datamodel.DBRefSource - * @return true if the source has this property + * @return 0 - primary datasource, 1 - das primary source, 2 - secondary */ - public boolean isA(Object dbsourceproperty); + public int getTier(); /** - * Tier for this data source + * Extracts valid accession strings from a query string. If there is an + * accession id validator, returns the the matched region or the first + * subgroup match from the matched region; else just returns the whole query. * - * @return 0 - primary datasource, 1 - das primary source, 2 - secondary + * @param query + * @return */ - public int getTier(); -} + String getAccessionIdFromQuery(String query); + + /** + * Returns the maximum number of accession ids that can be queried in one + * request. + * + * @return + */ + public int getMaximumQueryCount(); + + /** + * Returns true if the source may provide coding DNA i.e. sequences with + * implicit peptide products + * + * @return + */ + public boolean isDnaCoding(); + + /** + * Answers true if the database is a source of alignments (for example, domain + * families) + * + * @return + */ + public boolean isAlignmentSource(); +} \ No newline at end of file diff --git a/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java b/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java index 85a729d..1e1468f 100644 --- a/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java +++ b/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java @@ -24,7 +24,7 @@ import jalview.datamodel.AlignmentI; import jalview.io.FormatAdapter; import jalview.io.IdentifyFile; -import java.util.Hashtable; +import com.stevesoft.pat.Regex; /** * common methods for implementations of the DbSourceProxy interface. @@ -34,50 +34,21 @@ import java.util.Hashtable; */ public abstract class DbSourceProxyImpl implements DbSourceProxy { - public DbSourceProxyImpl() - { - // default constructor - do nothing probably. - } - private Hashtable props = null; - - /* - * (non-Javadoc) - * - * @see jalview.ws.DbSourceProxy#getDbSourceProperties() - */ - public Hashtable getDbSourceProperties() - { - if (props == null) - { - props = new Hashtable(); - } - return props; - } + boolean queryInProgress = false; - protected void addDbSourceProperty(Object propname) - { - addDbSourceProperty(propname, propname); - } + protected StringBuffer results = null; - protected void addDbSourceProperty(Object propname, Object propvalue) + public DbSourceProxyImpl() { - if (props == null) - { - props = new Hashtable(); - } - props.put(propname, propvalue); } - boolean queryInProgress = false; - - protected StringBuffer results = null; - /* * (non-Javadoc) * * @see jalview.ws.DbSourceProxy#getRawRecords() */ + @Override public StringBuffer getRawRecords() { return results; @@ -88,6 +59,7 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy * * @see jalview.ws.DbSourceProxy#queryInProgress() */ + @Override public boolean queryInProgress() { return queryInProgress; @@ -131,10 +103,48 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy } @Override - public boolean isA(Object dbsourceproperty) + public String getAccessionIdFromQuery(String query) + { + Regex vgr = getAccessionValidator(); + if (vgr == null) + { + return query; + } + vgr.search(query); + if (vgr.numSubs() > 0) + { + return (vgr.stringMatched(1)); + } + else + { + return (vgr.stringMatched()); + } + } + + /** + * Default is only one accession id per query - override if more are allowed. + */ + @Override + public int getMaximumQueryCount() + { + return 1; + } + + /** + * Returns false - override to return true for DNA coding data sources + */ + @Override + public boolean isDnaCoding() { - assert (dbsourceproperty != null); - return (props == null) ? false : props.containsKey(dbsourceproperty); + return false; } + /** + * Answers false - override as required in subclasses + */ + @Override + public boolean isAlignmentSource() + { + return false; + } } diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java index 978316b..f3526bc 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java @@ -1,199 +1,27 @@ package jalview.ext.ensembl; -import jalview.datamodel.Alignment; -import jalview.datamodel.SequenceI; -import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; -import jalview.io.AppletFormatAdapter; -import jalview.io.FastaFile; -import jalview.io.FileParse; -import jalview.util.DBRefUtils; - import java.lang.reflect.Method; -import java.util.Arrays; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; + public class EnsemblSeqProxyTest { - @Test - public void testCheckEnsembl() - { - SeqFetcher sf = new SeqFetcher(); - sf.setTestEnsemblStatus(true); - sf.setTesting(true); - Assert.assertTrue(sf.isEnsemblAvailable()); - sf.setTestEnsemblStatus(false); - Assert.assertFalse(sf.isEnsemblAvailable()); - } - - @Test(suiteName = "live") - public void testLiveCheckEnsembl() - { - SeqFetcher sf = new SeqFetcher(); - boolean isAvailable = sf.isEnsemblAvailable(); - System.out.println("Ensembl is " - + (isAvailable ? "UP!" : "DOWN ******************* BAD!")); - } - - @DataProvider(name = "ens_seqs") + @DataProvider(name = "queries") public Object[][] createData(Method m) { - System.out.println(m.getName()); - return allSeqs; + return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } }; } - public static Object[][] allSeqs = new Object[][] - { - { - EnsemblSeqType.PROTEIN, - "CCDS5863.1", - ">CCDS5863.1\n" - + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n" - + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n" - + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n" - + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n" - + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n" - + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n" - + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n" - + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n" - + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n" - + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n" - + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n" - + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n" - + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" }, - { - EnsemblSeqType.TRANSCRIPT, - "CCDS5863.1", - ">CCDS5863.1\n" - + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n" - + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n" - + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n" - + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n" - + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n" - + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n" - + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n" - + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n" - + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n" - + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n" - + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n" - + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n" - + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n" - + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n" - + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n" - + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n" - + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n" - + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n" - + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n" - + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n" - + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n" - + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n" - + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n" - + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n" - + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n" - + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n" - + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n" - + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n" - + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n" - + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n" - + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n" - + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n" - + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n" - + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n" - + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n" - + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n" - + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n" - + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n" - + "GGTGCGTTTCCTGTCCACTGA\n" }, - { - EnsemblSeqType.PROTEIN, - "ENSP00000288602", - ">ENSP00000288602\n" - + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n" - + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n" - + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n" - + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n" - + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n" - + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n" - + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n" - + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n" - + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n" - + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n" - + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n" - + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n" - + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } }; - - @Test(dataProvider = "ens_seqs", suiteName = "live") - public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq) - throws Exception - { - SeqFetcher sf = new SeqFetcher(); - FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[] - { sq })); - SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray(); - FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE); - SequenceI[] trueSqs = trueRes.getSeqsAsArray(); - Assert.assertEquals(sqs.length, trueSqs.length, - "Different number of sequences retrieved for query " + sq); - Alignment ral = new Alignment(sqs); - for (SequenceI tr : trueSqs) - { - SequenceI[] rseq; - Assert.assertNotNull( - rseq = ral.findSequenceMatch(tr.getName()), - "Couldn't find sequences matching expected sequence " - + tr.getName()); - Assert.assertEquals(rseq.length, 1, - "Expected only one sequence for sequence ID " + tr.getName()); - Assert.assertEquals( - rseq[0].getSequenceAsString(), - tr.getSequenceAsString(), - "Sequences differ for " + tr.getName() + "\n" + "Exp:" - + tr.getSequenceAsString() + "\n" + "Got:" - + rseq[0].getSequenceAsString()); - - } - } - - @Test(dataProvider = "ens_seqs") - public void testRegexForProxy(EnsemblSeqType type, String sq, - String fastasq) throws Exception + @Test(dataProvider = "queries") + public void testIsValidReference(String query) throws Exception { EnsemblSeqProxy esq = new EnsemblProtein(); - Assert.assertTrue(esq.isValidReference(sq), - "Expected reference string " + sq + " to be valid for regex " + Assert.assertTrue(esq.isValidReference(query), + "Expected reference string " + query + + " to be valid for regex " + esq.getAccessionValidator().toString()); - - Assert.assertEquals(sq, DBRefUtils.processQueryToAccessionFor(esq, sq), - "Regex for " + esq.getClass().toString() + " not correct."); } - // TODO: - // sequence query with ENSG and anything other than a genomic type will yield - // sequences with different IDs which will - // break the post-processing stage where DBRefs are assigned to sequences. - // -> multiple_sequences = true is needed additional parameter - // http://rest.ensembl.org/sequence/id/ENSG00000157764?content-type=text/x-json;type=protein;multiple_sequences=true - // result with four transcripts, cds, cdna, and protein products. - // * - // features for ENG - - // http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=cds&feature=exon&feature=transcript&content-type=text/x-gff3 - // transcript: gives locus, all transcript products with ENSG parents - // gene: give all ENSG on locus - // exon: all exon boundaries. CDS same info. - - // @Test(dataProvider = "ens_seqs", suiteName = "live") - // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq) - // throws Exception - // { - // - // { - // Assert.assertTrue(rseq[0].getDBRef() != null - // && rseq[0].getDBRef().length > 0, - // "No database references added to sequence by fetcher."); - // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(), - // new DBRefEntry("ENSEMBL", null, sq)), - // "Could't find database references added to sequence by fetcher."); - // - // } } \ No newline at end of file diff --git a/test/jalview/ext/ensembl/SeqFetcherTest.java b/test/jalview/ext/ensembl/SeqFetcherTest.java new file mode 100644 index 0000000..8762698 --- /dev/null +++ b/test/jalview/ext/ensembl/SeqFetcherTest.java @@ -0,0 +1,175 @@ +package jalview.ext.ensembl; + +import jalview.datamodel.Alignment; +import jalview.datamodel.SequenceI; +import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; +import jalview.io.AppletFormatAdapter; +import jalview.io.FastaFile; +import jalview.io.FileParse; + +import java.lang.reflect.Method; +import java.util.Arrays; + +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class SeqFetcherTest +{ + private static final Object[][] allSeqs = new Object[][] { + { + EnsemblSeqType.PROTEIN, + "CCDS5863.1", + ">CCDS5863.1\n" + + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n" + + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n" + + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n" + + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n" + + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n" + + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n" + + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n" + + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n" + + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n" + + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n" + + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n" + + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n" + + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" }, + { + EnsemblSeqType.TRANSCRIPT, + "CCDS5863.1", + ">CCDS5863.1\n" + + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n" + + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n" + + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n" + + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n" + + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n" + + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n" + + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n" + + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n" + + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n" + + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n" + + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n" + + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n" + + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n" + + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n" + + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n" + + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n" + + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n" + + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n" + + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n" + + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n" + + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n" + + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n" + + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n" + + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n" + + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n" + + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n" + + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n" + + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n" + + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n" + + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n" + + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n" + + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n" + + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n" + + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n" + + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n" + + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n" + + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n" + + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n" + + "GGTGCGTTTCCTGTCCACTGA\n" }, + { + EnsemblSeqType.PROTEIN, + "ENSP00000288602", + ">ENSP00000288602\n" + + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n" + + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n" + + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n" + + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n" + + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n" + + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n" + + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n" + + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n" + + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n" + + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n" + + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n" + + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n" + + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } }; + + @DataProvider(name = "ens_seqs") + public Object[][] createData(Method m) + { + System.out.println(m.getName()); + return allSeqs; + } + + @Test(dataProvider = "ens_seqs", suiteName = "live") + public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq) + throws Exception + { + SeqFetcher sf = new SeqFetcher(); + FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[] + { sq })); + SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray(); + FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE); + SequenceI[] trueSqs = trueRes.getSeqsAsArray(); + Assert.assertEquals(sqs.length, trueSqs.length, + "Different number of sequences retrieved for query " + sq); + Alignment ral = new Alignment(sqs); + for (SequenceI tr : trueSqs) + { + SequenceI[] rseq; + Assert.assertNotNull( + rseq = ral.findSequenceMatch(tr.getName()), + "Couldn't find sequences matching expected sequence " + + tr.getName()); + Assert.assertEquals(rseq.length, 1, + "Expected only one sequence for sequence ID " + tr.getName()); + Assert.assertEquals( + rseq[0].getSequenceAsString(), + tr.getSequenceAsString(), + "Sequences differ for " + tr.getName() + "\n" + "Exp:" + + tr.getSequenceAsString() + "\n" + "Got:" + + rseq[0].getSequenceAsString()); + + } + } + + @Test(suiteName = "live") + public void testLiveCheckEnsembl() + { + SeqFetcher sf = new SeqFetcher(); + boolean isAvailable = sf.isEnsemblAvailable(); + System.out.println("Ensembl is " + + (isAvailable ? "UP!" + : "DOWN or unreachable ******************* BAD!")); + } + // TODO: + // sequence query with ENSG and anything other than a genomic type will yield + // sequences with different IDs which will + // break the post-processing stage where DBRefs are assigned to sequences. + // -> multiple_sequences = true is needed additional parameter + // http://rest.ensembl.org/sequence/id/ENSG00000157764?content-type=text/x-json;type=protein;multiple_sequences=true + // result with four transcripts, cds, cdna, and protein products. + // * + // features for ENG - + // http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=cds&feature=exon&feature=transcript&content-type=text/x-gff3 + // transcript: gives locus, all transcript products with ENSG parents + // gene: give all ENSG on locus + // exon: all exon boundaries. CDS same info. + + // @Test(dataProvider = "ens_seqs", suiteName = "live") + // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq) + // throws Exception + // { + // + // { + // Assert.assertTrue(rseq[0].getDBRef() != null + // && rseq[0].getDBRef().length > 0, + // "No database references added to sequence by fetcher."); + // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(), + // new DBRefEntry("ENSEMBL", null, sq)), + // "Could't find database references added to sequence by fetcher."); + // + // } + +} diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java new file mode 100644 index 0000000..7a9b553 --- /dev/null +++ b/test/jalview/ws/SequenceFetcherTest.java @@ -0,0 +1,232 @@ +package jalview.ws; + +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.ws.seqfetcher.ASequenceFetcher; +import jalview.ws.seqfetcher.DbSourceProxy; + +import java.util.Enumeration; +import java.util.List; +import java.util.Vector; + +public class SequenceFetcherTest +{ + + /** + * simple run method to test dbsources. + * + * @param argv + */ + public static void main(String[] argv) + { + // TODO: extracted from SequenceFetcher - convert to proper unit test with + // assertions + + AlignmentI ds = null; + Vector noProds = new Vector(); + String usage = "SequenceFetcher.main [-nodas] [ []]\n" + + "With no arguments, all DbSources will be queried with their test Accession number.\n" + + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n" + + "If given two arguments, SequenceFetcher will try to find the DbFetcher corresponding to and retrieve from it.\n" + + "The -nodas option will exclude DAS sources from the database fetchers Jalview will try to use."; + boolean withDas = true; + if (argv != null && argv.length > 0 + && argv[0].toLowerCase().startsWith("-nodas")) + { + withDas = false; + String targs[] = new String[argv.length - 1]; + System.arraycopy(argv, 1, targs, 0, targs.length); + argv = targs; + } + if (argv != null && argv.length > 0) + { + List sps = new SequenceFetcher(withDas) + .getSourceProxy(argv[0]); + + if (sps != null) + { + for (DbSourceProxy sp : sps) + { + AlignmentI al = null; + try + { + al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp + .getTestQuery()); + } catch (Exception e) + { + e.printStackTrace(); + System.err.println("Error when retrieving " + + (argv.length > 1 ? argv[1] : sp.getTestQuery()) + + " from " + argv[0] + "\nUsage: " + usage); + } + SequenceI[] prod = al.getSequencesArray(); + if (al != null) + { + for (int p = 0; p < prod.length; p++) + { + System.out.println("Prod " + p + ": " + + prod[p].getDisplayId(true) + " : " + + prod[p].getDescription()); + } + } + } + return; + } + else + { + System.err.println("Can't resolve " + argv[0] + + " as a database name. Allowed values are :\n" + + new SequenceFetcher().getSupportedDb()); + } + System.out.println(usage); + return; + } + ASequenceFetcher sfetcher = new SequenceFetcher(withDas); + String[] dbSources = sfetcher.getSupportedDb(); + for (int dbsource = 0; dbsource < dbSources.length; dbsource++) + { + String db = dbSources[dbsource]; + // skip me + if (db.equals(DBRefSource.PDB)) + { + continue; + } + for (DbSourceProxy sp : sfetcher.getSourceProxy(db)) + { + System.out.println("Source: " + sp.getDbName() + " (" + db + + "): retrieving test:" + sp.getTestQuery()); + AlignmentI al = null; + try + { + al = sp.getSequenceRecords(sp.getTestQuery()); + if (al != null && al.getHeight() > 0) + { + boolean dna = sp.isDnaCoding(); + // try and find products + String types[] = jalview.analysis.CrossRef + .findSequenceXrefTypes(dna, al.getSequencesArray()); + if (types != null) + { + System.out.println("Xref Types for: " + + (dna ? "dna" : "prot")); + for (int t = 0; t < types.length; t++) + { + System.out.println("Type: " + types[t]); + SequenceI[] prod = jalview.analysis.CrossRef + .findXrefSequences(al.getSequencesArray(), dna, + types[t]).getSequencesArray(); + System.out.println("Found " + + ((prod == null) ? "no" : "" + prod.length) + + " products"); + if (prod != null) + { + for (int p = 0; p < prod.length; p++) + { + System.out.println("Prod " + p + ": " + + prod[p].getDisplayId(true)); + } + } + } + } + else + { + noProds.addElement((dna ? new Object[] { al, al } + : new Object[] { al })); + } + + } + } catch (Exception ex) + { + System.out.println("ERROR:Failed to retrieve test query."); + ex.printStackTrace(System.out); + } + + if (al == null) + { + System.out.println("ERROR:No alignment retrieved."); + StringBuffer raw = sp.getRawRecords(); + if (raw != null) + { + System.out.println(raw.toString()); + } + else + { + System.out.println("ERROR:No Raw results."); + } + } + else + { + System.out.println("Retrieved " + al.getHeight() + " sequences."); + for (int s = 0; s < al.getHeight(); s++) + { + SequenceI sq = al.getSequenceAt(s); + while (sq.getDatasetSequence() != null) + { + sq = sq.getDatasetSequence(); + + } + if (ds == null) + { + ds = new Alignment(new SequenceI[] { sq }); + + } + else + { + ds.addSequence(sq); + } + } + } + System.out.flush(); + System.err.flush(); + + } + if (noProds.size() > 0) + { + Enumeration ts = noProds.elements(); + while (ts.hasMoreElements()) + + { + Object[] typeSq = (Object[]) ts.nextElement(); + boolean dna = (typeSq.length > 1); + AlignmentI al = (AlignmentI) typeSq[0]; + System.out.println("Trying getProducts for " + + al.getSequenceAt(0).getDisplayId(true)); + System.out.println("Search DS Xref for: " + + (dna ? "dna" : "prot")); + // have a bash at finding the products amongst all the retrieved + // sequences. + SequenceI[] seqs = al.getSequencesArray(); + Alignment prodal = jalview.analysis.CrossRef.findXrefSequences( + seqs, dna, null, ds); + System.out.println("Found " + + ((prodal == null) ? "no" : "" + prodal.getHeight()) + + " products"); + if (prodal != null) + { + SequenceI[] prod = prodal.getSequencesArray(); // note + // should + // test + // rather + // than + // throw + // away + // codon + // mapping + // (if + // present) + for (int p = 0; p < prod.length; p++) + { + System.out.println("Prod " + p + ": " + + prod[p].getDisplayId(true)); + } + } + } + + } + + } + } + +} -- 1.7.10.2