public static final String[] PROTEINSTR = { PDB };
public static final String[] DOMAINDBS = { PFAM, RFAM };
-
- /**
- * set of unique DBRefSource property constants. These could be used to
- * reconstruct the above groupings
- */
- public static final Object SEQDB = "SQ";
-
- /**
- * database of nucleic acid sequences
- */
- public static final Object DNASEQDB = "NASQ";
-
- /**
- * database of amino acid sequences
- */
- public static final Object PROTSEQDB = "PROTSQ";
-
- /**
- * database of cDNA sequences
- */
- public static final Object CODINGSEQDB = "CODING";
-
- /**
- * database of na sequences with exon annotation
- */
- public static final Object DNACODINGSEQDB = "XONCODING";
-
- /**
- * DB returns several sequences associated with a protein/nucleotide domain
- */
- public static final Object DOMAINDB = "DOMAIN";
-
- /**
- * DB query can take multiple accession codes concatenated by a separator.
- * Value of property indicates maximum number of accession codes to send at a
- * time.
- */
- public static final Object MULTIACC = "MULTIACC";
-
- /**
- * DB query returns an alignment for each accession provided.
- */
- public static final Object ALIGNMENTDB = "ALIGNMENTS";
}
public class EnsemblCdna extends EnsemblSeqProxy
{
- public EnsemblCdna() throws Exception
+ public EnsemblCdna()
{
super();
}
public class EnsemblCds extends EnsemblSeqProxy
{
- public EnsemblCds() throws Exception
+ public EnsemblCds()
{
super();
}
public class EnsemblGenome extends EnsemblSeqProxy
{
- public EnsemblGenome() throws Exception
+ public EnsemblGenome()
{
super();
}
public class EnsemblProtein extends EnsemblSeqProxy
{
- public EnsemblProtein() throws Exception
+ public EnsemblProtein()
{
super();
}
return EnsemblSeqType.PROTEIN;
}
+ @Override
+ public boolean isDnaCoding()
+ {
+ return false;
+ }
+
+ /**
+ * Test query is to the protein translation of transcript ENST00000288602
+ */
+ @Override
+ public String getTestQuery()
+ {
+ return "ENSP00000288602";
+ }
+
}
package jalview.ext.ensembl;
+import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
-import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceI;
import jalview.exceptions.JalviewException;
import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
import jalview.io.FastaFile;
import jalview.io.FileParse;
import jalview.util.DBRefUtils;
-import jalview.ws.seqfetcher.DbSourceProxy;
import jalview.ws.seqfetcher.DbSourceProxyImpl;
import java.util.ArrayList;
import com.stevesoft.pat.Regex;
-public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
- DbSourceProxy
+public abstract class EnsemblSeqProxy extends DbSourceProxyImpl
{
SeqFetcher sf;
- public EnsemblSeqProxy() throws Exception
+ public EnsemblSeqProxy()
{
sf = new SeqFetcher();
- addDbSourceProperty(DBRefSource.MULTIACC);
- addDbSourceProperty(DBRefSource.SEQDB);
- // decide whether these need to be filtered according to return type
- addDbSourceProperty(DBRefSource.PROTSEQDB);
- addDbSourceProperty(DBRefSource.DNACODINGSEQDB);
- addDbSourceProperty(DBRefSource.DNASEQDB);
}
@Override
return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
}
+ /**
+ * Default test query is a transcript
+ */
@Override
public String getTestQuery()
{
- return "ENSP00000288602";
+ return "ENST00000288602";
}
@Override
List<String> tids, ids = new ArrayList<String>();
tids = Arrays.asList(queries.split(" +"));
AlignmentI rtn = null;
- for (int v = 0, vSize = tids.size(); v < vSize; v += 50)
+
+ /*
+ * execute queries, if necessary in batches of the
+ * maximum allowed number of ids
+ */
+ int maxQueryCount = getMaximumQueryCount();
+ for (int v = 0, vSize = tids.size(); v < vSize; v += maxQueryCount)
{
- int p = v + 50;
- if (p > vSize)
- {
- p = vSize;
- }
- ;
+ int p = Math.min(vSize, v + maxQueryCount);
ids = tids.subList(v, p);
try
{
}
if (fr.getSeqs().size() > 0)
{
- AlignmentI seqal = new jalview.datamodel.Alignment(
+ AlignmentI seqal = new Alignment(
fr.getSeqsAsArray());
for (SequenceI sq:seqal.getSequences())
{
{
return 0;
}
+
+ /**
+ * Returns the maximum number of accession ids sent in a single request; longer
+ * query lists are executed in batches of this size. A sequence/id POST request
+ * currently allows up to 50 queries.
+ *
+ * @return the batch size limit, currently 50
+ * @see <a href="http://rest.ensembl.org/documentation/info/sequence_id_post">Ensembl REST documentation: POST sequence/id</a>
+ */
+ @Override
+ public int getMaximumQueryCount()
+ {
+ return 50;
+ }
+
+ @Override
+ public boolean isDnaCoding()
+ {
+ return true;
+ }
}
public class EnsemblTranscript extends EnsemblSeqProxy
{
- public EnsemblTranscript() throws Exception
+ // TODO is this class needed? it seems to duplicate EnsemblProtein
+ public EnsemblTranscript()
{
super();
}
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
-import java.util.Collections;
+import java.util.ArrayList;
import java.util.List;
import org.apache.http.NameValuePair;
{
private final static String ENSEMBL_REST = "rest.ensembl.org";
- private static boolean ensemblRestavailable = false;
+ private static final String SEQUENCE_ID_URL = "http://" + ENSEMBL_REST + "/sequence/id";
- private static long lastCheck = -1;
+ private static final String PING_URL = "http://" + ENSEMBL_REST + "/info/ping";
- public boolean isEnsemblAvailable()
- {
- if (isTesting || !ensemblRestavailable
- || System.currentTimeMillis() - lastCheck > 10000)
- {
- checkEnsembl();
- lastCheck = System.currentTimeMillis();
- }
- return ensemblRestavailable;
- }
-
- private boolean isTesting, testEnsemblStatus;
+ private final static long RETEST_INTERVAL = 10000L; // 10 seconds
- /**
- * @return the isTesting
- */
- public boolean isTesting()
- {
- return isTesting;
- }
+ private static boolean ensemblRestAvailable = false;
- /**
- * @param isTesting
- * the isTesting to set
- */
- public void setTesting(boolean isTesting)
- {
- this.isTesting = isTesting;
- }
+ private static long lastCheck = -1;
/**
- * @return the testEnsemblStatus
+ * Rechecks if Ensembl is responding, unless the last check was successful and
+ * the retest interval has not yet elapsed. Returns true if Ensembl is up,
+ * else false.
+ *
+ * Only a successful result is cached: while the service is flagged as
+ * unavailable, every call performs a fresh check regardless of the retest
+ * interval. The availability flag and the time of the last check are held in
+ * static fields, so they are shared by all instances.
+ *
+ * NOTE(review): the static state is read and written without
+ * synchronization - confirm this is only called from a single thread.
+ *
+ * @return true if the cached or freshly performed check found Ensembl
+ * responding, else false
*/
- public boolean isTestEnsemblStatus()
+ public boolean isEnsemblAvailable()
{
- return testEnsemblStatus;
+ long now = System.currentTimeMillis();
+ boolean retest = now - lastCheck > RETEST_INTERVAL;
+ if (ensemblRestAvailable && !retest)
+ {
+ return true;
+ }
+ ensemblRestAvailable = checkEnsembl();
+ lastCheck = now;
+ return ensemblRestAvailable;
}
/**
- * @param testEnsemblStatus
- * the testEnsemblStatus to set
+ * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
+ * successful, else false. Success means an HTTP response code in the range
+ * 200-299. Anything thrown while connecting (caught as Throwable) is reported
+ * to stderr by message only, and counts as failure.
+ *
+ * NOTE(review): the connection is not disconnected if getResponseCode()
+ * throws, and no connect or read timeout is set in this method.
+ *
+ * @return true if the ping request returned a 2xx response code, else false
*/
- public void setTestEnsemblStatus(boolean testEnsemblStatus)
+ private boolean checkEnsembl()
{
- this.testEnsemblStatus = testEnsemblStatus;
- }
-
- private void checkEnsembl()
- {
- if (isTesting)
- {
- ensemblRestavailable = testEnsemblStatus;
- return;
- }
try
{
- URL ping = new URL("http://" + ENSEMBL_REST + "/info/ping");
- HttpURLConnection conn = (HttpURLConnection) (ping.openConnection());
- if (conn.getResponseCode() >= 200 && conn.getResponseCode() < 300)
+ URL ping = new URL(PING_URL);
+ HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
+ int rc = conn.getResponseCode();
+ conn.disconnect();
+ if (rc >= 200 && rc < 300)
{
- ensemblRestavailable = true;
- return;
+ return true;
}
- } catch (Error err)
- {
- err.printStackTrace();
- } catch (Exception exx)
+ } catch (Throwable t)
{
- exx.printStackTrace();
+ System.err.println("Error connecting to " + PING_URL + ": "
+ + t.getMessage());
}
- ensemblRestavailable = false;
+ return false;
}
public SeqFetcher()
}
/**
- * reolve request type as an argument for sequence and features queries
+ * Returns a list of additional URL query parameters to specify the desired
+ * sequence type (genomic/cds/protein etc), and data format Fasta
*
* @param type
+ * the sequence type; its getType() value is sent as the "type"
+ * parameter
+ * @return a new list holding the "type" parameter followed by a
+ * "content-type" parameter with value "text/x-fasta"
*/
- public List<NameValuePair> getObjectTypeArg(EnsemblSeqType type)
+ public List<NameValuePair> getAdditionalParameters(EnsemblSeqType type)
{
- NameValuePair nameValue = new BasicNameValuePair("type", type.getType());
- return Collections.singletonList(nameValue);
+ List<NameValuePair> params = new ArrayList<NameValuePair>();
+ params.add(new BasicNameValuePair("type", type.getType()));
+ params.add(new BasicNameValuePair("content-type", "text/x-fasta"));
+ return params;
}
/**
public FileParse getSequenceReader(EnsemblSeqType returnType,
List<String> ids) throws IOException
{
+ // see http://rest.ensembl.org/documentation/info/sequence_id
- // adapted From the rest.ensembl.org documentation for sequence_id
-
- String urls = "http://" + ENSEMBL_REST + "/sequence/id";
- List<NameValuePair> vals = getObjectTypeArg(returnType);
- boolean f = true;
+ String urlstring = SEQUENCE_ID_URL;
+ List<NameValuePair> vals = getAdditionalParameters(returnType);
+ boolean first = true;
for (NameValuePair nvp : vals)
{
- if (f)
- {
- f = false;
- urls += "?";
- }
- else
- {
- urls += "&";
- }
- urls += nvp.getName() + "=" + nvp.getValue();
+ urlstring += first ? "?" : "&";
+ first = false;
+ urlstring += nvp.getName() + "=" + nvp.getValue();
}
- URL url = new URL(urls);
+ URL url = new URL(urlstring);
URLConnection connection = url.openConnection();
HttpURLConnection httpConnection = (HttpURLConnection) connection;
{
StringBuilder postBody = new StringBuilder();
postBody.append("{\"ids\":[");
- boolean first = true;
+ first = true;
for (String id : ids)
{
- if (first)
- {
- first = false;
- }
- else
+ if (!first)
{
postBody.append(",");
}
+ first = false;
postBody.append("\"");
postBody.append(id.trim());
postBody.append("\"");
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.io.FormatAdapter;
Integer.valueOf(nqueries).toString(),
proxy.getDbName() }), Thread.currentThread()
.hashCode());
- isAliSource = proxy.isA(DBRefSource.ALIGNMENTDB);
+ isAliSource = proxy.isAlignmentSource();
if (proxy.getAccessionSeparator() == null)
{
while (en.hasNext())
DBRefEntry dbr = new DBRefEntry(), found[] = null;
dbr.setSource(proxy.getDbSource());
dbr.setVersion(null);
- String accId = DBRefUtils.processQueryToAccessionFor(proxy,
- q);
+ String accId = proxy.getAccessionIdFromQuery(q);
dbr.setAccessionId(accId);
boolean rfound = false;
for (int r = 0; r < rs.length; r++)
return ref;
}
- /**
- * Extract valid accession strings from a query string. Used by the
- * SequenceFetcher and DBRefFetcher to create valid accession strings from an
- * ID string for database sources with a Regex validation field.
- *
- * @param proxy
- * @param q
- * @return q if proxy.getAccessionValidator()==null, otherwise the matched
- * region or the first subgroup match from the matched region
- */
- public static String processQueryToAccessionFor(DbSourceProxy proxy,
- String q)
- {
- if (proxy.getAccessionValidator() != null)
- {
- Regex vgr = proxy.getAccessionValidator();
- vgr.search(q);
- if (vgr.numSubs() > 0)
- {
- return (vgr.stringMatched(1));
- }
- else
- {
- return (vgr.stringMatched());
- }
- }
- else
- {
- return (q);
- }
- }
-
}
/**
* DOCUMENT ME!
*/
+ @Override
public void run()
{
if (dbSources == null)
// TODO: introduce multithread multisource queries and logic to remove a
// query from other sources if any source for a database returns a
// record
- if (dbsource.getDbSourceProperties().containsKey(
- DBRefSource.MULTIACC))
- {
- maxqlen = ((Integer) dbsource.getDbSourceProperties().get(
- DBRefSource.MULTIACC)).intValue();
- }
- else
- {
- maxqlen = 1;
- }
+ maxqlen = dbsource.getMaximumQueryCount();
+
while (queries.size() > 0 || seqIndex < currSeqs.length)
{
if (queries.size() > 0)
*/
package jalview.ws;
-import jalview.datamodel.Alignment;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.DBRefSource;
-import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblCdna;
+import jalview.ext.ensembl.EnsemblCds;
+import jalview.ext.ensembl.EnsemblGenome;
+import jalview.ext.ensembl.EnsemblProtein;
+import jalview.ws.dbsources.EmblCdsSource;
+import jalview.ws.dbsources.EmblSource;
+import jalview.ws.dbsources.Pdb;
+import jalview.ws.dbsources.PfamFull;
+import jalview.ws.dbsources.PfamSeed;
+import jalview.ws.dbsources.RfamFull;
+import jalview.ws.dbsources.RfamSeed;
+import jalview.ws.dbsources.Uniprot;
+import jalview.ws.dbsources.UniprotName;
import jalview.ws.dbsources.das.api.jalviewSourceI;
import jalview.ws.seqfetcher.ASequenceFetcher;
import jalview.ws.seqfetcher.DbSourceProxy;
import java.util.ArrayList;
-import java.util.Enumeration;
import java.util.List;
-import java.util.Vector;
/**
* This is the concrete implementation of the sequence retrieval interface
* and abstract class in jalview.ws.seqfetcher. This implements the run-time
- * discovery of sequence database clients, and provides a hardwired main for
- * testing all registered handlers.
+ * discovery of sequence database clients.
*
*/
public class SequenceFetcher extends ASequenceFetcher
public SequenceFetcher(boolean addDas)
{
- addDBRefSourceImpl(jalview.ext.ensembl.EnsemblProtein.class);
- addDBRefSourceImpl(jalview.ext.ensembl.EnsemblTranscript.class);
- addDBRefSourceImpl(jalview.ext.ensembl.EnsemblCds.class);
- addDBRefSourceImpl(jalview.ext.ensembl.EnsemblGenome.class);
- addDBRefSourceImpl(jalview.ext.ensembl.EnsemblCdna.class);
-
- addDBRefSourceImpl(jalview.ws.dbsources.EmblSource.class);
- addDBRefSourceImpl(jalview.ws.dbsources.EmblCdsSouce.class);
- addDBRefSourceImpl(jalview.ws.dbsources.Uniprot.class);
- addDBRefSourceImpl(jalview.ws.dbsources.UnprotName.class);
- addDBRefSourceImpl(jalview.ws.dbsources.Pdb.class);
- addDBRefSourceImpl(jalview.ws.dbsources.PfamFull.class);
- addDBRefSourceImpl(jalview.ws.dbsources.PfamSeed.class);
+ addDBRefSourceImpl(EnsemblProtein.class);
+ // EnsemblTranscript would just replace EnsemblProtein as the proxy for
+ // { DbSource="ENSEMBL", DbName="ENSEMBL (Protein)" }
+ // addDBRefSourceImpl(EnsemblTranscript.class);
+ addDBRefSourceImpl(EnsemblCds.class);
+ addDBRefSourceImpl(EnsemblGenome.class);
+ addDBRefSourceImpl(EnsemblCdna.class);
+
+ addDBRefSourceImpl(EmblSource.class);
+ addDBRefSourceImpl(EmblCdsSource.class);
+ addDBRefSourceImpl(Uniprot.class);
+ addDBRefSourceImpl(UniprotName.class);
+ addDBRefSourceImpl(Pdb.class);
+ addDBRefSourceImpl(PfamFull.class);
+ addDBRefSourceImpl(PfamSeed.class);
// ensures Seed alignment is 'default' for PFAM
- addDBRefSourceImpl(jalview.ws.dbsources.RfamFull.class);
- addDBRefSourceImpl(jalview.ws.dbsources.RfamSeed.class);
+ addDBRefSourceImpl(RfamFull.class);
+ addDBRefSourceImpl(RfamSeed.class);
if (addDas)
{
registerDasSequenceSources();
{
// Skip the alignment databases for the moment - they're not useful for
// verifying a single sequence against its reference source
- if (dbs.isA(DBRefSource.ALIGNMENTDB))
+ if (dbs.isAlignmentSource())
{
skip = true;
}
}
/**
- * return plaintext databse list suitable for using in a GUI element
- */
- public String[] _getOrderedSupportedSources()
- {
- String[] srcs = this.getSupportedDb();
- ArrayList dassrc = new ArrayList(), nondas = new ArrayList();
- for (int i = 0; i < srcs.length; i++)
- {
- for (DbSourceProxy dbs : getSourceProxy(srcs[i]))
- {
- String nm = dbs.getDbName();
- if (getSourceProxy(srcs[i]) instanceof jalview.ws.dbsources.das.datamodel.DasSequenceSource)
- {
- if (nm.startsWith("das:"))
- {
- nm = nm.substring(4);
- }
- dassrc.add(new String[] { srcs[i], nm.toUpperCase() });
- }
- else
- {
- nondas.add(new String[] { srcs[i], nm.toUpperCase() });
- }
- }
- }
- Object[] sorted = nondas.toArray();
- String[] tosort = new String[sorted.length];
- nondas.clear();
- for (int j = 0; j < sorted.length; j++)
- {
- tosort[j] = ((String[]) sorted[j])[1];
- }
- jalview.util.QuickSort.sort(tosort, sorted);
- int i = 0;
- // construct array with all sources listed
- srcs = new String[sorted.length + dassrc.size()];
- for (int j = sorted.length - 1; j >= 0; j--, i++)
- {
- srcs[i] = ((String[]) sorted[j])[0];
- sorted[j] = null;
- }
-
- sorted = dassrc.toArray();
- tosort = new String[sorted.length];
- dassrc.clear();
- for (int j = 0; j < sorted.length; j++)
- {
- tosort[j] = ((String[]) sorted[j])[1];
- }
- jalview.util.QuickSort.sort(tosort, sorted);
- for (int j = sorted.length - 1; j >= 0; j--, i++)
- {
- srcs[i] = ((String[]) sorted[j])[0];
- sorted[j] = null;
- }
- return srcs;
- }
-
- /**
- * simple run method to test dbsources.
- *
- * @param argv
- */
- public static void main(String[] argv)
- {
- AlignmentI ds = null;
- Vector noProds = new Vector();
- String usage = "SequenceFetcher.main [-nodas] [<DBNAME> [<ACCNO>]]\n"
- + "With no arguments, all DbSources will be queried with their test Accession number.\n"
- + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n"
- + "If given two arguments, SequenceFetcher will try to find the DbFetcher corresponding to <DBNAME> and retrieve <ACCNO> from it.\n"
- + "The -nodas option will exclude DAS sources from the database fetchers Jalview will try to use.";
- boolean withDas = true;
- if (argv != null && argv.length > 0
- && argv[0].toLowerCase().startsWith("-nodas"))
- {
- withDas = false;
- String targs[] = new String[argv.length - 1];
- System.arraycopy(argv, 1, targs, 0, targs.length);
- argv = targs;
- }
- if (argv != null && argv.length > 0)
- {
- List<DbSourceProxy> sps = new SequenceFetcher(withDas)
- .getSourceProxy(argv[0]);
-
- if (sps != null)
- {
- for (DbSourceProxy sp : sps)
- {
- AlignmentI al = null;
- try
- {
- al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp
- .getTestQuery());
- } catch (Exception e)
- {
- e.printStackTrace();
- System.err.println("Error when retrieving "
- + (argv.length > 1 ? argv[1] : sp.getTestQuery())
- + " from " + argv[0] + "\nUsage: " + usage);
- }
- SequenceI[] prod = al.getSequencesArray();
- if (al != null)
- {
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true) + " : "
- + prod[p].getDescription());
- }
- }
- }
- return;
- }
- else
- {
- System.err.println("Can't resolve " + argv[0]
- + " as a database name. Allowed values are :\n"
- + new SequenceFetcher().getSupportedDb());
- }
- System.out.println(usage);
- return;
- }
- ASequenceFetcher sfetcher = new SequenceFetcher(withDas);
- String[] dbSources = sfetcher.getSupportedDb();
- for (int dbsource = 0; dbsource < dbSources.length; dbsource++)
- {
- String db = dbSources[dbsource];
- // skip me
- if (db.equals(DBRefSource.PDB))
- {
- continue;
- }
- for (DbSourceProxy sp : sfetcher.getSourceProxy(db))
- {
- System.out.println("Source: " + sp.getDbName() + " (" + db
- + "): retrieving test:" + sp.getTestQuery());
- AlignmentI al = null;
- try
- {
- al = sp.getSequenceRecords(sp.getTestQuery());
- if (al != null && al.getHeight() > 0
- && sp.getDbSourceProperties() != null)
- {
- boolean dna = sp.getDbSourceProperties().containsKey(
- DBRefSource.DNACODINGSEQDB)
- || sp.getDbSourceProperties().containsKey(
- DBRefSource.DNASEQDB)
- || sp.getDbSourceProperties().containsKey(
- DBRefSource.CODINGSEQDB);
- // try and find products
- String types[] = jalview.analysis.CrossRef
- .findSequenceXrefTypes(dna, al.getSequencesArray());
- if (types != null)
- {
- System.out.println("Xref Types for: "
- + (dna ? "dna" : "prot"));
- for (int t = 0; t < types.length; t++)
- {
- System.out.println("Type: " + types[t]);
- SequenceI[] prod = jalview.analysis.CrossRef
- .findXrefSequences(al.getSequencesArray(), dna,
- types[t]).getSequencesArray();
- System.out.println("Found "
- + ((prod == null) ? "no" : "" + prod.length)
- + " products");
- if (prod != null)
- {
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true));
- }
- }
- }
- }
- else
- {
- noProds.addElement((dna ? new Object[] { al, al }
- : new Object[] { al }));
- }
-
- }
- } catch (Exception ex)
- {
- System.out.println("ERROR:Failed to retrieve test query.");
- ex.printStackTrace(System.out);
- }
-
- if (al == null)
- {
- System.out.println("ERROR:No alignment retrieved.");
- StringBuffer raw = sp.getRawRecords();
- if (raw != null)
- {
- System.out.println(raw.toString());
- }
- else
- {
- System.out.println("ERROR:No Raw results.");
- }
- }
- else
- {
- System.out.println("Retrieved " + al.getHeight() + " sequences.");
- for (int s = 0; s < al.getHeight(); s++)
- {
- SequenceI sq = al.getSequenceAt(s);
- while (sq.getDatasetSequence() != null)
- {
- sq = sq.getDatasetSequence();
-
- }
- if (ds == null)
- {
- ds = new Alignment(new SequenceI[] { sq });
-
- }
- else
- {
- ds.addSequence(sq);
- }
- }
- }
- System.out.flush();
- System.err.flush();
-
- }
- if (noProds.size() > 0)
- {
- Enumeration ts = noProds.elements();
- while (ts.hasMoreElements())
-
- {
- Object[] typeSq = (Object[]) ts.nextElement();
- boolean dna = (typeSq.length > 1);
- AlignmentI al = (AlignmentI) typeSq[0];
- System.out.println("Trying getProducts for "
- + al.getSequenceAt(0).getDisplayId(true));
- System.out.println("Search DS Xref for: "
- + (dna ? "dna" : "prot"));
- // have a bash at finding the products amongst all the retrieved
- // sequences.
- SequenceI[] seqs = al.getSequencesArray();
- Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
- seqs, dna, null, ds);
- System.out.println("Found "
- + ((prodal == null) ? "no" : "" + prodal.getHeight())
- + " products");
- if (prodal != null)
- {
- SequenceI[] prod = prodal.getSequencesArray(); // note
- // should
- // test
- // rather
- // than
- // throw
- // away
- // codon
- // mapping
- // (if
- // present)
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true));
- }
- }
- }
-
- }
-
- }
- }
-
- /**
* query the currently defined DAS source registry for sequence sources and
* add a DasSequenceSource instance for each source to the SequenceFetcher
* source list.
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
import com.stevesoft.pat.Regex;
-public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy
+public class EmblCdsSource extends EmblXmlSource
{
- public EmblCdsSouce()
+ public EmblCdsSource()
{
super();
- addDbSourceProperty(DBRefSource.CODINGSEQDB);
}
+ @Override
public String getAccessionSeparator()
{
return null;
}
+ @Override
public Regex getAccessionValidator()
{
- return new com.stevesoft.pat.Regex("^[A-Z]+[0-9]+");
+ return new Regex("^[A-Z]+[0-9]+");
}
+ @Override
public String getDbSource()
{
return DBRefSource.EMBLCDS;
}
+ @Override
public String getDbVersion()
{
return "0"; // TODO : this is dynamically set for a returned record - not
// tied to proxy
}
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
if (queries.indexOf(".") > -1)
return getEmblSequenceRecords(DBRefSource.EMBLCDS, queries);
}
+ @Override
public boolean isValidReference(String accession)
{
// most embl CDS refs look like ..
/**
* cDNA for LDHA_CHICK swissprot sequence
*/
+ @Override
public String getTestQuery()
{
return "CAA37824";
}
+ @Override
public String getDbName()
{
return "EMBL (CDS)";
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
import com.stevesoft.pat.Regex;
* @author JimP
*
*/
-public class EmblSource extends EmblXmlSource implements DbSourceProxy
+public class EmblSource extends EmblXmlSource
{
public EmblSource()
{
- addDbSourceProperty(DBRefSource.DNASEQDB);
- addDbSourceProperty(DBRefSource.CODINGSEQDB);
+ super();
}
/*
*
* @see jalview.ws.DbSourceProxy#getAccessionSeparator()
*/
+ @Override
public String getAccessionSeparator()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getAccessionValidator()
*/
+ @Override
public Regex getAccessionValidator()
{
- return new com.stevesoft.pat.Regex("^[A-Z]+[0-9]+");
+ return new Regex("^[A-Z]+[0-9]+");
}
/*
*
* @see jalview.ws.DbSourceProxy#getDbSource()
*/
+ @Override
public String getDbSource()
{
return DBRefSource.EMBL;
*
* @see jalview.ws.DbSourceProxy#getDbVersion()
*/
+ @Override
public String getDbVersion()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
*/
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
return getEmblSequenceRecords(DBRefSource.EMBL, queries);
*
* @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
*/
+ @Override
public boolean isValidReference(String accession)
{
// most embl refs look like ..
/**
* return LHD_CHICK coding gene
*/
+ @Override
public String getTestQuery()
{
return "X53828";
}
+ @Override
public String getDbName()
{
return "EMBL"; // getDbSource();
return al;
}
+ @Override
+ public boolean isDnaCoding()
+ {
+ return true;
+ }
+
}
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
import com.stevesoft.pat.Regex;
* @author JimP
*
*/
-public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
+public class GeneDbSource extends EmblXmlSource
{
public GeneDbSource()
{
- addDbSourceProperty(DBRefSource.DNASEQDB);
- addDbSourceProperty(DBRefSource.CODINGSEQDB);
+ super();
}
/*
*
* @see jalview.ws.DbSourceProxy#getAccessionSeparator()
*/
+ @Override
public String getAccessionSeparator()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getAccessionValidator()
*/
+ @Override
public Regex getAccessionValidator()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getDbSource()
*/
+ @Override
public String getDbSource()
{
return DBRefSource.GENEDB;
*
* @see jalview.ws.DbSourceProxy#getDbVersion()
*/
+ @Override
public String getDbVersion()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
*/
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
// query of form
*
* @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
*/
+ @Override
public boolean isValidReference(String accession)
{
// TODO Auto-generated method stub
/**
* return T.Brucei Mannosyl-Transferase TbPIG-M
*/
+ @Override
public String getTestQuery()
{
return "Tb927.6.3300";
}
+ @Override
public String getDbName()
{
return "GeneDB"; // getDbSource();
import jalview.io.FormatAdapter;
import jalview.util.MessageManager;
import jalview.ws.ebi.EBIFetchClient;
-import jalview.ws.seqfetcher.DbSourceProxy;
import java.util.ArrayList;
import java.util.List;
* @author JimP
*
*/
-public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
+public class Pdb extends EbiFileRetrievedProxy
{
public Pdb()
{
super();
- addDbSourceProperty(DBRefSource.PROTSEQDB);
}
/*
*
* @see jalview.ws.DbSourceProxy#getAccessionSeparator()
*/
+ @Override
public String getAccessionSeparator()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getAccessionValidator()
*/
+ @Override
public Regex getAccessionValidator()
{
return new Regex("([1-9][0-9A-Za-z]{3}):?([ _A-Za-z0-9]?)");
*
* @see jalview.ws.DbSourceProxy#getDbSource()
*/
+ @Override
public String getDbSource()
{
return DBRefSource.PDB;
*
* @see jalview.ws.DbSourceProxy#getDbVersion()
*/
+ @Override
public String getDbVersion()
{
return "0";
*
* @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
*/
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
AlignmentI pdbfile = null;
*
* @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
*/
+ @Override
public boolean isValidReference(String accession)
{
Regex r = getAccessionValidator();
/**
* obtain human glyoxalase chain A sequence
*/
+ @Override
public String getTestQuery()
{
return "1QIPA";
}
+ @Override
public String getDbName()
{
return "PDB"; // getDbSource();
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
-import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.datamodel.DBRefSource;
+import jalview.io.FormatAdapter;
import com.stevesoft.pat.Regex;
* @author JimP
*
*/
-abstract public class Pfam extends Xfam implements DbSourceProxy
+abstract public class Pfam extends Xfam
{
public Pfam()
{
super();
- // all extensions of this PFAM source base class are DOMAINDB sources
- addDbSourceProperty(jalview.datamodel.DBRefSource.DOMAINDB);
- addDbSourceProperty(jalview.datamodel.DBRefSource.ALIGNMENTDB);
}
/*
*
* @see jalview.ws.DbSourceProxy#getAccessionSeparator()
*/
+ @Override
public String getAccessionSeparator()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getAccessionValidator()
*/
+ @Override
public Regex getAccessionValidator()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
*/
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
// TODO: this is not a perfect implementation. We need to be able to add
// individual references to each sequence in each family alignment that's
// retrieved.
startQuery();
- AlignmentI rcds = new jalview.io.FormatAdapter().readFile(getXFAMURL()
+ AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL()
+ queries.trim().toUpperCase(), jalview.io.FormatAdapter.URL,
"STH");
for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++)
{
rcds.getSequenceAt(s).addDBRef(
- new DBRefEntry(jalview.datamodel.DBRefSource.PFAM,
+new DBRefEntry(DBRefSource.PFAM,
// getDbSource(),
getDbVersion(), queries.trim().toUpperCase()));
- if (!getDbSource().equals(jalview.datamodel.DBRefSource.PFAM))
+ if (!getDbSource().equals(DBRefSource.PFAM))
{ // add the specific ref too
rcds.getSequenceAt(s).addDBRef(
new DBRefEntry(getDbSource(), getDbVersion(), queries
*
* @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
*/
+ @Override
public boolean isValidReference(String accession)
{
return accession.indexOf("PF") == 0;
* public String getDbName() { return "PFAM"; // getDbSource(); }
*/
+ @Override
public String getXfamSource()
{
- return jalview.datamodel.DBRefSource.PFAM;
+ return DBRefSource.PFAM;
}
}
*/
package jalview.ws.dbsources;
-import jalview.ws.seqfetcher.DbSourceProxy;
/**
* flyweight class specifying retrieval of Full family alignments from PFAM
*
*/
-public class PfamFull extends Pfam implements DbSourceProxy
+public class PfamFull extends Pfam
{
public PfamFull()
{
*
* @see jalview.ws.dbsources.Pfam#getPFAMURL()
*/
+ @Override
protected String getXFAMURL()
{
return "http://pfam.sanger.ac.uk/family/alignment/download/format?alnType=full&format=stockholm&order=t&case=l&gaps=default&entry=";
*
* @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
*/
+ @Override
public String getDbName()
{
return "PFAM (Full)";
}
+ @Override
public String getDbSource()
{
return getDbName(); // so we have unique DbSource string.
}
+ @Override
public String getTestQuery()
{
return "PF03760";
}
+ @Override
public String getDbVersion()
{
return null;
*/
package jalview.ws.dbsources;
-import jalview.ws.seqfetcher.DbSourceProxy;
/**
* flyweight class specifying retrieval of Seed alignments from PFAM
* @author JimP
*
*/
-public class PfamSeed extends Pfam implements DbSourceProxy
+public class PfamSeed extends Pfam
{
public PfamSeed()
{
*
* @see jalview.ws.dbsources.Pfam#getPFAMURL()
*/
+ @Override
protected String getXFAMURL()
{
return "http://pfam.sanger.ac.uk/family/alignment/download/format?alnType=seed&format=stockholm&order=t&case=l&gaps=default&entry=";
*
* @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
*/
+ @Override
public String getDbName()
{
return "PFAM (Seed)";
}
+ @Override
public String getDbSource()
{
return jalview.datamodel.DBRefSource.PFAM; // archetype source
}
+ @Override
public String getTestQuery()
{
return "PF03760";
*/
package jalview.ws.dbsources;
-import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.datamodel.DBRefSource;
import com.stevesoft.pat.Regex;
*
* @author Lauren Michelle Lui
*/
-abstract public class Rfam extends Xfam implements DbSourceProxy
+abstract public class Rfam extends Xfam
{
public Rfam()
{
super();
- // all extensions of this RFAM source base class are DOMAINDB sources
- addDbSourceProperty(jalview.datamodel.DBRefSource.DOMAINDB);
- addDbSourceProperty(jalview.datamodel.DBRefSource.ALIGNMENTDB);
}
/*
* @see jalview.ws.DbSourceProxy#getAccessionSeparator() Left here for
* consistency with Pfam class
*/
+ @Override
public String getAccessionSeparator()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#getAccessionValidator() * Left here for
*/
+ @Override
public Regex getAccessionValidator()
{
// TODO Auto-generated method stub
*
* @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
*/
+ @Override
public boolean isValidReference(String accession)
{
return accession.indexOf("RF") == 0;
*
* @see jalview.ws.dbsources.Xfam#getXfamSource()
*/
+ @Override
public String getXfamSource()
{
- return jalview.datamodel.DBRefSource.RFAM;
+ return DBRefSource.RFAM;
}
}
*/
package jalview.ws.dbsources;
-import jalview.ws.seqfetcher.DbSourceProxy;
/**
* Flyweight class specifying retrieval of Full family alignments from RFAM
* @author Lauren Michelle Lui
*
*/
-public class RfamFull extends Rfam implements DbSourceProxy
+public class RfamFull extends Rfam
{
public RfamFull()
{
*
* @see jalview.ws.dbsources.Rfam#getXFAMURL()
*/
+ @Override
protected String getXFAMURL()
{
return "http://rfam.sanger.ac.uk/family/alignment/download/format?alnType=full&nseLabels=0&format=stockholm&acc=";
*
* @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
*/
+ @Override
public String getDbName()
{
return "RFAM (Full)";
}
+ @Override
public String getDbSource()
{
return getDbName(); // so we have unique DbSource string.
}
+ @Override
public String getTestQuery()
{
// Can be retrieved from http://rfam.janelia.org/cgi-bin/getdesc?acc=RF00014
return "RF00014";
}
+ @Override
public String getDbVersion()
{
return null;
*/
package jalview.ws.dbsources;
-import jalview.ws.seqfetcher.DbSourceProxy;
/**
* Flyweight class specifying retrieval of Seed family alignments from RFAM
* @author Lauren Michelle Lui
*
*/
-public class RfamSeed extends Rfam implements DbSourceProxy
+public class RfamSeed extends Rfam
{
public RfamSeed()
{
*
* @see jalview.ws.dbsources.Rfam#getRFAMURL()
*/
+ @Override
protected String getXFAMURL()
{
return "http://rfam.sanger.ac.uk/family/alignment/download/format?alnType=seed&nseLabels=0&format=stockholm&acc=";
*
* @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
*/
+ @Override
public String getDbName()
{
return "RFAM (Seed)";
}
+ @Override
public String getDbSource()
{
return getDbName(); // so we have unique DbSource string.
}
+ @Override
public String getTestQuery()
{
return "RF00014";
} // http://rfam.janelia.org/cgi-bin/getdesc?acc=RF00014
+ @Override
public String getDbVersion()
{
return null;
import jalview.datamodel.UniprotEntry;
import jalview.datamodel.UniprotFile;
import jalview.ws.ebi.EBIFetchClient;
-import jalview.ws.seqfetcher.DbSourceProxy;
import jalview.ws.seqfetcher.DbSourceProxyImpl;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
+import java.net.URL;
import java.util.Vector;
+import org.exolab.castor.mapping.Mapping;
import org.exolab.castor.xml.Unmarshaller;
import com.stevesoft.pat.Regex;
* @author JimP
*
*/
-public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
+public class Uniprot extends DbSourceProxyImpl
{
private static final String BAR_DELIMITER = "|";
private static final String NEWLINE = "\n";
- private static org.exolab.castor.mapping.Mapping map;
+ private static Mapping map;
/**
* Constructor
public Uniprot()
{
super();
- addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
- addDbSourceProperty(DBRefSource.PROTSEQDB);
- // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
}
/*
*
* @see jalview.ws.DbSourceProxy#getAccessionSeparator()
*/
+ @Override
public String getAccessionSeparator()
{
return null; // ";";
*
* @see jalview.ws.DbSourceProxy#getAccessionValidator()
*/
+ @Override
public Regex getAccessionValidator()
{
return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
*
* @see jalview.ws.DbSourceProxy#getDbSource()
*/
+ @Override
public String getDbSource()
{
return DBRefSource.UNIPROT;
*
* @see jalview.ws.DbSourceProxy#getDbVersion()
*/
+ @Override
public String getDbVersion()
{
return "0"; // we really don't know what version we're on.
if (map == null)
{
// 1. Load the mapping information from the file
- map = new org.exolab.castor.mapping.Mapping(uni.getClass()
- .getClassLoader());
- java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
+ map = new Mapping(uni.getClass().getClassLoader());
+ URL url = getClass().getResource("/uniprot_mapping.xml");
map.loadMapping(url);
}
*
* @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
*/
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
startQuery();
*
* @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
*/
+ @Override
public boolean isValidReference(String accession)
{
// TODO: make the following a standard validator
/**
* return LDHA_CHICK uniprot entry
*/
+ @Override
public String getTestQuery()
{
return "P00340";
}
+ @Override
public String getDbName()
{
return "Uniprot"; // getDbSource();
{
return 0;
}
+
+ @Override
+ public int getMaximumQueryCount()
+ {
+ // uses the inherited limit; this commented out code was relocated from the constructor:
+ // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
+ // return 50;
+ return super.getMaximumQueryCount();
+ }
}
*/
package jalview.ws.dbsources;
+import jalview.datamodel.DBRefSource;
+
/**
* Canonical Uniprot fetcher instance specifically retrieving UP_NAME
* references.
* @author JimP
*
*/
-public class UnprotName extends Uniprot implements
- jalview.ws.seqfetcher.DbSourceProxy
+public class UniprotName extends Uniprot
{
/*
*
* @see jalview.ws.dbsources.Uniprot#getDbSource()
*/
+ @Override
public String getDbSource()
{
- return jalview.datamodel.DBRefSource.UP_NAME;
+ return DBRefSource.UP_NAME;
}
}
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
+import jalview.io.FormatAdapter;
import jalview.ws.seqfetcher.DbSourceProxyImpl;
/**
protected abstract String getXFAMURL();
+ @Override
public abstract String getDbVersion();
abstract String getXfamSource();
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
// TODO: this is not a perfect implementation. We need to be able to add
// retrieved.
startQuery();
// TODO: trap HTTP 404 exceptions and return null
- AlignmentI rcds = new jalview.io.FormatAdapter().readFile(getXFAMURL()
- + queries.trim().toUpperCase(), jalview.io.FormatAdapter.URL,
- "STH");
+ AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL()
+ + queries.trim().toUpperCase(), FormatAdapter.URL, "STH");
for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++)
{
rcds.getSequenceAt(s).addDBRef(new DBRefEntry(getXfamSource(),
return rcds;
}
+ /**
+ * Answers true, since Xfam sources (Pfam and Rfam) provide alignments:
+ * getSequenceRecords() reads the retrieved data as an alignment in "STH"
+ * format.
+ *
+ * @return true
+ */
+ @Override
+ public boolean isAlignmentSource()
+ {
+ return true;
+ }
+
}
*/
package jalview.ws.seqfetcher;
+import jalview.bin.Cache;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.SequenceI;
import jalview.util.DBRefUtils;
import jalview.util.MessageManager;
+import jalview.util.QuickSort;
import java.util.ArrayList;
import java.util.Enumeration;
return null;
}
String[] sf = new String[FETCHABLEDBS.size()];
- Enumeration e = FETCHABLEDBS.keys();
+ Enumeration<String> e = FETCHABLEDBS.keys();
int i = 0;
while (e.hasMoreElements())
{
- sf[i++] = (String) e.nextElement();
+ sf[i++] = e.nextElement();
}
;
return sf;
public boolean isFetchable(String source)
{
- Enumeration e = FETCHABLEDBS.keys();
+ Enumeration<String> e = FETCHABLEDBS.keys();
while (e.hasMoreElements())
{
- String db = (String) e.nextElement();
+ String db = e.nextElement();
if (source.compareToIgnoreCase(db) == 0)
{
return true;
}
}
- jalview.bin.Cache.log.warn("isFetchable doesn't know about '" + source
+ Cache.log.warn("isFetchable doesn't know about '" + source
+ "'");
return false;
}
- public SequenceI[] getSequences(jalview.datamodel.DBRefEntry[] refs)
+ public SequenceI[] getSequences(DBRefEntry[] refs)
{
SequenceI[] ret = null;
- Vector<SequenceI> rseqs = new Vector();
- Hashtable<String, List<String>> queries = new Hashtable();
+ Vector<SequenceI> rseqs = new Vector<SequenceI>();
+ Hashtable<String, List<String>> queries = new Hashtable<String, List<String>>();
for (int r = 0; r < refs.length; r++)
{
if (!queries.containsKey(refs[r].getSource()))
while (fetchers.hasNext())
{
List<String> queriesMade = new ArrayList<String>();
- HashSet queriesFound = new HashSet<String>();
+ HashSet<String> queriesFound = new HashSet<String>();
try
{
DbSourceProxy fetcher = fetchers.next();
- boolean doMultiple = fetcher.getAccessionSeparator() != null; // No
- // separator
- // - no
- // Multiple
- // Queries
+ boolean doMultiple = fetcher.getAccessionSeparator() != null;
+ // No separator - no Multiple Queries
while (!queriesLeft.isEmpty())
{
StringBuffer qsb = new StringBuffer();
if (rseqs.size() > 0)
{
ret = new SequenceI[rseqs.size()];
- Enumeration sqs = rseqs.elements();
+ Enumeration<SequenceI> sqs = rseqs.elements();
int si = 0;
while (sqs.hasMoreElements())
{
- SequenceI s = (SequenceI) sqs.nextElement();
+ SequenceI s = sqs.nextElement();
ret[si++] = s;
s.updatePDBIds();
}
{
nm[i++] = "" + s.getTier() + s.getDbName().toLowerCase();
}
- jalview.util.QuickSort.sort(nm, l);
+ QuickSort.sort(nm, l);
dbs = new ArrayList<DbSourceProxy>();
for (i = l.length - 1; i >= 0; i--)
{
}
/**
- * constructs and instance of the proxy and registers it as a valid
- * dbrefsource
+ * constructs an instance of the proxy and registers it as a valid dbrefsource
*
* @param dbSourceProxy
* reference for class implementing
*/
protected void addDBRefSourceImpl(
Class<? extends DbSourceProxy> dbSourceProxy)
- throws java.lang.IllegalArgumentException
+ throws IllegalArgumentException
{
DbSourceProxy proxy = null;
try
}
/**
- * test if the database handler for dbName contains the given dbProperty when
- * a dbName resolves to a set of proxies - this method will return the result
- * of the test for the first instance. TODO implement additional method to
- * query all sources for a db to find one with a particular property
- *
- * @param dbName
- * @param dbProperty
- * @return true if proxy has the given property
- */
- public boolean hasDbSourceProperty(String dbName, String dbProperty)
- {
- // TODO: decide if invalidDbName exception is thrown here.
-
- List<DbSourceProxy> proxies = getSourceProxy(dbName);
- if (proxies != null)
- {
- for (DbSourceProxy proxy : proxies)
- {
- if (proxy.getDbSourceProperties() != null)
- {
- return proxy.getDbSourceProperties().containsKey(dbProperty);
- }
- }
- }
- return false;
- }
-
- /**
* select sources which are implemented by instances of the given class
*
* @param class that implements DbSourceProxy
*/
public String[] getDbInstances(Class class1)
{
- if (!jalview.ws.seqfetcher.DbSourceProxy.class.isAssignableFrom(class1))
+ if (!DbSourceProxy.class.isAssignableFrom(class1))
{
throw new Error(
MessageManager
return null;
}
String[] sources = null;
- Vector src = new Vector();
- Enumeration dbs = FETCHABLEDBS.keys();
+ Vector<String> src = new Vector<String>();
+ Enumeration<String> dbs = FETCHABLEDBS.keys();
while (dbs.hasMoreElements())
{
- String dbn = (String) dbs.nextElement();
+ String dbn = dbs.nextElement();
for (DbSourceProxy dbp : FETCHABLEDBS.get(dbn).values())
{
if (class1.isAssignableFrom(dbp.getClass()))
public DbSourceProxy[] getDbSourceProxyInstances(Class class1)
{
- ArrayList<DbSourceProxy> prlist = new ArrayList<DbSourceProxy>();
+ List<DbSourceProxy> prlist = new ArrayList<DbSourceProxy>();
for (String fetchable : getSupportedDb())
{
for (DbSourceProxy pr : getSourceProxy(fetchable))
public Regex getAccessionValidator();
/**
- * DbSource properties hash - define the capabilities of this source Property
- * hash methods defined in DbSourceProxyImpl. See constants in
- * jalview.datamodel.DBRefSource for definition of properties.
- *
- * @return
- */
- public Hashtable getDbSourceProperties();
-
- /**
*
* @return a test/example query that can be used to validate retrieval and
* parsing mechanisms
public StringBuffer getRawRecords();
/**
- * Find out more info about the source.
+ * Tier for this data source
*
- * @param dbsourceproperty
- * - one of the database reference source properties in
- * jalview.datamodel.DBRefSource
- * @return true if the source has this property
+ * @return 0 - primary datasource, 1 - das primary source, 2 - secondary
*/
- public boolean isA(Object dbsourceproperty);
+ public int getTier();
/**
- * Tier for this data source
+ * Extracts valid accession strings from a query string. If there is an
+ * accession id validator, returns the matched region or the first
+ * subgroup match from the matched region; else just returns the whole query.
*
- * @return 0 - primary datasource, 1 - das primary source, 2 - secondary
+ * @param query
+ * @return
*/
- public int getTier();
-}
+ String getAccessionIdFromQuery(String query);
+
+ /**
+ * Returns the maximum number of accession ids that can be queried in one
+ * request.
+ *
+ * @return the maximum number of accession ids per request
+ */
+ public int getMaximumQueryCount();
+
+ /**
+ * Returns true if the source may provide coding DNA i.e. sequences with
+ * implicit peptide products
+ *
+ * @return true if the source may provide coding DNA, otherwise false
+ */
+ public boolean isDnaCoding();
+
+ /**
+ * Answers true if the database is a source of alignments (for example, domain
+ * families)
+ *
+ * @return true if the database is a source of alignments, otherwise false
+ */
+ public boolean isAlignmentSource();
+}
\ No newline at end of file
import jalview.io.FormatAdapter;
import jalview.io.IdentifyFile;
-import java.util.Hashtable;
+import com.stevesoft.pat.Regex;
/**
* common methods for implementations of the DbSourceProxy interface.
*/
public abstract class DbSourceProxyImpl implements DbSourceProxy
{
- public DbSourceProxyImpl()
- {
- // default constructor - do nothing probably.
- }
- private Hashtable props = null;
-
- /*
- * (non-Javadoc)
- *
- * @see jalview.ws.DbSourceProxy#getDbSourceProperties()
- */
- public Hashtable getDbSourceProperties()
- {
- if (props == null)
- {
- props = new Hashtable();
- }
- return props;
- }
+ boolean queryInProgress = false;
- protected void addDbSourceProperty(Object propname)
- {
- addDbSourceProperty(propname, propname);
- }
+ protected StringBuffer results = null;
- protected void addDbSourceProperty(Object propname, Object propvalue)
+ public DbSourceProxyImpl()
{
- if (props == null)
- {
- props = new Hashtable();
- }
- props.put(propname, propvalue);
}
- boolean queryInProgress = false;
-
- protected StringBuffer results = null;
-
/*
* (non-Javadoc)
*
* @see jalview.ws.DbSourceProxy#getRawRecords()
*/
+ @Override
public StringBuffer getRawRecords()
{
return results;
*
* @see jalview.ws.DbSourceProxy#queryInProgress()
*/
+ @Override
public boolean queryInProgress()
{
return queryInProgress;
}
@Override
- public boolean isA(Object dbsourceproperty)
+  public String getAccessionIdFromQuery(String query)
+  {
+    final Regex validator = getAccessionValidator();
+    if (validator != null)
+    {
+      // run the validator over the query, then report what it matched
+      validator.search(query);
+      // sub-match 1 is used when the validator reports any sub-matches,
+      // otherwise the whole matched region
+      return validator.numSubs() > 0 ? validator.stringMatched(1)
+              : validator.stringMatched();
+    }
+    // no validator for this source - the query is returned unchanged
+    return query;
+  }
+
+ /**
+ * Returns the maximum number of accession ids that can be sent in one query.
+ * Default is only one accession id per query - override if more are allowed.
+ *
+ * @return 1
+ */
+ @Override
+ public int getMaximumQueryCount()
+ {
+ return 1;
+ }
+
+ /**
+ * Returns false - override to return true for DNA coding data sources
+ */
+ @Override
+ public boolean isDnaCoding()
{
- assert (dbsourceproperty != null);
- return (props == null) ? false : props.containsKey(dbsourceproperty);
+ return false;
}
+ /**
+ * Answers false, i.e. by default a source is not reported as a provider of
+ * alignments - override as required in subclasses
+ *
+ * @return false
+ */
+ @Override
+ public boolean isAlignmentSource()
+ {
+ return false;
+ }
}
package jalview.ext.ensembl;
-import jalview.datamodel.Alignment;
-import jalview.datamodel.SequenceI;
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
-import jalview.io.AppletFormatAdapter;
-import jalview.io.FastaFile;
-import jalview.io.FileParse;
-import jalview.util.DBRefUtils;
-
import java.lang.reflect.Method;
-import java.util.Arrays;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+
public class EnsemblSeqProxyTest
{
- @Test
- public void testCheckEnsembl()
- {
- SeqFetcher sf = new SeqFetcher();
- sf.setTestEnsemblStatus(true);
- sf.setTesting(true);
- Assert.assertTrue(sf.isEnsemblAvailable());
- sf.setTestEnsemblStatus(false);
- Assert.assertFalse(sf.isEnsemblAvailable());
- }
-
- @Test(suiteName = "live")
- public void testLiveCheckEnsembl()
- {
- SeqFetcher sf = new SeqFetcher();
- boolean isAvailable = sf.isEnsemblAvailable();
- System.out.println("Ensembl is "
- + (isAvailable ? "UP!" : "DOWN ******************* BAD!"));
- }
-
- @DataProvider(name = "ens_seqs")
+ @DataProvider(name = "queries")
public Object[][] createData(Method m)
{
- System.out.println(m.getName());
- return allSeqs;
+ return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } };
}
- public static Object[][] allSeqs = new Object[][]
- {
- {
- EnsemblSeqType.PROTEIN,
- "CCDS5863.1",
- ">CCDS5863.1\n"
- + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
- + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
- + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
- + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
- + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
- + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
- + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
- + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
- + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
- + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
- + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
- + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
- + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
- {
- EnsemblSeqType.TRANSCRIPT,
- "CCDS5863.1",
- ">CCDS5863.1\n"
- + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
- + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
- + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
- + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
- + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
- + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
- + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
- + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
- + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
- + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
- + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
- + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
- + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
- + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
- + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
- + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
- + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
- + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
- + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
- + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
- + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
- + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
- + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
- + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
- + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
- + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
- + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
- + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
- + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
- + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
- + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
- + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
- + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
- + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
- + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
- + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
- + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
- + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
- + "GGTGCGTTTCCTGTCCACTGA\n" },
- {
- EnsemblSeqType.PROTEIN,
- "ENSP00000288602",
- ">ENSP00000288602\n"
- + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
- + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
- + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
- + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
- + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
- + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
- + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
- + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
- + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
- + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
- + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
- + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
- + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
-
- @Test(dataProvider = "ens_seqs", suiteName = "live")
- public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
- throws Exception
- {
- SeqFetcher sf = new SeqFetcher();
- FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[]
- { sq }));
- SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
- FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
- SequenceI[] trueSqs = trueRes.getSeqsAsArray();
- Assert.assertEquals(sqs.length, trueSqs.length,
- "Different number of sequences retrieved for query " + sq);
- Alignment ral = new Alignment(sqs);
- for (SequenceI tr : trueSqs)
- {
- SequenceI[] rseq;
- Assert.assertNotNull(
- rseq = ral.findSequenceMatch(tr.getName()),
- "Couldn't find sequences matching expected sequence "
- + tr.getName());
- Assert.assertEquals(rseq.length, 1,
- "Expected only one sequence for sequence ID " + tr.getName());
- Assert.assertEquals(
- rseq[0].getSequenceAsString(),
- tr.getSequenceAsString(),
- "Sequences differ for " + tr.getName() + "\n" + "Exp:"
- + tr.getSequenceAsString() + "\n" + "Got:"
- + rseq[0].getSequenceAsString());
-
- }
- }
-
- @Test(dataProvider = "ens_seqs")
- public void testRegexForProxy(EnsemblSeqType type, String sq,
- String fastasq) throws Exception
+ @Test(dataProvider = "queries")
+ public void testIsValidReference(String query) throws Exception
{
EnsemblSeqProxy esq = new EnsemblProtein();
- Assert.assertTrue(esq.isValidReference(sq),
- "Expected reference string " + sq + " to be valid for regex "
+ Assert.assertTrue(esq.isValidReference(query),
+ "Expected reference string " + query
+ + " to be valid for regex "
+ esq.getAccessionValidator().toString());
-
- Assert.assertEquals(sq, DBRefUtils.processQueryToAccessionFor(esq, sq),
- "Regex for " + esq.getClass().toString() + " not correct.");
}
- // TODO:
- // sequence query with ENSG and anything other than a genomic type will yield
- // sequences with different IDs which will
- // break the post-processing stage where DBRefs are assigned to sequences.
- // -> multiple_sequences = true is needed additional parameter
- // http://rest.ensembl.org/sequence/id/ENSG00000157764?content-type=text/x-json;type=protein;multiple_sequences=true
- // result with four transcripts, cds, cdna, and protein products.
- // *
- // features for ENG -
- // http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=cds&feature=exon&feature=transcript&content-type=text/x-gff3
- // transcript: gives locus, all transcript products with ENSG parents
- // gene: give all ENSG on locus
- // exon: all exon boundaries. CDS same info.
-
- // @Test(dataProvider = "ens_seqs", suiteName = "live")
- // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
- // throws Exception
- // {
- //
- // {
- // Assert.assertTrue(rseq[0].getDBRef() != null
- // && rseq[0].getDBRef().length > 0,
- // "No database references added to sequence by fetcher.");
- // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(),
- // new DBRefEntry("ENSEMBL", null, sq)),
- // "Could't find database references added to sequence by fetcher.");
- //
- // }
}
\ No newline at end of file
--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class SeqFetcherTest
+{
+ private static final Object[][] allSeqs = new Object[][] {
+ {
+ EnsemblSeqType.PROTEIN,
+ "CCDS5863.1",
+ ">CCDS5863.1\n"
+ + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+ + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+ + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+ + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+ + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+ + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+ + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+ + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+ + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+ + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+ + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+ + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+ + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
+ {
+ EnsemblSeqType.TRANSCRIPT,
+ "CCDS5863.1",
+ ">CCDS5863.1\n"
+ + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
+ + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
+ + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
+ + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
+ + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
+ + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
+ + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
+ + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
+ + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
+ + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
+ + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
+ + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
+ + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
+ + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
+ + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
+ + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
+ + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
+ + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
+ + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
+ + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
+ + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
+ + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
+ + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
+ + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
+ + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
+ + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
+ + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
+ + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
+ + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
+ + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
+ + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
+ + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
+ + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
+ + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
+ + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
+ + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
+ + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
+ + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
+ + "GGTGCGTTTCCTGTCCACTGA\n" },
+ {
+ EnsemblSeqType.PROTEIN,
+ "ENSP00000288602",
+ ">ENSP00000288602\n"
+ + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+ + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+ + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+ + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+ + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+ + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+ + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+ + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+ + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+ + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+ + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+ + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+ + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
+
+ /**
+ * Data provider "ens_seqs": prints the name of the given method, then
+ * supplies the rows of allSeqs (Ensembl sequence type, query id, expected
+ * FASTA text).
+ *
+ * @param m
+ * the method whose name is printed (presumably the requesting test
+ * method, as injected by TestNG - confirm)
+ * @return the allSeqs test data
+ */
+ @DataProvider(name = "ens_seqs")
+ public Object[][] createData(Method m)
+ {
+ System.out.println(m.getName());
+ return allSeqs;
+ }
+
+  @Test(dataProvider = "ens_seqs", suiteName = "live")
+  public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
+          throws Exception
+  {
+    // fetch the sequence(s) for the single query id and read them as FASTA
+    FileParse reader = new SeqFetcher().getSequenceReader(type,
+            Arrays.asList(sq));
+    SequenceI[] fetched = new FastaFile(reader).getSeqsAsArray();
+
+    // read the expected FASTA text supplied by the data provider
+    SequenceI[] expected = new FastaFile(fastasq, AppletFormatAdapter.PASTE)
+            .getSeqsAsArray();
+
+    Assert.assertEquals(fetched.length, expected.length,
+            "Different number of sequences retrieved for query " + sq);
+
+    // each expected sequence must be found by name exactly once, with
+    // an identical sequence string
+    Alignment retrieved = new Alignment(fetched);
+    for (SequenceI wanted : expected)
+    {
+      String name = wanted.getName();
+      SequenceI[] matches = retrieved.findSequenceMatch(name);
+      Assert.assertNotNull(matches,
+              "Couldn't find sequences matching expected sequence " + name);
+      Assert.assertEquals(matches.length, 1,
+              "Expected only one sequence for sequence ID " + name);
+
+      String got = matches[0].getSequenceAsString();
+      String exp = wanted.getSequenceAsString();
+      Assert.assertEquals(got, exp, "Sequences differ for " + name + "\n"
+              + "Exp:" + exp + "\n" + "Got:" + got);
+    }
+  }
+
+ /**
+ * Asks a new SeqFetcher whether Ensembl is available and prints the answer.
+ * Note that this makes no assertions, so it only reports the status and does
+ * not fail when Ensembl is reported as unavailable.
+ */
+ @Test(suiteName = "live")
+ public void testLiveCheckEnsembl()
+ {
+ SeqFetcher sf = new SeqFetcher();
+ boolean isAvailable = sf.isEnsemblAvailable();
+ System.out.println("Ensembl is "
+ + (isAvailable ? "UP!"
+ : "DOWN or unreachable ******************* BAD!"));
+ }
+ // TODO:
+ // a sequence query with an ENSG id and any type other than genomic will
+ // yield sequences with different IDs, which will
+ // break the post-processing stage where DBRefs are assigned to sequences.
+ // -> multiple_sequences=true is needed as an additional parameter, e.g.
+ // http://rest.ensembl.org/sequence/id/ENSG00000157764?content-type=text/x-json;type=protein;multiple_sequences=true
+ // result with four transcripts, cds, cdna, and protein products.
+ // *
+ // features for ENSG -
+ // http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=cds&feature=exon&feature=transcript&content-type=text/x-gff3
+ // transcript: gives locus, all transcript products with ENSG parents
+ // gene: give all ENSG on locus
+ // exon: all exon boundaries. CDS same info.
+
+ // @Test(dataProvider = "ens_seqs", suiteName = "live")
+ // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
+ // throws Exception
+ // {
+ //
+ // {
+ // Assert.assertTrue(rseq[0].getDBRef() != null
+ // && rseq[0].getDBRef().length > 0,
+ // "No database references added to sequence by fetcher.");
+ // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(),
+ // new DBRefEntry("ENSEMBL", null, sq)),
+ // "Could't find database references added to sequence by fetcher.");
+ //
+ // }
+
+}
--- /dev/null
+package jalview.ws;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+import jalview.ws.seqfetcher.ASequenceFetcher;
+import jalview.ws.seqfetcher.DbSourceProxy;
+
+import java.util.Enumeration;
+import java.util.List;
+import java.util.Vector;
+
+public class SequenceFetcherTest
+{
+
+ /**
+ * simple run method to test dbsources.
+ *
+ * @param argv
+ */
+ public static void main(String[] argv)
+ {
+ // TODO: extracted from SequenceFetcher - convert to proper unit test with
+ // assertions
+
+ AlignmentI ds = null;
+ Vector noProds = new Vector();
+ String usage = "SequenceFetcher.main [-nodas] [<DBNAME> [<ACCNO>]]\n"
+ + "With no arguments, all DbSources will be queried with their test Accession number.\n"
+ + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n"
+ + "If given two arguments, SequenceFetcher will try to find the DbFetcher corresponding to <DBNAME> and retrieve <ACCNO> from it.\n"
+ + "The -nodas option will exclude DAS sources from the database fetchers Jalview will try to use.";
+ boolean withDas = true;
+ if (argv != null && argv.length > 0
+ && argv[0].toLowerCase().startsWith("-nodas"))
+ {
+ withDas = false;
+ String targs[] = new String[argv.length - 1];
+ System.arraycopy(argv, 1, targs, 0, targs.length);
+ argv = targs;
+ }
+ if (argv != null && argv.length > 0)
+ {
+ List<DbSourceProxy> sps = new SequenceFetcher(withDas)
+ .getSourceProxy(argv[0]);
+
+ if (sps != null)
+ {
+ for (DbSourceProxy sp : sps)
+ {
+ AlignmentI al = null;
+ try
+ {
+ al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp
+ .getTestQuery());
+ } catch (Exception e)
+ {
+ e.printStackTrace();
+ System.err.println("Error when retrieving "
+ + (argv.length > 1 ? argv[1] : sp.getTestQuery())
+ + " from " + argv[0] + "\nUsage: " + usage);
+ }
+ SequenceI[] prod = al.getSequencesArray();
+ if (al != null)
+ {
+ for (int p = 0; p < prod.length; p++)
+ {
+ System.out.println("Prod " + p + ": "
+ + prod[p].getDisplayId(true) + " : "
+ + prod[p].getDescription());
+ }
+ }
+ }
+ return;
+ }
+ else
+ {
+ System.err.println("Can't resolve " + argv[0]
+ + " as a database name. Allowed values are :\n"
+ + new SequenceFetcher().getSupportedDb());
+ }
+ System.out.println(usage);
+ return;
+ }
+ ASequenceFetcher sfetcher = new SequenceFetcher(withDas);
+ String[] dbSources = sfetcher.getSupportedDb();
+ for (int dbsource = 0; dbsource < dbSources.length; dbsource++)
+ {
+ String db = dbSources[dbsource];
+ // skip me
+ if (db.equals(DBRefSource.PDB))
+ {
+ continue;
+ }
+ for (DbSourceProxy sp : sfetcher.getSourceProxy(db))
+ {
+ System.out.println("Source: " + sp.getDbName() + " (" + db
+ + "): retrieving test:" + sp.getTestQuery());
+ AlignmentI al = null;
+ try
+ {
+ al = sp.getSequenceRecords(sp.getTestQuery());
+ if (al != null && al.getHeight() > 0)
+ {
+ boolean dna = sp.isDnaCoding();
+ // try and find products
+ String types[] = jalview.analysis.CrossRef
+ .findSequenceXrefTypes(dna, al.getSequencesArray());
+ if (types != null)
+ {
+ System.out.println("Xref Types for: "
+ + (dna ? "dna" : "prot"));
+ for (int t = 0; t < types.length; t++)
+ {
+ System.out.println("Type: " + types[t]);
+ SequenceI[] prod = jalview.analysis.CrossRef
+ .findXrefSequences(al.getSequencesArray(), dna,
+ types[t]).getSequencesArray();
+ System.out.println("Found "
+ + ((prod == null) ? "no" : "" + prod.length)
+ + " products");
+ if (prod != null)
+ {
+ for (int p = 0; p < prod.length; p++)
+ {
+ System.out.println("Prod " + p + ": "
+ + prod[p].getDisplayId(true));
+ }
+ }
+ }
+ }
+ else
+ {
+ noProds.addElement((dna ? new Object[] { al, al }
+ : new Object[] { al }));
+ }
+
+ }
+ } catch (Exception ex)
+ {
+ System.out.println("ERROR:Failed to retrieve test query.");
+ ex.printStackTrace(System.out);
+ }
+
+ if (al == null)
+ {
+ System.out.println("ERROR:No alignment retrieved.");
+ StringBuffer raw = sp.getRawRecords();
+ if (raw != null)
+ {
+ System.out.println(raw.toString());
+ }
+ else
+ {
+ System.out.println("ERROR:No Raw results.");
+ }
+ }
+ else
+ {
+ System.out.println("Retrieved " + al.getHeight() + " sequences.");
+ for (int s = 0; s < al.getHeight(); s++)
+ {
+ SequenceI sq = al.getSequenceAt(s);
+ while (sq.getDatasetSequence() != null)
+ {
+ sq = sq.getDatasetSequence();
+
+ }
+ if (ds == null)
+ {
+ ds = new Alignment(new SequenceI[] { sq });
+
+ }
+ else
+ {
+ ds.addSequence(sq);
+ }
+ }
+ }
+ System.out.flush();
+ System.err.flush();
+
+ }
+ if (noProds.size() > 0)
+ {
+ Enumeration ts = noProds.elements();
+ while (ts.hasMoreElements())
+
+ {
+ Object[] typeSq = (Object[]) ts.nextElement();
+ boolean dna = (typeSq.length > 1);
+ AlignmentI al = (AlignmentI) typeSq[0];
+ System.out.println("Trying getProducts for "
+ + al.getSequenceAt(0).getDisplayId(true));
+ System.out.println("Search DS Xref for: "
+ + (dna ? "dna" : "prot"));
+ // have a bash at finding the products amongst all the retrieved
+ // sequences.
+ SequenceI[] seqs = al.getSequencesArray();
+ Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
+ seqs, dna, null, ds);
+ System.out.println("Found "
+ + ((prodal == null) ? "no" : "" + prodal.getHeight())
+ + " products");
+ if (prodal != null)
+ {
+ SequenceI[] prod = prodal.getSequencesArray(); // note
+ // should
+ // test
+ // rather
+ // than
+ // throw
+ // away
+ // codon
+ // mapping
+ // (if
+ // present)
+ for (int p = 0; p < prod.length; p++)
+ {
+ System.out.println("Prod " + p + ": "
+ + prod[p].getDisplayId(true));
+ }
+ }
+ }
+
+ }
+
+ }
+ }
+
+}