--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.io.FileParse;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.http.NameValuePair;
+import org.apache.http.message.BasicNameValuePair;
+
+public class SeqFetcher
+{
+ private static String ensemblRest = "rest.ensembl.org";
+
+ private static boolean ensemblRestavailable = false;
+
+ private static long lastCheck = -1;
+
+ public boolean isEnsemblAvailable()
+ {
+ if (isTesting || !ensemblRestavailable
+ || System.currentTimeMillis() - lastCheck > 10000)
+ {
+ checkEnsembl();
+ lastCheck = System.currentTimeMillis();
+ }
+ return ensemblRestavailable;
+ }
+
+ private boolean isTesting, testEnsemblStatus;
+
+ /**
+ * @return the isTesting
+ */
+ public boolean isTesting()
+ {
+ return isTesting;
+ }
+
+ /**
+ * @param isTesting
+ * the isTesting to set
+ */
+ public void setTesting(boolean isTesting)
+ {
+ this.isTesting = isTesting;
+ }
+
+ /**
+ * @return the testEnsemblStatus
+ */
+ public boolean isTestEnsemblStatus()
+ {
+ return testEnsemblStatus;
+ }
+
+ /**
+ * @param testEnsemblStatus
+ * the testEnsemblStatus to set
+ */
+ public void setTestEnsemblStatus(boolean testEnsemblStatus)
+ {
+ this.testEnsemblStatus = testEnsemblStatus;
+ }
+
+ private void checkEnsembl()
+ {
+ if (isTesting)
+ {
+ ensemblRestavailable = testEnsemblStatus;
+ return;
+ }
+ try
+ {
+ URL ping = new URL("http://" + ensemblRest + "/info/ping");
+ HttpURLConnection conn = (HttpURLConnection) (ping.openConnection());
+ if (conn.getResponseCode() >= 200 && conn.getResponseCode() < 300)
+ {
+ ensemblRestavailable = true;
+ return;
+ }
+ } catch (Error err)
+ {
+ err.printStackTrace();
+ } catch (Exception exx)
+ {
+ exx.printStackTrace();
+ }
+ ensemblRestavailable = false;
+ }
+
+ public SeqFetcher()
+ {
+
+ // TODO Auto-generated constructor stub
+ }
+
+ public enum EnsemblSeqType
+ {
+ GENOMIC, CDS, TRANSCRIPT, PROTEIN, CDNA;
+ }
+
+ /**
+ * reolve request type as an argument for sequence and features queries
+ *
+ * @param type
+ */
+ public List<NameValuePair> getObjectTypeArg(EnsemblSeqType type)
+ {
+ String arg;
+ switch (type)
+ {
+ case CDS:
+ arg = "cds";
+ break;
+ case TRANSCRIPT:
+ arg = "cds";
+ break;
+ case CDNA:
+ arg = "CDNA";
+ break;
+ case PROTEIN:
+ arg = "protein";
+ break;
+ case GENOMIC:
+ default:
+ arg = "genomic";
+ }
+ return Arrays.asList(new NameValuePair[]
+ { new BasicNameValuePair("type", arg) });
+ }
+
+ public FileParse getSequenceReader(EnsemblSeqType returnType,
+ List<String> ids) throws IOException
+ {
+
+ // adapted From the rest.ensembl.org documentation for sequence_id
+
+ String urls = "http://" + ensemblRest + "/sequence/id";
+ List<NameValuePair> vals = getObjectTypeArg(returnType);
+ boolean f = true;
+ for (NameValuePair nvp : vals)
+ {
+ if (f)
+ {
+ f = false;
+ urls += "?";
+ }
+ else
+ {
+ urls += "&";
+ }
+ urls += nvp.getName() + "=" + nvp.getValue();
+ }
+
+ URL url = new URL(urls);
+
+ URLConnection connection = url.openConnection();
+ HttpURLConnection httpConnection = (HttpURLConnection) connection;
+
+ httpConnection.setRequestMethod("POST");
+ httpConnection.setRequestProperty("Content-Type", "application/json");
+ httpConnection.setRequestProperty("Accept", "text/x-fasta");
+ byte[] thepostbody;
+ {
+ StringBuilder postBody = new StringBuilder();
+ postBody.append("{\"ids\":[");
+ boolean first = true;
+ for (String id : ids)
+ {
+ if (first)
+ {
+ first = false;
+ }
+ else
+ {
+ postBody.append(",");
+ }
+ postBody.append("\"");
+ postBody.append(id.trim());
+ postBody.append("\"");
+ }
+ postBody.append("]}");
+ thepostbody = postBody.toString().getBytes();
+ }
+ httpConnection.setRequestProperty("Content-Length",
+ Integer.toString(thepostbody.length));
+ httpConnection.setUseCaches(false);
+ httpConnection.setDoInput(true);
+ httpConnection.setDoOutput(true);
+
+ DataOutputStream wr = new DataOutputStream(
+ httpConnection.getOutputStream());
+ wr.write(thepostbody);
+ wr.flush();
+ wr.close();
+
+ InputStream response = connection.getInputStream();
+ int responseCode = httpConnection.getResponseCode();
+
+ if (responseCode != 200)
+ {
+ throw new RuntimeException(
+ "Response code was not 200. Detected response was "
+ + responseCode);
+ }
+
+ BufferedReader reader = null;
+ reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
+ FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
+ return fp;
+ }
+}
--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.SeqFetcher;
+import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for SeqFetcher: one offline test of the testing-mode availability
+ * switch, and tests marked "live" that contact the Ensembl REST service.
+ */
+public class SeqFetcherTest
+{
+  /**
+   * Rows of { sequence type, query identifier, expected FASTA text } fed to
+   * the data-driven retrieval test.
+   */
+  public static Object[][] allSeqs = new Object[][]
+  {
+  {
+      EnsemblSeqType.CDS,
+      "CCDS5863.1",
+      ">CCDS5863.1\n"
+              + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
+              + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
+              + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
+              + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
+              + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
+              + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
+              + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
+              + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
+              + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
+              + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
+              + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
+              + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
+              + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
+              + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
+              + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
+              + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
+              + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
+              + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
+              + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
+              + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
+              + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
+              + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
+              + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
+              + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
+              + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
+              + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
+              + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
+              + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
+              + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
+              + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
+              + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
+              + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
+              + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
+              + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
+              + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
+              + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
+              + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
+              + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
+              + "GGTGCGTTTCCTGTCCACTGA" } };
+
+  /**
+   * Supplies the rows of allSeqs to data-driven tests, printing the name of
+   * the requesting test method first.
+   */
+  @DataProvider(name = "ens_seqs")
+  public Object[][] createData(Method m)
+  {
+    System.out.println(m.getName());
+    return allSeqs;
+  }
+
+  /**
+   * In testing mode the reported availability must follow the configured
+   * test status, with no network access.
+   */
+  @Test
+  public void testCheckEnsembl()
+  {
+    SeqFetcher fetcher = new SeqFetcher();
+    fetcher.setTestEnsemblStatus(true);
+    fetcher.setTesting(true);
+    Assert.assertTrue(fetcher.isEnsemblAvailable());
+
+    fetcher.setTestEnsemblStatus(false);
+    Assert.assertFalse(fetcher.isEnsemblAvailable());
+  }
+
+  /**
+   * Queries the real service and prints whether it is up; makes no
+   * assertion about the outcome.
+   */
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    SeqFetcher fetcher = new SeqFetcher();
+    String status;
+    if (fetcher.isEnsemblAvailable())
+    {
+      status = "UP!";
+    }
+    else
+    {
+      status = "DOWN ******************* BAD!";
+    }
+    System.out.println("Ensembl is " + status);
+  }
+
+  /**
+   * Retrieves one identifier from the service and checks that the returned
+   * sequences match the expected FASTA text by count, name and residues.
+   */
+  @Test(dataProvider = "ens_seqs", suiteName = "live")
+  public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
+          throws Exception
+  {
+    SeqFetcher fetcher = new SeqFetcher();
+    FileParse source = fetcher.getSequenceReader(type, Arrays.asList(sq));
+    SequenceI[] retrieved = new FastaFile(source).getSeqsAsArray();
+
+    FastaFile expectedFile = new FastaFile(fastasq,
+            AppletFormatAdapter.PASTE);
+    SequenceI[] expected = expectedFile.getSeqsAsArray();
+
+    Assert.assertEquals(retrieved.length, expected.length,
+            "Different number of sequences retrieved for query " + sq);
+
+    Alignment retrievedAl = new Alignment(retrieved);
+    for (int i = 0; i < expected.length; i++)
+    {
+      SequenceI want = expected[i];
+      SequenceI[] matches = retrievedAl.findSequenceMatch(want.getName());
+      Assert.assertNotNull(matches,
+              "Couldn't find sequences matching expected sequence "
+                      + want.getName());
+      Assert.assertEquals(matches.length, 1,
+              "Expected only one sequence for sequence ID "
+                      + want.getName());
+
+      String differMsg = "Sequences differ for " + want.getName() + "\n"
+              + "Exp:" + want.getSequenceAsString() + "\n" + "Got:"
+              + matches[0].getSequenceAsString();
+      Assert.assertEquals(matches[0].getSequenceAsString(),
+              want.getSequenceAsString(), differMsg);
+    }
+  }
+}
\ No newline at end of file