JAL-1705 basic client & tests for rest.ensembl.org /ping and /sequence/id
author Jim Procter <jprocter@issues.jalview.org>
Fri, 26 Jun 2015 16:23:07 +0000 (17:23 +0100)
committer Jim Procter <jprocter@issues.jalview.org>
Fri, 26 Jun 2015 16:23:07 +0000 (17:23 +0100)
src/jalview/ext/ensembl/SeqFetcher.java [new file with mode: 0644]
test/jalview/ext/ensembl/SeqFetcherTest.java [new file with mode: 0644]

diff --git a/src/jalview/ext/ensembl/SeqFetcher.java b/src/jalview/ext/ensembl/SeqFetcher.java
new file mode 100644 (file)
index 0000000..95a990d
--- /dev/null
@@ -0,0 +1,221 @@
+package jalview.ext.ensembl;
+
+import jalview.io.FileParse;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.http.NameValuePair;
+import org.apache.http.message.BasicNameValuePair;
+
+public class SeqFetcher
+{
+  private static String ensemblRest = "rest.ensembl.org";
+
+  private static boolean ensemblRestavailable = false;
+
+  private static long lastCheck = -1;
+
+  public boolean isEnsemblAvailable()
+  {
+    if (isTesting || !ensemblRestavailable
+            || System.currentTimeMillis() - lastCheck > 10000)
+    {
+      checkEnsembl();
+      lastCheck = System.currentTimeMillis();
+    }
+    return ensemblRestavailable;
+  }
+
+  private boolean isTesting, testEnsemblStatus;
+
+  /**
+   * @return the isTesting
+   */
+  public boolean isTesting()
+  {
+    return isTesting;
+  }
+
+  /**
+   * @param isTesting
+   *          the isTesting to set
+   */
+  public void setTesting(boolean isTesting)
+  {
+    this.isTesting = isTesting;
+  }
+
+  /**
+   * @return the testEnsemblStatus
+   */
+  public boolean isTestEnsemblStatus()
+  {
+    return testEnsemblStatus;
+  }
+
+  /**
+   * @param testEnsemblStatus
+   *          the testEnsemblStatus to set
+   */
+  public void setTestEnsemblStatus(boolean testEnsemblStatus)
+  {
+    this.testEnsemblStatus = testEnsemblStatus;
+  }
+
+  private void checkEnsembl()
+  {
+    if (isTesting)
+    {
+      ensemblRestavailable = testEnsemblStatus;
+      return;
+    }
+    try
+    {
+      URL ping = new URL("http://" + ensemblRest + "/info/ping");
+      HttpURLConnection conn = (HttpURLConnection) (ping.openConnection());
+      if (conn.getResponseCode() >= 200 && conn.getResponseCode() < 300)
+      {
+        ensemblRestavailable = true;
+        return;
+      }
+    } catch (Error err)
+    {
+      err.printStackTrace();
+    } catch (Exception exx)
+    {
+      exx.printStackTrace();
+    }
+    ensemblRestavailable = false;
+  }
+
+  public SeqFetcher()
+  {
+
+    // TODO Auto-generated constructor stub
+  }
+
+  public enum EnsemblSeqType
+  {
+    GENOMIC, CDS, TRANSCRIPT, PROTEIN, CDNA;
+  }
+
+  /**
+   * reolve request type as an argument for sequence and features queries
+   * 
+   * @param type
+   */
+  public List<NameValuePair> getObjectTypeArg(EnsemblSeqType type)
+  {
+    String arg;
+    switch (type)
+    {
+    case CDS:
+      arg = "cds";
+      break;
+    case TRANSCRIPT:
+      arg = "cds";
+      break;
+    case CDNA:
+      arg = "CDNA";
+      break;
+    case PROTEIN:
+      arg = "protein";
+      break;
+    case GENOMIC:
+    default:
+      arg = "genomic";
+    }
+    return Arrays.asList(new NameValuePair[]
+    { new BasicNameValuePair("type", arg) });
+  }
+
+  public FileParse getSequenceReader(EnsemblSeqType returnType,
+          List<String> ids) throws IOException
+  {
+
+    // adapted From the rest.ensembl.org documentation for sequence_id
+
+    String urls = "http://" + ensemblRest + "/sequence/id";
+    List<NameValuePair> vals = getObjectTypeArg(returnType);
+    boolean f = true;
+    for (NameValuePair nvp : vals)
+    {
+      if (f)
+      {
+        f = false;
+        urls += "?";
+      }
+      else
+      {
+        urls += "&";
+      }
+      urls += nvp.getName() + "=" + nvp.getValue();
+    }
+
+    URL url = new URL(urls);
+
+    URLConnection connection = url.openConnection();
+    HttpURLConnection httpConnection = (HttpURLConnection) connection;
+
+    httpConnection.setRequestMethod("POST");
+    httpConnection.setRequestProperty("Content-Type", "application/json");
+    httpConnection.setRequestProperty("Accept", "text/x-fasta");
+    byte[] thepostbody;
+    {
+      StringBuilder postBody = new StringBuilder();
+      postBody.append("{\"ids\":[");
+      boolean first = true;
+      for (String id : ids)
+      {
+        if (first)
+        {
+          first = false;
+        }
+        else
+        {
+          postBody.append(",");
+        }
+        postBody.append("\"");
+        postBody.append(id.trim());
+        postBody.append("\"");
+      }
+      postBody.append("]}");
+      thepostbody = postBody.toString().getBytes();
+    }
+    httpConnection.setRequestProperty("Content-Length",
+            Integer.toString(thepostbody.length));
+    httpConnection.setUseCaches(false);
+    httpConnection.setDoInput(true);
+    httpConnection.setDoOutput(true);
+
+    DataOutputStream wr = new DataOutputStream(
+            httpConnection.getOutputStream());
+    wr.write(thepostbody);
+    wr.flush();
+    wr.close();
+
+    InputStream response = connection.getInputStream();
+    int responseCode = httpConnection.getResponseCode();
+
+    if (responseCode != 200)
+    {
+      throw new RuntimeException(
+              "Response code was not 200. Detected response was "
+                      + responseCode);
+    }
+
+    BufferedReader reader = null;
+    reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
+    FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
+    return fp;
+  }
+}
diff --git a/test/jalview/ext/ensembl/SeqFetcherTest.java b/test/jalview/ext/ensembl/SeqFetcherTest.java
new file mode 100644 (file)
index 0000000..3a6bdc6
--- /dev/null
@@ -0,0 +1,123 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.SeqFetcher;
+import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class SeqFetcherTest
+{
+  @Test
+  public void testCheckEnsembl()
+  {
+    SeqFetcher sf = new SeqFetcher();
+    sf.setTestEnsemblStatus(true);
+    sf.setTesting(true);
+    Assert.assertTrue(sf.isEnsemblAvailable());
+    sf.setTestEnsemblStatus(false);
+    Assert.assertFalse(sf.isEnsemblAvailable());
+  }
+
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    SeqFetcher sf = new SeqFetcher();
+    boolean isAvailable = sf.isEnsemblAvailable();
+    System.out.println("Ensembl is "
+            + (isAvailable ? "UP!" : "DOWN ******************* BAD!"));
+  }
+
+  @DataProvider(name = "ens_seqs")
+  public Object[][] createData(Method m)
+  {
+    System.out.println(m.getName());
+    return allSeqs;
+  }
+
+  public static Object[][] allSeqs = new Object[][]
+  {
+  {
+      EnsemblSeqType.CDS,
+      "CCDS5863.1",
+      ">CCDS5863.1\n"
+              + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
+              + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
+              + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
+              + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
+              + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
+              + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
+              + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
+              + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
+              + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
+              + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
+              + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
+              + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
+              + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
+              + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
+              + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
+              + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
+              + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
+              + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
+              + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
+              + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
+              + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
+              + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
+              + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
+              + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
+              + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
+              + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
+              + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
+              + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
+              + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
+              + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
+              + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
+              + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
+              + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
+              + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
+              + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
+              + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
+              + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
+              + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
+              + "GGTGCGTTTCCTGTCCACTGA" } };
+
+  @Test(dataProvider = "ens_seqs", suiteName = "live")
+  public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
+          throws Exception
+  {
+    SeqFetcher sf = new SeqFetcher();
+    FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[]
+    { sq }));
+    SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
+    FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
+    SequenceI[] trueSqs = trueRes.getSeqsAsArray();
+    Assert.assertEquals(sqs.length, trueSqs.length,
+            "Different number of sequences retrieved for query " + sq);
+    Alignment ral = new Alignment(sqs);
+    for (SequenceI tr : trueSqs)
+    {
+      SequenceI[] rseq;
+      Assert.assertNotNull(
+              rseq = ral.findSequenceMatch(tr.getName()),
+              "Couldn't find sequences matching expected sequence "
+                      + tr.getName());
+      Assert.assertEquals(rseq.length, 1,
+              "Expected only one sequence for sequence ID " + tr.getName());
+      Assert.assertEquals(
+              rseq[0].getSequenceAsString(),
+              tr.getSequenceAsString(),
+              "Sequences differ for " + tr.getName() + "\n" + "Exp:"
+                      + tr.getSequenceAsString() + "\n" + "Got:"
+                      + rseq[0].getSequenceAsString());
+    }
+  }
+}
\ No newline at end of file