From 604a94b849bcf1ddb14407418a465c9d134cd6ab Mon Sep 17 00:00:00 2001
From: Jim Procter
Date: Fri, 26 Jun 2015 17:23:07 +0100
Subject: [PATCH] JAL-1705 basic client & tests for rest.ensembl.org /ping and /sequence/id

---
 src/jalview/ext/ensembl/SeqFetcher.java      |  292 ++++++++++++++++++++++++++
 test/jalview/ext/ensembl/SeqFetcherTest.java |  123 +++++++++++
 2 files changed, 415 insertions(+)
 create mode 100644 src/jalview/ext/ensembl/SeqFetcher.java
 create mode 100644 test/jalview/ext/ensembl/SeqFetcherTest.java

diff --git a/src/jalview/ext/ensembl/SeqFetcher.java b/src/jalview/ext/ensembl/SeqFetcher.java
new file mode 100644
index 0000000..95a990d
--- /dev/null
+++ b/src/jalview/ext/ensembl/SeqFetcher.java
@@ -0,0 +1,292 @@
+package jalview.ext.ensembl;
+
+import jalview.io.FileParse;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.http.NameValuePair;
+import org.apache.http.message.BasicNameValuePair;
+
+/**
+ * Basic client for the Ensembl REST service. It can check whether the service
+ * responds to /info/ping and retrieve FASTA sequences via /sequence/id.
+ */
+public class SeqFetcher
+{
+  private static String ensemblRest = "rest.ensembl.org";
+
+  // result of the most recent availability check; shared by all instances
+  private static boolean ensemblRestavailable = false;
+
+  // System.currentTimeMillis() of the most recent check, or -1 if none
+  private static long lastCheck = -1;
+
+  /**
+   * Reports whether the Ensembl REST service is available. The check is
+   * repeated when this instance is in testing mode, when the last check
+   * failed, or when the last check is more than 10 seconds old; otherwise
+   * the cached result is returned.
+   *
+   * @return true if the most recent check succeeded
+   */
+  public boolean isEnsemblAvailable()
+  {
+    if (isTesting || !ensemblRestavailable
+            || System.currentTimeMillis() - lastCheck > 10000)
+    {
+      checkEnsembl();
+      lastCheck = System.currentTimeMillis();
+    }
+    return ensemblRestavailable;
+  }
+
+  private boolean isTesting, testEnsemblStatus;
+
+  /**
+   * @return the isTesting
+   */
+  public boolean isTesting()
+  {
+    return isTesting;
+  }
+
+  /**
+   * @param isTesting
+   *          the isTesting to set
+   */
+  public void setTesting(boolean isTesting)
+  {
+    this.isTesting = isTesting;
+  }
+
+  /**
+   * @return the testEnsemblStatus
+   */
+  public boolean isTestEnsemblStatus()
+  {
+    return testEnsemblStatus;
+  }
+
+  /**
+   * @param testEnsemblStatus
+   *          the testEnsemblStatus to set
+   */
+  public void setTestEnsemblStatus(boolean testEnsemblStatus)
+  {
+    this.testEnsemblStatus = testEnsemblStatus;
+  }
+
+  /**
+   * Updates the shared availability flag. In testing mode the flag is set to
+   * the configured test status without any network access; otherwise the
+   * service's /info/ping endpoint is requested and any 2xx response counts as
+   * available. Failures are reported on stderr and leave the flag false.
+   */
+  private void checkEnsembl()
+  {
+    if (isTesting)
+    {
+      ensemblRestavailable = testEnsemblStatus;
+      return;
+    }
+    HttpURLConnection conn = null;
+    try
+    {
+      URL ping = new URL("http://" + ensemblRest + "/info/ping");
+      conn = (HttpURLConnection) (ping.openConnection());
+      int status = conn.getResponseCode();
+      if (status >= 200 && status < 300)
+      {
+        ensemblRestavailable = true;
+        return;
+      }
+    } catch (Error err)
+    {
+      err.printStackTrace();
+    } catch (Exception exx)
+    {
+      exx.printStackTrace();
+    } finally
+    {
+      // release the connection whether or not the ping succeeded
+      if (conn != null)
+      {
+        conn.disconnect();
+      }
+    }
+    ensemblRestavailable = false;
+  }
+
+  public SeqFetcher()
+  {
+  }
+
+  public enum EnsemblSeqType
+  {
+    GENOMIC, CDS, TRANSCRIPT, PROTEIN, CDNA;
+  }
+
+  /**
+   * Resolves the request type as an argument for sequence and features
+   * queries.
+   *
+   * @param type
+   *          kind of sequence wanted; anything unrecognised is treated as
+   *          GENOMIC
+   * @return a single-element list holding the "type" query parameter
+   */
+  public List<NameValuePair> getObjectTypeArg(EnsemblSeqType type)
+  {
+    String arg;
+    switch (type)
+    {
+    case CDS:
+      arg = "cds";
+      break;
+    case TRANSCRIPT:
+      // NOTE(review): shares "cds" with CDS - confirm this is intended
+      arg = "cds";
+      break;
+    case CDNA:
+      // the REST API documents this value in lower case
+      arg = "cdna";
+      break;
+    case PROTEIN:
+      arg = "protein";
+      break;
+    case GENOMIC:
+    default:
+      arg = "genomic";
+    }
+    return Arrays.asList(new NameValuePair[]
+    { new BasicNameValuePair("type", arg) });
+  }
+
+  /**
+   * Builds the /sequence/id request URL, including the query parameters that
+   * select the type of sequence to return.
+   */
+  private String getSequenceUrl(EnsemblSeqType returnType)
+  {
+    StringBuilder urls = new StringBuilder("http://");
+    urls.append(ensemblRest).append("/sequence/id");
+    char separator = '?';
+    for (NameValuePair nvp : getObjectTypeArg(returnType))
+    {
+      urls.append(separator).append(nvp.getName()).append("=")
+              .append(nvp.getValue());
+      separator = '&';
+    }
+    return urls.toString();
+  }
+
+  /**
+   * Encodes the identifiers as the UTF-8 JSON document {"ids":["a","b"]}.
+   * Identifiers are trimmed, and backslashes and double quotes are escaped so
+   * that they cannot terminate the enclosing JSON string early.
+   */
+  private byte[] getPostBody(List<String> ids) throws IOException
+  {
+    StringBuilder postBody = new StringBuilder();
+    postBody.append("{\"ids\":[");
+    boolean first = true;
+    for (String id : ids)
+    {
+      if (first)
+      {
+        first = false;
+      }
+      else
+      {
+        postBody.append(",");
+      }
+      postBody.append("\"");
+      postBody.append(id.trim().replace("\\", "\\\\")
+              .replace("\"", "\\\""));
+      postBody.append("\"");
+    }
+    postBody.append("]}");
+    return postBody.toString().getBytes("UTF-8");
+  }
+
+  /**
+   * Posts the given identifiers to the Ensembl /sequence/id endpoint and
+   * returns a reader over the FASTA formatted response.
+   *
+   * @param returnType
+   *          type of sequence to request for each identifier
+   * @param ids
+   *          identifiers to look up; each is trimmed before sending
+   * @return a FileParse wrapping the response body, decoded as UTF-8
+   * @throws IOException
+   *           if the request cannot be sent or the response cannot be read
+   * @throws RuntimeException
+   *           if the response can be opened but its status is not 200
+   */
+  public FileParse getSequenceReader(EnsemblSeqType returnType,
+          List<String> ids) throws IOException
+  {
+    // adapted From the rest.ensembl.org documentation for sequence_id
+    URL url = new URL(getSequenceUrl(returnType));
+
+    URLConnection connection = url.openConnection();
+    HttpURLConnection httpConnection = (HttpURLConnection) connection;
+
+    httpConnection.setRequestMethod("POST");
+    httpConnection.setRequestProperty("Content-Type", "application/json");
+    httpConnection.setRequestProperty("Accept", "text/x-fasta");
+    byte[] thepostbody = getPostBody(ids);
+    httpConnection.setRequestProperty("Content-Length",
+            Integer.toString(thepostbody.length));
+    httpConnection.setUseCaches(false);
+    httpConnection.setDoInput(true);
+    httpConnection.setDoOutput(true);
+
+    boolean succeeded = false;
+    try
+    {
+      DataOutputStream wr = new DataOutputStream(
+              httpConnection.getOutputStream());
+      try
+      {
+        wr.write(thepostbody);
+        wr.flush();
+      } finally
+      {
+        wr.close();
+      }
+
+      InputStream response = connection.getInputStream();
+      int responseCode = httpConnection.getResponseCode();
+
+      if (responseCode != 200)
+      {
+        throw new RuntimeException(
+                "Response code was not 200. Detected response was "
+                        + responseCode);
+      }
+
+      BufferedReader reader = new BufferedReader(new InputStreamReader(
+              response, "UTF-8"));
+      FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
+      succeeded = true;
+      return fp;
+    } finally
+    {
+      // on any failure, release the connection and its streams
+      if (!succeeded)
+      {
+        httpConnection.disconnect();
+      }
+    }
+  }
+}
diff --git a/test/jalview/ext/ensembl/SeqFetcherTest.java b/test/jalview/ext/ensembl/SeqFetcherTest.java
new file mode 100644
index 0000000..3a6bdc6
--- /dev/null
+++ b/test/jalview/ext/ensembl/SeqFetcherTest.java
@@ -0,0 +1,123 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.SeqFetcher;
+import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class SeqFetcherTest
+{
+  @Test
+  public void testCheckEnsembl()
+  {
+    SeqFetcher sf = new SeqFetcher();
+    sf.setTestEnsemblStatus(true);
+    sf.setTesting(true);
+    Assert.assertTrue(sf.isEnsemblAvailable());
+    sf.setTestEnsemblStatus(false);
+    Assert.assertFalse(sf.isEnsemblAvailable());
+  }
+
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    SeqFetcher sf = new SeqFetcher();
+    boolean isAvailable = sf.isEnsemblAvailable();
+    System.out.println("Ensembl is "
+            + (isAvailable ? "UP!" : "DOWN ******************* BAD!"));
+  }
+
+  @DataProvider(name = "ens_seqs")
+  public Object[][] createData(Method m)
+  {
+    System.out.println(m.getName());
+    return allSeqs;
+  }
+
+  public static Object[][] allSeqs = new Object[][]
+  {
+  {
+      EnsemblSeqType.CDS,
+      "CCDS5863.1",
+      ">CCDS5863.1\n"
+              + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
+              + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
+              + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
+              + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
+              + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
+              + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
+              + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
+              + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
+              + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
+              + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
+              + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
+              + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
+              + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
+              + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
+              + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
+              + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
+              + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
+              + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
+              + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
+              + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
+              + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
+              + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
+              + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
+              + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
+              + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
+              + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
+              + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
+              + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
+              + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
+              + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
+              + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
+              + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
+              + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
+              + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
+              + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
+              + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
+              + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
+              + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
+              + "GGTGCGTTTCCTGTCCACTGA" } };
+
+  @Test(dataProvider = "ens_seqs", suiteName = "live")
+  public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
+          throws Exception
+  {
+    SeqFetcher sf = new SeqFetcher();
+    FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[]
+    { sq }));
+    SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
+    FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
+    SequenceI[] trueSqs = trueRes.getSeqsAsArray();
+    Assert.assertEquals(sqs.length, trueSqs.length,
+            "Different number of sequences retrieved for query " + sq);
+    Alignment ral = new Alignment(sqs);
+    for (SequenceI tr : trueSqs)
+    {
+      SequenceI[] rseq;
+      Assert.assertNotNull(
+              rseq = ral.findSequenceMatch(tr.getName()),
+              "Couldn't find sequences matching expected sequence "
+                      + tr.getName());
+      Assert.assertEquals(rseq.length, 1,
+              "Expected only one sequence for sequence ID " + tr.getName());
+      Assert.assertEquals(
+              rseq[0].getSequenceAsString(),
+              tr.getSequenceAsString(),
+              "Sequences differ for " + tr.getName() + "\n" + "Exp:"
+                      + tr.getSequenceAsString() + "\n" + "Got:"
+                      + rseq[0].getSequenceAsString());
+    }
+  }
+}
\ No newline at end of file
-- 
1.7.10.2