1 package jalview.ext.ensembl;
3 import static org.testng.AssertJUnit.assertEquals;
5 import jalview.datamodel.Alignment;
6 import jalview.datamodel.AlignmentI;
7 import jalview.datamodel.SequenceI;
8 import jalview.io.AppletFormatAdapter;
9 import jalview.io.FastaFile;
10 import jalview.io.FileParse;
12 import java.lang.reflect.Method;
13 import java.net.MalformedURLException;
15 import java.util.Arrays;
16 import java.util.List;
18 import org.testng.Assert;
19 import org.testng.annotations.DataProvider;
20 import org.testng.annotations.Test;
23 public class EnsemblSeqProxyTest
/**
 * Table of expected sequence data. Each row ends with a multi-line string
 * literal built by concatenation: the first row visible here carries an
 * amino-acid sequence, the second a nucleotide sequence, and the third
 * appears to repeat the amino-acid sequence. The first two literals end with
 * a newline; the third does not.
 *
 * NOTE(review): the leading elements of each row are not visible in this
 * view. testGetOneSeqs takes (EnsemblRestClient, String, String), so rows are
 * presumably { client, query id, expected FASTA text } supplied through the
 * "ens_seqs" data provider - confirm.
 */
25 private static final Object[][] allSeqs = new Object[][] {
30 + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
31 + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
32 + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
33 + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
34 + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
35 + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
36 + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
37 + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
38 + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
39 + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
40 + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
41 + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
42 + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
47 + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
48 + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
49 + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
50 + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
51 + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
52 + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
53 + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
54 + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
55 + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
56 + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
57 + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
58 + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
59 + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
60 + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
61 + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
62 + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
63 + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
64 + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
65 + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
66 + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
67 + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
68 + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
69 + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
70 + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
71 + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
72 + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
73 + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
74 + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
75 + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
76 + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
77 + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
78 + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
79 + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
80 + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
81 + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
82 + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
83 + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
84 + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
85 + "GGTGCGTTTCCTGTCCACTGA\n" },
90 + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
91 + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
92 + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
93 + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
94 + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
95 + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
96 + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
97 + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
98 + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
99 + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
100 + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
101 + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
102 + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
/**
 * Data provider registered under the name "queries". Supplies one
 * single-element row per accession string.
 *
 * @param m
 *          the requesting test method; not used by the visible code
 * @return two rows, holding "CCDS5863.1" and "ENSP00000288602"
 */
104 @DataProvider(name = "queries")
105 public Object[][] createQueryData(Method m)
107 return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } };
/**
 * Checks that a query string drawn from the "queries" data provider is
 * accepted as a valid reference by an EnsemblProtein fetcher.
 *
 * @param query
 *          accession string to validate
 * @throws Exception
 *           declared by the signature; the visible lines show no call that
 *           obviously throws
 */
110 @Test(dataProvider = "queries")
111 public void testIsValidReference(String query) throws Exception
113 EnsemblSequenceFetcher esq = new EnsemblProtein();
// on failure the message quotes the query and the fetcher's accession regex
114 Assert.assertTrue(esq.isValidReference(query),
115 "Expected reference string " + query
116 + " to be valid for regex "
117 + esq.getAccessionValidator().toString());
/**
 * Data provider registered under the name "ens_seqs". Prints the name of the
 * requesting test method to standard output.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably it returns allSeqs - confirm.
 *
 * @param m
 *          the requesting test method, used only for the printed name
 */
120 @DataProvider(name = "ens_seqs")
121 public Object[][] createData(Method m)
123 System.out.println(m.getName());
/**
 * Test in the "live" suite, fed by the "ens_seqs" data provider. Reads
 * sequences through the supplied client, parses them as FASTA, and compares
 * them by name and by sequence string with the expected FASTA text.
 *
 * NOTE(review): several continuation lines of this method are not visible
 * in this view (the arguments to getSequenceReader, the declaration of rseq,
 * and the opening of the final assertion).
 *
 * @param proxy
 *          client used to obtain the sequence reader
 * @param sq
 *          query identifier, used in the count-mismatch failure message
 * @param fastasq
 *          expected result as FASTA-formatted text
 */
127 @Test(dataProvider = "ens_seqs", suiteName = "live")
128 public void testGetOneSeqs(EnsemblRestClient proxy, String sq, String fastasq)
// fetch via the client, then parse the returned data as FASTA
131 FileParse fp = proxy.getSequenceReader(Arrays
134 SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
// parse the expected text, supplied as a pasted string rather than a file
135 FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
136 SequenceI[] trueSqs = trueRes.getSeqsAsArray();
137 Assert.assertEquals(sqs.length, trueSqs.length,
138 "Different number of sequences retrieved for query " + sq);
// wrap the retrieved sequences so they can be looked up by name
139 Alignment ral = new Alignment(sqs);
// every expected sequence must match exactly one retrieved sequence by name
140 for (SequenceI tr : trueSqs)
143 Assert.assertNotNull(
144 rseq = ral.findSequenceMatch(tr.getName()),
145 "Couldn't find sequences matching expected sequence "
147 Assert.assertEquals(rseq.length, 1,
148 "Expected only one sequence for sequence ID " + tr.getName());
150 rseq[0].getSequenceAsString(),
151 tr.getSequenceAsString(),
152 "Sequences differ for " + tr.getName() + "\n" + "Exp:"
153 + tr.getSequenceAsString() + "\n" + "Got:"
154 + rseq[0].getSequenceAsString());
/**
 * Test in the "live" suite. Asks an EnsemblRestClient whether Ensembl is
 * available and prints the answer to standard output.
 *
 * NOTE(review): the visible lines only print the result and never assert on
 * it, so this test would not fail when the service is reported as down -
 * confirm whether that is intended.
 */
159 @Test(suiteName = "live")
160 public void testLiveCheckEnsembl()
// anonymous subclass with generated stub overrides; only isEnsemblAvailable()
// is called on it below (the stub bodies are not visible in this view)
162 EnsemblRestClient sf = new EnsemblRestClient()
166 public String getDbName()
168 // TODO Auto-generated method stub
173 public AlignmentI getSequenceRecords(String queries) throws Exception
175 // TODO Auto-generated method stub
180 protected URL getUrl(List<String> ids) throws MalformedURLException
182 // TODO Auto-generated method stub
187 protected boolean useGetRequest()
189 // TODO Auto-generated method stub
194 protected String getRequestMimeType(boolean b)
196 // TODO Auto-generated method stub
201 protected String getResponseMimeType()
203 // TODO Auto-generated method stub
// report the availability check result on stdout
208 boolean isAvailable = sf.isEnsemblAvailable();
209 System.out.println("Ensembl is "
210 + (isAvailable ? "UP!"
211 : "DOWN or unreachable ******************* BAD!"));
215 * Tests for the method that computes all peptide variants given codon
218 @Test(groups = "Functional")
219 public void testComputePeptideVariants()
// Each case passes a three-element array of base alternatives (apparently one
// element per position of the codon) plus the current residue, then compares
// the returned list's toString() with the expected text. Lower-case bases and
// a lower-case residue ("s") are passed in some cases, so the expected
// results rely on case-insensitive handling.
221 String[][] codonVariants = new String[][] { { "A" }, { "G" }, { "T" } };
224 * AGT codes for S - this is not included in the variants returned
226 List<String> variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S");
227 assertEquals("[]", variants.toString());
229 // S is reported if it differs from the current value (A):
230 variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "A");
231 assertEquals("[S]", variants.toString());
234 * synonymous variant is not reported
236 codonVariants = new String[][] { { "A" }, { "G" }, { "C", "T" } };
237 // AGC and AGT both code for S
238 variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "s");
239 assertEquals("[]", variants.toString());
242 * equivalent variants are only reported once
244 codonVariants = new String[][] { { "C" }, { "T" },
245 { "A", "C", "G", "T" } };
246 // CTA CTC CTG CTT all code for L
247 variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S");
248 assertEquals("[L]", variants.toString());
251 * vary codons 1 and 2; variant products are sorted and non-redundant
253 codonVariants = new String[][] { { "a", "C" }, { "g", "T" }, { "A" } };
254 // aga ata cga cta code for R, I, R, L
255 variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S");
256 assertEquals("[I, L, R]", variants.toString());
259 * vary codons 2 and 3
261 codonVariants = new String[][] { { "a" }, { "g", "T" }, { "A", "c" } };
262 // aga agc ata atc code for R, S, I, I
263 variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S");
264 assertEquals("[I, R]", variants.toString());
267 * vary codons 1 and 3
269 codonVariants = new String[][] { { "a", "t" }, { "a" }, { "t", "g" } };
270 // aat aag tat tag code for N, K, Y, STOP - STOP sorted to end
271 variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S");
272 assertEquals("[K, N, Y, STOP]", variants.toString());
275 * vary codons 1, 2 and 3
// NOTE(review): the third element of the next array is not visible in this view
277 codonVariants = new String[][] { { "a", "t" }, { "G", "C" },
279 // agt agg act acg tgt tgg tct tcg code for S, R, T, T, C, W, S, S
280 variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S");
281 assertEquals("[C, R, T, W]", variants.toString());