3ca74b06ddacd399f6d141ed485e0b3369734728
[jalview.git] / test / jalview / ext / ensembl / EnsemblSeqProxyTest.java
1 package jalview.ext.ensembl;
2
3 import jalview.datamodel.Alignment;
4 import jalview.datamodel.AlignmentI;
5 import jalview.datamodel.SequenceI;
6 import jalview.io.AppletFormatAdapter;
7 import jalview.io.FastaFile;
8 import jalview.io.FileParse;
9
10 import java.lang.reflect.Method;
11 import java.net.MalformedURLException;
12 import java.net.URL;
13 import java.util.Arrays;
14 import java.util.List;
15
16 import org.testng.Assert;
17 import org.testng.annotations.DataProvider;
18 import org.testng.annotations.Test;
19
20
21 public class EnsemblSeqProxyTest
22 {
23   private static final Object[][] allSeqs = new Object[][] {
24       {
25           new EnsemblProtein(),
26           "CCDS5863.1",
27           ">CCDS5863.1\n"
28                   + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
29                   + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
30                   + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
31                   + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
32                   + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
33                   + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
34                   + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
35                   + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
36                   + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
37                   + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
38                   + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
39                   + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
40                   + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
41       {
42           new EnsemblCdna(),
43           "CCDS5863.1",
44           ">CCDS5863.1\n"
45                   + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
46                   + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
47                   + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
48                   + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
49                   + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
50                   + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
51                   + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
52                   + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
53                   + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
54                   + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
55                   + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
56                   + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
57                   + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
58                   + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
59                   + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
60                   + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
61                   + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
62                   + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
63                   + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
64                   + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
65                   + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
66                   + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
67                   + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
68                   + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
69                   + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
70                   + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
71                   + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
72                   + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
73                   + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
74                   + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
75                   + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
76                   + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
77                   + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
78                   + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
79                   + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
80                   + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
81                   + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
82                   + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
83                   + "GGTGCGTTTCCTGTCCACTGA\n" },
84       {
85           new EnsemblProtein(),
86           "ENSP00000288602",
87           ">ENSP00000288602\n"
88                   + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
89                   + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
90                   + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
91                   + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
92                   + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
93                   + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
94                   + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
95                   + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
96                   + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
97                   + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
98                   + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
99                   + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
100                   + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
101
102   @DataProvider(name = "queries")
103   public Object[][] createQueryData(Method m)
104   {
105     return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } };
106   }
107
108   @Test(dataProvider = "queries")
109   public void testIsValidReference(String query) throws Exception
110   {
111     EnsemblSequenceFetcher esq = new EnsemblProtein();
112     Assert.assertTrue(esq.isValidReference(query),
113             "Expected reference string " + query
114                     + " to be valid for regex "
115                     + esq.getAccessionValidator().toString());
116   }
117
118   @DataProvider(name = "ens_seqs")
119   public Object[][] createData(Method m)
120   {
121     System.out.println(m.getName());
122     return allSeqs;
123   }
124
125   @Test(dataProvider = "ens_seqs", suiteName = "live")
126   public void testGetOneSeqs(EnsemblRestClient proxy, String sq, String fastasq)
127           throws Exception
128   {
129     FileParse fp = proxy.getSequenceReader(Arrays
130             .asList(new String[]
131     { sq }));
132     SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
133     FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
134     SequenceI[] trueSqs = trueRes.getSeqsAsArray();
135     Assert.assertEquals(sqs.length, trueSqs.length,
136             "Different number of sequences retrieved for query " + sq);
137     Alignment ral = new Alignment(sqs);
138     for (SequenceI tr : trueSqs)
139     {
140       SequenceI[] rseq;
141       Assert.assertNotNull(
142               rseq = ral.findSequenceMatch(tr.getName()),
143               "Couldn't find sequences matching expected sequence "
144                       + tr.getName());
145       Assert.assertEquals(rseq.length, 1,
146               "Expected only one sequence for sequence ID " + tr.getName());
147       Assert.assertEquals(
148               rseq[0].getSequenceAsString(),
149               tr.getSequenceAsString(),
150               "Sequences differ for " + tr.getName() + "\n" + "Exp:"
151                       + tr.getSequenceAsString() + "\n" + "Got:"
152                       + rseq[0].getSequenceAsString());
153   
154     }
155   }
156
157   @Test(suiteName = "live")
158   public void testLiveCheckEnsembl()
159   {
160     EnsemblRestClient sf = new EnsemblRestClient()
161     {
162
163       @Override
164       public String getDbName()
165       {
166         // TODO Auto-generated method stub
167         return null;
168       }
169
170       @Override
171       public AlignmentI getSequenceRecords(String queries) throws Exception
172       {
173         // TODO Auto-generated method stub
174         return null;
175       }
176
177       @Override
178       protected URL getUrl(List<String> ids) throws MalformedURLException
179       {
180         // TODO Auto-generated method stub
181         return null;
182       }
183
184       @Override
185       public boolean useGetRequest()
186       {
187         // TODO Auto-generated method stub
188         return false;
189       }
190
191       @Override
192       public String getRequestMimeType()
193       {
194         // TODO Auto-generated method stub
195         return null;
196       }
197
198       @Override
199       public String getResponseMimeType()
200       {
201         // TODO Auto-generated method stub
202         return null;
203       }
204
205     };
206     boolean isAvailable = sf.isEnsemblAvailable();
207     System.out.println("Ensembl is "
208             + (isAvailable ? "UP!"
209                     : "DOWN or unreachable ******************* BAD!"));
210   }
211 }