JAL-1705 refactoring etc for fetching Ensembl --> Uniprot
[jalview.git] / test / jalview / ext / ensembl / EnsemblSeqProxyTest.java
1 package jalview.ext.ensembl;
2
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertTrue;
6
7 import jalview.analysis.AlignmentUtils;
8 import jalview.datamodel.Alignment;
9 import jalview.datamodel.AlignmentI;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.AppletFormatAdapter;
12 import jalview.io.FastaFile;
13 import jalview.io.FileParse;
14 import jalview.io.gff.SequenceOntologyFactory;
15 import jalview.io.gff.SequenceOntologyLite;
16
17 import java.lang.reflect.Method;
18 import java.net.MalformedURLException;
19 import java.net.URL;
20 import java.util.Arrays;
21 import java.util.List;
22
23 import org.testng.Assert;
24 import org.testng.annotations.AfterClass;
25 import org.testng.annotations.BeforeClass;
26 import org.testng.annotations.DataProvider;
27 import org.testng.annotations.Test;
28
29
30 public class EnsemblSeqProxyTest
31 {
32   private static final Object[][] allSeqs = new Object[][] {
33       {
34           new EnsemblProtein(),
35           "CCDS5863.1",
36           ">CCDS5863.1\n"
37                   + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
38                   + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
39                   + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
40                   + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
41                   + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
42                   + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
43                   + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
44                   + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
45                   + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
46                   + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
47                   + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
48                   + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
49                   + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
50       {
51           new EnsemblCdna(),
52           "CCDS5863.1",
53           ">CCDS5863.1\n"
54                   + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
55                   + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
56                   + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
57                   + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
58                   + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
59                   + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
60                   + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
61                   + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
62                   + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
63                   + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
64                   + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
65                   + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
66                   + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
67                   + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
68                   + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
69                   + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
70                   + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
71                   + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
72                   + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
73                   + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
74                   + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
75                   + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
76                   + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
77                   + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
78                   + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
79                   + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
80                   + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
81                   + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
82                   + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
83                   + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
84                   + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
85                   + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
86                   + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
87                   + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
88                   + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
89                   + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
90                   + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
91                   + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
92                   + "GGTGCGTTTCCTGTCCACTGA\n" },
93       {
94           new EnsemblProtein(),
95           "ENSP00000288602",
96           ">ENSP00000288602\n"
97                   + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
98                   + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
99                   + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
100                   + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
101                   + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
102                   + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
103                   + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
104                   + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
105                   + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
106                   + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
107                   + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
108                   + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
109                   + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
110
111   @BeforeClass
112   public void setUp()
113   {
114     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
115   }
116
117   @AfterClass
118   public void tearDown()
119   {
120     SequenceOntologyFactory.setInstance(null);
121   }
122
123   @DataProvider(name = "ens_seqs")
124   public Object[][] createData(Method m)
125   {
126     System.out.println(m.getName());
127     return allSeqs;
128   }
129
130   @Test(dataProvider = "ens_seqs", suiteName = "live")
131   public void testGetOneSeqs(EnsemblRestClient proxy, String sq, String fastasq)
132           throws Exception
133   {
134     FileParse fp = proxy.getSequenceReader(Arrays
135             .asList(new String[]
136     { sq }));
137     SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
138     FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
139     SequenceI[] trueSqs = trueRes.getSeqsAsArray();
140     Assert.assertEquals(sqs.length, trueSqs.length,
141             "Different number of sequences retrieved for query " + sq);
142     Alignment ral = new Alignment(sqs);
143     for (SequenceI tr : trueSqs)
144     {
145       SequenceI[] rseq;
146       Assert.assertNotNull(
147               rseq = ral.findSequenceMatch(tr.getName()),
148               "Couldn't find sequences matching expected sequence "
149                       + tr.getName());
150       Assert.assertEquals(rseq.length, 1,
151               "Expected only one sequence for sequence ID " + tr.getName());
152       Assert.assertEquals(
153               rseq[0].getSequenceAsString(),
154               tr.getSequenceAsString(),
155               "Sequences differ for " + tr.getName() + "\n" + "Exp:"
156                       + tr.getSequenceAsString() + "\n" + "Got:"
157                       + rseq[0].getSequenceAsString());
158   
159     }
160   }
161
162   @Test(suiteName = "live")
163   public void testLiveCheckEnsembl()
164   {
165     EnsemblRestClient sf = new EnsemblRestClient()
166     {
167
168       @Override
169       public String getDbName()
170       {
171         // TODO Auto-generated method stub
172         return null;
173       }
174
175       @Override
176       public AlignmentI getSequenceRecords(String queries) throws Exception
177       {
178         // TODO Auto-generated method stub
179         return null;
180       }
181
182       @Override
183       protected URL getUrl(List<String> ids) throws MalformedURLException
184       {
185         // TODO Auto-generated method stub
186         return null;
187       }
188
189       @Override
190       protected boolean useGetRequest()
191       {
192         // TODO Auto-generated method stub
193         return false;
194       }
195
196       @Override
197       protected String getRequestMimeType(boolean b)
198       {
199         // TODO Auto-generated method stub
200         return null;
201       }
202
203       @Override
204       protected String getResponseMimeType()
205       {
206         // TODO Auto-generated method stub
207         return null;
208       }
209
210     };
211     boolean isAvailable = sf.isEnsemblAvailable();
212     System.out.println("Ensembl is "
213             + (isAvailable ? "UP!"
214                     : "DOWN or unreachable ******************* BAD!"));
215   }
216
217   @Test(groups = "Functional")
218   public void getGenomicRangesFromFeatures()
219   {
220
221   }
222
223   @Test(groups = "Functional")
224   public void testIsTranscriptIdentifier()
225   {
226     EnsemblSeqProxy testee = new EnsemblGene();
227     assertFalse(testee.isTranscriptIdentifier(null));
228     assertFalse(testee.isTranscriptIdentifier(""));
229     assertFalse(testee.isTranscriptIdentifier("ENSG00000012345"));
230     assertTrue(testee.isTranscriptIdentifier("ENST00000012345"));
231     assertTrue(testee.isTranscriptIdentifier("ENSMUST00000012345"));
232     assertFalse(testee.isTranscriptIdentifier("enst00000012345"));
233     assertFalse(testee.isTranscriptIdentifier("ENST000000123456"));
234     assertFalse(testee.isTranscriptIdentifier("ENST0000001234"));
235   }
236
237   @Test(groups = "Functional")
238   public void testIsGeneIdentifier()
239   {
240     EnsemblSeqProxy testee = new EnsemblGene();
241     assertFalse(testee.isGeneIdentifier(null));
242     assertFalse(testee.isGeneIdentifier(""));
243     assertFalse(testee.isGeneIdentifier("ENST00000012345"));
244     assertTrue(testee.isGeneIdentifier("ENSG00000012345"));
245     assertTrue(testee.isGeneIdentifier("ENSMUSG00000012345"));
246     assertFalse(testee.isGeneIdentifier("ensg00000012345"));
247     assertFalse(testee.isGeneIdentifier("ENSG000000123456"));
248     assertFalse(testee.isGeneIdentifier("ENSG0000001234"));
249   }
250 }