JAL-1705 a second ENSEMBL test id/sequence pair
[jalview.git] / test / jalview / ext / ensembl / EnsemblSeqProxyTest.java
1 package jalview.ext.ensembl;
2
3 import jalview.datamodel.Alignment;
4 import jalview.datamodel.SequenceI;
5 import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
6 import jalview.io.AppletFormatAdapter;
7 import jalview.io.FastaFile;
8 import jalview.io.FileParse;
9 import jalview.util.DBRefUtils;
10
11 import java.lang.reflect.Method;
12 import java.util.Arrays;
13
14 import org.testng.Assert;
15 import org.testng.annotations.DataProvider;
16 import org.testng.annotations.Test;
17
18 public class EnsemblSeqProxyTest
19 {
20   @Test
21   public void testCheckEnsembl()
22   {
23     SeqFetcher sf = new SeqFetcher();
24     sf.setTestEnsemblStatus(true);
25     sf.setTesting(true);
26     Assert.assertTrue(sf.isEnsemblAvailable());
27     sf.setTestEnsemblStatus(false);
28     Assert.assertFalse(sf.isEnsemblAvailable());
29   }
30
31   @Test(suiteName = "live")
32   public void testLiveCheckEnsembl()
33   {
34     SeqFetcher sf = new SeqFetcher();
35     boolean isAvailable = sf.isEnsemblAvailable();
36     System.out.println("Ensembl is "
37             + (isAvailable ? "UP!" : "DOWN ******************* BAD!"));
38   }
39
40   @DataProvider(name = "ens_seqs")
41   public Object[][] createData(Method m)
42   {
43     System.out.println(m.getName());
44     return allSeqs;
45   }
46
47   public static Object[][] allSeqs = new Object[][]
48   {
49       {
50           EnsemblSeqType.PROTEIN,
51           "CCDS5863.1",
52           ">CCDS5863.1\n"
53                   + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
54                   + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
55                   + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
56                   + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
57                   + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
58                   + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
59                   + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
60                   + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
61                   + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
62                   + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
63                   + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
64                   + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
65                   + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
66       {
67           EnsemblSeqType.TRANSCRIPT,
68           "CCDS5863.1",
69           ">CCDS5863.1\n"
70                   + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
71                   + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
72                   + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
73                   + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
74                   + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
75                   + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
76                   + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
77                   + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
78                   + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
79                   + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
80                   + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
81                   + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
82                   + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
83                   + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
84                   + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
85                   + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
86                   + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
87                   + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
88                   + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
89                   + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
90                   + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
91                   + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
92                   + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
93                   + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
94                   + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
95                   + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
96                   + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
97                   + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
98                   + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
99                   + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
100                   + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
101                   + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
102                   + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
103                   + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
104                   + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
105                   + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
106                   + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
107                   + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
108                   + "GGTGCGTTTCCTGTCCACTGA\n" },
109       {
110           EnsemblSeqType.PROTEIN,
111           "ENSP00000288602",
112           ">ENSP00000288602\n"
113                   + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
114                   + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
115                   + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
116                   + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
117                   + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
118                   + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
119                   + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
120                   + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
121                   + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
122                   + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
123                   + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
124                   + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
125                   + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
126
127   @Test(dataProvider = "ens_seqs", suiteName = "live")
128   public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
129           throws Exception
130   {
131     SeqFetcher sf = new SeqFetcher();
132     FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[]
133     { sq }));
134     SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
135     FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
136     SequenceI[] trueSqs = trueRes.getSeqsAsArray();
137     Assert.assertEquals(sqs.length, trueSqs.length,
138             "Different number of sequences retrieved for query " + sq);
139     Alignment ral = new Alignment(sqs);
140     for (SequenceI tr : trueSqs)
141     {
142       SequenceI[] rseq;
143       Assert.assertNotNull(
144               rseq = ral.findSequenceMatch(tr.getName()),
145               "Couldn't find sequences matching expected sequence "
146                       + tr.getName());
147       Assert.assertEquals(rseq.length, 1,
148               "Expected only one sequence for sequence ID " + tr.getName());
149       Assert.assertEquals(
150               rseq[0].getSequenceAsString(),
151               tr.getSequenceAsString(),
152               "Sequences differ for " + tr.getName() + "\n" + "Exp:"
153                       + tr.getSequenceAsString() + "\n" + "Got:"
154                       + rseq[0].getSequenceAsString());
155
156     }
157   }
158 }