1 package jalview.ext.ensembl;
3 import jalview.datamodel.Alignment;
4 import jalview.datamodel.SequenceI;
5 import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
6 import jalview.io.AppletFormatAdapter;
7 import jalview.io.FastaFile;
8 import jalview.io.FileParse;
9 import jalview.util.DBRefUtils;
11 import java.lang.reflect.Method;
12 import java.util.Arrays;
14 import org.testng.Assert;
15 import org.testng.annotations.DataProvider;
16 import org.testng.annotations.Test;
/**
 * TestNG tests around Ensembl sequence retrieval: availability checks on
 * {@code SeqFetcher}, a data-provider-driven fetch-and-compare test, and an
 * accession validation test that uses an {@code EnsemblProtein} proxy.
 */
18 public class EnsemblSeqProxyTest
/**
 * Checks that {@code isEnsemblAvailable()} reports whatever status was forced
 * through {@code setTestEnsemblStatus}: first {@code true}, then
 * {@code false}.
 */
21 public void testCheckEnsembl()
23 SeqFetcher sf = new SeqFetcher();
// force the status to "available" and confirm that is what gets reported
24 sf.setTestEnsemblStatus(true);
26 Assert.assertTrue(sf.isEnsemblAvailable());
// force the opposite status and confirm the report follows it
27 sf.setTestEnsemblStatus(false);
28 Assert.assertFalse(sf.isEnsemblAvailable());
/**
 * Asks a fresh {@code SeqFetcher} whether Ensembl is available and prints the
 * answer to standard output. The lines shown here make no assertion on the
 * result, so an unavailable service is reported rather than failed.
 */
// NOTE(review): TestNG documents suiteName as ignored unless @Test is placed
// at class level - confirm whether groups = "live" was intended here.
31 @Test(suiteName = "live")
32 public void testLiveCheckEnsembl()
34 SeqFetcher sf = new SeqFetcher();
35 boolean isAvailable = sf.isEnsemblAvailable();
// status is only printed; "UP!" when available, a loud banner otherwise
36 System.out.println("Ensembl is "
37 + (isAvailable ? "UP!" : "DOWN ******************* BAD!"));
/**
 * Data provider registered under the name "ens_seqs". Prints the name of the
 * method it is handed before supplying data.
 * 
 * @param m
 *          method passed in by TestNG; only its name is used here
 * @return NOTE(review): the return statement is not visible in these lines;
 *         presumably the rows of {@code allSeqs} - confirm
 */
40 @DataProvider(name = "ens_seqs")
41 public Object[][] createData(Method m)
43 System.out.println(m.getName());
/**
 * Test data rows. Each visible row starts with an {@code EnsemblSeqType}
 * followed by sequence text built from newline-terminated 60-column chunks.
 * The tests consuming these rows take (EnsemblSeqType type, String sq, String
 * fastasq), so each row presumably carries a query id and a FASTA header
 * between the type and the residues - NOTE(review): those lines are not
 * visible here, confirm against the full file.
 * 
 * The field is public, static and not final, so it is shared mutable state.
 */
47 public static Object[][] allSeqs = new Object[][]
// row 1: protein sequence; final chunk ends with "\n"
50 EnsemblSeqType.PROTEIN,
53 + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
54 + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
55 + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
56 + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
57 + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
58 + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
59 + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
60 + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
61 + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
62 + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
63 + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
64 + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
65 + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
// row 2: transcript (nucleotide) sequence
67 EnsemblSeqType.TRANSCRIPT,
70 + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
71 + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
72 + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
73 + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
74 + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
75 + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
76 + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
77 + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
78 + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
79 + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
80 + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
81 + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
82 + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
83 + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
84 + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
85 + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
86 + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
87 + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
88 + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
89 + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
90 + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
91 + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
92 + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
93 + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
94 + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
95 + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
96 + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
97 + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
98 + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
99 + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
100 + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
101 + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
102 + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
103 + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
104 + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
105 + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
106 + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
107 + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
108 + "GGTGCGTTTCCTGTCCACTGA\n" },
// row 3: same protein residues as row 1, but the final chunk has no
// trailing "\n"
110 EnsemblSeqType.PROTEIN,
113 + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
114 + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
115 + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
116 + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
117 + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
118 + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
119 + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
120 + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
121 + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
122 + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
123 + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
124 + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
125 + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
/**
 * Fetches sequence data for one "ens_seqs" row through {@code SeqFetcher},
 * parses both the fetched data and the expected text {@code fastasq} as
 * FASTA, and compares them: same number of sequences, exactly one fetched
 * sequence matching each expected name, and identical sequence strings.
 * 
 * @param type
 *          Ensembl sequence type passed to {@code getSequenceReader}
 * @param sq
 *          query identifier, used in the failure message
 * @param fastasq
 *          expected result as FASTA text
 */
// NOTE(review): TestNG documents suiteName as ignored unless @Test is placed
// at class level - confirm whether groups = "live" was intended here.
127 @Test(dataProvider = "ens_seqs", suiteName = "live")
128 public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
131 SeqFetcher sf = new SeqFetcher();
132 FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[]
// sequences actually returned by the fetcher
134 SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
// expected sequences, parsed from the pasted FASTA text of the data row
135 FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
136 SequenceI[] trueSqs = trueRes.getSeqsAsArray();
137 Assert.assertEquals(sqs.length, trueSqs.length,
138 "Different number of sequences retrieved for query " + sq);
// wrap the fetched sequences so they can be looked up by name
139 Alignment ral = new Alignment(sqs);
140 for (SequenceI tr : trueSqs)
// the lookup result is assigned to rseq inside the assertion call
143 Assert.assertNotNull(
144 rseq = ral.findSequenceMatch(tr.getName()),
145 "Couldn't find sequences matching expected sequence "
// the name must match exactly one fetched sequence
147 Assert.assertEquals(rseq.length, 1,
148 "Expected only one sequence for sequence ID " + tr.getName());
// fetched residues first, expected residues second
150 rseq[0].getSequenceAsString(),
151 tr.getSequenceAsString(),
152 "Sequences differ for " + tr.getName() + "\n" + "Exp:"
153 + tr.getSequenceAsString() + "\n" + "Got:"
154 + rseq[0].getSequenceAsString());
/**
 * For one "ens_seqs" row, checks that the query id {@code sq} is accepted by
 * an {@code EnsemblProtein} proxy's {@code isValidReference} and that
 * {@code DBRefUtils.processQueryToAccessionFor} yields a value equal to
 * {@code sq}. The {@code type} and {@code fastasq} arguments are not used in
 * the lines shown here.
 * 
 * @param type
 *          Ensembl sequence type of the data row
 * @param sq
 *          query identifier under test
 * @param fastasq
 *          expected FASTA text of the data row
 * @throws Exception
 *           declared by the signature; the throwing call is not identifiable
 *           from these lines
 */
159 @Test(dataProvider = "ens_seqs")
160 public void testRegexForProxy(EnsemblSeqType type, String sq,
161 String fastasq) throws Exception
// NOTE(review): the proxy is always EnsemblProtein regardless of type, yet
// the data rows include a TRANSCRIPT entry - confirm this is intended.
163 EnsemblSeqProxy esq = new EnsemblProtein();
164 Assert.assertTrue(esq.isValidReference(sq),
165 "Expected reference string " + sq + " to be valid for regex "
166 + esq.getAccessionValidator().toString());
// NOTE(review): other assertEquals calls in this file pass the observed value
// first and the expected value second; here sq (expected) comes first, which
// would only affect the wording of a failure report - confirm and swap.
168 Assert.assertEquals(sq, DBRefUtils.processQueryToAccessionFor(esq, sq),
169 "Regex for " + esq.getClass().toString() + " not correct.");
171 // @Test(dataProvider = "ens_seqs", suiteName = "live")
172 // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
177 // Assert.assertTrue(rseq[0].getDBRef() != null
178 // && rseq[0].getDBRef().length > 0,
179 // "No database references added to sequence by fetcher.");
180 // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(),
181 // new DBRefEntry("ENSEMBL", null, sq)),
182 // "Couldn't find database references added to sequence by fetcher.");