JAL-1705 reworked Ensembl clients now fetching and mapping features &
[jalview.git] / test / jalview / ext / ensembl / EnsemblSeqProxyTest.java
index f3526bc..3ca74b0 100644 (file)
@@ -1,6 +1,17 @@
 package jalview.ext.ensembl;
 
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceI;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+
 import java.lang.reflect.Method;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.List;
 
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
@@ -9,8 +20,87 @@ import org.testng.annotations.Test;
 
 public class EnsemblSeqProxyTest
 {
+  private static final Object[][] allSeqs = new Object[][] {
+      {
+          new EnsemblProtein(),
+          "CCDS5863.1",
+          ">CCDS5863.1\n"
+                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
+      {
+          new EnsemblCdna(),
+          "CCDS5863.1",
+          ">CCDS5863.1\n"
+                  + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
+                  + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
+                  + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
+                  + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
+                  + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
+                  + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
+                  + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
+                  + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
+                  + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
+                  + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
+                  + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
+                  + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
+                  + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
+                  + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
+                  + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
+                  + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
+                  + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
+                  + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
+                  + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
+                  + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
+                  + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
+                  + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
+                  + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
+                  + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
+                  + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
+                  + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
+                  + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
+                  + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
+                  + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
+                  + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
+                  + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
+                  + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
+                  + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
+                  + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
+                  + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
+                  + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
+                  + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
+                  + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
+                  + "GGTGCGTTTCCTGTCCACTGA\n" },
+      {
+          new EnsemblProtein(),
+          "ENSP00000288602",
+          ">ENSP00000288602\n"
+                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
+
   @DataProvider(name = "queries")
-  public Object[][] createData(Method m)
+  public Object[][] createQueryData(Method m)
   {
     return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } };
   }
@@ -18,10 +108,104 @@ public class EnsemblSeqProxyTest
   @Test(dataProvider = "queries")
   public void testIsValidReference(String query) throws Exception
   {
-    EnsemblSeqProxy esq = new EnsemblProtein();
+    EnsemblSequenceFetcher esq = new EnsemblProtein();
     Assert.assertTrue(esq.isValidReference(query),
             "Expected reference string " + query
                     + " to be valid for regex "
                     + esq.getAccessionValidator().toString());
   }
+
+  @DataProvider(name = "ens_seqs")
+  public Object[][] createData(Method m)
+  {
+    System.out.println(m.getName());
+    return allSeqs;
+  }
+
+  @Test(dataProvider = "ens_seqs", suiteName = "live")
+  public void testGetOneSeqs(EnsemblRestClient proxy, String sq, String fastasq)
+          throws Exception
+  {
+    FileParse fp = proxy.getSequenceReader(Arrays
+            .asList(new String[]
+    { sq }));
+    SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
+    FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
+    SequenceI[] trueSqs = trueRes.getSeqsAsArray();
+    Assert.assertEquals(sqs.length, trueSqs.length,
+            "Different number of sequences retrieved for query " + sq);
+    Alignment ral = new Alignment(sqs);
+    for (SequenceI tr : trueSqs)
+    {
+      SequenceI[] rseq;
+      Assert.assertNotNull(
+              rseq = ral.findSequenceMatch(tr.getName()),
+              "Couldn't find sequences matching expected sequence "
+                      + tr.getName());
+      Assert.assertEquals(rseq.length, 1,
+              "Expected only one sequence for sequence ID " + tr.getName());
+      Assert.assertEquals(
+              rseq[0].getSequenceAsString(),
+              tr.getSequenceAsString(),
+              "Sequences differ for " + tr.getName() + "\n" + "Exp:"
+                      + tr.getSequenceAsString() + "\n" + "Got:"
+                      + rseq[0].getSequenceAsString());
+  
+    }
+  }
+
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    EnsemblRestClient sf = new EnsemblRestClient()
+    {
+
+      @Override
+      public String getDbName()
+      {
+        // placeholder: this test only calls isEnsemblAvailable()
+        return null;
+      }
+
+      @Override
+      public AlignmentI getSequenceRecords(String queries) throws Exception
+      {
+        // placeholder: this anonymous client fetches no sequence records
+        return null;
+      }
+
+      @Override
+      protected URL getUrl(List<String> ids) throws MalformedURLException
+      {
+        // placeholder: this anonymous client builds no query URL
+        return null;
+      }
+
+      @Override
+      public boolean useGetRequest()
+      {
+        // placeholder value; this test only calls isEnsemblAvailable()
+        return false;
+      }
+
+      @Override
+      public String getRequestMimeType()
+      {
+        // placeholder: this anonymous client supplies no request MIME type
+        return null;
+      }
+
+      @Override
+      public String getResponseMimeType()
+      {
+        // placeholder: this anonymous client supplies no response MIME type
+        return null;
+      }
+
+    };
+    boolean isAvailable = sf.isEnsemblAvailable();
+    System.out.println("Ensembl is "
+            + (isAvailable ? "UP!"
+                    : "DOWN or unreachable ******************* BAD!"));
+  }
 }
\ No newline at end of file