JAL-1705 DbSourceProxy properties converted to methods, tidy/format code
[jalview.git] / test / jalview / ext / ensembl / EnsemblSeqProxyTest.java
index 978316b..f3526bc 100644 (file)
 package jalview.ext.ensembl;
 
-import jalview.datamodel.Alignment;
-import jalview.datamodel.SequenceI;
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
-import jalview.io.AppletFormatAdapter;
-import jalview.io.FastaFile;
-import jalview.io.FileParse;
-import jalview.util.DBRefUtils;
-
 import java.lang.reflect.Method;
-import java.util.Arrays;
 
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
+
 public class EnsemblSeqProxyTest
 {
-  @Test
-  public void testCheckEnsembl()
-  {
-    SeqFetcher sf = new SeqFetcher();
-    sf.setTestEnsemblStatus(true);
-    sf.setTesting(true);
-    Assert.assertTrue(sf.isEnsemblAvailable());
-    sf.setTestEnsemblStatus(false);
-    Assert.assertFalse(sf.isEnsemblAvailable());
-  }
-
-  @Test(suiteName = "live")
-  public void testLiveCheckEnsembl()
-  {
-    SeqFetcher sf = new SeqFetcher();
-    boolean isAvailable = sf.isEnsemblAvailable();
-    System.out.println("Ensembl is "
-            + (isAvailable ? "UP!" : "DOWN ******************* BAD!"));
-  }
-
-  @DataProvider(name = "ens_seqs")
+  @DataProvider(name = "queries")
   public Object[][] createData(Method m)
   {
-    System.out.println(m.getName());
-    return allSeqs;
+    return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } };
   }
 
-  public static Object[][] allSeqs = new Object[][]
-  {
-      {
-          EnsemblSeqType.PROTEIN,
-          "CCDS5863.1",
-          ">CCDS5863.1\n"
-                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
-                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
-                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
-                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
-                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
-                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
-                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
-                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
-                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
-                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
-                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
-                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
-                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
-      {
-          EnsemblSeqType.TRANSCRIPT,
-          "CCDS5863.1",
-          ">CCDS5863.1\n"
-                  + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
-                  + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
-                  + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
-                  + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
-                  + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
-                  + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
-                  + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
-                  + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
-                  + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
-                  + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
-                  + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
-                  + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
-                  + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
-                  + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
-                  + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
-                  + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
-                  + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
-                  + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
-                  + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
-                  + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
-                  + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
-                  + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
-                  + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
-                  + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
-                  + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
-                  + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
-                  + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
-                  + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
-                  + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
-                  + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
-                  + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
-                  + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
-                  + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
-                  + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
-                  + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
-                  + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
-                  + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
-                  + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
-                  + "GGTGCGTTTCCTGTCCACTGA\n" },
-      {
-          EnsemblSeqType.PROTEIN,
-          "ENSP00000288602",
-          ">ENSP00000288602\n"
-                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
-                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
-                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
-                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
-                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
-                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
-                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
-                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
-                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
-                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
-                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
-                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
-                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
-
-  @Test(dataProvider = "ens_seqs", suiteName = "live")
-  public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
-          throws Exception
-  {
-    SeqFetcher sf = new SeqFetcher();
-    FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[]
-    { sq }));
-    SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
-    FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
-    SequenceI[] trueSqs = trueRes.getSeqsAsArray();
-    Assert.assertEquals(sqs.length, trueSqs.length,
-            "Different number of sequences retrieved for query " + sq);
-    Alignment ral = new Alignment(sqs);
-    for (SequenceI tr : trueSqs)
-    {
-      SequenceI[] rseq;
-      Assert.assertNotNull(
-              rseq = ral.findSequenceMatch(tr.getName()),
-              "Couldn't find sequences matching expected sequence "
-                      + tr.getName());
-      Assert.assertEquals(rseq.length, 1,
-              "Expected only one sequence for sequence ID " + tr.getName());
-      Assert.assertEquals(
-              rseq[0].getSequenceAsString(),
-              tr.getSequenceAsString(),
-              "Sequences differ for " + tr.getName() + "\n" + "Exp:"
-                      + tr.getSequenceAsString() + "\n" + "Got:"
-                      + rseq[0].getSequenceAsString());
-
-    }
-  }
-
-  @Test(dataProvider = "ens_seqs")
-  public void testRegexForProxy(EnsemblSeqType type, String sq,
-          String fastasq) throws Exception
+  @Test(dataProvider = "queries")
+  public void testIsValidReference(String query) throws Exception
   {
     EnsemblSeqProxy esq = new EnsemblProtein();
-    Assert.assertTrue(esq.isValidReference(sq),
-            "Expected reference string " + sq + " to be valid for regex "
+    Assert.assertTrue(esq.isValidReference(query),
+            "Expected reference string " + query
+                    + " to be valid for regex "
                     + esq.getAccessionValidator().toString());
-    
-    Assert.assertEquals(sq, DBRefUtils.processQueryToAccessionFor(esq, sq),
-            "Regex for " + esq.getClass().toString() + " not correct.");
   }
-  // TODO:
-  // sequence query with ENSG and anything other than a genomic type will yield
-  // sequences with different IDs which will
-  // break the post-processing stage where DBRefs are assigned to sequences.
-  // -> multiple_sequences = true is needed as an additional parameter
-  // http://rest.ensembl.org/sequence/id/ENSG00000157764?content-type=text/x-json;type=protein;multiple_sequences=true
-  // result with four transcripts, cds, cdna, and protein products.
-  // *
-  // features for ENSG -
-  // http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=cds&feature=exon&feature=transcript&content-type=text/x-gff3
-  // transcript: gives locus, all transcript products with ENSG parents
-  // gene: gives all ENSG on locus
-  // exon: all exon boundaries. CDS same info.
-
-  // @Test(dataProvider = "ens_seqs", suiteName = "live")
-  // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
-  // throws Exception
-  // {
-  //
-  // {
-  // Assert.assertTrue(rseq[0].getDBRef() != null
-  // && rseq[0].getDBRef().length > 0,
-  // "No database references added to sequence by fetcher.");
-  // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(),
-  // new DBRefEntry("ENSEMBL", null, sq)),
-  // "Could't find database references added to sequence by fetcher.");
-  //
-  // }
 }
\ No newline at end of file