X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fext%2Fensembl%2FEnsemblSeqProxyTest.java;h=ed936d5bbd8e4ef34c3df6e68616369d149716c4;hb=e96f5e8ce137e879dd4b4f37fb5c4d134e2778e2;hp=7e8745e1a6708529c0196d0bfa68b6e04c602743;hpb=4b508dc906b5a45cea6fc7280cb9c1228c720156;p=jalview.git diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java index 7e8745e..ed936d5 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java @@ -1,53 +1,37 @@ package jalview.ext.ensembl; +import static org.testng.AssertJUnit.assertEquals; + import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; -import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; import jalview.io.AppletFormatAdapter; import jalview.io.FastaFile; import jalview.io.FileParse; -import jalview.util.DBRefUtils; +import jalview.io.gff.SequenceOntologyFactory; +import jalview.io.gff.SequenceOntologyLite; import java.lang.reflect.Method; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; + public class EnsemblSeqProxyTest { - @Test - public void testCheckEnsembl() - { - SeqFetcher sf = new SeqFetcher(); - sf.setTestEnsemblStatus(true); - sf.setTesting(true); - Assert.assertTrue(sf.isEnsemblAvailable()); - sf.setTestEnsemblStatus(false); - Assert.assertFalse(sf.isEnsemblAvailable()); - } - - @Test(suiteName = "live") - public void testLiveCheckEnsembl() - { - SeqFetcher sf = new SeqFetcher(); - boolean isAvailable = sf.isEnsemblAvailable(); - System.out.println("Ensembl is " - + (isAvailable ? "UP!" : "DOWN ******************* BAD!")); - } - - @DataProvider(name = "ens_seqs") - public Object[][] createData(Method m) - { - System.out.println(m.getName()); - return allSeqs; - } - - public static Object[][] allSeqs = new Object[][] - { + private static final Object[][] allSeqs = new Object[][] { { - EnsemblSeqType.PROTEIN, + new EnsemblProtein(), "CCDS5863.1", ">CCDS5863.1\n" + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n" @@ -64,7 +48,7 @@ public class EnsemblSeqProxyTest + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n" + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" }, { - EnsemblSeqType.TRANSCRIPT, + new EnsemblCdna(), "CCDS5863.1", ">CCDS5863.1\n" + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n" @@ -107,7 +91,7 @@ public class EnsemblSeqProxyTest + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n" + "GGTGCGTTTCCTGTCCACTGA\n" }, { - EnsemblSeqType.PROTEIN, + new EnsemblProtein(), "ENSP00000288602", ">ENSP00000288602\n" + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n" @@ -124,12 +108,47 @@ public class EnsemblSeqProxyTest + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n" + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } }; + @BeforeClass + public void setUp() + { + SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); + } + + @AfterClass + public void tearDown() + { + SequenceOntologyFactory.setInstance(null); + } + + @DataProvider(name = "queries") + public Object[][] createQueryData(Method m) + { + return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } }; + } + + @Test(dataProvider = "queries") + public void testIsValidReference(String query) throws Exception + { + EnsemblSequenceFetcher esq = new EnsemblProtein(); + Assert.assertTrue(esq.isValidReference(query), + "Expected reference string " + query + + " to be valid for regex " + + esq.getAccessionValidator().toString()); + } + + @DataProvider(name = "ens_seqs") + public Object[][] createData(Method m) + { + System.out.println(m.getName()); + return allSeqs; + } + @Test(dataProvider = "ens_seqs", suiteName = "live") - public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq) + public void testGetOneSeqs(EnsemblRestClient proxy, String sq, String fastasq) throws Exception { - SeqFetcher sf = new SeqFetcher(); - FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[] + FileParse fp = proxy.getSequenceReader(Arrays + .asList(new String[] { sq })); SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray(); FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE); @@ -152,7 +171,208 @@ public class EnsemblSeqProxyTest "Sequences differ for " + tr.getName() + "\n" + "Exp:" + tr.getSequenceAsString() + "\n" + "Got:" + rseq[0].getSequenceAsString()); - + } } + + @Test(suiteName = "live") + public void testLiveCheckEnsembl() + { + EnsemblRestClient sf = new EnsemblRestClient() + { + + @Override + public String getDbName() + { + // TODO Auto-generated method stub + return null; + } + + @Override + public AlignmentI getSequenceRecords(String queries) throws Exception + { + // TODO Auto-generated method stub + return null; + } + + @Override + protected URL getUrl(List ids) throws MalformedURLException + { + // TODO Auto-generated method stub + return null; + } + + @Override + protected boolean useGetRequest() + { + // TODO Auto-generated method stub + return false; + } + + @Override + protected String getRequestMimeType(boolean b) + { + // TODO Auto-generated method stub + return null; + } + + @Override + protected String getResponseMimeType() + { + // TODO Auto-generated method stub + return null; + } + + }; + boolean isAvailable = sf.isEnsemblAvailable(); + System.out.println("Ensembl is " + + (isAvailable ? "UP!" + : "DOWN or unreachable ******************* BAD!")); + } + + /** + * Tests for the method that computes all peptide variants given codon + * variants + */ + @Test(groups = "Functional") + public void testComputePeptideVariants() + { + String[][] codonVariants = new String[][] { { "A" }, { "G" }, { "T" } }; + + /* + * AGT codes for S - this is not included in the variants returned + */ + List variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S"); + assertEquals("[]", variants.toString()); + + // S is reported if it differs from the current value (A): + variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "A"); + assertEquals("[S]", variants.toString()); + + /* + * synonymous variant is not reported + */ + codonVariants = new String[][] { { "A" }, { "G" }, { "C", "T" } }; + // AGC and AGT both code for S + variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "s"); + assertEquals("[]", variants.toString()); + + /* + * equivalent variants are only reported once + */ + codonVariants = new String[][] { { "C" }, { "T" }, + { "A", "C", "G", "T" } }; + // CTA CTC CTG CTT all code for L + variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S"); + assertEquals("[L]", variants.toString()); + + /* + * vary codons 1 and 2; variant products are sorted and non-redundant + */ + codonVariants = new String[][] { { "a", "C" }, { "g", "T" }, { "A" } }; + // aga ata cga cta code for R, I, R, L + variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S"); + assertEquals("[I, L, R]", variants.toString()); + + /* + * vary codons 2 and 3 + */ + codonVariants = new String[][] { { "a" }, { "g", "T" }, { "A", "c" } }; + // aga agc ata atc code for R, S, I, I + variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S"); + assertEquals("[I, R]", variants.toString()); + + /* + * vary codons 1 and 3 + */ + codonVariants = new String[][] { { "a", "t" }, { "a" }, { "t", "g" } }; + // aat aag tat tag code for N, K, Y, STOP - STOP sorted to end + variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S"); + assertEquals("[K, N, Y, STOP]", variants.toString()); + + /* + * vary codons 1, 2 and 3 + */ + codonVariants = new String[][] { { "a", "t" }, { "G", "C" }, + { "t", "g" } }; + // agt agg act acg tgt tgg tct tcg code for S, R, T, T, C, W, S, S + variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S"); + assertEquals("[C, R, T, W]", variants.toString()); + } + + /** + * Tests for the method that maps the subset of a dna sequence that has CDS + * (or subtype) feature. + */ + @Test(groups = "Functional") + public void testGetCdsRanges() + { + EnsemblSeqProxy testee = new EnsemblSeqProxyAdapter(); + + SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt"); + dnaSeq.createDatasetSequence(); + SequenceI ds = dnaSeq.getDatasetSequence(); + + // CDS for dna 3-6 + SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); + ds.addSequenceFeature(sf); + // exon feature should be ignored here + sf = new SequenceFeature("exon", "", 7, 9, 0f, null); + ds.addSequenceFeature(sf); + // CDS for dna 10-12 + sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null); + ds.addSequenceFeature(sf); + + List ranges = new ArrayList(); + int mappedLength = testee.getCdsRanges(dnaSeq, ranges); + assertEquals(6, mappedLength); + assertEquals(2, ranges.size()); + assertEquals(4, ranges.get(0)[0]); + assertEquals(6, ranges.get(0)[1]); + assertEquals(10, ranges.get(1)[0]); + assertEquals(12, ranges.get(1)[1]); + + } + + @Test(groups = "Functional") + public void getGenomicRangesFromFeatures() + { + + } + + /** + * Tests for the method that maps the subset of a dna sequence that has CDS + * (or subtype) feature - case where the start codon is incomplete. + */ + @Test(groups = "Functional") + public void testGetCdsRanges_fivePrimeIncomplete() + { + EnsemblSeqProxy testee = new EnsemblSeqProxyAdapter(); + + SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt"); + dnaSeq.createDatasetSequence(); + SequenceI ds = dnaSeq.getDatasetSequence(); + + // CDS for dna 5-6 (incomplete codon), 7-9 + SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null); + sf.setPhase("2"); // skip 2 bases to start of next codon + ds.addSequenceFeature(sf); + ds.addSequenceFeature(sf); + // CDS for dna 13-15 + sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null); + ds.addSequenceFeature(sf); + + List ranges = new ArrayList(); + int mappedLength = testee.getCdsRanges(dnaSeq, ranges); + + /* + * check the mapping starts with the first complete codon + */ + assertEquals(6, mappedLength); + assertEquals(2, ranges.size()); + assertEquals(7, ranges.get(0)[0]); + assertEquals(9, ranges.get(0)[1]); + assertEquals(13, ranges.get(1)[0]); + assertEquals(15, ranges.get(1)[1]); + } } \ No newline at end of file