From e96f5e8ce137e879dd4b4f37fb5c4d134e2778e2 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 1 Feb 2016 16:44:17 +0000 Subject: [PATCH] JAL-1705 further unit tests --- src/jalview/ext/ensembl/EnsemblGenome.java | 7 +- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 8 +- src/jalview/io/gff/SequenceOntologyI.java | 6 + src/jalview/io/gff/SequenceOntologyLite.java | 5 +- test/jalview/ext/ensembl/EnsemblCdnaTest.java | 97 ++++++++- test/jalview/ext/ensembl/EnsemblCdsTest.java | 154 ++++++++++++++ test/jalview/ext/ensembl/EnsemblGeneTest.java | 228 +++++++++++++++++++++ test/jalview/ext/ensembl/EnsemblGenomeTest.java | 170 +++++++++++++++ test/jalview/ext/ensembl/EnsemblSeqProxyTest.java | 20 +- 9 files changed, 670 insertions(+), 25 deletions(-) create mode 100644 test/jalview/ext/ensembl/EnsemblCdsTest.java create mode 100644 test/jalview/ext/ensembl/EnsemblGeneTest.java create mode 100644 test/jalview/ext/ensembl/EnsemblGenomeTest.java diff --git a/src/jalview/ext/ensembl/EnsemblGenome.java b/src/jalview/ext/ensembl/EnsemblGenome.java index b7db2bc..6bbc3e9 100644 --- a/src/jalview/ext/ensembl/EnsemblGenome.java +++ b/src/jalview/ext/ensembl/EnsemblGenome.java @@ -37,9 +37,10 @@ public class EnsemblGenome extends EnsemblSeqProxy /** * Answers true unless the feature type is 'transcript' (or a sub-type of - * transcript in the Sequence Ontology). Transcript features are only - * retrieved in order to identify the transcript sequence range, and are - * redundant information on the transcript sequence itself. + * transcript in the Sequence Ontology), or has a parent other than the given + * accession id. Transcript features are only retrieved in order to identify + * the transcript sequence range, and are redundant information on the + * transcript sequence itself. 
*/ @Override protected boolean retainFeature(SequenceFeature sf, String accessionId) diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 8c1e972..b2804f2 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -46,12 +46,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected static final String ID = "ID"; - /* - * this needs special handling, as it isA sequence_variant in the - * Sequence Ontology, but behaves in Ensembl as if it isA transcript - */ - protected static final String NMD_VARIANT = "NMD_transcript_variant"; - protected static final String NAME = "Name"; public enum EnsemblSeqType @@ -1105,7 +1099,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ public static boolean isTranscript(String featureType) { - return NMD_VARIANT.equals(featureType) + return SequenceOntologyI.NMD_TRANSCRIPT_VARIANT.equals(featureType) || SequenceOntologyFactory.getInstance().isA(featureType, SequenceOntologyI.TRANSCRIPT); } diff --git a/src/jalview/io/gff/SequenceOntologyI.java b/src/jalview/io/gff/SequenceOntologyI.java index 8128177..c7a5baa 100644 --- a/src/jalview/io/gff/SequenceOntologyI.java +++ b/src/jalview/io/gff/SequenceOntologyI.java @@ -7,10 +7,13 @@ public interface SequenceOntologyI /* * selected commonly used values for quick reference */ + // SO:0000104 public static final String POLYPEPTIDE = "polypeptide"; + // SO:0000349 public static final String PROTEIN_MATCH = "protein_match"; + // SO:0000347 public static final String NUCLEOTIDE_MATCH = "nucleotide_match"; // SO:0000316 @@ -25,6 +28,9 @@ public interface SequenceOntologyI // SO:0000673 public static final String TRANSCRIPT = "transcript"; + // SO:0001621 isA sequence_variant but used in Ensembl as a transcript + public static final String NMD_TRANSCRIPT_VARIANT = "NMD_transcript_variant"; + // SO:0000704 public static final String GENE = "gene"; 
diff --git a/src/jalview/io/gff/SequenceOntologyLite.java b/src/jalview/io/gff/SequenceOntologyLite.java index 6719ae6..c0ae971 100644 --- a/src/jalview/io/gff/SequenceOntologyLite.java +++ b/src/jalview/io/gff/SequenceOntologyLite.java @@ -55,10 +55,13 @@ public class SequenceOntologyLite implements SequenceOntologyI { "structural_variant", "sequence_variant" }, /* - * no sub-types of exon or CDS yet encountered; add if needed + * no sub-types of exon or CDS encountered in Ensembl + * a few added here for testing purposes */ { "exon", "exon" }, + { "coding_exon", "exon" }, { "CDS", "CDS" }, + { "CDS_predicted", "CDS" }, /* * used in exonerate GFF diff --git a/test/jalview/ext/ensembl/EnsemblCdnaTest.java b/test/jalview/ext/ensembl/EnsemblCdnaTest.java index 30bc81b..2d99a52 100644 --- a/test/jalview/ext/ensembl/EnsemblCdnaTest.java +++ b/test/jalview/ext/ensembl/EnsemblCdnaTest.java @@ -1,7 +1,9 @@ package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNull; +import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; @@ -12,16 +14,29 @@ import jalview.util.MapList; import java.util.List; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class EnsemblCdnaTest { + @BeforeClass + public void setUp() + { + SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); + } + + @AfterClass + public void tearDown() + { + SequenceOntologyFactory.setInstance(null); + } /** * Test that the cdna part of genomic sequence is correctly identified by * 'exon' features (or subtypes) - reverse strand case. 
*/ @Test(groups = "Functional") - public void getGenomicRangesFromFeatures_reverseStrand() + public void testGetGenomicRangesFromFeatures_reverseStrand() { EnsemblCdna testee = new EnsemblCdna(); SequenceI genomic = new SequenceDummy("chr7"); @@ -51,8 +66,6 @@ public class EnsemblCdnaTest sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null); genomic.addSequenceFeature(sf); - SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); - MapList ranges = testee.getGenomicRangesFromFeatures(genomic, transcriptId, 23); List fromRanges = ranges.getFromRanges(); @@ -75,10 +88,10 @@ public class EnsemblCdnaTest /** * Test that the cdna part of genomic sequence is correctly identified by - * 'exon' features (or subtypes). + * 'exon' features (or subtypes) with the desired transcript as parent */ @Test(groups = "Functional") - public void getGenomicRangesFromFeatures() + public void testGetGenomicRangesFromFeatures() { EnsemblCdna testee = new EnsemblCdna(); SequenceI genomic = new SequenceDummy("chr7"); @@ -109,8 +122,6 @@ public class EnsemblCdnaTest sf.setStrand("-"); // weird but ignored genomic.addSequenceFeature(sf); - SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); - MapList ranges = testee.getGenomicRangesFromFeatures(genomic, transcriptId, 23); List fromRanges = ranges.getFromRanges(); @@ -132,7 +143,7 @@ public class EnsemblCdnaTest * reverse strands are present in the features of interest */ @Test(groups = "Functional") - public void getGenomicRangesFromFeatures_mixedStrand() + public void testGetGenomicRangesFromFeatures_mixedStrand() { EnsemblCdna testee = new EnsemblCdna(); SequenceI genomic = new SequenceDummy("chr7"); @@ -151,10 +162,76 @@ public class EnsemblCdnaTest sf.setStrand("+"); genomic.addSequenceFeature(sf); - SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); - MapList ranges = testee.getGenomicRangesFromFeatures(genomic, transcriptId, 23); assertNull(ranges); } + + /** + * Test the method that 
retains features except for 'transcript' (or + * subtypes), or features with parent other than the given id + */ + @Test(groups = "Functional") + public void testRetainFeature() + { + String accId = "ABC123"; + EnsemblCdna testee = new EnsemblCdna(); + + SequenceFeature sf = new SequenceFeature("transcript", "", 20000, + 20500, 0f, null); + assertFalse(testee.retainFeature(sf, accId)); + + sf.setType("aberrant_processed_transcript"); + assertFalse(testee.retainFeature(sf, accId)); + + sf.setType("NMD_transcript_variant"); + assertFalse(testee.retainFeature(sf, accId)); + + // other feature with no parent is retained + sf.setType("sequence_variant"); + assertTrue(testee.retainFeature(sf, accId)); + + // other feature with desired parent is retained + sf.setValue("Parent", "transcript:" + accId); + assertTrue(testee.retainFeature(sf, accId)); + + // feature with wrong parent is not retained + sf.setValue("Parent", "transcript:XYZ"); + assertFalse(testee.retainFeature(sf, accId)); + } + + /** + * Test the method that picks out 'exon' (or subtype) features with the + * accession id as parent + */ + @Test(groups = "Functional") + public void testIdentifiesSequence() + { + String accId = "ABC123"; + EnsemblCdna testee = new EnsemblCdna(); + + // exon with no parent not valid + SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null); + assertFalse(testee.identifiesSequence(sf, accId)); + + // exon with wrong parent not valid + sf.setValue("Parent", "transcript:XYZ"); + assertFalse(testee.identifiesSequence(sf, accId)); + + // exon with right parent is valid + sf.setValue("Parent", "transcript:" + accId); + assertTrue(testee.identifiesSequence(sf, accId)); + + // exon sub-type with right parent is valid + sf.setType("coding_exon"); + assertTrue(testee.identifiesSequence(sf, accId)); + + // transcript not valid: + sf.setType("transcript"); + assertFalse(testee.identifiesSequence(sf, accId)); + + // CDS not valid: + sf.setType("CDS"); + 
assertFalse(testee.identifiesSequence(sf, accId)); + } } diff --git a/test/jalview/ext/ensembl/EnsemblCdsTest.java b/test/jalview/ext/ensembl/EnsemblCdsTest.java new file mode 100644 index 0000000..fb17845 --- /dev/null +++ b/test/jalview/ext/ensembl/EnsemblCdsTest.java @@ -0,0 +1,154 @@ +package jalview.ext.ensembl; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertTrue; + +import jalview.datamodel.SequenceDummy; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.io.gff.SequenceOntologyFactory; +import jalview.io.gff.SequenceOntologyLite; +import jalview.util.MapList; + +import java.util.List; + +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +public class EnsemblCdsTest +{ + @BeforeClass + public void setUp() + { + SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); + } + + @AfterClass + public void tearDown() + { + SequenceOntologyFactory.setInstance(null); + } + + /** + * Test that the CDS part of genomic sequence is correctly identified by + * 'CDS' features (or subtypes) with the desired transcript as parent + */ + @Test(groups = "Functional") + public void testGetGenomicRangesFromFeatures() + { + EnsemblCds testee = new EnsemblCds(); + SequenceI genomic = new SequenceDummy("chr7"); + genomic.setStart(10000); + genomic.setEnd(50000); + String transcriptId = "ABC123"; + + // CDS at (start+10000) length 501 + SequenceFeature sf = new SequenceFeature("CDS", "", 20000, 20500, 0f, + null); + sf.setValue("Parent", "transcript:" + transcriptId); + sf.setStrand("+"); + genomic.addSequenceFeature(sf); + + // CDS (sub-type) at (start + 500) length 101 + sf = new SequenceFeature("CDS_predicted", "", 10500, 10600, 0f, null); + sf.setValue("Parent", "transcript:" + transcriptId); + sf.setStrand("+"); + genomic.addSequenceFeature(sf); + + 
// CDS belonging to a different transcript doesn't count + sf = new SequenceFeature("CDS", "", 11500, 12600, 0f, null); + sf.setValue("Parent", "transcript:anotherOne"); + genomic.addSequenceFeature(sf); + + // exon feature doesn't count + sf = new SequenceFeature("exon", "", 10000, 50000, 0f, null); + genomic.addSequenceFeature(sf); + + // mRNA_region feature doesn't count (parent of CDS) + sf = new SequenceFeature("mRNA_region", "", 10000, 50000, 0f, null); + genomic.addSequenceFeature(sf); + + MapList ranges = testee.getGenomicRangesFromFeatures(genomic, + transcriptId, 23); + List fromRanges = ranges.getFromRanges(); + assertEquals(2, fromRanges.size()); + // from ranges should be sorted by start order + assertEquals(10500, fromRanges.get(0)[0]); + assertEquals(10600, fromRanges.get(0)[1]); + assertEquals(20000, fromRanges.get(1)[0]); + assertEquals(20500, fromRanges.get(1)[1]); + // to range should start from given start numbering + List toRanges = ranges.getToRanges(); + assertEquals(1, toRanges.size()); + assertEquals(23, toRanges.get(0)[0]); + assertEquals(624, toRanges.get(0)[1]); + } + + /** + * Test the method that retains features except for 'CDS' (or subtypes), or + * features with parent other than the given id + */ + @Test(groups = "Functional") + public void testRetainFeature() + { + String accId = "ABC123"; + EnsemblCds testee = new EnsemblCds(); + + SequenceFeature sf = new SequenceFeature("CDS", "", 20000, + 20500, 0f, null); + assertFalse(testee.retainFeature(sf, accId)); + + sf.setType("CDS_predicted"); + assertFalse(testee.retainFeature(sf, accId)); + + // other feature with no parent is retained + sf.setType("sequence_variant"); + assertTrue(testee.retainFeature(sf, accId)); + + // other feature with desired parent is retained + sf.setValue("Parent", "transcript:" + accId); + assertTrue(testee.retainFeature(sf, accId)); + + // feature with wrong parent is not retained + sf.setValue("Parent", "transcript:XYZ"); + 
assertFalse(testee.retainFeature(sf, accId)); + } + + /** + * Test the method that picks out 'CDS' (or subtype) features with the + * accession id as parent + */ + @Test(groups = "Functional") + public void testIdentifiesSequence() + { + String accId = "ABC123"; + EnsemblCds testee = new EnsemblCds(); + + // cds with no parent not valid + SequenceFeature sf = new SequenceFeature("CDS", "", 1, 2, 0f, null); + assertFalse(testee.identifiesSequence(sf, accId)); + + // cds with wrong parent not valid + sf.setValue("Parent", "transcript:XYZ"); + assertFalse(testee.identifiesSequence(sf, accId)); + + // cds with right parent is valid + sf.setValue("Parent", "transcript:" + accId); + assertTrue(testee.identifiesSequence(sf, accId)); + + // cds sub-type with right parent is valid + sf.setType("CDS_predicted"); + assertTrue(testee.identifiesSequence(sf, accId)); + + // transcript not valid: + sf.setType("transcript"); + assertFalse(testee.identifiesSequence(sf, accId)); + + // exon not valid: + sf.setType("exon"); + assertFalse(testee.identifiesSequence(sf, accId)); + } + +} diff --git a/test/jalview/ext/ensembl/EnsemblGeneTest.java b/test/jalview/ext/ensembl/EnsemblGeneTest.java new file mode 100644 index 0000000..1f1a84e --- /dev/null +++ b/test/jalview/ext/ensembl/EnsemblGeneTest.java @@ -0,0 +1,228 @@ +package jalview.ext.ensembl; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertSame; +import static org.testng.AssertJUnit.assertTrue; + +import jalview.datamodel.SequenceDummy; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.io.gff.SequenceOntologyFactory; +import jalview.io.gff.SequenceOntologyLite; +import jalview.util.MapList; + +import java.util.List; + +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +public class EnsemblGeneTest +{ + 
@BeforeClass + public void setUp() + { + SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); + } + + @AfterClass + public void tearDown() + { + SequenceOntologyFactory.setInstance(null); + } + + /** + * Test that the gene part of genomic sequence is uniquely identified by a + * 'gene' feature (or subtype) with the correct gene ID + */ + @Test(groups = "Functional") + public void testGetGenomicRangesFromFeatures() + { + EnsemblGene testee = new EnsemblGene(); + SequenceI genomic = new SequenceDummy("chr7"); + genomic.setStart(10000); + genomic.setEnd(50000); + String geneId = "ABC123"; + + // gene at (start+10000) length 501 + SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f, + null); + sf.setValue("ID", "gene:" + geneId); + sf.setStrand("+"); + genomic.addSequenceFeature(sf); + + // gene at (start + 500) length 101 + // should be ignored - the first 'gene' found defines the whole range + sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null); + sf.setValue("ID", "gene:" + geneId); + sf.setStrand("+"); + genomic.addSequenceFeature(sf); + + MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId, + 23); + List fromRanges = ranges.getFromRanges(); + assertEquals(1, fromRanges.size()); + assertEquals(20000, fromRanges.get(0)[0]); + assertEquals(20500, fromRanges.get(0)[1]); + // to range should start from given start numbering + List toRanges = ranges.getToRanges(); + assertEquals(1, toRanges.size()); + assertEquals(23, toRanges.get(0)[0]); + assertEquals(523, toRanges.get(0)[1]); + } + + /** + * Test variant using a sub-type of gene from the Sequence Ontology + */ + @Test(groups = "Functional") + public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand() + { + EnsemblGene testee = new EnsemblGene(); + SequenceI genomic = new SequenceDummy("chr7"); + genomic.setStart(10000); + genomic.setEnd(50000); + String geneId = "ABC123"; + + // gene at (start+10000) length 501 + SequenceFeature sf = new 
SequenceFeature("ncRNA_gene", "", 20000, + 20500, 0f, null); + sf.setValue("ID", "gene:" + geneId); + sf.setStrand("-"); + genomic.addSequenceFeature(sf); + + // gene at (start + 500) length 101 + // should be ignored - the first 'gene' found defines the whole range + // (real data would only have one such feature) + sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null); + sf.setValue("ID", "gene:" + geneId); + sf.setStrand("+"); + genomic.addSequenceFeature(sf); + + MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId, + 23); + List fromRanges = ranges.getFromRanges(); + assertEquals(1, fromRanges.size()); + // from range on reverse strand: + assertEquals(20500, fromRanges.get(0)[0]); + assertEquals(20000, fromRanges.get(0)[1]); + // to range should start from given start numbering + List toRanges = ranges.getToRanges(); + assertEquals(1, toRanges.size()); + assertEquals(23, toRanges.get(0)[0]); + assertEquals(523, toRanges.get(0)[1]); + } + + /** + * Test the method that extracts transcript (or subtype) features with a + * specified gene as parent + */ + @Test(groups = "Functional") + public void testGetTranscriptFeatures() + { + SequenceI genomic = new SequenceDummy("chr7"); + genomic.setStart(10000); + genomic.setEnd(50000); + String geneId = "ABC123"; + + // transcript feature + SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000, + 20500, 0f, null); + sf1.setValue("Parent", "gene:" + geneId); + genomic.addSequenceFeature(sf1); + + // transcript sub-type feature + SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000, + 20500, 0f, null); + sf2.setValue("Parent", "gene:" + geneId); + genomic.addSequenceFeature(sf2); + + // NMD_transcript_variant treated like transcript in Ensembl + SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "", + 20000, 20500, 0f, null); + sf3.setValue("Parent", "gene:" + geneId); + genomic.addSequenceFeature(sf3); + + // transcript for a different gene - ignored + 
SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500, + 0f, null); + sf4.setValue("Parent", "gene:XYZ"); + genomic.addSequenceFeature(sf4); + + EnsemblGene testee = new EnsemblGene(); + List features = testee.getTranscriptFeatures(geneId, + genomic); + assertEquals(3, features.size()); + assertSame(sf1, features.get(0)); + assertSame(sf2, features.get(1)); + assertSame(sf3, features.get(2)); + } + + /** + * Test the method that retains features except for 'gene', or 'transcript' + * with parent other than the given id + */ + @Test(groups = "Functional") + public void testRetainFeature() + { + String geneId = "ABC123"; + EnsemblGene testee = new EnsemblGene(); + SequenceFeature sf = new SequenceFeature("gene", "", 20000, + 20500, 0f, null); + sf.setValue("ID", "gene:" + geneId); + assertFalse(testee.retainFeature(sf, geneId)); + + sf.setType("transcript"); + sf.setValue("Parent", "gene:" + geneId); + assertTrue(testee.retainFeature(sf, geneId)); + + sf.setType("mature_transcript"); + sf.setValue("Parent", "gene:" + geneId); + assertTrue(testee.retainFeature(sf, geneId)); + + sf.setType("NMD_transcript_variant"); + sf.setValue("Parent", "gene:" + geneId); + assertTrue(testee.retainFeature(sf, geneId)); + + sf.setValue("Parent", "gene:XYZ"); + assertFalse(testee.retainFeature(sf, geneId)); + + sf.setType("anything"); + assertTrue(testee.retainFeature(sf, geneId)); + } + + /** + * Test the method that picks out 'gene' (or subtype) features with the + * accession id as ID + */ + @Test(groups = "Functional") + public void testIdentifiesSequence() + { + String accId = "ABC123"; + EnsemblGene testee = new EnsemblGene(); + + // gene with no ID not valid + SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null); + assertFalse(testee.identifiesSequence(sf, accId)); + + // gene with wrong ID not valid + sf.setValue("ID", "gene:XYZ"); + assertFalse(testee.identifiesSequence(sf, accId)); + + // gene with right ID is valid + sf.setValue("ID", "gene:" + 
accId); + assertTrue(testee.identifiesSequence(sf, accId)); + + // gene sub-type with right ID is valid + sf.setType("snRNA_gene"); + assertTrue(testee.identifiesSequence(sf, accId)); + + // transcript not valid: + sf.setType("transcript"); + assertFalse(testee.identifiesSequence(sf, accId)); + + // exon not valid: + sf.setType("exon"); + assertFalse(testee.identifiesSequence(sf, accId)); + } +} diff --git a/test/jalview/ext/ensembl/EnsemblGenomeTest.java b/test/jalview/ext/ensembl/EnsemblGenomeTest.java new file mode 100644 index 0000000..daad8b1 --- /dev/null +++ b/test/jalview/ext/ensembl/EnsemblGenomeTest.java @@ -0,0 +1,170 @@ +package jalview.ext.ensembl; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertTrue; + +import jalview.datamodel.SequenceDummy; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.io.gff.SequenceOntologyFactory; +import jalview.io.gff.SequenceOntologyLite; +import jalview.util.MapList; + +import java.util.List; + +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +public class EnsemblGenomeTest +{ + @BeforeClass + public void setUp() + { + SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); + } + + @AfterClass + public void tearDown() + { + SequenceOntologyFactory.setInstance(null); + } + + /** + * Test that the transcript part of genomic sequence is correctly identified + * by 'transcript' features (or subtypes) with the correct transcript ID + */ + @Test(groups = "Functional") + public void testGetGenomicRangesFromFeatures() + { + EnsemblGenome testee = new EnsemblGenome(); + SequenceI genomic = new SequenceDummy("chr7"); + genomic.setStart(10000); + genomic.setEnd(50000); + String transcriptId = "ABC123"; + + // transcript at (start+10000) length 501 + SequenceFeature sf = new SequenceFeature("transcript", 
"", 20000, + 20500, 0f, + null); + sf.setValue("ID", "transcript:" + transcriptId); + sf.setStrand("+"); + genomic.addSequenceFeature(sf); + + // transcript (sub-type) at (start + 10500) length 101 + sf = new SequenceFeature("ncRNA", "", 10500, 10600, 0f, null); + sf.setValue("ID", "transcript:" + transcriptId); + sf.setStrand("+"); + genomic.addSequenceFeature(sf); + + // Ensembl treats NMD_transcript_variant as if transcript + // although strictly it is a sequence_variant in SO + sf = new SequenceFeature("NMD_transcript_variant", "", 11000, 12000, + 0f, null); + sf.setValue("ID", "transcript:" + transcriptId); + sf.setStrand("+"); + genomic.addSequenceFeature(sf); + + // transcript with a different ID doesn't count + sf = new SequenceFeature("transcript", "", 11500, 12600, 0f, null); + sf.setValue("ID", "transcript:anotherOne"); + genomic.addSequenceFeature(sf); + + // parent of transcript feature doesn't count + sf = new SequenceFeature("gene_member_region", "", 10000, 50000, 0f, + null); + genomic.addSequenceFeature(sf); + + MapList ranges = testee.getGenomicRangesFromFeatures(genomic, + transcriptId, 23); + List fromRanges = ranges.getFromRanges(); + assertEquals(3, fromRanges.size()); + // from ranges should be sorted by start order + assertEquals(10500, fromRanges.get(0)[0]); + assertEquals(10600, fromRanges.get(0)[1]); + assertEquals(11000, fromRanges.get(1)[0]); + assertEquals(12000, fromRanges.get(1)[1]); + assertEquals(20000, fromRanges.get(2)[0]); + assertEquals(20500, fromRanges.get(2)[1]); + // to range should start from given start numbering + List toRanges = ranges.getToRanges(); + assertEquals(1, toRanges.size()); + assertEquals(23, toRanges.get(0)[0]); + assertEquals(1625, toRanges.get(0)[1]); + } + + /** + * Test the method that retains features except for 'transcript' (or + * sub-type), or those with parent other than the given id + */ + @Test(groups = "Functional") + public void testRetainFeature() + { + String accId = "ABC123"; + EnsemblGenome 
testee = new EnsemblGenome(); + + SequenceFeature sf = new SequenceFeature("transcript", "", 20000, + 20500, 0f, null); + assertFalse(testee.retainFeature(sf, accId)); + + sf.setType("mature_transcript"); + assertFalse(testee.retainFeature(sf, accId)); + + sf.setType("NMD_transcript_variant"); + assertFalse(testee.retainFeature(sf, accId)); + + // other feature with no parent is kept + sf.setType("anything"); + assertTrue(testee.retainFeature(sf, accId)); + + // other feature with correct parent is kept + sf.setValue("Parent", "transcript:" + accId); + assertTrue(testee.retainFeature(sf, accId)); + + // other feature with wrong parent is not kept + sf.setValue("Parent", "transcript:XYZ"); + assertFalse(testee.retainFeature(sf, accId)); + } + + /** + * Test the method that picks out 'transcript' (or subtype) features with the + * accession id as ID + */ + @Test(groups = "Functional") + public void testIdentifiesSequence() + { + String accId = "ABC123"; + EnsemblGenome testee = new EnsemblGenome(); + + // transcript with no ID not valid + SequenceFeature sf = new SequenceFeature("transcript", "", 1, 2, 0f, + null); + assertFalse(testee.identifiesSequence(sf, accId)); + + // transcript with wrong ID not valid + sf.setValue("ID", "transcript"); + assertFalse(testee.identifiesSequence(sf, accId)); + + // transcript with right ID is valid + sf.setValue("ID", "transcript:" + accId); + assertTrue(testee.identifiesSequence(sf, accId)); + + // transcript sub-type with right ID is valid + sf.setType("ncRNA"); + assertTrue(testee.identifiesSequence(sf, accId)); + + // Ensembl treats NMD_transcript_variant as if a transcript + sf.setType("NMD_transcript_variant"); + assertTrue(testee.identifiesSequence(sf, accId)); + + // gene not valid: + sf.setType("gene"); + assertFalse(testee.identifiesSequence(sf, accId)); + + // exon not valid: + sf.setType("exon"); + assertFalse(testee.identifiesSequence(sf, accId)); + } + +} diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java 
b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java index 31745e5..ed936d5 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java @@ -21,6 +21,8 @@ import java.util.Arrays; import java.util.List; import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -106,6 +108,18 @@ public class EnsemblSeqProxyTest + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n" + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } }; + @BeforeClass + public void setUp() + { + SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); + } + + @AfterClass + public void tearDown() + { + SequenceOntologyFactory.setInstance(null); + } + @DataProvider(name = "queries") public Object[][] createQueryData(Method m) { @@ -306,10 +320,9 @@ public class EnsemblSeqProxyTest sf = new SequenceFeature("exon", "", 7, 9, 0f, null); ds.addSequenceFeature(sf); // CDS for dna 10-12 - sf = new SequenceFeature("some_cds", "", 10, 12, 0f, null); + sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null); ds.addSequenceFeature(sf); - SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); List ranges = new ArrayList(); int mappedLength = testee.getCdsRanges(dnaSeq, ranges); assertEquals(6, mappedLength); @@ -346,10 +359,9 @@ public class EnsemblSeqProxyTest ds.addSequenceFeature(sf); ds.addSequenceFeature(sf); // CDS for dna 13-15 - sf = new SequenceFeature("some_cds", "", 13, 15, 0f, null); + sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null); ds.addSequenceFeature(sf); - SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); List ranges = new ArrayList(); int mappedLength = testee.getCdsRanges(dnaSeq, ranges); -- 1.7.10.2