package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import jalview.api.FeatureSettingsModelI; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyLite; import jalview.util.MapList; import java.awt.Color; import java.util.List; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class EnsemblGeneTest { @BeforeClass public void setUp() { SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); } @AfterClass public void tearDown() { SequenceOntologyFactory.setInstance(null); } /** * Test that the gene part of genomic sequence is uniquely identified by a * 'gene' features (or subtype) with the correct gene ID */ @Test(groups = "Functional") public void testGetGenomicRangesFromFeatures() { EnsemblGene testee = new EnsemblGene(); SequenceI genomic = new SequenceDummy("chr7"); genomic.setStart(10000); genomic.setEnd(50000); String geneId = "ABC123"; // gene at (start+10000) length 501 SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f, null); sf.setValue("ID", "gene:" + geneId); sf.setStrand("+"); genomic.addSequenceFeature(sf); // gene at (start + 10500) length 101 // should be ignored - the first 'gene' found defines the whole range sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null); sf.setValue("ID", "gene:" + geneId); sf.setStrand("+"); genomic.addSequenceFeature(sf); MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId, 23); List fromRanges = ranges.getFromRanges(); assertEquals(1, fromRanges.size()); assertEquals(20000, fromRanges.get(0)[0]); assertEquals(20500, fromRanges.get(0)[1]); // to range should start from given start numbering List toRanges = ranges.getToRanges(); assertEquals(1, toRanges.size()); assertEquals(23, toRanges.get(0)[0]); assertEquals(523, toRanges.get(0)[1]); } /** * Test variant using a sub-type of gene from the Sequence Ontology */ @Test(groups = "Functional") public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand() { EnsemblGene testee = new EnsemblGene(); SequenceI genomic = new SequenceDummy("chr7"); genomic.setStart(10000); genomic.setEnd(50000); String geneId = "ABC123"; // gene at (start+10000) length 501 SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000, 20500, 0f, null); sf.setValue("ID", "gene:" + geneId); sf.setStrand("-"); genomic.addSequenceFeature(sf); // gene at (start + 10500) length 101 // should be ignored - the first 'gene' found defines the whole range // (real data would only have one such feature) sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null); sf.setValue("ID", "gene:" + geneId); sf.setStrand("+"); genomic.addSequenceFeature(sf); MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId, 23); List fromRanges = ranges.getFromRanges(); assertEquals(1, fromRanges.size()); // from range on reverse strand: assertEquals(20500, fromRanges.get(0)[0]); assertEquals(20000, fromRanges.get(0)[1]); // to range should start from given start numbering List toRanges = ranges.getToRanges(); assertEquals(1, toRanges.size()); assertEquals(23, toRanges.get(0)[0]); assertEquals(523, toRanges.get(0)[1]); } /** * Test the method that extracts transcript (or subtype) features with a * specified gene as parent */ @Test(groups = "Functional") public void testGetTranscriptFeatures() { SequenceI genomic = new SequenceDummy("chr7"); genomic.setStart(10000); genomic.setEnd(50000); String geneId = "ABC123"; // transcript feature SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000, 20500, 0f, null); sf1.setValue("Parent", "gene:" + geneId); sf1.setValue("transcript_id", "transcript1"); genomic.addSequenceFeature(sf1); // transcript sub-type feature SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000, 20500, 0f, null); sf2.setValue("Parent", "gene:" + geneId); sf2.setValue("transcript_id", "transcript2"); genomic.addSequenceFeature(sf2); // NMD_transcript_variant treated like transcript in Ensembl SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500, 0f, null); sf3.setValue("Parent", "gene:" + geneId); sf3.setValue("transcript_id", "transcript3"); genomic.addSequenceFeature(sf3); // transcript for a different gene - ignored SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500, 0f, null); sf4.setValue("Parent", "gene:XYZ"); sf4.setValue("transcript_id", "transcript4"); genomic.addSequenceFeature(sf4); EnsemblGene testee = new EnsemblGene(); /* * with no filter */ List features = testee.getTranscriptFeatures(geneId, genomic); assertEquals(3, features.size()); assertSame(sf1, features.get(0)); assertSame(sf2, features.get(1)); assertSame(sf3, features.get(2)); } /** * Test the method that retains features except for 'gene', or 'transcript' * with parent other than the given id */ @Test(groups = "Functional") public void testRetainFeature() { String geneId = "ABC123"; EnsemblGene testee = new EnsemblGene(); SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f, null); sf.setValue("ID", "gene:" + geneId); assertFalse(testee.retainFeature(sf, geneId)); sf.setType("transcript"); sf.setValue("Parent", "gene:" + geneId); assertTrue(testee.retainFeature(sf, geneId)); sf.setType("mature_transcript"); sf.setValue("Parent", "gene:" + geneId); assertTrue(testee.retainFeature(sf, geneId)); sf.setType("NMD_transcript_variant"); sf.setValue("Parent", "gene:" + geneId); assertTrue(testee.retainFeature(sf, geneId)); sf.setValue("Parent", "gene:XYZ"); assertFalse(testee.retainFeature(sf, geneId)); sf.setType("anything"); assertTrue(testee.retainFeature(sf, geneId)); } /** * Test the method that picks out 'gene' (or subtype) features with the * accession id as ID */ @Test(groups = "Functional") public void testIdentifiesSequence() { String accId = "ABC123"; EnsemblGene testee = new EnsemblGene(); // gene with no ID not valid SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null); assertFalse(testee.identifiesSequence(sf, accId)); // gene with wrong ID not valid sf.setValue("ID", "gene:XYZ"); assertFalse(testee.identifiesSequence(sf, accId)); // gene with right ID is valid sf.setValue("ID", "gene:" + accId); assertTrue(testee.identifiesSequence(sf, accId)); // gene sub-type with right ID is valid sf.setType("snRNA_gene"); assertTrue(testee.identifiesSequence(sf, accId)); // transcript not valid: sf.setType("transcript"); assertFalse(testee.identifiesSequence(sf, accId)); // exon not valid: sf.setType("exon"); assertFalse(testee.identifiesSequence(sf, accId)); } /** * Check behaviour of feature colour scheme for EnsemblGene sequences. * Currently coded to display exon and sequence_variant (or sub-types) only, * with sequence_variant in red above exon coloured by label. */ @Test(groups = "Functional") public void testGetFeatureColourScheme() { FeatureSettingsModelI fc = new EnsemblGene().getFeatureColourScheme(); assertTrue(fc.isFeatureDisplayed("exon")); assertTrue(fc.isFeatureDisplayed("coding_exon")); // subtype of exon assertTrue(fc.isFeatureDisplayed("sequence_variant")); assertTrue(fc.isFeatureDisplayed("feature_variant")); // subtype assertFalse(fc.isFeatureDisplayed("transcript")); assertEquals(Color.RED, fc.getFeatureColour("sequence_variant") .getColour()); assertEquals(Color.RED, fc.getFeatureColour("feature_variant") .getColour()); assertTrue(fc.getFeatureColour("exon").isColourByLabel()); assertTrue(fc.getFeatureColour("coding_exon").isColourByLabel()); assertEquals(1, fc.compare("sequence_variant", "exon")); assertEquals(-1, fc.compare("exon", "sequence_variant")); assertEquals(1, fc.compare("feature_variant", "coding_exon")); assertEquals(-1, fc.compare("coding_exon", "feature_variant")); assertEquals(1f, fc.getTransparency()); } }