package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyLite; import jalview.util.MapList; import java.util.List; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class EnsemblGenomeTest { @BeforeClass(alwaysRun = true) public void setUp() { SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); } @AfterClass(alwaysRun = true) public void tearDown() { SequenceOntologyFactory.setInstance(null); } /** * Test that the genomic sequence part of genomic sequence is correctly * identified by 'transcript' features (or subtypes) with the correct gene ID */ @Test(groups = "Functional") public void testGetGenomicRangesFromFeatures() { EnsemblGenome testee = new EnsemblGenome(); SequenceI genomic = new SequenceDummy("chr7"); genomic.setStart(10000); genomic.setEnd(50000); String transcriptId = "ABC123"; // transcript at (start+10000) length 501 SequenceFeature sf = new SequenceFeature("transcript", "", 20000, 20500, 0f, null); sf.setValue("ID", "transcript:" + transcriptId); sf.setStrand("+"); genomic.addSequenceFeature(sf); // transcript (sub-type) at (start + 10500) length 101 sf = new SequenceFeature("ncRNA", "", 10500, 10600, 0f, null); sf.setValue("ID", "transcript:" + transcriptId); sf.setStrand("+"); genomic.addSequenceFeature(sf); // Ensembl treats NMD_transcript_variant as if transcript // although strictly it is a sequence_variant in SO sf = new SequenceFeature("NMD_transcript_variant", "", 11000, 12000, 0f, null); sf.setValue("ID", "transcript:" + transcriptId); sf.setStrand("+"); genomic.addSequenceFeature(sf); // transcript with a different ID doesn't count sf = new SequenceFeature("transcript", "", 11500, 12600, 0f, null); sf.setValue("ID", "transcript:anotherOne"); genomic.addSequenceFeature(sf); // parent of transcript feature doesn't count sf = new SequenceFeature("gene_member_region", "", 10000, 50000, 0f, null); genomic.addSequenceFeature(sf); MapList ranges = testee.getGenomicRangesFromFeatures(genomic, transcriptId, 23); List fromRanges = ranges.getFromRanges(); assertEquals(3, fromRanges.size()); // from ranges should be sorted by start order assertEquals(10500, fromRanges.get(0)[0]); assertEquals(10600, fromRanges.get(0)[1]); assertEquals(11000, fromRanges.get(1)[0]); assertEquals(12000, fromRanges.get(1)[1]); assertEquals(20000, fromRanges.get(2)[0]); assertEquals(20500, fromRanges.get(2)[1]); // to range should start from given start numbering List toRanges = ranges.getToRanges(); assertEquals(1, toRanges.size()); assertEquals(23, toRanges.get(0)[0]); assertEquals(1625, toRanges.get(0)[1]); } /** * Test the method that retains features except for 'transcript' (or * sub-type), or those with parent other than the given id */ @Test(groups = "Functional") public void testRetainFeature() { String accId = "ABC123"; EnsemblGenome testee = new EnsemblGenome(); SequenceFeature sf = new SequenceFeature("transcript", "", 20000, 20500, 0f, null); assertFalse(testee.retainFeature(sf, accId)); sf.setType("mature_transcript"); assertFalse(testee.retainFeature(sf, accId)); sf.setType("NMD_transcript_variant"); assertFalse(testee.retainFeature(sf, accId)); // other feature with no parent is kept sf.setType("anything"); assertTrue(testee.retainFeature(sf, accId)); // other feature with correct parent is kept sf.setValue("Parent", "transcript:" + accId); assertTrue(testee.retainFeature(sf, accId)); // other feature with wrong parent is not kept sf.setValue("Parent", "transcript:XYZ"); assertFalse(testee.retainFeature(sf, accId)); } /** * Test the method that picks out 'transcript' (or subtype) features with the * accession id as ID */ @Test(groups = "Functional") public void testIdentifiesSequence() { String accId = "ABC123"; EnsemblGenome testee = new EnsemblGenome(); // transcript with no ID not valid SequenceFeature sf = new SequenceFeature("transcript", "", 1, 2, 0f, null); assertFalse(testee.identifiesSequence(sf, accId)); // transcript with wrong ID not valid sf.setValue("ID", "transcript"); assertFalse(testee.identifiesSequence(sf, accId)); // transcript with right ID is valid sf.setValue("ID", "transcript:" + accId); assertTrue(testee.identifiesSequence(sf, accId)); // transcript sub-type with right ID is valid sf.setType("ncRNA"); assertTrue(testee.identifiesSequence(sf, accId)); // Ensembl treats NMD_transcript_variant as if a transcript sf.setType("NMD_transcript_variant"); assertTrue(testee.identifiesSequence(sf, accId)); // gene not valid: sf.setType("gene"); assertFalse(testee.identifiesSequence(sf, accId)); // exon not valid: sf.setType("exon"); assertFalse(testee.identifiesSequence(sf, accId)); } }