/**
* Answers true unless the feature type is 'transcript' (or a sub-type of
- * transcript in the Sequence Ontology). Transcript features are only
- * retrieved in order to identify the transcript sequence range, and are
- * redundant information on the transcript sequence itself.
+ * transcript in the Sequence Ontology), or has a parent other than the given
+ * accession id. Transcript features are only retrieved in order to identify
+ * the transcript sequence range, and are redundant information on the
+ * transcript sequence itself.
*/
@Override
protected boolean retainFeature(SequenceFeature sf, String accessionId)
protected static final String ID = "ID";
- /*
- * this needs special handling, as it isA sequence_variant in the
- * Sequence Ontology, but behaves in Ensembl as if it isA transcript
- */
- protected static final String NMD_VARIANT = "NMD_transcript_variant";
-
protected static final String NAME = "Name";
public enum EnsemblSeqType
*/
public static boolean isTranscript(String featureType)
{
+ // NMD_transcript_variant is matched by name before the ontology lookup:
+ // it isA sequence_variant in the Sequence Ontology, but is used in
+ // Ensembl as if it were a transcript
- return NMD_VARIANT.equals(featureType)
+ return SequenceOntologyI.NMD_TRANSCRIPT_VARIANT.equals(featureType)
|| SequenceOntologyFactory.getInstance().isA(featureType,
SequenceOntologyI.TRANSCRIPT);
}
/*
* selected commonly used values for quick reference
*/
+ // SO:0000104
public static final String POLYPEPTIDE = "polypeptide";
+ // SO:0000349
public static final String PROTEIN_MATCH = "protein_match";
+ // SO:0000347
public static final String NUCLEOTIDE_MATCH = "nucleotide_match";
// SO:0000316
// SO:0000673
public static final String TRANSCRIPT = "transcript";
+ // SO:0001621 isA sequence_variant but used in Ensembl as a transcript
+ public static final String NMD_TRANSCRIPT_VARIANT = "NMD_transcript_variant";
+
// SO:0000704
public static final String GENE = "gene";
{ "structural_variant", "sequence_variant" },
/*
- * no sub-types of exon or CDS yet encountered; add if needed
+ * no sub-types of exon or CDS encountered in Ensembl
+ * a few added here for testing purposes
*/
{ "exon", "exon" },
+ { "coding_exon", "exon" },
{ "CDS", "CDS" },
+ { "CDS_predicted", "CDS" },
/*
* used in exonerate GFF
package jalview.ext.ensembl;
import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertTrue;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import java.util.List;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class EnsemblCdnaTest
{
+ /**
+ * Installs a SequenceOntologyLite as the factory's ontology before the tests
+ * in this class run. alwaysRun is set because this method declares no group
+ * while the tests are in group "Functional"; without it the method is
+ * skipped when tests are selected by group.
+ */
+ @BeforeClass(alwaysRun = true)
+ public void setUp()
+ {
+ SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+ }
+
+ /**
+ * Resets the factory's ontology instance to null after the tests in this
+ * class have run (alwaysRun for the same reason as setUp).
+ */
+ @AfterClass(alwaysRun = true)
+ public void tearDown()
+ {
+ SequenceOntologyFactory.setInstance(null);
+ }
/**
* Test that the cdna part of genomic sequence is correctly identified by
* 'exon' features (or subtypes) - reverse strand case.
*/
@Test(groups = "Functional")
- public void getGenomicRangesFromFeatures_reverseStrand()
+ public void testGetGenomicRangesFromFeatures_reverseStrand()
{
EnsemblCdna testee = new EnsemblCdna();
SequenceI genomic = new SequenceDummy("chr7");
sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
genomic.addSequenceFeature(sf);
- SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
-
MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
transcriptId, 23);
List<int[]> fromRanges = ranges.getFromRanges();
/**
* Test that the cdna part of genomic sequence is correctly identified by
- * 'exon' features (or subtypes).
+ * 'exon' features (or subtypes) with the desired transcript as parent
*/
@Test(groups = "Functional")
- public void getGenomicRangesFromFeatures()
+ public void testGetGenomicRangesFromFeatures()
{
EnsemblCdna testee = new EnsemblCdna();
SequenceI genomic = new SequenceDummy("chr7");
sf.setStrand("-"); // weird but ignored
genomic.addSequenceFeature(sf);
- SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
-
MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
transcriptId, 23);
List<int[]> fromRanges = ranges.getFromRanges();
* reverse strands are present in the features of interest
*/
@Test(groups = "Functional")
- public void getGenomicRangesFromFeatures_mixedStrand()
+ public void testGetGenomicRangesFromFeatures_mixedStrand()
{
EnsemblCdna testee = new EnsemblCdna();
SequenceI genomic = new SequenceDummy("chr7");
sf.setStrand("+");
genomic.addSequenceFeature(sf);
- SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
-
MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
transcriptId, 23);
assertNull(ranges);
}
+
+ /**
+ * Verifies retainFeature(): 'transcript' features (and the other types the
+ * test expects to be handled like transcripts) are dropped, as is any
+ * feature whose Parent is a different transcript; other features are kept
+ */
+ @Test(groups = "Functional")
+ public void testRetainFeature()
+ {
+ final String accession = "ABC123";
+ final String ownParent = "transcript:" + accession;
+ final EnsemblCdna cdna = new EnsemblCdna();
+ final SequenceFeature feature = new SequenceFeature("transcript", "",
+ 20000, 20500, 0f, null);
+
+ // a plain transcript is dropped
+ assertFalse(cdna.retainFeature(feature, accession));
+
+ // and so are the remaining transcript-like types
+ for (String transcriptLike : new String[] {
+ "aberrant_processed_transcript", "NMD_transcript_variant" })
+ {
+ feature.setType(transcriptLike);
+ assertFalse(cdna.retainFeature(feature, accession));
+ }
+
+ // a different feature type is kept while it has no parent...
+ feature.setType("sequence_variant");
+ assertTrue(cdna.retainFeature(feature, accession));
+
+ // ...and while its parent is the requested transcript...
+ feature.setValue("Parent", ownParent);
+ assertTrue(cdna.retainFeature(feature, accession));
+
+ // ...but not once its parent is some other transcript
+ feature.setValue("Parent", "transcript:XYZ");
+ assertFalse(cdna.retainFeature(feature, accession));
+ }
+
+ /**
+ * Verifies identifiesSequence(): only 'exon' features (or sub-types) whose
+ * Parent is the requested transcript are accepted
+ */
+ @Test(groups = "Functional")
+ public void testIdentifiesSequence()
+ {
+ final String accession = "ABC123";
+ final String ownParent = "transcript:" + accession;
+ final EnsemblCdna cdna = new EnsemblCdna();
+ final SequenceFeature feature = new SequenceFeature("exon", "", 1, 2,
+ 0f, null);
+
+ // an exon without any parent is rejected
+ assertFalse(cdna.identifiesSequence(feature, accession));
+
+ // as is an exon belonging to another transcript
+ feature.setValue("Parent", "transcript:XYZ");
+ assertFalse(cdna.identifiesSequence(feature, accession));
+
+ // an exon of the requested transcript is accepted
+ feature.setValue("Parent", ownParent);
+ assertTrue(cdna.identifiesSequence(feature, accession));
+
+ // and so is an exon sub-type with that same parent
+ feature.setType("coding_exon");
+ assertTrue(cdna.identifiesSequence(feature, accession));
+
+ // non-exon types are rejected even with the right parent
+ for (String otherType : new String[] { "transcript", "CDS" })
+ {
+ feature.setType(otherType);
+ assertFalse(cdna.identifiesSequence(feature, accession));
+ }
+ }
}
--- /dev/null
+package jalview.ext.ensembl;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
+import jalview.util.MapList;
+
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for EnsemblCds: locating CDS ranges on a genomic sequence, and the
+ * feature filters retainFeature() and identifiesSequence().
+ */
+public class EnsemblCdsTest
+{
+ /**
+ * Installs a SequenceOntologyLite as the factory's ontology before the tests
+ * in this class run. alwaysRun is set because this method declares no group
+ * while the tests are in group "Functional"; without it the method is
+ * skipped when tests are selected by group.
+ */
+ @BeforeClass(alwaysRun = true)
+ public void setUp()
+ {
+ SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+ }
+
+ /**
+ * Resets the factory's ontology instance to null after the tests in this
+ * class have run (alwaysRun for the same reason as setUp).
+ */
+ @AfterClass(alwaysRun = true)
+ public void tearDown()
+ {
+ SequenceOntologyFactory.setInstance(null);
+ }
+
+ /**
+ * Test that the coding (CDS) part of genomic sequence is correctly
+ * identified by 'CDS' features (or subtypes) with the desired transcript as
+ * parent
+ */
+ @Test(groups = "Functional")
+ public void testGetGenomicRangesFromFeatures()
+ {
+ EnsemblCds testee = new EnsemblCds();
+ SequenceI genomic = new SequenceDummy("chr7");
+ genomic.setStart(10000);
+ genomic.setEnd(50000);
+ String transcriptId = "ABC123";
+
+ // CDS at (start + 10000) length 501
+ SequenceFeature sf = new SequenceFeature("CDS", "", 20000, 20500, 0f,
+ null);
+ sf.setValue("Parent", "transcript:" + transcriptId);
+ sf.setStrand("+");
+ genomic.addSequenceFeature(sf);
+
+ // CDS (sub-type) at (start + 500) length 101
+ sf = new SequenceFeature("CDS_predicted", "", 10500, 10600, 0f, null);
+ sf.setValue("Parent", "transcript:" + transcriptId);
+ sf.setStrand("+");
+ genomic.addSequenceFeature(sf);
+
+ // CDS belonging to a different transcript doesn't count
+ sf = new SequenceFeature("CDS", "", 11500, 12600, 0f, null);
+ sf.setValue("Parent", "transcript:anotherOne");
+ genomic.addSequenceFeature(sf);
+
+ // exon feature doesn't count
+ sf = new SequenceFeature("exon", "", 10000, 50000, 0f, null);
+ genomic.addSequenceFeature(sf);
+
+ // mRNA_region feature doesn't count (parent of CDS)
+ sf = new SequenceFeature("mRNA_region", "", 10000, 50000, 0f, null);
+ genomic.addSequenceFeature(sf);
+
+ MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
+ transcriptId, 23);
+ List<int[]> fromRanges = ranges.getFromRanges();
+ assertEquals(2, fromRanges.size());
+ // from ranges should be sorted by start order
+ assertEquals(10500, fromRanges.get(0)[0]);
+ assertEquals(10600, fromRanges.get(0)[1]);
+ assertEquals(20000, fromRanges.get(1)[0]);
+ assertEquals(20500, fromRanges.get(1)[1]);
+ // to range should start from given start numbering
+ // and span the combined length: 23 + (101 + 501) - 1 = 624
+ List<int[]> toRanges = ranges.getToRanges();
+ assertEquals(1, toRanges.size());
+ assertEquals(23, toRanges.get(0)[0]);
+ assertEquals(624, toRanges.get(0)[1]);
+ }
+
+ /**
+ * Test the method that retains features except for 'CDS' (or subtypes), or
+ * features with parent other than the given id
+ */
+ @Test(groups = "Functional")
+ public void testRetainFeature()
+ {
+ String accId = "ABC123";
+ EnsemblCds testee = new EnsemblCds();
+
+ // CDS is not retained
+ SequenceFeature sf = new SequenceFeature("CDS", "", 20000,
+ 20500, 0f, null);
+ assertFalse(testee.retainFeature(sf, accId));
+
+ // CDS sub-type is not retained
+ sf.setType("CDS_predicted");
+ assertFalse(testee.retainFeature(sf, accId));
+
+ // other feature with no parent is retained
+ sf.setType("sequence_variant");
+ assertTrue(testee.retainFeature(sf, accId));
+
+ // other feature with desired parent is retained
+ sf.setValue("Parent", "transcript:" + accId);
+ assertTrue(testee.retainFeature(sf, accId));
+
+ // feature with wrong parent is not retained
+ sf.setValue("Parent", "transcript:XYZ");
+ assertFalse(testee.retainFeature(sf, accId));
+ }
+
+ /**
+ * Test the method that picks out 'CDS' (or subtype) features with the
+ * accession id as parent
+ */
+ @Test(groups = "Functional")
+ public void testIdentifiesSequence()
+ {
+ String accId = "ABC123";
+ EnsemblCds testee = new EnsemblCds();
+
+ // cds with no parent not valid
+ SequenceFeature sf = new SequenceFeature("CDS", "", 1, 2, 0f, null);
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // cds with wrong parent not valid
+ sf.setValue("Parent", "transcript:XYZ");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // cds with right parent is valid
+ sf.setValue("Parent", "transcript:" + accId);
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // cds sub-type with right parent is valid
+ sf.setType("CDS_predicted");
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // transcript not valid:
+ sf.setType("transcript");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // exon not valid:
+ sf.setType("exon");
+ assertFalse(testee.identifiesSequence(sf, accId));
+ }
+
+}
--- /dev/null
+package jalview.ext.ensembl;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
+import jalview.util.MapList;
+
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for EnsemblGene: locating the gene range on a genomic sequence,
+ * extracting transcript features for a gene, and the feature filters
+ * retainFeature() and identifiesSequence().
+ */
+public class EnsemblGeneTest
+{
+ /**
+ * Installs a SequenceOntologyLite as the factory's ontology before the tests
+ * in this class run. alwaysRun is set because this method declares no group
+ * while the tests are in group "Functional"; without it the method is
+ * skipped when tests are selected by group.
+ */
+ @BeforeClass(alwaysRun = true)
+ public void setUp()
+ {
+ SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+ }
+
+ /**
+ * Resets the factory's ontology instance to null after the tests in this
+ * class have run (alwaysRun for the same reason as setUp).
+ */
+ @AfterClass(alwaysRun = true)
+ public void tearDown()
+ {
+ SequenceOntologyFactory.setInstance(null);
+ }
+
+ /**
+ * Test that the gene part of genomic sequence is uniquely identified by a
+ * 'gene' feature (or subtype) with the correct gene ID
+ */
+ @Test(groups = "Functional")
+ public void testGetGenomicRangesFromFeatures()
+ {
+ EnsemblGene testee = new EnsemblGene();
+ SequenceI genomic = new SequenceDummy("chr7");
+ genomic.setStart(10000);
+ genomic.setEnd(50000);
+ String geneId = "ABC123";
+
+ // gene at (start + 10000) length 501
+ SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
+ null);
+ sf.setValue("ID", "gene:" + geneId);
+ sf.setStrand("+");
+ genomic.addSequenceFeature(sf);
+
+ // gene at (start + 500) length 101
+ // should be ignored - the first 'gene' found defines the whole range
+ sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
+ sf.setValue("ID", "gene:" + geneId);
+ sf.setStrand("+");
+ genomic.addSequenceFeature(sf);
+
+ MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
+ 23);
+ List<int[]> fromRanges = ranges.getFromRanges();
+ assertEquals(1, fromRanges.size());
+ assertEquals(20000, fromRanges.get(0)[0]);
+ assertEquals(20500, fromRanges.get(0)[1]);
+ // to range should start from given start numbering
+ // and span the gene length: 23 + 501 - 1 = 523
+ List<int[]> toRanges = ranges.getToRanges();
+ assertEquals(1, toRanges.size());
+ assertEquals(23, toRanges.get(0)[0]);
+ assertEquals(523, toRanges.get(0)[1]);
+ }
+
+ /**
+ * Test variant using a sub-type of gene from the Sequence Ontology, located
+ * on the reverse strand
+ */
+ @Test(groups = "Functional")
+ public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
+ {
+ EnsemblGene testee = new EnsemblGene();
+ SequenceI genomic = new SequenceDummy("chr7");
+ genomic.setStart(10000);
+ genomic.setEnd(50000);
+ String geneId = "ABC123";
+
+ // gene at (start + 10000) length 501
+ SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
+ 20500, 0f, null);
+ sf.setValue("ID", "gene:" + geneId);
+ sf.setStrand("-");
+ genomic.addSequenceFeature(sf);
+
+ // gene at (start + 500) length 101
+ // should be ignored - the first 'gene' found defines the whole range
+ // (real data would only have one such feature)
+ sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
+ sf.setValue("ID", "gene:" + geneId);
+ sf.setStrand("+");
+ genomic.addSequenceFeature(sf);
+
+ MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
+ 23);
+ List<int[]> fromRanges = ranges.getFromRanges();
+ assertEquals(1, fromRanges.size());
+ // from range on reverse strand:
+ assertEquals(20500, fromRanges.get(0)[0]);
+ assertEquals(20000, fromRanges.get(0)[1]);
+ // to range should start from given start numbering
+ List<int[]> toRanges = ranges.getToRanges();
+ assertEquals(1, toRanges.size());
+ assertEquals(23, toRanges.get(0)[0]);
+ assertEquals(523, toRanges.get(0)[1]);
+ }
+
+ /**
+ * Test the method that extracts transcript (or subtype) features with a
+ * specified gene as parent
+ */
+ @Test(groups = "Functional")
+ public void testGetTranscriptFeatures()
+ {
+ SequenceI genomic = new SequenceDummy("chr7");
+ genomic.setStart(10000);
+ genomic.setEnd(50000);
+ String geneId = "ABC123";
+
+ // transcript feature
+ SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
+ 20500, 0f, null);
+ sf1.setValue("Parent", "gene:" + geneId);
+ genomic.addSequenceFeature(sf1);
+
+ // transcript sub-type feature
+ SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000,
+ 20500, 0f, null);
+ sf2.setValue("Parent", "gene:" + geneId);
+ genomic.addSequenceFeature(sf2);
+
+ // NMD_transcript_variant treated like transcript in Ensembl
+ SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
+ 20000, 20500, 0f, null);
+ sf3.setValue("Parent", "gene:" + geneId);
+ genomic.addSequenceFeature(sf3);
+
+ // transcript for a different gene - ignored
+ SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500,
+ 0f, null);
+ sf4.setValue("Parent", "gene:XYZ");
+ genomic.addSequenceFeature(sf4);
+
+ EnsemblGene testee = new EnsemblGene();
+ List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
+ genomic);
+ // expect the three matching features, in the order they were added
+ assertEquals(3, features.size());
+ assertSame(sf1, features.get(0));
+ assertSame(sf2, features.get(1));
+ assertSame(sf3, features.get(2));
+ }
+
+ /**
+ * Test the method that retains features except for 'gene', or 'transcript'
+ * with parent other than the given id
+ */
+ @Test(groups = "Functional")
+ public void testRetainFeature()
+ {
+ String geneId = "ABC123";
+ EnsemblGene testee = new EnsemblGene();
+
+ // the gene feature itself is not retained
+ SequenceFeature sf = new SequenceFeature("gene", "", 20000,
+ 20500, 0f, null);
+ sf.setValue("ID", "gene:" + geneId);
+ assertFalse(testee.retainFeature(sf, geneId));
+
+ // transcript with the gene as parent is retained
+ sf.setType("transcript");
+ sf.setValue("Parent", "gene:" + geneId);
+ assertTrue(testee.retainFeature(sf, geneId));
+
+ // transcript sub-type with the gene as parent is retained
+ sf.setType("mature_transcript");
+ sf.setValue("Parent", "gene:" + geneId);
+ assertTrue(testee.retainFeature(sf, geneId));
+
+ // NMD_transcript_variant with the gene as parent is retained
+ sf.setType("NMD_transcript_variant");
+ sf.setValue("Parent", "gene:" + geneId);
+ assertTrue(testee.retainFeature(sf, geneId));
+
+ // transcript-like feature with a different gene as parent is not retained
+ sf.setValue("Parent", "gene:XYZ");
+ assertFalse(testee.retainFeature(sf, geneId));
+
+ // any other feature type is retained, even though the parent set above
+ // is still a different gene
+ sf.setType("anything");
+ assertTrue(testee.retainFeature(sf, geneId));
+ }
+
+ /**
+ * Test the method that picks out 'gene' (or subtype) features with the
+ * accession id as ID
+ */
+ @Test(groups = "Functional")
+ public void testIdentifiesSequence()
+ {
+ String accId = "ABC123";
+ EnsemblGene testee = new EnsemblGene();
+
+ // gene with no ID not valid
+ SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // gene with wrong ID not valid
+ sf.setValue("ID", "gene:XYZ");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // gene with right ID is valid
+ sf.setValue("ID", "gene:" + accId);
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // gene sub-type with right ID is valid
+ sf.setType("snRNA_gene");
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // transcript not valid:
+ sf.setType("transcript");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // exon not valid:
+ sf.setType("exon");
+ assertFalse(testee.identifiesSequence(sf, accId));
+ }
+}
--- /dev/null
+package jalview.ext.ensembl;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
+import jalview.util.MapList;
+
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for EnsemblGenome: locating transcript ranges on a genomic sequence,
+ * and the feature filters retainFeature() and identifiesSequence().
+ */
+public class EnsemblGenomeTest
+{
+ /**
+ * Installs a SequenceOntologyLite as the factory's ontology before the tests
+ * in this class run. alwaysRun is set because this method declares no group
+ * while the tests are in group "Functional"; without it the method is
+ * skipped when tests are selected by group.
+ */
+ @BeforeClass(alwaysRun = true)
+ public void setUp()
+ {
+ SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+ }
+
+ /**
+ * Resets the factory's ontology instance to null after the tests in this
+ * class have run (alwaysRun for the same reason as setUp).
+ */
+ @AfterClass(alwaysRun = true)
+ public void tearDown()
+ {
+ SequenceOntologyFactory.setInstance(null);
+ }
+
+ /**
+ * Test that the transcript part of genomic sequence is correctly identified
+ * by 'transcript' features (or subtypes) with the correct transcript ID
+ */
+ @Test(groups = "Functional")
+ public void testGetGenomicRangesFromFeatures()
+ {
+ EnsemblGenome testee = new EnsemblGenome();
+ SequenceI genomic = new SequenceDummy("chr7");
+ genomic.setStart(10000);
+ genomic.setEnd(50000);
+ String transcriptId = "ABC123";
+
+ // transcript at (start + 10000) length 501
+ SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
+ 20500, 0f,
+ null);
+ sf.setValue("ID", "transcript:" + transcriptId);
+ sf.setStrand("+");
+ genomic.addSequenceFeature(sf);
+
+ // transcript (sub-type) at (start + 500) length 101
+ sf = new SequenceFeature("ncRNA", "", 10500, 10600, 0f, null);
+ sf.setValue("ID", "transcript:" + transcriptId);
+ sf.setStrand("+");
+ genomic.addSequenceFeature(sf);
+
+ // Ensembl treats NMD_transcript_variant as if transcript
+ // although strictly it is a sequence_variant in SO
+ sf = new SequenceFeature("NMD_transcript_variant", "", 11000, 12000,
+ 0f, null);
+ sf.setValue("ID", "transcript:" + transcriptId);
+ sf.setStrand("+");
+ genomic.addSequenceFeature(sf);
+
+ // transcript with a different ID doesn't count
+ sf = new SequenceFeature("transcript", "", 11500, 12600, 0f, null);
+ sf.setValue("ID", "transcript:anotherOne");
+ genomic.addSequenceFeature(sf);
+
+ // parent of transcript feature doesn't count
+ sf = new SequenceFeature("gene_member_region", "", 10000, 50000, 0f,
+ null);
+ genomic.addSequenceFeature(sf);
+
+ MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
+ transcriptId, 23);
+ List<int[]> fromRanges = ranges.getFromRanges();
+ assertEquals(3, fromRanges.size());
+ // from ranges should be sorted by start order
+ assertEquals(10500, fromRanges.get(0)[0]);
+ assertEquals(10600, fromRanges.get(0)[1]);
+ assertEquals(11000, fromRanges.get(1)[0]);
+ assertEquals(12000, fromRanges.get(1)[1]);
+ assertEquals(20000, fromRanges.get(2)[0]);
+ assertEquals(20500, fromRanges.get(2)[1]);
+ // to range should start from given start numbering
+ // and span the combined length: 23 + (101 + 1001 + 501) - 1 = 1625
+ List<int[]> toRanges = ranges.getToRanges();
+ assertEquals(1, toRanges.size());
+ assertEquals(23, toRanges.get(0)[0]);
+ assertEquals(1625, toRanges.get(0)[1]);
+ }
+
+ /**
+ * Test the method that retains features except for 'transcript' (or
+ * sub-type), or those with parent other than the given id
+ */
+ @Test(groups = "Functional")
+ public void testRetainFeature()
+ {
+ String accId = "ABC123";
+ EnsemblGenome testee = new EnsemblGenome();
+
+ // transcript is not kept
+ SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
+ 20500, 0f, null);
+ assertFalse(testee.retainFeature(sf, accId));
+
+ // transcript sub-type is not kept
+ sf.setType("mature_transcript");
+ assertFalse(testee.retainFeature(sf, accId));
+
+ // NMD_transcript_variant is not kept
+ sf.setType("NMD_transcript_variant");
+ assertFalse(testee.retainFeature(sf, accId));
+
+ // other feature with no parent is kept
+ sf.setType("anything");
+ assertTrue(testee.retainFeature(sf, accId));
+
+ // other feature with correct parent is kept
+ sf.setValue("Parent", "transcript:" + accId);
+ assertTrue(testee.retainFeature(sf, accId));
+
+ // other feature with wrong parent is not kept
+ sf.setValue("Parent", "transcript:XYZ");
+ assertFalse(testee.retainFeature(sf, accId));
+ }
+
+ /**
+ * Test the method that picks out 'transcript' (or subtype) features with the
+ * accession id as ID
+ */
+ @Test(groups = "Functional")
+ public void testIdentifiesSequence()
+ {
+ String accId = "ABC123";
+ EnsemblGenome testee = new EnsemblGenome();
+
+ // transcript with no ID not valid
+ SequenceFeature sf = new SequenceFeature("transcript", "", 1, 2, 0f,
+ null);
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // transcript with malformed ID (no accession part) not valid
+ sf.setValue("ID", "transcript");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // transcript with wrong ID not valid
+ sf.setValue("ID", "transcript:XYZ");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // transcript with right ID is valid
+ sf.setValue("ID", "transcript:" + accId);
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // transcript sub-type with right ID is valid
+ sf.setType("ncRNA");
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // Ensembl treats NMD_transcript_variant as if a transcript
+ sf.setType("NMD_transcript_variant");
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // gene not valid:
+ sf.setType("gene");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // exon not valid:
+ sf.setType("exon");
+ assertFalse(testee.identifiesSequence(sf, accId));
+ }
+
+}
import java.util.List;
import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+ "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+ "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
+ /**
+ * Installs a SequenceOntologyLite as the factory's ontology before the tests
+ * in this class run, replacing the per-test setInstance calls removed
+ * elsewhere in this change. alwaysRun makes the method run regardless of
+ * group selection, as it declares no group of its own.
+ */
+ @BeforeClass(alwaysRun = true)
+ public void setUp()
+ {
+ SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+ }
+
+ /**
+ * Resets the factory's ontology instance to null after the tests in this
+ * class have run (alwaysRun for the same reason as setUp).
+ */
+ @AfterClass(alwaysRun = true)
+ public void tearDown()
+ {
+ SequenceOntologyFactory.setInstance(null);
+ }
+
@DataProvider(name = "queries")
public Object[][] createQueryData(Method m)
{
sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
ds.addSequenceFeature(sf);
// CDS for dna 10-12
- sf = new SequenceFeature("some_cds", "", 10, 12, 0f, null);
+ sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
ds.addSequenceFeature(sf);
- SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
List<int[]> ranges = new ArrayList<int[]>();
int mappedLength = testee.getCdsRanges(dnaSeq, ranges);
assertEquals(6, mappedLength);
ds.addSequenceFeature(sf);
ds.addSequenceFeature(sf);
// CDS for dna 13-15
- sf = new SequenceFeature("some_cds", "", 13, 15, 0f, null);
+ sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
ds.addSequenceFeature(sf);
- SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
List<int[]> ranges = new ArrayList<int[]>();
int mappedLength = testee.getCdsRanges(dnaSeq, ranges);