JAL-2154 canonical dbsources are lowercased before matching
[jalview.git] / test / jalview / ext / ensembl / EnsemblCdsTest.java
1 package jalview.ext.ensembl;
2
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertTrue;
6
7 import jalview.datamodel.SequenceDummy;
8 import jalview.datamodel.SequenceFeature;
9 import jalview.datamodel.SequenceI;
10 import jalview.io.gff.SequenceOntologyFactory;
11 import jalview.io.gff.SequenceOntologyLite;
12 import jalview.util.MapList;
13
14 import java.util.List;
15
16 import org.testng.Assert;
17 import org.testng.annotations.AfterClass;
18 import org.testng.annotations.BeforeClass;
19 import org.testng.annotations.Test;
20
21 public class EnsemblCdsTest
22 {
23   @BeforeClass(alwaysRun = true)
24   public void setUp()
25   {
26     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
27   }
28
29   @AfterClass(alwaysRun = true)
30   public void tearDown()
31   {
32     SequenceOntologyFactory.setInstance(null);
33   }
34
35   /**
36    * Test that the cdna part of genomic sequence is correctly identified by
37    * 'CDS' features (or subtypes) with the desired transcript as parent
38    */
39   @Test(groups = "Functional")
40   public void testGetGenomicRangesFromFeatures()
41   {
42     EnsemblCds testee = new EnsemblCds();
43     SequenceI genomic = new SequenceDummy("chr7");
44     genomic.setStart(10000);
45     genomic.setEnd(50000);
46     String transcriptId = "ABC123";
47   
48     // CDS at (start+10000) length 501
49     SequenceFeature sf = new SequenceFeature("CDS", "", 20000, 20500, 0f,
50             null);
51     sf.setValue("Parent", "transcript:" + transcriptId);
52     sf.setStrand("+");
53     genomic.addSequenceFeature(sf);
54   
55     // CDS (sub-type) at (start + 10500) length 101
56     sf = new SequenceFeature("CDS_predicted", "", 10500, 10600, 0f, null);
57     sf.setValue("Parent", "transcript:" + transcriptId);
58     sf.setStrand("+");
59     genomic.addSequenceFeature(sf);
60   
61     // CDS belonging to a different transcript doesn't count
62     sf = new SequenceFeature("CDS", "", 11500, 12600, 0f, null);
63     sf.setValue("Parent", "transcript:anotherOne");
64     genomic.addSequenceFeature(sf);
65   
66     // exon feature doesn't count
67     sf = new SequenceFeature("exon", "", 10000, 50000, 0f, null);
68     genomic.addSequenceFeature(sf);
69
70     // mRNA_region feature doesn't count (parent of CDS)
71     sf = new SequenceFeature("mRNA_region", "", 10000, 50000, 0f, null);
72     genomic.addSequenceFeature(sf);
73   
74     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
75             transcriptId, 23);
76     List<int[]> fromRanges = ranges.getFromRanges();
77     assertEquals(2, fromRanges.size());
78     // from ranges should be sorted by start order
79     assertEquals(10500, fromRanges.get(0)[0]);
80     assertEquals(10600, fromRanges.get(0)[1]);
81     assertEquals(20000, fromRanges.get(1)[0]);
82     assertEquals(20500, fromRanges.get(1)[1]);
83     // to range should start from given start numbering
84     List<int[]> toRanges = ranges.getToRanges();
85     assertEquals(1, toRanges.size());
86     assertEquals(23, toRanges.get(0)[0]);
87     assertEquals(624, toRanges.get(0)[1]);
88   }
89
90   /**
91    * Test the method that retains features except for 'CDS' (or subtypes), or
92    * features with parent other than the given id
93    */
94   @Test(groups = "Functional")
95   public void testRetainFeature()
96   {
97     String accId = "ABC123";
98     EnsemblCds testee = new EnsemblCds();
99   
100     SequenceFeature sf = new SequenceFeature("CDS", "", 20000,
101             20500, 0f, null);
102     assertFalse(testee.retainFeature(sf, accId));
103   
104     sf.setType("CDS_predicted");
105     assertFalse(testee.retainFeature(sf, accId));
106   
107     // other feature with no parent is retained
108     sf.setType("sequence_variant");
109     assertTrue(testee.retainFeature(sf, accId));
110   
111     // other feature with desired parent is retained
112     sf.setValue("Parent", "transcript:" + accId);
113     assertTrue(testee.retainFeature(sf, accId));
114   
115     // feature with wrong parent is not retained
116     sf.setValue("Parent", "transcript:XYZ");
117     assertFalse(testee.retainFeature(sf, accId));
118   }
119
120   /**
121    * Test the method that picks out 'CDS' (or subtype) features with the
122    * accession id as parent
123    */
124   @Test(groups = "Functional")
125   public void testIdentifiesSequence()
126   {
127     String accId = "ABC123";
128     EnsemblCds testee = new EnsemblCds();
129   
130     // cds with no parent not valid
131     SequenceFeature sf = new SequenceFeature("CDS", "", 1, 2, 0f, null);
132     assertFalse(testee.identifiesSequence(sf, accId));
133   
134     // cds with wrong parent not valid
135     sf.setValue("Parent", "transcript:XYZ");
136     assertFalse(testee.identifiesSequence(sf, accId));
137   
138     // cds with right parent is valid
139     sf.setValue("Parent", "transcript:" + accId);
140     assertTrue(testee.identifiesSequence(sf, accId));
141   
142     // cds sub-type with right parent is valid
143     sf.setType("CDS_predicted");
144     assertTrue(testee.identifiesSequence(sf, accId));
145   
146     // transcript not valid:
147     sf.setType("transcript");
148     assertFalse(testee.identifiesSequence(sf, accId));
149   
150     // exon not valid:
151     sf.setType("exon");
152     assertFalse(testee.identifiesSequence(sf, accId));
153   }
154
155   @Test(groups = "Functional")
156   public void testIsValidReference() throws Exception
157   {
158     EnsemblSequenceFetcher esq = new EnsemblCds();
159     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
160     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
161     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
162     Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
163     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
164     // non-human species have a 3 character identifier included:
165     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
166   }
167
168 }