JAL-2154 canonical dbsources are lowercased before matching
[jalview.git] / test / jalview / ext / ensembl / EnsemblGenomeTest.java
1 package jalview.ext.ensembl;
2
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertTrue;
6
7 import jalview.datamodel.SequenceDummy;
8 import jalview.datamodel.SequenceFeature;
9 import jalview.datamodel.SequenceI;
10 import jalview.io.gff.SequenceOntologyFactory;
11 import jalview.io.gff.SequenceOntologyLite;
12 import jalview.util.MapList;
13
14 import java.util.List;
15
16 import org.testng.annotations.AfterClass;
17 import org.testng.annotations.BeforeClass;
18 import org.testng.annotations.Test;
19
20 public class EnsemblGenomeTest
21 {
22   @BeforeClass(alwaysRun = true)
23   public void setUp()
24   {
25     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
26   }
27
28   @AfterClass(alwaysRun = true)
29   public void tearDown()
30   {
31     SequenceOntologyFactory.setInstance(null);
32   }
33
34   /**
35    * Test that the genomic sequence part of genomic sequence is correctly
36    * identified by 'transcript' features (or subtypes) with the correct gene ID
37    */
38   @Test(groups = "Functional")
39   public void testGetGenomicRangesFromFeatures()
40   {
41     EnsemblGenome testee = new EnsemblGenome();
42     SequenceI genomic = new SequenceDummy("chr7");
43     genomic.setStart(10000);
44     genomic.setEnd(50000);
45     String transcriptId = "ABC123";
46   
47     // transcript at (start+10000) length 501
48     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
49             20500, 0f,
50             null);
51     sf.setValue("ID", "transcript:" + transcriptId);
52     sf.setStrand("+");
53     genomic.addSequenceFeature(sf);
54   
55     // transcript (sub-type) at (start + 10500) length 101
56     sf = new SequenceFeature("ncRNA", "", 10500, 10600, 0f, null);
57     sf.setValue("ID", "transcript:" + transcriptId);
58     sf.setStrand("+");
59     genomic.addSequenceFeature(sf);
60
61     // Ensembl treats NMD_transcript_variant as if transcript
62     // although strictly it is a sequence_variant in SO
63     sf = new SequenceFeature("NMD_transcript_variant", "", 11000, 12000,
64             0f, null);
65     sf.setValue("ID", "transcript:" + transcriptId);
66     sf.setStrand("+");
67     genomic.addSequenceFeature(sf);
68   
69     // transcript with a different ID doesn't count
70     sf = new SequenceFeature("transcript", "", 11500, 12600, 0f, null);
71     sf.setValue("ID", "transcript:anotherOne");
72     genomic.addSequenceFeature(sf);
73   
74     // parent of transcript feature doesn't count
75     sf = new SequenceFeature("gene_member_region", "", 10000, 50000, 0f,
76             null);
77     genomic.addSequenceFeature(sf);
78
79     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
80             transcriptId, 23);
81     List<int[]> fromRanges = ranges.getFromRanges();
82     assertEquals(3, fromRanges.size());
83     // from ranges should be sorted by start order
84     assertEquals(10500, fromRanges.get(0)[0]);
85     assertEquals(10600, fromRanges.get(0)[1]);
86     assertEquals(11000, fromRanges.get(1)[0]);
87     assertEquals(12000, fromRanges.get(1)[1]);
88     assertEquals(20000, fromRanges.get(2)[0]);
89     assertEquals(20500, fromRanges.get(2)[1]);
90     // to range should start from given start numbering
91     List<int[]> toRanges = ranges.getToRanges();
92     assertEquals(1, toRanges.size());
93     assertEquals(23, toRanges.get(0)[0]);
94     assertEquals(1625, toRanges.get(0)[1]);
95   }
96
97   /**
98    * Test the method that retains features except for 'transcript' (or
99    * sub-type), or those with parent other than the given id
100    */
101   @Test(groups = "Functional")
102   public void testRetainFeature()
103   {
104     String accId = "ABC123";
105     EnsemblGenome testee = new EnsemblGenome();
106
107     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
108             20500, 0f, null);
109     assertFalse(testee.retainFeature(sf, accId));
110   
111     sf.setType("mature_transcript");
112     assertFalse(testee.retainFeature(sf, accId));
113   
114     sf.setType("NMD_transcript_variant");
115     assertFalse(testee.retainFeature(sf, accId));
116   
117     // other feature with no parent is kept
118     sf.setType("anything");
119     assertTrue(testee.retainFeature(sf, accId));
120
121     // other feature with correct parent is kept
122     sf.setValue("Parent", "transcript:" + accId);
123     assertTrue(testee.retainFeature(sf, accId));
124
125     // other feature with wrong parent is not kept
126     sf.setValue("Parent", "transcript:XYZ");
127     assertFalse(testee.retainFeature(sf, accId));
128   }
129
130   /**
131    * Test the method that picks out 'transcript' (or subtype) features with the
132    * accession id as ID
133    */
134   @Test(groups = "Functional")
135   public void testIdentifiesSequence()
136   {
137     String accId = "ABC123";
138     EnsemblGenome testee = new EnsemblGenome();
139   
140     // transcript with no ID not valid
141     SequenceFeature sf = new SequenceFeature("transcript", "", 1, 2, 0f,
142             null);
143     assertFalse(testee.identifiesSequence(sf, accId));
144   
145     // transcript with wrong ID not valid
146     sf.setValue("ID", "transcript");
147     assertFalse(testee.identifiesSequence(sf, accId));
148   
149     // transcript with right ID is valid
150     sf.setValue("ID", "transcript:" + accId);
151     assertTrue(testee.identifiesSequence(sf, accId));
152   
153     // transcript sub-type with right ID is valid
154     sf.setType("ncRNA");
155     assertTrue(testee.identifiesSequence(sf, accId));
156
157     // Ensembl treats NMD_transcript_variant as if a transcript
158     sf.setType("NMD_transcript_variant");
159     assertTrue(testee.identifiesSequence(sf, accId));
160   
161     // gene not valid:
162     sf.setType("gene");
163     assertFalse(testee.identifiesSequence(sf, accId));
164   
165     // exon not valid:
166     sf.setType("exon");
167     assertFalse(testee.identifiesSequence(sf, accId));
168   }
169
170 }