260b01121cb1679c1267642b29485939feb0d370
[jalview.git] / test / jalview / ext / so / SequenceOntologyTest.java
/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.ext.so;
22
23 import static org.testng.Assert.assertEquals;
24 import static org.testng.Assert.assertFalse;
25 import static org.testng.Assert.assertNotNull;
26 import static org.testng.Assert.assertNull;
27 import static org.testng.Assert.assertSame;
28 import static org.testng.Assert.assertTrue;
29
30 import jalview.gui.JvOptionPane;
31
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.HashSet;
35 import java.util.List;
36 import java.util.Map;
37 import java.util.Set;
38
39 import org.biojava.nbio.ontology.Synonym;
40 import org.biojava.nbio.ontology.Term;
41 import org.testng.annotations.BeforeClass;
42 import org.testng.annotations.Test;
43
44 import junit.extensions.PA;
45
46 public class SequenceOntologyTest
47 {
48
49   @BeforeClass(alwaysRun = true)
50   public void setUpJvOptionPane()
51   {
52     JvOptionPane.setInteractiveMode(false);
53     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
54   }
55
56   private SequenceOntology so;
57
58   @BeforeClass(alwaysRun = true)
59   public void setUp()
60   {
61     long now = System.currentTimeMillis();
62     try
63     {
64       so = new SequenceOntology();
65     } catch (Throwable t)
66     {
67       System.out.println("SOTest error ");
68       t.printStackTrace(System.err);
69     }
70     long elapsed = System.currentTimeMillis() - now;
71     System.out.println("Load and cache of Sequence Ontology took "
72             + elapsed + "ms");
73   }
74
75   @Test(groups = "Functional")
76   public void testIsA()
77   {
78     assertFalse(so.isA(null, null));
79     assertFalse(so.isA(null, "SO:0000087"));
80     assertFalse(so.isA("SO:0000087", null));
81     assertFalse(so.isA("complete", "garbage"));
82
83     assertTrue(so.isA("SO:0000087", "SO:0000704"));
84     assertFalse(so.isA("SO:0000704", "SO:0000087"));
85     assertTrue(so.isA("SO:0000736", "SO:0000735"));
86
87     // same thing:
88     assertTrue(so.isA("micronuclear_sequence", "micronuclear_sequence"));
89     // direct parent:
90     assertTrue(so.isA("micronuclear_sequence", "organelle_sequence"));
91     // grandparent:
92     assertTrue(so.isA("micronuclear_sequence", "sequence_location"));
93     // great-grandparent:
94     assertTrue(so.isA("micronuclear_sequence", "sequence_attribute"));
95
96     // same thing by name / description:
97     assertTrue(so.isA("micronuclear_sequence", "SO:0000084"));
98     assertTrue(so.isA("SO:0000084", "micronuclear_sequence"));
99     assertTrue(so.isA("SO:0000084", "SO:0000084"));
100
101     // SO name to description:
102     assertTrue(so.isA("SO:0000084", "organelle_sequence"));
103     assertTrue(so.isA("SO:0000084", "sequence_location"));
104     assertTrue(so.isA("SO:0000084", "sequence_attribute"));
105
106     // description to SO name:
107     assertTrue(so.isA("micronuclear_sequence", "SO:0000736"));
108     assertTrue(so.isA("micronuclear_sequence", "SO:0000735"));
109     assertTrue(so.isA("micronuclear_sequence", "SO:0000400"));
110   }
111
112   @Test(groups = "Functional")
113   public void testIsCDS()
114   {
115     assertTrue(so.isA("CDS", "CDS"));
116     assertTrue(so.isA("CDS_predicted", "CDS"));
117     assertTrue(so.isA("transposable_element_CDS", "CDS"));
118     assertTrue(so.isA("edited_CDS", "CDS"));
119     assertTrue(so.isA("CDS_independently_known", "CDS"));
120     assertTrue(so.isA("CDS_fragment", "CDS"));
121     assertFalse(so.isA("CDS_region", "CDS"));// part_of
122     assertFalse(so.isA("polypeptide", "CDS")); // derives_from
123   }
124
125   @Test(groups = "Functional")
126   public void testIsSequenceVariant()
127   {
128     assertFalse(so.isA("CDS", "sequence_variant"));
129     assertTrue(so.isA("sequence_variant", "sequence_variant"));
130
131     /*
132      * these should all be sub-types of sequence_variant
133      */
134     assertTrue(so.isA("structural_variant", "sequence_variant"));
135     assertTrue(so.isA("feature_variant", "sequence_variant"));
136     assertTrue(so.isA("gene_variant", "sequence_variant"));
137     assertTrue(so.isA("transcript_variant", "sequence_variant"));
138     assertTrue(so.isA("NMD_transcript_variant", "sequence_variant"));
139     assertTrue(so.isA("missense_variant", "sequence_variant"));
140     assertTrue(so.isA("synonymous_variant", "sequence_variant"));
141     assertTrue(so.isA("frameshift_variant", "sequence_variant"));
142     assertTrue(so.isA("5_prime_UTR_variant", "sequence_variant"));
143     assertTrue(so.isA("3_prime_UTR_variant", "sequence_variant"));
144     assertTrue(so.isA("stop_gained", "sequence_variant"));
145     assertTrue(so.isA("stop_lost", "sequence_variant"));
146     assertTrue(so.isA("inframe_deletion", "sequence_variant"));
147     assertTrue(so.isA("inframe_insertion", "sequence_variant"));
148   }
149
150   @Test(groups = "Functional")
151   public void testGetChildTerms()
152   {
153     List<String> terms = Collections.<String> emptyList();
154     List<String> children = so.getChildTerms("exon", terms);
155     assertTrue(children.isEmpty());
156   
157     terms = Arrays.asList("gene", "transcript", "snRNA", "junk", "mRNA");
158     children = so.getChildTerms("exon", terms);
159     assertTrue(children.isEmpty());
160     children = so.getChildTerms("transcript", terms);
161     assertEquals(children.size(), 2);
162     assertTrue(children.contains("snRNA"));
163     assertTrue(children.contains("mRNA"));
164   
165     terms = Arrays.asList("gene", "transcript", "synonymous_variant",
166             "stop_lost", "chain");
167     children = so.getChildTerms("sequence_variant", terms);
168     assertEquals(children.size(), 2);
169     assertTrue(children.contains("synonymous_variant"));
170     assertTrue(children.contains("stop_lost"));
171   }
172
173   @Test(groups = "Functional")
174   public void testGetParentTerms()
175   {
176     Set<String> terms = new HashSet<>();
177     terms.add("sequence_variant");
178     terms.add("NMD_transcript_variant");
179     terms.add("stop_lost");
180     terms.add("chain"); // not an SO term
181   
182     Set<String> parents = so.getParentTerms(terms);
183     assertEquals(parents.size(), 2);
184     assertTrue(parents.contains("sequence_variant"));
185     assertTrue(parents.contains("chain"));
186   }
187
188   @Test(groups = "Functional")
189   public void testGetParents()
190   {
191     // invalid term
192     List<String> roots = so.getParents("xyz");
193     assertTrue(roots.isEmpty());
194
195     roots = so.getParents("stop_gained");
196     assertEquals(roots.size(), 2);
197     assertTrue(roots.contains("nonsynonymous_variant"));
198     assertTrue(roots.contains("feature_truncation"));
199
200     // top level term
201     roots = so.getParents("sequence_variant");
202     assertTrue(roots.isEmpty());
203
204     roots = so.getParents(null);
205     assertTrue(roots.isEmpty());
206   }
207
208   @Test(groups = "Functional")
209   public void testGetRootParents()
210   {
211     List<String> roots = so.getRootParents("xyz");
212     assertNull(roots);
213     roots = so.getRootParents(null);
214     assertNull(roots);
215
216     roots = so.getRootParents("stop_gained");
217     assertEquals(roots.size(), 1);
218     assertEquals(roots.get(0), "sequence_variant");
219
220     roots = so.getRootParents("sequence_variant");
221     assertEquals(roots.size(), 1);
222     assertEquals(roots.get(0), "sequence_variant");
223
224     roots = so.getRootParents("alanine");
225     assertEquals(roots.size(), 1);
226     assertEquals(roots.get(0), "sequence_feature");
227
228     /*
229      * verify these are now cached
230      */
231     Map<String, List<String>> cached = (Map<String, List<String>>) PA
232             .getValue(so, "rootParents");
233     List<String> parents = cached.get("stop_gained");
234     assertEquals(parents.size(), 1);
235     parents = cached.get("sequence_variant");
236     assertEquals(parents.size(), 1);
237     assertTrue(parents.contains("sequence_variant"));
238     parents = cached.get("alanine");
239     assertEquals(parents.size(), 1);
240     assertTrue(parents.contains("sequence_feature"));
241   }
242
243   @Test(groups = "Functional")
244   public void testGetTerm()
245   {
246     assertNull(so.getTerm(null));
247     assertNull(so.getTerm("!*£&!"));
248
249     Term t = so.getTerm("SO:0000084");
250     assertNotNull(t);
251     assertEquals(t.getDescription(), "micronuclear_sequence");
252     // name lookup is case sensitive
253     assertNull(so.getTerm("so:0000084"));
254
255     t = so.getTerm("alpha_helix");
256     assertNotNull(t);
257     Object[] synonyms = t.getSynonyms();
258     assertEquals(synonyms.length, 2);
259     assertEquals(((Synonym) synonyms[0]).getName(), "a-helix");
260     assertEquals(((Synonym) synonyms[1]).getName(), "helix");
261     // case-insensitive description lookup
262     Term t2 = so.getTerm("ALPHA_HELIX");
263     assertSame(t, t2);
264     // can also retrieve by synonym
265     t2 = so.getTerm("a-helix");
266     assertSame(t, t2);
267
268     t = so.getTerm("serine_threonine_motif");
269     t2 = so.getTerm("ST-MOTIF"); // synonym is "st_motif"
270     assertNotNull(t);
271     assertSame(t, t2);
272
273     /*
274      * if a synonym is ambiguous within a hierarchy,
275      * we keep it for the most general term (always valid)
276      * helix is a synonym for
277      *   alpha_helix (isA) right_handed_peptide_helix (isA) peptide_helix
278      * motif is a synonym for polypeptide_conserved_motif (isA) polypeptide_motif
279      *    
280      */
281     t = so.getTerm("helix");
282     assertNotNull(t);
283     assertEquals(t.getDescription(), "peptide_helix");
284     t = so.getTerm("motif");
285     assertNotNull(t);
286     assertEquals(t.getDescription(), "polypeptide_motif");
287
288     /*
289      * ambiguous synonyms with no mutual hierarchy are not cached
290      * 'sequence variation' is a synonym for 
291      * sequence_alteration SO:0001059
292      * alternate_sequence_site SO:0001149
293      * and these have no 'isA' relationship
294      */
295     assertNull(so.getTerm("sequence_variation"));
296
297     /*
298      * nmd_transcript is synonym for SO:0001621:NMD_transcript_variant 
299      * and also the description for SO:0002114:NMD_transcript
300      * since v3.1 of so-simple.obo
301      */
302     t = so.getTerm("SO:0002114");
303     assertNotNull(t);
304     t2 = so.getTerm("SO:0001621");
305     assertNotNull(t2);
306     assertSame(t, so.getTerm("nmd_transcript"));
307     assertSame(t2, so.getTerm("nmd_transcript_variant"));
308   }
309
310   @Test(groups = "Functional")
311   public void testCanonicalise()
312   {
313     assertNull(SequenceOntology.canonicalise(null));
314     assertEquals(SequenceOntology.canonicalise("A-b_c"), "a_b_c");
315     assertEquals(SequenceOntology.canonicalise("A-b-C"), "a_b_c");
316     assertEquals(SequenceOntology.canonicalise("metal binding site"), "metal_binding_site");
317     String s = "thisought_nottobe_modified!";
318     String s2 = SequenceOntology.canonicalise(s);
319     assertSame(s, s2);
320   }
321 }