Merge branch 'develop' into features/JAL-3010ontologyFeatureSettings
[jalview.git] / test / jalview / ext / so / SequenceOntologyTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.so;
22
23 import static org.testng.Assert.assertEquals;
24 import static org.testng.Assert.assertFalse;
25 import static org.testng.Assert.assertNotNull;
26 import static org.testng.Assert.assertNull;
27 import static org.testng.Assert.assertSame;
28 import static org.testng.Assert.assertTrue;
29
30 import jalview.bin.Cache;
31 import jalview.gui.JvOptionPane;
32
33 import java.util.Arrays;
34 import java.util.Collections;
35 import java.util.HashSet;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Set;
39
40 import org.biojava.nbio.ontology.Synonym;
41 import org.biojava.nbio.ontology.Term;
42 import org.testng.annotations.BeforeClass;
43 import org.testng.annotations.Test;
44
45 import junit.extensions.PA;
46
47 public class SequenceOntologyTest
48 {
49
50   @BeforeClass(alwaysRun = true)
51   public void setUpJvOptionPane()
52   {
53     JvOptionPane.setInteractiveMode(false);
54     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
55   }
56
57   private SequenceOntology so;
58
59   @BeforeClass(alwaysRun = true)
60   public void setUp()
61   {
62     Cache.initLogger();
63     long now = System.currentTimeMillis();
64     try
65     {
66       so = new SequenceOntology();
67     } catch (Throwable t)
68     {
69       System.out.println("SOTest error ");
70       t.printStackTrace(System.err);
71     }
72     long elapsed = System.currentTimeMillis() - now;
73     System.out.println("Load and cache of Sequence Ontology took "
74             + elapsed + "ms");
75   }
76
77   @Test(groups = "Functional")
78   public void testIsA()
79   {
80     assertFalse(so.isA(null, null));
81     assertFalse(so.isA(null, "SO:0000087"));
82     assertFalse(so.isA("SO:0000087", null));
83     assertFalse(so.isA("complete", "garbage"));
84
85     assertTrue(so.isA("SO:0000087", "SO:0000704"));
86     assertFalse(so.isA("SO:0000704", "SO:0000087"));
87     assertTrue(so.isA("SO:0000736", "SO:0000735"));
88
89     // same thing:
90     assertTrue(so.isA("micronuclear_sequence", "micronuclear_sequence"));
91     // direct parent:
92     assertTrue(so.isA("micronuclear_sequence", "organelle_sequence"));
93     // grandparent:
94     assertTrue(so.isA("micronuclear_sequence", "sequence_location"));
95     // great-grandparent:
96     assertTrue(so.isA("micronuclear_sequence", "sequence_attribute"));
97
98     // same thing by name / description:
99     assertTrue(so.isA("micronuclear_sequence", "SO:0000084"));
100     assertTrue(so.isA("SO:0000084", "micronuclear_sequence"));
101     assertTrue(so.isA("SO:0000084", "SO:0000084"));
102
103     // SO name to description:
104     assertTrue(so.isA("SO:0000084", "organelle_sequence"));
105     assertTrue(so.isA("SO:0000084", "sequence_location"));
106     assertTrue(so.isA("SO:0000084", "sequence_attribute"));
107
108     // description to SO name:
109     assertTrue(so.isA("micronuclear_sequence", "SO:0000736"));
110     assertTrue(so.isA("micronuclear_sequence", "SO:0000735"));
111     assertTrue(so.isA("micronuclear_sequence", "SO:0000400"));
112   }
113
114   @Test(groups = "Functional")
115   public void testIsCDS()
116   {
117     assertTrue(so.isA("CDS", "CDS"));
118     assertTrue(so.isA("CDS_predicted", "CDS"));
119     assertTrue(so.isA("transposable_element_CDS", "CDS"));
120     assertTrue(so.isA("edited_CDS", "CDS"));
121     assertTrue(so.isA("CDS_independently_known", "CDS"));
122     assertTrue(so.isA("CDS_fragment", "CDS"));
123     assertFalse(so.isA("CDS_region", "CDS"));// part_of
124     assertFalse(so.isA("polypeptide", "CDS")); // derives_from
125   }
126
127   @Test(groups = "Functional")
128   public void testIsSequenceVariant()
129   {
130     assertFalse(so.isA("CDS", "sequence_variant"));
131     assertTrue(so.isA("sequence_variant", "sequence_variant"));
132
133     /*
134      * these should all be sub-types of sequence_variant
135      */
136     assertTrue(so.isA("structural_variant", "sequence_variant"));
137     assertTrue(so.isA("feature_variant", "sequence_variant"));
138     assertTrue(so.isA("gene_variant", "sequence_variant"));
139     assertTrue(so.isA("transcript_variant", "sequence_variant"));
140     assertTrue(so.isA("NMD_transcript_variant", "sequence_variant"));
141     assertTrue(so.isA("missense_variant", "sequence_variant"));
142     assertTrue(so.isA("synonymous_variant", "sequence_variant"));
143     assertTrue(so.isA("frameshift_variant", "sequence_variant"));
144     assertTrue(so.isA("5_prime_UTR_variant", "sequence_variant"));
145     assertTrue(so.isA("3_prime_UTR_variant", "sequence_variant"));
146     assertTrue(so.isA("stop_gained", "sequence_variant"));
147     assertTrue(so.isA("stop_lost", "sequence_variant"));
148     assertTrue(so.isA("inframe_deletion", "sequence_variant"));
149     assertTrue(so.isA("inframe_insertion", "sequence_variant"));
150   }
151
152   @Test(groups = "Functional")
153   public void testGetChildTerms()
154   {
155     List<String> terms = Collections.<String> emptyList();
156     List<String> children = so.getChildTerms("exon", terms);
157     assertTrue(children.isEmpty());
158   
159     terms = Arrays.asList("gene", "transcript", "snRNA", "junk", "mRNA");
160     children = so.getChildTerms("exon", terms);
161     assertTrue(children.isEmpty());
162     children = so.getChildTerms("transcript", terms);
163     assertEquals(children.size(), 2);
164     assertTrue(children.contains("snRNA"));
165     assertTrue(children.contains("mRNA"));
166   
167     terms = Arrays.asList("gene", "transcript", "synonymous_variant",
168             "stop_lost", "chain");
169     children = so.getChildTerms("sequence_variant", terms);
170     assertEquals(children.size(), 2);
171     assertTrue(children.contains("synonymous_variant"));
172     assertTrue(children.contains("stop_lost"));
173   }
174
175   @Test(groups = "Functional")
176   public void testGetParentTerms()
177   {
178     Set<String> terms = new HashSet<>();
179     terms.add("sequence_variant");
180     terms.add("NMD_transcript_variant");
181     terms.add("stop_lost");
182     terms.add("chain"); // not an SO term
183   
184     Set<String> parents = so.getParentTerms(terms);
185     assertEquals(parents.size(), 2);
186     assertTrue(parents.contains("sequence_variant"));
187     assertTrue(parents.contains("chain"));
188   }
189
190   @Test(groups = "Functional")
191   public void testGetParents()
192   {
193     // invalid term
194     List<String> roots = so.getParents("xyz");
195     assertTrue(roots.isEmpty());
196
197     roots = so.getParents("stop_gained");
198     assertEquals(roots.size(), 2);
199     assertTrue(roots.contains("nonsynonymous_variant"));
200     assertTrue(roots.contains("feature_truncation"));
201
202     // top level term
203     roots = so.getParents("sequence_variant");
204     assertTrue(roots.isEmpty());
205
206     roots = so.getParents(null);
207     assertTrue(roots.isEmpty());
208   }
209
210   @Test(groups = "Functional")
211   public void testGetRootParents()
212   {
213     List<String> roots = so.getRootParents("xyz");
214     assertNull(roots);
215     roots = so.getRootParents(null);
216     assertNull(roots);
217
218     roots = so.getRootParents("stop_gained");
219     assertEquals(roots.size(), 1);
220     assertEquals(roots.get(0), "sequence_variant");
221
222     roots = so.getRootParents("sequence_variant");
223     assertEquals(roots.size(), 1);
224     assertEquals(roots.get(0), "sequence_variant");
225
226     roots = so.getRootParents("alanine");
227     assertEquals(roots.size(), 1);
228     assertEquals(roots.get(0), "sequence_feature");
229
230     /*
231      * verify these are now cached
232      */
233     Map<String, List<String>> cached = (Map<String, List<String>>) PA
234             .getValue(so, "rootParents");
235     List<String> parents = cached.get("stop_gained");
236     assertEquals(parents.size(), 1);
237     parents = cached.get("sequence_variant");
238     assertEquals(parents.size(), 1);
239     assertTrue(parents.contains("sequence_variant"));
240     parents = cached.get("alanine");
241     assertEquals(parents.size(), 1);
242     assertTrue(parents.contains("sequence_feature"));
243   }
244
245   @Test(groups = "Functional")
246   public void testGetTerm()
247   {
248     assertNull(so.getTerm(null));
249     assertNull(so.getTerm("!*£&!"));
250
251     Term t = so.getTerm("SO:0000084");
252     assertNotNull(t);
253     assertEquals(t.getDescription(), "micronuclear_sequence");
254     // name lookup is case sensitive
255     assertNull(so.getTerm("so:0000084"));
256
257     t = so.getTerm("alpha_helix");
258     assertNotNull(t);
259     Object[] synonyms = t.getSynonyms();
260     assertEquals(synonyms.length, 2);
261     assertEquals(((Synonym) synonyms[0]).getName(), "a-helix");
262     assertEquals(((Synonym) synonyms[1]).getName(), "helix");
263     // case-insensitive description lookup
264     Term t2 = so.getTerm("ALPHA_HELIX");
265     assertSame(t, t2);
266     // can also retrieve by synonym
267     t2 = so.getTerm("a-helix");
268     assertSame(t, t2);
269
270     t = so.getTerm("serine_threonine_motif");
271     t2 = so.getTerm("ST-MOTIF"); // synonym is "st_motif"
272     assertNotNull(t);
273     assertSame(t, t2);
274
275     /*
276      * if a synonym is ambiguous within a hierarchy,
277      * we keep it for the most general term (always valid)
278      * helix is a synonym for
279      *   alpha_helix (isA) right_handed_peptide_helix (isA) peptide_helix
280      * motif is a synonym for polypeptide_conserved_motif (isA) polypeptide_motif
281      *    
282      */
283     t = so.getTerm("helix");
284     assertNotNull(t);
285     assertEquals(t.getDescription(), "peptide_helix");
286     t = so.getTerm("motif");
287     assertNotNull(t);
288     assertEquals(t.getDescription(), "polypeptide_motif");
289
290     /*
291      * ambiguous synonyms with no mutual hierarchy are not cached
292      * 'sequence variation' is a synonym for 
293      * sequence_alteration SO:0001059
294      * alternate_sequence_site SO:0001149
295      * and these have no 'isA' relationship
296      */
297     assertNull(so.getTerm("sequence_variation"));
298
299     /*
300      * nmd_transcript is synonym for SO:0001621:NMD_transcript_variant 
301      * and also the description for SO:0002114:NMD_transcript
302      * since v3.1 of so-simple.obo
303      */
304     t = so.getTerm("SO:0002114");
305     assertNotNull(t);
306     t2 = so.getTerm("SO:0001621");
307     assertNotNull(t2);
308     assertSame(t, so.getTerm("nmd_transcript"));
309     assertSame(t2, so.getTerm("nmd_transcript_variant"));
310   }
311
312   @Test(groups = "Functional")
313   public void testCanonicalise()
314   {
315     assertNull(SequenceOntology.canonicalise(null));
316     assertEquals(SequenceOntology.canonicalise("A-b_c"), "a_b_c");
317     assertEquals(SequenceOntology.canonicalise("A-b-C"), "a_b_c");
318     assertEquals(SequenceOntology.canonicalise("metal binding site"), "metal_binding_site");
319     String s = "thisought_nottobe_modified!";
320     String s2 = SequenceOntology.canonicalise(s);
321     assertSame(s, s2);
322   }
323 }