JAL-3010 cache synonyms for Sequence Ontology terms
[jalview.git] / test / jalview / ext / so / SequenceOntologyTest.java
index 7eb01c9..260b011 100644 (file)
@@ -22,10 +22,11 @@ package jalview.ext.so;
 
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
 import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertSame;
 import static org.testng.Assert.assertTrue;
 
-import jalview.datamodel.ontology.OntologyI;
 import jalview.gui.JvOptionPane;
 
 import java.util.Arrays;
@@ -35,6 +36,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.biojava.nbio.ontology.Synonym;
+import org.biojava.nbio.ontology.Term;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
@@ -50,7 +53,7 @@ public class SequenceOntologyTest
     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
   }
 
-  private OntologyI so;
+  private SequenceOntology so;
 
   @BeforeClass(alwaysRun = true)
   public void setUp()
@@ -236,4 +239,83 @@ public class SequenceOntologyTest
     assertEquals(parents.size(), 1);
     assertTrue(parents.contains("sequence_feature"));
   }
+
+  @Test(groups = "Functional")
+  public void testGetTerm()
+  {
+    assertNull(so.getTerm(null));
+    assertNull(so.getTerm("!*£&!"));
+
+    Term t = so.getTerm("SO:0000084");
+    assertNotNull(t);
+    assertEquals(t.getDescription(), "micronuclear_sequence");
+    // name lookup is case sensitive
+    assertNull(so.getTerm("so:0000084"));
+
+    t = so.getTerm("alpha_helix");
+    assertNotNull(t);
+    Object[] synonyms = t.getSynonyms();
+    assertEquals(synonyms.length, 2);
+    assertEquals(((Synonym) synonyms[0]).getName(), "a-helix");
+    assertEquals(((Synonym) synonyms[1]).getName(), "helix");
+    // case-insensitive description lookup
+    Term t2 = so.getTerm("ALPHA_HELIX");
+    assertSame(t, t2);
+    // can also retrieve by synonym
+    t2 = so.getTerm("a-helix");
+    assertSame(t, t2);
+
+    t = so.getTerm("serine_threonine_motif");
+    t2 = so.getTerm("ST-MOTIF"); // synonym is "st_motif"
+    assertNotNull(t);
+    assertSame(t, t2);
+
+    /*
+     * if a synonym is ambiguous within a hierarchy,
+     * we keep it for the most general term (always valid)
+     * helix is a synonym for
+     *   alpha_helix (isA) right_handed_peptide_helix (isA) peptide_helix
+     * motif is a synonym for polypeptide_conserved_motif (isA) polypeptide_motif
+     *    
+     */
+    t = so.getTerm("helix");
+    assertNotNull(t);
+    assertEquals(t.getDescription(), "peptide_helix");
+    t = so.getTerm("motif");
+    assertNotNull(t);
+    assertEquals(t.getDescription(), "polypeptide_motif");
+
+    /*
+     * ambiguous synonyms with no mutual hierarchy are not cached
+     * 'sequence variation' is a synonym for 
+     * sequence_alteration SO:0001059
+     * alternate_sequence_site SO:0001149
+     * and these have no 'isA' relationship
+     */
+    assertNull(so.getTerm("sequence_variation"));
+
+    /*
+     * nmd_transcript is synonym for SO:0001621:NMD_transcript_variant 
+     * and also the description for SO:0002114:NMD_transcript
+     * since v3.1 of so-simple.obo
+     */
+    t = so.getTerm("SO:0002114");
+    assertNotNull(t);
+    t2 = so.getTerm("SO:0001621");
+    assertNotNull(t2);
+    assertSame(t, so.getTerm("nmd_transcript"));
+    assertSame(t2, so.getTerm("nmd_transcript_variant"));
+  }
+
+  @Test(groups = "Functional")
+  public void testCanonicalise()
+  {
+    assertNull(SequenceOntology.canonicalise(null));
+    assertEquals(SequenceOntology.canonicalise("A-b_c"), "a_b_c");
+    assertEquals(SequenceOntology.canonicalise("A-b-C"), "a_b_c");
+    assertEquals(SequenceOntology.canonicalise("metal binding site"), "metal_binding_site");
+    String s = "thisought_nottobe_modified!";
+    String s2 = SequenceOntology.canonicalise(s);
+    assertSame(s, s2);
+  }
 }