JAL-3010 refactoring to support feature type grouping by ontology
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 1 Jun 2018 18:29:30 +0000 (19:29 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 1 Jun 2018 18:29:30 +0000 (19:29 +0100)
src/jalview/datamodel/ontology/OntologyBase.java [new file with mode: 0644]
src/jalview/datamodel/ontology/OntologyI.java [new file with mode: 0644]
src/jalview/ext/so/SequenceOntology.java
src/jalview/io/gff/SequenceOntologyI.java
src/jalview/io/gff/SequenceOntologyLite.java
test/jalview/ext/so/SequenceOntologyTest.java
test/jalview/io/gff/SequenceOntologyLiteTest.java

diff --git a/src/jalview/datamodel/ontology/OntologyBase.java b/src/jalview/datamodel/ontology/OntologyBase.java
new file mode 100644 (file)
index 0000000..25dae22
--- /dev/null
@@ -0,0 +1,73 @@
+package jalview.datamodel.ontology;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * A base class for models of Sequence Ontology and others. It implements the
+ * term grouping methods of OntologyI using only the <code>isA</code> test.
+ * 
+ * @author gmcarstairs
+ */
+public abstract class OntologyBase implements OntologyI
+{
+  @Override
+  public Set<String> getParentTerms(Set<String> terms)
+  {
+    // prune a copy, so that the set passed in is left unmodified
+    Set<String> parents = new HashSet<>(terms);
+    boolean childRemoved = true;
+    while (childRemoved)
+    {
+      childRemoved = removeChild(parents);
+    }
+    return parents;
+  }
+
+  /**
+   * Removes the first term in the given set found which is a child of another
+   * term in the set. Answers true if a child was found and removed, else false.
+   * 
+   * @param terms the set of terms to prune; at most one term is removed from it
+   * @return true if a term was removed, false if no term is a child of another
+   */
+  boolean removeChild(Set<String> terms)
+  {
+    for (String t1 : terms)
+    {
+      for (String t2 : terms)
+      {
+        if (!java.util.Objects.equals(t1, t2)) // by value, null-safe
+        {
+          if (isA(t1, t2))
+          {
+            terms.remove(t1);
+            return true; // stop here: the set being iterated has changed
+          }
+          if (isA(t2, t1))
+          {
+            terms.remove(t2);
+            return true;
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public List<String> getChildTerms(String parent, List<String> terms)
+  {
+    List<String> children = new ArrayList<>();
+    for (String term : terms)
+    {
+      if (!term.equals(parent) && isA(term, parent))
+      {
+        children.add(term);
+      }
+    }
+    return children;
+  }
+}
diff --git a/src/jalview/datamodel/ontology/OntologyI.java b/src/jalview/datamodel/ontology/OntologyI.java
new file mode 100644 (file)
index 0000000..545a3c7
--- /dev/null
@@ -0,0 +1,61 @@
+package jalview.datamodel.ontology;
+
+import java.util.List;
+import java.util.Set;
+
+public interface OntologyI
+{
+
+  /**
+   * Answers true if <code>childTerm</code> is the same as, or a sub-type
+   * (specialisation of) <code>parentTerm</code>, else false
+   * 
+   * @param childTerm the term to test as the (possible) sub-type
+   * @param parentTerm the term to test as the (possible) super-type
+   * @return true if childTerm equals or specialises parentTerm, else false
+   */
+  boolean isA(String childTerm, String parentTerm);
+
+  /**
+   * Answers those terms in the given set which are not child terms of some
+   * other term in the set. That is, returns a set of parent terms. The input
+   * set is not modified.
+   * 
+   * @param terms the set of terms to filter; left unchanged
+   * @return the terms which are not a child of any other term in the set
+   */
+  Set<String> getParentTerms(Set<String> terms);
+
+  /**
+   * Answers a (possibly empty) list of those terms in the supplied list which
+   * are a child (directly or indirectly) of <code>parent</code>. The parent
+   * term itself is not included (even if in the input list)
+   * 
+   * @param parent the term whose children are wanted
+   * @param terms the list of terms to search for children of parent
+   * @return the child terms found, or an empty list if there are none
+   */
+  List<String> getChildTerms(String parent, List<String> terms);
+
+  /**
+   * Returns a sorted list of all valid terms queried for (i.e. terms processed
+   * which were valid in the SO), using the friendly description.
+   * 
+   * This can be used to check that any hard-coded stand-in for the full SO
+   * includes all the terms needed for correct processing.
+   * 
+   * @return the valid terms queried for, as a sorted list
+   */
+  List<String> termsFound();
+
+  /**
+   * Returns a sorted list of all invalid terms queried for (i.e. terms
+   * processed which were not found in the SO), using the friendly description.
+   * 
+   * This can be used to report any 'non-compliance' in data, and/or to report
+   * valid terms missing from any hard-coded stand-in for the full SO.
+   * 
+   * @return the invalid terms queried for, as a sorted list
+   */
+  List<String> termsNotFound();
+}
\ No newline at end of file
index 0d631e6..7842294 100644 (file)
@@ -20,6 +20,7 @@
  */
 package jalview.ext.so;
 
+import jalview.datamodel.ontology.OntologyBase;
 import jalview.io.gff.SequenceOntologyI;
 
 import java.io.BufferedInputStream;
@@ -48,7 +49,8 @@ import org.biojava.nbio.ontology.utils.Annotation;
  * A wrapper class that parses the Sequence Ontology and exposes useful access
  * methods. This version uses the BioJava parser.
  */
-public class SequenceOntology implements SequenceOntologyI
+public class SequenceOntology extends OntologyBase
+        implements SequenceOntologyI
 {
   /*
    * the parsed Ontology data as modelled by BioJava
@@ -82,10 +84,10 @@ public class SequenceOntology implements SequenceOntologyI
    */
   public SequenceOntology()
   {
-    termsFound = new ArrayList<String>();
-    termsNotFound = new ArrayList<String>();
-    termsByDescription = new HashMap<String, Term>();
-    termIsA = new HashMap<Term, List<Term>>();
+    termsFound = new ArrayList<>();
+    termsNotFound = new ArrayList<>();
+    termsByDescription = new HashMap<>();
+    termIsA = new HashMap<>();
 
     loadOntologyZipFile("so-xp-simple.obo");
   }
@@ -404,7 +406,7 @@ public class SequenceOntology implements SequenceOntologyI
    */
   protected synchronized void findParents(Term childTerm)
   {
-    List<Term> result = new ArrayList<Term>();
+    List<Term> result = new ArrayList<>();
     for (Triple triple : ontology.getTriples(childTerm, null, isA))
     {
       Term parent = triple.getObject();
index 307e1d1..e9b9923 100644 (file)
@@ -20,9 +20,9 @@
  */
 package jalview.io.gff;
 
-import java.util.List;
+import jalview.datamodel.ontology.OntologyI;
 
-public interface SequenceOntologyI
+public interface SequenceOntologyI extends OntologyI
 {
   /*
    * selected commonly used values for quick reference
@@ -62,28 +62,4 @@ public interface SequenceOntologyI
 
   // SO:0000704
   public static final String GENE = "gene";
-
-  public boolean isA(String childTerm, String parentTerm);
-
-  /**
-   * Returns a sorted list of all valid terms queried for (i.e. terms processed
-   * which were valid in the SO), using the friendly description.
-   * 
-   * This can be used to check that any hard-coded stand-in for the full SO
-   * includes all the terms needed for correct processing.
-   * 
-   * @return
-   */
-  public List<String> termsFound();
-
-  /**
-   * Returns a sorted list of all invalid terms queried for (i.e. terms
-   * processed which were not found in the SO), using the friendly description.
-   * 
-   * This can be used to report any 'non-compliance' in data, and/or to report
-   * valid terms missing from any hard-coded stand-in for the full SO.
-   * 
-   * @return
-   */
-  public List<String> termsNotFound();
 }
index 72e906c..670d887 100644 (file)
@@ -20,6 +20,8 @@
  */
 package jalview.io.gff;
 
+import jalview.datamodel.ontology.OntologyBase;
+
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -38,7 +40,8 @@ import java.util.Map;
  * @author gmcarstairs
  *
  */
-public class SequenceOntologyLite implements SequenceOntologyI
+public class SequenceOntologyLite extends OntologyBase
+        implements SequenceOntologyI
 {
   /*
    * initial selection of types of interest when processing Ensembl features
@@ -80,8 +83,11 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "sequence_variant", "sequence_variant" },
     { "structural_variant", "sequence_variant" },
     { "feature_variant", "sequence_variant" },
+    { "upstream_gene_variant", "sequence_variant" },
     { "gene_variant", "sequence_variant" },
     { "transcript_variant", "sequence_variant" },
+    { "non_coding_transcript_variant", "sequence_variant" },
+    { "non_coding_transcript_exon_variant", "sequence_variant" },
     // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
     // but we model it here correctly as per the SO
     { "NMD_transcript_variant", "sequence_variant" },
index 31e1887..c7776a3 100644 (file)
  */
 package jalview.ext.so;
 
-import static org.testng.AssertJUnit.assertFalse;
-import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
 
+import jalview.datamodel.ontology.OntologyI;
 import jalview.gui.JvOptionPane;
-import jalview.io.gff.SequenceOntologyI;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -39,7 +46,7 @@ public class SequenceOntologyTest
     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
   }
 
-  private SequenceOntologyI so;
+  private OntologyI so;
 
   @BeforeClass(alwaysRun = true)
   public void setUp()
@@ -132,4 +139,42 @@ public class SequenceOntologyTest
     assertTrue(so.isA("inframe_deletion", "sequence_variant"));
     assertTrue(so.isA("inframe_insertion", "sequence_variant"));
   }
+
+  @Test(groups = "Functional")
+  public void testGetChildTerms()
+  {
+    // an empty list of terms can hold no children of any parent
+    List<String> none = Collections.<String> emptyList();
+    assertTrue(so.getChildTerms("exon", none).isEmpty());
+
+    List<String> mixed = Arrays.asList("gene", "transcript", "snRNA", "junk",
+            "mRNA");
+    assertTrue(so.getChildTerms("exon", mixed).isEmpty());
+    List<String> found = so.getChildTerms("transcript", mixed);
+    assertEquals(found.size(), 2);
+    assertTrue(found.contains("snRNA"));
+    assertTrue(found.contains("mRNA"));
+
+    mixed = Arrays.asList("gene", "transcript", "synonymous_variant",
+            "stop_lost", "chain");
+    found = so.getChildTerms("sequence_variant", mixed);
+    assertEquals(found.size(), 2);
+    assertTrue(found.contains("synonymous_variant"));
+    assertTrue(found.contains("stop_lost"));
+  }
+
+  @Test(groups = "Functional")
+  public void testGetParentTerms()
+  {
+    /*
+     * two variant sub-types, their parent, and "chain" (not an SO term)
+     */
+    Set<String> input = new HashSet<>(Arrays.asList("sequence_variant",
+            "NMD_transcript_variant", "stop_lost", "chain"));
+
+    Set<String> result = so.getParentTerms(input);
+    assertEquals(result.size(), 2);
+    assertTrue(result.contains("sequence_variant"));
+    assertTrue(result.contains("chain"));
+  }
 }
index 0766666..3076f96 100644 (file)
@@ -1,17 +1,33 @@
 package jalview.io.gff;
 
-import static org.testng.AssertJUnit.assertFalse;
-import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
 
+import jalview.datamodel.ontology.OntologyI;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 public class SequenceOntologyLiteTest
 {
+  private OntologyI so;
+
+  @BeforeClass(alwaysRun = true)
+  public void setUp()
+  {
+    so = new SequenceOntologyLite(); // the ontology every test here queries
+  }
+
   @Test(groups = "Functional")
   public void testIsA_sequenceVariant()
   {
-    SequenceOntologyI so = new SequenceOntologyLite();
-
     assertFalse(so.isA("CDS", "sequence_variant"));
     assertTrue(so.isA("sequence_variant", "sequence_variant"));
 
@@ -34,4 +50,42 @@ public class SequenceOntologyLiteTest
     assertTrue(so.isA("inframe_insertion", "sequence_variant"));
     assertTrue(so.isA("splice_region_variant", "sequence_variant"));
   }
+
+  @Test(groups = "Functional")
+  public void testGetParentTerms()
+  {
+    /*
+     * two variant sub-types, their parent, and "chain" (not an SO term)
+     */
+    Set<String> input = new HashSet<>(Arrays.asList("sequence_variant",
+            "NMD_transcript_variant", "stop_lost", "chain"));
+
+    Set<String> result = so.getParentTerms(input);
+    assertEquals(result.size(), 2);
+    assertTrue(result.contains("sequence_variant"));
+    assertTrue(result.contains("chain"));
+  }
+
+  @Test(groups = "Functional")
+  public void testGetChildTerms()
+  {
+    // an empty list of terms can hold no children of any parent
+    List<String> none = Collections.<String> emptyList();
+    assertTrue(so.getChildTerms("exon", none).isEmpty());
+
+    List<String> mixed = Arrays.asList("gene", "transcript", "snRNA", "junk",
+            "mRNA");
+    assertTrue(so.getChildTerms("exon", mixed).isEmpty());
+    List<String> found = so.getChildTerms("transcript", mixed);
+    assertEquals(found.size(), 2);
+    assertTrue(found.contains("snRNA"));
+    assertTrue(found.contains("mRNA"));
+
+    mixed = Arrays.asList("gene", "transcript", "synonymous_variant",
+            "stop_lost", "chain");
+    found = so.getChildTerms("sequence_variant", mixed);
+    assertEquals(found.size(), 2);
+    assertTrue(found.contains("synonymous_variant"));
+    assertTrue(found.contains("stop_lost"));
+  }
 }