--- /dev/null
+package jalview.datamodel.ontology;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * A base class for models of the Sequence Ontology and other ontologies. It
+ * implements the set and list based queries of {@code OntologyI} in terms of
+ * {@code isA}, which is left for concrete subclasses to provide.
+ *
+ * @author gmcarstairs
+ *
+ */
+public abstract class OntologyBase implements OntologyI
+{
+  /**
+   * Answers a new set holding those of the given terms which are not a child
+   * of some other term in the set. The input set is copied, not modified.
+   *
+   * @param terms
+   *          the terms to filter
+   * @return a new set containing only the parent terms
+   */
+  @Override
+  public Set<String> getParentTerms(Set<String> terms)
+  {
+    Set<String> parents = new HashSet<>(terms);
+
+    /*
+     * removeChild drops at most one term per call, so keep calling it
+     * until a complete scan finds no remaining child term
+     */
+    boolean childRemoved = true;
+    while (childRemoved)
+    {
+      childRemoved = removeChild(parents);
+    }
+    return parents;
+  }
+
+  /**
+   * Removes the first term in the given set found which is a child of another
+   * term in the set. Answers true if a child was found and removed, else false.
+   * At most one term is removed per call.
+   *
+   * @param terms
+   *          the set to search; it is modified in place if a child is found
+   * @return true if a term was removed, false if no term in the set is a child
+   *         of another
+   */
+  boolean removeChild(Set<String> terms)
+  {
+    for (String t1 : terms)
+    {
+      for (String t2 : terms)
+      {
+        /*
+         * skip a term paired with itself; compare by value (null-safe)
+         * rather than by object reference
+         */
+        boolean sameTerm = (t1 == null) ? t2 == null : t1.equals(t2);
+        if (sameTerm)
+        {
+          continue;
+        }
+
+        /*
+         * removing inside the for-each loops is safe only because we
+         * return at once, so neither iterator is advanced afterwards
+         */
+        if (isA(t1, t2))
+        {
+          terms.remove(t1);
+          return true;
+        }
+        if (isA(t2, t1))
+        {
+          terms.remove(t2);
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Answers a (possibly empty) list of those terms in the supplied list for
+   * which {@code isA(term, parent)} is true. The parent term itself is
+   * excluded even if present in the list. Matching terms are returned in the
+   * order in which they occur in the input list.
+   *
+   * @param parent
+   *          the term whose children are wanted
+   * @param terms
+   *          the candidate terms to test
+   * @return a new list of the child terms found
+   */
+  @Override
+  public List<String> getChildTerms(String parent, List<String> terms)
+  {
+    List<String> children = new ArrayList<>();
+    for (String term : terms)
+    {
+      if (!term.equals(parent) && isA(term, parent))
+      {
+        children.add(term);
+      }
+    }
+    return children;
+  }
+}
--- /dev/null
+package jalview.datamodel.ontology;
+
+import java.util.List;
+import java.util.Set;
+
+public interface OntologyI
+{
+
+ /**
+ * Answers true if <code>childTerm</code> is the same as, or a sub-type
+ * (specialisation of) <code>parentTerm</code>, else false
+ *
+ * @param childTerm
+ *          the term to test
+ * @param parentTerm
+ *          the term that <code>childTerm</code> is tested against
+ * @return true if <code>childTerm</code> equals or specialises
+ *         <code>parentTerm</code>
+ */
+ boolean isA(String childTerm, String parentTerm);
+
+ /**
+ * Answers those terms in the given set which are not child terms of some
+ * other term in the set. That is, returns a set of parent terms. The input
+ * set is not modified.
+ *
+ * @param terms
+ *          the terms to filter
+ * @return a set holding only those of the given terms which are not a child
+ *         of another term in the set
+ */
+ Set<String> getParentTerms(Set<String> terms);
+
+ /**
+ * Answers a (possibly empty) list of those terms in the supplied list which
+ * are a child (directly or indirectly) of <code>parent</code>. The parent
+ * term itself is not included (even if in the input list)
+ *
+ * @param parent
+ *          the term whose children are wanted
+ * @param terms
+ *          the candidate terms to test
+ * @return the child terms found (possibly an empty list)
+ */
+ List<String> getChildTerms(String parent, List<String> terms);
+
+ /**
+ * Returns a sorted list of all valid terms queried for (i.e. terms processed
+ * which were valid in the SO), using the friendly description.
+ *
+ * This can be used to check that any hard-coded stand-in for the full SO
+ * includes all the terms needed for correct processing.
+ *
+ * @return the valid terms queried for, sorted
+ */
+ List<String> termsFound();
+
+ /**
+ * Returns a sorted list of all invalid terms queried for (i.e. terms
+ * processed which were not found in the SO), using the friendly description.
+ *
+ * This can be used to report any 'non-compliance' in data, and/or to report
+ * valid terms missing from any hard-coded stand-in for the full SO.
+ *
+ * @return the terms queried for but not found, sorted
+ */
+ List<String> termsNotFound();
+}
\ No newline at end of file
*/
package jalview.ext.so;
+import jalview.datamodel.ontology.OntologyBase;
import jalview.io.gff.SequenceOntologyI;
import java.io.BufferedInputStream;
* A wrapper class that parses the Sequence Ontology and exposes useful access
* methods. This version uses the BioJava parser.
*/
-public class SequenceOntology implements SequenceOntologyI
+public class SequenceOntology extends OntologyBase
+ implements SequenceOntologyI
{
/*
* the parsed Ontology data as modelled by BioJava
*/
public SequenceOntology()
{
- termsFound = new ArrayList<String>();
- termsNotFound = new ArrayList<String>();
- termsByDescription = new HashMap<String, Term>();
- termIsA = new HashMap<Term, List<Term>>();
+ termsFound = new ArrayList<>();
+ termsNotFound = new ArrayList<>();
+ termsByDescription = new HashMap<>();
+ termIsA = new HashMap<>();
loadOntologyZipFile("so-xp-simple.obo");
}
*/
protected synchronized void findParents(Term childTerm)
{
- List<Term> result = new ArrayList<Term>();
+ List<Term> result = new ArrayList<>();
for (Triple triple : ontology.getTriples(childTerm, null, isA))
{
Term parent = triple.getObject();
*/
package jalview.io.gff;
-import java.util.List;
+import jalview.datamodel.ontology.OntologyI;
-public interface SequenceOntologyI
+public interface SequenceOntologyI extends OntologyI
{
/*
* selected commonly used values for quick reference
// SO:0000704
public static final String GENE = "gene";
-
- public boolean isA(String childTerm, String parentTerm);
-
- /**
- * Returns a sorted list of all valid terms queried for (i.e. terms processed
- * which were valid in the SO), using the friendly description.
- *
- * This can be used to check that any hard-coded stand-in for the full SO
- * includes all the terms needed for correct processing.
- *
- * @return
- */
- public List<String> termsFound();
-
- /**
- * Returns a sorted list of all invalid terms queried for (i.e. terms
- * processed which were not found in the SO), using the friendly description.
- *
- * This can be used to report any 'non-compliance' in data, and/or to report
- * valid terms missing from any hard-coded stand-in for the full SO.
- *
- * @return
- */
- public List<String> termsNotFound();
}
*/
package jalview.io.gff;
+import jalview.datamodel.ontology.OntologyBase;
+
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
* @author gmcarstairs
*
*/
-public class SequenceOntologyLite implements SequenceOntologyI
+public class SequenceOntologyLite extends OntologyBase
+ implements SequenceOntologyI
{
/*
* initial selection of types of interest when processing Ensembl features
{ "sequence_variant", "sequence_variant" },
{ "structural_variant", "sequence_variant" },
{ "feature_variant", "sequence_variant" },
+ { "upstream_gene_variant", "sequence_variant" },
{ "gene_variant", "sequence_variant" },
{ "transcript_variant", "sequence_variant" },
+ { "non_coding_transcript_variant", "sequence_variant" },
+ { "non_coding_transcript_exon_variant", "sequence_variant" },
// NB Ensembl uses NMD_transcript_variant as if a 'transcript'
// but we model it here correctly as per the SO
{ "NMD_transcript_variant", "sequence_variant" },
*/
package jalview.ext.so;
-import static org.testng.AssertJUnit.assertFalse;
-import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import jalview.datamodel.ontology.OntologyI;
import jalview.gui.JvOptionPane;
-import jalview.io.gff.SequenceOntologyI;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
}
- private SequenceOntologyI so;
+ private OntologyI so;
@BeforeClass(alwaysRun = true)
public void setUp()
assertTrue(so.isA("inframe_deletion", "sequence_variant"));
assertTrue(so.isA("inframe_insertion", "sequence_variant"));
}
+
+ /**
+  * Checks that getChildTerms picks out only the sub-types of the given parent
+  * from a list of candidate terms
+  */
+ @Test(groups = "Functional")
+ public void testGetChildTerms()
+ {
+   /*
+    * no candidate terms supplied, so nothing can be found
+    */
+   List<String> noTerms = Collections.<String> emptyList();
+   assertTrue(so.getChildTerms("exon", noTerms).isEmpty());
+
+   /*
+    * candidates include two kinds of transcript but no kind of exon
+    */
+   List<String> candidates = Arrays.asList("gene", "transcript", "snRNA",
+           "junk", "mRNA");
+   assertTrue(so.getChildTerms("exon", candidates).isEmpty());
+   List<String> transcripts = so.getChildTerms("transcript", candidates);
+   assertEquals(transcripts.size(), 2);
+   assertTrue(transcripts.contains("snRNA"));
+   assertTrue(transcripts.contains("mRNA"));
+
+   /*
+    * candidates include two kinds of sequence_variant
+    */
+   List<String> mixed = Arrays.asList("gene", "transcript",
+           "synonymous_variant", "stop_lost", "chain");
+   List<String> variants = so.getChildTerms("sequence_variant", mixed);
+   assertEquals(variants.size(), 2);
+   assertTrue(variants.contains("synonymous_variant"));
+   assertTrue(variants.contains("stop_lost"));
+ }
+
+ /**
+  * Checks that getParentTerms discards terms which are a sub-type of another
+  * term in the set, and retains the rest
+  */
+ @Test(groups = "Functional")
+ public void testGetParentTerms()
+ {
+   /*
+    * "chain" is not an SO term
+    */
+   Set<String> input = new HashSet<>(Arrays.asList("sequence_variant",
+           "NMD_transcript_variant", "stop_lost", "chain"));
+
+   Set<String> result = so.getParentTerms(input);
+   assertEquals(result.size(), 2);
+   assertTrue(result.contains("sequence_variant"));
+   assertTrue(result.contains("chain"));
+ }
}
package jalview.io.gff;
-import static org.testng.AssertJUnit.assertFalse;
-import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import jalview.datamodel.ontology.OntologyI;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class SequenceOntologyLiteTest
{
+ private OntologyI so;
+
+ @BeforeClass(alwaysRun = true)
+ public void setUp()
+ {
+ so = new SequenceOntologyLite(); // assigned to the field which the test methods query
+ }
+
@Test(groups = "Functional")
public void testIsA_sequenceVariant()
{
- SequenceOntologyI so = new SequenceOntologyLite();
-
assertFalse(so.isA("CDS", "sequence_variant"));
assertTrue(so.isA("sequence_variant", "sequence_variant"));
assertTrue(so.isA("inframe_insertion", "sequence_variant"));
assertTrue(so.isA("splice_region_variant", "sequence_variant"));
}
+
+ /**
+  * Checks that getParentTerms discards terms which are a sub-type of another
+  * term in the set, and retains the rest
+  */
+ @Test(groups = "Functional")
+ public void testGetParentTerms()
+ {
+   /*
+    * "chain" is not an SO term
+    */
+   Set<String> input = new HashSet<>(Arrays.asList("sequence_variant",
+           "NMD_transcript_variant", "stop_lost", "chain"));
+
+   Set<String> result = so.getParentTerms(input);
+   assertEquals(result.size(), 2);
+   assertTrue(result.contains("sequence_variant"));
+   assertTrue(result.contains("chain"));
+ }
+
+ /**
+  * Checks that getChildTerms picks out only the sub-types of the given parent
+  * from a list of candidate terms
+  */
+ @Test(groups = "Functional")
+ public void testGetChildTerms()
+ {
+   /*
+    * no candidate terms supplied, so nothing can be found
+    */
+   List<String> noTerms = Collections.<String> emptyList();
+   assertTrue(so.getChildTerms("exon", noTerms).isEmpty());
+
+   /*
+    * candidates include two kinds of transcript but no kind of exon
+    */
+   List<String> candidates = Arrays.asList("gene", "transcript", "snRNA",
+           "junk", "mRNA");
+   assertTrue(so.getChildTerms("exon", candidates).isEmpty());
+   List<String> transcripts = so.getChildTerms("transcript", candidates);
+   assertEquals(transcripts.size(), 2);
+   assertTrue(transcripts.contains("snRNA"));
+   assertTrue(transcripts.contains("mRNA"));
+
+   /*
+    * candidates include two kinds of sequence_variant
+    */
+   List<String> mixed = Arrays.asList("gene", "transcript",
+           "synonymous_variant", "stop_lost", "chain");
+   List<String> variants = so.getChildTerms("sequence_variant", mixed);
+   assertEquals(variants.size(), 2);
+   assertTrue(variants.contains("synonymous_variant"));
+   assertTrue(variants.contains("stop_lost"));
+ }
}