From ccb694217023093e3e5f7506fbd9745e854dbcd9 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 5 Jan 2016 10:12:09 +0000 Subject: [PATCH] JAL-1191 check for obsolete term when descriptions are duplicated --- src/jalview/io/gff/SequenceOntology.java | 62 ++++++++++++++++++++++--- test/jalview/io/gff/SequenceOntologyTest.java | 4 ++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/src/jalview/io/gff/SequenceOntology.java b/src/jalview/io/gff/SequenceOntology.java index a999410..7f714ae 100644 --- a/src/jalview/io/gff/SequenceOntology.java +++ b/src/jalview/io/gff/SequenceOntology.java @@ -19,6 +19,7 @@ import org.biojava.nbio.ontology.Term; import org.biojava.nbio.ontology.Term.Impl; import org.biojava.nbio.ontology.Triple; import org.biojava.nbio.ontology.io.OboParser; +import org.biojava.nbio.ontology.utils.Annotation; /** * A wrapper class that parses the Sequence Ontology and exposes useful access @@ -26,7 +27,7 @@ import org.biojava.nbio.ontology.io.OboParser; */ public class SequenceOntology { - private static SequenceOntology instance = new SequenceOntology(); + private static SequenceOntology instance; private Ontology ontology; @@ -43,8 +44,12 @@ public class SequenceOntology */ private Map> termIsA; - public static SequenceOntology getInstance() + public synchronized static SequenceOntology getInstance() { + if (instance == null) + { + instance = new SequenceOntology(); + } return instance; } @@ -127,8 +132,10 @@ public class SequenceOntology } /** - * Store a lookup table of terms by their description. Note that description - * is not guaranteed unique - currently reporting 8 duplicates. + * Stores a lookup table of terms by description. Note that description is not + * guaranteed unique. Where duplicate descriptions are found, try to discard + * the term that is flagged as obsolete. However we do store obsolete terms + * where there is no duplication of description. */ protected void storeTermNames() { @@ -139,20 +146,63 @@ public class SequenceOntology String description = term.getDescription(); if (description != null) { - // System.out.println(term.getName() + "=" + term.getDescription()); - Term replaced = termsByDescription.put(description, term); + Term replaced = termsByDescription.get(description); if (replaced != null) { + boolean newTermIsObsolete = isObsolete(term); + boolean oldTermIsObsolete = isObsolete(replaced); + if (newTermIsObsolete && !oldTermIsObsolete) + { + System.err.println("Ignoring " + term.getName() + + " as obsolete and duplicated by " + + replaced.getName()); + term = replaced; + } + else if (!newTermIsObsolete && oldTermIsObsolete) + { + System.err.println("Ignoring " + replaced.getName() + + " as obsolete and duplicated by " + term.getName()); + } + else + { System.err.println("Warning: " + term.getName() + " has replaced " + replaced.getName() + " for lookup of '" + description + "'"); + } } + termsByDescription.put(description, term); } } } } /** + * Answers true if the term has property "is_obsolete" with value true, else + * false + * + * @param term + * @return + */ + public static boolean isObsolete(Term term) + { + Annotation ann = term.getAnnotation(); + if (ann != null) + { + try + { + if (Boolean.TRUE.equals(ann.getProperty("is_obsolete"))) + { + return true; + } + } catch (NoSuchElementException e) + { + // fall through to false + } + } + return false; + } + + /** * Test whether the given Sequence Ontology term is nucleotide_match (either * directly or via is_a relationship) * diff --git a/test/jalview/io/gff/SequenceOntologyTest.java b/test/jalview/io/gff/SequenceOntologyTest.java index f8ff6f2..6c9226f 100644 --- a/test/jalview/io/gff/SequenceOntologyTest.java +++ b/test/jalview/io/gff/SequenceOntologyTest.java @@ -12,7 +12,11 @@ public class SequenceOntologyTest @BeforeMethod public void setUp() { + long now = System.currentTimeMillis(); so = SequenceOntology.getInstance(); + long elapsed = System.currentTimeMillis() - now; + System.out.println("Load and cache of Sequence Ontology took " + + elapsed + "ms"); } @Test(groups = "Functional") -- 1.7.10.2