JAL-3010 tidy logging and Javadoc, extract method to store synonyms
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 25 Apr 2019 08:50:28 +0000 (09:50 +0100)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 25 Apr 2019 08:50:28 +0000 (09:50 +0100)
src/jalview/ext/so/SequenceOntology.java
test/jalview/ext/so/SequenceOntologyTest.java

index 3a775b0..16b1562 100644 (file)
@@ -20,6 +20,7 @@
  */
 package jalview.ext.so;
 
+import jalview.bin.Cache;
 import jalview.datamodel.ontology.OntologyBase;
 import jalview.io.gff.SequenceOntologyI;
 
@@ -191,19 +192,20 @@ public class SequenceOntology extends OntologyBase
             boolean oldTermIsObsolete = isObsolete(replaced);
             if (newTermIsObsolete && !oldTermIsObsolete)
             {
-              System.err.println("Ignoring " + term.getName()
+              Cache.log.debug("SequenceOntology ignoring " + term.getName()
                       + " as obsolete and duplicated by "
                       + replaced.getName());
               term = replaced;
             }
             else if (!newTermIsObsolete && oldTermIsObsolete)
             {
-              System.err.println("Ignoring " + replaced.getName()
+              Cache.log.debug("SequenceOntology ignoring "
+                      + replaced.getName()
                       + " as obsolete and duplicated by " + term.getName());
             }
             else
             {
-              System.err.println("Warning: " + term.getName()
+              Cache.log.debug("SequenceOntology warning: " + term.getName()
                       + " has replaced " + replaced.getName()
                       + " for lookup of '" + description + "'");
             }
@@ -215,57 +217,7 @@ public class SequenceOntology extends OntologyBase
            */
           if (!newTermIsObsolete)
           {
-            for (Object syn : term.getSynonyms())
-            {
-              String name = ((Synonym) syn).getName();
-              String synonym = canonicalise(name);
-              if (aliases.containsKey(synonym))
-              {
-                final Term found = aliases.get(synonym);
-                if (found != term)
-                {
-                  /*
-                   * this alias is ambiguous - matches description,
-                   * or an alias, of another term
-                   */
-                  String msg = String.format(
-                          "Ambiguous synonym %s for '%s:%s' and '%s:%s'",
-                          synonym, term.getName(), term.getDescription(),
-                          found.getName(), found.getDescription());
-                  System.err.println(msg);
-
-                  /*
-                   * preserve any entry whose canonical description happens to match
-                   * a synonym (NMD_transcript is a valid description, and also
-                   * a synonym for NMD_transcript_variant)
-                   * also preserve a parent (more general) term
-                   */
-                  if (synonym.equals(canonicalise(found.getDescription()))
-                          || termIsA(term, found))
-                  {
-                    // leave it alone
-                  }
-                  /*
-                   * replace a specialised term with a more general one
-                   * with the same alias
-                   */
-                  // else if
-                  // (synonym.equals(canonicalise(term.getDescription())))
-                  else if (termIsA(found, term))
-                  {
-                    aliases.put(synonym, term);
-                  }
-                  else
-                  {
-                    ambiguous.add(synonym);
-                  }
-                }
-              }
-              else
-              {
-                aliases.put(synonym, term);
-              }
-            }
+            storeSynonymsForTerm(term, ambiguous);
           }
         }
       }
@@ -285,6 +237,96 @@ public class SequenceOntology extends OntologyBase
   }
 
   /**
+   * Stores any synonyms as an alternative lookup for the term, canonicalised
+   * for case/hyphen/space insensitivity on lookup.
+   * <p>
+   * Some synonyms may be ambiguous (present for more than one term), and these
+   * are handled as follows:
+   * <ul>
+   * <li>if a synonym matches the <em>description</em> of another term, it is
+   * not saved, so that a term can always be found by description
+   * <ul>
+   * <li>Example: {@code nmd_transcript} is the description for
+   * {@code NMD_transcript} and also a synonym for
+   * {@code NMD_transcript_variant} - the synonym is ignored</li>
+   * </ul>
+   * </li>
+   * <li>if one term is a sub-term (directly or indirectly) of the other, the
+   * synonym is retained for the more general term
+   * <ul>
+   * <li>Example: {@code helix} is a synonym for
+   * {@code alpha_helix, right_handed_peptide_helix, peptide_helix} - it is kept
+   * for {@code peptide_helix} as this is a parent of the other terms</li>
+   * </ul>
+   * </li>
+   * <li>otherwise the synonym is added to the {@code ambiguous} list for
+   * removal
+   * <ul>
+   * <li>Example: {@code sequence variation} is a synonym for
+   * {@code sequence_alteration} and {@code alternate_sequence_site} but these
+   * have no {@code isA} relationship - the synonym is ignored as ambiguous</li>
+   * </ul>
+   * </ul>
+   * 
+   * @param term
+   * @param ambiguous
+   */
+  void storeSynonymsForTerm(Term term, Set<String> ambiguous)
+  {
+    for (Object syn : term.getSynonyms())
+    {
+      String name = ((Synonym) syn).getName();
+      String synonym = canonicalise(name);
+      if (aliases.containsKey(synonym))
+      {
+        final Term found = aliases.get(synonym);
+        if (found != term)
+        {
+          /*
+           * this alias is ambiguous - matches description,
+           * or an alias, of another term
+           */
+          String msg = String.format(
+                  "SequenceOntology ambiguous synonym %s for '%s:%s' and '%s:%s'",
+                  synonym, term.getName(), term.getDescription(),
+                  found.getName(), found.getDescription());
+          Cache.log.debug(msg);
+
+          /*
+           * preserve any entry whose canonical description happens to match
+           * a synonym (NMD_transcript is a valid description, and also
+           * a synonym for NMD_transcript_variant)
+           * also preserve a parent (more general) term
+           */
+          if (synonym.equals(canonicalise(found.getDescription()))
+                  || termIsA(term, found))
+          {
+            // leave it alone
+          }
+          /*
+           * replace a specialised term with a more general one
+           * with the same alias
+           */
+          // else if
+          // (synonym.equals(canonicalise(term.getDescription())))
+          else if (termIsA(found, term))
+          {
+            aliases.put(synonym, term);
+          }
+          else
+          {
+            ambiguous.add(synonym);
+          }
+        }
+      }
+      else
+      {
+        aliases.put(synonym, term);
+      }
+    }
+  }
+
+  /**
    * Converts a string to lower case and changes hyphens and spaces to
    * underscores
    * 
@@ -427,7 +469,7 @@ public class SequenceOntology extends OntologyBase
     {
       if (!termsNotFound.contains(term))
       {
-        System.err.println("SO term " + term + " invalid");
+        Cache.log.debug("SequenceOntology term " + term + " invalid");
         termsNotFound.add(term);
       }
     }
index 260b011..d21322f 100644 (file)
@@ -27,6 +27,7 @@ import static org.testng.Assert.assertNull;
 import static org.testng.Assert.assertSame;
 import static org.testng.Assert.assertTrue;
 
+import jalview.bin.Cache;
 import jalview.gui.JvOptionPane;
 
 import java.util.Arrays;
@@ -58,6 +59,7 @@ public class SequenceOntologyTest
   @BeforeClass(alwaysRun = true)
   public void setUp()
   {
+    Cache.initLogger();
     long now = System.currentTimeMillis();
     try
     {