*/
package jalview.ext.so;
+import jalview.bin.Cache;
import jalview.datamodel.ontology.OntologyBase;
import jalview.io.gff.SequenceOntologyI;
boolean oldTermIsObsolete = isObsolete(replaced);
if (newTermIsObsolete && !oldTermIsObsolete)
{
- System.err.println("Ignoring " + term.getName()
+ Cache.log.debug("SequenceOntology ignoring " + term.getName()
+ " as obsolete and duplicated by "
+ replaced.getName());
term = replaced;
}
else if (!newTermIsObsolete && oldTermIsObsolete)
{
- System.err.println("Ignoring " + replaced.getName()
+ Cache.log.debug("SequenceOntology ignoring "
+ + replaced.getName()
+ " as obsolete and duplicated by " + term.getName());
}
else
{
- System.err.println("Warning: " + term.getName()
+ Cache.log.debug("SequenceOntology warning: " + term.getName()
+ " has replaced " + replaced.getName()
+ " for lookup of '" + description + "'");
}
*/
if (!newTermIsObsolete)
{
- for (Object syn : term.getSynonyms())
- {
- String name = ((Synonym) syn).getName();
- String synonym = canonicalise(name);
- if (aliases.containsKey(synonym))
- {
- final Term found = aliases.get(synonym);
- if (found != term)
- {
- /*
- * this alias is ambiguous - matches description,
- * or an alias, of another term
- */
- String msg = String.format(
- "Ambiguous synonym %s for '%s:%s' and '%s:%s'",
- synonym, term.getName(), term.getDescription(),
- found.getName(), found.getDescription());
- System.err.println(msg);
-
- /*
- * preserve any entry whose canonical description happens to match
- * a synonym (NMD_transcript is a valid description, and also
- * a synonym for NMD_transcript_variant)
- * also preserve a parent (more general) term
- */
- if (synonym.equals(canonicalise(found.getDescription()))
- || termIsA(term, found))
- {
- // leave it alone
- }
- /*
- * replace a specialised term with a more general one
- * with the same alias
- */
- // else if
- // (synonym.equals(canonicalise(term.getDescription())))
- else if (termIsA(found, term))
- {
- aliases.put(synonym, term);
- }
- else
- {
- ambiguous.add(synonym);
- }
- }
- }
- else
- {
- aliases.put(synonym, term);
- }
- }
+ storeSynonymsForTerm(term, ambiguous);
}
}
}
}
/**
+ * Stores any synonyms as an alternative lookup for the term, canonicalised
+ * for case/hyphen/space insensitivity on lookup.
+ * <p>
+ * Some synonyms may be ambiguous (present for more than one term), and these
+ * are handled as follows:
+ * <ul>
+ * <li>if a synonym matches the <em>description</em> of another term, it is
+ * not saved, so that a term can always be found by description
+ * <ul>
+ * <li>Example: {@code nmd_transcript} is the description for
+ * {@code NMD_transcript} and also a synonym for
+ * {@code NMD_transcript_variant} - the synonym is ignored</li>
+ * </ul>
+ * </li>
+ * <li>if one term is a sub-term (directly or indirectly) of the other, the
+ * synonym is retained for the more general term
+ * <ul>
+ * <li>Example: {@code helix} is a synonym for
+ * {@code alpha_helix, right_handed_peptide_helix, peptide_helix} - it is kept
+ * for {@code peptide_helix} as this is a parent of the other terms</li>
+ * </ul>
+ * </li>
+ * <li>otherwise the synonym is added to the {@code ambiguous} list for
+ * removal
+ * <ul>
+ * <li>Example: {@code sequence variation} is a synonym for
+ * {@code sequence_alteration} and {@code alternate_sequence_site} but these
+ * have no {@code isA} relationship - the synonym is ignored as ambiguous</li>
+ * </ul>
+ * </ul>
+ *
+ * @param term
+ * @param ambiguous
+ */
+ void storeSynonymsForTerm(Term term, Set<String> ambiguous)
+ {
+ for (Object syn : term.getSynonyms())
+ {
+ String name = ((Synonym) syn).getName();
+ String synonym = canonicalise(name);
+ if (aliases.containsKey(synonym))
+ {
+ final Term found = aliases.get(synonym);
+ if (found != term)
+ {
+ /*
+ * this alias is ambiguous - matches description,
+ * or an alias, of another term
+ */
+ String msg = String.format(
+ "SequenceOntology ambiguous synonym %s for '%s:%s' and '%s:%s'",
+ synonym, term.getName(), term.getDescription(),
+ found.getName(), found.getDescription());
+ Cache.log.debug(msg);
+
+ /*
+ * preserve any entry whose canonical description happens to match
+ * a synonym (NMD_transcript is a valid description, and also
+ * a synonym for NMD_transcript_variant)
+ * also preserve a parent (more general) term
+ */
+ if (synonym.equals(canonicalise(found.getDescription()))
+ || termIsA(term, found))
+ {
+ // leave it alone
+ }
+ /*
+ * replace a specialised term with a more general one
+ * with the same alias
+ */
+ // else if
+ // (synonym.equals(canonicalise(term.getDescription())))
+ else if (termIsA(found, term))
+ {
+ aliases.put(synonym, term);
+ }
+ else
+ {
+ ambiguous.add(synonym);
+ }
+ }
+ }
+ else
+ {
+ aliases.put(synonym, term);
+ }
+ }
+ }
+
+ /**
* Converts a string to lower case and changes hyphens and spaces to
* underscores
*
{
if (!termsNotFound.contains(term))
{
- System.err.println("SO term " + term + " invalid");
+ Cache.log.debug("SequenceOntology term " + term + " invalid");
termsNotFound.add(term);
}
}