package jalview.io.gff;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * An implementation of SequenceOntologyI that hard codes terms of interest.
 *
 * Use this in unit testing by calling SequenceOntology.setInstance(new
 * SequenceOntologyLite()).
 *
 * May also become a stand-in for SequenceOntology in the applet if we want to
 * avoid the additional jars needed for parsing the full SO.
 *
 * The lookup table of parents is populated once in the constructor and only
 * read afterwards. The lists of queried terms are updated and read while
 * synchronized on the respective list.
 *
 * @author gmcarstairs
 *
 */
public class SequenceOntologyLite implements SequenceOntologyI
{
  /*
   * initial selection of types of interest when processing Ensembl features
   * NB unlike the full SequenceOntology we don't traverse indirect
   * child-parent relationships here so e.g. need to list every sub-type
   * of gene (direct or indirect) that is of interest
   *
   * each entry is a pair of { term, parent }
   */
  // @formatter:off
  private static final String[][] TERMS = new String[][] {

    /*
     * gene sub-types:
     */
    { "gene", "gene" },
    { "ncRNA_gene", "gene" },
    { "snRNA_gene", "gene" },
    { "miRNA_gene", "gene" },
    { "lincRNA_gene", "gene" },
    { "rRNA_gene", "gene" },

    /*
     * transcript sub-types:
     */
    { "transcript", "transcript" },
    { "mature_transcript", "transcript" },
    { "processed_transcript", "transcript" },
    { "aberrant_processed_transcript", "transcript" },
    { "ncRNA", "transcript" },
    { "snRNA", "transcript" },
    { "miRNA", "transcript" },
    { "lincRNA", "transcript" },
    { "rRNA", "transcript" },
    { "mRNA", "transcript" },
    // there are many more sub-types of ncRNA...

    /*
     * sequence_variant sub-types:
     */
    { "sequence_variant", "sequence_variant" },
    { "feature_variant", "sequence_variant" },
    { "gene_variant", "sequence_variant" },
    // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
    // but we model it here correctly as per the SO
    { "NMD_transcript_variant", "sequence_variant" },
    { "transcript_variant", "sequence_variant" },
    { "structural_variant", "sequence_variant" },

    /*
     * no sub-types of exon or CDS yet seen in Ensembl
     * some added here for testing purposes
     */
    { "exon", "exon" },
    { "coding_exon", "exon" },
    { "CDS", "CDS" },
    { "CDS_predicted", "CDS" },

    /*
     * terms used in exonerate or PASA GFF
     */
    { "protein_match", "protein_match"},
    { "nucleotide_match", "nucleotide_match"},
    { "cDNA_match", "nucleotide_match"},

    /*
     * used in InterProScan GFF
     */
    { "polypeptide", "polypeptide" }
  };
  // @formatter:on

  /*
   * hard-coded list of any parents (direct or indirect)
   * that we care about for a term
   */
  private final Map<String, List<String>> parents;

  /*
   * terms passed as 'child' to isA() that were recognised (or equal to the
   * parent queried); also used as the lock guarding itself
   */
  private final List<String> termsFound;

  /*
   * terms passed as 'child' to isA() that have no entry in the lookup table;
   * also used as the lock guarding itself
   */
  private final List<String> termsNotFound;

  /**
   * Constructs an instance with empty 'found' / 'not found' term lists and
   * the hard-coded term-to-parents lookup loaded
   */
  public SequenceOntologyLite()
  {
    termsFound = new ArrayList<String>();
    termsNotFound = new ArrayList<String>();
    parents = new HashMap<String, List<String>>();
    loadStaticData();
  }

  /**
   * Loads hard-coded data into a lookup table of {term, {list_of_parents}}
   */
  private void loadStaticData()
  {
    for (String[] pair : TERMS)
    {
      String term = pair[0];
      List<String> termParents = parents.get(term);
      if (termParents == null)
      {
        termParents = new ArrayList<String>();
        parents.put(term, termParents);
      }
      termParents.add(pair[1]);
    }
  }

  /**
   * Answers true if 'child' isA 'parent' (including equality). In this
   * implementation, based only on hard-coded values.
   *
   * As a side effect, 'child' is recorded as a term found (if it equals
   * 'parent' or has an entry in the lookup table) or as a term not found
   * (otherwise). Nothing is recorded if either argument is null.
   *
   * @param child
   *          the term to test; may be null
   * @param parent
   *          the candidate parent term; may be null
   * @return true if the terms are equal or 'parent' is a listed parent of
   *         'child', else false (including when either argument is null)
   */
  @Override
  public boolean isA(String child, String parent)
  {
    if (child == null || parent == null)
    {
      return false;
    }
    if (child.equals(parent))
    {
      termFound(child);
      return true;
    }

    List<String> knownParents = parents.get(child);
    if (knownParents == null)
    {
      termNotFound(child);
      return false;
    }
    termFound(child);
    return knownParents.contains(parent);
  }

  /**
   * Records a valid term queried for, for reporting purposes
   *
   * @param term
   */
  private void termFound(String term)
  {
    /*
     * the contains check must be made while holding the lock, otherwise
     * two threads could both add the same term
     */
    synchronized (termsFound)
    {
      if (!termsFound.contains(term))
      {
        termsFound.add(term);
      }
    }
  }

  /**
   * Records an invalid term queried for, for reporting purposes
   *
   * @param term
   */
  private void termNotFound(String term)
  {
    synchronized (termsNotFound)
    {
      if (!termsNotFound.contains(term))
      {
        // logging is suppressed here as it would report Uniprot sequence
        // features (which do not use SO terms) when auto-configuring
        // feature colours
        termsNotFound.add(term);
      }
    }
  }

  /**
   * Sorts (case-insensitive) and returns the list of valid terms queried for
   *
   * @return a sorted copy of the recorded terms; changes made to the returned
   *         list do not affect this object
   */
  @Override
  public List<String> termsFound()
  {
    return sortedCopy(termsFound);
  }

  /**
   * Sorts (case-insensitive) and returns the list of invalid terms queried for
   *
   * @return a sorted copy of the recorded terms; changes made to the returned
   *         list do not affect this object
   */
  @Override
  public List<String> termsNotFound()
  {
    return sortedCopy(termsNotFound);
  }

  /**
   * Takes a snapshot of the given list while synchronized on it, and returns
   * the snapshot sorted case-insensitively
   *
   * @param terms
   *          one of the term lists, which is also the lock guarding it
   * @return a new, sorted list
   */
  private static List<String> sortedCopy(List<String> terms)
  {
    List<String> copy;
    synchronized (terms)
    {
      copy = new ArrayList<String>(terms);
    }
    Collections.sort(copy, String.CASE_INSENSITIVE_ORDER);
    return copy;
  }
}