1 package jalview.io.gff;
3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.HashMap;
10 * An implementation of SequenceOntologyI that hard codes terms of interest.
12 * Use this in unit testing by calling SequenceOntology.setInstance(new
13 * SequenceOntologyLite()).
15 * May also become a stand-in for SequenceOntology in the applet if we want to
16 * avoid the additional jars needed for parsing the full SO.
21 public class SequenceOntologyLite implements SequenceOntologyI
// Hard-coded lookup data. Each row is a two-element string array; element [0]
// is the key that loadStaticData uses when filling the parents table.
// Element [1] is presumably the parent term recorded for that key - TODO
// confirm against the full loadStaticData body.
// NOTE(review): the array is declared private final but not static, so it is
// an instance field - confirm whether a shared static constant was intended.
24 * initial selection of types of interest when processing Ensembl features
25 * NB unlike the full SequenceOntology we don't traverse indirect
26 * child-parent relationships here so e.g. need to list every sub-type
27 * of gene (direct or indirect) that is of interest
30 private final String[][] TERMS = new String[][] {
36 { "ncRNA_gene", "gene" },
37 { "snRNA_gene", "gene" },
38 { "miRNA_gene", "gene" },
39 { "lincRNA_gene", "gene" },
40 { "rRNA_gene", "gene" },
43 * transcript sub-types:
45 { "transcript", "transcript" },
46 { "mature_transcript", "transcript" },
47 { "processed_transcript", "transcript" },
48 { "aberrant_processed_transcript", "transcript" },
49 { "ncRNA", "transcript" },
50 { "snRNA", "transcript" },
51 { "miRNA", "transcript" },
52 { "lincRNA", "transcript" },
53 { "rRNA", "transcript" },
54 { "mRNA", "transcript" },
55 // there are many more sub-types of ncRNA...
58 * sequence_variant sub-types:
60 { "sequence_variant", "sequence_variant" },
61 { "feature_variant", "sequence_variant" },
62 { "gene_variant", "sequence_variant" },
63 // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
64 // but we model it here correctly as per the SO
65 { "NMD_transcript_variant", "sequence_variant" },
66 { "transcript_variant", "sequence_variant" },
67 { "structural_variant", "sequence_variant" },
70 * no sub-types of exon or CDS yet seen in Ensembl
71 * some added here for testing purposes
74 { "coding_exon", "exon" },
76 { "CDS_predicted", "CDS" },
79 * terms used in exonerate or PASA GFF
81 { "protein_match", "protein_match"},
82 { "nucleotide_match", "nucleotide_match"},
83 { "cDNA_match", "nucleotide_match"},
86 * used in InterProScan GFF
88 { "polypeptide", "polypeptide" }
93 * hard-coded list of any parents (direct or indirect)
94 * that we care about for a term
// Keyed by term; assigned a fresh HashMap in loadStaticData and read by isA.
96 private Map<String, List<String>> parents;
// Terms recorded by termFound(). The list object is also the monitor that
// termFound() and termsFound() synchronize on.
98 private List<String> termsFound;
// Terms recorded by termNotFound(). The list object is also the monitor that
// termNotFound() and termsNotFound() synchronize on.
100 private List<String> termsNotFound;
// Constructor: creates the two empty lists used to record queried terms.
102 public SequenceOntologyLite()
104 termsFound = new ArrayList<String>();
105 termsNotFound = new ArrayList<String>();
110 * Loads hard-coded data into a lookup table of {term, {list_of_parents}}
112 private void loadStaticData()
// the table is replaced with a new, empty map before the rows are read
114 parents = new HashMap<String, List<String>>();
// one pass over the rows of the TERMS array
115 for (String[] pair : TERMS)
// element [0] of the row is the lookup key
117 List<String> p = parents.get(pair[0]);
// a list is created for the key and stored in the table
120 p = new ArrayList<String>();
121 parents.put(pair[0], p);
128 * Answers true if 'child' isA 'parent' (including equality). In this
129 * implementation, based only on hard-coded values.
// @param child   the term being tested
// @param parent  the term that child is tested against
132 public boolean isA(String child, String parent)
// both arguments are null-checked first, before equals() is invoked on child
134 if (child == null || parent == null)
// identical terms are compared directly, before the lookup table is read
138 if (child.equals(parent))
// the parents recorded for this term in the hard-coded table
144 List<String> p = parents.get(child);
// plain membership test on that list; no further traversal happens here
151 if (p.contains(parent))
159 * Records a valid term queried for, for reporting purposes
163 private void termFound(String term)
165 if (!termsFound.contains(term))
167 synchronized (termsFound)
169 termsFound.add(term);
175 * Records an invalid term queried for, for reporting purposes
179 private void termNotFound(String term)
181 synchronized (termsNotFound)
183 if (!termsNotFound.contains(term))
185 // suppress logging here as it reports Uniprot sequence features
186 // (which do not use SO terms) when auto-configuring feature colours
187 // System.out.println("SO term " + term
188 // + " not known - add to model if needed in "
189 // + getClass().getName());
190 termsNotFound.add(term);
196 * Sorts (case-insensitive) and returns the list of valid terms queried for
199 public List<String> termsFound()
// locks on the same list object that termFound() locks on when adding
201 synchronized (termsFound)
// the stored list itself is reordered in place
203 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
209 * Sorts (case-insensitive) and returns the list of invalid terms queried for
212 public List<String> termsNotFound()
// locks on the same list object that termNotFound() locks on when adding
214 synchronized (termsNotFound)
// the stored list itself is reordered in place
216 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
// the field itself is handed back, not a copy
217 return termsNotFound;