1 package jalview.io.gff;
3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.HashMap;
10 * An implementation of SequenceOntologyI that hard codes terms of interest.
12 * Use this in unit testing by calling SequenceOntology.setInstance(new
13 * SequenceOntologyLite()).
15 * May also become a stand-in for SequenceOntology in the applet if we want to
16 * avoid the additional jars needed for parsing the full SO.
21 public class SequenceOntologyLite implements SequenceOntologyI
24 * initial selection of types of interest when processing Ensembl features
27 private final String[][] TERMS = new String[][] {
33 { "ncRNA_gene", "gene" },
34 { "snRNA_gene", "gene" },
37 * transcript sub-types:
39 { "transcript", "transcript" },
40 { "mature_transcript", "transcript" },
41 { "ncRNA", "transcript" },
42 { "snRNA", "transcript" },
43 { "aberrant_processed_transcript", "transcript" },
46 * sequence_variant sub-types:
48 { "sequence_variant", "sequence_variant" },
49 { "feature_variant", "sequence_variant" },
50 { "gene_variant", "sequence_variant" },
51 // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
52 // but we model it here correctly as per the SO
53 { "NMD_transcript_variant", "sequence_variant" },
54 { "transcript_variant", "sequence_variant" },
55 { "structural_variant", "sequence_variant" },
58 * no sub-types of exon or CDS yet encountered; add if needed
64 * used in exonerate GFF
66 { "protein_match", "protein_match"},
67 { "nucleotide_match", "nucleotide_match"},
70 * used in InterProScan GFF
72 { "polypeptide", "polypeptide" }
77 * hard-coded list of any parents (direct or indirect)
78 * that we care about for a term
80 private Map<String, List<String>> parents;
82 private List<String> termsFound;
84 private List<String> termsNotFound;
86 public SequenceOntologyLite()
88 termsFound = new ArrayList<String>();
89 termsNotFound = new ArrayList<String>();
94 * Loads hard-coded data into a lookup table of {term, {list_of_parents}}
96 private void loadStaticData()
98 parents = new HashMap<String, List<String>>();
99 for (String [] pair : TERMS) {
100 List<String> p = parents.get(pair[0]);
103 p = new ArrayList<String>();
104 parents.put(pair[0], p);
111 * Answers true if 'child' isA 'parent' (including equality). In this
112 * implementation, based only on hard-coded values.
115 public boolean isA(String child, String parent)
117 if (child == null || parent == null)
121 if (child.equals(parent))
127 List<String> p = parents.get(child);
134 if (p.contains(parent))
142 * Records a valid term queried for, for reporting purposes
146 private void termFound(String term)
148 if (!termsFound.contains(term))
150 synchronized (termsFound)
152 termsFound.add(term);
158 * Records an invalid term queried for, for reporting purposes
162 private void termNotFound(String term)
164 synchronized (termsNotFound)
166 if (!termsNotFound.contains(term))
168 System.out.println("SO term " + term
169 + " not known - either invalid or needs modelled in "
170 + getClass().getName());
171 termsNotFound.add(term);
177 * Sorts (case-insensitive) and returns the list of valid terms queried for
180 public List<String> termsFound()
182 synchronized (termsFound)
184 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
190 * Sorts (case-insensitive) and returns the list of invalid terms queried for
193 public List<String> termsNotFound()
195 synchronized (termsNotFound)
197 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
198 return termsNotFound;