1 package jalview.io.gff;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * An implementation of SequenceOntologyI that hard-codes terms of interest.
 *
 * Use this in unit testing by calling SequenceOntology.setInstance(new
 * SequenceOntologyLite()).
 *
 * May also become a stand-in for SequenceOntology in the applet if we want to
 * avoid the additional jars needed for parsing the full SO.
 */
21 public class SequenceOntologyLite implements SequenceOntologyI
24 * initial selection of types of interest when processing Ensembl features
27 private final String[][] TERMS = new String[][] {
33 { "ncRNA_gene", "gene" },
34 { "snRNA_gene", "gene" },
37 * transcript sub-types:
39 { "transcript", "transcript" },
40 { "mature_transcript", "transcript" },
41 { "ncRNA", "transcript" },
42 { "snRNA", "transcript" },
43 { "aberrant_processed_transcript", "transcript" },
46 * sequence_variant sub-types:
48 { "sequence_variant", "sequence_variant" },
49 { "feature_variant", "sequence_variant" },
50 { "gene_variant", "sequence_variant" },
51 // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
52 // but we model it here correctly as per the SO
53 { "NMD_transcript_variant", "sequence_variant" },
54 { "transcript_variant", "sequence_variant" },
55 { "structural_variant", "sequence_variant" },
58 * no sub-types of exon or CDS encountered in Ensembl
59 * a few added here for testing purposes
62 { "coding_exon", "exon" },
64 { "CDS_predicted", "CDS" },
67 * used in exonerate GFF
69 { "protein_match", "protein_match"},
70 { "nucleotide_match", "nucleotide_match"},
73 * used in InterProScan GFF
75 { "polypeptide", "polypeptide" }
80 * hard-coded list of any parents (direct or indirect)
81 * that we care about for a term
83 private Map<String, List<String>> parents;
85 private List<String> termsFound;
87 private List<String> termsNotFound;
89 public SequenceOntologyLite()
91 termsFound = new ArrayList<String>();
92 termsNotFound = new ArrayList<String>();
97 * Loads hard-coded data into a lookup table of {term, {list_of_parents}}
99 private void loadStaticData()
101 parents = new HashMap<String, List<String>>();
102 for (String [] pair : TERMS) {
103 List<String> p = parents.get(pair[0]);
106 p = new ArrayList<String>();
107 parents.put(pair[0], p);
114 * Answers true if 'child' isA 'parent' (including equality). In this
115 * implementation, based only on hard-coded values.
118 public boolean isA(String child, String parent)
120 if (child == null || parent == null)
124 if (child.equals(parent))
130 List<String> p = parents.get(child);
137 if (p.contains(parent))
145 * Records a valid term queried for, for reporting purposes
149 private void termFound(String term)
151 if (!termsFound.contains(term))
153 synchronized (termsFound)
155 termsFound.add(term);
161 * Records an invalid term queried for, for reporting purposes
165 private void termNotFound(String term)
167 synchronized (termsNotFound)
169 if (!termsNotFound.contains(term))
171 System.out.println("SO term " + term
172 + " not known - either invalid or needs modelled in "
173 + getClass().getName());
174 termsNotFound.add(term);
180 * Sorts (case-insensitive) and returns the list of valid terms queried for
183 public List<String> termsFound()
185 synchronized (termsFound)
187 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
193 * Sorts (case-insensitive) and returns the list of invalid terms queried for
196 public List<String> termsNotFound()
198 synchronized (termsNotFound)
200 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
201 return termsNotFound;