src/jalview/io/gff/SequenceOntologyLite.java

   1 package jalview.io.gff;
   2
   3 import java.util.ArrayList;
   4 import java.util.Collections;
   5 import java.util.HashMap;
   6 import java.util.List;
   7 import java.util.Map;
   8
   9 /**
  10  * An implementation of SequenceOntologyI that hard codes terms of interest.
  11  *
  12  * Use this in unit testing by calling SequenceOntology.setInstance(new
  13  * SequenceOntologyLite()).
  14  *
  15  * May also become a stand-in for SequenceOntology in the applet if we want to
  16  * avoid the additional jars needed for parsing the full SO.
  17  *
  18  * @author gmcarstairs
  19  *
  20  */
  21 public class SequenceOntologyLite implements SequenceOntologyI
  22 {
  23   /*
  24    * initial selection of types of interest when processing Ensembl features
  25    * NB unlike the full SequenceOntology we don't traverse indirect
  26    * child-parent relationships here so e.g. need to list every sub-type
  27    * of gene (direct or indirect) that is of interest
  28    */
  29   // @formatter:off
  30   private final String[][] TERMS = new String[][] {
  31
  32     /*
  33      * gene sub-types:
  34      */
  35     { "gene", "gene" },
  36     { "ncRNA_gene", "gene" },
  37     { "snRNA_gene", "gene" },
  38     { "miRNA_gene", "gene" },
  39     { "lincRNA_gene", "gene" },
  40     { "rRNA_gene", "gene" },
  41
  42     /*
  43      * transcript sub-types:
  44      */
  45     { "transcript", "transcript" },
  46     { "mature_transcript", "transcript" },
  47     { "processed_transcript", "transcript" },
  48     { "aberrant_processed_transcript", "transcript" },
  49     { "ncRNA", "transcript" },
  50     { "snRNA", "transcript" },
  51     { "miRNA", "transcript" },
  52     { "lincRNA", "transcript" },
  53     { "rRNA", "transcript" },
  54     // there are many more sub-types of ncRNA...
  55
  56     /*
  57      * sequence_variant sub-types:
  58      */
  59     { "sequence_variant", "sequence_variant" },
  60     { "feature_variant", "sequence_variant" },
  61     { "gene_variant", "sequence_variant" },
  62     // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
  63     // but we model it here correctly as per the SO
  64     { "NMD_transcript_variant", "sequence_variant" },
  65     { "transcript_variant", "sequence_variant" },
  66     { "structural_variant", "sequence_variant" },
  67
  68     /*
  69      * no sub-types of exon or CDS yet seen in Ensembl
  70      * some added here for testing purposes
  71      */
  72     { "exon", "exon" },
  73     { "coding_exon", "exon" },
  74     { "CDS", "CDS" },
  75     { "CDS_predicted", "CDS" },
  76
  77     /*
  78      * terms used in exonerate or PASA GFF
  79      */
  80     { "protein_match", "protein_match"},
  81     { "nucleotide_match", "nucleotide_match"},
  82     { "cDNA_match", "nucleotide_match"},
  83
  84     /*
  85      * used in InterProScan GFF
  86      */
  87     { "polypeptide", "polypeptide" }
  88   };
  89   // @formatter:on
  90
  91   /*
  92    * hard-coded list of any parents (direct or indirect)
  93    * that we care about for a term
  94    */
  95   private Map<String, List<String>> parents;
  96
  97   private List<String> termsFound;
  98
  99   private List<String> termsNotFound;
 100
 101   public SequenceOntologyLite()
 102   {
 103     termsFound = new ArrayList<String>();
 104     termsNotFound = new ArrayList<String>();
 105     loadStaticData();
 106   }
 107
 108   /**
 109    * Loads hard-coded data into a lookup table of {term, {list_of_parents}}
 110    */
 111   private void loadStaticData()
 112   {
 113     parents = new HashMap<String, List<String>>();
 114     for (String [] pair : TERMS) {
 115       List<String> p = parents.get(pair[0]);
 116       if (p == null)
 117       {
 118         p = new ArrayList<String>();
 119         parents.put(pair[0], p);
 120       }
 121       p.add(pair[1]);
 122     }
 123   }
 124
 125   /**
 126    * Answers true if 'child' isA 'parent' (including equality). In this
 127    * implementation, based only on hard-coded values.
 128    */
 129   @Override
 130   public boolean isA(String child, String parent)
 131   {
 132     if (child == null || parent == null)
 133     {
 134       return false;
 135     }
 136     if (child.equals(parent))
 137     {
 138       termFound(child);
 139       return true;
 140     }
 141
 142     List<String> p = parents.get(child);
 143     if (p == null)
 144     {
 145       termNotFound(child);
 146       return false;
 147     }
 148     termFound(child);
 149     if (p.contains(parent))
 150     {
 151       return true;
 152     }
 153     return false;
 154   }
 155
 156   /**
 157    * Records a valid term queried for, for reporting purposes
 158    *
 159    * @param term
 160    */
 161   private void termFound(String term)
 162   {
 163     if (!termsFound.contains(term))
 164     {
 165       synchronized (termsFound)
 166       {
 167         termsFound.add(term);
 168       }
 169     }
 170   }
 171
 172   /**
 173    * Records an invalid term queried for, for reporting purposes
 174    *
 175    * @param term
 176    */
 177   private void termNotFound(String term)
 178   {
 179     synchronized (termsNotFound)
 180     {
 181       if (!termsNotFound.contains(term))
 182       {
 183         System.out.println("SO term " + term
 184                 + " not known - may be invalid, or model if needed in "
 185                 + getClass().getName());
 186         termsNotFound.add(term);
 187       }
 188     }
 189   }
 190
 191   /**
 192    * Sorts (case-insensitive) and returns the list of valid terms queried for
 193    */
 194   @Override
 195   public List<String> termsFound()
 196   {
 197     synchronized (termsFound)
 198     {
 199       Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
 200       return termsFound;
 201     }
 202   }
 203
 204   /**
 205    * Sorts (case-insensitive) and returns the list of invalid terms queried for
 206    */
 207   @Override
 208   public List<String> termsNotFound()
 209   {
 210     synchronized (termsNotFound)
 211     {
 212       Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
 213       return termsNotFound;
 214     }
 215   }
 216 }