src/jalview/io/gff/SequenceOntologyLite.java

   1 package jalview.io.gff;
   2
   3 import java.util.ArrayList;
   4 import java.util.Collections;
   5 import java.util.HashMap;
   6 import java.util.List;
   7 import java.util.Map;
   8
   9 /**
  10  * An implementation of SequenceOntologyI that hard codes terms of interest.
  11  *
  12  * Use this in unit testing by calling SequenceOntology.setInstance(new
  13  * SequenceOntologyLite()).
  14  *
  15  * May also become a stand-in for SequenceOntology in the applet if we want to
  16  * avoid the additional jars needed for parsing the full SO.
  17  *
  18  * @author gmcarstairs
  19  *
  20  */
  21 public class SequenceOntologyLite implements SequenceOntologyI
  22 {
  23   /*
  24    * initial selection of types of interest when processing Ensembl features
  25    * NB unlike the full SequenceOntology we don't traverse indirect
  26    * child-parent relationships here so e.g. need to list every sub-type
  27    * of gene (direct or indirect) that is of interest
  28    */
  29   // @formatter:off
  30   private final String[][] TERMS = new String[][] {
  31
  32     /*
  33      * gene sub-types:
  34      */
  35     { "gene", "gene" },
  36     { "ncRNA_gene", "gene" },
  37     { "snRNA_gene", "gene" },
  38     { "miRNA_gene", "gene" },
  39     { "lincRNA_gene", "gene" },
  40     { "rRNA_gene", "gene" },
  41
  42     /*
  43      * transcript sub-types:
  44      */
  45     { "transcript", "transcript" },
  46     { "mature_transcript", "transcript" },
  47     { "processed_transcript", "transcript" },
  48     { "aberrant_processed_transcript", "transcript" },
  49     { "ncRNA", "transcript" },
  50     { "snRNA", "transcript" },
  51     { "miRNA", "transcript" },
  52     { "lincRNA", "transcript" },
  53     { "rRNA", "transcript" },
  54     { "mRNA", "transcript" },
  55     // there are many more sub-types of ncRNA...
  56
  57     /*
  58      * sequence_variant sub-types:
  59      */
  60     { "sequence_variant", "sequence_variant" },
  61     { "feature_variant", "sequence_variant" },
  62     { "gene_variant", "sequence_variant" },
  63     // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
  64     // but we model it here correctly as per the SO
  65     { "NMD_transcript_variant", "sequence_variant" },
  66     { "transcript_variant", "sequence_variant" },
  67     { "structural_variant", "sequence_variant" },
  68
  69     /*
  70      * no sub-types of exon or CDS yet seen in Ensembl
  71      * some added here for testing purposes
  72      */
  73     { "exon", "exon" },
  74     { "coding_exon", "exon" },
  75     { "CDS", "CDS" },
  76     { "CDS_predicted", "CDS" },
  77
  78     /*
  79      * terms used in exonerate or PASA GFF
  80      */
  81     { "protein_match", "protein_match"},
  82     { "nucleotide_match", "nucleotide_match"},
  83     { "cDNA_match", "nucleotide_match"},
  84
  85     /*
  86      * used in InterProScan GFF
  87      */
  88     { "polypeptide", "polypeptide" }
  89   };
  90   // @formatter:on
  91
  92   /*
  93    * hard-coded list of any parents (direct or indirect)
  94    * that we care about for a term
  95    */
  96   private Map<String, List<String>> parents;
  97
  98   private List<String> termsFound;
  99
 100   private List<String> termsNotFound;
 101
 102   public SequenceOntologyLite()
 103   {
 104     termsFound = new ArrayList<String>();
 105     termsNotFound = new ArrayList<String>();
 106     loadStaticData();
 107   }
 108
 109   /**
 110    * Loads hard-coded data into a lookup table of {term, {list_of_parents}}
 111    */
 112   private void loadStaticData()
 113   {
 114     parents = new HashMap<String, List<String>>();
 115     for (String [] pair : TERMS) {
 116       List<String> p = parents.get(pair[0]);
 117       if (p == null)
 118       {
 119         p = new ArrayList<String>();
 120         parents.put(pair[0], p);
 121       }
 122       p.add(pair[1]);
 123     }
 124   }
 125
 126   /**
 127    * Answers true if 'child' isA 'parent' (including equality). In this
 128    * implementation, based only on hard-coded values.
 129    */
 130   @Override
 131   public boolean isA(String child, String parent)
 132   {
 133     if (child == null || parent == null)
 134     {
 135       return false;
 136     }
 137     if (child.equals(parent))
 138     {
 139       termFound(child);
 140       return true;
 141     }
 142
 143     List<String> p = parents.get(child);
 144     if (p == null)
 145     {
 146       termNotFound(child);
 147       return false;
 148     }
 149     termFound(child);
 150     if (p.contains(parent))
 151     {
 152       return true;
 153     }
 154     return false;
 155   }
 156
 157   /**
 158    * Records a valid term queried for, for reporting purposes
 159    *
 160    * @param term
 161    */
 162   private void termFound(String term)
 163   {
 164     if (!termsFound.contains(term))
 165     {
 166       synchronized (termsFound)
 167       {
 168         termsFound.add(term);
 169       }
 170     }
 171   }
 172
 173   /**
 174    * Records an invalid term queried for, for reporting purposes
 175    *
 176    * @param term
 177    */
 178   private void termNotFound(String term)
 179   {
 180     synchronized (termsNotFound)
 181     {
 182       if (!termsNotFound.contains(term))
 183       {
 184         System.out.println("SO term " + term
 185                 + " not known - add to model if needed in "
 186                 + getClass().getName());
 187         termsNotFound.add(term);
 188       }
 189     }
 190   }
 191
 192   /**
 193    * Sorts (case-insensitive) and returns the list of valid terms queried for
 194    */
 195   @Override
 196   public List<String> termsFound()
 197   {
 198     synchronized (termsFound)
 199     {
 200       Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
 201       return termsFound;
 202     }
 203   }
 204
 205   /**
 206    * Sorts (case-insensitive) and returns the list of invalid terms queried for
 207    */
 208   @Override
 209   public List<String> termsNotFound()
 210   {
 211     synchronized (termsNotFound)
 212     {
 213       Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
 214       return termsNotFound;
 215     }
 216   }
 217 }