JAL-1705 JAL-1191 SequenceOntologyLite added as hard-coded alternative

author gmungoc <g.m.carstairs@dundee.ac.uk>

Mon, 1 Feb 2016 10:17:49 +0000 (10:17 +0000)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Mon, 1 Feb 2016 10:17:49 +0000 (10:17 +0000)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 1 Feb 2016 10:17:49 +0000 (10:17 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 1 Feb 2016 10:17:49 +0000 (10:17 +0000)
diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java

index 41538eb..34eaa60 100644 (file)
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -34,7 +34,8 @@ import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceGroup;
  import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
  import jalview.schemes.ResidueProperties;
  import jalview.util.DBRefUtils;
  import jalview.util.MapList;
@@ -1435,7 +1436,7 @@ public class AlignmentUtils
        copyTo = copyTo.getDatasetSequence();
      }
  
-    SequenceOntology so = SequenceOntology.getInstance();
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
      int count = 0;
      SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
      if (sfs != null)
diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java

index 139e44f..373286f 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblCdna.java
+++ b/src/jalview/ext/ensembl/EnsemblCdna.java
@@ -1,7 +1,8 @@
  package jalview.ext.ensembl;
  
  import jalview.datamodel.SequenceFeature;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
  
  import java.util.List;
  
@@ -68,8 +69,8 @@ public class EnsemblCdna extends EnsemblSeqProxy
    @Override
    protected boolean identifiesSequence(SequenceFeature sf, String accId)
    {
-    if (SequenceOntology.getInstance().isA(sf.getType(),
-            SequenceOntology.EXON))
+    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+            SequenceOntologyI.EXON))
      {
        String parentFeature = (String) sf.getValue(PARENT);
        if (("transcript:" + accId).equals(parentFeature))
diff --git a/src/jalview/ext/ensembl/EnsemblCds.java b/src/jalview/ext/ensembl/EnsemblCds.java

index 22c0a06..ec5780f 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblCds.java
+++ b/src/jalview/ext/ensembl/EnsemblCds.java
@@ -2,7 +2,8 @@ package jalview.ext.ensembl;
  
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
  
  import java.util.List;
  
@@ -51,8 +52,8 @@ public class EnsemblCds extends EnsemblSeqProxy
    @Override
    protected boolean retainFeature(SequenceFeature sf, String accessionId)
    {
-    if (SequenceOntology.getInstance().isA(sf.getType(),
-            SequenceOntology.CDS))
+    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+            SequenceOntologyI.CDS))
      {
        return false;
      }
@@ -67,8 +68,8 @@ public class EnsemblCds extends EnsemblSeqProxy
    @Override
    protected boolean identifiesSequence(SequenceFeature sf, String accId)
    {
-    if (SequenceOntology.getInstance().isA(sf.getType(),
-            SequenceOntology.CDS))
+    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+            SequenceOntologyI.CDS))
      {
        String parentFeature = (String) sf.getValue(PARENT);
        if (("transcript:" + accId).equals(parentFeature))
diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java

index 1325bec..df246f8 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblGene.java
+++ b/src/jalview/ext/ensembl/EnsemblGene.java
@@ -4,7 +4,8 @@ import jalview.datamodel.AlignmentI;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
  import jalview.util.MapList;
  
  import java.util.ArrayList;
@@ -132,10 +133,10 @@ public class EnsemblGene extends EnsemblSeqProxy
       */
      String parentId = "transcript:" + accId;
      List<SequenceFeature> splices = findFeatures(gene,
-            SequenceOntology.EXON, parentId);
+            SequenceOntologyI.EXON, parentId);
      if (splices.isEmpty())
      {
-      splices = findFeatures(gene, SequenceOntology.CDS, parentId);
+      splices = findFeatures(gene, SequenceOntologyI.CDS, parentId);
      }
  
      int transcriptLength = 0;
@@ -176,7 +177,7 @@ public class EnsemblGene extends EnsemblSeqProxy
      /*
       * and finally fetch the protein product and save as a cross-reference
       */
-    addProteinProduct(transcript);
+    new EnsemblCdna().addProteinProduct(transcript);
  
      return transcript;
    }
@@ -240,8 +241,8 @@ public class EnsemblGene extends EnsemblSeqProxy
    @Override
    protected boolean identifiesSequence(SequenceFeature sf, String accId)
    {
-    if (SequenceOntology.getInstance().isA(sf.getType(),
-            SequenceOntology.GENE))
+    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+            SequenceOntologyI.GENE))
      {
        String id = (String) sf.getValue(ID);
        if (("gene:" + accId).equals(id))
@@ -262,8 +263,8 @@ public class EnsemblGene extends EnsemblSeqProxy
    @Override
    protected boolean retainFeature(SequenceFeature sf, String accessionId)
    {
-    if (SequenceOntology.getInstance().isA(sf.getType(),
-            SequenceOntology.GENE))
+    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+            SequenceOntologyI.GENE))
      {
        return false;
      }
@@ -299,4 +300,13 @@ public class EnsemblGene extends EnsemblSeqProxy
      return super.getCrossReferenceDatabases();
    }
  
+  /**
+   * Override to do nothing as Ensembl doesn't return a protein sequence for a
+   * gene identifier
+   */
+  @Override
+  protected void addProteinProduct(SequenceI querySeq)
+  {
+  }
+
  }
diff --git a/src/jalview/ext/ensembl/EnsemblOverlap.java b/src/jalview/ext/ensembl/EnsemblOverlap.java

index b1514d8..507b6f8 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblOverlap.java
+++ b/src/jalview/ext/ensembl/EnsemblOverlap.java
@@ -42,14 +42,11 @@ public class EnsemblOverlap extends EnsemblRestClient
    @Override
    public AlignmentI getSequenceRecords(String query) throws IOException
    {
-    long now = System.currentTimeMillis();
      // TODO: use a vararg String... for getSequenceRecords instead?
      List<String> queries = new ArrayList<String>();
      queries.add(query);
      FileParse fp = getSequenceReader(queries);
      FeaturesFile fr = new FeaturesFile(fp);
-    System.out.println(getClass().getName() + " took "
-            + (System.currentTimeMillis() - now) + "ms to fetch");
      return new Alignment(fr.getSeqsAsArray());
    }
  
diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java

index 2fd7fa3..dc4cc88 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblRestClient.java
+++ b/src/jalview/ext/ensembl/EnsemblRestClient.java
@@ -140,6 +140,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
    protected BufferedReader getHttpResponse(URL url, List<String> ids)
            throws IOException
    {
+    long now = System.currentTimeMillis();
      HttpURLConnection connection = (HttpURLConnection) url.openConnection();
    
      /*
@@ -175,6 +176,8 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
                "Response code was not 200. Detected response was "
                        + responseCode);
      }
+    System.out.println(getClass().getName() + " took "
+            + (System.currentTimeMillis() - now) + "ms to fetch");
    
      BufferedReader reader = null;
      reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java

index 0bfeda1..744aa49 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java
+++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java
@@ -11,7 +11,8 @@ import jalview.datamodel.SequenceI;
  import jalview.exceptions.JalviewException;
  import jalview.io.FastaFile;
  import jalview.io.FileParse;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
  import jalview.schemes.ResidueProperties;
  import jalview.util.DBRefUtils;
  import jalview.util.MapList;
@@ -127,7 +128,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
    @Override
    public AlignmentI getSequenceRecords(String query) throws Exception
    {
-    long now = System.currentTimeMillis();
      // TODO use a String... query vararg instead?
  
      // danger: accession separator used as a regex here, a string elsewhere
@@ -156,14 +156,15 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
                  + " chunks. Unexpected problem (" + r.getLocalizedMessage()
                  + ")";
          System.err.println(msg);
-        if (alignment != null)
-        {
-          break; // return what we got
-        }
-        else
-        {
-          throw new JalviewException(msg, r);
-        }
+        break;
+        // if (alignment != null)
+        // {
+        // break; // return what we got
+        // }
+        // else
+        // {
+        // throw new JalviewException(msg, r);
+        // }
        }
      }
  
@@ -181,8 +182,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
        getCrossReferences(seq);
      }
  
-    System.out.println(getClass().getName() + " took "
-            + (System.currentTimeMillis() - now) + "ms to fetch");
      return alignment;
    }
  
@@ -368,11 +367,12 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
    }
  
    /**
-   * Adds CDS ranges to the ranges list, and returns the total length mapped.
+   * Adds CDS ranges to the ranges list, and returns the total length mapped
+   * from.
     * 
-   * No need to worry about reverse strand dna here since the retrieved sequence
-   * is as transcribed (reverse complement for reverse strand), i.e in the same
-   * sense as the peptide.
+   * No need to worry about reverse strand dna here, since the retrieved
+   * sequence is as transcribed (reverse complement for reverse strand), i.e. in
+   * the same sense as the peptide.
     * 
     * @param dnaSeq
     * @param ranges
@@ -391,7 +391,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
        /*
         * process a CDS feature (or a sub-type of CDS)
         */
-      if (SequenceOntology.getInstance().isA(sf.getType(), SequenceOntology.CDS))
+      if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+              SequenceOntologyI.CDS))
        {
          int phase = 0;
          try {
@@ -579,7 +580,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     *          the start position of the sequence we are mapping to
     * @return
     */
-  protected MapList getGenomicRanges(SequenceI sourceSequence,
+  protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
            String accId, int start)
    {
      SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
@@ -605,11 +606,12 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
         */
        if (identifiesSequence(sf, accId))
        {
-          int strand = sf.getStrand();
-  
-          if (directionSet && strand != direction)
-          {
-            // abort - mix of forward and backward
+        int strand = sf.getStrand();
+        strand = strand == 0 ? 1 : strand; // treat unknown as forward
+
+        if (directionSet && strand != direction)
+        {
+          // abort - mix of forward and backward
            System.err.println("Error: forward and backward strand for "
                    + accId);
              return null;
@@ -654,8 +656,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       */
      Collections.sort(regions, new RangeSorter(direction == 1));
    
-    List<int[]> to = new ArrayList<int[]>();
-    to.add(new int[] { start, start + mappedLength - 1 });
+    List<int[]> to = Arrays.asList(new int[] { start,
+        start + mappedLength - 1 });
    
      return new MapList(regions, to, 1, 1);
    }
@@ -710,7 +712,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
        /*
         * for sequence_variant, make an additional feature with consequence
         */
-      if (SequenceOntology.getInstance().isSequenceVariant(sf.getType()))
+      if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+              SequenceOntologyI.SEQUENCE_VARIANT))
        {
          String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
          if (consequence != null)
@@ -741,7 +744,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      }
  
      SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
-    MapList mapping = getGenomicRanges(sourceSequence, accessionId,
+    MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId,
              targetSequence.getStart());
      if (mapping == null)
      {
@@ -850,7 +853,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      
      SequenceFeature[] sfs = sequence.getSequenceFeatures();
      if (sfs != null) {
-      SequenceOntology so = SequenceOntology.getInstance();
+      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
        for (SequenceFeature sf :sfs) {
          if (so.isA(sf.getType(), type))
          {
@@ -888,7 +891,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      }
    
      AlignmentUtils.transferFeatures(dnaSeq, peptide, dnaToProtein,
-            SequenceOntology.EXON);
+            SequenceOntologyI.EXON);
  
      LinkedHashMap<Integer, String[][]> variants = buildDnaVariantsMap(
              dnaSeq, dnaToProtein);
@@ -909,7 +912,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
          String desc = StringUtils.listToDelimitedString(peptideVariants,
                  ", ");
          SequenceFeature sf = new SequenceFeature(
-                SequenceOntology.SEQUENCE_VARIANT, desc, peptidePos,
+                SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
                  peptidePos, 0f, null);
          peptide.addSequenceFeature(sf);
          count++;
@@ -934,7 +937,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       * LinkedHashMap ensures we add the peptide features in sequence order
       */
      LinkedHashMap<Integer, String[][]> variants = new LinkedHashMap<Integer, String[][]>();
-    SequenceOntology so = SequenceOntology.getInstance();
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
    
      SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
      if (dnaFeatures == null)
@@ -957,7 +960,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
          // not handling multi-locus variant features
          continue;
        }
-      if (so.isSequenceVariant(sf.getType()))
+      if (so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT))
        {
          int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
          if (mapsTo == null)
@@ -1096,6 +1099,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
    public static boolean isTranscript(String featureType)
    {
      return NMD_VARIANT.equals(featureType)
-            || SequenceOntology.getInstance().isA(featureType, SequenceOntology.TRANSCRIPT);
+            || SequenceOntologyFactory.getInstance().isA(featureType,
+                    SequenceOntologyI.TRANSCRIPT);
    }
  }
diff --git a/src/jalview/io/gff/Gff3Helper.java b/src/jalview/io/gff/Gff3Helper.java

index 2e98e4e..d29645b 100644 (file)
--- a/src/jalview/io/gff/Gff3Helper.java
+++ b/src/jalview/io/gff/Gff3Helper.java
@@ -70,12 +70,13 @@ public class Gff3Helper extends GffHelperBase
        String atts = gff[ATTRIBUTES_COL];
        Map<String, List<String>> attributes = parseNameValuePairs(atts);
  
-      if (SequenceOntology.getInstance().isProteinMatch(soTerm))
+      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+      if (so.isA(soTerm, SequenceOntologyI.PROTEIN_MATCH))
        {
-        sf = processProteinMatch(attributes, seq, gff, align,
-                newseqs, relaxedIdMatching);
+        sf = processProteinMatch(attributes, seq, gff, align, newseqs,
+                relaxedIdMatching);
        }
-      else if (SequenceOntology.getInstance().isNucleotideMatch(soTerm))
+      else if (so.isA(soTerm, SequenceOntologyI.NUCLEOTIDE_MATCH))
        {
          sf = processNucleotideMatch(attributes, seq, gff, align,
                  newseqs, relaxedIdMatching);
@@ -372,9 +373,9 @@ public class Gff3Helper extends GffHelperBase
        desc = target.split(" ")[0];
      }
  
-    SequenceOntology so = SequenceOntology.getInstance();
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
      String type = sf.getType();
-    if (so.isSequenceVariant(type))
+    if (so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT))
      {
        /*
         * Ensembl returns dna variants as 'alleles'
diff --git a/src/jalview/io/gff/InterProScanHelper.java b/src/jalview/io/gff/InterProScanHelper.java

index 3323e27..68d5d4f 100644 (file)
--- a/src/jalview/io/gff/InterProScanHelper.java
+++ b/src/jalview/io/gff/InterProScanHelper.java
@@ -89,10 +89,11 @@ public class InterProScanHelper extends Gff3Helper
     */
    public static boolean recognises(String[] columns)
    {
-    SequenceOntology so = SequenceOntology.getInstance();
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
      String type = columns[TYPE_COL];
-    if (so.isProteinMatch(type)
-            || (".".equals(columns[SOURCE_COL]) && so.isPolypeptide(type)))
+    if (so.isA(type, SequenceOntologyI.PROTEIN_MATCH)
+            || (".".equals(columns[SOURCE_COL]) && so.isA(type,
+                    SequenceOntologyI.POLYPEPTIDE)))
      {
        return true;
      }
diff --git a/src/jalview/io/gff/SequenceOntology.java b/src/jalview/io/gff/SequenceOntology.java

index 685b83e..b069eef 100644 (file)
--- a/src/jalview/io/gff/SequenceOntology.java
+++ b/src/jalview/io/gff/SequenceOntology.java
@@ -7,6 +7,7 @@ import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.text.ParseException;
  import java.util.ArrayList;
+import java.util.Collections;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
@@ -25,32 +26,8 @@ import org.biojava.nbio.ontology.utils.Annotation;
   * A wrapper class that parses the Sequence Ontology and exposes useful access
   * methods. This version uses the BioJava parser.
   */
-public class SequenceOntology
+class SequenceOntology implements SequenceOntologyI
  {
-
-  /*
-   * selected commonly used values for quick reference
-   */
-  // SO:0000316
-  public static final String CDS = "CDS";
-
-  // SO:0001060
-  public static final String SEQUENCE_VARIANT = "sequence_variant";
-
-  // SO:0000147
-  public static final String EXON = "exon";
-
-  // SO:0000673
-  public static final String TRANSCRIPT = "transcript";
-
-  // SO:0000704
-  public static final String GENE = "gene";
-
-  /*
-   * singleton instance of this class
-   */
-  private static SequenceOntology instance;
-
    /*
     * the parsed Ontology data as modelled by BioJava
     */
@@ -73,26 +50,18 @@ public class SequenceOntology
     */
    private Map<Term, List<Term>> termIsA;
  
-  /**
-   * Returns singleton instance
-   * 
-   * @return
-   */
-  public synchronized static SequenceOntology getInstance()
-  {
-    if (instance == null)
-    {
-      instance = new SequenceOntology();
-    }
-    return instance;
-  }
+  private List<String> termsFound;
+
+  private List<String> termsNotFound;
  
    /**
-   * Private constructor to enforce use of singleton. Parses and caches the SO
-   * OBO data file.
+   * Package private constructor to enforce use of singleton. Parses and caches
+   * the SO OBO data file.
     */
-  private SequenceOntology()
+  SequenceOntology()
    {
+    termsFound = new ArrayList<String>();
+    termsNotFound = new ArrayList<String>();
      termsByDescription = new HashMap<String, Term>();
      termIsA = new HashMap<Term, List<Term>>();
  
@@ -248,7 +217,7 @@ public class SequenceOntology
     */
    public boolean isNucleotideMatch(String soTerm)
    {
-    return isA(soTerm, "nucleotide_match");
+    return isA(soTerm, NUCLEOTIDE_MATCH);
    }
  
    /**
@@ -261,7 +230,7 @@ public class SequenceOntology
     */
    public boolean isProteinMatch(String soTerm)
    {
-    return isA(soTerm, "protein_match");
+    return isA(soTerm, PROTEIN_MATCH);
    }
  
    /**
@@ -274,7 +243,7 @@ public class SequenceOntology
     */
    public boolean isPolypeptide(String soTerm)
    {
-    return isA(soTerm, "polypeptide");
+    return isA(soTerm, POLYPEPTIDE);
    }
  
    /**
@@ -285,23 +254,70 @@ public class SequenceOntology
     * @param parent
     * @return
     */
+  @Override
    public boolean isA(String child, String parent)
    {
+    if (child == null || parent == null)
+    {
+      return false;
+    }
      /*
       * optimise trivial checks like isA("CDS", "CDS")
       */
      if (child.equals(parent))
      {
+      termFound(child);
        return true;
      }
  
      Term childTerm = getTerm(child);
+    if (childTerm != null)
+    {
+      termFound(child);
+    }
+    else
+    {
+      termNotFound(child);
+    }
      Term parentTerm = getTerm(parent);
  
      return termIsA(childTerm, parentTerm);
    }
  
    /**
+   * Records a valid term queried for (once only), for reporting purposes
+   *
+   * @param term the term name as passed to isA
+   */
+  private void termFound(String term)
+  {
+    synchronized (termsFound)
+    {
+      if (!termsFound.contains(term))
+      {
+        termsFound.add(term);
+      }
+    }
+  }
+
+  /**
+   * Records an invalid term queried for (once only), for reporting purposes;
+   * a message is written to stderr the first time a given term is recorded
+   * @param term the term name as passed to isA
+   */
+  private void termNotFound(String term)
+  {
+    synchronized (termsNotFound)
+    {
+      if (!termsNotFound.contains(term))
+      {
+        System.err.println("SO term " + term + " invalid");
+        termsNotFound.add(term);
+      }
+    }
+  }
+
+  /**
     * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
     * 
     * @param childTerm
@@ -402,6 +418,32 @@ public class SequenceOntology
  
    public boolean isSequenceVariant(String soTerm)
    {
-    return isA(soTerm, "sequence_variant");
+    return isA(soTerm, SEQUENCE_VARIANT);
+  }
+
+  /**
+   * Returns a sorted (case-insensitive) copy of the valid terms queried for
+   */
+  @Override
+  public List<String> termsFound()
+  {
+    synchronized (termsFound)
+    {
+      Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
+      return new ArrayList<String>(termsFound); // copy: list is lock-guarded
+    }
+  }
+
+  /**
+   * Returns a sorted (case-insensitive) copy of the invalid terms queried for
+   */
+  @Override
+  public List<String> termsNotFound()
+  {
+    synchronized (termsNotFound)
+    {
+      Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
+      return new ArrayList<String>(termsNotFound); // copy: list is lock-guarded
+    }
    }
  }
diff --git a/src/jalview/io/gff/SequenceOntologyFactory.java b/src/jalview/io/gff/SequenceOntologyFactory.java

new file mode 100644 (file)

index 0000000..3eaa5d1
--- /dev/null
+++ b/src/jalview/io/gff/SequenceOntologyFactory.java
@@ -0,0 +1,46 @@
+package jalview.io.gff;
+
+/**
+ * Holds the single shared SequenceOntologyI implementation, for use by code
+ * that needs to test Sequence Ontology 'isA' relationships.
+ *
+ * If no instance has been supplied via setInstance, the first call to
+ * getInstance creates a SequenceOntologyLite. Both accessors synchronize on
+ * this class, so an instance set by one thread is visible to other threads.
+ */
+public class SequenceOntologyFactory
+{
+  /*
+   * the shared instance; null until first requested or explicitly set
+   */
+  private static SequenceOntologyI instance;
+
+  /**
+   * Returns the shared instance, first creating a SequenceOntologyLite if none
+   * is currently set.
+   *
+   * @return the shared ontology, never null
+   */
+  public static synchronized SequenceOntologyI getInstance()
+  {
+    if (instance == null)
+    {
+      // instance = new SequenceOntology();
+      instance = new SequenceOntologyLite();
+    }
+    return instance;
+  }
+
+  /**
+   * Replaces the shared instance, for example to inject an implementation in
+   * unit tests. Passing null makes the next call to getInstance create a new
+   * default instance.
+   *
+   * @param so
+   *          the ontology to share from now on; may be null
+   */
+  public static synchronized void setInstance(SequenceOntologyI so)
+  {
+    instance = so;
+  }
+}
diff --git a/src/jalview/io/gff/SequenceOntologyI.java b/src/jalview/io/gff/SequenceOntologyI.java

new file mode 100644 (file)

index 0000000..8128177
--- /dev/null
+++ b/src/jalview/io/gff/SequenceOntologyI.java
@@ -0,0 +1,67 @@
+package jalview.io.gff;
+
+import java.util.List;
+
+/**
+ * Names the Sequence Ontology terms that are referred to in code, and declares
+ * the queries that an ontology implementation has to answer.
+ */
+public interface SequenceOntologyI
+{
+  /*
+   * a selection of frequently used term names; interface fields are
+   * implicitly public, static and final so the modifiers are omitted
+   */
+  String POLYPEPTIDE = "polypeptide";
+
+  String PROTEIN_MATCH = "protein_match";
+
+  String NUCLEOTIDE_MATCH = "nucleotide_match";
+
+  // SO:0000316
+  String CDS = "CDS";
+
+  // SO:0001060
+  String SEQUENCE_VARIANT = "sequence_variant";
+
+  // SO:0000147
+  String EXON = "exon";
+
+  // SO:0000673
+  String TRANSCRIPT = "transcript";
+
+  // SO:0000704
+  String GENE = "gene";
+
+  /**
+   * Answers whether childTerm 'isA' parentTerm in the ontology.
+   *
+   * @param childTerm
+   * @param parentTerm
+   * @return
+   */
+  boolean isA(String childTerm, String parentTerm);
+
+  /**
+   * Lists, in sorted order, the valid terms that have been queried for - that
+   * is, terms processed which exist in the SO - by their friendly description.
+   *
+   * A hard-coded stand-in for the full SO can be checked against this list to
+   * confirm it includes every term needed for correct processing.
+   *
+   * @return
+   */
+  List<String> termsFound();
+
+  /**
+   * Lists, in sorted order, the invalid terms that have been queried for - that
+   * is, terms processed which were not found in the SO - by their friendly
+   * description.
+   *
+   * Use this to report any 'non-compliance' in data, and/or to report valid
+   * terms that are missing from a hard-coded stand-in for the full SO.
+   *
+   * @return
+   */
+  List<String> termsNotFound();
+}
diff --git a/src/jalview/io/gff/SequenceOntologyLite.java b/src/jalview/io/gff/SequenceOntologyLite.java

new file mode 100644 (file)

index 0000000..173dea6
--- /dev/null
+++ b/src/jalview/io/gff/SequenceOntologyLite.java
@@ -0,0 +1,220 @@
+package jalview.io.gff;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An implementation of SequenceOntologyI that hard codes terms of interest.
+ *
+ * Use this in unit testing by calling SequenceOntologyFactory.setInstance(new
+ * SequenceOntologyLite()).
+ *
+ * May also become a stand-in for SequenceOntology in the applet if we want to
+ * avoid the additional jars needed for parsing the full SO.
+ *
+ * Instances record which terms were queried for; that bookkeeping is guarded
+ * by synchronizing on the respective list.
+ *
+ * @author gmcarstairs
+ *
+ */
+public class SequenceOntologyLite implements SequenceOntologyI
+{
+  /*
+   * initial selection of types of interest when processing Ensembl features,
+   * held as { term, parent } pairs; every term is also listed as its own
+   * parent. Shared by all instances, as the data never changes.
+   */
+  // @formatter:off
+  private static final String[][] TERMS = new String[][] {
+
+    /*
+     * gene sub-types:
+     */
+    { "gene", "gene" },
+    { "ncRNA_gene", "gene" },
+    { "snRNA_gene", "gene" },
+
+    /*
+     * transcript sub-types:
+     */
+    { "transcript", "transcript" },
+    { "mature_transcript", "transcript" },
+    { "ncRNA", "transcript" },
+    { "snRNA", "transcript" },
+    { "aberrant_processed_transcript", "transcript" },
+
+    /*
+     * sequence_variant sub-types:
+     */
+    { "sequence_variant", "sequence_variant" },
+    { "feature_variant", "sequence_variant" },
+    { "gene_variant", "sequence_variant" },
+    // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
+    // but we model it here correctly as per the SO
+    { "NMD_transcript_variant", "sequence_variant" },
+    { "transcript_variant", "sequence_variant" },
+    { "structural_variant", "sequence_variant" },
+
+    /*
+     * no sub-types of exon or CDS yet encountered; add if needed
+     */
+    { "exon", "exon" },
+    { "CDS", "CDS" }
+  };
+  // @formatter:on
+
+  /*
+   * hard-coded list of any parents (direct or indirect)
+   * that we care about for a term; not modified after construction
+   */
+  private final Map<String, List<String>> parents;
+
+  /*
+   * known terms that have been queried for; guarded by its own monitor
+   */
+  private final List<String> termsFound;
+
+  /*
+   * unknown terms that have been queried for; guarded by its own monitor
+   */
+  private final List<String> termsNotFound;
+
+  /**
+   * Constructs an ontology that knows only the hard-coded terms
+   */
+  public SequenceOntologyLite()
+  {
+    termsFound = new ArrayList<String>();
+    termsNotFound = new ArrayList<String>();
+    parents = loadStaticData();
+  }
+
+  /**
+   * Builds a lookup table of {term, {list_of_parents}} from the hard-coded data
+   *
+   * @return
+   */
+  private static Map<String, List<String>> loadStaticData()
+  {
+    Map<String, List<String>> lookup = new HashMap<String, List<String>>();
+    for (String[] pair : TERMS)
+    {
+      List<String> p = lookup.get(pair[0]);
+      if (p == null)
+      {
+        p = new ArrayList<String>();
+        lookup.put(pair[0], p);
+      }
+      p.add(pair[1]);
+    }
+    return lookup;
+  }
+
+  /**
+   * Answers true if 'child' isA 'parent' (including equality). In this
+   * implementation, based only on hard-coded values. Answers false if either
+   * argument is null. Also records the child term as found, or not found, for
+   * later reporting.
+   *
+   * @param child
+   * @param parent
+   * @return
+   */
+  @Override
+  public boolean isA(String child, String parent)
+  {
+    if (child == null || parent == null)
+    {
+      return false;
+    }
+    if (child.equals(parent))
+    {
+      // NB recorded as 'found' without checking it is a known term
+      termFound(child);
+      return true;
+    }
+
+    List<String> p = parents.get(child);
+    if (p == null)
+    {
+      termNotFound(child);
+      return false;
+    }
+    termFound(child);
+    return p.contains(parent);
+  }
+
+  /**
+   * Records a valid term queried for, for reporting purposes
+   *
+   * @param term
+   */
+  private void termFound(String term)
+  {
+    /*
+     * check and add under the same lock, so that concurrent callers
+     * cannot both add the term, or read the list while it is modified
+     */
+    synchronized (termsFound)
+    {
+      if (!termsFound.contains(term))
+      {
+        termsFound.add(term);
+      }
+    }
+  }
+
+  /**
+   * Records an invalid term queried for, for reporting purposes; the first
+   * time a term is recorded, a message is also written to stdout
+   *
+   * @param term
+   */
+  private void termNotFound(String term)
+  {
+    synchronized (termsNotFound)
+    {
+      if (!termsNotFound.contains(term))
+      {
+        System.out.println("SO term " + term
+                + " not known - either invalid or needs modelled in "
+                + getClass().getName());
+        termsNotFound.add(term);
+      }
+    }
+  }
+
+  /**
+   * Sorts (case-insensitive) the valid terms queried for, and returns a copy
+   * of the list, so that callers cannot see or cause changes to the list
+   * while it is not locked
+   */
+  @Override
+  public List<String> termsFound()
+  {
+    synchronized (termsFound)
+    {
+      Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
+      return new ArrayList<String>(termsFound);
+    }
+  }
+
+  /**
+   * Sorts (case-insensitive) the invalid terms queried for, and returns a
+   * copy of the list, so that callers cannot see or cause changes to the list
+   * while it is not locked
+   */
+  @Override
+  public List<String> termsNotFound()
+  {
+    synchronized (termsNotFound)
+    {
+      Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
+      return new ArrayList<String>(termsNotFound);
+    }
+  }
+}
diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java

index c525e95..31745e5 100644 (file)
--- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
+++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
@@ -4,14 +4,19 @@ import static org.testng.AssertJUnit.assertEquals;
  
  import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
  import jalview.io.AppletFormatAdapter;
  import jalview.io.FastaFile;
  import jalview.io.FileParse;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
  
  import java.lang.reflect.Method;
  import java.net.MalformedURLException;
  import java.net.URL;
+import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.List;
  
@@ -280,4 +285,82 @@ public class EnsemblSeqProxyTest
      variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S");
      assertEquals("[C, R, T, W]", variants.toString());
    }
+  
+  /**
+   * Verifies that getCdsRanges reports only those regions of a dna sequence
+   * that carry a CDS feature (or a feature accepted as a kind of CDS).
+   */
+  @Test(groups = "Functional")
+  public void testGetCdsRanges()
+  {
+    EnsemblSeqProxy testee = new EnsemblSeqProxyAdapter();
+    SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
+    dnaSeq.createDatasetSequence();
+    SequenceI dataset = dnaSeq.getDatasetSequence();
+
+    // CDS feature at dna positions 4-6
+    SequenceFeature cds = new SequenceFeature("CDS", "", 4, 6, 0f, null);
+    dataset.addSequenceFeature(cds);
+    // an exon feature is expected to contribute no range
+    SequenceFeature exon = new SequenceFeature("exon", "", 7, 9, 0f, null);
+    dataset.addSequenceFeature(exon);
+    // second CDS-like feature at dna positions 10-12
+    SequenceFeature sub = new SequenceFeature("some_cds", "", 10, 12, 0f, null);
+    dataset.addSequenceFeature(sub);
+
+    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+    List<int[]> cdsRanges = new ArrayList<int[]>();
+    int mappedLength = testee.getCdsRanges(dnaSeq, cdsRanges);
+    assertEquals(6, mappedLength);
+    assertEquals(2, cdsRanges.size());
+    int[] first = cdsRanges.get(0);
+    assertEquals(4, first[0]);
+    assertEquals(6, first[1]);
+    int[] second = cdsRanges.get(1);
+    assertEquals(10, second[0]);
+    assertEquals(12, second[1]);
+  }
+
+  @Test(groups = "Functional")
+  public void getGenomicRangesFromFeatures()
+  {
+    // TODO placeholder: no assertions yet, so this test currently always passes
+  }
+
+  /**
+   * Tests for the method that maps the subset of a dna sequence that has CDS
+   * (or subtype) feature - case where the start codon is incomplete, so the
+   * first CDS feature carries a non-zero phase.
+   */
+  @Test(groups = "Functional")
+  public void testGetCdsRanges_fivePrimeIncomplete()
+  {
+    EnsemblSeqProxy testee = new EnsemblSeqProxyAdapter();
+
+    SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
+    dnaSeq.createDatasetSequence();
+    SequenceI ds = dnaSeq.getDatasetSequence();
+
+    // CDS for dna 5-6 (incomplete codon), 7-9
+    SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
+    sf.setPhase("2"); // skip 2 bases to start of next codon
+    ds.addSequenceFeature(sf);
+    // CDS for dna 13-15
+    sf = new SequenceFeature("some_cds", "", 13, 15, 0f, null);
+    ds.addSequenceFeature(sf);
+
+    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+    List<int[]> ranges = new ArrayList<int[]>();
+    int mappedLength = testee.getCdsRanges(dnaSeq, ranges);
+
+    /*
+     * mapping should start at the first complete codon (7-9, skipping 5-6)
+     */
+    assertEquals(6, mappedLength);
+    assertEquals(2, ranges.size());
+    assertEquals(7, ranges.get(0)[0]);
+    assertEquals(9, ranges.get(0)[1]);
+    assertEquals(13, ranges.get(1)[0]);
+    assertEquals(15, ranges.get(1)[1]);
+  }
  }
\ No newline at end of file
diff --git a/test/jalview/io/gff/SequenceOntologyTest.java b/test/jalview/io/gff/SequenceOntologyTest.java

index 6c9226f..f791a1e 100644 (file)
--- a/test/jalview/io/gff/SequenceOntologyTest.java
+++ b/test/jalview/io/gff/SequenceOntologyTest.java
@@ -8,12 +8,12 @@ import org.testng.annotations.Test;
  
  public class SequenceOntologyTest
  {
-  private SequenceOntology so;
+  private SequenceOntologyI so;
  
    @BeforeMethod
    public void setUp() {
      long now = System.currentTimeMillis();
-    so = SequenceOntology.getInstance();
+    so = SequenceOntologyFactory.getInstance();
      long elapsed = System.currentTimeMillis() - now;
      System.out.println("Load and cache of Sequence Ontology took "
              + elapsed + "ms");
@@ -57,29 +57,6 @@ public class SequenceOntologyTest
    }
  
    @Test(groups = "Functional")
-  public void testIsProteinMatch()
-  {
-    assertTrue(so.isProteinMatch("protein_match"));
-    assertTrue(so.isProteinMatch("protein_hmm_match"));
-    assertFalse(so.isProteinMatch("Protein_match")); // case-sensitive
-  }
-
-  @Test(groups = "Functional")
-  public void testIsNucleotideMatch()
-  {
-    assertTrue(so.isNucleotideMatch("nucleotide_match"));
-    assertTrue(so.isNucleotideMatch("primer_match"));
-    assertTrue(so.isNucleotideMatch("cross_genome_match"));
-    assertTrue(so.isNucleotideMatch("expressed_sequence_match"));
-    assertTrue(so.isNucleotideMatch("translated_nucleotide_match"));
-    assertTrue(so.isNucleotideMatch("UST_match"));
-    assertTrue(so.isNucleotideMatch("RST_match"));
-    assertTrue(so.isNucleotideMatch("cDNA_match"));
-    assertTrue(so.isNucleotideMatch("EST_match"));
-    assertFalse(so.isNucleotideMatch("match")); // parent
-  }
-
-  @Test(groups = "Functional")
    public void testIsCDS()
    {
      assertTrue(so.isA("CDS", "CDS"));
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Mon, 1 Feb 2016 10:17:49 +0000 (10:17 +0000)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Mon, 1 Feb 2016 10:17:49 +0000 (10:17 +0000)
src/jalview/analysis/AlignmentUtils.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblCdna.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblCds.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblGene.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblOverlap.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblRestClient.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblSeqProxy.java		patch \| blob \| history
src/jalview/io/gff/Gff3Helper.java		patch \| blob \| history
src/jalview/io/gff/InterProScanHelper.java		patch \| blob \| history
src/jalview/io/gff/SequenceOntology.java		patch \| blob \| history
src/jalview/io/gff/SequenceOntologyFactory.java	[new file with mode: 0644]	patch \| blob
src/jalview/io/gff/SequenceOntologyI.java	[new file with mode: 0644]	patch \| blob
src/jalview/io/gff/SequenceOntologyLite.java	[new file with mode: 0644]	patch \| blob
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java		patch \| blob \| history
test/jalview/io/gff/SequenceOntologyTest.java		patch \| blob \| history