Merge branch 'develop' of http://source.jalview.org/git/jalview into develop
authorJim Procter <jprocter@issues.jalview.org>
Thu, 28 Feb 2019 15:31:19 +0000 (15:31 +0000)
committerJim Procter <jprocter@issues.jalview.org>
Thu, 28 Feb 2019 15:31:19 +0000 (15:31 +0000)
15 files changed:
src/jalview/analysis/AlignmentUtils.java
src/jalview/ext/ensembl/EnsemblCdna.java
src/jalview/ext/ensembl/EnsemblCds.java
src/jalview/ext/ensembl/EnsemblFeatures.java
src/jalview/ext/ensembl/EnsemblGene.java
src/jalview/ext/ensembl/EnsemblGenome.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/ext/ensembl/EnsemblSequenceFetcher.java
src/jalview/util/JSONUtils.java [new file with mode: 0644]
test/jalview/analysis/AlignmentUtilsTests.java
test/jalview/ext/ensembl/EnsemblCdnaTest.java
test/jalview/ext/ensembl/EnsemblCdsTest.java
test/jalview/ext/ensembl/EnsemblGeneTest.java
test/jalview/ext/ensembl/EnsemblGenomeTest.java
test/jalview/util/JSONUtilsTest.java [new file with mode: 0644]

index d1217bf..0dfd383 100644 (file)
@@ -74,12 +74,15 @@ import java.util.TreeMap;
  */
 public class AlignmentUtils
 {
-
   private static final int CODON_LENGTH = 3;
 
   private static final String SEQUENCE_VARIANT = "sequence_variant:";
 
-  private static final String ID = "ID";
+  /*
+   * the 'id' attribute is provided for variant features fetched from
+   * Ensembl using its REST service with JSON format 
+   */
+  public static final String VARIANT_ID = "id";
 
   /**
    * A data model to hold the 'normal' base value at a position, and an optional
@@ -2575,15 +2578,15 @@ public class AlignmentUtils
             peptidePos, var.getSource());
 
     StringBuilder attributes = new StringBuilder(32);
-    String id = (String) var.variant.getValue(ID);
+    String id = (String) var.variant.getValue(VARIANT_ID);
     if (id != null)
     {
       if (id.startsWith(SEQUENCE_VARIANT))
       {
         id = id.substring(SEQUENCE_VARIANT.length());
       }
-      sf.setValue(ID, id);
-      attributes.append(ID).append("=").append(id);
+      sf.setValue(VARIANT_ID, id);
+      attributes.append(VARIANT_ID).append("=").append(id);
       // TODO handle other species variants JAL-2064
       StringBuilder link = new StringBuilder(32);
       try
index 7384327..e01ad17 100644 (file)
@@ -22,7 +22,6 @@ package jalview.ext.ensembl;
 
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyI;
 
 import java.util.ArrayList;
@@ -127,7 +126,7 @@ public class EnsemblCdna extends EnsemblSeqProxy
     for (SequenceFeature sf : sfs)
     {
       String parentFeature = (String) sf.getValue(PARENT);
-      if (("transcript:" + accId).equals(parentFeature))
+      if (accId.equals(parentFeature))
       {
         result.add(sf);
       }
index 8a71b64..8f13d99 100644 (file)
@@ -116,7 +116,7 @@ public class EnsemblCds extends EnsemblSeqProxy
     for (SequenceFeature sf : sfs)
     {
       String parentFeature = (String) sf.getValue(PARENT);
-      if (("transcript:" + accId).equals(parentFeature))
+      if ( accId.equals(parentFeature))
       {
         result.add(sf);
       }
index 582eac6..744191d 100644 (file)
@@ -22,17 +22,25 @@ package jalview.ext.ensembl;
 
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
-import jalview.io.DataSourceType;
-import jalview.io.FeaturesFile;
-import jalview.io.FileParse;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.util.JSONUtils;
 
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
 /**
  * A client for fetching and processing Ensembl feature data in JSON format by
  * calling the overlap REST service
@@ -91,9 +99,128 @@ class EnsemblFeatures extends EnsemblRestClient
     {
       return null;
     }
-    FeaturesFile fr = new FeaturesFile(
-            new FileParse(fp, null, DataSourceType.URL));
-    return new Alignment(fr.getSeqsAsArray());
+
+    SequenceI seq = parseFeaturesJson(fp);
+    return new Alignment(new SequenceI[] { seq });
+  }
+
+  /**
+   * Parses the JSON response into Jalview sequence features and attaches them
+   * to a dummy sequence. Parsing is best-effort: an entry that lacks any of
+   * feature_type, start, end, source or strand (or is otherwise malformed) is
+   * skipped, and a response that cannot be parsed as a JSON array yields a
+   * sequence with no features.
+   * 
+   * @param br
+   *          reader supplying the JSON response
+   * @return a placeholder sequence (named "Dummy", with no residues) holding
+   *         the features that could be parsed
+   */
+  private SequenceI parseFeaturesJson(BufferedReader br)
+  {
+    SequenceI seq = new Sequence("Dummy", "");
+
+    JSONParser jp = new JSONParser();
+    try
+    {
+      Object parsed = jp.parse(br);
+      if (!(parsed instanceof JSONArray))
+      {
+        /*
+         * not a list of features (for example, a JSON object rather
+         * than an array) - nothing to add
+         */
+        return seq;
+      }
+
+      for (Object item : (JSONArray) parsed)
+      {
+        try
+        {
+          JSONObject obj = (JSONObject) item;
+          String type = obj.get("feature_type").toString();
+          int start = Integer.parseInt(obj.get("start").toString());
+          int end = Integer.parseInt(obj.get("end").toString());
+          String source = obj.get("source").toString();
+          String strand = obj.get("strand").toString();
+          String alleles = JSONUtils
+                  .arrayToList((JSONArray) obj.get("alleles"));
+          String clinSig = JSONUtils.arrayToList(
+                  (JSONArray) obj.get("clinical_significance"));
+
+          /*
+           * convert 'variation' to 'sequence_variant', and 'cds' to 'CDS'
+           * so as to have a valid SO term for the feature type
+           * ('gene', 'exon', 'transcript' don't need any conversion)
+           */
+          if ("variation".equals(type))
+          {
+            type = SequenceOntologyI.SEQUENCE_VARIANT;
+          }
+          else if (SequenceOntologyI.CDS.equalsIgnoreCase(type))
+          {
+            type = SequenceOntologyI.CDS;
+          }
+
+          String desc = getFirstNotNull(obj, "alleles", "external_name",
+                  JSON_ID);
+          SequenceFeature sf = new SequenceFeature(type, desc, start, end,
+                  source);
+          // any strand value other than "1" is treated as reverse strand
+          sf.setStrand("1".equals(strand) ? "+" : "-");
+          setFeatureAttribute(sf, obj, JSON_ID);
+          setFeatureAttribute(sf, obj, PARENT);
+          setFeatureAttribute(sf, obj, "consequence_type");
+          sf.setValue("alleles", alleles);
+          sf.setValue("clinical_significance", clinSig);
+
+          seq.addSequenceFeature(sf);
+        } catch (RuntimeException e)
+        {
+          /*
+           * missing or invalid field in this entry - skip it and
+           * keep trying other features
+           */
+        }
+      }
+    } catch (ParseException | IOException e)
+    {
+      // best effort: unreadable or invalid JSON gives no features
+    }
+
+    return seq;
+  }
+
+  /**
+   * Returns the value of the first of the given attributes that is present and
+   * not empty, as a string formatted suitably for display as feature
+   * description or tooltip. An array value is converted to a comma-separated
+   * list. Answers null if none of the attribute keys has a non-empty value.
+   * 
+   * @param obj
+   *          the JSON object to read from
+   * @param keys
+   *          attribute names, in order of preference
+   * @return
+   */
+  protected String getFirstNotNull(JSONObject obj, String... keys)
+  {
+    for (String key : keys)
+    {
+      Object val = obj.get(key);
+      if (val != null)
+      {
+        /*
+         * NB arrayToList answers null for an empty array, so guard
+         * against null before testing for an empty string
+         */
+        String s = val instanceof JSONArray
+                ? JSONUtils.arrayToList((JSONArray) val)
+                : val.toString();
+        if (s != null && !s.isEmpty())
+        {
+          return s;
+        }
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Copies one entry of the JSON object onto the sequence feature: if the
+   * object holds a non-null value for 'key', its string form is stored on the
+   * feature under the same key; otherwise the feature is left unchanged
+   * 
+   * @param sf
+   *          the feature to update
+   * @param obj
+   *          the JSON object to read from
+   * @param key
+   *          the name of both the JSON entry and the feature attribute
+   */
+  protected void setFeatureAttribute(SequenceFeature sf, JSONObject obj,
+          String key)
+  {
+    Object value = obj.get(key);
+    if (value == null)
+    {
+      return;
+    }
+    sf.setValue(key, value.toString());
   }
 
   /**
@@ -109,7 +236,7 @@ class EnsemblFeatures extends EnsemblRestClient
     urlstring.append(getDomain()).append("/overlap/id/").append(ids.get(0));
 
     // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
-    urlstring.append("?content-type=text/x-gff3");
+    urlstring.append("?content-type=" + getResponseMimeType());
 
     /*
      * specify object_type=gene in case is shared by transcript and/or protein;
@@ -145,16 +272,16 @@ class EnsemblFeatures extends EnsemblRestClient
   @Override
   protected String getRequestMimeType()
   {
-    return "text/x-gff3";
+    return "application/json";
   }
 
   /**
-   * Returns the MIME type for GFF3
+   * Returns the MIME type wanted for the response
    */
   @Override
   protected String getResponseMimeType()
   {
-    return "text/x-gff3";
+    return "application/json";
   }
 
   /**
index 36b19e2..7648536 100644 (file)
@@ -51,8 +51,6 @@ import com.stevesoft.pat.Regex;
  */
 public class EnsemblGene extends EnsemblSeqProxy
 {
-  private static final String GENE_PREFIX = "gene:";
-
   /*
    * accepts anything as we will attempt lookup of gene or 
    * transcript id or gene name
@@ -368,7 +366,7 @@ public class EnsemblGene extends EnsemblSeqProxy
      * look for exon features of the transcript, failing that for CDS
      * (for example ENSG00000124610 has 1 CDS but no exon features)
      */
-    String parentId = "transcript:" + accId;
+    String parentId = accId;
     List<SequenceFeature> splices = findFeatures(gene,
             SequenceOntologyI.EXON, parentId);
     if (splices.isEmpty())
@@ -399,7 +397,7 @@ public class EnsemblGene extends EnsemblSeqProxy
      * Ensembl has gene name as transcript Name
      * EnsemblGenomes doesn't, but has a url-encoded description field
      */
-    String description = (String) transcriptFeature.getValue(NAME);
+    String description = transcriptFeature.getDescription();
     if (description == null)
     {
       description = (String) transcriptFeature.getValue(DESCRIPTION);
@@ -488,7 +486,7 @@ public class EnsemblGene extends EnsemblSeqProxy
    */
   protected String getTranscriptId(SequenceFeature feature)
   {
-    return (String) feature.getValue("transcript_id");
+    return (String) feature.getValue(JSON_ID);
   }
 
   /**
@@ -510,7 +508,7 @@ public class EnsemblGene extends EnsemblSeqProxy
   {
     List<SequenceFeature> transcriptFeatures = new ArrayList<>();
 
-    String parentIdentifier = GENE_PREFIX + accId;
+    String parentIdentifier = accId;
 
     List<SequenceFeature> sfs = geneSequence.getFeatures()
             .getFeaturesByOntology(SequenceOntologyI.TRANSCRIPT);
@@ -561,9 +559,8 @@ public class EnsemblGene extends EnsemblSeqProxy
             .getFeaturesByOntology(SequenceOntologyI.GENE);
     for (SequenceFeature sf : sfs)
     {
-      // NB features as gff use 'ID'; rest services return as 'id'
-      String id = (String) sf.getValue("ID");
-      if ((GENE_PREFIX + accId).equalsIgnoreCase(id))
+      String id = (String) sf.getValue(JSON_ID);
+      if (accId.equalsIgnoreCase(id))
       {
         result.add(sf);
       }
@@ -590,7 +587,7 @@ public class EnsemblGene extends EnsemblSeqProxy
     if (isTranscript(type))
     {
       String parent = (String) sf.getValue(PARENT);
-      if (!(GENE_PREFIX + accessionId).equalsIgnoreCase(parent))
+      if (!accessionId.equalsIgnoreCase(parent))
       {
         return false;
       }
index 6684e20..4f59bc5 100644 (file)
@@ -117,9 +117,8 @@ public class EnsemblGenome extends EnsemblSeqProxy
             SequenceOntologyI.NMD_TRANSCRIPT_VARIANT);
     for (SequenceFeature sf : sfs)
     {
-      // NB features as gff use 'ID'; rest services return as 'id'
-      String id = (String) sf.getValue("ID");
-      if (("transcript:" + accId).equals(id))
+      String id = (String) sf.getValue(JSON_ID);
+      if (accId.equals(id))
       {
         result.add(sf);
       }
index 7b448fd..5dc701d 100644 (file)
@@ -62,8 +62,6 @@ import org.json.simple.parser.ParseException;
  */
 public abstract class EnsemblSeqProxy extends EnsemblRestClient
 {
-  protected static final String NAME = "Name";
-
   protected static final String DESCRIPTION = "description";
 
   /*
@@ -867,9 +865,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   protected boolean featureMayBelong(SequenceFeature sf, String identifier)
   {
     String parent = (String) sf.getValue(PARENT);
-    // using contains to allow for prefix "gene:", "transcript:" etc
     if (parent != null
-            && !parent.toUpperCase().contains(identifier.toUpperCase()))
+            && !parent.equalsIgnoreCase(identifier))
     {
       // this genomic feature belongs to a different transcript
       return false;
@@ -877,6 +874,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     return true;
   }
 
+  /**
+   * Answers a short description of the sequence fetcher
+   */
   @Override
   public String getDescription()
   {
@@ -915,10 +915,14 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
 
   /**
    * Answers true if the feature type is either 'NMD_transcript_variant' or
-   * 'transcript' or one of its sub-types in the Sequence Ontology. This is
-   * needed because NMD_transcript_variant behaves like 'transcript' in Ensembl
+   * 'transcript' (or one of its sub-types in the Sequence Ontology). This is
+   * because NMD_transcript_variant behaves like 'transcript' in Ensembl
    * although strictly speaking it is not (it is a sub-type of
    * sequence_variant).
+   * <p>
+   * (This test was needed when fetching transcript features as GFF. As we are
+   * now fetching as JSON, all features have type 'transcript' so the check for
+   * NMD_transcript_variant is redundant. Left in for any future case arising.)
    * 
    * @param featureType
    * @return
index 9e3fef4..4da0e3c 100644 (file)
@@ -20,6 +20,7 @@
  */
 package jalview.ext.ensembl;
 
+import jalview.analysis.AlignmentUtils;
 import jalview.bin.Cache;
 import jalview.datamodel.DBRefSource;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
@@ -64,7 +65,7 @@ abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
 
   protected static final String PARENT = "Parent";
 
-  protected static final String JSON_ID = "id";
+  protected static final String JSON_ID = AlignmentUtils.VARIANT_ID; // "id";
 
   protected static final String OBJECT_TYPE = "object_type";
 
diff --git a/src/jalview/util/JSONUtils.java b/src/jalview/util/JSONUtils.java
new file mode 100644 (file)
index 0000000..cdfc88e
--- /dev/null
@@ -0,0 +1,34 @@
+package jalview.util;
+
+import org.json.simple.JSONArray;
+
+/**
+ * Static helper methods for working with parsed JSON values
+ */
+public class JSONUtils
+{
+
+  /**
+   * Joins the string forms of the array's values, in order, with commas (no
+   * spaces). Answers null if the array is null, or if the joined result is
+   * the empty string (as it is for an empty array).
+   * 
+   * @param jsonArray
+   *          the values to join; may be null
+   * @return the comma-separated list, or null if there is nothing to show
+   */
+  public static String arrayToList(JSONArray jsonArray)
+  {
+    if (jsonArray == null)
+    {
+      return null;
+    }
+
+    StringBuilder list = new StringBuilder();
+    String separator = "";
+    for (Object value : jsonArray)
+    {
+      list.append(separator).append(value.toString());
+      separator = ",";
+    }
+    return list.length() > 0 ? list.toString() : null;
+  }
+
+}
index a7a7d34..70ae6a0 100644 (file)
@@ -2040,44 +2040,48 @@ public class AlignmentUtilsTests
     String dbSnp = "dbSNP";
     String cosmic = "COSMIC";
 
+    /*
+     * NB setting "id" (as returned by Ensembl for features in JSON format);
+     * previously "ID" (as returned for GFF3 format)
+     */
     SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1,
             0f, ensembl);
     sf1.setValue("alleles", "A,G"); // AAA -> GAA -> K/E
-    sf1.setValue("ID", "var1.125A>G");
+    sf1.setValue("id", "var1.125A>G");
 
     SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1,
             0f, dbSnp);
     sf2.setValue("alleles", "A,C"); // AAA -> CAA -> K/Q
-    sf2.setValue("ID", "var2");
+    sf2.setValue("id", "var2");
     sf2.setValue("clinical_significance", "Dodgy");
 
     SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 1, 1,
             0f, dbSnp);
     sf3.setValue("alleles", "A,T"); // AAA -> TAA -> stop codon
-    sf3.setValue("ID", "var3");
+    sf3.setValue("id", "var3");
     sf3.setValue("clinical_significance", "Bad");
 
     SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3,
             0f, cosmic);
     sf4.setValue("alleles", "A,G"); // AAA -> AAG synonymous
-    sf4.setValue("ID", "var4");
+    sf4.setValue("id", "var4");
     sf4.setValue("clinical_significance", "None");
 
     SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 3, 3,
             0f, ensembl);
     sf5.setValue("alleles", "A,T"); // AAA -> AAT -> K/N
-    sf5.setValue("ID", "sequence_variant:var5"); // prefix gets stripped off
+    sf5.setValue("id", "sequence_variant:var5"); // prefix gets stripped off
     sf5.setValue("clinical_significance", "Benign");
 
     SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 6, 6,
             0f, dbSnp);
     sf6.setValue("alleles", "T,C"); // TTT -> TTC synonymous
-    sf6.setValue("ID", "var6");
+    sf6.setValue("id", "var6");
 
     SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 8, 8,
             0f, cosmic);
     sf7.setValue("alleles", "C,A,G"); // CCC -> CAC,CGC -> P/H/R
-    sf7.setValue("ID", "var7");
+    sf7.setValue("id", "var7");
     sf7.setValue("clinical_significance", "Good");
 
     List<DnaVariant> codon1Variants = new ArrayList<>();
@@ -2149,9 +2153,9 @@ public class AlignmentUtilsTests
     assertEquals(1, sf.getEnd());
     assertEquals("nonsynonymous_variant", sf.getType());
     assertEquals("p.Lys1Asn", sf.getDescription());
-    assertEquals("var5", sf.getValue("ID"));
+    assertEquals("var5", sf.getValue("id"));
     assertEquals("Benign", sf.getValue("clinical_significance"));
-    assertEquals("ID=var5;clinical_significance=Benign",
+    assertEquals("id=var5;clinical_significance=Benign",
             sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
@@ -2165,9 +2169,9 @@ public class AlignmentUtilsTests
     assertEquals(1, sf.getEnd());
     assertEquals("nonsynonymous_variant", sf.getType());
     assertEquals("p.Lys1Gln", sf.getDescription());
-    assertEquals("var2", sf.getValue("ID"));
+    assertEquals("var2", sf.getValue("id"));
     assertEquals("Dodgy", sf.getValue("clinical_significance"));
-    assertEquals("ID=var2;clinical_significance=Dodgy", sf.getAttributes());
+    assertEquals("id=var2;clinical_significance=Dodgy", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
             "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2",
@@ -2180,9 +2184,9 @@ public class AlignmentUtilsTests
     assertEquals(1, sf.getEnd());
     assertEquals("nonsynonymous_variant", sf.getType());
     assertEquals("p.Lys1Glu", sf.getDescription());
-    assertEquals("var1.125A>G", sf.getValue("ID"));
+    assertEquals("var1.125A>G", sf.getValue("id"));
     assertNull(sf.getValue("clinical_significance"));
-    assertEquals("ID=var1.125A>G", sf.getAttributes());
+    assertEquals("id=var1.125A>G", sf.getAttributes());
     assertEquals(1, sf.links.size());
     // link to variation is urlencoded
     assertEquals(
@@ -2196,9 +2200,9 @@ public class AlignmentUtilsTests
     assertEquals(1, sf.getEnd());
     assertEquals("stop_gained", sf.getType());
     assertEquals("Aaa/Taa", sf.getDescription());
-    assertEquals("var3", sf.getValue("ID"));
+    assertEquals("var3", sf.getValue("id"));
     assertEquals("Bad", sf.getValue("clinical_significance"));
-    assertEquals("ID=var3;clinical_significance=Bad", sf.getAttributes());
+    assertEquals("id=var3;clinical_significance=Bad", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
             "Aaa/Taa var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3",
@@ -2211,9 +2215,9 @@ public class AlignmentUtilsTests
     assertEquals(1, sf.getEnd());
     assertEquals("synonymous_variant", sf.getType());
     assertEquals("aaA/aaG", sf.getDescription());
-    assertEquals("var4", sf.getValue("ID"));
+    assertEquals("var4", sf.getValue("id"));
     assertEquals("None", sf.getValue("clinical_significance"));
-    assertEquals("ID=var4;clinical_significance=None", sf.getAttributes());
+    assertEquals("id=var4;clinical_significance=None", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
             "aaA/aaG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
@@ -2226,9 +2230,9 @@ public class AlignmentUtilsTests
     assertEquals(2, sf.getEnd());
     assertEquals("synonymous_variant", sf.getType());
     assertEquals("ttT/ttC", sf.getDescription());
-    assertEquals("var6", sf.getValue("ID"));
+    assertEquals("var6", sf.getValue("id"));
     assertNull(sf.getValue("clinical_significance"));
-    assertEquals("ID=var6", sf.getAttributes());
+    assertEquals("id=var6", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
             "ttT/ttC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
@@ -2242,9 +2246,9 @@ public class AlignmentUtilsTests
     assertEquals(3, sf.getEnd());
     assertEquals("nonsynonymous_variant", sf.getType());
     assertEquals("p.Pro3Arg", sf.getDescription());
-    assertEquals("var7", sf.getValue("ID"));
+    assertEquals("var7", sf.getValue("id"));
     assertEquals("Good", sf.getValue("clinical_significance"));
-    assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes());
+    assertEquals("id=var7;clinical_significance=Good", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
             "p.Pro3Arg var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7",
@@ -2257,9 +2261,9 @@ public class AlignmentUtilsTests
     assertEquals(3, sf.getEnd());
     assertEquals("nonsynonymous_variant", sf.getType());
     assertEquals("p.Pro3His", sf.getDescription());
-    assertEquals("var7", sf.getValue("ID"));
+    assertEquals("var7", sf.getValue("id"));
     assertEquals("Good", sf.getValue("clinical_significance"));
-    assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes());
+    assertEquals("id=var7;clinical_significance=Good", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
             "p.Pro3His var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7",
index c9d8deb..9e9d9a4 100644 (file)
@@ -79,19 +79,19 @@ public class EnsemblCdnaTest
     // exon at (start+10000) length 501
     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
             null);
-    sf.setValue("Parent", "transcript:" + transcriptId);
+    sf.setValue("Parent", transcriptId);
     sf.setStrand("-");
     genomic.addSequenceFeature(sf);
 
     // exon (sub-type) at (start + exon_variant) length 101
     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
-    sf.setValue("Parent", "transcript:" + transcriptId);
+    sf.setValue("Parent", transcriptId);
     sf.setStrand("-");
     genomic.addSequenceFeature(sf);
 
     // exon belonging to a different transcript doesn't count
     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
-    sf.setValue("Parent", "transcript:anotherOne");
+    sf.setValue("Parent", "anotherOne");
     genomic.addSequenceFeature(sf);
 
     // transcript feature doesn't count
@@ -134,19 +134,19 @@ public class EnsemblCdnaTest
     // exon at (start+10000) length 501
     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
             null);
-    sf.setValue("Parent", "transcript:" + transcriptId);
+    sf.setValue("Parent", transcriptId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
     // exon (sub-type) at (start + exon_variant) length 101
     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
-    sf.setValue("Parent", "transcript:" + transcriptId);
+    sf.setValue("Parent", transcriptId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
     // exon belonging to a different transcript doesn't count
     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
-    sf.setValue("Parent", "transcript:anotherOne");
+    sf.setValue("Parent", "anotherOne");
     genomic.addSequenceFeature(sf);
 
     // transcript feature doesn't count
@@ -226,14 +226,14 @@ public class EnsemblCdnaTest
     assertTrue(testee.retainFeature(sf, accId));
 
     // other feature with desired parent is retained
-    sf.setValue("Parent", "transcript:" + accId);
+    sf.setValue("Parent", accId);
     assertTrue(testee.retainFeature(sf, accId));
 
     // test is not case-sensitive
     assertTrue(testee.retainFeature(sf, accId.toLowerCase()));
 
     // feature with wrong parent is not retained
-    sf.setValue("Parent", "transcript:XYZ");
+    sf.setValue("Parent", "XYZ");
     assertFalse(testee.retainFeature(sf, accId));
   }
 
@@ -253,30 +253,30 @@ public class EnsemblCdnaTest
 
     // exon with wrong parent: not valid
     SequenceFeature sf2 = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    sf2.setValue("Parent", "transcript:XYZ");
+    sf2.setValue("Parent", "XYZ");
     seq.addSequenceFeature(sf2);
 
     // exon with right parent is valid
     SequenceFeature sf3 = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    sf3.setValue("Parent", "transcript:" + accId);
+    sf3.setValue("Parent", accId);
     seq.addSequenceFeature(sf3);
 
     // exon sub-type with right parent is valid
     SequenceFeature sf4 = new SequenceFeature("coding_exon", "", 1, 2, 0f,
             null);
-    sf4.setValue("Parent", "transcript:" + accId);
+    sf4.setValue("Parent", accId);
     seq.addSequenceFeature(sf4);
 
     // transcript not valid:
     SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
             null);
-    sf5.setValue("Parent", "transcript:" + accId);
+    sf5.setValue("Parent", accId);
     seq.addSequenceFeature(sf5);
 
     // CDS not valid:
     SequenceFeature sf6 = new SequenceFeature("transcript", "", 1, 2, 0f,
             null);
-    sf6.setValue("Parent", "transcript:" + accId);
+    sf6.setValue("Parent", accId);
     seq.addSequenceFeature(sf6);
 
     List<SequenceFeature> sfs = new EnsemblCdna()
index a44ab7f..e7574eb 100644 (file)
@@ -78,19 +78,19 @@ public class EnsemblCdsTest
     // CDS at (start+10000) length 501
     SequenceFeature sf = new SequenceFeature("CDS", "", 20000, 20500, 0f,
             null);
-    sf.setValue("Parent", "transcript:" + transcriptId);
+    sf.setValue("Parent", transcriptId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
     // CDS (sub-type) at (start + 10500) length 101
     sf = new SequenceFeature("CDS_predicted", "", 10500, 10600, 0f, null);
-    sf.setValue("Parent", "transcript:" + transcriptId);
+    sf.setValue("Parent", transcriptId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
     // CDS belonging to a different transcript doesn't count
     sf = new SequenceFeature("CDS", "", 11500, 12600, 0f, null);
-    sf.setValue("Parent", "transcript:anotherOne");
+    sf.setValue("Parent", "anotherOne");
     genomic.addSequenceFeature(sf);
 
     // exon feature doesn't count
@@ -135,16 +135,16 @@ public class EnsemblCdsTest
     assertFalse(testee.retainFeature(sf, accId));
 
     // other feature with no parent is retained
-    sf = new SequenceFeature("CDS_psequence_variantredicted", "", 20000,
+    sf = new SequenceFeature("anotherType", "", 20000,
             20500, 0f, null);
     assertTrue(testee.retainFeature(sf, accId));
 
     // other feature with desired parent is retained
-    sf.setValue("Parent", "transcript:" + accId);
+    sf.setValue("Parent", accId);
     assertTrue(testee.retainFeature(sf, accId));
 
     // feature with wrong parent is not retained
-    sf.setValue("Parent", "transcript:XYZ");
+    sf.setValue("Parent", "XYZ");
     assertFalse(testee.retainFeature(sf, accId));
   }
 
@@ -164,29 +164,29 @@ public class EnsemblCdsTest
 
     // cds with wrong parent not valid
     SequenceFeature sf2 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
-    sf2.setValue("Parent", "transcript:XYZ");
+    sf2.setValue("Parent", "XYZ");
     seq.addSequenceFeature(sf2);
 
     // cds with right parent is valid
     SequenceFeature sf3 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
-    sf3.setValue("Parent", "transcript:" + accId);
+    sf3.setValue("Parent", accId);
     seq.addSequenceFeature(sf3);
 
     // cds sub-type with right parent is valid
     SequenceFeature sf4 = new SequenceFeature("CDS_predicted", "", 1, 2, 0f,
             null);
-    sf4.setValue("Parent", "transcript:" + accId);
+    sf4.setValue("Parent", accId);
     seq.addSequenceFeature(sf4);
 
     // transcript not valid:
     SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
             null);
-    sf5.setValue("Parent", "transcript:" + accId);
+    sf5.setValue("Parent", accId);
     seq.addSequenceFeature(sf5);
 
     // exon not valid:
     SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    sf6.setValue("Parent", "transcript:" + accId);
+    sf6.setValue("Parent", accId);
     seq.addSequenceFeature(sf6);
 
     List<SequenceFeature> sfs = new EnsemblCds().getIdentifyingFeatures(seq,
index 446b4f7..8b1e840 100644 (file)
@@ -81,7 +81,7 @@ public class EnsemblGeneTest
     // gene at (start + 10500) length 101
     SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f,
             null);
-    sf.setValue("ID", "gene:" + geneId);
+    sf.setValue("id", geneId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
@@ -113,7 +113,7 @@ public class EnsemblGeneTest
     // gene at (start + 10500) length 101
     SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f,
             null);
-    sf.setValue("ID", "gene:" + geneId);
+    sf.setValue("id", geneId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
@@ -146,30 +146,30 @@ public class EnsemblGeneTest
     // transcript feature
     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
             20500, 0f, null);
-    sf1.setValue("Parent", "gene:" + geneId);
-    sf1.setValue("transcript_id", "transcript1");
+    sf1.setValue("Parent", geneId);
+    sf1.setValue("id", "transcript1");
     genomic.addSequenceFeature(sf1);
 
     // transcript sub-type feature
     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 21000, 21500,
             0f, null);
-    sf2.setValue("Parent", "gene:" + geneId);
-    sf2.setValue("transcript_id", "transcript2");
+    sf2.setValue("Parent", geneId);
+    sf2.setValue("id", "transcript2");
     genomic.addSequenceFeature(sf2);
 
     // NMD_transcript_variant treated like transcript in Ensembl
     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
             22000, 22500, 0f, null);
     // id matching should not be case-sensitive
-    sf3.setValue("Parent", "gene:" + geneId.toLowerCase());
-    sf3.setValue("transcript_id", "transcript3");
+    sf3.setValue("Parent", geneId.toLowerCase());
+    sf3.setValue("id", "transcript3");
     genomic.addSequenceFeature(sf3);
 
     // transcript for a different gene - ignored
     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 23000, 23500,
             0f, null);
-    sf4.setValue("Parent", "gene:XYZ");
-    sf4.setValue("transcript_id", "transcript4");
+    sf4.setValue("Parent", "XYZ");
+    sf4.setValue("id", "transcript4");
     genomic.addSequenceFeature(sf4);
 
     EnsemblGene testee = new EnsemblGene();
@@ -196,24 +196,24 @@ public class EnsemblGeneTest
     EnsemblGene testee = new EnsemblGene();
     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
             null);
-    sf.setValue("ID", "gene:" + geneId);
+    sf.setValue("id", geneId);
     assertFalse(testee.retainFeature(sf, geneId));
 
     sf = new SequenceFeature("transcript", "", 20000, 20500, 0f, null);
-    sf.setValue("Parent", "gene:" + geneId);
+    sf.setValue("Parent", geneId);
     assertTrue(testee.retainFeature(sf, geneId));
 
     sf = new SequenceFeature("mature_transcript", "", 20000, 20500, 0f,
             null);
-    sf.setValue("Parent", "gene:" + geneId);
+    sf.setValue("Parent", geneId);
     assertTrue(testee.retainFeature(sf, geneId));
 
     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
             0f, null);
-    sf.setValue("Parent", "gene:" + geneId);
+    sf.setValue("Parent", geneId);
     assertTrue(testee.retainFeature(sf, geneId));
 
-    sf.setValue("Parent", "gene:XYZ");
+    sf.setValue("Parent", "XYZ");
     assertFalse(testee.retainFeature(sf, geneId));
 
     sf = new SequenceFeature("anything", "", 20000, 20500, 0f, null);
@@ -235,28 +235,28 @@ public class EnsemblGeneTest
     seq.addSequenceFeature(sf1);
 
     // gene with wrong ID not valid
-    SequenceFeature sf2 = new SequenceFeature("gene", "", 1, 2, 0f, null);
-    sf2.setValue("ID", "gene:XYZ");
+    SequenceFeature sf2 = new SequenceFeature("gene", "a", 1, 2, 0f, null);
+    sf2.setValue("id", "XYZ");
     seq.addSequenceFeature(sf2);
 
     // gene with right ID is valid
-    SequenceFeature sf3 = new SequenceFeature("gene", "", 1, 2, 0f, null);
-    sf3.setValue("ID", "gene:" + accId);
+    SequenceFeature sf3 = new SequenceFeature("gene", "b", 1, 2, 0f, null);
+    sf3.setValue("id", accId);
     seq.addSequenceFeature(sf3);
 
     // gene sub-type with right ID is valid
     SequenceFeature sf4 = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
-    sf4.setValue("ID", "gene:" + accId);
+    sf4.setValue("id", accId);
     seq.addSequenceFeature(sf4);
 
     // transcript not valid:
     SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f, null);
-    sf5.setValue("ID", "gene:" + accId);
+    sf5.setValue("id", accId);
     seq.addSequenceFeature(sf5);
 
     // exon not valid:
     SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    sf6.setValue("ID", "gene:" + accId);
+    sf6.setValue("id", accId);
     seq.addSequenceFeature(sf6);
     
     List<SequenceFeature> sfs = new EnsemblGene()
index 72ee492..11140f9 100644 (file)
@@ -77,13 +77,13 @@ public class EnsemblGenomeTest
     // transcript at (start+10000) length 501
     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
             20500, 0f, null);
-    sf.setValue("ID", "transcript:" + transcriptId);
+    sf.setValue("id", transcriptId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
     // transcript (sub-type) at (start + 10500) length 101
     sf = new SequenceFeature("ncRNA", "", 10500, 10600, 0f, null);
-    sf.setValue("ID", "transcript:" + transcriptId);
+    sf.setValue("id", transcriptId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
@@ -91,13 +91,13 @@ public class EnsemblGenomeTest
     // although strictly it is a sequence_variant in SO
     sf = new SequenceFeature("NMD_transcript_variant", "", 11000, 12000,
             0f, null);
-    sf.setValue("ID", "transcript:" + transcriptId);
+    sf.setValue("id", transcriptId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
 
     // transcript with a different ID doesn't count
     sf = new SequenceFeature("transcript", "", 11500, 12600, 0f, null);
-    sf.setValue("ID", "transcript:anotherOne");
+    sf.setValue("id", "anotherOne");
     genomic.addSequenceFeature(sf);
 
     // parent of transcript feature doesn't count
@@ -150,11 +150,11 @@ public class EnsemblGenomeTest
     assertTrue(testee.retainFeature(sf, accId));
 
     // other feature with correct parent is kept
-    sf.setValue("Parent", "transcript:" + accId);
+    sf.setValue("Parent", accId);
     assertTrue(testee.retainFeature(sf, accId));
 
     // other feature with wrong parent is not kept
-    sf.setValue("Parent", "transcript:XYZ");
+    sf.setValue("Parent", "XYZ");
     assertFalse(testee.retainFeature(sf, accId));
   }
 
@@ -174,36 +174,37 @@ public class EnsemblGenomeTest
     seq.addSequenceFeature(sf1);
 
     // transcript with wrong ID not valid
-    SequenceFeature sf2 = new SequenceFeature("transcript", "", 1, 2, 0f,
+    // NB change desc to avoid rejection of duplicate feature!
+    SequenceFeature sf2 = new SequenceFeature("transcript", "a", 1, 2, 0f,
             null);
-    sf2.setValue("ID", "transcript");
+    sf2.setValue("id", "transcript");
     seq.addSequenceFeature(sf2);
 
     // transcript with right ID is valid
-    SequenceFeature sf3 = new SequenceFeature("transcript", "", 1, 2, 0f,
+    SequenceFeature sf3 = new SequenceFeature("transcript", "b", 1, 2, 0f,
             null);
-    sf3.setValue("ID", "transcript:" + accId);
+    sf3.setValue("id", accId);
     seq.addSequenceFeature(sf3);
 
     // transcript sub-type with right ID is valid
     SequenceFeature sf4 = new SequenceFeature("ncRNA", "", 1, 2, 0f, null);
-    sf4.setValue("ID", "transcript:" + accId);
+    sf4.setValue("id", accId);
     seq.addSequenceFeature(sf4);
 
     // Ensembl treats NMD_transcript_variant as if a transcript
     SequenceFeature sf5 = new SequenceFeature("NMD_transcript_variant", "",
             1, 2, 0f, null);
-    sf5.setValue("ID", "transcript:" + accId);
+    sf5.setValue("id", accId);
     seq.addSequenceFeature(sf5);
 
     // gene not valid:
     SequenceFeature sf6 = new SequenceFeature("gene", "", 1, 2, 0f, null);
-    sf6.setValue("ID", "transcript:" + accId);
+    sf6.setValue("id", accId);
     seq.addSequenceFeature(sf6);
 
     // exon not valid:
     SequenceFeature sf7 = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    sf7.setValue("ID", "transcript:" + accId);
+    sf7.setValue("id", accId);
     seq.addSequenceFeature(sf7);
 
     List<SequenceFeature> sfs = new EnsemblGenome()
diff --git a/test/jalview/util/JSONUtilsTest.java b/test/jalview/util/JSONUtilsTest.java
new file mode 100644 (file)
index 0000000..45f1c48
--- /dev/null
@@ -0,0 +1,26 @@
+package jalview.util;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+
+import org.json.JSONException;
+import org.json.simple.JSONArray;
+import org.testng.annotations.Test;
+
+public class JSONUtilsTest // unit tests for JSONUtils helper methods
+{
+  @Test(groups = "Functional")
+  public void testArrayToList() throws JSONException
+  {
+    assertNull(JSONUtils.arrayToList(null)); // null array
+
+    JSONArray ja = new JSONArray();
+    assertNull(JSONUtils.arrayToList(ja)); // empty array (was re-testing null)
+
+    ja.add("hello");
+    assertEquals(JSONUtils.arrayToList(ja), "hello"); // one value: no comma
+
+    ja.add("world");
+    assertEquals(JSONUtils.arrayToList(ja), "hello,world"); // comma-separated
+  }
+}