*/
public class AlignmentUtils
{
-
private static final int CODON_LENGTH = 3;
private static final String SEQUENCE_VARIANT = "sequence_variant:";
- private static final String ID = "ID";
+ /*
+ * the 'id' attribute is provided for variant features fetched from
+ * Ensembl using its REST service with JSON format
+ */
+ public static final String VARIANT_ID = "id";
/**
* A data model to hold the 'normal' base value at a position, and an optional
peptidePos, var.getSource());
StringBuilder attributes = new StringBuilder(32);
- String id = (String) var.variant.getValue(ID);
+ String id = (String) var.variant.getValue(VARIANT_ID);
if (id != null)
{
if (id.startsWith(SEQUENCE_VARIANT))
{
id = id.substring(SEQUENCE_VARIANT.length());
}
- sf.setValue(ID, id);
- attributes.append(ID).append("=").append(id);
+ sf.setValue(VARIANT_ID, id);
+ attributes.append(VARIANT_ID).append("=").append(id);
// TODO handle other species variants JAL-2064
StringBuilder link = new StringBuilder(32);
try
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import java.util.ArrayList;
for (SequenceFeature sf : sfs)
{
String parentFeature = (String) sf.getValue(PARENT);
- if (("transcript:" + accId).equals(parentFeature))
+ if (accId.equals(parentFeature))
{
result.add(sf);
}
for (SequenceFeature sf : sfs)
{
String parentFeature = (String) sf.getValue(PARENT);
- if (("transcript:" + accId).equals(parentFeature))
+ if ( accId.equals(parentFeature))
{
result.add(sf);
}
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
-import jalview.io.DataSourceType;
-import jalview.io.FeaturesFile;
-import jalview.io.FileParse;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.util.JSONUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
/**
 * A client for fetching and processing Ensembl feature data in JSON format by
 * calling the overlap REST service
{
return null;
}
- FeaturesFile fr = new FeaturesFile(
- new FileParse(fp, null, DataSourceType.URL));
- return new Alignment(fr.getSeqsAsArray());
+
+ SequenceI seq = parseFeaturesJson(fp);
+ return new Alignment(new SequenceI[] { seq });
+ }
+
+ /**
+  * Parses the JSON response into Jalview sequence features and attaches them
+  * to a dummy sequence. Parsing is best-effort: a response that cannot be read
+  * or parsed, or that is not a JSON array, is reported on stderr and results in
+  * a sequence with no features; an array entry that cannot be converted to a
+  * feature is skipped and the remaining entries are still processed.
+  *
+  * @param br
+  *          reader supplying the JSON response
+  * @return a sequence named "Dummy" holding the parsed features (never null)
+  */
+ private SequenceI parseFeaturesJson(BufferedReader br)
+ {
+   SequenceI seq = new Sequence("Dummy", "");
+
+   JSONParser jp = new JSONParser();
+   try
+   {
+     Object parsed = jp.parse(br);
+     if (!(parsed instanceof JSONArray))
+     {
+       /*
+        * null, or some other JSON value than the expected array of
+        * features; checked here since a blind cast would fail with an
+        * exception that is not caught below
+        */
+       System.err.println(
+               "Unexpected response (not a JSON array) when parsing Ensembl features");
+       return seq;
+     }
+
+     for (Object response : (JSONArray) parsed)
+     {
+       try
+       {
+         JSONObject obj = (JSONObject) response;
+         String type = obj.get("feature_type").toString();
+         int start = Integer.parseInt(obj.get("start").toString());
+         int end = Integer.parseInt(obj.get("end").toString());
+         String source = obj.get("source").toString();
+         String strand = obj.get("strand").toString();
+         String alleles = JSONUtils
+                 .arrayToList((JSONArray) obj.get("alleles"));
+         String clinSig = JSONUtils.arrayToList(
+                 (JSONArray) obj.get("clinical_significance"));
+
+         /*
+          * convert 'variation' to 'sequence_variant', and 'cds' to 'CDS'
+          * so as to have a valid SO term for the feature type
+          * ('gene', 'exon', 'transcript' don't need any conversion)
+          */
+         if ("variation".equals(type))
+         {
+           type = SequenceOntologyI.SEQUENCE_VARIANT;
+         }
+         else if (SequenceOntologyI.CDS.equalsIgnoreCase(type))
+         {
+           type = SequenceOntologyI.CDS;
+         }
+
+         String desc = getFirstNotNull(obj, "alleles", "external_name",
+                 JSON_ID);
+         SequenceFeature sf = new SequenceFeature(type, desc, start, end,
+                 source);
+         // strand "1" is forward; any other value is treated as reverse
+         sf.setStrand("1".equals(strand) ? "+" : "-");
+         setFeatureAttribute(sf, obj, JSON_ID);
+         setFeatureAttribute(sf, obj, "Parent");
+         setFeatureAttribute(sf, obj, "consequence_type");
+         sf.setValue("alleles", alleles);
+         sf.setValue("clinical_significance", clinSig);
+
+         seq.addSequenceFeature(sf);
+       } catch (Exception ignored)
+       {
+         /*
+          * deliberately ignored (e.g. missing or malformed field) - keep
+          * trying other features; JVM Errors are left to propagate
+          */
+       }
+     }
+   } catch (ParseException | IOException e)
+   {
+     // nothing could be parsed, so the sequence is returned without features
+     System.err.println("Error parsing Ensembl features response: " + e);
+   }
+
+   return seq;
+ }
+
+ /**
+  * Returns the first non-empty attribute found (if any) as a string, formatted
+  * suitably for display as feature description or tooltip. The keys are tried
+  * in the order given; a JSON array value is formatted as a comma-separated
+  * list, any other value by its toString(). A key whose value is null, an
+  * empty array or an empty string is passed over. Answers null if none of the
+  * attribute keys provides a value.
+  *
+  * @param obj
+  *          the JSON object to read from
+  * @param keys
+  *          attribute names, in order of preference
+  * @return the first non-empty value as a string, or null if there is none
+  */
+ protected String getFirstNotNull(JSONObject obj, String... keys)
+ {
+   for (String key : keys)
+   {
+     Object val = obj.get(key);
+     if (val != null)
+     {
+       String s = val instanceof JSONArray
+               ? JSONUtils.arrayToList((JSONArray) val)
+               : val.toString();
+       // arrayToList answers null for an empty array, so guard against it
+       if (s != null && !s.isEmpty())
+       {
+         return s;
+       }
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Copies one entry of the JSON object onto the sequence feature: if 'key' has
+  * a non-null value in the object, that value's string form is set as the
+  * feature attribute of the same name, otherwise the feature is not modified
+  *
+  * @param sf
+  *          the feature to set the attribute on
+  * @param obj
+  *          the JSON object to read the value from
+  * @param key
+  *          the name of both the JSON entry and the feature attribute
+  */
+ protected void setFeatureAttribute(SequenceFeature sf, JSONObject obj,
+         String key)
+ {
+   Object value = obj.get(key);
+   if (value == null)
+   {
+     return;
+   }
+   sf.setValue(key, value.toString());
 }
/**
urlstring.append(getDomain()).append("/overlap/id/").append(ids.get(0));
// @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
- urlstring.append("?content-type=text/x-gff3");
+ urlstring.append("?content-type=" + getResponseMimeType());
/*
* specify object_type=gene in case is shared by transcript and/or protein;
 @Override
 protected String getRequestMimeType()
 {
- return "text/x-gff3";
+ // the request content type is now JSON (it was GFF3)
+ return "application/json";
 }
 /**
- * Returns the MIME type for GFF3
+ * Returns the MIME type wanted for the response (JSON, i.e.
+ * "application/json")
 */
 @Override
 protected String getResponseMimeType()
 {
- return "text/x-gff3";
+ return "application/json";
 }
/**
*/
public class EnsemblGene extends EnsemblSeqProxy
{
- private static final String GENE_PREFIX = "gene:";
-
/*
* accepts anything as we will attempt lookup of gene or
* transcript id or gene name
* look for exon features of the transcript, failing that for CDS
* (for example ENSG00000124610 has 1 CDS but no exon features)
*/
- String parentId = "transcript:" + accId;
+ String parentId = accId;
List<SequenceFeature> splices = findFeatures(gene,
SequenceOntologyI.EXON, parentId);
if (splices.isEmpty())
* Ensembl has gene name as transcript Name
* EnsemblGenomes doesn't, but has a url-encoded description field
*/
- String description = (String) transcriptFeature.getValue(NAME);
+ String description = transcriptFeature.getDescription();
if (description == null)
{
description = (String) transcriptFeature.getValue(DESCRIPTION);
*/
protected String getTranscriptId(SequenceFeature feature)
{
- return (String) feature.getValue("transcript_id");
+ return (String) feature.getValue(JSON_ID);
}
/**
{
List<SequenceFeature> transcriptFeatures = new ArrayList<>();
- String parentIdentifier = GENE_PREFIX + accId;
+ String parentIdentifier = accId;
List<SequenceFeature> sfs = geneSequence.getFeatures()
.getFeaturesByOntology(SequenceOntologyI.TRANSCRIPT);
.getFeaturesByOntology(SequenceOntologyI.GENE);
for (SequenceFeature sf : sfs)
{
- // NB features as gff use 'ID'; rest services return as 'id'
- String id = (String) sf.getValue("ID");
- if ((GENE_PREFIX + accId).equalsIgnoreCase(id))
+ String id = (String) sf.getValue(JSON_ID);
+ if (accId.equalsIgnoreCase(id))
{
result.add(sf);
}
if (isTranscript(type))
{
String parent = (String) sf.getValue(PARENT);
- if (!(GENE_PREFIX + accessionId).equalsIgnoreCase(parent))
+ if (!accessionId.equalsIgnoreCase(parent))
{
return false;
}
SequenceOntologyI.NMD_TRANSCRIPT_VARIANT);
for (SequenceFeature sf : sfs)
{
- // NB features as gff use 'ID'; rest services return as 'id'
- String id = (String) sf.getValue("ID");
- if (("transcript:" + accId).equals(id))
+ String id = (String) sf.getValue(JSON_ID);
+ if (accId.equals(id))
{
result.add(sf);
}
*/
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
- protected static final String NAME = "Name";
-
protected static final String DESCRIPTION = "description";
/*
protected boolean featureMayBelong(SequenceFeature sf, String identifier)
{
String parent = (String) sf.getValue(PARENT);
- // using contains to allow for prefix "gene:", "transcript:" etc
if (parent != null
- && !parent.toUpperCase().contains(identifier.toUpperCase()))
+ && !parent.equalsIgnoreCase(identifier))
{
// this genomic feature belongs to a different transcript
return false;
return true;
}
+ /**
+ * Answers a short description of the sequence fetcher
+ */
@Override
public String getDescription()
{
/**
* Answers true if the feature type is either 'NMD_transcript_variant' or
- * 'transcript' or one of its sub-types in the Sequence Ontology. This is
- * needed because NMD_transcript_variant behaves like 'transcript' in Ensembl
+ * 'transcript' (or one of its sub-types in the Sequence Ontology). This is
+ * because NMD_transcript_variant behaves like 'transcript' in Ensembl
* although strictly speaking it is not (it is a sub-type of
* sequence_variant).
+ * <p>
+ * (This test was needed when fetching transcript features as GFF. As we are
+ * now fetching as JSON, all features have type 'transcript' so the check for
+ * NMD_transcript_variant is redundant. Left in for any future case arising.)
*
* @param featureType
* @return
*/
package jalview.ext.ensembl;
+import jalview.analysis.AlignmentUtils;
import jalview.bin.Cache;
import jalview.datamodel.DBRefSource;
import jalview.ws.seqfetcher.DbSourceProxyImpl;
protected static final String PARENT = "Parent";
- protected static final String JSON_ID = "id";
+ protected static final String JSON_ID = AlignmentUtils.VARIANT_ID; // "id";
protected static final String OBJECT_TYPE = "object_type";
--- /dev/null
+package jalview.util;
+
+import org.json.simple.JSONArray;
+
+/**
+ * Static helper methods for working with json-simple objects
+ */
+public class JSONUtils
+{
+
+  /**
+   * Joins the string forms of the values of a JSONArray, in order, with a
+   * comma between each pair. Answers null if the array is null, or if the
+   * joined result is empty (as it is for an empty array).
+   *
+   * @param jsonArray
+   *          the array to convert (may be null)
+   * @return the comma-separated list, or null if there is nothing to list
+   */
+  public static String arrayToList(JSONArray jsonArray)
+  {
+    if (jsonArray == null)
+    {
+      return null;
+    }
+
+    StringBuilder joined = new StringBuilder();
+    String separator = "";
+    for (Object element : jsonArray)
+    {
+      joined.append(separator).append(element.toString());
+      // every value after the first is preceded by a comma
+      separator = ",";
+    }
+    return joined.length() > 0 ? joined.toString() : null;
+  }
+
+}
String dbSnp = "dbSNP";
String cosmic = "COSMIC";
+ /*
+ * NB setting "id" (as returned by Ensembl for features in JSON format);
+ * previously "ID" (as returned for GFF3 format)
+ */
SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1,
0f, ensembl);
sf1.setValue("alleles", "A,G"); // AAA -> GAA -> K/E
- sf1.setValue("ID", "var1.125A>G");
+ sf1.setValue("id", "var1.125A>G");
SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1,
0f, dbSnp);
sf2.setValue("alleles", "A,C"); // AAA -> CAA -> K/Q
- sf2.setValue("ID", "var2");
+ sf2.setValue("id", "var2");
sf2.setValue("clinical_significance", "Dodgy");
SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 1, 1,
0f, dbSnp);
sf3.setValue("alleles", "A,T"); // AAA -> TAA -> stop codon
- sf3.setValue("ID", "var3");
+ sf3.setValue("id", "var3");
sf3.setValue("clinical_significance", "Bad");
SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3,
0f, cosmic);
sf4.setValue("alleles", "A,G"); // AAA -> AAG synonymous
- sf4.setValue("ID", "var4");
+ sf4.setValue("id", "var4");
sf4.setValue("clinical_significance", "None");
SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 3, 3,
0f, ensembl);
sf5.setValue("alleles", "A,T"); // AAA -> AAT -> K/N
- sf5.setValue("ID", "sequence_variant:var5"); // prefix gets stripped off
+ sf5.setValue("id", "sequence_variant:var5"); // prefix gets stripped off
sf5.setValue("clinical_significance", "Benign");
SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 6, 6,
0f, dbSnp);
sf6.setValue("alleles", "T,C"); // TTT -> TTC synonymous
- sf6.setValue("ID", "var6");
+ sf6.setValue("id", "var6");
SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 8, 8,
0f, cosmic);
sf7.setValue("alleles", "C,A,G"); // CCC -> CAC,CGC -> P/H/R
- sf7.setValue("ID", "var7");
+ sf7.setValue("id", "var7");
sf7.setValue("clinical_significance", "Good");
List<DnaVariant> codon1Variants = new ArrayList<>();
assertEquals(1, sf.getEnd());
assertEquals("nonsynonymous_variant", sf.getType());
assertEquals("p.Lys1Asn", sf.getDescription());
- assertEquals("var5", sf.getValue("ID"));
+ assertEquals("var5", sf.getValue("id"));
assertEquals("Benign", sf.getValue("clinical_significance"));
- assertEquals("ID=var5;clinical_significance=Benign",
+ assertEquals("id=var5;clinical_significance=Benign",
sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
assertEquals(1, sf.getEnd());
assertEquals("nonsynonymous_variant", sf.getType());
assertEquals("p.Lys1Gln", sf.getDescription());
- assertEquals("var2", sf.getValue("ID"));
+ assertEquals("var2", sf.getValue("id"));
assertEquals("Dodgy", sf.getValue("clinical_significance"));
- assertEquals("ID=var2;clinical_significance=Dodgy", sf.getAttributes());
+ assertEquals("id=var2;clinical_significance=Dodgy", sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
"p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2",
assertEquals(1, sf.getEnd());
assertEquals("nonsynonymous_variant", sf.getType());
assertEquals("p.Lys1Glu", sf.getDescription());
- assertEquals("var1.125A>G", sf.getValue("ID"));
+ assertEquals("var1.125A>G", sf.getValue("id"));
assertNull(sf.getValue("clinical_significance"));
- assertEquals("ID=var1.125A>G", sf.getAttributes());
+ assertEquals("id=var1.125A>G", sf.getAttributes());
assertEquals(1, sf.links.size());
// link to variation is urlencoded
assertEquals(
assertEquals(1, sf.getEnd());
assertEquals("stop_gained", sf.getType());
assertEquals("Aaa/Taa", sf.getDescription());
- assertEquals("var3", sf.getValue("ID"));
+ assertEquals("var3", sf.getValue("id"));
assertEquals("Bad", sf.getValue("clinical_significance"));
- assertEquals("ID=var3;clinical_significance=Bad", sf.getAttributes());
+ assertEquals("id=var3;clinical_significance=Bad", sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
"Aaa/Taa var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3",
assertEquals(1, sf.getEnd());
assertEquals("synonymous_variant", sf.getType());
assertEquals("aaA/aaG", sf.getDescription());
- assertEquals("var4", sf.getValue("ID"));
+ assertEquals("var4", sf.getValue("id"));
assertEquals("None", sf.getValue("clinical_significance"));
- assertEquals("ID=var4;clinical_significance=None", sf.getAttributes());
+ assertEquals("id=var4;clinical_significance=None", sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
"aaA/aaG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
assertEquals(2, sf.getEnd());
assertEquals("synonymous_variant", sf.getType());
assertEquals("ttT/ttC", sf.getDescription());
- assertEquals("var6", sf.getValue("ID"));
+ assertEquals("var6", sf.getValue("id"));
assertNull(sf.getValue("clinical_significance"));
- assertEquals("ID=var6", sf.getAttributes());
+ assertEquals("id=var6", sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
"ttT/ttC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
assertEquals(3, sf.getEnd());
assertEquals("nonsynonymous_variant", sf.getType());
assertEquals("p.Pro3Arg", sf.getDescription());
- assertEquals("var7", sf.getValue("ID"));
+ assertEquals("var7", sf.getValue("id"));
assertEquals("Good", sf.getValue("clinical_significance"));
- assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes());
+ assertEquals("id=var7;clinical_significance=Good", sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
"p.Pro3Arg var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7",
assertEquals(3, sf.getEnd());
assertEquals("nonsynonymous_variant", sf.getType());
assertEquals("p.Pro3His", sf.getDescription());
- assertEquals("var7", sf.getValue("ID"));
+ assertEquals("var7", sf.getValue("id"));
assertEquals("Good", sf.getValue("clinical_significance"));
- assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes());
+ assertEquals("id=var7;clinical_significance=Good", sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
"p.Pro3His var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7",
// exon at (start+10000) length 501
SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
null);
- sf.setValue("Parent", "transcript:" + transcriptId);
+ sf.setValue("Parent", transcriptId);
sf.setStrand("-");
genomic.addSequenceFeature(sf);
// exon (sub-type) at (start + exon_variant) length 101
sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
- sf.setValue("Parent", "transcript:" + transcriptId);
+ sf.setValue("Parent", transcriptId);
sf.setStrand("-");
genomic.addSequenceFeature(sf);
// exon belonging to a different transcript doesn't count
sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
- sf.setValue("Parent", "transcript:anotherOne");
+ sf.setValue("Parent", "anotherOne");
genomic.addSequenceFeature(sf);
// transcript feature doesn't count
// exon at (start+10000) length 501
SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
null);
- sf.setValue("Parent", "transcript:" + transcriptId);
+ sf.setValue("Parent", transcriptId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// exon (sub-type) at (start + exon_variant) length 101
sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
- sf.setValue("Parent", "transcript:" + transcriptId);
+ sf.setValue("Parent", transcriptId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// exon belonging to a different transcript doesn't count
sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
- sf.setValue("Parent", "transcript:anotherOne");
+ sf.setValue("Parent", "anotherOne");
genomic.addSequenceFeature(sf);
// transcript feature doesn't count
assertTrue(testee.retainFeature(sf, accId));
// other feature with desired parent is retained
- sf.setValue("Parent", "transcript:" + accId);
+ sf.setValue("Parent", accId);
assertTrue(testee.retainFeature(sf, accId));
// test is not case-sensitive
assertTrue(testee.retainFeature(sf, accId.toLowerCase()));
// feature with wrong parent is not retained
- sf.setValue("Parent", "transcript:XYZ");
+ sf.setValue("Parent", "XYZ");
assertFalse(testee.retainFeature(sf, accId));
}
// exon with wrong parent: not valid
SequenceFeature sf2 = new SequenceFeature("exon", "", 1, 2, 0f, null);
- sf2.setValue("Parent", "transcript:XYZ");
+ sf2.setValue("Parent", "XYZ");
seq.addSequenceFeature(sf2);
// exon with right parent is valid
SequenceFeature sf3 = new SequenceFeature("exon", "", 1, 2, 0f, null);
- sf3.setValue("Parent", "transcript:" + accId);
+ sf3.setValue("Parent", accId);
seq.addSequenceFeature(sf3);
// exon sub-type with right parent is valid
SequenceFeature sf4 = new SequenceFeature("coding_exon", "", 1, 2, 0f,
null);
- sf4.setValue("Parent", "transcript:" + accId);
+ sf4.setValue("Parent", accId);
seq.addSequenceFeature(sf4);
// transcript not valid:
SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
null);
- sf5.setValue("Parent", "transcript:" + accId);
+ sf5.setValue("Parent", accId);
seq.addSequenceFeature(sf5);
// CDS not valid:
SequenceFeature sf6 = new SequenceFeature("transcript", "", 1, 2, 0f,
null);
- sf6.setValue("Parent", "transcript:" + accId);
+ sf6.setValue("Parent", accId);
seq.addSequenceFeature(sf6);
List<SequenceFeature> sfs = new EnsemblCdna()
// CDS at (start+10000) length 501
SequenceFeature sf = new SequenceFeature("CDS", "", 20000, 20500, 0f,
null);
- sf.setValue("Parent", "transcript:" + transcriptId);
+ sf.setValue("Parent", transcriptId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// CDS (sub-type) at (start + 10500) length 101
sf = new SequenceFeature("CDS_predicted", "", 10500, 10600, 0f, null);
- sf.setValue("Parent", "transcript:" + transcriptId);
+ sf.setValue("Parent", transcriptId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// CDS belonging to a different transcript doesn't count
sf = new SequenceFeature("CDS", "", 11500, 12600, 0f, null);
- sf.setValue("Parent", "transcript:anotherOne");
+ sf.setValue("Parent", "anotherOne");
genomic.addSequenceFeature(sf);
// exon feature doesn't count
assertFalse(testee.retainFeature(sf, accId));
// other feature with no parent is retained
- sf = new SequenceFeature("CDS_psequence_variantredicted", "", 20000,
+ sf = new SequenceFeature("anotherType", "", 20000,
20500, 0f, null);
assertTrue(testee.retainFeature(sf, accId));
// other feature with desired parent is retained
- sf.setValue("Parent", "transcript:" + accId);
+ sf.setValue("Parent", accId);
assertTrue(testee.retainFeature(sf, accId));
// feature with wrong parent is not retained
- sf.setValue("Parent", "transcript:XYZ");
+ sf.setValue("Parent", "XYZ");
assertFalse(testee.retainFeature(sf, accId));
}
// cds with wrong parent not valid
SequenceFeature sf2 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
- sf2.setValue("Parent", "transcript:XYZ");
+ sf2.setValue("Parent", "XYZ");
seq.addSequenceFeature(sf2);
// cds with right parent is valid
SequenceFeature sf3 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
- sf3.setValue("Parent", "transcript:" + accId);
+ sf3.setValue("Parent", accId);
seq.addSequenceFeature(sf3);
// cds sub-type with right parent is valid
SequenceFeature sf4 = new SequenceFeature("CDS_predicted", "", 1, 2, 0f,
null);
- sf4.setValue("Parent", "transcript:" + accId);
+ sf4.setValue("Parent", accId);
seq.addSequenceFeature(sf4);
// transcript not valid:
SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
null);
- sf5.setValue("Parent", "transcript:" + accId);
+ sf5.setValue("Parent", accId);
seq.addSequenceFeature(sf5);
// exon not valid:
SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
- sf6.setValue("Parent", "transcript:" + accId);
+ sf6.setValue("Parent", accId);
seq.addSequenceFeature(sf6);
List<SequenceFeature> sfs = new EnsemblCds().getIdentifyingFeatures(seq,
// gene at (start + 10500) length 101
SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f,
null);
- sf.setValue("ID", "gene:" + geneId);
+ sf.setValue("id", geneId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// gene at (start + 10500) length 101
SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f,
null);
- sf.setValue("ID", "gene:" + geneId);
+ sf.setValue("id", geneId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// transcript feature
SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
20500, 0f, null);
- sf1.setValue("Parent", "gene:" + geneId);
- sf1.setValue("transcript_id", "transcript1");
+ sf1.setValue("Parent", geneId);
+ sf1.setValue("id", "transcript1");
genomic.addSequenceFeature(sf1);
// transcript sub-type feature
SequenceFeature sf2 = new SequenceFeature("snRNA", "", 21000, 21500,
0f, null);
- sf2.setValue("Parent", "gene:" + geneId);
- sf2.setValue("transcript_id", "transcript2");
+ sf2.setValue("Parent", geneId);
+ sf2.setValue("id", "transcript2");
genomic.addSequenceFeature(sf2);
// NMD_transcript_variant treated like transcript in Ensembl
SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
22000, 22500, 0f, null);
// id matching should not be case-sensitive
- sf3.setValue("Parent", "gene:" + geneId.toLowerCase());
- sf3.setValue("transcript_id", "transcript3");
+ sf3.setValue("Parent", geneId.toLowerCase());
+ sf3.setValue("id", "transcript3");
genomic.addSequenceFeature(sf3);
// transcript for a different gene - ignored
SequenceFeature sf4 = new SequenceFeature("snRNA", "", 23000, 23500,
0f, null);
- sf4.setValue("Parent", "gene:XYZ");
- sf4.setValue("transcript_id", "transcript4");
+ sf4.setValue("Parent", "XYZ");
+ sf4.setValue("id", "transcript4");
genomic.addSequenceFeature(sf4);
EnsemblGene testee = new EnsemblGene();
EnsemblGene testee = new EnsemblGene();
SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
null);
- sf.setValue("ID", "gene:" + geneId);
+ sf.setValue("id", geneId);
assertFalse(testee.retainFeature(sf, geneId));
sf = new SequenceFeature("transcript", "", 20000, 20500, 0f, null);
- sf.setValue("Parent", "gene:" + geneId);
+ sf.setValue("Parent", geneId);
assertTrue(testee.retainFeature(sf, geneId));
sf = new SequenceFeature("mature_transcript", "", 20000, 20500, 0f,
null);
- sf.setValue("Parent", "gene:" + geneId);
+ sf.setValue("Parent", geneId);
assertTrue(testee.retainFeature(sf, geneId));
sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
0f, null);
- sf.setValue("Parent", "gene:" + geneId);
+ sf.setValue("Parent", geneId);
assertTrue(testee.retainFeature(sf, geneId));
- sf.setValue("Parent", "gene:XYZ");
+ sf.setValue("Parent", "ßXYZ");
assertFalse(testee.retainFeature(sf, geneId));
sf = new SequenceFeature("anything", "", 20000, 20500, 0f, null);
seq.addSequenceFeature(sf1);
// gene with wrong ID not valid
- SequenceFeature sf2 = new SequenceFeature("gene", "", 1, 2, 0f, null);
- sf2.setValue("ID", "gene:XYZ");
+ SequenceFeature sf2 = new SequenceFeature("gene", "a", 1, 2, 0f, null);
+ sf2.setValue("id", "XYZ");
seq.addSequenceFeature(sf2);
// gene with right ID is valid
- SequenceFeature sf3 = new SequenceFeature("gene", "", 1, 2, 0f, null);
- sf3.setValue("ID", "gene:" + accId);
+ SequenceFeature sf3 = new SequenceFeature("gene", "b", 1, 2, 0f, null);
+ sf3.setValue("id", accId);
seq.addSequenceFeature(sf3);
// gene sub-type with right ID is valid
SequenceFeature sf4 = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
- sf4.setValue("ID", "gene:" + accId);
+ sf4.setValue("id", accId);
seq.addSequenceFeature(sf4);
// transcript not valid:
SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f, null);
- sf5.setValue("ID", "gene:" + accId);
+ sf5.setValue("id", accId);
seq.addSequenceFeature(sf5);
// exon not valid:
SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
- sf6.setValue("ID", "gene:" + accId);
+ sf6.setValue("id", accId);
seq.addSequenceFeature(sf6);
List<SequenceFeature> sfs = new EnsemblGene()
// transcript at (start+10000) length 501
SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
20500, 0f, null);
- sf.setValue("ID", "transcript:" + transcriptId);
+ sf.setValue("id", transcriptId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// transcript (sub-type) at (start + 10500) length 101
sf = new SequenceFeature("ncRNA", "", 10500, 10600, 0f, null);
- sf.setValue("ID", "transcript:" + transcriptId);
+ sf.setValue("id", transcriptId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// although strictly it is a sequence_variant in SO
sf = new SequenceFeature("NMD_transcript_variant", "", 11000, 12000,
0f, null);
- sf.setValue("ID", "transcript:" + transcriptId);
+ sf.setValue("id", transcriptId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
// transcript with a different ID doesn't count
sf = new SequenceFeature("transcript", "", 11500, 12600, 0f, null);
- sf.setValue("ID", "transcript:anotherOne");
+ sf.setValue("id", "anotherOne");
genomic.addSequenceFeature(sf);
// parent of transcript feature doesn't count
assertTrue(testee.retainFeature(sf, accId));
// other feature with correct parent is kept
- sf.setValue("Parent", "transcript:" + accId);
+ sf.setValue("Parent", accId);
assertTrue(testee.retainFeature(sf, accId));
// other feature with wrong parent is not kept
- sf.setValue("Parent", "transcript:XYZ");
+ sf.setValue("Parent", "XYZ");
assertFalse(testee.retainFeature(sf, accId));
}
seq.addSequenceFeature(sf1);
// transcript with wrong ID not valid
- SequenceFeature sf2 = new SequenceFeature("transcript", "", 1, 2, 0f,
+ // NB change desc to avoid rejection of duplicate feature!
+ SequenceFeature sf2 = new SequenceFeature("transcript", "a", 1, 2, 0f,
null);
- sf2.setValue("ID", "transcript");
+ sf2.setValue("id", "transcript");
seq.addSequenceFeature(sf2);
// transcript with right ID is valid
- SequenceFeature sf3 = new SequenceFeature("transcript", "", 1, 2, 0f,
+ SequenceFeature sf3 = new SequenceFeature("transcript", "b", 1, 2, 0f,
null);
- sf3.setValue("ID", "transcript:" + accId);
+ sf3.setValue("id", accId);
seq.addSequenceFeature(sf3);
// transcript sub-type with right ID is valid
SequenceFeature sf4 = new SequenceFeature("ncRNA", "", 1, 2, 0f, null);
- sf4.setValue("ID", "transcript:" + accId);
+ sf4.setValue("id", accId);
seq.addSequenceFeature(sf4);
// Ensembl treats NMD_transcript_variant as if a transcript
SequenceFeature sf5 = new SequenceFeature("NMD_transcript_variant", "",
1, 2, 0f, null);
- sf5.setValue("ID", "transcript:" + accId);
+ sf5.setValue("id", accId);
seq.addSequenceFeature(sf5);
// gene not valid:
SequenceFeature sf6 = new SequenceFeature("gene", "", 1, 2, 0f, null);
- sf6.setValue("ID", "transcript:" + accId);
+ sf6.setValue("id", accId);
seq.addSequenceFeature(sf6);
// exon not valid:
SequenceFeature sf7 = new SequenceFeature("exon", "", 1, 2, 0f, null);
- sf7.setValue("ID", "transcript:" + accId);
+ sf7.setValue("id", accId);
seq.addSequenceFeature(sf7);
List<SequenceFeature> sfs = new EnsemblGenome()
--- /dev/null
+package jalview.util;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+
+import org.json.JSONException;
+import org.json.simple.JSONArray;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for JSONUtils
+ */
+public class JSONUtilsTest
+{
+  /**
+   * Checks arrayToList for a null array, an empty array, and arrays holding
+   * one and two values
+   */
+  @Test(groups = "Functional")
+  public void testArrayToList() throws JSONException
+  {
+    assertNull(JSONUtils.arrayToList(null));
+
+    // an empty (but not null) array should also give null
+    JSONArray ja = new JSONArray();
+    assertNull(JSONUtils.arrayToList(ja));
+
+    ja.add("hello");
+    assertEquals(JSONUtils.arrayToList(ja), "hello");
+
+    ja.add("world");
+    assertEquals(JSONUtils.arrayToList(ja), "hello,world");
+  }
+}