import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
-import jalview.io.DataSourceType;
-import jalview.io.FeaturesFile;
-import jalview.io.FileParse;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyI;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
/**
- * A client for fetching and processing Ensembl feature data in GFF format by
+ * A client for fetching and processing Ensembl feature data in JSON format by
* calling the overlap REST service
{
return null;
}
- FeaturesFile fr = new FeaturesFile(
- new FileParse(fp, null, DataSourceType.URL));
- return new Alignment(fr.getSeqsAsArray());
+
+ SequenceI seq = parseFeaturesJson(fp);
+ return new Alignment(new SequenceI[] { seq });
+ }
+
+  /**
+   * Parses the JSON response of the overlap REST service into Jalview sequence
+   * features, and attaches them to a placeholder sequence named "Dummy".
+   * <p>
+   * Parsing is best-effort: an entry that cannot be converted (missing
+   * mandatory field, non-numeric start/end, unexpected value type) is skipped
+   * and the remaining entries are still processed. If the response cannot be
+   * read or parsed as JSON, or its top level is not an array, the placeholder
+   * sequence is returned without any features.
+   *
+   * @param br
+   *          reader supplying the JSON response
+   * @return a sequence (never null) holding the features that could be parsed
+   */
+  private SequenceI parseFeaturesJson(BufferedReader br)
+  {
+    SequenceI seq = new Sequence("Dummy", "");
+
+    JSONParser jp = new JSONParser();
+    try
+    {
+      Object parsed = jp.parse(br);
+      if (!(parsed instanceof JSONArray))
+      {
+        /*
+         * a JSON object or JSON null rather than a list of features; an
+         * unchecked cast here would throw ClassCastException (or a later
+         * NullPointerException) out of this method
+         */
+        System.err.println(
+                "Unexpected (non-array) JSON response when parsing features");
+        return seq;
+      }
+
+      for (Object entry : (JSONArray) parsed)
+      {
+        try
+        {
+          JSONObject obj = (JSONObject) entry;
+          String type = obj.get("feature_type").toString();
+          int start = Integer.parseInt(obj.get("start").toString());
+          int end = Integer.parseInt(obj.get("end").toString());
+          String source = obj.get("source").toString();
+          String strand = obj.get("strand").toString();
+
+          // optional array-valued attributes, stored in their JSON text form
+          JSONArray allelesArray = (JSONArray) obj.get("alleles");
+          String alleles = allelesArray == null ? null
+                  : allelesArray.toString(); // todo need as a List?
+          JSONArray clinSigArray = (JSONArray) obj
+                  .get("clinical_significance");
+          String clinSig = clinSigArray == null ? null
+                  : clinSigArray.toString();
+
+          /*
+           * convert 'variation' to 'sequence_variant', and 'cds' to 'CDS'
+           * so as to have a valid SO term for the feature type
+           * ('gene', 'exon', 'transcript' don't need any conversion)
+           */
+          if ("variation".equals(type))
+          {
+            type = SequenceOntologyI.SEQUENCE_VARIANT;
+          }
+          else if (SequenceOntologyI.CDS.equalsIgnoreCase(type))
+          {
+            type = SequenceOntologyI.CDS;
+          }
+
+          String desc = getFirstNotNull(obj, "alleles", "external_name",
+                  JSON_ID);
+          SequenceFeature sf = new SequenceFeature(type, desc, start, end,
+                  source);
+          // any strand value other than "1" is recorded as reverse strand
+          sf.setStrand("1".equals(strand) ? "+" : "-");
+          setFeatureAttribute(sf, obj, "id");
+          setFeatureAttribute(sf, obj, "Parent");
+          setFeatureAttribute(sf, obj, "consequence_type");
+          sf.setValue("alleles", alleles);
+          sf.setValue("clinical_significance", clinSig);
+
+          seq.addSequenceFeature(sf);
+        } catch (Exception ignored)
+        {
+          /*
+           * deliberately best-effort: skip this malformed entry and keep
+           * trying other features (Errors are no longer swallowed here)
+           */
+        }
+      }
+    } catch (ParseException | IOException e)
+    {
+      // report rather than hide the failure; caller gets a feature-less sequence
+      System.err.println("Error parsing features JSON: " + e);
+    }
+
+    return seq;
+  }
+
+  /**
+   * Looks up each key in turn and answers the string form of the first value
+   * that is present and is not the empty string, or null if there is none
+   *
+   * @param obj
+   *          the JSON object to search
+   * @param keys
+   *          attribute names, in order of preference
+   * @return the first non-null, non-empty value as a string, else null
+   */
+  protected String getFirstNotNull(JSONObject obj, String... keys)
+  {
+    for (String name : keys)
+    {
+      Object found = obj.get(name);
+      // an absent attribute is treated the same as an empty one: try next key
+      String text = found == null ? "" : found.toString();
+      if (!text.isEmpty())
+      {
+        return text;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Copies one entry of the JSON object onto the sequence feature: if the JSON
+   * object holds a non-null value for 'key', its string form is stored on the
+   * feature under the same key; otherwise the feature is left unchanged
+   *
+   * @param sf
+   *          the feature to update
+   * @param obj
+   *          the JSON object to read from
+   * @param key
+   *          name of the JSON entry, also used as the feature attribute name
+   */
+  protected void setFeatureAttribute(SequenceFeature sf, JSONObject obj,
+          String key)
+  {
+    Object attribute = obj.get(key);
+    if (attribute == null)
+    {
+      return;
+    }
+    sf.setValue(key, attribute.toString());
}
/**
urlstring.append(getDomain()).append("/overlap/id/").append(ids.get(0));
// @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
- urlstring.append("?content-type=text/x-gff3");
+ urlstring.append("?content-type=" + getResponseMimeType());
/*
* specify object_type=gene in case is shared by transcript and/or protein;
@Override
protected String getRequestMimeType()
{
- return "text/x-gff3";
+ return "application/json"; // JSON replaces the previous GFF3 ("text/x-gff3") MIME type
}
/**
- * Returns the MIME type for GFF3
+ * Returns the MIME type wanted for the response (JSON, "application/json")
*/
@Override
protected String getResponseMimeType()
{
- return "text/x-gff3";
+ return "application/json"; // also appended as the content-type parameter of the overlap URL
}
/**
*/
public class EnsemblGene extends EnsemblSeqProxy
{
- private static final String GENE_PREFIX = "gene:";
-
/*
* accepts anything as we will attempt lookup of gene or
* transcript id or gene name
* look for exon features of the transcript, failing that for CDS
* (for example ENSG00000124610 has 1 CDS but no exon features)
*/
- String parentId = "transcript:" + accId;
+ String parentId = accId;
List<SequenceFeature> splices = findFeatures(gene,
SequenceOntologyI.EXON, parentId);
if (splices.isEmpty())
* Ensembl has gene name as transcript Name
* EnsemblGenomes doesn't, but has a url-encoded description field
*/
- String description = (String) transcriptFeature.getValue(NAME);
+ String description = transcriptFeature.getDescription();
if (description == null)
{
description = (String) transcriptFeature.getValue(DESCRIPTION);
*/
protected String getTranscriptId(SequenceFeature feature)
{
- return (String) feature.getValue("transcript_id");
+ return (String) feature.getValue(JSON_ID); // id is now read from the JSON_ID attribute, not "transcript_id"
}
/**
{
List<SequenceFeature> transcriptFeatures = new ArrayList<>();
- String parentIdentifier = GENE_PREFIX + accId;
+ String parentIdentifier = accId;
List<SequenceFeature> sfs = geneSequence.getFeatures()
.getFeaturesByOntology(SequenceOntologyI.TRANSCRIPT);
.getFeaturesByOntology(SequenceOntologyI.GENE);
for (SequenceFeature sf : sfs)
{
- // NB features as gff use 'ID'; rest services return as 'id'
- String id = (String) sf.getValue("ID");
- if ((GENE_PREFIX + accId).equalsIgnoreCase(id))
+ String id = (String) sf.getValue(JSON_ID);
+ if (accId.equalsIgnoreCase(id))
{
result.add(sf);
}
if (isTranscript(type))
{
String parent = (String) sf.getValue(PARENT);
- if (!(GENE_PREFIX + accessionId).equalsIgnoreCase(parent))
+ if (!accessionId.equalsIgnoreCase(parent))
{
return false;
}