X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblFeatures.java;h=e28cc7f0b4a4fdf9828ad89eaca14c35ed9e2d19;hb=refs%2Fheads%2FJAL-3253-applet-SwingJS-omnibus;hp=75708221f769e6ab33d7b595421a36c8530ac16c;hpb=cc33b3407777d6b69df1c1312eedcfa0c6b21b99;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblFeatures.java b/src/jalview/ext/ensembl/EnsemblFeatures.java index 7570822..e28cc7f 100644 --- a/src/jalview/ext/ensembl/EnsemblFeatures.java +++ b/src/jalview/ext/ensembl/EnsemblFeatures.java @@ -22,14 +22,23 @@ package jalview.ext.ensembl; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; -import jalview.io.FeaturesFile; -import jalview.io.FileParse; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.io.gff.SequenceOntologyI; +import jalview.util.JSONUtils; +import jalview.util.Platform; +import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; +import java.util.Map; + +import org.json.simple.parser.ParseException; /** * A client for fetching and processing Ensembl feature data in GFF format by @@ -82,15 +91,141 @@ class EnsemblFeatures extends EnsemblRestClient public AlignmentI getSequenceRecords(String query) throws IOException { // TODO: use a vararg String... for getSequenceRecords instead? - List queries = new ArrayList(); + + List queries = new ArrayList<>(); queries.add(query); - FileParse fp = getSequenceReader(queries); - if (fp == null || !fp.isValid()) + SequenceI seq = parseFeaturesJson(queries); + if (seq == null) + return null; + return new Alignment(new SequenceI[] { seq }); + + } + + /** + * Parses the JSON response into Jalview sequence features and attaches them + * to a dummy sequence + * + * @param br + * @return + */ + @SuppressWarnings("unchecked") + private SequenceI parseFeaturesJson(List queries) + { + SequenceI seq = new Sequence("Dummy", ""); + try + { + Iterator rvals = (Iterator) getJSON(null, queries, -1, MODE_ITERATOR, null); + if (rvals == null) + { + return null; + } + while (rvals.hasNext()) + { + try + { + Map obj = (Map) rvals.next(); + String type = obj.get("feature_type").toString(); + int start = Integer.parseInt(obj.get("start").toString()); + int end = Integer.parseInt(obj.get("end").toString()); + String source = obj.get("source").toString(); + String strand = obj.get("strand").toString(); + Object phase = obj.get("phase"); + String alleles = JSONUtils + .arrayToStringList((List) obj.get("alleles")); + String clinSig = JSONUtils + .arrayToStringList( + (List) obj.get("clinical_significance")); + + /* + * convert 'variation' to 'sequence_variant', and 'cds' to 'CDS' + * so as to have a valid SO term for the feature type + * ('gene', 'exon', 'transcript' don't need any conversion) + */ + if ("variation".equals(type)) + { + type = SequenceOntologyI.SEQUENCE_VARIANT; + } + else if (SequenceOntologyI.CDS.equalsIgnoreCase((type))) + { + type = SequenceOntologyI.CDS; + } + + String desc = getFirstNotNull(obj, "alleles", "external_name", + JSON_ID); + SequenceFeature sf = new SequenceFeature(type, desc, start, end, + source); + sf.setStrand("1".equals(strand) ? "+" : "-"); + if (phase != null) + { + sf.setPhase(phase.toString()); + } + setFeatureAttribute(sf, obj, "id"); + setFeatureAttribute(sf, obj, "Parent"); + setFeatureAttribute(sf, obj, "consequence_type"); + sf.setValue("alleles", alleles); + sf.setValue("clinical_significance", clinSig); + + seq.addSequenceFeature(sf); + + } catch (Throwable t) + { + // ignore - keep trying other features + } + } + } catch (ParseException | IOException e) + { + e.printStackTrace(); + // ignore + } + + return seq; + } + + /** + * Returns the first non-null attribute found (if any) as a string, formatted + * suitably for display as feature description or tooltip. Answers null if + * none of the attribute keys is present. + * + * @param obj + * @param keys + * @return + */ + @SuppressWarnings("unchecked") + protected String getFirstNotNull(Map obj, String... keys) + { + for (String key : keys) { - return null; + Object val = obj.get(key); + if (val != null) + { + String s = val instanceof List + ? JSONUtils.arrayToStringList((List) val) + : val.toString(); + if (!s.isEmpty()) + { + return s; + } + } + } + return null; + } + + /** + * A helper method that reads the 'key' entry in the JSON object, and if not + * null, sets its string value as an attribute on the sequence feature + * + * @param sf + * @param obj + * @param key + */ + protected void setFeatureAttribute(SequenceFeature sf, Map obj, + String key) + { + Object object = obj.get(key); + if (object != null) + { + sf.setValue(key, object.toString()); } - FeaturesFile fr = new FeaturesFile(fp); - return new Alignment(fr.getSeqsAsArray()); } /** @@ -106,12 +241,20 @@ class EnsemblFeatures extends EnsemblRestClient urlstring.append(getDomain()).append("/overlap/id/").append(ids.get(0)); // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats - urlstring.append("?content-type=text/x-gff3"); + urlstring.append("?content-type=" + getResponseMimeType()); + + /* + * specify object_type=gene in case is shared by transcript and/or protein; + * currently only fetching features for gene sequences; + * refactor in future if needed to fetch for transcripts + */ + urlstring.append("&").append(OBJECT_TYPE).append("=") + .append(OBJECT_TYPE_GENE); /* * specify features to retrieve * @see http://rest.ensembl.org/documentation/info/overlap_id - * could make the list a configurable entry in jalview.properties + * could make the list a configurable entry in .jalview_properties */ for (EnsemblFeatureType feature : featuresWanted) { @@ -132,18 +275,18 @@ class EnsemblFeatures extends EnsemblRestClient * describes the required encoding of the response. */ @Override - protected String getRequestMimeType(boolean multipleIds) + protected String getRequestMimeType() { - return "text/x-gff3"; + return "application/json"; } /** - * Returns the MIME type for GFF3. + * Returns the MIME type wanted for the response */ @Override protected String getResponseMimeType() { - return "text/x-gff3"; + return "application/json"; } /**