/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.ext.ensembl;

import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.io.gff.SequenceOntologyI;
import jalview.util.JSONUtils;
import jalview.util.Platform;

import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.json.simple.parser.ParseException;
/**
 * A client for fetching and processing Ensembl feature data in JSON format by
 * calling the overlap REST service
 * 
 * @see http://rest.ensembl.org/documentation/info/overlap_id
 */
50 class EnsemblFeatures extends EnsemblRestClient
53 * The default features to retrieve from Ensembl
54 * can override in getSequenceRecords parameter
56 private EnsemblFeatureType[] featuresWanted = { EnsemblFeatureType.cds,
57 EnsemblFeatureType.exon, EnsemblFeatureType.variation };
/**
 * Default constructor (to use rest.ensembl.org)
 */
// NOTE(review): the body of this constructor is not visible in the reviewed
// extract (short lines appear to have been dropped); presumably it only
// delegates to the EnsemblRestClient constructor - confirm against the full
// file.
62 public EnsemblFeatures()
/**
 * Constructor given the target domain to fetch data from
 */
// NOTE(review): the body of this constructor is not visible in the reviewed
// extract (short lines appear to have been dropped); presumably it hands the
// domain 'd' on to the EnsemblRestClient constructor - confirm against the
// full file.
72 public EnsemblFeatures(String d)
78 public String getDbName()
80 return "ENSEMBL (features)";
84 * Makes a query to the REST overlap endpoint for the given sequence
85 * identifier. This returns an 'alignment' consisting of one 'dummy sequence'
86 * (the genomic sequence for which overlap features are returned by the
87 * service). This sequence will have on it sequence features which are the
88 * real information of interest, such as CDS regions or sequence variations.
91 public AlignmentI getSequenceRecords(String query) throws IOException
93 // TODO: use a vararg String... for getSequenceRecords instead?
95 List<String> queries = new ArrayList<>();
97 SequenceI seq = parseFeaturesJson(queries);
100 return new Alignment(new SequenceI[] { seq });
105 * Parses the JSON response into Jalview sequence features and attaches them
106 * to a dummy sequence
111 @SuppressWarnings("unchecked")
112 private SequenceI parseFeaturesJson(List<String> queries)
114 SequenceI seq = new Sequence("Dummy", "");
117 Iterator<Object> rvals = (Iterator<Object>) getJSON(null, queries, -1,
118 MODE_ITERATOR, null);
123 while (rvals.hasNext())
127 Map<String, Object> obj = (Map<String, Object>) rvals.next();
128 String type = obj.get("feature_type").toString();
129 int start = Integer.parseInt(obj.get("start").toString());
130 int end = Integer.parseInt(obj.get("end").toString());
131 String source = obj.get("source").toString();
132 String strand = obj.get("strand").toString();
133 Object phase = obj.get("phase");
134 String alleles = JSONUtils
135 .arrayToStringList((List<Object>) obj.get("alleles"));
136 String clinSig = JSONUtils.arrayToStringList(
137 (List<Object>) obj.get("clinical_significance"));
140 * convert 'variation' to 'sequence_variant', and 'cds' to 'CDS'
141 * so as to have a valid SO term for the feature type
142 * ('gene', 'exon', 'transcript' don't need any conversion)
144 if ("variation".equals(type))
146 type = SequenceOntologyI.SEQUENCE_VARIANT;
148 else if (SequenceOntologyI.CDS.equalsIgnoreCase((type)))
150 type = SequenceOntologyI.CDS;
153 String desc = getFirstNotNull(obj, "alleles", "external_name",
155 SequenceFeature sf = new SequenceFeature(type, desc, start, end,
157 sf.setStrand("1".equals(strand) ? "+" : "-");
160 sf.setPhase(phase.toString());
162 setFeatureAttribute(sf, obj, "id");
163 setFeatureAttribute(sf, obj, "Parent");
164 setFeatureAttribute(sf, obj, "consequence_type");
165 sf.setValue("alleles", alleles);
166 sf.setValue("clinical_significance", clinSig);
168 seq.addSequenceFeature(sf);
170 } catch (Throwable t)
172 // ignore - keep trying other features
175 } catch (ParseException | IOException e)
185 * Returns the first non-null attribute found (if any) as a string, formatted
186 * suitably for display as feature description or tooltip. Answers null if
187 * none of the attribute keys is present.
193 @SuppressWarnings("unchecked")
194 protected String getFirstNotNull(Map<String, Object> obj, String... keys)
196 for (String key : keys)
198 Object val = obj.get(key);
201 String s = val instanceof List<?>
202 ? JSONUtils.arrayToStringList((List<Object>) val)
214 * A helper method that reads the 'key' entry in the JSON object, and if not
215 * null, sets its string value as an attribute on the sequence feature
221 protected void setFeatureAttribute(SequenceFeature sf,
222 Map<String, Object> obj, String key)
224 Object object = obj.get(key);
227 sf.setValue(key, object.toString());
232 * Returns a URL for the REST overlap endpoint
238 protected URL getUrl(List<String> ids) throws MalformedURLException
240 StringBuffer urlstring = new StringBuffer(128);
241 urlstring.append(getDomain()).append("/overlap/id/").append(ids.get(0));
243 // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
244 urlstring.append("?content-type=" + getResponseMimeType());
247 * specify object_type=gene in case is shared by transcript and/or protein;
248 * currently only fetching features for gene sequences;
249 * refactor in future if needed to fetch for transcripts
251 urlstring.append("&").append(OBJECT_TYPE).append("=")
252 .append(OBJECT_TYPE_GENE);
255 * specify features to retrieve
256 * @see http://rest.ensembl.org/documentation/info/overlap_id
257 * could make the list a configurable entry in .jalview_properties
259 for (EnsemblFeatureType feature : featuresWanted)
261 urlstring.append("&feature=").append(feature.name());
264 return new URL(urlstring.toString());
// NOTE(review): the body of this method is not visible in the reviewed
// extract (short lines appear to have been dropped); the name suggests it
// answers whether the REST call is made with HTTP GET - confirm the returned
// value against the full file.
268 protected boolean useGetRequest()
274 * Returns the MIME type for GFF3. For GET requests the Content-type header
275 * describes the required encoding of the response.
278 protected String getRequestMimeType()
280 return "application/json";
284 * Returns the MIME type wanted for the response
287 protected String getResponseMimeType()
289 return "application/json";
293 * Overloaded method that allows a list of features to retrieve to be
299 * @throws IOException
301 protected AlignmentI getSequenceRecords(String accId,
302 EnsemblFeatureType[] features) throws IOException
304 featuresWanted = features;
305 return getSequenceRecords(accId);