2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import jalview.bin.Cache;
24 import jalview.datamodel.AlignmentI;
25 import jalview.datamodel.GeneLociI;
26 import jalview.util.MapList;
28 import java.io.BufferedReader;
29 import java.io.IOException;
30 import java.net.MalformedURLException;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.List;
36 import org.json.simple.JSONObject;
37 import org.json.simple.parser.JSONParser;
38 import org.json.simple.parser.ParseException;
41 * A client for the Ensembl /lookup REST endpoint, used to find the gene
42 * identifier given a gene, transcript or protein identifier, or to extract the
43 * species or chromosomal coordinates from the same service response
47 public class EnsemblLookup extends EnsemblRestClient
// Key that getSpecies() reads from the lookup response JSON.
49 private static final String SPECIES = "species";
52 * keep track of last identifier retrieved to break loops
// getResult() compares each requested identifier against this value and
// reports a repeat on stderr.
54 private String lastId;
57 * Default constructor (to use rest.ensembl.org)
59 public EnsemblLookup()
65 * Constructor given the target domain to fetch data from
69 public EnsemblLookup(String d)
75 public String getDbName()
81 public AlignmentI getSequenceRecords(String queries) throws Exception
87 protected URL getUrl(List<String> ids) throws MalformedURLException
89 String identifier = ids.get(0);
90 return getUrl(identifier, null);
94 * Gets the url for lookup of the given identifier, optionally with objectType
95 * also specified in the request
101 protected URL getUrl(String identifier, String objectType)
// Start of the address: <domain>/lookup/id/<identifier>.
// NOTE(review): the continuation of this expression is not visible in this
// listing; the "&" appended below presumes a query string has already been
// opened - confirm.
103 String url = getDomain() + "/lookup/id/" + identifier
// Only restrict the lookup by object type when the caller supplied one.
105 if (objectType != null)
107 url += "&" + OBJECT_TYPE + "=" + objectType;
// NOTE(review): identifier and objectType are concatenated as-is (no URL
// encoding). What is returned when the URL is malformed is not visible
// here - confirm.
113 } catch (MalformedURLException e)
120 protected boolean useGetRequest()
126 protected String getRequestMimeType(boolean multipleIds)
128 return "application/json";
132 protected String getResponseMimeType()
134 return "application/json";
138 * Returns the gene id related to the given identifier (which may be for a
139 * gene, transcript or protein)
144 public String getGeneId(String identifier)
146 return getGeneId(identifier, null);
150 * Returns the gene id related to the given identifier (which may be for a
151 * gene, transcript or protein)
157 public String getGeneId(String identifier, String objectType)
// getHttpResponse is passed the ids as a list, so wrap the single identifier.
159 List<String> ids = Arrays.asList(new String[] { identifier });
161 BufferedReader br = null;
164 URL url = getUrl(identifier, objectType);
167 br = getHttpResponse(url, ids);
// No reader means no gene id; otherwise parseResponse extracts it from the
// JSON read from the reader.
169 return br == null ? null : parseResponse(br);
// NOTE(review): two IOException handlers are visible (here and below); the
// second presumably guards closing the reader. Their bodies are not visible
// in this listing - confirm.
170 } catch (IOException e)
181 } catch (IOException e)
190 * Parses the JSON response and returns the gene identifier, or null if not
191 * found. If the returned object_type is Gene, returns the id, if Transcript
192 * returns the Parent. If it is Translation (peptide identifier), then the
193 * Parent is the transcript identifier, so we redo the search with this value.
197 * @throws IOException
199 protected String parseResponse(BufferedReader br) throws IOException
201 String geneId = null;
202 JSONParser jp = new JSONParser();
// The whole response is parsed as one JSON value and cast to an object.
205 JSONObject val = (JSONObject) jp.parse(br);
// NOTE(review): get(...) returns null for an absent key, so this and the
// get(...).toString() calls below throw NullPointerException when the
// response lacks the key; only ParseException is visibly caught here.
206 String type = val.get(OBJECT_TYPE).toString();
207 if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
209 // got the gene - just return its id
210 geneId = val.get(JSON_ID).toString();
212 else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
214 // got the transcript - return its (Gene) Parent
215 geneId = val.get(PARENT).toString();
217 else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
219 // got the protein - get its Parent, restricted to type Transcript
220 String transcriptId = val.get(PARENT).toString();
// This issues a second lookup, this time for the transcript id.
221 geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT);
// NOTE(review): the handling of a parse failure and the final return are not
// visible in this listing - confirm.
223 } catch (ParseException e)
231 * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the
232 * given identifier, or null if not found
237 public String getSpecies(String identifier)
239 String species = null;
// No object type restriction is passed for this lookup.
240 JSONObject json = getResult(identifier, null);
// NOTE(review): getResult is documented to return null on any error, and
// get(...) returns null for an absent key. The guards for both cases are not
// visible in this listing - confirm they are present.
243 Object o = json.get(SPECIES);
246 species = o.toString();
253 * Calls the /lookup/id rest service and returns the response as a JSONObject,
254 * or null if any error
261 protected JSONObject getResult(String identifier, String objectType)
// getHttpResponse is passed the ids as a list, so wrap the single identifier.
263 List<String> ids = Arrays.asList(new String[] { identifier });
265 BufferedReader br = null;
269 URL url = getUrl(identifier, objectType);
// Loop guard: a request for the identifier already held in lastId is
// reported on stderr.
// NOTE(review): what follows the message, and where lastId is updated, is
// not visible in this listing - confirm.
271 if (identifier.equals(lastId))
273 System.err.println("** Ensembl lookup " + url.toString()
274 + " looping on Parent!");
282 br = getHttpResponse(url, ids);
// A null reader yields null; otherwise the whole response is parsed as one
// JSON value and cast.
// NOTE(review): a well-formed response that is not a JSON object would fail
// the cast with ClassCastException, which the catch below does not list.
284 return br == null ? null : (JSONObject) (new JSONParser().parse(br));
// Read and parse failures are reported on stderr together with the identifier.
285 } catch (IOException | ParseException e)
287 System.err.println("Error parsing " + identifier + " lookup response "
// NOTE(review): a second IOException handler; presumably it guards closing
// the reader. Its surroundings are not visible in this listing - confirm.
297 } catch (IOException e)
306 * Parses the JSON response and returns the gene identifier, or null if not
307 * found. If the returned object_type is Gene, returns the id, if Transcript
308 * returns the Parent. If it is Translation (peptide identifier), then the
309 * Parent is the transcript identifier, so we redo the search with this value,
310 * specifying that object_type should be Transcript.
315 protected String parseGeneId(JSONObject json)
// NOTE(review): the condition this comment belongs to is not visible in this
// listing; presumably it is a null check on json - confirm.
319 // e.g. lookup failed with 404 not found
323 String geneId = null;
// NOTE(review): get(...) returns null for an absent key, so this and the
// get(...).toString() calls below throw NullPointerException when the
// response lacks the key.
324 String type = json.get(OBJECT_TYPE).toString();
325 if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
327 // got the gene - just return its id
328 geneId = json.get(JSON_ID).toString();
330 else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
332 // got the transcript - return its (Gene) Parent
333 geneId = json.get(PARENT).toString();
335 else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
337 // got the protein - look up its Parent, restricted to type Transcript
338 String transcriptId = json.get(PARENT).toString();
// Recursion: a fresh lookup is made for the transcript id and its result is
// parsed by this same method. getResult may hand back null here.
339 geneId = parseGeneId(getResult(transcriptId, OBJECT_TYPE_TRANSCRIPT));
346 * Calls the /lookup/id rest service for the given id, and if successful,
347 * parses and returns the gene's chromosomal coordinates
352 public GeneLociI getGeneLoci(String geneId)
354 return parseGeneLoci(getResult(geneId, OBJECT_TYPE_GENE));
358 * Parses the /lookup/id response for species, assembly_name,
359 * seq_region_name, start, end and returns an object that wraps them, or null
365 GeneLociI parseGeneLoci(JSONObject json)
// Each get(...).toString() below throws NullPointerException when the key is
// absent from the response; the catch clause further down names that
// exception type.
374 final String species = json.get("species").toString();
375 final String assembly = json.get("assembly_name").toString();
376 final String chromosome = json.get("seq_region_name").toString();
377 String strand = json.get("strand").toString();
// Non-numeric start/end values surface as NumberFormatException, which the
// catch clause also names.
378 int start = Integer.parseInt(json.get("start").toString());
379 int end = Integer.parseInt(json.get("end").toString());
// Inclusive length of the start..end span; presumably the end of fromRange,
// whose visible first element is 1.
380 int fromEnd = end - start + 1;
// A strand value of "-1" is treated as reverse strand: the target range is
// then given end-first (end, start) rather than (start, end).
381 boolean reverseStrand = "-1".equals(strand);
382 int toStart = reverseStrand ? end : start;
383 int toEnd = reverseStrand ? start : end;
384 List<int[]> fromRange = Collections.singletonList(new int[] { 1,
386 List<int[]> toRange = Collections.singletonList(new int[] { toStart,
// NOTE(review): the trailing 1, 1 are presumably the from/to ratios
// (one-to-one) - confirm against MapList.
388 final MapList map = new MapList(fromRange, toRange, 1, 1);
// Anonymous GeneLociI over the values parsed above. The bodies of the
// accessors other than getSpeciesId are not visible in this listing.
389 return new GeneLociI()
393 public String getSpeciesId()
// species was produced by toString() above, so it is not null here; the
// fallback to "" is defensive only.
395 return species == null ? "" : species;
399 public String getAssemblyId()
405 public String getChromosomeId()
411 public MapList getMap()
416 } catch (NullPointerException | NumberFormatException e)
// NOTE(review): only the exception message is logged, and it may be null for
// a NullPointerException. What is returned after logging is not visible in
// this listing - confirm.
418 Cache.log.error("Error looking up gene loci: " + e.getMessage());