2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import jalview.bin.Cache;
24 import jalview.datamodel.AlignmentI;
25 import jalview.datamodel.GeneLociI;
26 import jalview.util.MapList;
28 import java.io.BufferedReader;
29 import java.io.IOException;
30 import java.net.MalformedURLException;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.List;
35 import java.util.function.Function;
37 import org.json.simple.JSONObject;
38 import org.json.simple.parser.JSONParser;
39 import org.json.simple.parser.ParseException;
42 * A client for the Ensembl /lookup REST endpoint, used to find the gene
43 * identifier given a gene, transcript or protein identifier, or to extract the
44 * species or chromosomal coordinates from the same service response
48 public class EnsemblLookup extends EnsemblRestClient
50 private static final String SPECIES = "species";
53 * Default constructor (to use rest.ensembl.org)
55 public EnsemblLookup()
61 * Constructor given the target domain to fetch data from
65 public EnsemblLookup(String d)
71 public String getDbName()
77 public AlignmentI getSequenceRecords(String queries) throws Exception
83 protected URL getUrl(List<String> ids) throws MalformedURLException
85 String identifier = ids.get(0);
86 return getUrl(identifier, null);
90 * Gets the url for lookup of the given identifier, optionally with objectType
91 * also specified in the request
97 protected URL getUrl(String identifier, String objectType)
99 String url = getDomain() + "/lookup/id/" + identifier
101 if (objectType != null)
103 url += "&" + OBJECT_TYPE + "=" + objectType;
109 } catch (MalformedURLException e)
116 protected boolean useGetRequest()
122 protected String getRequestMimeType(boolean multipleIds)
124 return "application/json";
128 protected String getResponseMimeType()
130 return "application/json";
134 * Returns the gene id related to the given identifier (which may be for a
135 * gene, transcript or protein)
140 public String getGeneId(String identifier)
142 return (String) getResult(identifier, null, br -> parseGeneId(br));
146 * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the
147 * given identifier, or null if not found
152 public String getSpecies(String identifier)
154 return (String) getResult(identifier, null,
155 br -> getAttribute(br, SPECIES));
159 * Calls the /lookup/id rest service and delegates parsing of the JSON
160 * response to the supplied parser
168 protected Object getResult(String identifier, String objectType,
169 Function<BufferedReader, Object> parser)
171 List<String> ids = Arrays.asList(new String[] { identifier });
173 BufferedReader br = null;
176 URL url = getUrl(identifier, objectType);
179 br = getHttpResponse(url, ids);
181 return br == null ? null : parser.apply(br);
182 } catch (IOException e)
193 } catch (IOException e)
202 * Answers the value of 'attribute' from the JSON response, or null if not
209 protected String getAttribute(BufferedReader br, String attribute)
212 JSONParser jp = new JSONParser();
215 JSONObject val = (JSONObject) jp.parse(br);
216 value = val.get(attribute).toString();
217 } catch (ParseException | NullPointerException | IOException e)
225 * Parses the JSON response and returns the gene identifier, or null if not
226 * found. If the returned object_type is Gene, returns the id, if Transcript
227 * returns the Parent. If it is Translation (peptide identifier), then the
228 * Parent is the transcript identifier, so we redo the search with this value,
229 * specifying that object_type should be Transcript.
234 protected String parseGeneId(BufferedReader br)
236 String geneId = null;
237 JSONParser jp = new JSONParser();
240 JSONObject val = (JSONObject) jp.parse(br);
241 String type = val.get(OBJECT_TYPE).toString();
242 if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
244 // got the gene - just returns its id
245 geneId = val.get(JSON_ID).toString();
247 else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
249 // got the transcript - return its (Gene) Parent
250 geneId = val.get(PARENT).toString();
252 else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
254 // got the protein - get its Parent, restricted to type Transcript
255 String transcriptId = val.get(PARENT).toString();
256 geneId = (String) getResult(transcriptId, OBJECT_TYPE_TRANSCRIPT,
257 reader -> parseGeneId(reader));
259 } catch (ParseException | IOException e)
267 * Calls the /lookup/id rest service for the given id, and if successful,
268 * parses and returns the gene's chromosomal coordinates
273 public GeneLociI getGeneLoci(String geneId)
275 return (GeneLociI) getResult(geneId, OBJECT_TYPE_GENE,
276 br -> parseGeneLoci(br));
280 * Parses the /lookup/id response for species, assembly_name,
281 * seq_region_name, start, end and returns an object that wraps them, or null
287 GeneLociI parseGeneLoci(BufferedReader br)
289 JSONParser jp = new JSONParser();
292 JSONObject val = (JSONObject) jp.parse(br);
293 final String species = val.get("species").toString();
294 final String assembly = val.get("assembly_name").toString();
295 final String chromosome = val.get("seq_region_name").toString();
296 String strand = val.get("strand").toString();
297 int start = Integer.parseInt(val.get("start").toString());
298 int end = Integer.parseInt(val.get("end").toString());
299 int fromEnd = end - start + 1;
300 boolean reverseStrand = "-1".equals(strand);
301 int toStart = reverseStrand ? end : start;
302 int toEnd = reverseStrand ? start : end;
303 List<int[]> fromRange = Collections.singletonList(new int[] { 1,
305 List<int[]> toRange = Collections.singletonList(new int[] { toStart,
307 final MapList map = new MapList(fromRange, toRange, 1, 1);
308 return new GeneLociI()
312 public String getSpeciesId()
314 return species == null ? "" : species;
318 public String getAssemblyId()
324 public String getChromosomeId()
330 public MapList getMap()
335 } catch (ParseException | NullPointerException | IOException
336 | NumberFormatException | ClassCastException e)
338 Cache.log.error("Error looking up gene loci: " + e.getMessage());