2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import jalview.bin.Cache;
24 import jalview.datamodel.AlignmentI;
25 import jalview.datamodel.GeneLociI;
26 import jalview.util.MapList;
28 import java.io.BufferedReader;
29 import java.io.IOException;
30 import java.net.MalformedURLException;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.List;
35 import java.util.function.Function;
37 import org.json.simple.JSONObject;
38 import org.json.simple.parser.JSONParser;
39 import org.json.simple.parser.ParseException;
42 * A client for the Ensembl lookup REST endpoint
46 public class EnsemblLookup extends EnsemblRestClient
48 private static final String SPECIES = "species";
50 private static final String PARENT = "Parent";
52 private static final String OBJECT_TYPE_TRANSLATION = "Translation";
53 private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
54 private static final String ID = "id";
55 private static final String OBJECT_TYPE_GENE = "Gene";
56 private static final String OBJECT_TYPE = "object_type";
59 * Default constructor (to use rest.ensembl.org)
61 public EnsemblLookup()
67 * Constructor given the target domain to fetch data from
71 public EnsemblLookup(String d)
77 public String getDbName()
83 public AlignmentI getSequenceRecords(String queries) throws Exception
89 protected URL getUrl(List<String> ids) throws MalformedURLException
91 String identifier = ids.get(0);
92 return getUrl(identifier);
99 protected URL getUrl(String identifier)
// the request address starts with the configured domain, followed by the
// /lookup/id/ path and then the identifier itself
101 String url = getDomain() + "/lookup/id/" + identifier
// NOTE(review): a malformed URL is handled inside this method - the signature
// declares no checked exception - so callers should confirm what is returned
// in that case
106 } catch (MalformedURLException e)
113 protected boolean useGetRequest()
119 protected String getRequestMimeType(boolean multipleIds)
121 return "application/json";
125 protected String getResponseMimeType()
127 return "application/json";
131 * Calls the Ensembl lookup REST endpoint and returns
133 * <li>the 'id' for the identifier if its type is "Gene"</li>
134 * <li>the 'Parent' if its type is 'Transcript'</li>
136 * If the type is 'Translation', does a recursive call to this method, passing
137 * in the 'Parent' (transcript id).
142 public String getGeneId(String identifier)
144 return (String) getResult(identifier, br -> parseGeneId(br));
148 * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the
149 * given identifier, or null if not found
154 public String getSpecies(String identifier)
156 return (String) getResult(identifier, br -> getAttribute(br, SPECIES));
160 * Calls the /lookup/id rest service and delegates parsing of the JSON
161 * response to the supplied parser
167 protected Object getResult(String identifier,
168 Function<BufferedReader, Object> parser)
// the single identifier is wrapped in a list because the list of ids is
// passed to getHttpResponse together with the URL
170 List<String> ids = Arrays.asList(new String[] { identifier });
172 BufferedReader br = null;
// the request address is derived from the identifier alone
175 URL url = getUrl(identifier);
178 br = getHttpResponse(url, ids);
// without a response reader there is nothing to parse, so null is answered;
// otherwise the value produced by the supplied parser is returned unchanged
180 return br == null ? null : parser.apply(br);
181 } catch (IOException e)
192 } catch (IOException e)
201 * Answers the value of 'attribute' from the JSON response, or null if not
208 protected String getAttribute(BufferedReader br, String attribute)
211 JSONParser jp = new JSONParser();
214 JSONObject val = (JSONObject) jp.parse(br);
215 value = val.get(attribute).toString();
216 } catch (ParseException | NullPointerException | IOException e)
224 * Parses the JSON response and returns the gene identifier, or null if not
225 * found. If the returned object_type is Gene, returns the id, if Transcript
226 * returns the Parent. If it is Translation (peptide identifier), then the
227 * Parent is the transcript identifier, so we redo the search with this value.
232 protected String parseGeneId(BufferedReader br)
234 String geneId = null;
235 JSONParser jp = new JSONParser();
238 JSONObject val = (JSONObject) jp.parse(br);
239 String type = val.get(OBJECT_TYPE).toString();
240 if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
242 geneId = val.get(ID).toString();
244 else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
246 geneId = val.get(PARENT).toString();
248 else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
250 String transcriptId = val.get(PARENT).toString();
253 geneId = getGeneId(transcriptId);
254 } catch (StackOverflowError e)
257 * unlikely data condition error!
260 .println("** Ensembl lookup "
261 + getUrl(transcriptId).toString()
262 + " looping on Parent!");
265 } catch (ParseException | IOException e)
273 * Calls the /lookup/id rest service for the given id, and if successful,
274 * parses and returns the gene's chromosomal coordinates
279 public GeneLociI getGeneLoci(String geneId)
281 return (GeneLociI) getResult(geneId, br -> parseGeneLoci(br));
285 * Parses the /lookup/id response for species, assembly_name,
286 * seq_region_name, start, end and returns an object that wraps them, or null
292 GeneLociI parseGeneLoci(BufferedReader br)
294 JSONParser jp = new JSONParser();
297 JSONObject val = (JSONObject) jp.parse(br);
298 final String species = val.get("species").toString();
299 final String assembly = val.get("assembly_name").toString();
300 final String chromosome = val.get("seq_region_name").toString();
301 String strand = val.get("strand").toString();
302 int start = Integer.parseInt(val.get("start").toString());
303 int end = Integer.parseInt(val.get("end").toString());
304 int fromEnd = end - start + 1;
305 boolean reverseStrand = "-1".equals(strand);
306 int toStart = reverseStrand ? end : start;
307 int toEnd = reverseStrand ? start : end;
308 List<int[]> fromRange = Collections.singletonList(new int[] { 1,
310 List<int[]> toRange = Collections.singletonList(new int[] { toStart,
312 final MapList map = new MapList(fromRange, toRange, 1, 1);
313 return new GeneLociI()
317 public String getSpeciesId()
319 return species == null ? "" : species;
323 public String getAssemblyId()
329 public String getChromosomeId()
335 public MapList getMap()
340 } catch (ParseException | NullPointerException | IOException
341 | NumberFormatException | ClassCastException e)
343 Cache.log.error("Error looking up gene loci: " + e.getMessage());