2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import jalview.datamodel.AlignmentI;
24 import jalview.datamodel.DBRefSource;
25 import jalview.datamodel.GeneLociI;
26 import jalview.datamodel.GeneLocus;
27 import jalview.datamodel.Mapping;
28 import jalview.util.MapList;
30 import java.io.BufferedReader;
31 import java.io.IOException;
32 import java.net.MalformedURLException;
34 import java.util.ArrayList;
35 import java.util.Collections;
36 import java.util.Iterator;
37 import java.util.List;
39 import org.json.simple.JSONArray;
40 import org.json.simple.JSONObject;
41 import org.json.simple.parser.JSONParser;
42 import org.json.simple.parser.ParseException;
45 * A client for the Ensembl REST service /map endpoint, to convert from
46 * coordinates of one genome assembly to another.
48 * Note that species and assembly identifiers passed to this class must be valid
49 * in Ensembl. They are not case sensitive.
52 * @see <a href="https://rest.ensembl.org/documentation/info/assembly_map">/map endpoint documentation</a>
53 * @see <a href="https://rest.ensembl.org/info/assembly/human?content-type=text/xml">/info/assembly/human</a>
54 * @see <a href="https://rest.ensembl.org/info/species?content-type=text/xml">/info/species</a>
56 public class EnsemblMap extends EnsemblRestClient
58 private static final String MAPPED = "mapped";
60 private static final String MAPPINGS = "mappings";
62 private static final String CDS = "cds";
64 private static final String CDNA = "cdna";
67 * Default constructor (to use rest.ensembl.org)
75 * Constructor given the target domain to fetch data from
79 public EnsemblMap(String domain)
85 public String getDbName()
87 return DBRefSource.ENSEMBL;
91 public AlignmentI getSequenceRecords(String queries) throws Exception
93 return null; // not used
97 * Constructs a URL of the format <code>
98 * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
108 * @throws MalformedURLException
110 protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
111 String toRef, int startPos, int endPos)
112 throws MalformedURLException
115 * start-end might be reverse strand - present forwards to the service
117 boolean forward = startPos <= endPos;
118 int start = forward ? startPos : endPos;
119 int end = forward ? endPos : startPos;
120 String strand = forward ? "1" : "-1";
121 String url = String.format(
122 "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
123 getDomain(), species, fromRef, chromosome, start, end, strand,
129 protected boolean useGetRequest()
135 protected URL getUrl(List<String> ids) throws MalformedURLException
137 return null; // not used
141 * Calls the REST /map service to get the chromosomal coordinates (start/end)
142 * in 'toRef' that correspond to the (start/end) queryRange in 'fromRef'
150 * @see <a href="http://rest.ensemblgenomes.org/documentation/info/assembly_map">/map endpoint documentation</a>
152 public int[] getAssemblyMapping(String species, String chromosome,
153 String fromRef, String toRef, int[] queryRange)
156 BufferedReader br = null;
160 url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0],
162 br = getHttpResponse(url, null);
163 return (parseAssemblyMappingResponse(br));
164 } catch (Throwable t)
166 System.out.println("Error calling " + url + ": " + t.getMessage());
175 } catch (IOException e)
184 * Parses the JSON response from the /map/<species>/ REST service. The
185 * format is (with some fields omitted)
190 * "original": {"end":45109016,"start":45051610},
191 * "mapped" : {"end":43186384,"start":43128978}
198 protected int[] parseAssemblyMappingResponse(BufferedReader br)
201 JSONParser jp = new JSONParser();
205 JSONObject parsed = (JSONObject) jp.parse(br);
206 JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
208 Iterator rvals = mappings.iterator();
209 while (rvals.hasNext())
211 // todo check for "mapped"
212 JSONObject val = (JSONObject) rvals.next();
213 JSONObject mapped = (JSONObject) val.get(MAPPED);
214 int start = Integer.parseInt(mapped.get("start").toString());
215 int end = Integer.parseInt(mapped.get("end").toString());
216 String strand = mapped.get("strand").toString();
217 if ("1".equals(strand))
219 result = new int[] { start, end };
223 result = new int[] { end, start };
226 } catch (IOException | ParseException | NumberFormatException e)
234 * Calls the REST /map/cds/id service, and returns a GeneLociI holding the
235 * returned chromosomal coordinates, or returns null if the call fails
238 * e.g. Ensembl, EnsemblMetazoa
240 * e.g. ENST00000592782, Y55B1AR.1.1
245 public GeneLociI getCdsMapping(String division, String accession,
248 return getIdMapping(division, accession, start, end, CDS);
252 * Calls the REST /map/cdna/id service, and returns a GeneLociI holding the
253 * returned chromosomal coordinates, or returns null if the call fails
256 * e.g. Ensembl, EnsemblMetazoa
258 * e.g. ENST00000592782, Y55B1AR.1.1
263 public GeneLociI getCdnaMapping(String division, String accession,
266 return getIdMapping(division, accession, start, end, CDNA);
269 GeneLociI getIdMapping(String division, String accession, int start,
270 int end, String cdsOrCdna)
273 BufferedReader br = null;
277 String domain = new EnsemblInfo().getDomain(division);
280 url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
281 br = getHttpResponse(url, null);
284 return (parseIdMappingResponse(br, accession, domain));
288 } catch (Throwable t)
290 System.out.println("Error calling " + url + ": " + t.getMessage());
299 } catch (IOException e)
308 * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
309 * REST call is to either ensembl or ensemblgenomes, as determined from the
310 * division, e.g. Ensembl or EnsemblProtists.
318 * @throws MalformedURLException
320 URL getIdMapUrl(String domain, String accession, int start, int end,
321 String cdsOrCdna) throws MalformedURLException
324 .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
325 domain, cdsOrCdna, accession, start, end);
330 * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The
336 * {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
337 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
338 * {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
339 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
349 GeneLociI parseIdMappingResponse(BufferedReader br, String accession,
352 JSONParser jp = new JSONParser();
356 JSONObject parsed = (JSONObject) jp.parse(br);
357 JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
359 Iterator rvals = mappings.iterator();
360 String assembly = null;
361 String chromosome = null;
363 List<int[]> regions = new ArrayList<>();
365 while (rvals.hasNext())
367 JSONObject val = (JSONObject) rvals.next();
368 JSONObject original = (JSONObject) val.get("original");
369 fromEnd = Integer.parseInt(original.get("end").toString());
371 JSONObject mapped = (JSONObject) val.get(MAPPED);
372 int start = Integer.parseInt(mapped.get("start").toString());
373 int end = Integer.parseInt(mapped.get("end").toString());
374 String ass = mapped.get("assembly_name").toString();
375 if (assembly != null && !assembly.equals(ass))
378 .println("EnsemblMap found multiple assemblies - can't resolve");
382 String chr = mapped.get("seq_region_name").toString();
383 if (chromosome != null && !chromosome.equals(chr))
386 .println("EnsemblMap found multiple chromosomes - can't resolve");
390 String strand = mapped.get("strand").toString();
391 if ("-1".equals(strand))
393 regions.add(new int[] { end, start });
397 regions.add(new int[] { start, end });
402 * processed all mapped regions on chromosome, assemble the result,
403 * having first fetched the species id for the accession
405 final String species = new EnsemblLookup(domain)
406 .getSpecies(accession);
407 final String as = assembly;
408 final String chr = chromosome;
409 List<int[]> fromRange = Collections.singletonList(new int[] { 1,
411 Mapping mapping = new Mapping(new MapList(fromRange, regions, 1, 1));
412 return new GeneLocus(species == null ? "" : species, as, chr,
414 } catch (IOException | ParseException | NumberFormatException e)