1 package jalview.ext.ensembl;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.GeneLociI;
import jalview.datamodel.GeneLocus;
import jalview.datamodel.Mapping;
import jalview.util.MapList;

import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
 * A client for the Ensembl REST service /map endpoint, to convert from
 * coordinates of one genome assembly to another.
 * <p>
 * Note that species and assembly identifiers passed to this class must be valid
 * in Ensembl. They are not case sensitive.
 *
 * @see <a href="https://rest.ensembl.org/documentation/info/assembly_map">https://rest.ensembl.org/documentation/info/assembly_map</a>
 * @see <a href="https://rest.ensembl.org/info/assembly/human?content-type=text/xml">https://rest.ensembl.org/info/assembly/human?content-type=text/xml</a>
 * @see <a href="https://rest.ensembl.org/info/species?content-type=text/xml">https://rest.ensembl.org/info/species?content-type=text/xml</a>
 */
36 public class EnsemblMap extends EnsemblRestClient
/** JSON key under which each mapping entry holds its mapped region. */
private static final String MAPPED = "mapped";

/** JSON key of the array of mapping entries in a /map response. */
private static final String MAPPINGS = "mappings";

/** Service selector passed to getIdMapping for the /map/cds/ endpoint. */
private static final String CDS = "cds";

/** Service selector passed to getIdMapping for the /map/cdna/ endpoint. */
private static final String CDNA = "cdna";
47 * Default constructor (to use rest.ensembl.org)
55 * Constructor given the target domain to fetch data from
59 public EnsemblMap(String domain)
65 public String getDbName()
67 return DBRefSource.ENSEMBL;
71 public AlignmentI getSequenceRecords(String queries) throws Exception
73 return null; // not used
77 * Constructs a URL of the format <code>
78 * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
88 * @throws MalformedURLException
90 protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
91 String toRef, int startPos, int endPos)
92 throws MalformedURLException
95 * start-end might be reverse strand - present forwards to the service
97 boolean forward = startPos <= endPos;
98 int start = forward ? startPos : endPos;
99 int end = forward ? endPos : startPos;
100 String strand = forward ? "1" : "-1";
101 String url = String.format(
102 "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
103 getDomain(), species, fromRef, chromosome, start, end, strand,
109 protected boolean useGetRequest()
115 protected URL getUrl(List<String> ids) throws MalformedURLException
117 return null; // not used
121 * Calls the REST /map service to get the chromosomal coordinates (start/end)
122 * in 'toRef' that corresponding to the (start/end) queryRange in 'fromRef'
130 * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map
132 public int[] getAssemblyMapping(String species, String chromosome,
133 String fromRef, String toRef, int[] queryRange)
136 BufferedReader br = null;
140 url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0],
142 br = getHttpResponse(url, null);
143 return (parseAssemblyMappingResponse(br));
144 } catch (Throwable t)
146 System.out.println("Error calling " + url + ": " + t.getMessage());
155 } catch (IOException e)
164 * Parses the JSON response from the /map/<species>/ REST service. The
165 * format is (with some fields omitted)
170 * "original": {"end":45109016,"start":45051610},
171 * "mapped" : {"end":43186384,"start":43128978}
178 protected int[] parseAssemblyMappingResponse(BufferedReader br)
181 JSONParser jp = new JSONParser();
185 JSONObject parsed = (JSONObject) jp.parse(br);
186 JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
188 Iterator rvals = mappings.iterator();
189 while (rvals.hasNext())
191 // todo check for "mapped"
192 JSONObject val = (JSONObject) rvals.next();
193 JSONObject mapped = (JSONObject) val.get(MAPPED);
194 int start = Integer.parseInt(mapped.get("start").toString());
195 int end = Integer.parseInt(mapped.get("end").toString());
196 String strand = mapped.get("strand").toString();
197 if ("1".equals(strand))
199 result = new int[] { start, end };
203 result = new int[] { end, start };
206 } catch (IOException | ParseException | NumberFormatException e)
214 * Calls the REST /map/cds/id service, and returns a DBRefEntry holding the
215 * returned chromosomal coordinates, or returns null if the call fails
218 * e.g. Ensembl, EnsemblMetazoa
220 * e.g. ENST00000592782, Y55B1AR.1.1
225 public GeneLociI getCdsMapping(String division, String accession,
228 return getIdMapping(division, accession, start, end, CDS);
232 * Calls the REST /map/cdna/id service, and returns a DBRefEntry holding the
233 * returned chromosomal coordinates, or returns null if the call fails
236 * e.g. Ensembl, EnsemblMetazoa
238 * e.g. ENST00000592782, Y55B1AR.1.1
243 public GeneLociI getCdnaMapping(String division, String accession,
246 return getIdMapping(division, accession, start, end, CDNA);
249 GeneLociI getIdMapping(String division, String accession, int start,
250 int end, String cdsOrCdna)
253 BufferedReader br = null;
257 String domain = new EnsemblInfo().getDomain(division);
260 url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
261 br = getHttpResponse(url, null);
264 return (parseIdMappingResponse(br, accession, domain));
268 } catch (Throwable t)
270 System.out.println("Error calling " + url + ": " + t.getMessage());
279 } catch (IOException e)
288 * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
289 * REST call is to either ensembl or ensemblgenomes, as determined from the
290 * division, e.g. Ensembl or EnsemblProtists.
298 * @throws MalformedURLException
300 URL getIdMapUrl(String domain, String accession, int start, int end,
301 String cdsOrCdna) throws MalformedURLException
304 .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
305 domain, cdsOrCdna, accession, start, end);
310 * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The
316 * {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
317 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
318 * {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
319 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
329 GeneLociI parseIdMappingResponse(BufferedReader br, String accession,
332 JSONParser jp = new JSONParser();
336 JSONObject parsed = (JSONObject) jp.parse(br);
337 JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
339 Iterator rvals = mappings.iterator();
340 String assembly = null;
341 String chromosome = null;
343 List<int[]> regions = new ArrayList<>();
345 while (rvals.hasNext())
347 JSONObject val = (JSONObject) rvals.next();
348 JSONObject original = (JSONObject) val.get("original");
349 fromEnd = Integer.parseInt(original.get("end").toString());
351 JSONObject mapped = (JSONObject) val.get(MAPPED);
352 int start = Integer.parseInt(mapped.get("start").toString());
353 int end = Integer.parseInt(mapped.get("end").toString());
354 String ass = mapped.get("assembly_name").toString();
355 if (assembly != null && !assembly.equals(ass))
358 .println("EnsemblMap found multiple assemblies - can't resolve");
362 String chr = mapped.get("seq_region_name").toString();
363 if (chromosome != null && !chromosome.equals(chr))
366 .println("EnsemblMap found multiple chromosomes - can't resolve");
370 String strand = mapped.get("strand").toString();
371 if ("-1".equals(strand))
373 regions.add(new int[] { end, start });
377 regions.add(new int[] { start, end });
382 * processed all mapped regions on chromosome, assemble the result,
383 * having first fetched the species id for the accession
385 final String species = new EnsemblLookup(domain)
386 .getSpecies(accession);
387 final String as = assembly;
388 final String chr = chromosome;
389 List<int[]> fromRange = Collections.singletonList(new int[] { 1,
391 Mapping mapping = new Mapping(new MapList(fromRange, regions, 1, 1));
392 return new GeneLocus(species == null ? "" : species, as, chr,
394 } catch (IOException | ParseException | NumberFormatException e)