1 package jalview.ext.ensembl;
3 import jalview.datamodel.AlignmentI;
4 import jalview.datamodel.DBRefSource;
5 import jalview.datamodel.GeneLociI;
6 import jalview.util.MapList;
8 import java.io.BufferedReader;
9 import java.io.IOException;
10 import java.net.MalformedURLException;
12 import java.util.ArrayList;
13 import java.util.Collections;
14 import java.util.Iterator;
15 import java.util.List;
17 import org.json.simple.JSONArray;
18 import org.json.simple.JSONObject;
19 import org.json.simple.parser.JSONParser;
20 import org.json.simple.parser.ParseException;
22 public class EnsemblMap extends EnsemblRestClient
// JSON key of the per-mapping object that holds the mapped (target) coordinates;
// read by both response parsers in this class
24 private static final String MAPPED = "mapped";
// JSON key, on the parsed root object, of the array of mapping entries
26 private static final String MAPPINGS = "mappings";
// mapping type passed to getIdMapping by getCdsMapping; it is substituted into
// the request path built by getIdMapUrl
28 private static final String CDS = "cds";
// mapping type passed to getIdMapping by getCdnaMapping
30 private static final String CDNA = "cdna";
33 * Default constructor (to use rest.ensembl.org)
41 * Constructor given the target domain to fetch data from
45 public EnsemblMap(String domain)
// Returns the database name for this client: the DBRefSource.ENSEMBL constant.
51 public String getDbName()
53 return DBRefSource.ENSEMBL;
// Marked as not used: returns null without looking at 'queries'.
57 public AlignmentI getSequenceRecords(String queries) throws Exception
59 return null; // not used
63 * Constructs a URL of the format <code>
64 * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
74 * @throws MalformedURLException
// Builds the request URL string for the assembly-to-assembly /map service.
// startPos and endPos may be given in either order: the lower value is always
// sent first and the original direction is carried by the strand field.
76 protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
77 String toRef, int startPos, int endPos)
78 throws MalformedURLException
81 * start-end might be reverse strand - present forwards to the service
83 boolean forward = startPos <= endPos;
// equal positions count as forward; otherwise the two values are swapped
84 int start = forward ? startPos : endPos;
85 int end = forward ? endPos : startPos;
// strand token appended after the range: "1" when forward, "-1" when reversed
86 String strand = forward ? "1" : "-1";
// path layout: <domain>/map/<species>/<fromRef>/<chromosome>:<start>..<end>:<strand>/<last %s>
// NOTE(review): the argument for the final %s is not visible here - presumably toRef; confirm
87 String url = String.format(
88 "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
89 getDomain(), species, fromRef, chromosome, start, end, strand,
95 protected boolean useGetRequest()
// Request content type is always JSON; the multipleIds flag does not affect the
// returned value.
101 protected String getRequestMimeType(boolean multipleIds)
103 return "application/json";
// Response content type is always JSON, matching the content-type query
// parameter used in the URLs built by this class.
107 protected String getResponseMimeType()
109 return "application/json";
// Marked as not used: returns null without looking at 'ids'. URLs for this
// class are built by getAssemblyMapUrl and getIdMapUrl instead.
113 protected URL getUrl(List<String> ids) throws MalformedURLException
115 return null; // not used
119 * Calls the REST /map service to get the chromosomal coordinates (start/end)
120 * in 'toRef' that correspond to the (start/end) queryRange in 'fromRef'
128 * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map
// Maps queryRange on the given chromosome from assembly fromRef to assembly
// toRef: builds the /map URL, fetches the response and hands the reader to
// parseAssemblyMappingResponse.
130 public int[] getAssemblyMapping(String species, String chromosome,
131 String fromRef, String toRef, int[] queryRange)
134 BufferedReader br = null;
// queryRange[0] is passed as the start position; the end-position argument is
// on a line not visible here (presumably queryRange[1] - confirm)
138 url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0],
// NOTE(review): the meaning of the null second argument is defined by
// getHttpResponse, which is not shown in this file
140 br = getHttpResponse(url, null);
141 return (parseAssemblyMappingResponse(br));
// any Throwable raised above is caught here and reported on stdout together
// with the URL that was being called
142 } catch (Throwable t)
144 System.out.println("Error calling " + url + ": " + t.getMessage());
// NOTE(review): the try this handler belongs to is not visible - presumably it
// guards closing br; confirm
153 } catch (IOException e)
162 * Parses the JSON response from the /map/<species>/ REST service. The
163 * format is (with some fields omitted)
168 * "original": {"end":45109016,"start":45051610},
169 * "mapped" : {"end":43186384,"start":43128978}
// Parses a /map JSON reply read from br and extracts a coordinate pair from the
// "mapped" element of the entries in the "mappings" array.
176 protected int[] parseAssemblyMappingResponse(BufferedReader br)
179 JSONParser jp = new JSONParser();
// the root is cast to a JSON object and its "mappings" value to a JSON array
183 JSONObject parsed = (JSONObject) jp.parse(br);
184 JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
186 Iterator rvals = mappings.iterator();
187 while (rvals.hasNext())
189 // todo check for "mapped"
190 JSONObject val = (JSONObject) rvals.next();
191 JSONObject mapped = (JSONObject) val.get(MAPPED);
// values go through toString() and parseInt, so non-integer text raises
// NumberFormatException (one of the types handled below)
192 int start = Integer.parseInt(mapped.get("start").toString());
193 int end = Integer.parseInt(mapped.get("end").toString());
194 String strand = mapped.get("strand").toString();
// strand "1": result is { start, end }
195 if ("1".equals(strand))
197 result = new int[] { start, end };
// NOTE(review): presumably the else branch (its keyword is not visible) - any
// other strand value gives the swapped order { end, start }
201 result = new int[] { end, start };
// only these three exception types are handled here; a missing "mappings" or
// "mapped" key would surface as a different exception unless checked on lines
// not visible in this excerpt
204 } catch (IOException | ParseException | NumberFormatException e)
212 * Calls the REST /map/cds/id service, and returns a GeneLociI holding the
213 * returned chromosomal coordinates, or returns null if the call fails
216 * e.g. Ensembl, EnsemblMetazoa
218 * e.g. ENST00000592782, Y55B1AR.1.1
// Delegates to getIdMapping, selecting the "cds" mapping type.
223 public GeneLociI getCdsMapping(String division, String accession,
226 return getIdMapping(division, accession, start, end, CDS);
230 * Calls the REST /map/cdna/id service, and returns a GeneLociI holding the
231 * returned chromosomal coordinates, or returns null if the call fails
234 * e.g. Ensembl, EnsemblMetazoa
236 * e.g. ENST00000592782, Y55B1AR.1.1
// Delegates to getIdMapping, selecting the "cdna" mapping type.
241 public GeneLociI getCdnaMapping(String division, String accession,
244 return getIdMapping(division, accession, start, end, CDNA);
// Shared implementation behind getCdsMapping and getCdnaMapping: looks up the
// domain for the division, requests /map/<cdsOrCdna>/<accession>/<start>..<end>
// and parses the reply into a GeneLociI.
247 GeneLociI getIdMapping(String division, String accession, int start,
248 int end, String cdsOrCdna)
251 BufferedReader br = null;
// the domain to query is obtained from the division name
255 String domain = new EnsemblInfo().getDomain(division);
258 url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
259 br = getHttpResponse(url, null);
// accession and domain are handed on because the parser uses them to look up
// the species for the accession
260 return (parseIdMappingResponse(br, accession, domain));
// any Throwable raised above is caught here and reported on stdout together
// with the URL that was being called
263 } catch (Throwable t)
265 System.out.println("Error calling " + url + ": " + t.getMessage());
// NOTE(review): the try this handler belongs to is not visible - presumably it
// guards closing br; confirm
274 } catch (IOException e)
283 * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
284 * REST call is to either ensembl or ensemblgenomes, as determined from the
285 * division, e.g. Ensembl or EnsemblProtists.
293 * @throws MalformedURLException
// Formats the URL string for a /map/<cdsOrCdna>/<accession>/<start>..<end>
// request against the given domain.
295 URL getIdMapUrl(String domain, String accession, int start, int end,
296 String cdsOrCdna) throws MalformedURLException
// include_original_region=1 is requested; parseIdMappingResponse reads the
// "original" element of each mapping entry
299 .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
300 domain, cdsOrCdna, accession, start, end);
305 * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The
311 * {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
312 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
313 * {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
314 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
// Parses a /map/cds or /map/cdna JSON reply read from br. Collects the mapped
// region of every entry in the "mappings" array, then wraps the species,
// assembly, chromosome and the resulting MapList in an anonymous GeneLociI.
324 GeneLociI parseIdMappingResponse(BufferedReader br, String accession,
327 JSONParser jp = new JSONParser();
// the root is cast to a JSON object and its "mappings" value to a JSON array
331 JSONObject parsed = (JSONObject) jp.parse(br);
332 JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
334 Iterator rvals = mappings.iterator();
// start as null and are compared against each entry's value below; the lines
// that assign them are not visible in this excerpt
335 String assembly = null;
336 String chromosome = null;
// one { from, to } pair per mapping entry, in response order
338 List<int[]> regions = new ArrayList<>();
340 while (rvals.hasNext())
342 JSONObject val = (JSONObject) rvals.next();
// fromEnd is reassigned from the "end" of each entry's "original" region
343 JSONObject original = (JSONObject) val.get("original");
344 fromEnd = Integer.parseInt(original.get("end").toString());
346 JSONObject mapped = (JSONObject) val.get(MAPPED);
347 int start = Integer.parseInt(mapped.get("start").toString());
348 int end = Integer.parseInt(mapped.get("end").toString());
// an entry whose assembly differs from the one already recorded triggers the
// "multiple assemblies" message
349 String ass = mapped.get("assembly_name").toString();
350 if (assembly != null && !assembly.equals(ass))
353 .println("EnsemblMap found multiple assemblies - can't resolve");
// same check for the chromosome (the "seq_region_name" field)
357 String chr = mapped.get("seq_region_name").toString();
358 if (chromosome != null && !chromosome.equals(chr))
361 .println("EnsemblMap found multiple chromosomes - can't resolve");
// strand "-1": the region is stored with its coordinates swapped, { end, start }
365 String strand = mapped.get("strand").toString();
366 if ("-1".equals(strand))
368 regions.add(new int[] { end, start });
// NOTE(review): presumably the else branch (its keyword is not visible) - other
// strand values are stored as { start, end }
372 regions.add(new int[] { start, end });
377 * processed all mapped regions on chromosome, assemble the result,
378 * having first fetched the species id for the accession
380 final String species = new EnsemblLookup(domain)
381 .getSpecies(accession);
// final copies, declared so the anonymous class below can refer to them
382 final String as = assembly;
383 final String chr = chromosome;
// a single source range starting at 1; its upper bound is on a line not
// visible here (presumably fromEnd - confirm)
384 List<int[]> fromRange = Collections.singletonList(new int[] { 1,
// NOTE(review): the trailing 1, 1 are presumably the from/to ratios of the
// map - confirm against the MapList constructor
386 final MapList map = new MapList(fromRange, regions, 1, 1);
387 return new GeneLociI()
391 public String getSpeciesId()
// a null species is reported as the empty string
393 return species == null ? "" : species;
// NOTE(review): the bodies of the next three accessors are not visible -
// presumably they return as, chr and map respectively
397 public String getAssemblyId()
403 public String getChromosomeId()
409 public MapList getMap()
// only these three exception types are handled here
414 } catch (IOException | ParseException | NumberFormatException e)