1 package jalview.ext.ensembl;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.GeneLociI;
import jalview.datamodel.GeneLocus;
import jalview.datamodel.Mapping;
import jalview.util.MapList;

import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
24 public class EnsemblMap extends EnsemblRestClient
26 private static final String MAPPED = "mapped";
28 private static final String MAPPINGS = "mappings";
30 private static final String CDS = "cds";
32 private static final String CDNA = "cdna";
35 * Default constructor (to use rest.ensembl.org)
43 * Constructor given the target domain to fetch data from
47 public EnsemblMap(String domain)
53 public String getDbName()
55 return DBRefSource.ENSEMBL;
59 public AlignmentI getSequenceRecords(String queries) throws Exception
61 return null; // not used
65 * Constructs a URL of the format <code>
66 * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
76 * @throws MalformedURLException
78 protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
79 String toRef, int startPos, int endPos)
80 throws MalformedURLException
83 * start-end might be reverse strand - present forwards to the service
85 boolean forward = startPos <= endPos;
86 int start = forward ? startPos : endPos;
87 int end = forward ? endPos : startPos;
88 String strand = forward ? "1" : "-1";
89 String url = String.format(
90 "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
91 getDomain(), species, fromRef, chromosome, start, end, strand,
97 protected boolean useGetRequest()
103 protected URL getUrl(List<String> ids) throws MalformedURLException
105 return null; // not used
109 * Calls the REST /map service to get the chromosomal coordinates (start/end)
110 * in 'toRef' that corresponding to the (start/end) queryRange in 'fromRef'
118 * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map
120 public int[] getAssemblyMapping(String species, String chromosome,
121 String fromRef, String toRef, int[] queryRange)
124 BufferedReader br = null;
128 url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0],
130 br = getHttpResponse(url, null);
131 return (parseAssemblyMappingResponse(br));
132 } catch (Throwable t)
134 System.out.println("Error calling " + url + ": " + t.getMessage());
143 } catch (IOException e)
152 * Parses the JSON response from the /map/<species>/ REST service. The
153 * format is (with some fields omitted)
158 * "original": {"end":45109016,"start":45051610},
159 * "mapped" : {"end":43186384,"start":43128978}
166 protected int[] parseAssemblyMappingResponse(BufferedReader br)
169 JSONParser jp = new JSONParser();
173 JSONObject parsed = (JSONObject) jp.parse(br);
174 JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
176 Iterator rvals = mappings.iterator();
177 while (rvals.hasNext())
179 // todo check for "mapped"
180 JSONObject val = (JSONObject) rvals.next();
181 JSONObject mapped = (JSONObject) val.get(MAPPED);
182 int start = Integer.parseInt(mapped.get("start").toString());
183 int end = Integer.parseInt(mapped.get("end").toString());
184 String strand = mapped.get("strand").toString();
185 if ("1".equals(strand))
187 result = new int[] { start, end };
191 result = new int[] { end, start };
194 } catch (IOException | ParseException | NumberFormatException e)
202 * Calls the REST /map/cds/id service, and returns a DBRefEntry holding the
203 * returned chromosomal coordinates, or returns null if the call fails
206 * e.g. Ensembl, EnsemblMetazoa
208 * e.g. ENST00000592782, Y55B1AR.1.1
213 public GeneLociI getCdsMapping(String division, String accession,
216 return getIdMapping(division, accession, start, end, CDS);
220 * Calls the REST /map/cdna/id service, and returns a DBRefEntry holding the
221 * returned chromosomal coordinates, or returns null if the call fails
224 * e.g. Ensembl, EnsemblMetazoa
226 * e.g. ENST00000592782, Y55B1AR.1.1
231 public GeneLociI getCdnaMapping(String division, String accession,
234 return getIdMapping(division, accession, start, end, CDNA);
237 GeneLociI getIdMapping(String division, String accession, int start,
238 int end, String cdsOrCdna)
241 BufferedReader br = null;
245 String domain = new EnsemblInfo().getDomain(division);
248 url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
249 br = getHttpResponse(url, null);
252 return (parseIdMappingResponse(br, accession, domain));
256 } catch (Throwable t)
258 System.out.println("Error calling " + url + ": " + t.getMessage());
267 } catch (IOException e)
276 * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
277 * REST call is to either ensembl or ensemblgenomes, as determined from the
278 * division, e.g. Ensembl or EnsemblProtists.
286 * @throws MalformedURLException
288 URL getIdMapUrl(String domain, String accession, int start, int end,
289 String cdsOrCdna) throws MalformedURLException
292 .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
293 domain, cdsOrCdna, accession, start, end);
298 * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The
304 * {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
305 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
306 * {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
307 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
317 GeneLociI parseIdMappingResponse(BufferedReader br, String accession,
320 JSONParser jp = new JSONParser();
324 JSONObject parsed = (JSONObject) jp.parse(br);
325 JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
327 Iterator rvals = mappings.iterator();
328 String assembly = null;
329 String chromosome = null;
331 List<int[]> regions = new ArrayList<>();
333 while (rvals.hasNext())
335 JSONObject val = (JSONObject) rvals.next();
336 JSONObject original = (JSONObject) val.get("original");
337 fromEnd = Integer.parseInt(original.get("end").toString());
339 JSONObject mapped = (JSONObject) val.get(MAPPED);
340 int start = Integer.parseInt(mapped.get("start").toString());
341 int end = Integer.parseInt(mapped.get("end").toString());
342 String ass = mapped.get("assembly_name").toString();
343 if (assembly != null && !assembly.equals(ass))
346 .println("EnsemblMap found multiple assemblies - can't resolve");
350 String chr = mapped.get("seq_region_name").toString();
351 if (chromosome != null && !chromosome.equals(chr))
354 .println("EnsemblMap found multiple chromosomes - can't resolve");
358 String strand = mapped.get("strand").toString();
359 if ("-1".equals(strand))
361 regions.add(new int[] { end, start });
365 regions.add(new int[] { start, end });
370 * processed all mapped regions on chromosome, assemble the result,
371 * having first fetched the species id for the accession
373 final String species = new EnsemblLookup(domain)
374 .getSpecies(accession);
375 final String as = assembly;
376 final String chr = chromosome;
377 List<int[]> fromRange = Collections.singletonList(new int[] { 1,
379 Mapping mapping = new Mapping(new MapList(fromRange, regions, 1, 1));
380 return new GeneLocus(species == null ? "" : species, as, chr,
382 } catch (IOException | ParseException | NumberFormatException e)