1 package jalview.ext.ensembl;
3 import jalview.datamodel.AlignmentI;
4 import jalview.datamodel.DBRefSource;
5 import jalview.datamodel.GeneLociI;
6 import jalview.util.JSONUtils;
7 import jalview.util.MapList;
9 import java.io.BufferedReader;
10 import java.io.IOException;
11 import java.net.MalformedURLException;
13 import java.util.ArrayList;
14 import java.util.Collections;
15 import java.util.Iterator;
16 import java.util.List;
19 import org.json.simple.parser.ParseException;
22 * A client for the Ensembl REST service /map endpoint, to convert from
23 * coordinates of one genome assembly to another.
25 * Note that species and assembly identifiers passed to this class must be valid
26 * in Ensembl. They are not case sensitive.
29 * @see https://rest.ensembl.org/documentation/info/assembly_map
30 * @see https://rest.ensembl.org/info/assembly/human?content-type=text/xml
31 * @see https://rest.ensembl.org/info/species?content-type=text/xml
33 public class EnsemblMap extends EnsemblRestClient
35 private static final String MAPPED = "mapped";
37 private static final String MAPPINGS = "mappings";
39 private static final String CDS = "cds";
41 private static final String CDNA = "cdna";
44 * Default constructor (to use rest.ensembl.org)
52 * Constructor given the target domain to fetch data from
56 public EnsemblMap(String domain)
62 public String getDbName()
64 return DBRefSource.ENSEMBL;
68 public AlignmentI getSequenceRecords(String queries) throws Exception
70 return null; // not used
74 * Constructs a URL of the format <code>
75 * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
85 * @throws MalformedURLException
87 protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
88 String toRef, int startPos, int endPos)
89 throws MalformedURLException
92 * start-end might be reverse strand - present forwards to the service
94 boolean forward = startPos <= endPos;
95 int start = forward ? startPos : endPos;
96 int end = forward ? endPos : startPos;
97 String strand = forward ? "1" : "-1";
98 String url = String.format(
99 "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
100 getDomain(), species, fromRef, chromosome, start, end, strand,
106 protected boolean useGetRequest()
112 protected URL getUrl(List<String> ids) throws MalformedURLException
114 return null; // not used
118 * Calls the REST /map service to get the chromosomal coordinates (start/end)
119 * in 'toRef' that corresponding to the (start/end) queryRange in 'fromRef'
127 * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map
129 public int[] getAssemblyMapping(String species, String chromosome,
130 String fromRef, String toRef, int[] queryRange)
135 url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0],
137 return (parseAssemblyMappingResponse(url));
138 } catch (Throwable t)
140 System.out.println("Error calling " + url + ": " + t.getMessage());
146 * Parses the JSON response from the /map/<species>/ REST service. The
147 * format is (with some fields omitted)
152 * "original": {"end":45109016,"start":45051610},
153 * "mapped" : {"end":43186384,"start":43128978}
160 @SuppressWarnings("unchecked")
161 protected int[] parseAssemblyMappingResponse(URL url)
167 Iterator<Object> rvals = (Iterator<Object>) getJSON(url, null, -1, MODE_ITERATOR, MAPPINGS);
170 while (rvals.hasNext())
172 // todo check for "mapped"
173 Map<String, Object> val = (Map<String, Object>) rvals.next();
174 Map<String, Object> mapped = (Map<String, Object>) val.get(MAPPED);
175 int start = Integer.parseInt(mapped.get("start").toString());
176 int end = Integer.parseInt(mapped.get("end").toString());
177 String strand = mapped.get("strand").toString();
178 if ("1".equals(strand))
180 result = new int[] { start, end };
184 result = new int[] { end, start };
187 } catch (IOException | ParseException | NumberFormatException e)
195 * Calls the REST /map/cds/id service, and returns a DBRefEntry holding the
196 * returned chromosomal coordinates, or returns null if the call fails
199 * e.g. Ensembl, EnsemblMetazoa
201 * e.g. ENST00000592782, Y55B1AR.1.1
206 public GeneLociI getCdsMapping(String division, String accession,
209 return getIdMapping(division, accession, start, end, CDS);
213 * Calls the REST /map/cdna/id service, and returns a DBRefEntry holding the
214 * returned chromosomal coordinates, or returns null if the call fails
217 * e.g. Ensembl, EnsemblMetazoa
219 * e.g. ENST00000592782, Y55B1AR.1.1
224 public GeneLociI getCdnaMapping(String division, String accession,
227 return getIdMapping(division, accession, start, end, CDNA);
230 GeneLociI getIdMapping(String division, String accession, int start,
231 int end, String cdsOrCdna)
236 String domain = new EnsemblInfo().getDomain(division);
239 url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
240 return (parseIdMappingResponse(url, accession, domain));
243 } catch (Throwable t)
245 System.out.println("Error calling " + url + ": " + t.getMessage());
251 * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
252 * REST call is to either ensembl or ensemblgenomes, as determined from the
253 * division, e.g. Ensembl or EnsemblProtists.
261 * @throws MalformedURLException
263 URL getIdMapUrl(String domain, String accession, int start, int end,
264 String cdsOrCdna) throws MalformedURLException
267 .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
268 domain, cdsOrCdna, accession, start, end);
273 * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The
279 * {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
280 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
281 * {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
282 * "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
292 @SuppressWarnings("unchecked")
293 GeneLociI parseIdMappingResponse(URL url, String accession,
299 Iterator<Object> rvals = (Iterator<Object>) getJSON(url, null, -1, MODE_ITERATOR, MAPPINGS);
302 String assembly = null;
303 String chromosome = null;
305 List<int[]> regions = new ArrayList<>();
307 while (rvals.hasNext())
309 Map<String, Object> val = (Map<String, Object>) rvals.next();
310 Map<String, Object> original = (Map<String, Object>) val.get("original");
311 fromEnd = Integer.parseInt(original.get("end").toString());
313 Map<String, Object> mapped = (Map<String, Object>) val.get(MAPPED);
314 int start = Integer.parseInt(mapped.get("start").toString());
315 int end = Integer.parseInt(mapped.get("end").toString());
316 String ass = mapped.get("assembly_name").toString();
317 if (assembly != null && !assembly.equals(ass))
320 .println("EnsemblMap found multiple assemblies - can't resolve");
324 String chr = mapped.get("seq_region_name").toString();
325 if (chromosome != null && !chromosome.equals(chr))
328 .println("EnsemblMap found multiple chromosomes - can't resolve");
332 String strand = mapped.get("strand").toString();
333 if ("-1".equals(strand))
335 regions.add(new int[] { end, start });
339 regions.add(new int[] { start, end });
344 * processed all mapped regions on chromosome, assemble the result,
345 * having first fetched the species id for the accession
347 final String species = new EnsemblLookup(domain)
348 .getSpecies(accession);
349 final String as = assembly;
350 final String chr = chromosome;
351 List<int[]> fromRange = Collections.singletonList(new int[] { 1,
353 final MapList map = new MapList(fromRange, regions, 1, 1);
354 return new GeneLociI()
358 public String getSpeciesId()
360 return species == null ? "" : species;
364 public String getAssemblyId()
370 public String getChromosomeId()
376 public MapList getMap()
381 } catch (IOException | ParseException | NumberFormatException e)