X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblMap.java;h=56657e0578c517e8c02717e1d57060a77a557f70;hb=10dd3a771da0085ead2318b7385211c7b675d7f4;hp=d522ea8880aea6facf2f4515eba97d28f2c53ede;hpb=5e1b1391f58f31578c436e5ed4e571b0ceef8c9d;p=jalview.git
diff --git a/src/jalview/ext/ensembl/EnsemblMap.java b/src/jalview/ext/ensembl/EnsemblMap.java
index d522ea8..56657e0 100644
--- a/src/jalview/ext/ensembl/EnsemblMap.java
+++ b/src/jalview/ext/ensembl/EnsemblMap.java
@@ -2,11 +2,15 @@ package jalview.ext.ensembl;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
+import jalview.datamodel.GeneLociI;
+import jalview.util.MapList;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -17,6 +21,13 @@ import org.json.simple.parser.ParseException;
public class EnsemblMap extends EnsemblRestClient
{
+ private static final String MAPPED = "mapped";
+
+ private static final String MAPPINGS = "mappings";
+
+ private static final String CDS = "cds";
+
+ private static final String CDNA = "cdna";
/**
* Default constructor (to use rest.ensembl.org)
@@ -48,20 +59,36 @@ public class EnsemblMap extends EnsemblRestClient
return null; // not used
}
- protected URL getUrl(String species, String chromosome, String fromRef,
+ /**
+ * Constructs a URL of the format
+ * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
+ *
+ *
+ * @param species
+ * @param chromosome
+ * @param fromRef
+ * @param toRef
+ * @param startPos
+ * @param endPos
+ * @return
+ * @throws MalformedURLException
+ */
+ protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
String toRef, int startPos, int endPos)
throws MalformedURLException
{
- String url = getDomain() + "/map/" + species + "/" + fromRef + "/"
- + chromosome + ":" + startPos + ".." + endPos + ":1/" + toRef
- + "?content-type=application/json";
- try
- {
- return new URL(url);
- } catch (MalformedURLException e)
- {
- return null;
- }
+ /*
+ * start-end might be reverse strand - present forwards to the service
+ */
+ boolean forward = startPos <= endPos;
+ int start = forward ? startPos : endPos;
+ int end = forward ? endPos : startPos;
+ String strand = forward ? "1" : "-1";
+ String url = String.format(
+ "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
+ getDomain(), species, fromRef, chromosome, start, end, strand,
+ toRef);
+ return new URL(url);
}
@Override
@@ -88,7 +115,19 @@ public class EnsemblMap extends EnsemblRestClient
return null; // not used
}
- public int[] getMapping(String species, String chromosome,
+ /**
+ * Calls the REST /map service to get the chromosomal coordinates (start/end)
+ * in 'toRef' that correspond to the (start/end) queryRange in 'fromRef'
+ *
+ * @param species
+ * @param chromosome
+ * @param fromRef
+ * @param toRef
+ * @param queryRange
+ * @return
+ * @see <a href="http://rest.ensemblgenomes.org/documentation/info/assembly_map">http://rest.ensemblgenomes.org/documentation/info/assembly_map</a>
+ */
+ public int[] getAssemblyMapping(String species, String chromosome,
String fromRef, String toRef, int[] queryRange)
{
URL url = null;
@@ -96,20 +135,32 @@ public class EnsemblMap extends EnsemblRestClient
try
{
- url = getUrl(species, chromosome, fromRef, toRef, queryRange[0],
+ url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0],
queryRange[1]);
br = getHttpResponse(url, null);
- return (parseResponse(br));
+ return (parseAssemblyMappingResponse(br));
} catch (Throwable t)
{
System.out.println("Error calling " + url + ": " + t.getMessage());
return null;
+ } finally
+ {
+ if (br != null)
+ {
+ try
+ {
+ br.close();
+ } catch (IOException e)
+ {
+ // ignore
+ }
+ }
}
}
/**
- * Parses the JSON response from the /map REST service. The format is (with
- * some fields omitted)
+ * Parses the JSON response from the /map/<species>/ REST service. The
+ * format is (with some fields omitted)
*
*
* {"mappings": @@ -122,7 +173,7 @@ public class EnsemblMap extends EnsemblRestClient * @param br * @return */ - protected int[] parseResponse(BufferedReader br) + protected int[] parseAssemblyMappingResponse(BufferedReader br) { int[] result = null; JSONParser jp = new JSONParser(); @@ -130,17 +181,25 @@ public class EnsemblMap extends EnsemblRestClient try { JSONObject parsed = (JSONObject) jp.parse(br); - JSONArray mappings = (JSONArray) parsed.get("mappings"); + JSONArray mappings = (JSONArray) parsed.get(MAPPINGS); Iterator rvals = mappings.iterator(); while (rvals.hasNext()) { // todo check for "mapped" JSONObject val = (JSONObject) rvals.next(); - JSONObject mapped = (JSONObject) val.get("mapped"); - String start = mapped.get("start").toString(); - String end = mapped.get("end").toString(); - result = new int[] { Integer.parseInt(start), Integer.parseInt(end) }; + JSONObject mapped = (JSONObject) val.get(MAPPED); + int start = Integer.parseInt(mapped.get("start").toString()); + int end = Integer.parseInt(mapped.get("end").toString()); + String strand = mapped.get("strand").toString(); + if ("1".equals(strand)) + { + result = new int[] { start, end }; + } + else + { + result = new int[] { end, start }; + } } } catch (IOException | ParseException | NumberFormatException e) { @@ -149,4 +208,215 @@ public class EnsemblMap extends EnsemblRestClient return result; } + /** + * Calls the REST /map/cds/id service, and returns a DBRefEntry holding the + * returned chromosomal coordinates, or returns null if the call fails + * + * @param division + * e.g. Ensembl, EnsemblMetazoa + * @param accession + * e.g. 
ENST00000592782, Y55B1AR.1.1 + * @param start + * @param end + * @return + */ + public GeneLociI getCdsMapping(String division, String accession, + int start, int end) + { + return getIdMapping(division, accession, start, end, CDS); + } + + /** + * Calls the REST /map/cdna/id service, and returns a DBRefEntry holding the + * returned chromosomal coordinates, or returns null if the call fails + * + * @param division + * e.g. Ensembl, EnsemblMetazoa + * @param accession + * e.g. ENST00000592782, Y55B1AR.1.1 + * @param start + * @param end + * @return + */ + public GeneLociI getCdnaMapping(String division, String accession, + int start, int end) + { + return getIdMapping(division, accession, start, end, CDNA); + } + + GeneLociI getIdMapping(String division, String accession, int start, + int end, String cdsOrCdna) + { + URL url = null; + BufferedReader br = null; + + try + { + String domain = new EnsemblInfo().getDomain(division); + if (domain != null) + { + url = getIdMapUrl(domain, accession, start, end, cdsOrCdna); + br = getHttpResponse(url, null); + return (parseIdMappingResponse(br, accession, domain)); + } + return null; + } catch (Throwable t) + { + System.out.println("Error calling " + url + ": " + t.getMessage()); + return null; + } finally + { + if (br != null) + { + try + { + br.close(); + } catch (IOException e) + { + // ignore + } + } + } + } + + /** + * Constructs a URL to the /map/cds/or /map/cdna/ REST service. The + * REST call is to either ensembl or ensemblgenomes, as determined from the + * division, e.g. Ensembl or EnsemblProtists. 
+ * + * @param domain + * @param accession + * @param start + * @param end + * @param cdsOrCdna + * @return + * @throws MalformedURLException + */ + URL getIdMapUrl(String domain, String accession, int start, int end, + String cdsOrCdna) throws MalformedURLException + { + String url = String + .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json", + domain, cdsOrCdna, accession, start, end); + return new URL(url); + } + + /** + * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The + * format is + * + * + * {"mappings": + * [ + * {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0, + * "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114}, + * {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0, + * "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714} + * ] + * } + *+ * + * @param br + * @param accession + * @param domain + * @return + */ + GeneLociI parseIdMappingResponse(BufferedReader br, String accession, + String domain) + { + JSONParser jp = new JSONParser(); + + try + { + JSONObject parsed = (JSONObject) jp.parse(br); + JSONArray mappings = (JSONArray) parsed.get(MAPPINGS); + + Iterator rvals = mappings.iterator(); + String assembly = null; + String chromosome = null; + int fromEnd = 0; + Listregions = new ArrayList<>(); + + while (rvals.hasNext()) + { + JSONObject val = (JSONObject) rvals.next(); + JSONObject original = (JSONObject) val.get("original"); + fromEnd = Integer.parseInt(original.get("end").toString()); + + JSONObject mapped = (JSONObject) val.get(MAPPED); + int start = Integer.parseInt(mapped.get("start").toString()); + int end = Integer.parseInt(mapped.get("end").toString()); + String ass = mapped.get("assembly_name").toString(); + if (assembly != null && !assembly.equals(ass)) + { + System.err + .println("EnsemblMap found multiple assemblies - can't resolve"); + return null; + } + assembly = ass; + String chr = 
mapped.get("seq_region_name").toString(); + if (chromosome != null && !chromosome.equals(chr)) + { + System.err + .println("EnsemblMap found multiple chromosomes - can't resolve"); + return null; + } + chromosome = chr; + String strand = mapped.get("strand").toString(); + if ("-1".equals(strand)) + { + regions.add(new int[] { end, start }); + } + else + { + regions.add(new int[] { start, end }); + } + } + + /* + * processed all mapped regions on chromosome, assemble the result, + * having first fetched the species id for the accession + */ + final String species = new EnsemblLookup(domain) + .getSpecies(accession); + final String as = assembly; + final String chr = chromosome; + List fromRange = Collections.singletonList(new int[] { 1, + fromEnd }); + final MapList map = new MapList(fromRange, regions, 1, 1); + return new GeneLociI() + { + + @Override + public String getSpeciesId() + { + return species == null ? "" : species; + } + + @Override + public String getAssemblyId() + { + return as; + } + + @Override + public String getChromosomeId() + { + return chr; + } + + @Override + public MapList getMap() + { + return map; + } + }; + } catch (IOException | ParseException | NumberFormatException e) + { + // ignore + } + + return null; + } + }