X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblMap.java;h=7777bda4292d3872947dc95a208e906938137afc;hb=30119dce1634085c41372d55b528a7a878b03b23;hp=d522ea8880aea6facf2f4515eba97d28f2c53ede;hpb=5e1b1391f58f31578c436e5ed4e571b0ceef8c9d;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblMap.java b/src/jalview/ext/ensembl/EnsemblMap.java index d522ea8..7777bda 100644 --- a/src/jalview/ext/ensembl/EnsemblMap.java +++ b/src/jalview/ext/ensembl/EnsemblMap.java @@ -1,12 +1,38 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.ext.ensembl; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; +import jalview.datamodel.GeneLociI; +import jalview.datamodel.GeneLocus; +import jalview.datamodel.Mapping; +import jalview.util.MapList; import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -15,8 +41,27 @@ import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; +/** + * A client for the Ensembl REST service /map endpoint, to convert from + * coordinates of one genome assembly to another. + *

+ * Note that species and assembly identifiers passed to this class must be valid + * in Ensembl. They are not case sensitive. + * + * @author gmcarstairs + * @see https://rest.ensembl.org/documentation/info/assembly_map + * @see https://rest.ensembl.org/info/assembly/human?content-type=text/xml + * @see https://rest.ensembl.org/info/species?content-type=text/xml + */ public class EnsemblMap extends EnsemblRestClient { + private static final String MAPPED = "mapped"; + + private static final String MAPPINGS = "mappings"; + + private static final String CDS = "cds"; + + private static final String CDNA = "cdna"; /** * Default constructor (to use rest.ensembl.org) @@ -48,20 +93,36 @@ public class EnsemblMap extends EnsemblRestClient return null; // not used } - protected URL getUrl(String species, String chromosome, String fromRef, + /** + * Constructs a URL of the format + * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json + * + * + * @param species + * @param chromosome + * @param fromRef + * @param toRef + * @param startPos + * @param endPos + * @return + * @throws MalformedURLException + */ + protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef, String toRef, int startPos, int endPos) throws MalformedURLException { - String url = getDomain() + "/map/" + species + "/" + fromRef + "/" - + chromosome + ":" + startPos + ".." + endPos + ":1/" + toRef - + "?content-type=application/json"; - try - { - return new URL(url); - } catch (MalformedURLException e) - { - return null; - } + /* + * start-end might be reverse strand - present forwards to the service + */ + boolean forward = startPos <= endPos; + int start = forward ? startPos : endPos; + int end = forward ? endPos : startPos; + String strand = forward ? "1" : "-1"; + String url = String.format( + "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json", + getDomain(), species, fromRef, chromosome, start, end, strand, + toRef); + return new URL(url); } @Override @@ -71,24 +132,24 @@ public class EnsemblMap extends EnsemblRestClient } @Override - protected String getRequestMimeType(boolean multipleIds) - { - return "application/json"; - } - - @Override - protected String getResponseMimeType() - { - return "application/json"; - } - - @Override protected URL getUrl(List ids) throws MalformedURLException { return null; // not used } - public int[] getMapping(String species, String chromosome, + /** + * Calls the REST /map service to get the chromosomal coordinates (start/end) + * in 'toRef' that corresponding to the (start/end) queryRange in 'fromRef' + * + * @param species + * @param chromosome + * @param fromRef + * @param toRef + * @param queryRange + * @return + * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map + */ + public int[] getAssemblyMapping(String species, String chromosome, String fromRef, String toRef, int[] queryRange) { URL url = null; @@ -96,20 +157,32 @@ public class EnsemblMap extends EnsemblRestClient try { - url = getUrl(species, chromosome, fromRef, toRef, queryRange[0], + url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0], queryRange[1]); br = getHttpResponse(url, null); - return (parseResponse(br)); + return (parseAssemblyMappingResponse(br)); } catch (Throwable t) { System.out.println("Error calling " + url + ": " + t.getMessage()); return null; + } finally + { + if (br != null) + { + try + { + br.close(); + } catch (IOException e) + { + // ignore + } + } } } /** - * Parses the JSON response from the /map REST service. The format is (with - * some fields omitted) + * Parses the JSON response from the /map/<species>/ REST service. The + * format is (with some fields omitted) * *

    *  {"mappings": 
@@ -122,7 +195,7 @@ public class EnsemblMap extends EnsemblRestClient
    * @param br
    * @return
    */
-  protected int[] parseResponse(BufferedReader br)
+  protected int[] parseAssemblyMappingResponse(BufferedReader br)
   {
     int[] result = null;
     JSONParser jp = new JSONParser();
@@ -130,17 +203,25 @@ public class EnsemblMap extends EnsemblRestClient
     try
     {
       JSONObject parsed = (JSONObject) jp.parse(br);
-      JSONArray mappings = (JSONArray) parsed.get("mappings");
+      JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
 
       Iterator rvals = mappings.iterator();
       while (rvals.hasNext())
       {
         // todo check for "mapped"
         JSONObject val = (JSONObject) rvals.next();
-        JSONObject mapped = (JSONObject) val.get("mapped");
-        String start = mapped.get("start").toString();
-        String end = mapped.get("end").toString();
-        result = new int[] { Integer.parseInt(start), Integer.parseInt(end) };
+        JSONObject mapped = (JSONObject) val.get(MAPPED);
+        int start = Integer.parseInt(mapped.get("start").toString());
+        int end = Integer.parseInt(mapped.get("end").toString());
+        String strand = mapped.get("strand").toString();
+        if ("1".equals(strand))
+        {
+          result = new int[] { start, end };
+        }
+        else
+        {
+          result = new int[] { end, start };
+        }
       }
     } catch (IOException | ParseException | NumberFormatException e)
     {
@@ -149,4 +230,193 @@ public class EnsemblMap extends EnsemblRestClient
     return result;
   }
 
+  /**
+   * Calls the REST /map/cds/id service, and returns a DBRefEntry holding the
+   * returned chromosomal coordinates, or returns null if the call fails
+   * 
+   * @param division
+   *          e.g. Ensembl, EnsemblMetazoa
+   * @param accession
+   *          e.g. ENST00000592782, Y55B1AR.1.1
+   * @param start
+   * @param end
+   * @return
+   */
+  public GeneLociI getCdsMapping(String division, String accession,
+          int start, int end)
+  {
+    return getIdMapping(division, accession, start, end, CDS);
+  }
+
+  /**
+   * Calls the REST /map/cdna/id service, and returns a DBRefEntry holding the
+   * returned chromosomal coordinates, or returns null if the call fails
+   * 
+   * @param division
+   *          e.g. Ensembl, EnsemblMetazoa
+   * @param accession
+   *          e.g. ENST00000592782, Y55B1AR.1.1
+   * @param start
+   * @param end
+   * @return
+   */
+  public GeneLociI getCdnaMapping(String division, String accession,
+          int start, int end)
+  {
+    return getIdMapping(division, accession, start, end, CDNA);
+  }
+
+  GeneLociI getIdMapping(String division, String accession, int start,
+          int end, String cdsOrCdna)
+  {
+    URL url = null;
+    BufferedReader br = null;
+
+    try
+    {
+      String domain = new EnsemblInfo().getDomain(division);
+      if (domain != null)
+      {
+        url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
+        br = getHttpResponse(url, null);
+        if (br != null)
+        {
+          return (parseIdMappingResponse(br, accession, domain));
+        }
+      }
+      return null;
+    } catch (Throwable t)
+    {
+      System.out.println("Error calling " + url + ": " + t.getMessage());
+      return null;
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore
+        }
+      }
+    }
+  }
+
+  /**
+   * Constructs a URL to the /map/cds/ or /map/cdna/ REST service. The
+   * REST call is to either ensembl or ensemblgenomes, as determined from the
+   * division, e.g. Ensembl or EnsemblProtists.
+   * 
+   * @param domain
+   * @param accession
+   * @param start
+   * @param end
+   * @param cdsOrCdna
+   * @return
+   * @throws MalformedURLException
+   */
+  URL getIdMapUrl(String domain, String accession, int start, int end,
+          String cdsOrCdna) throws MalformedURLException
+  {
+    String url = String
+            .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
+                    domain, cdsOrCdna, accession, start, end);
+    return new URL(url);
+  }
+
+  /**
+   * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The
+   * format is
+   * 
+   * 
+   * {"mappings":
+   *   [
+   *    {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
+   *     "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
+   *    {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
+   *     "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
+   *   ]
+   * }
+   * 
+ * + * @param br + * @param accession + * @param domain + * @return + */ + GeneLociI parseIdMappingResponse(BufferedReader br, String accession, + String domain) + { + JSONParser jp = new JSONParser(); + + try + { + JSONObject parsed = (JSONObject) jp.parse(br); + JSONArray mappings = (JSONArray) parsed.get(MAPPINGS); + + Iterator rvals = mappings.iterator(); + String assembly = null; + String chromosome = null; + int fromEnd = 0; + List regions = new ArrayList<>(); + + while (rvals.hasNext()) + { + JSONObject val = (JSONObject) rvals.next(); + JSONObject original = (JSONObject) val.get("original"); + fromEnd = Integer.parseInt(original.get("end").toString()); + + JSONObject mapped = (JSONObject) val.get(MAPPED); + int start = Integer.parseInt(mapped.get("start").toString()); + int end = Integer.parseInt(mapped.get("end").toString()); + String ass = mapped.get("assembly_name").toString(); + if (assembly != null && !assembly.equals(ass)) + { + System.err + .println("EnsemblMap found multiple assemblies - can't resolve"); + return null; + } + assembly = ass; + String chr = mapped.get("seq_region_name").toString(); + if (chromosome != null && !chromosome.equals(chr)) + { + System.err + .println("EnsemblMap found multiple chromosomes - can't resolve"); + return null; + } + chromosome = chr; + String strand = mapped.get("strand").toString(); + if ("-1".equals(strand)) + { + regions.add(new int[] { end, start }); + } + else + { + regions.add(new int[] { start, end }); + } + } + + /* + * processed all mapped regions on chromosome, assemble the result, + * having first fetched the species id for the accession + */ + final String species = new EnsemblLookup(domain) + .getSpecies(accession); + final String as = assembly; + final String chr = chromosome; + List fromRange = Collections.singletonList(new int[] { 1, + fromEnd }); + Mapping mapping = new Mapping(new MapList(fromRange, regions, 1, 1)); + return new GeneLocus(species == null ? "" : species, as, chr, + mapping); + } catch (IOException | ParseException | NumberFormatException e) + { + // ignore + } + + return null; + } + }