X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblLookup.java;h=ed1b4fa4e00d91c7b67aa0e137bc94aa82ef2895;hb=8ab24374eabe70d3d55bc92bcbbae401c743baa5;hp=f314b0a8ffdcb8e8528e2223368782ee39b28b92;hpb=8d3aefef72e993e55c0f6b5c26e3979ae7269e0f;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index f314b0a..ed1b4fa 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -20,22 +20,27 @@ */ package jalview.ext.ensembl; +import jalview.bin.Cache; import jalview.datamodel.AlignmentI; +import jalview.datamodel.GeneLociI; +import jalview.util.MapList; import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; +import java.util.Collections; import java.util.List; -import java.util.function.Function; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** - * A client for the Ensembl lookup REST endpoint + * A client for the Ensembl /lookup REST endpoint, used to find the gene + * identifier given a gene, transcript or protein identifier, or to extract the + * species or chromosomal coordinates from the same service response * * @author gmcarstairs */ @@ -43,13 +48,10 @@ public class EnsemblLookup extends EnsemblRestClient { private static final String SPECIES = "species"; - private static final String PARENT = "Parent"; - - private static final String OBJECT_TYPE_TRANSLATION = "Translation"; - private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript"; - private static final String ID = "id"; - private static final String OBJECT_TYPE_GENE = "Gene"; - private static final String OBJECT_TYPE = "object_type"; + /** + * keep track of last identifier retrieved to break loops + */ + private String lastId; /** * Default constructor (to use rest.ensembl.org) @@ -85,17 +87,26 @@ public class EnsemblLookup extends EnsemblRestClient protected URL getUrl(List ids) throws MalformedURLException { String identifier = ids.get(0); - return getUrl(identifier); + return getUrl(identifier, null); } /** + * Gets the url for lookup of the given identifier, optionally with objectType + * also specified in the request + * * @param identifier + * @param objectType * @return */ - protected URL getUrl(String identifier) + protected URL getUrl(String identifier, String objectType) { String url = getDomain() + "/lookup/id/" + identifier + CONTENT_TYPE_JSON; + if (objectType != null) + { + url += "&" + OBJECT_TYPE + "=" + objectType; + } + try { return new URL(url); @@ -124,48 +135,38 @@ public class EnsemblLookup extends EnsemblRestClient } /** - * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the - * given identifier, or null if not found + * Returns the gene id related to the given identifier (which may be for a + * gene, transcript or protein) * * @param identifier * @return */ public String getGeneId(String identifier) { - return getResult(identifier, br -> parseGeneId(br)); + return getGeneId(identifier, null); } /** - * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the - * given identifier, or null if not found + * Returns the gene id related to the given identifier (which may be for a + * gene, transcript or protein) * * @param identifier + * @param objectType * @return */ - public String getSpecies(String identifier) - { - return getResult(identifier, br -> getAttribute(br, SPECIES)); - } - - /** - * @param identifier - * @param attribute - * @return - */ - protected String getResult(String identifier, - Function parser) + public String getGeneId(String identifier, String objectType) { List ids = Arrays.asList(new String[] { identifier }); BufferedReader br = null; try { - URL url = getUrl(identifier); + URL url = getUrl(identifier, objectType); if (url != null) { br = getHttpResponse(url, ids); } - return br == null ? null : parser.apply(br); + return br == null ? null : parseResponse(br); } catch (IOException e) { // ignore @@ -186,75 +187,238 @@ public class EnsemblLookup extends EnsemblRestClient } /** - * Answers the value of 'attribute' from the JSON response, or null if not - * found + * Parses the JSON response and returns the gene identifier, or null if not + * found. If the returned object_type is Gene, returns the id, if Transcript + * returns the Parent. If it is Translation (peptide identifier), then the + * Parent is the transcript identifier, so we redo the search with this value. * * @param br - * @param attribute * @return + * @throws IOException */ - protected String getAttribute(BufferedReader br, String attribute) + protected String parseResponse(BufferedReader br) throws IOException { - String value = null; + String geneId = null; JSONParser jp = new JSONParser(); try { JSONObject val = (JSONObject) jp.parse(br); - value = val.get(attribute).toString(); - } catch (ParseException | NullPointerException | IOException e) + String type = val.get(OBJECT_TYPE).toString(); + if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) + { + // got the gene - just returns its id + geneId = val.get(JSON_ID).toString(); + } + else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) + { + // got the transcript - return its (Gene) Parent + geneId = val.get(PARENT).toString(); + } + else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) + { + // got the protein - get its Parent, restricted to type Transcript + String transcriptId = val.get(PARENT).toString(); + geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT); + } + } catch (ParseException e) { // ignore } - return value; + return geneId; } /** - * Parses the JSON response and returns the gene identifier, or null if not - * found. If the returned object_type is Gene, returns the id, if Transcript - * returns the Parent. If it is Translation (peptide identifier), then the - * Parent is the transcript identifier, so we redo the search with this value. + * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the + * given identifier, or null if not found * - * @param br + * @param identifier * @return */ - protected String parseGeneId(BufferedReader br) + public String getSpecies(String identifier) { - String geneId = null; - JSONParser jp = new JSONParser(); + String species = null; + JSONObject json = getResult(identifier, null); + if (json != null) + { + Object o = json.get(SPECIES); + if (o != null) + { + species = o.toString(); + } + } + return species; + } + + /** + * Calls the /lookup/id rest service and returns the response as a JSONObject, + * or null if any error + * + * @param identifier + * @param objectType + * (optional) + * @return + */ + protected JSONObject getResult(String identifier, String objectType) + { + List ids = Arrays.asList(new String[] { identifier }); + + BufferedReader br = null; try { - JSONObject val = (JSONObject) jp.parse(br); - String type = val.get(OBJECT_TYPE).toString(); - if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) + + URL url = getUrl(identifier, objectType); + + if (identifier.equals(lastId)) { - geneId = val.get(ID).toString(); + System.err.println("** Ensembl lookup " + url.toString() + + " looping on Parent!"); + return null; } - else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) + + lastId = identifier; + + if (url != null) { - geneId = val.get(PARENT).toString(); + br = getHttpResponse(url, ids); } - else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) + return br == null ? null : (JSONObject) (new JSONParser().parse(br)); + } catch (IOException | ParseException e) + { + System.err.println("Error parsing " + identifier + " lookup response " + + e.getMessage()); + return null; + } finally + { + if (br != null) { - String transcriptId = val.get(PARENT).toString(); try { - geneId = getGeneId(transcriptId); - } catch (StackOverflowError e) + br.close(); + } catch (IOException e) { - /* - * unlikely data condition error! - */ - System.err - .println("** Ensembl lookup " - + getUrl(transcriptId).toString() - + " looping on Parent!"); + // ignore } } - } catch (ParseException | IOException e) + } + } + + /** + * Parses the JSON response and returns the gene identifier, or null if not + * found. If the returned object_type is Gene, returns the id, if Transcript + * returns the Parent. If it is Translation (peptide identifier), then the + * Parent is the transcript identifier, so we redo the search with this value, + * specifying that object_type should be Transcript. + * + * @param jsonObject + * @return + */ + protected String parseGeneId(JSONObject json) + { + if (json == null) { - // ignore + // e.g. lookup failed with 404 not found + return null; } + + String geneId = null; + String type = json.get(OBJECT_TYPE).toString(); + if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) + { + // got the gene - just returns its id + geneId = json.get(JSON_ID).toString(); + } + else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) + { + // got the transcript - return its (Gene) Parent + geneId = json.get(PARENT).toString(); + } + else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) + { + // got the protein - look up its Parent, restricted to type Transcript + String transcriptId = json.get(PARENT).toString(); + geneId = parseGeneId(getResult(transcriptId, OBJECT_TYPE_TRANSCRIPT)); + } + return geneId; } + /** + * Calls the /lookup/id rest service for the given id, and if successful, + * parses and returns the gene's chromosomal coordinates + * + * @param geneId + * @return + */ + public GeneLociI getGeneLoci(String geneId) + { + return parseGeneLoci(getResult(geneId, OBJECT_TYPE_GENE)); + } + + /** + * Parses the /lookup/id response for species, asssembly_name, + * seq_region_name, start, end and returns an object that wraps them, or null + * if unsuccessful + * + * @param json + * @return + */ + GeneLociI parseGeneLoci(JSONObject json) + { + if (json == null) + { + return null; + } + + try + { + final String species = json.get("species").toString(); + final String assembly = json.get("assembly_name").toString(); + final String chromosome = json.get("seq_region_name").toString(); + String strand = json.get("strand").toString(); + int start = Integer.parseInt(json.get("start").toString()); + int end = Integer.parseInt(json.get("end").toString()); + int fromEnd = end - start + 1; + boolean reverseStrand = "-1".equals(strand); + int toStart = reverseStrand ? end : start; + int toEnd = reverseStrand ? start : end; + List fromRange = Collections.singletonList(new int[] { 1, + fromEnd }); + List toRange = Collections.singletonList(new int[] { toStart, + toEnd }); + final MapList map = new MapList(fromRange, toRange, 1, 1); + return new GeneLociI() + { + + @Override + public String getSpeciesId() + { + return species == null ? "" : species; + } + + @Override + public String getAssemblyId() + { + return assembly; + } + + @Override + public String getChromosomeId() + { + return chromosome; + } + + @Override + public MapList getMap() + { + return map; + } + }; + } catch (NullPointerException | NumberFormatException e) + { + Cache.log.error("Error looking up gene loci: " + e.getMessage()); + e.printStackTrace(); + } + return null; + } + }