X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblLookup.java;h=0ddef2b999ef57de300f6c4f511414086bf61a66;hb=d7e95f458ebcbbdcc13f8b07357542ab2d7e4547;hp=4c9ad2b6a7813cef541448e25ef4c2f57a0229bd;hpb=853624fb32058cccc544ae7d13af6ad4b0800b6c;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index 4c9ad2b..0ddef2b 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -1,3 +1,23 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.ext.ensembl; import jalview.datamodel.AlignmentI; @@ -14,15 +34,26 @@ import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** - * A client for the Ensembl lookup REST endpoint; used to find the Parent gene - * identifier given a transcript identifier. + * A client for the Ensembl lookup REST endpoint, used to find the gene + * identifier given a gene, transcript or protein identifier. 
* * @author gmcarstairs - * */ public class EnsemblLookup extends EnsemblRestClient { + private static final String OBJECT_TYPE_TRANSLATION = "Translation"; + private static final String PARENT = "Parent"; + private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript"; + private static final String ID = "id"; + private static final String OBJECT_TYPE_GENE = "Gene"; + private static final String OBJECT_TYPE = "object_type"; + + /** + * keep track of last identifier retrieved to break loops + */ + private String lastId; + /** * Default constructor (to use rest.ensembl.org) */ @@ -57,17 +88,26 @@ public class EnsemblLookup extends EnsemblRestClient protected URL getUrl(List ids) throws MalformedURLException { String identifier = ids.get(0); - return getUrl(identifier); + return getUrl(identifier, null); } /** + * Gets the url for lookup of the given identifier, optionally with objectType + * also specified in the request + * * @param identifier + * @param objectType * @return */ - protected URL getUrl(String identifier) + protected URL getUrl(String identifier, String objectType) { String url = getDomain() + "/lookup/id/" + identifier - + "?content-type=application/json"; + + CONTENT_TYPE_JSON; + if (objectType != null) + { + url += "&" + OBJECT_TYPE + "=" + objectType; + } + try { return new URL(url); @@ -96,25 +136,50 @@ public class EnsemblLookup extends EnsemblRestClient } /** + * Returns the gene id related to the given identifier, which may be for a + * gene, transcript or protein + * + * @param identifier + * @return + */ + public String getGeneId(String identifier) + { + return getGeneId(identifier, null); + } + + /** * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the * given identifier, or null if not found * * @param identifier + * @param objectType + * (optional) * @return */ - public String getParent(String identifier) + public String getGeneId(String identifier, String objectType) { List ids = Arrays.asList(new String[] { 
identifier }); BufferedReader br = null; try { - URL url = getUrl(identifier); + + URL url = getUrl(identifier, objectType); + + if (identifier.equals(lastId)) + { + System.err.println("** Ensembl lookup " + url.toString() + + " looping on Parent!"); + return null; + } + + lastId = identifier; + if (url != null) { br = getHttpResponse(url, ids); } - return (parseResponse(br)); + return br == null ? null : parseResponse(br); } catch (IOException e) { // ignore @@ -135,8 +200,10 @@ public class EnsemblLookup extends EnsemblRestClient } /** - * Parses "Parent" from the JSON response and returns the value, or null if - * not found + * Parses the JSON response and returns the gene identifier, or null if not + * found. If the returned object_type is Gene, returns the id, if Transcript + * returns the Parent. If it is Translation (peptide identifier), then the + * Parent is the transcript identifier, so we redo the search with this value. * * @param br * @return @@ -144,17 +211,33 @@ public class EnsemblLookup extends EnsemblRestClient */ protected String parseResponse(BufferedReader br) throws IOException { - String parent = null; + String geneId = null; JSONParser jp = new JSONParser(); try { JSONObject val = (JSONObject) jp.parse(br); - parent = val.get("Parent").toString(); + String type = val.get(OBJECT_TYPE).toString(); + if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) + { + // got the gene - just returns its id + geneId = val.get(ID).toString(); + } + else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) + { + // got the transcript - return its (Gene) Parent + geneId = val.get(PARENT).toString(); + } + else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) + { + // got the protein - get its Parent, restricted to type Transcript + String transcriptId = val.get(PARENT).toString(); + geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT); + } } catch (ParseException e) { // ignore } - return parent; + return geneId; } }