From 8ab24374eabe70d3d55bc92bcbbae401c743baa5 Mon Sep 17 00:00:00 2001
From: gmungoc
Date: Wed, 14 Mar 2018 14:41:14 +0000
Subject: [PATCH 1/1] JAL-2679 reinstating changes lost in VCF merge

---
 src/jalview/ext/ensembl/EnsemblLookup.java |  106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 99 insertions(+), 7 deletions(-)

diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java
index e55605d..ed1b4fa 100644
--- a/src/jalview/ext/ensembl/EnsemblLookup.java
+++ b/src/jalview/ext/ensembl/EnsemblLookup.java
@@ -47,12 +47,6 @@ import org.json.simple.parser.ParseException;
 public class EnsemblLookup extends EnsemblRestClient
 {
   private static final String SPECIES = "species";
-  private static final String OBJECT_TYPE_TRANSLATION = "Translation";
-  private static final String PARENT = "Parent";
-  private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
-  private static final String ID = "id";
-  private static final String OBJECT_TYPE_GENE = "Gene";
-  private static final String OBJECT_TYPE = "object_type";
 
   /**
    * keep track of last identifier retrieved to break loops
@@ -149,7 +143,105 @@ public class EnsemblLookup extends EnsemblRestClient
    */
   public String getGeneId(String identifier)
   {
-    return parseGeneId(getResult(identifier, null));
+    return getGeneId(identifier, null);
+  }
+
+  /**
+   * Returns the gene id related to the given identifier (which may be for a
+   * gene, transcript or protein), or null if no lookup URL or response is
+   * obtained, or the response cannot be read or parsed
+   *
+   * @param identifier
+   *          the identifier to look up
+   * @param objectType
+   *          object type passed to getUrl with the identifier; may be null
+   * @return the gene id, or null if not found
+   */
+  public String getGeneId(String identifier, String objectType)
+  {
+    List<String> ids = Arrays.asList(new String[] { identifier });
+
+    BufferedReader br = null;
+    try
+    {
+      URL url = getUrl(identifier, objectType);
+      if (url != null)
+      {
+        br = getHttpResponse(url, ids);
+      }
+      return br == null ? null : parseResponse(br);
+    } catch (IOException e)
+    {
+      // best-effort lookup: a failed request is reported as 'not found'
+      return null;
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // nothing useful can be done if close fails
+        }
+      }
+    }
+  }
+
+  /**
+   * Parses the JSON response and returns the gene identifier, or null if not
+   * found. If the returned object_type is Gene, returns the id, if Transcript
+   * returns the Parent. If it is Translation (peptide identifier), then the
+   * Parent is the transcript identifier, so we redo the search with this value.
+   * Also returns null if the response is not a JSON object, or lacks the
+   * object_type, id or Parent value needed.
+   *
+   * @param br
+   *          reader supplying the JSON response
+   * @return the gene id, or null if it could not be determined
+   * @throws IOException
+   *           if the response cannot be read
+   */
+  protected String parseResponse(BufferedReader br) throws IOException
+  {
+    String geneId = null;
+    JSONParser jp = new JSONParser();
+    try
+    {
+      Object parsed = jp.parse(br);
+      if (!(parsed instanceof JSONObject))
+      {
+        // not a JSON object, so there are no fields to inspect
+        return null;
+      }
+      JSONObject val = (JSONObject) parsed;
+      Object type = val.get(OBJECT_TYPE);
+      Object parent = val.get(PARENT);
+      // equalsIgnoreCase(null) is false, so a missing type matches nothing
+      String typeName = type == null ? null : type.toString();
+      if (OBJECT_TYPE_GENE.equalsIgnoreCase(typeName))
+      {
+        // got the gene - just return its id
+        Object id = val.get(JSON_ID);
+        geneId = id == null ? null : id.toString();
+      }
+      else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(typeName))
+      {
+        // got the transcript - return its (Gene) Parent
+        geneId = parent == null ? null : parent.toString();
+      }
+      else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(typeName)
+              && parent != null)
+      {
+        // got the protein - get its Parent, restricted to type Transcript
+        geneId = getGeneId(parent.toString(), OBJECT_TYPE_TRANSCRIPT);
+      }
+    } catch (ParseException e)
+    {
+      // malformed JSON: fall through and return null
+    }
+    return geneId;
   }
 
   /**
-- 
1.7.10.2