/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.ext.ensembl; import jalview.bin.Cache; import jalview.datamodel.AlignmentI; import jalview.datamodel.GeneLociI; import jalview.util.MapList; import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.function.Function; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** * A client for the Ensembl /lookup REST endpoint, used to find the gene * identifier given a gene, transcript or protein identifier, or to extract the * species or chromosomal coordinates from the same service response * * @author gmcarstairs */ public class EnsemblLookup extends EnsemblRestClient { private static final String SPECIES = "species"; /** * Default constructor (to use rest.ensembl.org) */ public EnsemblLookup() { super(); } /** * Constructor given the target domain to fetch data from * * @param */ public EnsemblLookup(String d) { super(d); } @Override public String getDbName() { return "ENSEMBL"; } @Override public AlignmentI getSequenceRecords(String queries) throws Exception { return null; } @Override protected URL getUrl(List ids) throws MalformedURLException { String identifier = ids.get(0); return getUrl(identifier, null); } /** * Gets the url for lookup of the given identifier, optionally with objectType * also specified in the request * * @param identifier * @param objectType * @return */ protected URL getUrl(String identifier, String objectType) { String url = getDomain() + "/lookup/id/" + identifier + CONTENT_TYPE_JSON; if (objectType != null) { url += "&" + OBJECT_TYPE + "=" + objectType; } try { return new URL(url); } catch (MalformedURLException e) { return null; } } @Override protected boolean useGetRequest() { return true; } @Override protected String getRequestMimeType(boolean multipleIds) { return "application/json"; } @Override protected String getResponseMimeType() { return "application/json"; } /** * Returns the gene id related to the given identifier (which may be for a * gene, transcript or protein) * * @param identifier * @return */ public String getGeneId(String identifier) { return (String) getResult(identifier, null, br -> parseGeneId(br)); } /** * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the * given identifier, or null if not found * * @param identifier * @return */ public String getSpecies(String identifier) { return (String) getResult(identifier, null, br -> getAttribute(br, SPECIES)); } /** * Calls the /lookup/id rest service and delegates parsing of the JSON * response to the supplied parser * * @param identifier * @param objectType * (optional) * @param parser * @return */ protected Object getResult(String identifier, String objectType, Function parser) { List ids = Arrays.asList(new String[] { identifier }); BufferedReader br = null; try { URL url = getUrl(identifier, objectType); if (url != null) { br = getHttpResponse(url, ids); } return br == null ? null : parser.apply(br); } catch (IOException e) { // ignore return null; } finally { if (br != null) { try { br.close(); } catch (IOException e) { // ignore } } } } /** * Answers the value of 'attribute' from the JSON response, or null if not * found * * @param br * @param attribute * @return */ protected String getAttribute(BufferedReader br, String attribute) { String value = null; JSONParser jp = new JSONParser(); try { JSONObject val = (JSONObject) jp.parse(br); value = val.get(attribute).toString(); } catch (ParseException | NullPointerException | IOException e) { // ignore } return value; } /** * Parses the JSON response and returns the gene identifier, or null if not * found. If the returned object_type is Gene, returns the id, if Transcript * returns the Parent. If it is Translation (peptide identifier), then the * Parent is the transcript identifier, so we redo the search with this value, * specifying that object_type should be Transcript. * * @param br * @return */ protected String parseGeneId(BufferedReader br) { String geneId = null; JSONParser jp = new JSONParser(); try { JSONObject val = (JSONObject) jp.parse(br); String type = val.get(OBJECT_TYPE).toString(); if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) { // got the gene - just returns its id geneId = val.get(JSON_ID).toString(); } else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) { // got the transcript - return its (Gene) Parent geneId = val.get(PARENT).toString(); } else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) { // got the protein - get its Parent, restricted to type Transcript String transcriptId = val.get(PARENT).toString(); geneId = (String) getResult(transcriptId, OBJECT_TYPE_TRANSCRIPT, reader -> parseGeneId(reader)); } } catch (ParseException | IOException e) { // ignore } return geneId; } /** * Calls the /lookup/id rest service for the given id, and if successful, * parses and returns the gene's chromosomal coordinates * * @param geneId * @return */ public GeneLociI getGeneLoci(String geneId) { return (GeneLociI) getResult(geneId, OBJECT_TYPE_GENE, br -> parseGeneLoci(br)); } /** * Parses the /lookup/id response for species, asssembly_name, * seq_region_name, start, end and returns an object that wraps them, or null * if unsuccessful * * @param br * @return */ GeneLociI parseGeneLoci(BufferedReader br) { JSONParser jp = new JSONParser(); try { JSONObject val = (JSONObject) jp.parse(br); final String species = val.get("species").toString(); final String assembly = val.get("assembly_name").toString(); final String chromosome = val.get("seq_region_name").toString(); String strand = val.get("strand").toString(); int start = Integer.parseInt(val.get("start").toString()); int end = Integer.parseInt(val.get("end").toString()); int fromEnd = end - start + 1; boolean reverseStrand = "-1".equals(strand); int toStart = reverseStrand ? end : start; int toEnd = reverseStrand ? start : end; List fromRange = Collections.singletonList(new int[] { 1, fromEnd }); List toRange = Collections.singletonList(new int[] { toStart, toEnd }); final MapList map = new MapList(fromRange, toRange, 1, 1); return new GeneLociI() { @Override public String getSpeciesId() { return species == null ? "" : species; } @Override public String getAssemblyId() { return assembly; } @Override public String getChromosomeId() { return chromosome; } @Override public MapList getMap() { return map; } }; } catch (ParseException | NullPointerException | IOException | NumberFormatException | ClassCastException e) { Cache.log.error("Error looking up gene loci: " + e.getMessage()); e.printStackTrace(); } return null; } }