X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblLookup.java;h=0813ba88ba37041533070729ff491b1aa4d88016;hb=fdde9a078d7bdb46ed9fb7fe115ea83c84a19c81;hp=d7f1b07a650f13af99015bfb8545b8a96f0a9164;hpb=da31d3e5aebfcdbb1ae7084bbc75bbc3539ee2e6;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index d7f1b07..0813ba8 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -20,27 +20,34 @@ */ package jalview.ext.ensembl; +import jalview.bin.Console; import jalview.datamodel.AlignmentI; +import jalview.datamodel.GeneLociI; +import jalview.datamodel.GeneLocus; +import jalview.datamodel.Mapping; +import jalview.util.MapList; -import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Map; -import org.json.simple.JSONObject; -import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** - * A client for the Ensembl lookup REST endpoint, used to find the gene - * identifier given a gene, transcript or protein identifier. + * A client for the Ensembl /lookup REST endpoint, used to find the gene + * identifier given a gene, transcript or protein identifier, or to extract the + * species or chromosomal coordinates from the same service response * * @author gmcarstairs */ public class EnsemblLookup extends EnsemblRestClient { + private static final String SPECIES = "species"; + /** * Default constructor (to use rest.ensembl.org) */ @@ -111,8 +118,8 @@ public class EnsemblLookup extends EnsemblRestClient } /** - * Returns the gene id related to the given identifier, which may be for a - * gene, transcript or protein + * Returns the gene id related to the given identifier (which may be for a + * gene, transcript or protein), or null if none is found * * @param identifier * @return @@ -124,7 +131,7 @@ public class EnsemblLookup extends EnsemblRestClient /** * Returns the gene id related to the given identifier (which may be for a - * gene, transcript or protein) + * gene, transcript or protein), or null if none is found * * @param identifier * @param objectType @@ -132,34 +139,7 @@ public class EnsemblLookup extends EnsemblRestClient */ public String getGeneId(String identifier, String objectType) { - List ids = Arrays.asList(new String[] { identifier }); - - BufferedReader br = null; - try - { - URL url = getUrl(identifier, objectType); - if (url != null) - { - br = getHttpResponse(url, ids); - } - return br == null ? null : parseResponse(br); - } catch (IOException e) - { - // ignore - return null; - } finally - { - if (br != null) - { - try - { - br.close(); - } catch (IOException e) - { - // ignore - } - } - } + return parseGeneId(getResult(identifier, objectType)); } /** @@ -170,37 +150,139 @@ public class EnsemblLookup extends EnsemblRestClient * * @param br * @return - * @throws IOException */ - protected String parseResponse(BufferedReader br) throws IOException + protected String parseGeneId(Map val) { + if (val == null) + { + return null; + } String geneId = null; - JSONParser jp = new JSONParser(); - try + String type = val.get(OBJECT_TYPE).toString(); + if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) { - JSONObject val = (JSONObject) jp.parse(br); - String type = val.get(OBJECT_TYPE).toString(); - if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) - { - // got the gene - just returns its id - geneId = val.get(ID).toString(); - } - else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) - { - // got the transcript - return its (Gene) Parent - geneId = val.get(PARENT).toString(); - } - else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) + // got the gene - just returns its id + geneId = val.get(JSON_ID).toString(); + } + else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) + { + // got the transcript - return its (Gene) Parent + geneId = val.get(PARENT).toString(); + } + else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) + { + // got the protein - get its Parent, restricted to type Transcript + String transcriptId = val.get(PARENT).toString(); + geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT); + } + + return geneId; + } + + /** + * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the + * given identifier, or null if not found + * + * @param identifier + * @return + */ + public String getSpecies(String identifier) + { + String species = null; + Map json = getResult(identifier, null); + if (json != null) + { + Object o = json.get(SPECIES); + if (o != null) { - // got the protein - get its Parent, restricted to type Transcript - String transcriptId = val.get(PARENT).toString(); - geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT); + species = o.toString(); } - } catch (ParseException e) + } + return species; + } + + /** + * Calls the /lookup/id rest service and returns the response as a Map, or null if any error + * + * @param identifier + * @param objectType + * (optional) + * @return + */ + @SuppressWarnings("unchecked") + protected Map getResult(String identifier, + String objectType) + { + List ids = Arrays.asList(new String[] { identifier }); + + try + { + return (Map) getJSON(getUrl(identifier, objectType), + ids, -1, MODE_MAP, null); + } catch (IOException | ParseException e) { - // ignore + System.err.println("Error parsing " + identifier + " lookup response " + + e.getMessage()); + return null; } - return geneId; + } + + /** + * Calls the /lookup/id rest service for the given id, and if successful, + * parses and returns the gene's chromosomal coordinates + * + * @param geneId + * @return + */ + public GeneLociI getGeneLoci(String geneId) + { + return parseGeneLoci(getResult(geneId, OBJECT_TYPE_GENE)); + } + + /** + * Parses the /lookup/id response for species, asssembly_name, + * seq_region_name, start, end and returns an object that wraps them, or null + * if unsuccessful + * + * @param json + * @return + */ + GeneLociI parseGeneLoci(Map json) + { + if (json == null) + { + return null; + } + + try + { + final String species = json.get("species").toString(); + final String assembly = json.get("assembly_name").toString(); + final String chromosome = json.get("seq_region_name").toString(); + String strand = json.get("strand").toString(); + int start = Integer.parseInt(json.get("start").toString()); + int end = Integer.parseInt(json.get("end").toString()); + int fromEnd = end - start + 1; + boolean reverseStrand = "-1".equals(strand); + int toStart = reverseStrand ? end : start; + int toEnd = reverseStrand ? start : end; + List fromRange = Collections + .singletonList(new int[] + { 1, fromEnd }); + List toRange = Collections + .singletonList(new int[] + { toStart, toEnd }); + final Mapping map = new Mapping( + new MapList(fromRange, toRange, 1, 1)); + return new GeneLocus(species == null ? "" : species, assembly, + chromosome, map); + } catch (NullPointerException | NumberFormatException e) + { + Console.error("Error looking up gene loci: " + e.getMessage()); + e.printStackTrace(); + } + return null; } }