X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblLookup.java;fp=src%2Fjalview%2Fext%2Fensembl%2FEnsemblLookup.java;h=4c4328160db4877e59bc3044223f86e8ade59973;hb=d38847f4c9170385c3b4c8fbc5e6f5c95a15a196;hp=92763a17cce27e4a571c4a6edaeb9ff68bdfd939;hpb=82cba0a99ac7c16b89e8399adb158e6a063fecd3;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index 92763a1..4c43281 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -20,27 +20,35 @@ */ package jalview.ext.ensembl; +import jalview.bin.Cache; import jalview.datamodel.AlignmentI; +import jalview.datamodel.GeneLociI; +import jalview.util.MapList; import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.function.Function; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** - * A client for the Ensembl lookup REST endpoint, used to find the gene - * identifier given a gene, transcript or protein identifier. + * A client for the Ensembl /lookup REST endpoint, used to find the gene + * identifier given a gene, transcript or protein identifier, or to extract the + * species or chromosomal coordinates from the same service response * * @author gmcarstairs */ public class EnsemblLookup extends EnsemblRestClient { + private static final String SPECIES = "species"; + /** * Default constructor (to use rest.ensembl.org) */ @@ -123,27 +131,42 @@ public class EnsemblLookup extends EnsemblRestClient } /** - * Returns the gene id related to the given identifier, which may be for a - * gene, transcript or protein + * Returns the gene id related to the given identifier (which may be for a + * gene, transcript or protein) * * @param identifier * @return */ public String getGeneId(String identifier) { - return getGeneId(identifier, null); + return (String) getResult(identifier, null, br -> parseGeneId(br)); } /** - * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the + * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the * given identifier, or null if not found * * @param identifier + * @return + */ + public String getSpecies(String identifier) + { + return (String) getResult(identifier, null, + br -> getAttribute(br, SPECIES)); + } + + /** + * Calls the /lookup/id rest service and delegates parsing of the JSON + * response to the supplied parser + * + * @param identifier * @param objectType * (optional) + * @param parser * @return */ - public String getGeneId(String identifier, String objectType) + protected Object getResult(String identifier, String objectType, + Function parser) { List ids = Arrays.asList(new String[] { identifier }); @@ -155,7 +178,7 @@ public class EnsemblLookup extends EnsemblRestClient { br = getHttpResponse(url, ids); } - return br == null ? null : parseResponse(br); + return br == null ? null : parser.apply(br); } catch (IOException e) { // ignore @@ -176,16 +199,39 @@ public class EnsemblLookup extends EnsemblRestClient } /** + * Answers the value of 'attribute' from the JSON response, or null if not + * found + * + * @param br + * @param attribute + * @return + */ + protected String getAttribute(BufferedReader br, String attribute) + { + String value = null; + JSONParser jp = new JSONParser(); + try + { + JSONObject val = (JSONObject) jp.parse(br); + value = val.get(attribute).toString(); + } catch (ParseException | NullPointerException | IOException e) + { + // ignore + } + return value; + } + + /** * Parses the JSON response and returns the gene identifier, or null if not * found. If the returned object_type is Gene, returns the id, if Transcript * returns the Parent. If it is Translation (peptide identifier), then the - * Parent is the transcript identifier, so we redo the search with this value. + * Parent is the transcript identifier, so we redo the search with this value, + * specifying that object_type should be Transcript. * * @param br * @return - * @throws IOException */ - protected String parseResponse(BufferedReader br) throws IOException + protected String parseGeneId(BufferedReader br) { String geneId = null; JSONParser jp = new JSONParser(); @@ -196,7 +242,7 @@ public class EnsemblLookup extends EnsemblRestClient if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) { // got the gene - just returns its id - geneId = val.get(ID).toString(); + geneId = val.get(JSON_ID).toString(); } else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) { @@ -207,13 +253,92 @@ public class EnsemblLookup extends EnsemblRestClient { // got the protein - get its Parent, restricted to type Transcript String transcriptId = val.get(PARENT).toString(); - geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT); + geneId = (String) getResult(transcriptId, OBJECT_TYPE_TRANSCRIPT, + reader -> parseGeneId(reader)); } - } catch (ParseException e) + } catch (ParseException | IOException e) { // ignore } return geneId; } + /** + * Calls the /lookup/id rest service for the given id, and if successful, + * parses and returns the gene's chromosomal coordinates + * + * @param geneId + * @return + */ + public GeneLociI getGeneLoci(String geneId) + { + return (GeneLociI) getResult(geneId, OBJECT_TYPE_GENE, + br -> parseGeneLoci(br)); + } + + /** + * Parses the /lookup/id response for species, asssembly_name, + * seq_region_name, start, end and returns an object that wraps them, or null + * if unsuccessful + * + * @param br + * @return + */ + GeneLociI parseGeneLoci(BufferedReader br) + { + JSONParser jp = new JSONParser(); + try + { + JSONObject val = (JSONObject) jp.parse(br); + final String species = val.get("species").toString(); + final String assembly = val.get("assembly_name").toString(); + final String chromosome = val.get("seq_region_name").toString(); + String strand = val.get("strand").toString(); + int start = Integer.parseInt(val.get("start").toString()); + int end = Integer.parseInt(val.get("end").toString()); + int fromEnd = end - start + 1; + boolean reverseStrand = "-1".equals(strand); + int toStart = reverseStrand ? end : start; + int toEnd = reverseStrand ? start : end; + List fromRange = Collections.singletonList(new int[] { 1, + fromEnd }); + List toRange = Collections.singletonList(new int[] { toStart, + toEnd }); + final MapList map = new MapList(fromRange, toRange, 1, 1); + return new GeneLociI() + { + + @Override + public String getSpeciesId() + { + return species == null ? "" : species; + } + + @Override + public String getAssemblyId() + { + return assembly; + } + + @Override + public String getChromosomeId() + { + return chromosome; + } + + @Override + public MapList getMap() + { + return map; + } + }; + } catch (ParseException | NullPointerException | IOException + | NumberFormatException | ClassCastException e) + { + Cache.log.error("Error looking up gene loci: " + e.getMessage()); + e.printStackTrace(); + } + return null; + } + }