import jalview.datamodel.GeneLociI;
import jalview.util.MapList;
-import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
-import java.util.function.Function;
+import java.util.Map;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
- * A client for the Ensembl lookup REST endpoint
+ * A client for the Ensembl /lookup REST endpoint, used to find the gene
+ * identifier given a gene, transcript or protein identifier, or to extract the
+ * species or chromosomal coordinates from the same service response
*
* @author gmcarstairs
*/
{
+ // key of the species name in the JSON lookup response
private static final String SPECIES = "species";
- private static final String PARENT = "Parent";
-
- private static final String OBJECT_TYPE_TRANSLATION = "Translation";
- private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
- private static final String ID = "id";
- private static final String OBJECT_TYPE_GENE = "Gene";
- private static final String OBJECT_TYPE = "object_type";
-
+
+ // NOTE(review): PARENT, OBJECT_TYPE and the OBJECT_TYPE_* constants are no
+ // longer declared here but are still referenced by this class (and ID is
+ // superseded by JSON_ID) - presumably they are now inherited; confirm
+
/**
* Default constructor (to use rest.ensembl.org)
*/
protected URL getUrl(List<String> ids) throws MalformedURLException
{
+ // only the first identifier in the list is used
String identifier = ids.get(0);
- return getUrl(identifier);
+ // null object type: no object type restriction is added to the url
+ return getUrl(identifier, null);
}
/**
+ * Gets the url for lookup of the given identifier, optionally with objectType
+ * also specified in the request
+ *
* @param identifier
+ * @param objectType
* @return
*/
- protected URL getUrl(String identifier)
+ protected URL getUrl(String identifier, String objectType)
{
String url = getDomain() + "/lookup/id/" + identifier
+ CONTENT_TYPE_JSON;
+ if (objectType != null)
+ {
+ url += "&" + OBJECT_TYPE + "=" + objectType;
+ }
+
try
{
return new URL(url);
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
- * Calls the Ensembl lookup REST endpoint and returns
- * <ul>
- * <li>the 'id' for the identifier if its type is "Gene"</li>
- * <li>the 'Parent' if its type is 'Transcript'</li>
- * <ul>
- * If the type is 'Translation', does a recursive call to this method, passing
- * in the 'Parent' (transcript id).
+ * Returns the gene id related to the given identifier (which may be for a
+ * gene, transcript or protein), or null if none is found. Delegates to
+ * getGeneId(identifier, objectType) with no object type specified.
*
* @param identifier
* @return
*/
public String getGeneId(String identifier)
{
- return (String) getResult(identifier, br -> parseGeneId(br));
+ return getGeneId(identifier, null);
}
/**
- * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the
- * given identifier, or null if not found
+ * Looks up the given identifier (which may be for a gene, transcript or
+ * protein) and answers the related gene id, or null if none is found
*
* @param identifier
+ * @param objectType
+ * optional object type to include in the lookup request; may be null
* @return
*/
- public String getSpecies(String identifier)
+ public String getGeneId(String identifier, String objectType)
{
- return (String) getResult(identifier, br -> getAttribute(br, SPECIES));
+ Map<String, Object> response = getResult(identifier, objectType);
+ return parseGeneId(response);
}
/**
- * Calls the /lookup/id rest service and delegates parsing of the JSON
- * response to the supplied parser
+ * Parses the JSON response and returns the gene identifier, or null if not
+ * found. If the returned object_type is Gene, returns the id, if Transcript
+ * returns the Parent. If it is Translation (peptide identifier), then the
+ * Parent is the transcript identifier, so we redo the search with this value.
+ * A response that lacks the object_type, or the field needed for its type,
+ * gives null rather than a NullPointerException.
*
- * @param identifier
- * @param parser
+ * @param val
+ * the lookup response as a map of field name to value (may be null)
* @return
*/
- protected Object getResult(String identifier,
- Function<BufferedReader, Object> parser)
+ protected String parseGeneId(Map<String, Object> val)
{
- List<String> ids = Arrays.asList(new String[] { identifier });
-
- BufferedReader br = null;
- try
- {
- URL url = getUrl(identifier);
- if (url != null)
- {
- br = getHttpResponse(url, ids);
- }
- return br == null ? null : parser.apply(br);
- } catch (IOException e)
+ if (val == null)
{
- // ignore
return null;
- } finally
+ }
+ Object type = val.get(OBJECT_TYPE);
+ if (type == null)
+ {
+ // no object_type in the response, so there is nothing to resolve
+ return null;
+ }
+ String typeName = type.toString();
+ Object geneId = null;
+ if (OBJECT_TYPE_GENE.equalsIgnoreCase(typeName))
{
- if (br != null)
- {
- try
- {
- br.close();
- } catch (IOException e)
- {
- // ignore
- }
- }
+ // got the gene - just return its id
+ geneId = val.get(JSON_ID);
+ }
+ else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(typeName))
+ {
+ // got the transcript - return its (Gene) Parent
+ geneId = val.get(PARENT);
}
+ else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(typeName))
+ {
+ // got the protein - get its Parent, restricted to type Transcript
+ Object transcriptId = val.get(PARENT);
+ if (transcriptId != null)
+ {
+ geneId = getGeneId(transcriptId.toString(), OBJECT_TYPE_TRANSCRIPT);
+ }
+ }
+
+ // toString() is applied only once the value is known to be present
+ return geneId == null ? null : geneId.toString();
}
/**
- * Answers the value of 'attribute' from the JSON response, or null if not
- * found
+ * Looks up the given identifier with the Ensembl lookup REST endpoint and
+ * answers the 'species' value of the response, or null if not found
*
- * @param br
- * @param attribute
+ * @param identifier
* @return
*/
- protected String getAttribute(BufferedReader br, String attribute)
+ public String getSpecies(String identifier)
{
- String value = null;
- JSONParser jp = new JSONParser();
- try
+ Map<String, Object> response = getResult(identifier, null);
+ if (response == null)
{
- JSONObject val = (JSONObject) jp.parse(br);
- value = val.get(attribute).toString();
- } catch (ParseException | NullPointerException | IOException e)
- {
- // ignore
+ // no usable response for this identifier
+ return null;
}
- return value;
+ Object speciesValue = response.get(SPECIES);
+ return speciesValue == null ? null : speciesValue.toString();
}
/**
- * Parses the JSON response and returns the gene identifier, or null if not
- * found. If the returned object_type is Gene, returns the id, if Transcript
- * returns the Parent. If it is Translation (peptide identifier), then the
- * Parent is the transcript identifier, so we redo the search with this value.
+ * Calls the /lookup/id rest service and returns the response as a
+ * {@code Map<String, Object>}, or null if no lookup url is available or the
+ * request or the reading of its response fails
*
- * @param br
+ * @param identifier
+ * @param objectType
+ * (optional)
* @return
*/
- protected String parseGeneId(BufferedReader br)
+ @SuppressWarnings("unchecked")
+ protected Map<String, Object> getResult(String identifier, String objectType)
{
- String geneId = null;
- JSONParser jp = new JSONParser();
+ // make no request if there is no url to send it to (getUrl presumably
+ // answers null when the url is malformed - confirm)
+ URL url = getUrl(identifier, objectType);
+ if (url == null)
+ {
+ return null;
+ }
+ List<String> ids = Arrays.asList(identifier);
+
try
{
- JSONObject val = (JSONObject) jp.parse(br);
- String type = val.get(OBJECT_TYPE).toString();
- if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
- {
- geneId = val.get(ID).toString();
- }
- else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
- {
- geneId = val.get(PARENT).toString();
- }
- else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
- {
- String transcriptId = val.get(PARENT).toString();
- try
- {
- geneId = getGeneId(transcriptId);
- } catch (StackOverflowError e)
- {
- /*
- * unlikely data condition error!
- */
- System.err
- .println("** Ensembl lookup "
- + getUrl(transcriptId).toString()
- + " looping on Parent!");
- }
- }
- } catch (ParseException | IOException e)
+ return (Map<String, Object>) getJSON(url, ids, -1, MODE_MAP, null);
+ }
+ catch (IOException | ParseException e)
{
- // ignore
+ System.err.println("Error parsing " + identifier + " lookup response "
+ + e.getMessage());
+ return null;
}
- return geneId;
}
/**
*/
public GeneLociI getGeneLoci(String geneId)
{
- return (GeneLociI) getResult(geneId, br -> parseGeneLoci(br));
+ // OBJECT_TYPE_GENE is passed on as the object type of the lookup request;
+ // parseGeneLoci answers null for a null response or one it cannot interpret
+ return parseGeneLoci(getResult(geneId, OBJECT_TYPE_GENE));
}
/**
* seq_region_name, start, end and returns an object that wraps them, or null
* if unsuccessful
*
- * @param br
+ * @param json
* @return
*/
- GeneLociI parseGeneLoci(BufferedReader br)
+ GeneLociI parseGeneLoci(Map<String, Object> json)
{
- JSONParser jp = new JSONParser();
+ if (json == null)
+ {
+ return null;
+ }
+
try
{
- JSONObject val = (JSONObject) jp.parse(br);
- final String species = val.get("species").toString();
- final String assembly = val.get("assembly_name").toString();
- final String chromosome = val.get("seq_region_name").toString();
- String strand = val.get("strand").toString();
- int start = Integer.parseInt(val.get("start").toString());
- int end = Integer.parseInt(val.get("end").toString());
+ // each field is dereferenced directly, so a missing one raises the
+ // NullPointerException that is caught below
+ final String species = json.get("species").toString();
+ final String assembly = json.get("assembly_name").toString();
+ final String chromosome = json.get("seq_region_name").toString();
+ String strand = json.get("strand").toString();
+ int start = Integer.parseInt(json.get("start").toString());
+ int end = Integer.parseInt(json.get("end").toString());
+ // number of positions from start to end inclusive
int fromEnd = end - start + 1;
+ // a strand value of "-1" is taken to mean the reverse strand
boolean reverseStrand = "-1".equals(strand);
int toStart = reverseStrand ? end : start;
return map;
}
};
- } catch (ParseException | NullPointerException | IOException
- | NumberFormatException | ClassCastException e)
+ } catch (NullPointerException | NumberFormatException e)
{
Cache.log.error("Error looking up gene loci: " + e.getMessage());
+ // NOTE(review): the error is already logged on the line above; check whether
+ // the stack trace print is still wanted
+ e.printStackTrace();
}
return null;
}