import org.json.simple.parser.ParseException;
/**
- * A client for the Ensembl lookup REST endpoint
+ * A client for the Ensembl /lookup REST endpoint, used to find the gene
+ * identifier given a gene, transcript or protein identifier, or to extract the
+ * species or chromosomal coordinates from the same service response
*
* @author gmcarstairs
*/
{
private static final String SPECIES = "species";
- private static final String PARENT = "Parent";
-
- private static final String OBJECT_TYPE_TRANSLATION = "Translation";
- private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
- private static final String ID = "id";
- private static final String OBJECT_TYPE_GENE = "Gene";
- private static final String OBJECT_TYPE = "object_type";
-
/**
* Default constructor (to use rest.ensembl.org)
*/
protected URL getUrl(List<String> ids) throws MalformedURLException
{
String identifier = ids.get(0);
- return getUrl(identifier);
+ return getUrl(identifier, null);
}
/**
+ * Gets the URL for lookup of the given identifier; if objectType is not null,
+ * it is also specified as a parameter in the request
+ *
* @param identifier
+ * @param objectType optional (may be null) object type to add to the request
* @return
*/
- protected URL getUrl(String identifier)
+ protected URL getUrl(String identifier, String objectType)
{
String url = getDomain() + "/lookup/id/" + identifier
+ CONTENT_TYPE_JSON;
+ if (objectType != null)
+ {
+ url += "&" + OBJECT_TYPE + "=" + objectType;
+ }
+
try
{
return new URL(url);
}
/**
- * Calls the Ensembl lookup REST endpoint and returns
- * <ul>
- * <li>the 'id' for the identifier if its type is "Gene"</li>
- * <li>the 'Parent' if its type is 'Transcript'</li>
- * <ul>
- * If the type is 'Translation', does a recursive call to this method, passing
- * in the 'Parent' (transcript id).
+ * Returns the gene id related to the given identifier (which may be for a
+ * gene, transcript or protein)
*
* @param identifier
* @return
*/
public String getGeneId(String identifier)
{
- return (String) getResult(identifier, br -> parseGeneId(br));
+ return (String) getResult(identifier, null, br -> parseGeneId(br));
}
/**
*/
public String getSpecies(String identifier)
{
- return (String) getResult(identifier, br -> getAttribute(br, SPECIES));
+ return (String) getResult(identifier, null,
+ br -> getAttribute(br, SPECIES));
}
/**
* response to the supplied parser
*
* @param identifier
+ * @param objectType
+ * (optional)
* @param parser
* @return
*/
- protected Object getResult(String identifier,
+ protected Object getResult(String identifier, String objectType,
Function<BufferedReader, Object> parser)
{
List<String> ids = Arrays.asList(new String[] { identifier });
BufferedReader br = null;
try
{
- URL url = getUrl(identifier);
+ URL url = getUrl(identifier, objectType);
if (url != null)
{
br = getHttpResponse(url, ids);
* Parses the JSON response and returns the gene identifier, or null if not
* found. If the returned object_type is Gene, returns the id, if Transcript
* returns the Parent. If it is Translation (peptide identifier), then the
- * Parent is the transcript identifier, so we redo the search with this value.
+ * Parent is the transcript identifier, so we redo the search with this value,
+ * specifying that object_type should be Transcript.
*
* @param br
* @return
String type = val.get(OBJECT_TYPE).toString();
if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
{
- geneId = val.get(ID).toString();
+ // got the gene - just return its id
+ geneId = val.get(JSON_ID).toString();
}
else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
{
+ // got the transcript - return its (Gene) Parent
geneId = val.get(PARENT).toString();
}
else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
{
+ // got the protein - get its Parent, restricted to type Transcript
String transcriptId = val.get(PARENT).toString();
- try
- {
- geneId = getGeneId(transcriptId);
- } catch (StackOverflowError e)
- {
- /*
- * unlikely data condition error!
- */
- System.err
- .println("** Ensembl lookup "
- + getUrl(transcriptId).toString()
- + " looping on Parent!");
- }
+ geneId = (String) getResult(transcriptId, OBJECT_TYPE_TRANSCRIPT,
+ reader -> parseGeneId(reader));
}
} catch (ParseException | IOException e)
{
*/
public GeneLociI getGeneLoci(String geneId)
{
- return (GeneLociI) getResult(geneId, br -> parseGeneLoci(br));
+ return (GeneLociI) getResult(geneId, OBJECT_TYPE_GENE,
+ br -> parseGeneLoci(br));
}
/**
| NumberFormatException | ClassCastException e)
{
Cache.log.error("Error looking up gene loci: " + e.getMessage());
+ e.printStackTrace();
}
return null;
}