import org.json.simple.parser.ParseException;
/**
- * A client for the Ensembl lookup REST endpoint; used to find the Parent gene
- * identifier given a transcript identifier.
+ * A client for the Ensembl lookup REST endpoint, used to find the gene
+ * identifier given a gene, transcript or protein identifier.
*
* @author gmcarstairs
- *
*/
public class EnsemblLookup extends EnsemblRestClient
{
private static final String OBJECT_TYPE_GENE = "Gene";
private static final String OBJECT_TYPE = "object_type";
+ /**
+ * keep track of last identifier retrieved to break loops
+ */
private String lastId;
/**
protected URL getUrl(List<String> ids) throws MalformedURLException
{
String identifier = ids.get(0);
- return getUrl(identifier);
+ return getUrl(identifier, null);
}
/**
+ * Gets the url for lookup of the given identifier, optionally with objectType
+ * also specified in the request
+ *
* @param identifier
+ * @param objectType
* @return
*/
- protected URL getUrl(String identifier)
+ protected URL getUrl(String identifier, String objectType)
{
String url = getDomain() + "/lookup/id/" + identifier
+ CONTENT_TYPE_JSON;
+ if (objectType != null)
+ {
+ url += "&" + OBJECT_TYPE + "=" + objectType;
+ }
+
try
{
return new URL(url);
}
/**
+ * Returns the gene id related to the given identifier, which may be for a
+ * gene, transcript or protein
+ *
+ * @param identifier
+ * @return
+ */
+ public String getGeneId(String identifier)
+ {
+ return getGeneId(identifier, null);
+ }
+
+ /**
* Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the
* given identifier, or null if not found
*
* @param identifier
+ * @param objectType
+ * (optional)
* @return
*/
- public String getGeneId(String identifier)
+ public String getGeneId(String identifier, String objectType)
{
List<String> ids = Arrays.asList(new String[] { identifier });
BufferedReader br = null;
try
{
- URL url = getUrl(identifier);
+
+ URL url = getUrl(identifier, objectType);
+
if (identifier.equals(lastId))
{
System.err.println("** Ensembl lookup " + url.toString()
+ " looping on Parent!");
return null;
}
+
lastId = identifier;
+
if (url != null)
{
br = getHttpResponse(url, ids);
String type = val.get(OBJECT_TYPE).toString();
if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
{
+ // got the gene - just returns its id
geneId = val.get(ID).toString();
}
else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
{
+ // got the transcript - return its (Gene) Parent
geneId = val.get(PARENT).toString();
}
else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
{
+ // got the protein - get its Parent, restricted to type Transcript
String transcriptId = val.get(PARENT).toString();
- try
- {
- geneId = getGeneId(transcriptId);
- } catch (StackOverflowError e)
- {
- /*
- * unlikely data condition error!
- */
- System.err
- .println("** Ensembl lookup "
- + getUrl(transcriptId).toString()
- + " looping on Parent!");
- }
+ geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT);
}
} catch (ParseException e)
{