}
/**
- * Converts a query, which may contain one or more gene or transcript
- * identifiers, into a non-redundant list of gene identifiers.
+ * Converts a query, which may contain one or more gene, transcript, or
+ * external (to Ensembl) identifiers, into a non-redundant list of gene
+ * identifiers.
*
* @param accessions
* @return
for (String acc : accessions.split(getAccessionSeparator()))
{
- if (isGeneIdentifier(acc))
- {
- if (!geneIds.contains(acc))
- {
- geneIds.add(acc);
- }
- }
-
/*
- * if given a transcript id, look up its gene parent
+ * First try lookup as an Ensembl (gene or transcript) identifier
*/
- else if (isTranscriptIdentifier(acc))
+ String geneId = new EnsemblLookup(getDomain()).getGeneId(acc);
+ if (geneId != null)
{
- String geneId = new EnsemblLookup(getDomain()).getParent(acc);
- if (geneId != null && !geneIds.contains(geneId))
+ if (!geneIds.contains(geneId))
{
geneIds.add(geneId);
}
}
- else if (isProteinIdentifier(acc))
- {
- String tscriptId = new EnsemblLookup(getDomain()).getParent(acc);
- if (tscriptId != null)
- {
- String geneId = new EnsemblLookup(getDomain())
- .getParent(tscriptId);
-
- if (geneId != null && !geneIds.contains(geneId))
- {
- geneIds.add(geneId);
- }
- }
- // NOTE - acc is lost if it resembles an ENS.+ ID but isn't actually
- // resolving to one... e.g. ENSMICP00000009241
- }
- /*
- * if given a gene or other external name, lookup and fetch
- * the corresponding gene for all model organisms
- */
else
{
+ /*
+ * if given a gene or other external name, lookup and fetch
+ * the corresponding gene for all model organisms
+ */
List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
- getDbVersion()).getIds(acc);
- for (String geneId : ids)
+ getDbVersion()).getGeneIds(acc);
+ for (String id : ids)
{
- if (!geneIds.contains(geneId))
+ if (!geneIds.contains(id))
{
- geneIds.add(geneId);
+ geneIds.add(id);
}
}
}
}
/**
- * Attempts to get Ensembl stable identifiers for model organisms for a gene
- * name by calling the xrefs symbol REST service to resolve the gene name.
- *
- * @param query
- * @return
- */
- protected String getGeneIdentifiersForName(String query)
- {
- List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
- getDbVersion()).getIds(query);
- if (ids != null)
- {
- for (String id : ids)
- {
- if (isGeneIdentifier(id))
- {
- return id;
- }
- }
- }
- return null;
- }
-
- /**
* Constructs all transcripts for the gene, as identified by "transcript"
* features whose Parent is the requested gene. The coding transcript
* sequences (i.e. with introns omitted) are added to the alignment.
}
@Override
- public boolean isGeneIdentifier(String query)
- {
- return true;
- }
-
- @Override
public String getDbName()
{
return "EnsemblGenomes";
public class EnsemblLookup extends EnsemblRestClient
{
+ private static final String OBJECT_TYPE_TRANSLATION = "Translation";
+ private static final String PARENT = "Parent";
+ private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
+ private static final String ID = "id";
+ private static final String OBJECT_TYPE_GENE = "Gene";
+ private static final String OBJECT_TYPE = "object_type";
+
/**
* Default constructor (to use rest.ensembl.org)
*/
* @param identifier
* @return
*/
- public String getParent(String identifier)
+ public String getGeneId(String identifier)
{
List<String> ids = Arrays.asList(new String[] { identifier });
{
br = getHttpResponse(url, ids);
}
- return (parseResponse(br));
+ return br == null ? null : parseResponse(br);
} catch (IOException e)
{
// ignore
}
/**
- * Parses "Parent" from the JSON response and returns the value, or null if
- * not found
+ * Parses the JSON response and returns the gene identifier, or null if not
+ * found. If the returned object_type is Gene, returns the id, if Transcript
+ * returns the Parent. If it is Translation (peptide identifier), then the
+ * Parent is the transcript identifier, so we redo the search with this value.
*
* @param br
* @return
*/
protected String parseResponse(BufferedReader br) throws IOException
{
- String parent = null;
+ String geneId = null;
JSONParser jp = new JSONParser();
try
{
JSONObject val = (JSONObject) jp.parse(br);
- parent = val.get("Parent").toString();
+ String type = val.get(OBJECT_TYPE).toString();
+ if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
+ {
+ geneId = val.get(ID).toString();
+ }
+ else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
+ {
+ geneId = val.get(PARENT).toString();
+ }
+ else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
+ {
+ String transcriptId = val.get(PARENT).toString();
+ try
+ {
+ geneId = getGeneId(transcriptId);
+ } catch (StackOverflowError e)
+ {
+ /*
+ * unlikely data condition error!
+ */
+ System.err
+ .println("** Ensembl lookup "
+ + getUrl(transcriptId).toString()
+ + " looping on Parent!");
+ }
+ }
} catch (ParseException e)
{
// ignore
}
- return parent;
+ return geneId;
}
}
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
-import java.util.List;
-
import com.stevesoft.pat.Regex;
/**
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
-import com.stevesoft.pat.Regex;
-
/**
* Base class for Ensembl REST service clients
*
private final static long VERSION_RETEST_INTERVAL = 1000L * 3600; // 1 hr
- private static final Regex PROTEIN_REGEX = new Regex(
- "(ENS)([A-Z]{3}|)P[0-9]{11}$");
-
- private static final Regex TRANSCRIPT_REGEX = new Regex(
- "(ENS)([A-Z]{3}|)T[0-9]{11}$");
-
- private static final Regex GENE_REGEX = new Regex(
- "(ENS)([A-Z]{3}|)G[0-9]{11}$");
-
static
{
domainData = new HashMap<String, EnsemblInfo>();
setDomain(d);
}
- /**
- * Answers true if the query matches the regular expression pattern for an
- * Ensembl transcript stable identifier
- *
- * @param query
- * @return
- */
- public boolean isTranscriptIdentifier(String query)
- {
- return query == null ? false : TRANSCRIPT_REGEX.search(query);
- }
-
- /**
- * Answers true if the query matches the regular expression pattern for an
- * Ensembl protein stable identifier
- *
- * @param query
- * @return
- */
- public boolean isProteinIdentifier(String query)
- {
- return query == null ? false : PROTEIN_REGEX.search(query);
- }
-
- /**
- * Answers true if the query matches the regular expression pattern for an
- * Ensembl gene stable identifier
- *
- * @param query
- * @return
- */
- public boolean isGeneIdentifier(String query)
- {
- return query == null ? false : GENE_REGEX.search(query);
- }
-
@Override
public boolean queryInProgress()
{
* if ping takes more than 2 seconds to respond, treat as if unavailable
*/
br = getHttpResponse(ping, null, 2 * 1000);
+ if (br == null)
+ {
+ // error reponse status
+ return false;
+ }
JSONParser jp = new JSONParser();
JSONObject val = (JSONObject) jp.parse(br);
String pingString = val.get("ping").toString();
void checkRateLimits(HttpURLConnection connection)
{
// number of requests allowed per time interval:
- String limit = connection.getHeaderField("X-RateLimit-Limit");
+ // String limit = connection.getHeaderField("X-RateLimit-Limit");
// length of quota time interval in seconds:
// String period = connection.getHeaderField("X-RateLimit-Period");
// seconds remaining until usage quota is reset:
- String reset = connection.getHeaderField("X-RateLimit-Reset");
+ // String reset = connection.getHeaderField("X-RateLimit-Reset");
// number of requests remaining from quota for current period:
- String remaining = connection.getHeaderField("X-RateLimit-Remaining");
+ // String remaining = connection.getHeaderField("X-RateLimit-Remaining");
// number of seconds to wait before retrying (if remaining == 0)
String retryDelay = connection.getHeaderField("Retry-After");
url = new URL(
getDomain() + "/info/rest?content-type=application/json");
BufferedReader br = getHttpResponse(url, null);
+ if (br == null)
+ {
+ return;
+ }
JSONObject val = (JSONObject) jp.parse(br);
String version = val.get("release").toString();
String majorVersion = version.substring(0, version.indexOf("."));
url = new URL(
getDomain() + "/info/data?content-type=application/json");
BufferedReader br = getHttpResponse(url, null);
- JSONObject val = (JSONObject) jp.parse(br);
- JSONArray versions = (JSONArray) val.get("releases");
- domainData.get(getDomain()).dataVersion = versions.get(0).toString();
+ if (br != null)
+ {
+ JSONObject val = (JSONObject) jp.parse(br);
+ JSONArray versions = (JSONArray) val.get("releases");
+ domainData.get(getDomain()).dataVersion = versions.get(0)
+ .toString();
+ }
} catch (Throwable t)
{
System.err.println(
{
JSONObject val = (JSONObject) rvals.next();
String id = val.get("id").toString();
- if (id != null && isGeneIdentifier(id))
+ String type = val.get("type").toString();
+ if (id != null && "gene".equals(type))
{
result = id;
break;
return result;
}
- protected URL getUrl(String id, Species species)
+ /**
+ * Constructs the URL for the REST symbol endpoint
+ *
+ * @param id
+ * the accession id (Ensembl or external)
+ * @param species
+ * a species name recognisable by Ensembl
+ * @param type
+ * an optional type to filter the response (gene, transcript,
+ * translation)
+ * @return
+ */
+ protected URL getUrl(String id, Species species, String... type)
{
- String url = getDomain() + "/xrefs/symbol/" + species.toString() + "/"
- + id + "?content-type=application/json";
+ StringBuilder sb = new StringBuilder();
+ sb.append(getDomain()).append("/xrefs/symbol/")
+ .append(species.toString()).append("/").append(id)
+ .append("?content-type=application/json");
+ for (String t : type)
+ {
+ sb.append("&object_type=").append(t);
+ }
try
{
+ String url = sb.toString();
return new URL(url);
} catch (MalformedURLException e)
{
* @param identifier
* @return
*/
- public List<String> getIds(String identifier)
+ public List<String> getGeneIds(String identifier)
{
List<String> result = new ArrayList<String>();
List<String> ids = new ArrayList<String>();
{
if (taxon.isModelOrganism())
{
- URL url = getUrl(query, taxon);
+ URL url = getUrl(query, taxon, "gene");
if (url != null)
{
br = getHttpResponse(url, ids);
- }
- String geneId = parseSymbolResponse(br);
- if (geneId != null)
- {
- result.add(geneId);
+ if (br != null)
+ {
+ String geneId = parseSymbolResponse(br);
+ if (geneId != null && !result.contains(geneId))
+ {
+ result.add(geneId);
+ }
+ }
}
}
}
{
br = getHttpResponse(url, ids);
}
- return (parseResponse(br));
+ if (br != null)
+ {
+ result = parseResponse(br);
+ }
} catch (IOException e)
{
// ignore
while (rvals.hasNext())
{
JSONObject val = (JSONObject) rvals.next();
- String dbName = val.get("dbname").toString();
- if (dbName.equals(GO_GENE_ONTOLOGY))
- {
- continue;
- }
+ String db = val.get("dbname").toString();
String id = val.get("primary_id").toString();
- if (dbName != null && id != null)
+ if (db != null && id != null
+ && !GO_GENE_ONTOLOGY.equals(db))
{
- dbName = DBRefUtils.getCanonicalName(dbName);
- DBRefEntry dbref = new DBRefEntry(dbName, getXRefVersion(), id);
+ db = DBRefUtils.getCanonicalName(db);
+ DBRefEntry dbref = new DBRefEntry(db, getXRefVersion(), id);
result.add(dbref);
}
}
}
- @Test(groups = "Functional")
- public void testIsTranscriptIdentifier()
- {
- EnsemblSeqProxy testee = new EnsemblGene();
- assertFalse(testee.isTranscriptIdentifier(null));
- assertFalse(testee.isTranscriptIdentifier(""));
- assertFalse(testee.isTranscriptIdentifier("ENSG00000012345"));
- assertTrue(testee.isTranscriptIdentifier("ENST00000012345"));
- assertTrue(testee.isTranscriptIdentifier("ENSMUST00000012345"));
- assertFalse(testee.isTranscriptIdentifier("enst00000012345"));
- assertFalse(testee.isTranscriptIdentifier("ENST000000123456"));
- assertFalse(testee.isTranscriptIdentifier("ENST0000001234"));
- }
-
- @Test(groups = "Functional")
- public void testIsGeneIdentifier()
- {
- EnsemblSeqProxy testee = new EnsemblGene();
- assertFalse(testee.isGeneIdentifier(null));
- assertFalse(testee.isGeneIdentifier(""));
- assertFalse(testee.isGeneIdentifier("ENST00000012345"));
- assertTrue(testee.isGeneIdentifier("ENSG00000012345"));
- assertTrue(testee.isGeneIdentifier("ENSMUSG00000012345"));
- assertFalse(testee.isGeneIdentifier("ensg00000012345"));
- assertFalse(testee.isGeneIdentifier("ENSG000000123456"));
- assertFalse(testee.isGeneIdentifier("ENSG0000001234"));
- }
-
/**
* Test the method that appends a single allele's reverse complement to a
* string buffer