import org.json.simple.parser.ParseException;
/**
- * A client for the Ensembl lookup REST endpoint; used to find the Parent gene
- * identifier given a transcript identifier.
+ * A client for the Ensembl lookup REST endpoint, used to find the gene
+ * identifier given a gene, transcript or protein identifier.
*
* @author gmcarstairs
*/
public class EnsemblLookup extends EnsemblRestClient
{
+
+ private static final String OBJECT_TYPE_TRANSLATION = "Translation";
+ private static final String PARENT = "Parent";
+ private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
+ private static final String ID = "id";
+ private static final String OBJECT_TYPE_GENE = "Gene";
+ private static final String OBJECT_TYPE = "object_type";
+
++ /**
++ * keep track of last identifier retrieved to break loops
++ */
+ private String lastId;
+
/**
* Default constructor (to use rest.ensembl.org)
*/
protected URL getUrl(List<String> ids) throws MalformedURLException
{
String identifier = ids.get(0);
- return getUrl(identifier);
+ return getUrl(identifier, null);
}
/**
+ * Gets the url for lookup of the given identifier, optionally with objectType
+ * also specified in the request
+ *
* @param identifier
+ * @param objectType
* @return
*/
- protected URL getUrl(String identifier)
+ protected URL getUrl(String identifier, String objectType)
{
String url = getDomain() + "/lookup/id/" + identifier
+ CONTENT_TYPE_JSON;
+ if (objectType != null)
+ {
+ url += "&" + OBJECT_TYPE + "=" + objectType;
+ }
+
try
{
return new URL(url);
}
/**
+ * Returns the gene id related to the given identifier, which may be for a
+ * gene, transcript or protein
+ *
+ * @param identifier
+ * @return
+ */
+ public String getGeneId(String identifier)
+ {
+ return getGeneId(identifier, null);
+ }
+
+ /**
* Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the
* given identifier, or null if not found
*
* @param identifier
+ * @param objectType
+ * (optional)
* @return
*/
- public String getGeneId(String identifier)
+ public String getGeneId(String identifier, String objectType)
{
List<String> ids = Arrays.asList(new String[] { identifier });
BufferedReader br = null;
try
{
- URL url = getUrl(identifier);
++
+ URL url = getUrl(identifier, objectType);
++
+ if (identifier.equals(lastId))
+ {
+ System.err.println("** Ensembl lookup " + url.toString()
+ + " looping on Parent!");
+ return null;
+ }
++
+ lastId = identifier;
++
if (url != null)
{
br = getHttpResponse(url, ids);
String type = val.get(OBJECT_TYPE).toString();
if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
{
+ // got the gene - just returns its id
geneId = val.get(ID).toString();
}
else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
{
+ // got the transcript - return its (Gene) Parent
geneId = val.get(PARENT).toString();
}
else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
{
+ // got the protein - get its Parent, restricted to type Transcript
String transcriptId = val.get(PARENT).toString();
- try
- {
- geneId = getGeneId(transcriptId);
- } catch (StackOverflowError e)
- {
- /*
- * unlikely data condition error!
- */
- System.err
- .println("** Ensembl lookup "
- + getUrl(transcriptId).toString()
- + " looping on Parent!");
- }
+ geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT);
}
} catch (ParseException e)
{
*/
package jalview.ext.ensembl;
+ import jalview.bin.Cache;
import jalview.datamodel.DBRefSource;
import jalview.ws.seqfetcher.DbSourceProxyImpl;
*/
abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
{
+ // domain properties lookup keys:
+ protected static final String ENSEMBL_BASEURL = "ENSEMBL_BASEURL";
+
+ protected static final String ENSEMBL_GENOMES_BASEURL = "ENSEMBL_GENOMES_BASEURL";
+
+ // domain properties default values:
+ protected static final String DEFAULT_ENSEMBL_BASEURL = "https://rest.ensembl.org";
+
+ protected static final String DEFAULT_ENSEMBL_GENOMES_BASEURL = "https://rest.ensemblgenomes.org";
+
/*
* accepts ENSG/T/E/P with 11 digits
* or ENSMUSP or similar for other species
"(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|"
+ "(CCDS[0-9.]{3,}$)");
- protected static final String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org";
+ protected final String ensemblGenomesDomain;
- protected static final String ENSEMBL_REST = "http://rest.ensembl.org";
+ protected final String ensemblDomain;
+ protected static final String OBJECT_TYPE_TRANSLATION = "Translation";
+
+ protected static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
+
+ protected static final String OBJECT_TYPE_GENE = "Gene";
+
+ protected static final String PARENT = "Parent";
+
+ protected static final String ID = "id";
+
+ protected static final String OBJECT_TYPE = "object_type";
+
/*
* possible values for the 'feature' parameter of the /overlap REST service
* @see http://rest.ensembl.org/documentation/info/overlap_id
constrained, regulatory
}
- private String domain = ENSEMBL_REST;
+ private String domain;
+
+ /**
+ * Constructor
+ */
+ public EnsemblSequenceFetcher()
+ {
+ /*
+ * the default domain names may be overridden in .jalview_properties;
+ * this allows an easy change from http to https in future if needed
+ */
+ ensemblDomain = Cache.getDefault(ENSEMBL_BASEURL,
+ DEFAULT_ENSEMBL_BASEURL);
+ ensemblGenomesDomain = Cache.getDefault(ENSEMBL_GENOMES_BASEURL,
+ DEFAULT_ENSEMBL_GENOMES_BASEURL);
+ domain = ensemblDomain;
+ }
@Override
public String getDbSource()
{
// NB ensure Uniprot xrefs are canonicalised from "Ensembl" to "ENSEMBL"
- if (ENSEMBL_GENOMES_REST.equals(getDomain()))
+ if (ensemblGenomesDomain.equals(getDomain()))
{
return DBRefSource.ENSEMBLGENOMES;
}
import static org.testng.AssertJUnit.assertTrue;
import jalview.api.FeatureSettingsModelI;
+ import jalview.bin.Cache;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
@BeforeClass(alwaysRun = true)
public void setUp()
{
+ Cache.loadProperties("test/jalview/io/testProps.jvprops");
SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
}
// NMD_transcript_variant treated like transcript in Ensembl
SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
22000, 22500, 0f, null);
- sf3.setValue("Parent", "gene:" + geneId);
+ // id matching should not be case-sensitive
+ sf3.setValue("Parent", "gene:" + geneId.toLowerCase());
sf3.setValue("transcript_id", "transcript3");
genomic.addSequenceFeature(sf3);
sf.setValue("ID", "gene:" + accId);
assertTrue(testee.identifiesSequence(sf, accId));
+ // test is not case-sensitive
+ assertTrue(testee.identifiesSequence(sf, accId.toLowerCase()));
+
// transcript not valid:
sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
sf.setValue("ID", "gene:" + accId);