From: Jim Procter Date: Fri, 23 Feb 2018 16:01:42 +0000 (+0000) Subject: Merge branch 'features/JAL-2885UniprotHttps' into releases/Release_2_10_4_Branch X-Git-Tag: Release_2_10_4~58 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d7e95f458ebcbbdcc13f8b07357542ab2d7e4547;hp=-c;p=jalview.git Merge branch 'features/JAL-2885UniprotHttps' into releases/Release_2_10_4_Branch --- d7e95f458ebcbbdcc13f8b07357542ab2d7e4547 diff --combined src/jalview/ext/ensembl/EnsemblLookup.java index 92763a1,f31a3f0..0ddef2b --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@@ -34,13 -34,24 +34,26 @@@ import org.json.simple.parser.JSONParse import org.json.simple.parser.ParseException; /** - * A client for the Ensembl lookup REST endpoint; used to find the Parent gene - * identifier given a transcript identifier. + * A client for the Ensembl lookup REST endpoint, used to find the gene + * identifier given a gene, transcript or protein identifier. * * @author gmcarstairs */ public class EnsemblLookup extends EnsemblRestClient { + + private static final String OBJECT_TYPE_TRANSLATION = "Translation"; + private static final String PARENT = "Parent"; + private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript"; + private static final String ID = "id"; + private static final String OBJECT_TYPE_GENE = "Gene"; + private static final String OBJECT_TYPE = "object_type"; + ++ /** ++ * keep track of last identifier retrieved to break loops ++ */ + private String lastId; + /** * Default constructor (to use rest.ensembl.org) */ @@@ -75,26 -86,17 +88,26 @@@ protected URL getUrl(List ids) throws MalformedURLException { String identifier = ids.get(0); - return getUrl(identifier); + return getUrl(identifier, null); } /** + * Gets the url for lookup of the given identifier, optionally with objectType + * also specified in the request + * * @param identifier + * @param objectType * @return */ - protected URL getUrl(String identifier) + protected URL getUrl(String identifier, String objectType) { String url = getDomain() + "/lookup/id/" + identifier + CONTENT_TYPE_JSON; + if (objectType != null) + { + url += "&" + OBJECT_TYPE + "=" + objectType; + } + try { return new URL(url); @@@ -123,34 -125,27 +136,45 @@@ } /** + * Returns the gene id related to the given identifier, which may be for a + * gene, transcript or protein + * + * @param identifier + * @return + */ + public String getGeneId(String identifier) + { + return getGeneId(identifier, null); + } + + /** * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the * given identifier, or null if not found * * @param identifier + * @param objectType + * (optional) * @return */ - public String getGeneId(String identifier) + public String getGeneId(String identifier, String objectType) { List ids = Arrays.asList(new String[] { identifier }); BufferedReader br = null; try { - URL url = getUrl(identifier); ++ + URL url = getUrl(identifier, objectType); ++ + if (identifier.equals(lastId)) + { + System.err.println("** Ensembl lookup " + url.toString() + + " looping on Parent!"); + return null; + } ++ + lastId = identifier; ++ if (url != null) { br = getHttpResponse(url, ids); @@@ -195,19 -190,28 +219,19 @@@ String type = val.get(OBJECT_TYPE).toString(); if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) { + // got the gene - just returns its id geneId = val.get(ID).toString(); } else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) { + // got the transcript - return its (Gene) Parent geneId = val.get(PARENT).toString(); } else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) { + // got the protein - get its Parent, restricted to type Transcript String transcriptId = val.get(PARENT).toString(); - try - { - geneId = getGeneId(transcriptId); - } catch (StackOverflowError e) - { - /* - * unlikely data condition error! - */ - System.err - .println("** Ensembl lookup " - + getUrl(transcriptId).toString() - + " looping on Parent!"); - } + geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT); } } catch (ParseException e) { diff --combined src/jalview/ext/ensembl/EnsemblSequenceFetcher.java index c4abb20,eb1d399..0aaaf93 --- a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java +++ b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java @@@ -20,6 -20,7 +20,7 @@@ */ package jalview.ext.ensembl; + import jalview.bin.Cache; import jalview.datamodel.DBRefSource; import jalview.ws.seqfetcher.DbSourceProxyImpl; @@@ -32,6 -33,16 +33,16 @@@ import com.stevesoft.pat.Regex */ abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl { + // domain properties lookup keys: + protected static final String ENSEMBL_BASEURL = "ENSEMBL_BASEURL"; + + protected static final String ENSEMBL_GENOMES_BASEURL = "ENSEMBL_GENOMES_BASEURL"; + + // domain properties default values: + protected static final String DEFAULT_ENSEMBL_BASEURL = "https://rest.ensembl.org"; + + protected static final String DEFAULT_ENSEMBL_GENOMES_BASEURL = "https://rest.ensemblgenomes.org"; + /* * accepts ENSG/T/E/P with 11 digits * or ENSMUSP or similar for other species @@@ -41,22 -52,10 +52,22 @@@ "(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)"); - protected static final String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org"; + protected final String ensemblGenomesDomain; - protected static final String ENSEMBL_REST = "http://rest.ensembl.org"; + protected final String ensemblDomain; + protected static final String OBJECT_TYPE_TRANSLATION = "Translation"; + + protected static final String OBJECT_TYPE_TRANSCRIPT = "Transcript"; + + protected static final String OBJECT_TYPE_GENE = "Gene"; + + protected static final String PARENT = "Parent"; + + protected static final String ID = "id"; + + protected static final String OBJECT_TYPE = "object_type"; + /* * possible values for the 'feature' parameter of the /overlap REST service * @see http://rest.ensembl.org/documentation/info/overlap_id @@@ -68,13 -67,29 +79,29 @@@ constrained, regulatory } - private String domain = ENSEMBL_REST; + private String domain; + + /** + * Constructor + */ + public EnsemblSequenceFetcher() + { + /* + * the default domain names may be overridden in .jalview_properties; + * this allows an easy change from http to https in future if needed + */ + ensemblDomain = Cache.getDefault(ENSEMBL_BASEURL, + DEFAULT_ENSEMBL_BASEURL); + ensemblGenomesDomain = Cache.getDefault(ENSEMBL_GENOMES_BASEURL, + DEFAULT_ENSEMBL_GENOMES_BASEURL); + domain = ensemblDomain; + } @Override public String getDbSource() { // NB ensure Uniprot xrefs are canonicalised from "Ensembl" to "ENSEMBL" - if (ENSEMBL_GENOMES_REST.equals(getDomain())) + if (ensemblGenomesDomain.equals(getDomain())) { return DBRefSource.ENSEMBLGENOMES; } diff --combined test/jalview/ext/ensembl/EnsemblGeneTest.java index 1b1a2b4,b7ea0cb..217742d --- a/test/jalview/ext/ensembl/EnsemblGeneTest.java +++ b/test/jalview/ext/ensembl/EnsemblGeneTest.java @@@ -25,6 -25,7 +25,7 @@@ import static org.testng.AssertJUnit.as import static org.testng.AssertJUnit.assertTrue; import jalview.api.FeatureSettingsModelI; + import jalview.bin.Cache; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@@ -53,6 -54,7 +54,7 @@@ public class EnsemblGeneTes @BeforeClass(alwaysRun = true) public void setUp() { + Cache.loadProperties("test/jalview/io/testProps.jvprops"); SequenceOntologyFactory.setInstance(new SequenceOntologyLite()); } @@@ -173,8 -175,7 +175,8 @@@ // NMD_transcript_variant treated like transcript in Ensembl SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "", 22000, 22500, 0f, null); - sf3.setValue("Parent", "gene:" + geneId); + // id matching should not be case-sensitive + sf3.setValue("Parent", "gene:" + geneId.toLowerCase()); sf3.setValue("transcript_id", "transcript3"); genomic.addSequenceFeature(sf3); @@@ -260,9 -261,6 +262,9 @@@ sf.setValue("ID", "gene:" + accId); assertTrue(testee.identifiesSequence(sf, accId)); + // test is not case-sensitive + assertTrue(testee.identifiesSequence(sf, accId.toLowerCase())); + // transcript not valid: sf = new SequenceFeature("transcript", "", 1, 2, 0f, null); sf.setValue("ID", "gene:" + accId);