From 17ff1f476e009b3a3c7e892e416edc2a4af8a2bc Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 16 Jan 2018 16:27:19 +0000 Subject: [PATCH] JAL-2679 use object_type=Transcript for lookup of Parent for Protein --- src/jalview/ext/ensembl/EnsemblCdna.java | 18 ++---- src/jalview/ext/ensembl/EnsemblGene.java | 17 ++++-- src/jalview/ext/ensembl/EnsemblGenome.java | 2 +- src/jalview/ext/ensembl/EnsemblLookup.java | 61 +++++++++++--------- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 26 +++------ .../ext/ensembl/EnsemblSequenceFetcher.java | 12 ++++ src/jalview/ext/ensembl/EnsemblSymbol.java | 2 - 7 files changed, 70 insertions(+), 68 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java index 6d031b7..952f01e 100644 --- a/src/jalview/ext/ensembl/EnsemblCdna.java +++ b/src/jalview/ext/ensembl/EnsemblCdna.java @@ -24,9 +24,6 @@ import jalview.datamodel.SequenceFeature; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; -import java.util.HashMap; -import java.util.Map; - import com.stevesoft.pat.Regex; /** @@ -47,13 +44,6 @@ public class EnsemblCdna extends EnsemblSeqProxy private static final Regex ACCESSION_REGEX = new Regex( "(ENS([A-Z]{3}|)[TG][0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)"); - private static Map params = new HashMap(); - - static - { - params.put("object_type", "transcript"); - } - /* * fetch exon features on genomic sequence (to identify the cdna regions) * and cds and variation features (to retain) @@ -139,13 +129,13 @@ public class EnsemblCdna extends EnsemblSeqProxy } /** - * Parameter object_type=cdna added to ensure cdna and not peptide is returned - * (JAL-2529) + * Parameter object_type=Transcript added to ensure cdna and not peptide is + * returned (JAL-2529) */ @Override - protected Map getAdditionalParameters() + protected String getObjectType() { - return params; + return OBJECT_TYPE_TRANSCRIPT; } } diff --git a/src/jalview/ext/ensembl/EnsemblGene.java 
b/src/jalview/ext/ensembl/EnsemblGene.java index 50dfa90..919134c 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -98,6 +98,12 @@ public class EnsemblGene extends EnsemblSeqProxy return EnsemblSeqType.GENOMIC; } + @Override + protected String getObjectType() + { + return OBJECT_TYPE_GENE; + } + /** * Returns an alignment containing the gene(s) for the given gene or * transcript identifier, or external identifier (e.g. Uniprot id). If given a @@ -170,7 +176,7 @@ public class EnsemblGene extends EnsemblSeqProxy */ List getGeneIds(String accessions) { - List geneIds = new ArrayList(); + List geneIds = new ArrayList<>(); for (String acc : accessions.split(getAccessionSeparator())) { @@ -305,7 +311,7 @@ public class EnsemblGene extends EnsemblSeqProxy int transcriptLength = 0; final char[] geneChars = gene.getSequence(); int offset = gene.getStart(); // to convert to 0-based positions - List mappedFrom = new ArrayList(); + List mappedFrom = new ArrayList<>(); for (SequenceFeature sf : splices) { @@ -347,7 +353,7 @@ public class EnsemblGene extends EnsemblSeqProxy * transfer features to the new sequence; we use EnsemblCdna to do this, * to filter out unwanted features types (see method retainFeature) */ - List mapTo = new ArrayList(); + List mapTo = new ArrayList<>(); mapTo.add(new int[] { 1, transcriptLength }); MapList mapping = new MapList(mappedFrom, mapTo, 1, 1); EnsemblCdna cdna = new EnsemblCdna(getDomain()); @@ -395,7 +401,7 @@ public class EnsemblGene extends EnsemblSeqProxy protected List getTranscriptFeatures(String accId, SequenceI geneSequence) { - List transcriptFeatures = new ArrayList(); + List transcriptFeatures = new ArrayList<>(); String parentIdentifier = GENE_PREFIX + accId; @@ -444,7 +450,8 @@ public class EnsemblGene extends EnsemblSeqProxy if (SequenceOntologyFactory.getInstance().isA(sf.getType(), SequenceOntologyI.GENE)) { - String id = (String) sf.getValue(ID); + // NB features as gff use 
'ID'; rest services return as 'id' + String id = (String) sf.getValue("ID"); if ((GENE_PREFIX + accId).equals(id)) { return true; diff --git a/src/jalview/ext/ensembl/EnsemblGenome.java b/src/jalview/ext/ensembl/EnsemblGenome.java index 458a233..bde3c0f 100644 --- a/src/jalview/ext/ensembl/EnsemblGenome.java +++ b/src/jalview/ext/ensembl/EnsemblGenome.java @@ -103,7 +103,7 @@ public class EnsemblGenome extends EnsemblSeqProxy { if (isTranscript(sf.getType())) { - String id = (String) sf.getValue(ID); + String id = (String) sf.getValue("ID"); if (("transcript:" + accId).equals(id)) { return true; diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index 31da9c0..92763a1 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -34,22 +34,13 @@ import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** - * A client for the Ensembl lookup REST endpoint; used to find the Parent gene - * identifier given a transcript identifier. + * A client for the Ensembl lookup REST endpoint, used to find the gene + * identifier given a gene, transcript or protein identifier. 
* * @author gmcarstairs - * */ public class EnsemblLookup extends EnsemblRestClient { - - private static final String OBJECT_TYPE_TRANSLATION = "Translation"; - private static final String PARENT = "Parent"; - private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript"; - private static final String ID = "id"; - private static final String OBJECT_TYPE_GENE = "Gene"; - private static final String OBJECT_TYPE = "object_type"; - /** * Default constructor (to use rest.ensembl.org) */ @@ -84,17 +75,26 @@ public class EnsemblLookup extends EnsemblRestClient protected URL getUrl(List ids) throws MalformedURLException { String identifier = ids.get(0); - return getUrl(identifier); + return getUrl(identifier, null); } /** + * Gets the url for lookup of the given identifier, optionally with objectType + * also specified in the request + * * @param identifier + * @param objectType * @return */ - protected URL getUrl(String identifier) + protected URL getUrl(String identifier, String objectType) { String url = getDomain() + "/lookup/id/" + identifier + CONTENT_TYPE_JSON; + if (objectType != null) + { + url += "&" + OBJECT_TYPE + "=" + objectType; + } + try { return new URL(url); @@ -123,20 +123,34 @@ public class EnsemblLookup extends EnsemblRestClient } /** + * Returns the gene id related to the given identifier, which may be for a + * gene, transcript or protein + * + * @param identifier + * @return + */ + public String getGeneId(String identifier) + { + return getGeneId(identifier, null); + } + + /** * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the * given identifier, or null if not found * * @param identifier + * @param objectType + * (optional) * @return */ - public String getGeneId(String identifier) + public String getGeneId(String identifier, String objectType) { List ids = Arrays.asList(new String[] { identifier }); BufferedReader br = null; try { - URL url = getUrl(identifier); + URL url = getUrl(identifier, objectType); if (url != 
null) { br = getHttpResponse(url, ids); @@ -181,28 +195,19 @@ public class EnsemblLookup extends EnsemblRestClient String type = val.get(OBJECT_TYPE).toString(); if (OBJECT_TYPE_GENE.equalsIgnoreCase(type)) { + // got the gene - just return its id geneId = val.get(ID).toString(); } else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type)) { + // got the transcript - return its (Gene) Parent geneId = val.get(PARENT).toString(); } else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type)) { + // got the protein - get its Parent, restricted to type Transcript String transcriptId = val.get(PARENT).toString(); - try - { - geneId = getGeneId(transcriptId); - } catch (StackOverflowError e) - { - /* - * unlikely data condition error! - */ - System.err - .println("** Ensembl lookup " - + getUrl(transcriptId).toString() - + " looping on Parent!"); - } + geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT); } } catch (ParseException e) { diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 577111e..24d1b95 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -48,8 +48,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Map; -import java.util.Map.Entry; /** * Base class for Ensembl sequence fetchers @@ -61,10 +59,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { private static final String ALLELES = "alleles"; - protected static final String PARENT = "Parent"; - - protected static final String ID = "ID"; - protected static final String NAME = "Name"; protected static final String DESCRIPTION = "description"; @@ -472,14 +466,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient urlstring.append("?type=").append(getSourceEnsemblType().getType()); urlstring.append(("&Accept=text/x-fasta")); - Map params = getAdditionalParameters(); - if (params != 
null) + String objectType = getObjectType(); + if (objectType != null) { - for (Entry entry : params.entrySet()) - { - urlstring.append("&").append(entry.getKey()).append("=") - .append(entry.getValue()); - } + urlstring.append("&").append(OBJECT_TYPE).append("=") + .append(objectType); } URL url = new URL(urlstring.toString()); @@ -487,11 +478,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Override this method to add any additional x=y URL parameters needed + * Override this method to specify object_type request parameter * * @return */ - protected Map getAdditionalParameters() + protected String getObjectType() { return null; } @@ -560,7 +551,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence, String accId, int start) { - // SequenceFeature[] sfs = sourceSequence.getSequenceFeatures(); List sfs = sourceSequence.getFeatures() .getPositionalFeatures(); if (sfs.isEmpty()) @@ -572,7 +562,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * generously initial size for number of cds regions * (worst case titin Q8WZ42 has c. 
313 exons) */ - List regions = new ArrayList(100); + List regions = new ArrayList<>(100); int mappedLength = 0; int direction = 1; // forward boolean directionSet = false; @@ -899,7 +889,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected List findFeatures(SequenceI sequence, String term, String parentId) { - List result = new ArrayList(); + List result = new ArrayList<>(); List sfs = sequence.getFeatures() .getFeaturesByOntology(term); diff --git a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java index 598dba1..c4abb20 100644 --- a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java +++ b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java @@ -45,6 +45,18 @@ abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl protected static final String ENSEMBL_REST = "http://rest.ensembl.org"; + protected static final String OBJECT_TYPE_TRANSLATION = "Translation"; + + protected static final String OBJECT_TYPE_TRANSCRIPT = "Transcript"; + + protected static final String OBJECT_TYPE_GENE = "Gene"; + + protected static final String PARENT = "Parent"; + + protected static final String ID = "id"; + + protected static final String OBJECT_TYPE = "object_type"; + /* * possible values for the 'feature' parameter of the /overlap REST service * @see http://rest.ensembl.org/documentation/info/overlap_id diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java index 75598a0..e3b6c93 100644 --- a/src/jalview/ext/ensembl/EnsemblSymbol.java +++ b/src/jalview/ext/ensembl/EnsemblSymbol.java @@ -44,8 +44,6 @@ public class EnsemblSymbol extends EnsemblXref { private static final String GENE = "gene"; private static final String TYPE = "type"; - private static final String ID = "id"; - /** * Constructor given the target domain to fetch data from * -- 1.7.10.2