From a9f0472fe6fd4737b47d7955d198e76923e6aabc Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 29 Jan 2016 16:11:36 +0000 Subject: [PATCH] JAL-1705 fetch Uniprot and PDB xrefs for Ensembl protein products --- src/jalview/ext/ensembl/EnsemblRestClient.java | 23 +++- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 25 ++++ src/jalview/ext/ensembl/EnsemblXref.java | 165 ++++++++++++++++++++++++ src/jalview/util/DBRefUtils.java | 5 + 4 files changed, 215 insertions(+), 3 deletions(-) create mode 100644 src/jalview/ext/ensembl/EnsemblXref.java diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java index f81bce2..2fd7fa3 100644 --- a/src/jalview/ext/ensembl/EnsemblRestClient.java +++ b/src/jalview/ext/ensembl/EnsemblRestClient.java @@ -122,6 +122,24 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher { URL url = getUrl(ids); + BufferedReader reader = getHttpResponse(url, ids); + FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST"); + return fp; + } + + /** + * Writes the HTTP request and gets the response as a reader. + * + * @param url + * @param ids + * written as Json POST body if more than one + * @return + * @throws IOException + * if response code was not 200, or other I/O error + */ + protected BufferedReader getHttpResponse(URL url, List ids) + throws IOException + { HttpURLConnection connection = (HttpURLConnection) url.openConnection(); /* @@ -153,15 +171,14 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher * note: a GET request for an invalid id returns an error code e.g. 415 * but POST request returns 200 and an empty Fasta response */ - throw new RuntimeException( + throw new IOException( "Response code was not 200. Detected response was " + responseCode); } BufferedReader reader = null; reader = new BufferedReader(new InputStreamReader(response, "UTF-8")); - FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST"); - return fp; + return reader; } /** diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index cbeaae9..8698b78 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -265,6 +265,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient proteinSeq.createDatasetSequence(); querySeq.createDatasetSequence(); + getProteinCrossReferences(proteinSeq); + MapList mapList = mapCdsToProtein(querySeq, proteinSeq); if (mapList != null) { @@ -288,6 +290,29 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** + * Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein + * sequence + * + * @param proteinSeq + */ + protected void getProteinCrossReferences(SequenceI proteinSeq) + { + while (proteinSeq.getDatasetSequence() != null) + { + proteinSeq = proteinSeq.getDatasetSequence(); + } + + EnsemblXref xrefFetcher = new EnsemblXref(); + List xrefs = xrefFetcher.getCrossReferences( + proteinSeq.getName(), "PDB", "Uniprot/SPTREMBL", + "Uniprot/SWISSPROT"); + for (DBRefEntry xref : xrefs) + { + proteinSeq.addDBRef(xref); + } + } + + /** * Returns a mapping from dna to protein by inspecting sequence features of * type "CDS" on the dna. * diff --git a/src/jalview/ext/ensembl/EnsemblXref.java b/src/jalview/ext/ensembl/EnsemblXref.java new file mode 100644 index 0000000..6a4f369 --- /dev/null +++ b/src/jalview/ext/ensembl/EnsemblXref.java @@ -0,0 +1,165 @@ +package jalview.ext.ensembl; + +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; +import jalview.util.DBRefUtils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.json.simple.JSONArray; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; + +public class EnsemblXref extends EnsemblRestClient +{ + + @Override + public String getDbName() + { + return "ENSEMBL (xref)"; + } + + @Override + public AlignmentI getSequenceRecords(String queries) throws Exception + { + return null; + } + + @Override + protected URL getUrl(List ids) throws MalformedURLException + { + // TODO Auto-generated method stub + return null; + } + + @Override + protected boolean useGetRequest() + { + return true; + } + + @Override + protected String getRequestMimeType(boolean multipleIds) + { + return "application/json"; + } + + @Override + protected String getResponseMimeType() + { + return "application/json"; + } + + /** + * Calls the Ensembl xrefs REST endpoint and retrieves any cross-references + * ("primary_id") for the given identifier (Ensembl accession id) and database + * name. The "dbname" returned by Ensembl is canonicalised to Jalview's + * standard version, and a DBRefEntry constructed. + * + * @param identifier + * @param database + * @return + */ + public List getCrossReferences(String identifier, + String... database) + { + List result = new ArrayList(); + List ids = new ArrayList(); + ids.add(identifier); + + BufferedReader br = null; + try + { + for (String db : database) + { + URL url = getUrl(identifier, db); + if (url != null) + { + br = getHttpResponse(url, ids); + } + for (DBRefEntry xref : parseResponse(br)) + { + if (!result.contains(xref)) + { + result.add(xref); + } + } + br.close(); + } + } catch (IOException e) + { + // ignore + } finally + { + if (br != null) + { + try + { + br.close(); + } catch (IOException e) + { + // ignore + } + } + } + + return result; + } + + /** + * Parses "primary_id" and "dbname" values from the JSON response and returns + * a list of DBRefEntry constructed. + * + * @param br + * @return + * @throws IOException + */ + protected List parseResponse(BufferedReader br) + throws IOException + { + JSONParser jp = new JSONParser(); + List result = new ArrayList(); + try + { + JSONArray responses = (JSONArray) jp.parse(br); + Iterator rvals = responses.iterator(); + while (rvals.hasNext()) + { + JSONObject val = (JSONObject) rvals.next(); + String dbName = val.get("dbname").toString(); + String id = val.get("primary_id").toString(); + if (dbName != null && id != null) + { + dbName = DBRefUtils.getCanonicalName(dbName); + DBRefEntry dbref = new DBRefEntry(dbName, "0", id); + result.add(dbref); + } + } + } catch (ParseException e) + { + // ignore + } + return result; + } + + protected URL getUrl(String identifier, String db) + { + String url = ENSEMBL_REST + "/xrefs/id/" + identifier + + "?content-type=application/json&external_db=" + db; + try + { + return new URL(url); + } catch (MalformedURLException e) + { + return null; + } + } + +} diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index c85a489..e7053ed 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -52,6 +52,11 @@ public class DBRefUtils canonicalSourceNameLookup.put("uniprotkb/swiss-prot", DBRefSource.UNIPROT); canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT); + + // Ensembl values for dbname in xref REST service: + canonicalSourceNameLookup.put("uniprot/sptrembl", DBRefSource.UNIPROT); + canonicalSourceNameLookup.put("uniprot/swissprot", DBRefSource.UNIPROT); + canonicalSourceNameLookup.put("pdb", DBRefSource.PDB); canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL); -- 1.7.10.2