JAL-1705 fetch Uniprot and PDB xrefs for Ensembl protein products
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 29 Jan 2016 16:11:36 +0000 (16:11 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 29 Jan 2016 16:11:36 +0000 (16:11 +0000)
src/jalview/ext/ensembl/EnsemblRestClient.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/ext/ensembl/EnsemblXref.java [new file with mode: 0644]
src/jalview/util/DBRefUtils.java

index f81bce2..2fd7fa3 100644 (file)
@@ -122,6 +122,24 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
   {
     URL url = getUrl(ids);
   
+    BufferedReader reader = getHttpResponse(url, ids);
+    FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
+    return fp;
+  }
+
+  /**
+   * Writes the HTTP request and gets the response as a reader.
+   * 
+   * @param url
+   * @param ids
+   *          written as Json POST body if more than one
+   * @return
+   * @throws IOException
+   *           if response code was not 200, or other I/O error
+   */
+  protected BufferedReader getHttpResponse(URL url, List<String> ids)
+          throws IOException
+  {
     HttpURLConnection connection = (HttpURLConnection) url.openConnection();
   
     /*
@@ -153,15 +171,14 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
        * note: a GET request for an invalid id returns an error code e.g. 415
        * but POST request returns 200 and an empty Fasta response 
        */
-      throw new RuntimeException(
+      throw new IOException(
               "Response code was not 200. Detected response was "
                       + responseCode);
     }
   
     BufferedReader reader = null;
     reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
-    FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
-    return fp;
+    return reader;
   }
 
   /**
index cbeaae9..8698b78 100644 (file)
@@ -265,6 +265,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       proteinSeq.createDatasetSequence();
       querySeq.createDatasetSequence();
 
+      getProteinCrossReferences(proteinSeq);
+
       MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
       if (mapList != null)
       {
@@ -288,6 +290,29 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   }
 
   /**
+   * Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein
+   * sequence
+   * 
+   * @param proteinSeq
+   */
+  protected void getProteinCrossReferences(SequenceI proteinSeq)
+  {
+    // the xrefs are attached to the root dataset sequence, so find it first
+    SequenceI target = proteinSeq;
+    SequenceI ds;
+    while ((ds = target.getDatasetSequence()) != null)
+    {
+      target = ds;
+    }
+    String[] databases = { "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" };
+    for (DBRefEntry ref : new EnsemblXref().getCrossReferences(
+            target.getName(), databases))
+    {
+      target.addDBRef(ref);
+    }
+  }
+
+  /**
    * Returns a mapping from dna to protein by inspecting sequence features of
    * type "CDS" on the dna.
    * 
diff --git a/src/jalview/ext/ensembl/EnsemblXref.java b/src/jalview/ext/ensembl/EnsemblXref.java
new file mode 100644 (file)
index 0000000..6a4f369
--- /dev/null
@@ -0,0 +1,165 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.util.DBRefUtils;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+/**
+ * A client for the Ensembl xrefs REST endpoint, used to look up
+ * cross-references (for example Uniprot or PDB) for an Ensembl identifier.
+ */
+public class EnsemblXref extends EnsemblRestClient
+{
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (xref)";
+  }
+
+  /**
+   * Not supported by this client (it fetches cross-references, not
+   * sequences); always returns null.
+   */
+  @Override
+  public AlignmentI getSequenceRecords(String queries) throws Exception
+  {
+    return null;
+  }
+
+  /**
+   * Not implemented; always returns null. The request URL depends on the
+   * external database as well as the identifier, so
+   * {@link #getCrossReferences(String, String...)} builds its URLs with
+   * {@link #getUrl(String, String)} instead.
+   */
+  @Override
+  protected URL getUrl(List<String> ids) throws MalformedURLException
+  {
+    return null;
+  }
+
+  @Override
+  protected boolean useGetRequest()
+  {
+    return true;
+  }
+
+  @Override
+  protected String getRequestMimeType(boolean multipleIds)
+  {
+    return "application/json";
+  }
+
+  @Override
+  protected String getResponseMimeType()
+  {
+    return "application/json";
+  }
+
+  /**
+   * Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
+   * ("primary_id") for the given identifier (Ensembl accession id) and database
+   * names. The "dbname" returned by Ensembl is canonicalised to Jalview's
+   * standard version, and a DBRefEntry constructed. Duplicate entries are
+   * dropped.
+   * <p>
+   * The lookup is best-effort: a database for which no valid URL can be built
+   * is skipped, and an I/O error ends the lookup, returning whatever was found
+   * up to that point.
+   * 
+   * @param identifier
+   *          the Ensembl accession id to look up
+   * @param database
+   *          the Ensembl external database name(s) to query
+   * @return the (possibly empty) list of distinct cross-references found
+   */
+  public List<DBRefEntry> getCrossReferences(String identifier,
+          String... database)
+  {
+    List<DBRefEntry> result = new ArrayList<DBRefEntry>();
+    List<String> ids = new ArrayList<String>();
+    ids.add(identifier);
+
+    try
+    {
+      for (String db : database)
+      {
+        URL url = getUrl(identifier, db);
+        if (url == null)
+        {
+          // no valid URL, so there is no response to parse for this database
+          continue;
+        }
+
+        // one reader per request, closed exactly once whatever happens
+        BufferedReader br = getHttpResponse(url, ids);
+        try
+        {
+          for (DBRefEntry xref : parseResponse(br))
+          {
+            if (!result.contains(xref))
+            {
+              result.add(xref);
+            }
+          }
+        } finally
+        {
+          if (br != null)
+          {
+            br.close();
+          }
+        }
+      }
+    } catch (IOException e)
+    {
+      // best-effort: report the failure and return what was found so far
+      System.err.println("Error fetching Ensembl xrefs for " + identifier
+              + ": " + e.getMessage());
+    }
+
+    return result;
+  }
+
+  /**
+   * Parses "primary_id" and "dbname" values from the JSON response and returns
+   * a list of DBRefEntry constructed. Array elements lacking either value are
+   * skipped; a response that is not a JSON array, or is not valid JSON, yields
+   * an empty list.
+   * 
+   * @param br
+   *          reader on the JSON response; if null an empty list is returned
+   * @return the (possibly empty) list of cross-references parsed
+   * @throws IOException
+   *           if the response cannot be read
+   */
+  protected List<DBRefEntry> parseResponse(BufferedReader br)
+          throws IOException
+  {
+    List<DBRefEntry> result = new ArrayList<DBRefEntry>();
+    if (br == null)
+    {
+      return result;
+    }
+
+    try
+    {
+      Object parsed = new JSONParser().parse(br);
+      if (!(parsed instanceof JSONArray))
+      {
+        // e.g. a JSON object rather than an array: nothing to extract
+        return result;
+      }
+
+      for (Object item : (JSONArray) parsed)
+      {
+        if (!(item instanceof JSONObject))
+        {
+          continue;
+        }
+        JSONObject val = (JSONObject) item;
+
+        // check for missing keys before converting, to avoid a
+        // NullPointerException on incomplete entries
+        Object dbName = val.get("dbname");
+        Object id = val.get("primary_id");
+        if (dbName != null && id != null)
+        {
+          String source = DBRefUtils.getCanonicalName(dbName.toString());
+          result.add(new DBRefEntry(source, "0", id.toString()));
+        }
+      }
+    } catch (ParseException e)
+    {
+      // malformed JSON: treated as a response with no cross-references
+    }
+    return result;
+  }
+
+  /**
+   * Builds the xrefs REST URL for the given identifier and external database.
+   * 
+   * @param identifier
+   *          the Ensembl accession id
+   * @param db
+   *          the Ensembl external database name
+   * @return the URL, or null if the resulting string is not a valid URL
+   */
+  protected URL getUrl(String identifier, String db)
+  {
+    String url = ENSEMBL_REST + "/xrefs/id/" + identifier
+            + "?content-type=application/json&external_db=" + db;
+    try
+    {
+      return new URL(url);
+    } catch (MalformedURLException e)
+    {
+      return null;
+    }
+  }
+
+}
index c85a489..e7053ed 100755 (executable)
@@ -52,6 +52,11 @@ public class DBRefUtils
     canonicalSourceNameLookup.put("uniprotkb/swiss-prot",
             DBRefSource.UNIPROT);
     canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT);
+
+    // Ensembl values for dbname in xref REST service:
+    canonicalSourceNameLookup.put("uniprot/sptrembl", DBRefSource.UNIPROT);
+    canonicalSourceNameLookup.put("uniprot/swissprot", DBRefSource.UNIPROT);
+
     canonicalSourceNameLookup.put("pdb", DBRefSource.PDB);
     canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL);