JAL-4036 New configuration, target and pagination process for 2022-07 new Uniprot API
[jalview.git] / src / jalview / fts / service / uniprot / UniProtFTSRestClient.java
index 250fba0..05ccba7 100644 (file)
  * The Jalview Authors are detailed in the 'AUTHORS' file.
  */
 
-package jalview.fts.service.uniprot;
+/*
+ * 2022-07-20 bsoares
+ * See https://issues.jalview.org/browse/JAL-4036
+ * The new Uniprot API is not dissimilar to the old one, but has some important changes.
+ * Some group names have changed slightly, some old groups have gone and there are quite a few new groups.
+ * 
+ * Most changes are mappings of old column ids to new field ids. There are a handful of old
+ * columns not mapped to new fields, and new fields without an old column.
+ * [aside: not all possible columns were listed in the resources/fts/uniprot_data_columns.txt file.
+ * These were presumably additions after the file was created]
+ * For existing/mapped fields, the same preferences found in the resource file have been migrated to
+ * the new file with the new field name, id and group.
+ * 
+ * The new mapped groups and fields are stored in, and read from, resources/fts/uniprot_data_columns-2022.txt.
+ * 
+ * There is now no "sort" query string parameter.
+ * 
+ * See https://www.uniprot.org/help/api_queries
+ * 
+ * SIGNIFICANT CHANGE: Pagination is no longer performed using a record offset, but with a "cursor"
+ * query string parameter that is not really a cursor.  The value is an opaque string taken from the
+ * "Link" header of the previous page's HTTP response (the header carries a whole URL for the next page).
+ * Where such a link is returned, its cursor value is put into the cursors ArrayList.
+ * UniprotFTSPanel has @Override methods to support this.
+ */
 
-import jalview.fts.api.FTSData;
-import jalview.fts.api.FTSDataColumnI;
-import jalview.fts.api.FTSRestClientI;
-import jalview.fts.core.FTSRestClient;
-import jalview.fts.core.FTSRestRequest;
-import jalview.fts.core.FTSRestResponse;
-import jalview.util.MessageManager;
+package jalview.fts.service.uniprot;
 
+import java.lang.invoke.MethodHandles;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
@@ -39,57 +60,161 @@ import javax.ws.rs.core.MediaType;
 import com.sun.jersey.api.client.Client;
 import com.sun.jersey.api.client.ClientResponse;
 import com.sun.jersey.api.client.WebResource;
-import com.sun.jersey.api.client.config.ClientConfig;
 import com.sun.jersey.api.client.config.DefaultClientConfig;
 
+import jalview.bin.Cache;
+import jalview.bin.Console;
+import jalview.fts.api.FTSData;
+import jalview.fts.api.FTSDataColumnI;
+import jalview.fts.core.FTSRestClient;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.core.FTSRestResponse;
+import jalview.util.ChannelProperties;
+import jalview.util.MessageManager;
+import jalview.util.Platform;
+
 public class UniProtFTSRestClient extends FTSRestClient
 {
-  private static FTSRestClientI instance = null;
+  private static final String DEFAULT_UNIPROT_DOMAIN = "https://rest.uniprot.org";
 
-  public static final String UNIPROT_SEARCH_ENDPOINT = "http://www.uniprot.org/uniprot/?";
+  // Value sent as the "User-Agent" request header by executeRequest (Java
+  // only): application name, version, this class and a contact address,
+  // separated by spaces.
+  // NOTE(review): concatenating a Class object uses Class.toString(), so the
+  // class part is rendered with a leading "class " - confirm this is intended.
+  private static final String USER_AGENT = ChannelProperties
+          .getProperty("app_name", "Jalview") + " "
+          + Cache.getDefault("VERSION", "Unknown") + " "
+          + MethodHandles.lookup().lookupClass() + " help@jalview.org";
+
+  // Registers the default UniProt domain with the Platform when the class is
+  // loaded; presumably this lets the JavaScript (J2S) build call the service
+  // directly - TODO confirm against Platform.addJ2SDirectDatabaseCall.
+  static
+  {
+    Platform.addJ2SDirectDatabaseCall(DEFAULT_UNIPROT_DOMAIN);
+  }
+
+  // Shared instance handed out by getInstance(); null until first requested.
+  private static UniProtFTSRestClient instance = null;
+
+  // Full URL of the search service, assigned once in the constructor from the
+  // "UNIPROT_DOMAIN" setting (default DEFAULT_UNIPROT_DOMAIN) plus
+  // "/uniprotkb/search".
+  public final String uniprotSearchEndpoint;
+
+  public UniProtFTSRestClient()
+  {
+    super();
+    this.clearCursors();
+    uniprotSearchEndpoint = Cache.getDefault("UNIPROT_DOMAIN",
+            DEFAULT_UNIPROT_DOMAIN) + "/uniprotkb/search";
+  }
 
+  /**
+   * Runs a search for the first page of results, i.e. without a pagination
+   * cursor. Delegates to executeRequest(FTSRestRequest, String) with a null
+   * cursor.
+   * 
+   * NOTE(review): the unchecked cast that needs suppressing is in the
+   * two-argument overload, not in this method - confirm the annotation below
+   * is on the intended method.
+   * 
+   * @param uniprotRestRequest
+   *          the request describing the search
+   * @return the response produced by the two-argument overload
+   * @throws Exception
+   *           if the delegated request fails
+   */
+  @SuppressWarnings("unchecked")
   @Override
-  public FTSRestResponse executeRequest(FTSRestRequest uniportRestRequest)
+  public FTSRestResponse executeRequest(FTSRestRequest uniprotRestRequest)
           throws Exception
   {
+    return executeRequest(uniprotRestRequest, null);
+  }
+
+  public FTSRestResponse executeRequest(FTSRestRequest uniprotRestRequest,
+          String cursor) throws Exception
+  {
     try
     {
-      ClientConfig clientConfig = new DefaultClientConfig();
-      Client client = Client.create(clientConfig);
-
       String wantedFields = getDataColumnsFieldsAsCommaDelimitedString(
-              uniportRestRequest.getWantedFields());
-      int responseSize = (uniportRestRequest.getResponseSize() == 0)
+              uniprotRestRequest.getWantedFields());
+      int responseSize = (uniprotRestRequest.getResponseSize() == 0)
               ? getDefaultResponsePageSize()
-              : uniportRestRequest.getResponseSize();
+              : uniprotRestRequest.getResponseSize();
 
-      int offSet = uniportRestRequest.getOffSet();
+      int offSet = uniprotRestRequest.getOffSet();
       String query;
-      if (isAdvancedQuery(uniportRestRequest.getSearchTerm()))
+      if (isAdvancedQuery(uniprotRestRequest.getSearchTerm()))
       {
-        query = uniportRestRequest.getSearchTerm();
+        query = uniprotRestRequest.getSearchTerm();
       }
       else
       {
-        query = uniportRestRequest.getFieldToSearchBy()
-                .equalsIgnoreCase("Search All")
-                        ? uniportRestRequest.getSearchTerm()
-                                + " or mnemonic:"
-                                + uniportRestRequest.getSearchTerm()
-                        : uniportRestRequest.getFieldToSearchBy() + ":"
-                                + uniportRestRequest.getSearchTerm();
+        query = uniprotRestRequest.getFieldToSearchBy().equalsIgnoreCase(
+                "Search All") ? uniprotRestRequest.getSearchTerm()
+                        // + " or mnemonic:"
+                        // + uniprotRestRequest.getSearchTerm()
+                        : uniprotRestRequest.getFieldToSearchBy() + ":"
+                                + uniprotRestRequest.getSearchTerm();
+      }
+
+      // BH 2018 the trick here is to coerce the classes in Javascript to be
+      // different from the ones in Java yet still allow this to be correct for
+      // Java
+      Client client;
+      Class<ClientResponse> clientResponseClass;
+      if (Platform.isJS())
+      {
+        // JavaScript only -- coerce types to Java types for Java
+        client = (Client) (Object) new jalview.javascript.web.Client();
+        clientResponseClass = (Class<ClientResponse>) (Object) jalview.javascript.web.ClientResponse.class;
+      }
+      else
+      /**
+       * Java only
+       * 
+       * @j2sIgnore
+       */
+      {
+        // Java only
+        client = Client.create(new DefaultClientConfig());
+        clientResponseClass = ClientResponse.class;
       }
 
       WebResource webResource = null;
-      webResource = client.resource(UNIPROT_SEARCH_ENDPOINT)
-              .queryParam("format", "tab")
-              .queryParam("columns", wantedFields)
-              .queryParam("limit", String.valueOf(responseSize))
-              .queryParam("offset", String.valueOf(offSet))
-              .queryParam("sort", "score").queryParam("query", query);
+      webResource = client.resource(uniprotSearchEndpoint)
+              .queryParam("format", "tsv")
+              .queryParam("fields", wantedFields)
+              .queryParam("size", String.valueOf(responseSize))
+              /* 2022 new api has no "sort"
+               * .queryParam("sort", "score")
+               */
+              .queryParam("query", query);
+      if (offSet != 0 && cursor != null && cursor.length() > 0)
+      // 2022 new api does not do pagination with an offset, it requires a
+      // "cursor" parameter with a key (given for the next page).
+      // (see https://www.uniprot.org/help/pagination)
+      {
+        webResource = webResource.queryParam("cursor", cursor);
+      }
+      Console.debug(
+              "Uniprot FTS Request: " + webResource.getURI().toString());
       // Execute the REST request
-      ClientResponse clientResponse = webResource
-              .accept(MediaType.TEXT_PLAIN).get(ClientResponse.class);
+      WebResource.Builder wrBuilder = webResource
+              .accept(MediaType.TEXT_PLAIN);
+      if (!Platform.isJS())
+      /**
+       * Java only
+       * 
+       * @j2sIgnore
+       */
+      {
+        wrBuilder.header("User-Agent", USER_AGENT);
+      }
+      ClientResponse clientResponse = wrBuilder.get(clientResponseClass);
+
+      if (!Platform.isJS())
+      /**
+       * Java only
+       * 
+       * @j2sIgnore
+       */
+      {
+        if (clientResponse.getHeaders().containsKey("Link"))
+        {
+          // extract the URL from the 'Link: <URL>; ref="stuff"' header
+          String linkHeader = clientResponse.getHeaders().get("Link")
+                  .get(0);
+          if (linkHeader.indexOf("<") > -1)
+          {
+            String temp = linkHeader.substring(linkHeader.indexOf("<") + 1);
+            if (temp.indexOf(">") > -1)
+            {
+              String nextUrl = temp.substring(0, temp.indexOf(">"));
+              // then get the cursor value from the query string parameters
+              String nextCursor = getQueryParam("cursor", nextUrl);
+              setCursor(cursorPage + 1, nextCursor);
+            }
+          }
+        }
+      }
+
       String uniProtTabDelimittedResponseString = clientResponse
               .getEntity(String.class);
       // Make redundant objects eligible for garbage collection to conserve
@@ -103,14 +228,26 @@ public class UniProtFTSRestClient extends FTSRestClient
         throw new Exception(errorMessage);
 
       }
-      int xTotalResults = Integer.valueOf(
-              clientResponse.getHeaders().get("X-Total-Results").get(0));
+      // new Uniprot API is not including a "X-Total-Results" header when there
+      // are 0 results
+      List<String> resultsHeaders = clientResponse.getHeaders()
+              .get("X-Total-Results");
+      int xTotalResults = 0;
+      if (Platform.isJS())
+      {
+        xTotalResults = 1;
+      }
+      else if (resultsHeaders != null && resultsHeaders.size() >= 1)
+      {
+        xTotalResults = Integer.valueOf(resultsHeaders.get(0));
+      }
       clientResponse = null;
       client = null;
       return parseUniprotResponse(uniProtTabDelimittedResponseString,
-              uniportRestRequest, xTotalResults);
+              uniprotRestRequest, xTotalResults);
     } catch (Exception e)
     {
+      Console.debug("Exception caught from response", e);
       String exceptionMsg = e.getMessage();
       if (exceptionMsg.contains("SocketException"))
       {
@@ -158,7 +295,7 @@ public class UniProtFTSRestClient extends FTSRestClient
     String[] foundDataRow = uniProtTabDelimittedResponseString.split("\n");
     if (foundDataRow != null && foundDataRow.length > 0)
     {
-      result = new ArrayList<FTSData>();
+      result = new ArrayList<>();
       boolean firstRow = true;
       for (String dataRow : foundDataRow)
       {
@@ -178,38 +315,39 @@ public class UniProtFTSRestClient extends FTSRestClient
     return searchResult;
   }
 
-  /**
-   * Takes a collection of FTSDataColumnI and converts its 'code' values into a
-   * tab delimited string.
-   * 
-   * @param dataColumnFields
-   *          the collection of FTSDataColumnI to process
-   * @return the generated comma delimited string from the supplied
-   *         FTSDataColumnI collection
-   */
-  private String getDataColumnsFieldsAsTabDelimitedString(
-          Collection<FTSDataColumnI> dataColumnFields)
-  {
-    String result = "";
-    if (dataColumnFields != null && !dataColumnFields.isEmpty())
-    {
-      StringBuilder returnedFields = new StringBuilder();
-      for (FTSDataColumnI field : dataColumnFields)
-      {
-        if (field.getName().equalsIgnoreCase("Uniprot Id"))
-        {
-          returnedFields.append("\t").append("Entry");
-        }
-        else
-        {
-          returnedFields.append("\t").append(field.getName());
-        }
-      }
-      returnedFields.deleteCharAt(0);
-      result = returnedFields.toString();
-    }
-    return result;
-  }
+  // /**
+  // * Takes a collection of FTSDataColumnI and converts its 'code' values into
+  // a
+  // * tab delimited string.
+  // *
+  // * @param dataColumnFields
+  // * the collection of FTSDataColumnI to process
+  // * @return the generated comma delimited string from the supplied
+  // * FTSDataColumnI collection
+  // */
+  // private String getDataColumnsFieldsAsTabDelimitedString(
+  // Collection<FTSDataColumnI> dataColumnFields)
+  // {
+  // String result = "";
+  // if (dataColumnFields != null && !dataColumnFields.isEmpty())
+  // {
+  // StringBuilder returnedFields = new StringBuilder();
+  // for (FTSDataColumnI field : dataColumnFields)
+  // {
+  // if (field.getName().equalsIgnoreCase("Uniprot Id"))
+  // {
+  // returnedFields.append("\t").append("Entry");
+  // }
+  // else
+  // {
+  // returnedFields.append("\t").append(field.getName());
+  // }
+  // }
+  // returnedFields.deleteCharAt(0);
+  // result = returnedFields.toString();
+  // }
+  // return result;
+  // }
 
   public static FTSData getFTSData(String tabDelimittedDataStr,
           FTSRestRequest request)
@@ -309,7 +447,7 @@ public class UniProtFTSRestClient extends FTSRestClient
     };
   }
 
-  public static FTSRestClientI getInstance()
+  public static UniProtFTSRestClient getInstance()
   {
     if (instance == null)
     {
@@ -321,7 +459,95 @@ public class UniProtFTSRestClient extends FTSRestClient
+  /**
+   * Returns the resource path of the data column configuration file used for
+   * the 2022 UniProt API field names.
+   */
   @Override
   public String getColumnDataConfigFileName()
   {
-    return "/fts/uniprot_data_columns.txt";
+    return "/fts/uniprot_data_columns-2022.txt";
+  }
+
+  /* 2022-07-20 bsoares
+   * used for the new API "cursor" pagination. See https://www.uniprot.org/help/pagination
+   *
+   * The list is indexed by page number: while page cursorPage is being
+   * fetched, executeRequest stores the cursor value found in the response's
+   * "Link" header at index cursorPage + 1. Entries that have not been set
+   * are null (setCursor pads the list with nulls).
+   */
+  private ArrayList<String> cursors;
+
+  // Index of the current results page; changed only through setCursorPage,
+  // setPrevCursorPage and setNextCursorPage.
+  private int cursorPage = 0;
+
+  /**
+   * @return the index of the current results page
+   */
+  protected int getCursorPage()
+  {
+    return this.cursorPage;
+  }
+
+  /**
+   * Sets the index of the current results page.
+   * 
+   * @param i
+   *          the new page index (stored as given, without validation)
+   */
+  protected void setCursorPage(int i)
+  {
+    this.cursorPage = i;
+  }
+
+  /**
+   * Moves the current page index back by one, unless it is already at (or
+   * below) zero, in which case nothing changes.
+   */
+  protected void setPrevCursorPage()
+  {
+    if (cursorPage <= 0)
+    {
+      return;
+    }
+    cursorPage -= 1;
+  }
+
+  /**
+   * Moves the current page index forward by one.
+   */
+  protected void setNextCursorPage()
+  {
+    cursorPage += 1;
+  }
+
+  /**
+   * Discards all stored pagination cursors by replacing the list with a new,
+   * empty one. The current page index is not changed by this method.
+   */
+  protected void clearCursors()
+  {
+    // diamond instead of the raw type, so the assignment is type-checked
+    cursors = new ArrayList<>(10);
+  }
+
+  /**
+   * Returns the cursor stored for the given page.
+   * 
+   * @param i
+   *          the page index
+   * @return the stored cursor, or null if no cursor has been stored for that
+   *         page (including when i is negative or beyond the end of the list)
+   */
+  protected String getCursor(int i)
+  {
+    // behave like getNextCursor: a missing entry is null, not an exception
+    if (i < 0 || i >= cursors.size())
+    {
+      return null;
+    }
+    return cursors.get(i);
   }
 
-}
+  /**
+   * Returns the cursor stored for the page after the current one.
+   * 
+   * @return the stored entry at index cursorPage + 1, or null if the list
+   *         does not reach that index
+   */
+  protected String getNextCursor()
+  {
+    boolean hasNextEntry = cursors.size() >= cursorPage + 2;
+    return hasNextEntry ? cursors.get(cursorPage + 1) : null;
+  }
+
+  /**
+   * Returns the cursor stored for the page before the current one.
+   * 
+   * @return the stored entry at index cursorPage - 1, or null if the current
+   *         page is the first page (or the index is otherwise outside the
+   *         list, e.g. after the cursors have been cleared)
+   */
+  protected String getPrevCursor()
+  {
+    int prevPage = cursorPage - 1;
+    // guard both ends so a stale or negative page index yields null, in the
+    // same way getNextCursor does for a missing entry
+    if (prevPage < 0 || prevPage >= cursors.size())
+    {
+      return null;
+    }
+    return cursors.get(prevPage);
+  }
+
+  /**
+   * Stores the cursor for the given page, growing the list with null entries
+   * if it does not yet reach that index.
+   * 
+   * @param i
+   *          the page index the cursor belongs to
+   * @param c
+   *          the cursor value (may be null)
+   */
+  protected void setCursor(int i, String c)
+  {
+    cursors.ensureCapacity(i + 1);
+    // pad with nulls so that index i exists before it is assigned
+    while (cursors.size() <= i)
+    {
+      cursors.add(null);
+    }
+    cursors.set(i, c);
+    Console.debug(
+            "Set UniProtFTSRestClient cursors[" + i + "] to '" + c + "'");
+  }
+
+  public static String getQueryParam(String param, String u)
+  {
+    if (param == null || u == null)
+      return null;
+    try
+    {
+      URL url = new URL(u);
+      String[] kevs = url.getQuery().split("&");
+      for (int j = 0; j < kevs.length; j++)
+      {
+        String[] kev = kevs[j].split("=", 2);
+        if (param.equals(kev[0]))
+        {
+          return kev[1];
+        }
+      }
+    } catch (MalformedURLException e)
+    {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+    }
+    return null;
+  }
+}
\ No newline at end of file