X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblRestClient.java;h=34f8816f9708dff2f15f23eeda9fa23f5f56fd43;hb=a064561d8665ee9db217b17cda826fceac90cbbc;hp=02b13ef6f3f8a85d05ee18a2528bb2d62a7f195d;hpb=f9b80711054b61e8c2257488a1637e15616cb9c9;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java index 02b13ef..34f8816 100644 --- a/src/jalview/ext/ensembl/EnsemblRestClient.java +++ b/src/jalview/ext/ensembl/EnsemblRestClient.java @@ -14,6 +14,8 @@ import java.util.List; import javax.ws.rs.HttpMethod; +import com.stevesoft.pat.Regex; + /** * Base class for Ensembl REST service clients * @@ -21,22 +23,72 @@ import javax.ws.rs.HttpMethod; */ abstract class EnsemblRestClient extends EnsemblSequenceFetcher { - protected final static String ENSEMBL_REST = "http://rest.ensembl.org"; + private final static String ENSEMBL_REST = "http://rest.ensembl.org"; - protected static final String SEQUENCE_ID_URL = ENSEMBL_REST - + "/sequence/id"; + protected final static String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org"; // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats private static final String PING_URL = "http://rest.ensembl.org/info/ping.json"; private final static long RETEST_INTERVAL = 10000L; // 10 seconds + private static final Regex TRANSCRIPT_REGEX = new Regex( + "(ENS)([A-Z]{3}|)T[0-9]{11}$"); + + private static final Regex GENE_REGEX = new Regex( + "(ENS)([A-Z]{3}|)G[0-9]{11}$"); + + private String domain = ENSEMBL_REST; + private static boolean ensemblRestAvailable = false; private static long lastCheck = -1; + /* + * absolute time to wait till if we overloaded the REST service + */ + private static long retryAfter; + protected volatile boolean inProgress = false; + /** + * Default constructor to use rest.ensembl.org + */ + public EnsemblRestClient() + { + this(ENSEMBL_REST); + } + + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblRestClient(String d) + { + domain = d; + } + + String getDomain() + { + return domain; + } + + void setDomain(String d) + { + domain = d; + } + + public boolean isTranscriptIdentifier(String query) + { + return query == null ? false : TRANSCRIPT_REGEX.search(query); + } + + public boolean isGeneIdentifier(String query) + { + return query == null ? false : GENE_REGEX.search(query); + } + @Override public boolean queryInProgress() { @@ -69,10 +121,12 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher /** * Return the desired value for the Content-Type request header * + * @param multipleIds + * * @return * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers */ - protected abstract String getRequestMimeType(); + protected abstract String getRequestMimeType(boolean multipleIds); /** * Return the desired value for the Accept request header @@ -92,7 +146,10 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher { try { - URL ping = new URL(PING_URL); + // note this format works for both ensembl and ensemblgenomes + // info/ping.json works for ensembl only (March 2016) + URL ping = new URL(getDomain() + + "/info/ping?content-type=application/json"); HttpURLConnection conn = (HttpURLConnection) ping.openConnection(); int rc = conn.getResponseCode(); conn.disconnect(); @@ -120,22 +177,43 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher { URL url = getUrl(ids); + BufferedReader reader = getHttpResponse(url, ids); + FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST"); + return fp; + } + + /** + * Writes the HTTP request and gets the response as a reader. + * + * @param url + * @param ids + * written as Json POST body if more than one + * @return + * @throws IOException + * if response code was not 200, or other I/O error + */ + protected BufferedReader getHttpResponse(URL url, List ids) + throws IOException + { + // long now = System.currentTimeMillis(); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); /* * POST method allows multiple queries in one request; it is supported for * sequence queries, but not for overlap */ - connection.setRequestMethod(useGetRequest() ? HttpMethod.GET - : HttpMethod.POST); - connection.setRequestProperty("Content-Type", getRequestMimeType()); + boolean multipleIds = ids.size() > 1;// useGetRequest(); + connection.setRequestMethod(multipleIds ? HttpMethod.POST + : HttpMethod.GET); + connection.setRequestProperty("Content-Type", + getRequestMimeType(multipleIds)); connection.setRequestProperty("Accept", getResponseMimeType()); connection.setUseCaches(false); connection.setDoInput(true); - connection.setDoOutput(true); + connection.setDoOutput(multipleIds); - if (!useGetRequest()) + if (multipleIds) { writePostBody(connection, ids); } @@ -145,18 +223,71 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher if (responseCode != 200) { - throw new RuntimeException( + /* + * note: a GET request for an invalid id returns an error code e.g. 415 + * but POST request returns 200 and an empty Fasta response + */ + throw new IOException( "Response code was not 200. Detected response was " + responseCode); } + // System.out.println(getClass().getName() + " took " + // + (System.currentTimeMillis() - now) + "ms to fetch"); + + checkRateLimits(connection); BufferedReader reader = null; reader = new BufferedReader(new InputStreamReader(response, "UTF-8")); - FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST"); - return fp; + return reader; } /** + * Inspect response headers for any sign of server overload and respect any + * 'retry-after' directive + * + * @see https://github.com/Ensembl/ensembl-rest/wiki/Rate-Limits + * @param connection + */ + void checkRateLimits(HttpURLConnection connection) + { + // number of requests allowed per time interval: + String limit = connection.getHeaderField("X-RateLimit-Limit"); + // length of quota time interval in seconds: + // String period = connection.getHeaderField("X-RateLimit-Period"); + // seconds remaining until usage quota is reset: + String reset = connection.getHeaderField("X-RateLimit-Reset"); + // number of requests remaining from quota for current period: + String remaining = connection.getHeaderField("X-RateLimit-Remaining"); + // number of seconds to wait before retrying (if remaining == 0) + String retryDelay = connection.getHeaderField("Retry-After"); + + // to test: + // retryDelay = "5"; + + if (retryDelay != null) + { + System.err.println("Ensembl REST service rate limit exceeded, wait " + + retryDelay + " seconds before retrying"); + try + { + retryAfter = System.currentTimeMillis() + + (1000 * Integer.valueOf(retryDelay)); + } catch (NumberFormatException e) + { + System.err.println("Unexpected value for Retry-After: " + + retryDelay); + } + } + else + { + retryAfter = 0; + // debug: + // System.out.println(String.format( + // "%s Ensembl requests remaining of %s (reset in %ss)", + // remaining, limit, reset)); + } + } + /** * Rechecks if Ensembl is responding, unless the last check was successful and * the retest interval has not yet elapsed. Returns true if Ensembl is up, * else false. @@ -166,6 +297,21 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher protected boolean isEnsemblAvailable() { long now = System.currentTimeMillis(); + + /* + * check if we are waiting for 'Retry-After' to expire + */ + if (retryAfter > now) + { + System.err.println("Still " + (1 + (retryAfter - now) / 1000) + + " secs to wait before retrying Ensembl"); + return false; + } + else + { + retryAfter = 0; + } + boolean retest = now - lastCheck > RETEST_INTERVAL; if (ensemblRestAvailable && !retest) {