package jalview.ext.ensembl; import jalview.io.FileParse; import java.io.BufferedReader; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.List; import javax.ws.rs.HttpMethod; import com.stevesoft.pat.Regex; /** * Base class for Ensembl REST service clients * * @author gmcarstairs */ abstract class EnsemblRestClient extends EnsemblSequenceFetcher { private final static String ENSEMBL_REST = "http://rest.ensembl.org"; protected final static String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org"; // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats private static final String PING_URL = "http://rest.ensembl.org/info/ping.json"; private final static long RETEST_INTERVAL = 10000L; // 10 seconds private static final Regex TRANSCRIPT_REGEX = new Regex( "(ENS)([A-Z]{3}|)T[0-9]{11}$"); private static final Regex GENE_REGEX = new Regex( "(ENS)([A-Z]{3}|)G[0-9]{11}$"); private String domain = ENSEMBL_REST; private static boolean ensemblRestAvailable = false; private static long lastCheck = -1; /* * absolute time to wait till if we overloaded the REST service */ private static long retryAfter; protected volatile boolean inProgress = false; /** * Default constructor to use rest.ensembl.org */ public EnsemblRestClient() { this(ENSEMBL_REST); } /** * Constructor given the target domain to fetch data from * * @param d */ public EnsemblRestClient(String d) { domain = d; } String getDomain() { return domain; } void setDomain(String d) { domain = d; } public boolean isTranscriptIdentifier(String query) { return query == null ? false : TRANSCRIPT_REGEX.search(query); } public boolean isGeneIdentifier(String query) { return query == null ? false : GENE_REGEX.search(query); } @Override public boolean queryInProgress() { return inProgress; } @Override public StringBuffer getRawRecords() { return null; } /** * Returns the URL for the client http request * * @param ids * @return * @throws MalformedURLException */ protected abstract URL getUrl(List ids) throws MalformedURLException; /** * Returns true if client uses GET method, false if it uses POST * * @return */ protected abstract boolean useGetRequest(); /** * Return the desired value for the Content-Type request header * * @param multipleIds * * @return * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers */ protected abstract String getRequestMimeType(boolean multipleIds); /** * Return the desired value for the Accept request header * * @return * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers */ protected abstract String getResponseMimeType(); /** * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if * successful, else false * * @return */ private boolean checkEnsembl() { try { // note this format works for both ensembl and ensemblgenomes // info/ping.json works for ensembl only (March 2016) URL ping = new URL(getDomain() + "/info/ping?content-type=application/json"); HttpURLConnection conn = (HttpURLConnection) ping.openConnection(); int rc = conn.getResponseCode(); conn.disconnect(); if (rc >= 200 && rc < 300) { return true; } } catch (Throwable t) { System.err.println("Error connecting to " + PING_URL + ": " + t.getMessage()); } return false; } /** * returns a reader to a Fasta response from the Ensembl sequence endpoint * * @param ids * @return * @throws IOException */ protected FileParse getSequenceReader(List ids) throws IOException { URL url = getUrl(ids); BufferedReader reader = getHttpResponse(url, ids); FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST"); return fp; } /** * Writes the HTTP request and gets the response as a reader. * * @param url * @param ids * written as Json POST body if more than one * @return * @throws IOException * if response code was not 200, or other I/O error */ protected BufferedReader getHttpResponse(URL url, List ids) throws IOException { // long now = System.currentTimeMillis(); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); /* * POST method allows multiple queries in one request; it is supported for * sequence queries, but not for overlap */ boolean multipleIds = ids.size() > 1;// useGetRequest(); connection.setRequestMethod(multipleIds ? HttpMethod.POST : HttpMethod.GET); connection.setRequestProperty("Content-Type", getRequestMimeType(multipleIds)); connection.setRequestProperty("Accept", getResponseMimeType()); connection.setUseCaches(false); connection.setDoInput(true); connection.setDoOutput(multipleIds); if (multipleIds) { writePostBody(connection, ids); } InputStream response = connection.getInputStream(); int responseCode = connection.getResponseCode(); if (responseCode != 200) { /* * note: a GET request for an invalid id returns an error code e.g. 415 * but POST request returns 200 and an empty Fasta response */ throw new IOException( "Response code was not 200. Detected response was " + responseCode); } // System.out.println(getClass().getName() + " took " // + (System.currentTimeMillis() - now) + "ms to fetch"); checkRateLimits(connection); BufferedReader reader = null; reader = new BufferedReader(new InputStreamReader(response, "UTF-8")); return reader; } /** * Inspect response headers for any sign of server overload and respect any * 'retry-after' directive * * @see https://github.com/Ensembl/ensembl-rest/wiki/Rate-Limits * @param connection */ void checkRateLimits(HttpURLConnection connection) { // number of requests allowed per time interval: String limit = connection.getHeaderField("X-RateLimit-Limit"); // length of quota time interval in seconds: // String period = connection.getHeaderField("X-RateLimit-Period"); // seconds remaining until usage quota is reset: String reset = connection.getHeaderField("X-RateLimit-Reset"); // number of requests remaining from quota for current period: String remaining = connection.getHeaderField("X-RateLimit-Remaining"); // number of seconds to wait before retrying (if remaining == 0) String retryDelay = connection.getHeaderField("Retry-After"); // to test: // retryDelay = "5"; if (retryDelay != null) { System.err.println("Ensembl REST service rate limit exceeded, wait " + retryDelay + " seconds before retrying"); try { retryAfter = System.currentTimeMillis() + (1000 * Integer.valueOf(retryDelay)); } catch (NumberFormatException e) { System.err.println("Unexpected value for Retry-After: " + retryDelay); } } else { retryAfter = 0; // debug: // System.out.println(String.format( // "%s Ensembl requests remaining of %s (reset in %ss)", // remaining, limit, reset)); } } /** * Rechecks if Ensembl is responding, unless the last check was successful and * the retest interval has not yet elapsed. Returns true if Ensembl is up, * else false. * * @return */ protected boolean isEnsemblAvailable() { long now = System.currentTimeMillis(); /* * check if we are waiting for 'Retry-After' to expire */ if (retryAfter > now) { System.err.println("Still " + (1 + (retryAfter - now) / 1000) + " secs to wait before retrying Ensembl"); return false; } else { retryAfter = 0; } boolean retest = now - lastCheck > RETEST_INTERVAL; if (ensemblRestAvailable && !retest) { return true; } ensemblRestAvailable = checkEnsembl(); lastCheck = now; return ensemblRestAvailable; } /** * Constructs, writes and flushes the POST body of the request, containing the * query ids in JSON format * * @param connection * @param ids * @throws IOException */ protected void writePostBody(HttpURLConnection connection, List ids) throws IOException { boolean first; StringBuilder postBody = new StringBuilder(64); postBody.append("{\"ids\":["); first = true; for (String id : ids) { if (!first) { postBody.append(","); } first = false; postBody.append("\""); postBody.append(id.trim()); postBody.append("\""); } postBody.append("]}"); byte[] thepostbody = postBody.toString().getBytes(); connection.setRequestProperty("Content-Length", Integer.toString(thepostbody.length)); DataOutputStream wr = new DataOutputStream(connection.getOutputStream()); wr.write(thepostbody); wr.flush(); wr.close(); } }