1 package jalview.ext.ensembl;
3 import jalview.io.FileParse;
5 import java.io.BufferedReader;
6 import java.io.DataOutputStream;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.HttpURLConnection;
11 import java.net.MalformedURLException;
13 import java.util.List;
15 import javax.ws.rs.HttpMethod;
17 import com.stevesoft.pat.Regex;
20 * Base class for Ensembl REST service clients
24 abstract class EnsemblRestClient extends EnsemblSequenceFetcher
26 private final static String ENSEMBL_REST = "http://rest.ensembl.org";
28 protected final static String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org";
30 // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
31 private static final String PING_URL = "http://rest.ensembl.org/info/ping.json";
33 private final static long RETEST_INTERVAL = 10000L; // 10 seconds
35 private static final Regex TRANSCRIPT_REGEX = new Regex(
36 "(ENS)([A-Z]{3}|)T[0-9]{11}$");
38 private static final Regex GENE_REGEX = new Regex(
39 "(ENS)([A-Z]{3}|)G[0-9]{11}$");
41 private String domain = ENSEMBL_REST;
43 private static boolean ensemblRestAvailable = false;
45 private static long lastCheck = -1;
48 * absolute time (epoch milliseconds) before which we should not retry, set if we overloaded the REST service
50 private static long retryAfter;
52 protected volatile boolean inProgress = false;
55 * Default constructor to use rest.ensembl.org
57 public EnsemblRestClient()
63 * Constructor given the target domain to fetch data from
67 public EnsemblRestClient(String d)
73 * Returns the domain name to query e.g. http://rest.ensembl.org or
74 * http://rest.ensemblgenomes.org
83 void setDomain(String d)
89 * Answers true if the query matches the regular expression pattern for an
90 * Ensembl transcript stable identifier
95 public boolean isTranscriptIdentifier(String query)
97 return query == null ? false : TRANSCRIPT_REGEX.search(query);
101 * Answers true if the query matches the regular expression pattern for an
102 * Ensembl gene stable identifier
107 public boolean isGeneIdentifier(String query)
109 return query == null ? false : GENE_REGEX.search(query);
113 public boolean queryInProgress()
119 public StringBuffer getRawRecords()
125 * Returns the URL for the client http request
129 * @throws MalformedURLException
131 protected abstract URL getUrl(List<String> ids)
132 throws MalformedURLException;
135 * Returns true if client uses GET method, false if it uses POST
139 protected abstract boolean useGetRequest();
142 * Return the desired value for the Content-Type request header
147 * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
149 protected abstract String getRequestMimeType(boolean multipleIds);
152 * Return the desired value for the Accept request header
155 * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
157 protected abstract String getResponseMimeType();
160 * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
161 * successful, else false
165 private boolean checkEnsembl()
169 // note this format works for both ensembl and ensemblgenomes
170 // info/ping.json works for ensembl only (March 2016)
171 URL ping = new URL(getDomain()
172 + "/info/ping?content-type=application/json");
173 HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
174 int rc = conn.getResponseCode();
176 if (rc >= 200 && rc < 300)
180 } catch (Throwable t)
182 System.err.println("Error connecting to " + PING_URL + ": "
189 * returns a reader to a Fasta response from the Ensembl sequence endpoint
193 * @throws IOException
195 protected FileParse getSequenceReader(List<String> ids)
198 URL url = getUrl(ids);
200 BufferedReader reader = getHttpResponse(url, ids);
201 FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
206 * Writes the HTTP request and gets the response as a reader.
210 * written as Json POST body if more than one
212 * @throws IOException
213 * if response code was not 200, or other I/O error
215 protected BufferedReader getHttpResponse(URL url, List<String> ids)
218 // long now = System.currentTimeMillis();
219 HttpURLConnection connection = (HttpURLConnection) url.openConnection();
222 * POST method allows multiple queries in one request; it is supported for
223 * sequence queries, but not for overlap
225 boolean multipleIds = ids.size() > 1;// useGetRequest();
226 connection.setRequestMethod(multipleIds ? HttpMethod.POST
228 connection.setRequestProperty("Content-Type",
229 getRequestMimeType(multipleIds));
230 connection.setRequestProperty("Accept", getResponseMimeType());
232 connection.setUseCaches(false);
233 connection.setDoInput(true);
234 connection.setDoOutput(multipleIds);
238 writePostBody(connection, ids);
241 InputStream response = connection.getInputStream();
242 int responseCode = connection.getResponseCode();
244 if (responseCode != 200)
247 * note: a GET request for an invalid id returns an error code e.g. 415
248 * but POST request returns 200 and an empty Fasta response
250 throw new IOException(
251 "Response code was not 200. Detected response was "
254 // System.out.println(getClass().getName() + " took "
255 // + (System.currentTimeMillis() - now) + "ms to fetch");
257 checkRateLimits(connection);
259 BufferedReader reader = null;
260 reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
265 * Inspect response headers for any sign of server overload and respect any
266 * 'retry-after' directive
268 * @see https://github.com/Ensembl/ensembl-rest/wiki/Rate-Limits
271 void checkRateLimits(HttpURLConnection connection)
273 // number of requests allowed per time interval:
274 String limit = connection.getHeaderField("X-RateLimit-Limit");
275 // length of quota time interval in seconds:
276 // String period = connection.getHeaderField("X-RateLimit-Period");
277 // seconds remaining until usage quota is reset:
278 String reset = connection.getHeaderField("X-RateLimit-Reset");
279 // number of requests remaining from quota for current period:
280 String remaining = connection.getHeaderField("X-RateLimit-Remaining");
281 // number of seconds to wait before retrying (if remaining == 0)
282 String retryDelay = connection.getHeaderField("Retry-After");
287 if (retryDelay != null)
289 System.err.println("Ensembl REST service rate limit exceeded, wait "
290 + retryDelay + " seconds before retrying");
293 retryAfter = System.currentTimeMillis()
294 + (1000 * Integer.valueOf(retryDelay));
295 } catch (NumberFormatException e)
297 System.err.println("Unexpected value for Retry-After: "
305 // System.out.println(String.format(
306 // "%s Ensembl requests remaining of %s (reset in %ss)",
307 // remaining, limit, reset));
311 * Rechecks if Ensembl is responding, unless the last check was successful and
312 * the retest interval has not yet elapsed. Returns true if Ensembl is up,
317 protected boolean isEnsemblAvailable()
319 long now = System.currentTimeMillis();
322 * check if we are waiting for 'Retry-After' to expire
324 if (retryAfter > now)
326 System.err.println("Still " + (1 + (retryAfter - now) / 1000)
327 + " secs to wait before retrying Ensembl");
335 boolean retest = now - lastCheck > RETEST_INTERVAL;
336 if (ensemblRestAvailable && !retest)
340 ensemblRestAvailable = checkEnsembl();
342 return ensemblRestAvailable;
346 * Constructs, writes and flushes the POST body of the request, containing the
347 * query ids in JSON format
351 * @throws IOException
353 protected void writePostBody(HttpURLConnection connection,
354 List<String> ids) throws IOException
357 StringBuilder postBody = new StringBuilder(64);
358 postBody.append("{\"ids\":[");
360 for (String id : ids)
364 postBody.append(",");
367 postBody.append("\"");
368 postBody.append(id.trim());
369 postBody.append("\"");
371 postBody.append("]}");
372 byte[] thepostbody = postBody.toString().getBytes();
373 connection.setRequestProperty("Content-Length",
374 Integer.toString(thepostbody.length));
375 DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
376 wr.write(thepostbody);