1 package jalview.ext.ensembl;
3 import jalview.io.FileParse;
5 import java.io.BufferedReader;
6 import java.io.DataOutputStream;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.HttpURLConnection;
11 import java.net.MalformedURLException;
13 import java.util.List;
15 import javax.ws.rs.HttpMethod;
17 import com.stevesoft.pat.Regex;
20 * Base class for Ensembl REST service clients
24 abstract class EnsemblRestClient extends EnsemblSequenceFetcher
26 private final static String ENSEMBL_REST = "http://rest.ensembl.org";
28 protected final static String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org";
30 // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
31 private static final String PING_URL = "http://rest.ensembl.org/info/ping.json";
33 private final static long RETEST_INTERVAL = 10000L; // 10 seconds
35 private static final Regex TRANSCRIPT_REGEX = new Regex(
36 "(ENS)([A-Z]{3}|)T[0-9]{11}$");
38 private static final Regex GENE_REGEX = new Regex(
39 "(ENS)([A-Z]{3}|)G[0-9]{11}$");
41 private String domain = ENSEMBL_REST;
43 private static boolean ensemblRestAvailable = false;
45 private static long lastCheck = -1;
48 * absolute time to wait till if we overloaded the REST service
50 private static long retryAfter;
52 protected volatile boolean inProgress = false;
55 * Default constructor to use rest.ensembl.org
57 public EnsemblRestClient()
63 * Constructor given the target domain to fetch data from
67 public EnsemblRestClient(String d)
73 * Returns the domain name to query e.g. http://rest.ensembl.org or
74 * http://rest.ensemblgenomes.org
83 void setDomain(String d)
88 public boolean isTranscriptIdentifier(String query)
90 return query == null ? false : TRANSCRIPT_REGEX.search(query);
93 public boolean isGeneIdentifier(String query)
95 return query == null ? false : GENE_REGEX.search(query);
99 public boolean queryInProgress()
105 public StringBuffer getRawRecords()
111 * Returns the URL for the client http request
115 * @throws MalformedURLException
117 protected abstract URL getUrl(List<String> ids)
118 throws MalformedURLException;
121 * Returns true if client uses GET method, false if it uses POST
125 protected abstract boolean useGetRequest();
128 * Return the desired value for the Content-Type request header
133 * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
135 protected abstract String getRequestMimeType(boolean multipleIds);
138 * Return the desired value for the Accept request header
141 * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
143 protected abstract String getResponseMimeType();
146 * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
147 * successful, else false
151 private boolean checkEnsembl()
155 // note this format works for both ensembl and ensemblgenomes
156 // info/ping.json works for ensembl only (March 2016)
157 URL ping = new URL(getDomain()
158 + "/info/ping?content-type=application/json");
159 HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
160 int rc = conn.getResponseCode();
162 if (rc >= 200 && rc < 300)
166 } catch (Throwable t)
168 System.err.println("Error connecting to " + PING_URL + ": "
175 * returns a reader to a Fasta response from the Ensembl sequence endpoint
179 * @throws IOException
181 protected FileParse getSequenceReader(List<String> ids)
184 URL url = getUrl(ids);
186 BufferedReader reader = getHttpResponse(url, ids);
187 FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
192 * Writes the HTTP request and gets the response as a reader.
196 * written as Json POST body if more than one
198 * @throws IOException
199 * if response code was not 200, or other I/O error
201 protected BufferedReader getHttpResponse(URL url, List<String> ids)
204 // long now = System.currentTimeMillis();
205 HttpURLConnection connection = (HttpURLConnection) url.openConnection();
208 * POST method allows multiple queries in one request; it is supported for
209 * sequence queries, but not for overlap
211 boolean multipleIds = ids.size() > 1;// useGetRequest();
212 connection.setRequestMethod(multipleIds ? HttpMethod.POST
214 connection.setRequestProperty("Content-Type",
215 getRequestMimeType(multipleIds));
216 connection.setRequestProperty("Accept", getResponseMimeType());
218 connection.setUseCaches(false);
219 connection.setDoInput(true);
220 connection.setDoOutput(multipleIds);
224 writePostBody(connection, ids);
227 InputStream response = connection.getInputStream();
228 int responseCode = connection.getResponseCode();
230 if (responseCode != 200)
233 * note: a GET request for an invalid id returns an error code e.g. 415
234 * but POST request returns 200 and an empty Fasta response
236 throw new IOException(
237 "Response code was not 200. Detected response was "
240 // System.out.println(getClass().getName() + " took "
241 // + (System.currentTimeMillis() - now) + "ms to fetch");
243 checkRateLimits(connection);
245 BufferedReader reader = null;
246 reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
251 * Inspect response headers for any sign of server overload and respect any
252 * 'retry-after' directive
254 * @see https://github.com/Ensembl/ensembl-rest/wiki/Rate-Limits
257 void checkRateLimits(HttpURLConnection connection)
259 // number of requests allowed per time interval:
260 String limit = connection.getHeaderField("X-RateLimit-Limit");
261 // length of quota time interval in seconds:
262 // String period = connection.getHeaderField("X-RateLimit-Period");
263 // seconds remaining until usage quota is reset:
264 String reset = connection.getHeaderField("X-RateLimit-Reset");
265 // number of requests remaining from quota for current period:
266 String remaining = connection.getHeaderField("X-RateLimit-Remaining");
267 // number of seconds to wait before retrying (if remaining == 0)
268 String retryDelay = connection.getHeaderField("Retry-After");
273 if (retryDelay != null)
275 System.err.println("Ensembl REST service rate limit exceeded, wait "
276 + retryDelay + " seconds before retrying");
279 retryAfter = System.currentTimeMillis()
280 + (1000 * Integer.valueOf(retryDelay));
281 } catch (NumberFormatException e)
283 System.err.println("Unexpected value for Retry-After: "
291 // System.out.println(String.format(
292 // "%s Ensembl requests remaining of %s (reset in %ss)",
293 // remaining, limit, reset));
297 * Rechecks if Ensembl is responding, unless the last check was successful and
298 * the retest interval has not yet elapsed. Returns true if Ensembl is up,
303 protected boolean isEnsemblAvailable()
305 long now = System.currentTimeMillis();
308 * check if we are waiting for 'Retry-After' to expire
310 if (retryAfter > now)
312 System.err.println("Still " + (1 + (retryAfter - now) / 1000)
313 + " secs to wait before retrying Ensembl");
321 boolean retest = now - lastCheck > RETEST_INTERVAL;
322 if (ensemblRestAvailable && !retest)
326 ensemblRestAvailable = checkEnsembl();
328 return ensemblRestAvailable;
332 * Constructs, writes and flushes the POST body of the request, containing the
333 * query ids in JSON format
337 * @throws IOException
339 protected void writePostBody(HttpURLConnection connection,
340 List<String> ids) throws IOException
343 StringBuilder postBody = new StringBuilder(64);
344 postBody.append("{\"ids\":[");
346 for (String id : ids)
350 postBody.append(",");
353 postBody.append("\"");
354 postBody.append(id.trim());
355 postBody.append("\"");
357 postBody.append("]}");
358 byte[] thepostbody = postBody.toString().getBytes();
359 connection.setRequestProperty("Content-Length",
360 Integer.toString(thepostbody.length));
361 DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
362 wr.write(thepostbody);