1 package jalview.ext.ensembl;
3 import jalview.io.FileParse;
5 import java.io.BufferedReader;
6 import java.io.DataOutputStream;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.HttpURLConnection;
11 import java.net.MalformedURLException;
13 import java.util.List;
15 import javax.ws.rs.HttpMethod;
17 import com.stevesoft.pat.Regex;
20 * Base class for Ensembl REST service clients
24 abstract class EnsemblRestClient extends EnsemblSequenceFetcher
26 private final static String ENSEMBL_REST = "http://rest.ensembl.org";
28 protected final static String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org";
30 // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
31 private static final String PING_URL = "http://rest.ensembl.org/info/ping.json";
33 private final static long RETEST_INTERVAL = 10000L; // 10 seconds
35 private static final Regex TRANSCRIPT_REGEX = new Regex(
36 "(ENS)([A-Z]{3}|)T[0-9]{11}$");
38 private static final Regex GENE_REGEX = new Regex(
39 "(ENS)([A-Z]{3}|)G[0-9]{11}$");
41 private String domain = ENSEMBL_REST;
43 private static boolean ensemblRestAvailable = false;
45 private static long lastCheck = -1;
48 * absolute time (milliseconds since the epoch) until which requests should wait, set when we have overloaded the REST service
50 private static long retryAfter;
52 protected volatile boolean inProgress = false;
55 * Default constructor to use rest.ensembl.org
57 public EnsemblRestClient()
63 * Constructor given the target domain to fetch data from
67 public EnsemblRestClient(String d)
77 void setDomain(String d)
82 public boolean isTranscriptIdentifier(String query)
84 return query == null ? false : TRANSCRIPT_REGEX.search(query);
87 public boolean isGeneIdentifier(String query)
89 return query == null ? false : GENE_REGEX.search(query);
93 public boolean queryInProgress()
99 public StringBuffer getRawRecords()
105 * Returns the URL for the client http request
109 * @throws MalformedURLException
// Implemented by subclasses. getSequenceReader calls this with the query ids
// and passes the resulting URL, together with the same ids, to
// getHttpResponse.
111 protected abstract URL getUrl(List<String> ids)
112 throws MalformedURLException;
115 * Returns true if client uses GET method, false if it uses POST
// NOTE(review): in the code visible here getHttpResponse does not call this
// method (its only mention there is commented out); GET versus POST is
// chosen from ids.size() > 1 instead. Confirm whether this hook is still
// needed.
119 protected abstract boolean useGetRequest();
122 * Return the desired value for the Content-Type request header
127 * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
// getHttpResponse passes (ids.size() > 1) as multipleIds and sets the value
// returned here as the Content-Type request header.
129 protected abstract String getRequestMimeType(boolean multipleIds);
132 * Return the desired value for the Accept request header
135 * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
// getHttpResponse sets the value returned here as the Accept request header.
137 protected abstract String getResponseMimeType();
140 * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
141 * successful, else false
// Pings the REST service of the configured domain and tests whether the
// reply carries a 2xx status code.
145 private boolean checkEnsembl()
149 // note this format works for both ensembl and ensemblgenomes
150 // info/ping.json works for ensembl only (March 2016)
// The URL is built from getDomain(), not from the PING_URL constant.
151 URL ping = new URL(getDomain()
152 + "/info/ping?content-type=application/json");
153 HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
// getResponseCode() sends the request and waits for the status line.
154 int rc = conn.getResponseCode();
156 if (rc >= 200 && rc < 300)
// Throwable is caught, so any failure while pinging (bad URL, I/O error,
// runtime error) is reported by the message below instead of propagating.
160 } catch (Throwable t)
// NOTE(review): the message names PING_URL (always rest.ensembl.org), but
// the URL actually contacted is getDomain() + "/info/ping?..."; for another
// domain the log is misleading. Consider printing the ping URL used.
162 System.err.println("Error connecting to " + PING_URL + ": "
169 * Returns a reader for the Fasta response from the Ensembl sequence endpoint
173 * @throws IOException
// Builds the request URL for the ids, fetches the response and wraps the
// response reader in a FileParse.
175 protected FileParse getSequenceReader(List<String> ids)
// The subclass decides the endpoint URL.
178 URL url = getUrl(ids);
180 BufferedReader reader = getHttpResponse(url, ids);
// NOTE(review): the FileParse is labelled "HTTP_POST" unconditionally, yet
// getHttpResponse issues a POST only when there is more than one id; confirm
// that the label is not meant to reflect the method actually used.
181 FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
186 * Writes the HTTP request and gets the response as a reader.
190 * written as a JSON POST body if there is more than one
192 * @throws IOException
193 * if response code was not 200, or other I/O error
// Opens an HTTP connection to url, sets the request method and headers,
// writes the ids as the request body via writePostBody, then checks the
// status code and wraps the response stream in a UTF-8 BufferedReader.
195 protected BufferedReader getHttpResponse(URL url, List<String> ids)
198 // long now = System.currentTimeMillis();
199 HttpURLConnection connection = (HttpURLConnection) url.openConnection();
202 * POST method allows multiple queries in one request; it is supported for
203 * sequence queries, but not for overlap
// The request method is decided by the number of ids; the call to
// useGetRequest() is commented out on the next line.
205 boolean multipleIds = ids.size() > 1;// useGetRequest();
206 connection.setRequestMethod(multipleIds ? HttpMethod.POST
// Content-Type and Accept header values are supplied by the subclass.
208 connection.setRequestProperty("Content-Type",
209 getRequestMimeType(multipleIds));
210 connection.setRequestProperty("Accept", getResponseMimeType());
212 connection.setUseCaches(false);
213 connection.setDoInput(true);
// Output (a request body) is enabled only when there are several ids.
214 connection.setDoOutput(multipleIds);
218 writePostBody(connection, ids);
// NOTE(review): getInputStream() is called before getResponseCode().
// HttpURLConnection.getInputStream() throws an IOException for HTTP error
// statuses, so the "!= 200" test below would only ever see non-error
// statuses, and checkRateLimits() would not run for an error response that
// carries Retry-After (e.g. 429). Consider reading the response code first
// and inspecting the rate-limit headers before throwing - verify.
221 InputStream response = connection.getInputStream();
222 int responseCode = connection.getResponseCode();
224 if (responseCode != 200)
227 * note: a GET request for an invalid id returns an error code e.g. 415
228 * but POST request returns 200 and an empty Fasta response
230 throw new IOException(
231 "Response code was not 200. Detected response was "
234 // System.out.println(getClass().getName() + " took "
235 // + (System.currentTimeMillis() - now) + "ms to fetch");
// Reads the rate-limit headers of the response.
237 checkRateLimits(connection);
// The response body is decoded as UTF-8.
239 BufferedReader reader = null;
240 reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
245 * Inspect response headers for any sign of server overload and respect any
246 * 'retry-after' directive
248 * @see https://github.com/Ensembl/ensembl-rest/wiki/Rate-Limits
// Reads the rate-limit response headers and, when a Retry-After header is
// present, records in the static field retryAfter the time before which no
// retry should be made.
251 void checkRateLimits(HttpURLConnection connection)
// NOTE(review): in the lines visible here limit, reset and remaining are
// referenced only by the commented-out println at the end of the method.
253 // number of requests allowed per time interval:
254 String limit = connection.getHeaderField("X-RateLimit-Limit");
255 // length of quota time interval in seconds:
256 // String period = connection.getHeaderField("X-RateLimit-Period");
257 // seconds remaining until usage quota is reset:
258 String reset = connection.getHeaderField("X-RateLimit-Reset");
259 // number of requests remaining from quota for current period:
260 String remaining = connection.getHeaderField("X-RateLimit-Remaining");
261 // number of seconds to wait before retrying (if remaining == 0)
// getHeaderField returns null when the header is absent.
262 String retryDelay = connection.getHeaderField("Retry-After");
267 if (retryDelay != null)
269 System.err.println("Ensembl REST service rate limit exceeded, wait "
270 + retryDelay + " seconds before retrying");
// The delay in seconds is converted to milliseconds and added to the
// current time.
// NOTE(review): 1000 * Integer.valueOf(...) is evaluated in int arithmetic
// and overflows for delays above Integer.MAX_VALUE / 1000 seconds; a long
// multiplication would avoid that.
// NOTE(review): Integer.valueOf rejects non-integer text. The Ensembl
// rate-limit documentation appears to show fractional values such as
// "40.0", which would take the NumberFormatException branch and leave
// retryAfter unset - confirm against the service.
273 retryAfter = System.currentTimeMillis()
274 + (1000 * Integer.valueOf(retryDelay));
275 } catch (NumberFormatException e)
277 System.err.println("Unexpected value for Retry-After: "
285 // System.out.println(String.format(
286 // "%s Ensembl requests remaining of %s (reset in %ss)",
287 // remaining, limit, reset));
291 * Rechecks if Ensembl is responding, unless the last check was successful and
292 * the retest interval has not yet elapsed. Returns true if Ensembl is up,
// Reports whether the Ensembl REST service is considered available, using
// the static fields retryAfter, lastCheck and ensemblRestAvailable; these
// are static, so the state is shared by all instances of this class.
297 protected boolean isEnsemblAvailable()
299 long now = System.currentTimeMillis();
302 * check if we are waiting for 'Retry-After' to expire
// retryAfter is an absolute time in milliseconds set by checkRateLimits.
304 if (retryAfter > now)
// The remaining wait is printed in seconds; integer division truncates, so
// 1 is added to avoid reporting 0.
306 System.err.println("Still " + (1 + (retryAfter - now) / 1000)
307 + " secs to wait before retrying Ensembl");
// A retest is due once more than RETEST_INTERVAL milliseconds have passed
// since lastCheck.
// NOTE(review): no assignment to lastCheck is visible in this listing;
// confirm that it is updated whenever checkEnsembl() is run, otherwise
// retest is always true and every call pings the service.
315 boolean retest = now - lastCheck > RETEST_INTERVAL;
// A previous successful check is trusted while no retest is due.
316 if (ensemblRestAvailable && !retest)
320 ensemblRestAvailable = checkEnsembl();
322 return ensemblRestAvailable;
326 * Constructs, writes and flushes the POST body of the request, containing the
327 * query ids in JSON format
331 * @throws IOException
333 protected void writePostBody(HttpURLConnection connection,
334 List<String> ids) throws IOException
337 StringBuilder postBody = new StringBuilder(64);
338 postBody.append("{\"ids\":[");
340 for (String id : ids)
344 postBody.append(",");
347 postBody.append("\"");
348 postBody.append(id.trim());
349 postBody.append("\"");
351 postBody.append("]}");
352 byte[] thepostbody = postBody.toString().getBytes();
353 connection.setRequestProperty("Content-Length",
354 Integer.toString(thepostbody.length));
355 DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
356 wr.write(thepostbody);