1 package jalview.ext.ensembl;
3 import jalview.io.FileParse;
5 import java.io.BufferedReader;
6 import java.io.DataOutputStream;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.HttpURLConnection;
11 import java.net.MalformedURLException;
13 import java.util.List;
15 import javax.ws.rs.HttpMethod;
17 import com.stevesoft.pat.Regex;
20 * Base class for Ensembl REST service clients
24 abstract class EnsemblRestClient extends EnsemblSequenceFetcher
26 private final static String ENSEMBL_REST = "http://rest.ensembl.org";
28 protected final static String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org";
30 // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
31 private static final String PING_URL = "http://rest.ensembl.org/info/ping.json";
33 private final static long RETEST_INTERVAL = 10000L; // 10 seconds
35 private static final Regex TRANSCRIPT_REGEX = new Regex(
36 "(ENS)([A-Z]{3}|)T[0-9]{11}$");
38 private static final Regex GENE_REGEX = new Regex(
39 "(ENS)([A-Z]{3}|)G[0-9]{11}$");
41 private String domain = ENSEMBL_REST;
43 private static boolean ensemblRestAvailable = false;
45 private static long lastCheck = -1;
47 protected volatile boolean inProgress = false;
50 * Default constructor to use rest.ensembl.org
52 public EnsemblRestClient()
58 * Constructor given the target domain to fetch data from
62 public EnsemblRestClient(String d)
72 void setDomain(String d)
77 public boolean isTranscriptIdentifier(String query)
79 return query == null ? false : TRANSCRIPT_REGEX.search(query);
82 public boolean isGeneIdentifier(String query)
84 return query == null ? false : GENE_REGEX.search(query);
88 public boolean queryInProgress()
94 public StringBuffer getRawRecords()
100 * Returns the URL for the client http request
104 * @throws MalformedURLException
106 protected abstract URL getUrl(List<String> ids)
107 throws MalformedURLException;
110 * Returns true if client uses GET method, false if it uses POST
114 protected abstract boolean useGetRequest();
117 * Return the desired value for the Content-Type request header
122 * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
124 protected abstract String getRequestMimeType(boolean multipleIds);
127 * Return the desired value for the Accept request header
130 * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
132 protected abstract String getResponseMimeType();
135 * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
136 * successful, else false
140 private boolean checkEnsembl()
144 // note this format works for both ensembl and ensemblgenomes
145 // info/ping.json works for ensembl only (March 2016)
146 URL ping = new URL(getDomain()
147 + "/info/ping?content-type=application/json");
148 HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
149 int rc = conn.getResponseCode();
151 if (rc >= 200 && rc < 300)
155 } catch (Throwable t)
157 System.err.println("Error connecting to " + PING_URL + ": "
164 * returns a reader to a Fasta response from the Ensembl sequence endpoint
168 * @throws IOException
170 protected FileParse getSequenceReader(List<String> ids)
173 URL url = getUrl(ids);
175 BufferedReader reader = getHttpResponse(url, ids);
176 FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
181 * Writes the HTTP request and gets the response as a reader.
185 * written as Json POST body if more than one
187 * @throws IOException
188 * if response code was not 200, or other I/O error
190 protected BufferedReader getHttpResponse(URL url, List<String> ids)
193 // long now = System.currentTimeMillis();
194 HttpURLConnection connection = (HttpURLConnection) url.openConnection();
197 * POST method allows multiple queries in one request; it is supported for
198 * sequence queries, but not for overlap
200 boolean multipleIds = ids.size() > 1;// useGetRequest();
201 connection.setRequestMethod(multipleIds ? HttpMethod.POST
203 connection.setRequestProperty("Content-Type",
204 getRequestMimeType(multipleIds));
205 connection.setRequestProperty("Accept", getResponseMimeType());
207 connection.setUseCaches(false);
208 connection.setDoInput(true);
209 connection.setDoOutput(multipleIds);
213 writePostBody(connection, ids);
216 InputStream response = connection.getInputStream();
217 int responseCode = connection.getResponseCode();
219 if (responseCode != 200)
222 * note: a GET request for an invalid id returns an error code e.g. 415
223 * but POST request returns 200 and an empty Fasta response
225 throw new IOException(
226 "Response code was not 200. Detected response was "
229 // System.out.println(getClass().getName() + " took "
230 // + (System.currentTimeMillis() - now) + "ms to fetch");
232 BufferedReader reader = null;
233 reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
238 * Rechecks if Ensembl is responding, unless the last check was successful and
239 * the retest interval has not yet elapsed. Returns true if Ensembl is up,
244 protected boolean isEnsemblAvailable()
246 long now = System.currentTimeMillis();
247 boolean retest = now - lastCheck > RETEST_INTERVAL;
248 if (ensemblRestAvailable && !retest)
252 ensemblRestAvailable = checkEnsembl();
254 return ensemblRestAvailable;
258 * Constructs, writes and flushes the POST body of the request, containing the
259 * query ids in JSON format
263 * @throws IOException
265 protected void writePostBody(HttpURLConnection connection,
266 List<String> ids) throws IOException
269 StringBuilder postBody = new StringBuilder(64);
270 postBody.append("{\"ids\":[");
272 for (String id : ids)
276 postBody.append(",");
279 postBody.append("\"");
280 postBody.append(id.trim());
281 postBody.append("\"");
283 postBody.append("]}");
284 byte[] thepostbody = postBody.toString().getBytes();
285 connection.setRequestProperty("Content-Length",
286 Integer.toString(thepostbody.length));
287 DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
288 wr.write(thepostbody);