JAL-653 refactor / tidy
[jalview.git] / src / jalview / ext / ensembl / SeqFetcher.java
1 package jalview.ext.ensembl;
2
3 import jalview.io.FileParse;
4
5 import java.io.BufferedReader;
6 import java.io.DataOutputStream;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.HttpURLConnection;
11 import java.net.URL;
12 import java.net.URLConnection;
13 import java.util.ArrayList;
14 import java.util.List;
15
16 import org.apache.http.NameValuePair;
17 import org.apache.http.message.BasicNameValuePair;
18
19 public class SeqFetcher
20 {
21   private final static String ENSEMBL_REST = "rest.ensembl.org";
22
23   private static final String SEQUENCE_ID_URL = "http://" + ENSEMBL_REST + "/sequence/id";
24
25   private static final String PING_URL = "http://" + ENSEMBL_REST + "/info/ping";
26
27   private final static long RETEST_INTERVAL = 10000L; // 10 seconds
28
29   private static boolean ensemblRestAvailable = false;
30
31   private static long lastCheck = -1;
32
33   /**
34    * Rechecks if Ensembl is responding, unless the last check was successful and
35    * the retest interval has not yet elapsed. Returns true if Ensembl is up,
36    * else false.
37    * 
38    * @return
39    */
40   public boolean isEnsemblAvailable()
41   {
42     long now = System.currentTimeMillis();
43     boolean retest = now - lastCheck > RETEST_INTERVAL;
44     if (ensemblRestAvailable && !retest)
45     {
46       return true;
47     }
48     ensemblRestAvailable = checkEnsembl();
49     lastCheck = now;
50     return ensemblRestAvailable;
51   }
52
53   /**
54    * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
55    * successful, else false
56    * 
57    * @return
58    */
59   private boolean checkEnsembl()
60   {
61     try
62     {
63       URL ping = new URL(PING_URL);
64       HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
65       int rc = conn.getResponseCode();
66       conn.disconnect();
67       if (rc >= 200 && rc < 300)
68       {
69         return true;
70       }
71     } catch (Throwable t)
72     {
73       System.err.println("Error connecting to " + PING_URL + ": "
74               + t.getMessage());
75     }
76     return false;
77   }
78
79   public SeqFetcher()
80   {
81   }
82
83   public enum EnsemblSeqType
84   {
85     GENOMIC("genomic"), CDS("cds"), TRANSCRIPT("cds"), PROTEIN("protein"), CDNA(
86             "cdna");
87
88     private String type;
89
90     EnsemblSeqType(String t)
91     {
92       type = t;
93     }
94
95     public String getType()
96     {
97       return type;
98     }
99   }
100
101   /**
102    * Returns a list of additional URL query parameters to specify the desired
103    * sequence type (genomic/cds/protein etc), and data format Fasta
104    * 
105    * @param type
106    */
107   public List<NameValuePair> getAdditionalParameters(EnsemblSeqType type)
108   {
109     List<NameValuePair> params = new ArrayList<NameValuePair>();
110     params.add(new BasicNameValuePair("type", type.getType()));
111     params.add(new BasicNameValuePair("content-type", "text/x-fasta"));
112     return params;
113   }
114
115   /**
116    * return a reader to a Fasta response from the Ensembl sequence endpoint
117    * 
118    * @param returnType
119    * @param ids
120    * @return
121    * @throws IOException
122    */
123   public FileParse getSequenceReader(EnsemblSeqType returnType,
124           List<String> ids) throws IOException
125   {
126     // see http://rest.ensembl.org/documentation/info/sequence_id
127
128     String urlstring = SEQUENCE_ID_URL;
129     List<NameValuePair> vals = getAdditionalParameters(returnType);
130     boolean first = true;
131     for (NameValuePair nvp : vals)
132     {
133       urlstring += first ? "?" : "&";
134       first = false;
135       urlstring += nvp.getName() + "=" + nvp.getValue();
136     }
137
138     URL url = new URL(urlstring);
139
140     URLConnection connection = url.openConnection();
141     HttpURLConnection httpConnection = (HttpURLConnection) connection;
142
143     httpConnection.setRequestMethod("POST");
144     httpConnection.setRequestProperty("Content-Type", "application/json");
145     httpConnection.setRequestProperty("Accept", "text/x-fasta");
146     byte[] thepostbody;
147     {
148       StringBuilder postBody = new StringBuilder();
149       postBody.append("{\"ids\":[");
150       first = true;
151       for (String id : ids)
152       {
153         if (!first)
154         {
155           postBody.append(",");
156         }
157         first = false;
158         postBody.append("\"");
159         postBody.append(id.trim());
160         postBody.append("\"");
161       }
162       postBody.append("]}");
163       thepostbody = postBody.toString().getBytes();
164     }
165     httpConnection.setRequestProperty("Content-Length",
166             Integer.toString(thepostbody.length));
167     httpConnection.setUseCaches(false);
168     httpConnection.setDoInput(true);
169     httpConnection.setDoOutput(true);
170
171     DataOutputStream wr = new DataOutputStream(
172             httpConnection.getOutputStream());
173     wr.write(thepostbody);
174     wr.flush();
175     wr.close();
176
177     InputStream response = connection.getInputStream();
178     int responseCode = httpConnection.getResponseCode();
179
180     if (responseCode != 200)
181     {
182       throw new RuntimeException(
183               "Response code was not 200. Detected response was "
184                       + responseCode);
185     }
186
187     BufferedReader reader = null;
188     reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
189     FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
190     return fp;
191   }
192
193 }