1 package jalview.ws.ebi;
3 import jalview.datamodel.AlignmentI;
4 import jalview.io.AppletFormatAdapter;
5 import jalview.io.FileParse;
6 import jalview.io.FormatAdapter;
8 import java.io.BufferedReader;
10 import java.io.IOException;
11 import java.util.StringTokenizer;
12 import java.util.regex.Matcher;
14 import org.apache.axis.transport.http.HTTPConstants;
15 import org.apache.http.Header;
16 import org.apache.http.HttpResponse;
17 import org.apache.http.client.methods.HttpGet;
18 import org.apache.http.client.methods.HttpPost;
19 import org.apache.http.entity.StringEntity;
20 import org.apache.http.impl.client.DefaultHttpClient;
21 import org.apache.http.util.EntityUtils;
22 import org.json.JSONArray;
23 import org.json.JSONObject;
25 import compbio.util.FileUtil;
// Client for the EBI HMMER web service rooted at baseUrl.
// NOTE(review): the class name starts with a lower-case letter, contrary to the
// usual Java convention; renaming it would affect callers, so it is left as is.
27 public class hmmerClient
// Service root followed by the relative paths of the search endpoints. Only
// jackH is referenced in the visible part of this file.
32 static String baseUrl = "http://www.ebi.ac.uk/Tools/hmmer",
33 jackH = "/search/jackhmmer", phmmer = "/search/phmmer",
34 hmmscan = "/search/hmmscan", hmmsearch = "/search/hmmsearch";
// FASTA-formatted example protein record (header line starting with '>'
// followed by the residues).
// NOTE(review): presumably the default query used by main - the lines that
// would confirm this are missing from this extract.
36 static String edseq = ">2abl_A mol:protein length:163 ABL TYROSINE KINASE\nMGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHS\nWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAE\nLVHHHSTVADGLITTLHYPAP";
// Command-line entry point: submits one search via submitJackhmmerSearch with
// "jackhmmer", "pdb" and 5 as the trailing arguments, then prints the result.
// NOTE(review): this extract omits several lines (the declaration of instr, the
// guard around the file read and the condition before the throw), so the notes
// below describe only the statements that are visible.
38 public static void main(String[] args)
// Load the query text from the file named by the first command-line argument.
45 instr = FileUtil.readFileToString(new File(args[0]));
// The last argument (5) is presumably the iteration count - the parameter name
// is not visible in this extract.
52 String res = new hmmerClient().submitJackhmmerSearch(instr,
53 "jackhmmer", "pdb", 5);
// NOTE(review): java.lang.Error is normally reserved for unrecoverable JVM
// conditions; an exception such as IllegalStateException would be more
// conventional for a failed search.
56 throw new Error("Failed.");
58 System.out.println("Result\n" + res);
64 * - fasta or other formatted sequence or alignment
68 * - pdb, uniprot, etc.
70 * number of iterations
// Submits a search to the jackhmmer endpoint of the EBI HMMER service as a JSON
// POST, then repeatedly GETs the job's status URL until the job reports DONE or
// ERROR, or the retry limit in the loop condition is reached.
// NOTE(review): this listing is an extract. The remaining parameter(s) (niter
// is used below), the braces, the try/do/case keywords, the declaration and
// update of 'tries' and the return statements are not visible, so the comments
// below describe only the statements that can be seen.
73 String submitJackhmmerSearch(String input, String algo, String db,
// Request body and target of the initial POST.
76 JSONObject inparam = new JSONObject();
77 HttpPost jackhp = new HttpPost(baseUrl + jackH);
// Value taken from the last entry of the job's "result" array while polling.
78 String lastiter = null;
81 inparam.put("algo", algo);
82 inparam.put("seq", input);
83 inparam.put("seqdb", db);
84 inparam.put("iterations", niter);
85 // #Now POST the request and generate the search job.
86 // dumb json post service
87 jackhp.setHeader("content-type", "application/json");
88 jackhp.setEntity(new StringEntity(inparam.toString()));
94 HttpResponse r = null;
// NOTE(review): no shutdown/close of this client is visible in this extract.
97 DefaultHttpClient httpCl = new DefaultHttpClient();
99 r = httpCl.execute(jackhp);
101 } catch (Exception x)
103 System.err.println("Submit failed.");
// The service is expected to answer the POST with 201; anything else aborts.
// NOTE(review): r is only assigned by the execute call above. Unless the catch
// handler (not fully visible here) returns or rethrows, r is still null at this
// point and the status check would fail with a NullPointerException.
106 if (r.getStatusLine().getStatusCode() != 201)
108 throw new Error(r.toString());
// jobid comes from the JSON body of the response; redir is the first value of
// the header named by HTTPConstants.HEADER_LOCATION and is the URL polled below.
111 String jobid = null, redir = null;
114 JSONObject res = new JSONObject(EntityUtils.toString(r.getEntity()));
115 jobid = res.getString("job_id");
// Only the first location header is used; a message is printed about the rest.
118 if ((loc = r.getHeaders(HTTPConstants.HEADER_LOCATION)) != null
124 .println("Ignoring additional "
126 + " location(s) provided in response header ( next one is '"
127 + loc[1].getValue() + "' )");
129 redir = loc[0].getValue();
131 } catch (Exception x)
133 System.err.println("job id extraction failed.");
// NOTE(review): if the extraction above failed and the handler does not return,
// redir is still null when the polling request is built below.
137 boolean finished = false;
138 JSONObject jobstate = null;
// Polling request: fetch the job state from the redirect URL.
// NOTE(review): a new client is created here for each poll; no shutdown is
// visible in this extract.
143 DefaultHttpClient httpCl = new DefaultHttpClient();
145 HttpGet jackcheck = new HttpGet(redir);
146 jackcheck.setHeader("content-type", "application/json");
147 r = httpCl.execute(jackcheck);
148 switch (r.getStatusLine().getStatusCode())
// Decode the job state and branch on its "status" field.
151 jobstate = new JSONObject(EntityUtils.toString(r.getEntity()));
152 String st = jobstate.getString("status");
153 if ("DONE".equals(st))
157 if ("ERROR".equals(st))
159 System.err.println("Error");
// Compare RUN against the variable st (not the string literal "st") so that
// running jobs are recognised as well as pending ones.
162 if ("PEND".equals(st) || "RUN".equals(st))
// Take the last entry of the "result" array as the current iteration.
164 JSONArray iters = jobstate.getJSONArray("result");
165 lastiter = iters.getJSONObject(iters.length() - 1)
167 if (lastiter.length() > 0)
// Reluctant prefix plus an end anchor, so group 1 holds the whole of the last
// run of digits. A greedy ".+" prefix would swallow all but the final digit and
// report, for example, iteration 12 as 2.
169 java.util.regex.Pattern p = java.util.regex.Pattern
170 .compile(".+?(\\d+)\\D*$");
171 Matcher m = p.matcher(lastiter);
174 System.out.println("On iteration " + m.group(1));
184 } catch (Exception q)
// Keep polling until finished is set or tries reaches 50 (the update of tries
// is not visible in this extract).
189 } while (!finished && tries < 50);
193 System.err.println("Giving up with job " + jobid + " at " + redir);
197 // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
198 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
199 // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
201 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
202 // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
203 // PROTEIN 2 1cj1_J 1gri_B
204 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
205 // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
206 // phosphatase non-receptor typ 4h1o_A
208 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
209 // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
210 // phosphatase non-receptor typ 4h1o_A
212 // $ua->get( $rootUrl."/results/".$lastIteration->{uuid} . "/score"
215 * * #Job should have finished, but we may have converged, so get the last
216 * job. my $results = $json->decode( $response->content ); my $lastIteration
217 * = pop( @{ $results->{result} } ); #Now fetch the results of the last
218 * iteration my $searchResult = $ua->get( $rootUrl."/results/" .
219 * $lastIteration->{uuid} . "/score", 'Accept' => 'application/json' );
220 * unless( $searchResult->status_line eq "200 OK"){ die
221 * "Failed to get search results\n"; }
223 * #Decode the content of the full set of results $results = $json->decode(
224 * $searchResult->content ); print
225 * "Matched ".$results->{'results'}->{'stats'}->{'nincluded'}." sequences
226 * ($lastIteration->{uuid})!\n"; #Now do something more interesting with the
232 * retrieve an alignment annotated with scores from JackHmmer
// Downloads the result of a job from the service's /download/<jobid>/score
// resource: first as an alignment parsed as FASTA, then as a score table that
// is passed to readJackhmmerScores together with the alignment.
// NOTE(review): the braces and the return statement are missing from this
// extract, and the dataset parameter is not referenced in the visible lines.
238 AlignmentI retrieveJackhmmerResult(String jobid, AlignmentI dataset)
239 throws OutOfMemoryError, IOException
241 AlignmentI searchResult = null;
// Read the alignment straight from the download URL (format=afa).
245 searchResult = new AppletFormatAdapter().readFile(baseUrl
246 + "/download/" + jobid
247 + "/score?format=afa&t=.gz", FormatAdapter.URL, "FASTA");
249 // match up to dataset.
// Open the score table (format=csv) for the same job and fail if it cannot be
// accessed.
// NOTE(review): the message below spells the tool "Jackhammer" whereas the
// identifiers in this file use "Jackhmmer".
253 FileParse csvsource = new FileParse(baseUrl + "/download/" + jobid
254 + "/score?format=csv", FormatAdapter.URL);
255 if (!csvsource.isValid())
257 throw new IOException("Couldn't access scores for Jackhammer results");
// Hand the alignment and the score source to the score reader.
259 readJackhmmerScores(searchResult, csvsource);
263 private void readJackhmmerScores(AlignmentI searchResult,
268 BufferedReader rl = new BufferedReader(csvsource.getReader());
269 while ((line = rl.readLine()) != null)
271 StringTokenizer st = new StringTokenizer(line, "\t");
274 // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
275 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
276 // each line scores a fragment
277 // so for a combined score ?
280 * for a sequence q sort any t against q according to overallScore(q,t)
281 * maxFragment(q,t) in sequence features parlance: for alignment
282 * s.getFeature("overallScore",q) -> range on q and range on s
287 // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
289 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
290 // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
291 // PROTEIN 2 1cj1_J 1gri_B
292 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
293 // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
294 // phosphatase non-receptor typ 4h1o_A
296 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
297 // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
298 // phosphatase non-receptor typ 4h1o_A