1 package jalview.ws.ebi;
3 import jalview.datamodel.AlignmentI;
4 import jalview.io.AppletFormatAdapter;
5 import jalview.io.DataSourceType;
6 import jalview.io.FileFormat;
7 import jalview.io.FileParse;
10 import java.io.IOException;
11 import java.util.regex.Matcher;
13 import org.apache.axis.transport.http.HTTPConstants;
14 import org.apache.http.Header;
15 import org.apache.http.HttpResponse;
16 import org.apache.http.client.methods.HttpGet;
17 import org.apache.http.client.methods.HttpPost;
18 import org.apache.http.entity.StringEntity;
19 import org.apache.http.impl.client.DefaultHttpClient;
20 import org.apache.http.util.EntityUtils;
21 import org.json.JSONArray;
22 import org.json.JSONObject;
24 import compbio.util.FileUtil;
26 public class hmmerClient
31 static String baseUrl = "http://www.ebi.ac.uk/Tools/hmmer",
32 jackH = "/search/jackhmmer", phmmer = "/search/phmmer",
33 hmmscan = "/search/hmmscan", hmmsearch = "/search/hmmsearch";
35 static String edseq = ">2abl_A mol:protein length:163 ABL TYROSINE KINASE\nMGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHS\nWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAE\nLVHHHSTVADGLITTLHYPAP";
37 public static void main(String[] args)
44 instr = FileUtil.readFileToString(new File(args[0]));
51 String res = new hmmerClient().submitJackhmmerSearch(instr,
52 "jackhmmer", "pdb", 5);
55 throw new Error("Failed.");
57 System.out.println("Result\n" + res);
64 * - fasta or other formatted sequence or alignment
68 * - pdb, uniprot, etc.
70 * number of iterations
73 String submitJackhmmerSearch(String input, String algo, String db,
76 JSONObject inparam = new JSONObject();
77 HttpPost jackhp = new HttpPost(baseUrl + jackH);
78 String lastiter = null;
81 inparam.put("algo", algo);
82 inparam.put("seq", input);
83 inparam.put("seqdb", db);
84 inparam.put("iterations", niter);
85 // #Now POST the request and generate the search job.
86 // dumb json post service
87 jackhp.setHeader("content-type", "application/json");
88 jackhp.setEntity(new StringEntity(inparam.toString()));
94 HttpResponse r = null;
97 DefaultHttpClient httpCl = new DefaultHttpClient();
99 r = httpCl.execute(jackhp);
101 } catch (Exception x)
103 System.err.println("Submit failed.");
106 if (r.getStatusLine().getStatusCode() != 201)
108 throw new Error(r.toString());
111 String jobid = null, redir = null;
114 JSONObject res = new JSONObject(EntityUtils.toString(r.getEntity()));
115 jobid = res.getString("job_id");
118 if ((loc = r.getHeaders(HTTPConstants.HEADER_LOCATION)) != null
124 .println("Ignoring additional "
126 + " location(s) provided in response header ( next one is '"
127 + loc[1].getValue() + "' )");
129 redir = loc[0].getValue();
131 } catch (Exception x)
133 System.err.println("job id extraction failed.");
137 boolean finished = false;
138 JSONObject jobstate = null;
143 DefaultHttpClient httpCl = new DefaultHttpClient();
145 HttpGet jackcheck = new HttpGet(redir);
146 jackcheck.setHeader("content-type", "application/json");
147 r = httpCl.execute(jackcheck);
148 switch (r.getStatusLine().getStatusCode())
151 jobstate = new JSONObject(EntityUtils.toString(r.getEntity()));
152 String st = jobstate.getString("status");
153 if ("DONE".equals(st))
157 if ("ERROR".equals(st))
159 System.err.println("Error");
162 if ("PEND".equals(st) || "RUN".equals("st"))
164 JSONArray iters = jobstate.getJSONArray("result");
165 lastiter = iters.getJSONObject(iters.length() - 1).getString(
167 if (lastiter.length() > 0)
169 java.util.regex.Pattern p = java.util.regex.Pattern
170 .compile(".+(\\d+)");
171 Matcher m = p.matcher(lastiter);
174 System.out.println("On iteration " + m.group(1));
184 } catch (Exception q)
189 } while (!finished && tries < 50);
193 System.err.println("Giving up with job " + jobid + " at " + redir);
197 // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
198 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
199 // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
201 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
202 // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
203 // PROTEIN 2 1cj1_J 1gri_B
204 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
205 // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
206 // phosphatase non-receptor typ 4h1o_A
208 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
209 // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
210 // phosphatase non-receptor typ 4h1o_A
212 // $ua->get( $rootUrl."/results/".$lastIteration->{uuid} . "/score"
215 * * #Job should have finished, but we may have converged, so get the last
216 * job. my $results = $json->decode( $response->content ); my $lastIteration
217 * = pop( @{ $results->{result} } ); #Now fetch the results of the last
218 * iteration my $searchResult = $ua->get( $rootUrl."/results/" .
219 * $lastIteration->{uuid} . "/score", 'Accept' => 'application/json' );
220 * unless( $searchResult->status_line eq "200 OK"){ die
221 * "Failed to get search results\n"; }
223 * #Decode the content of the full set of results $results = $json->decode(
224 * $searchResult->content ); print
225 * "Matched ".$results->{'results'}->{'stats'}->{'nincluded'}." sequences
226 * ($lastIteration->{uuid})!\n"; #Now do something more interesting with the
232 * retrieve an alignment annotated with scores from JackHmmer
238 AlignmentI retrieveJackhmmerResult(String jobid, AlignmentI dataset)
239 throws OutOfMemoryError, IOException
241 AlignmentI searchResult = null;
245 searchResult = new AppletFormatAdapter().readFile(baseUrl
246 + "/download/" + jobid + "/score?format=afa&t=.gz",
247 DataSourceType.URL, FileFormat.Fasta);
249 // TODO extract gapped columns as '.' - inserts to query profile
251 // TODO match up jackhammer results to dataset.
254 FileParse jsonsource = new FileParse(baseUrl + "/download/" + jobid
255 + "/score?format=json", DataSourceType.URL);
256 if (!jsonsource.isValid())
258 throw new IOException("Couldn't access scores for Jackhammer results");
260 readJackhmmerScores(searchResult, jsonsource);
265 * // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
267 * // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND
268 * PROTEIN // 2 1cj1_J 1gri_B // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62
269 * 212.4 0.1 2 2 1.6e-17 7.9e-15 // 58.8796501159668 0.01 7 66 157 215 153 216
270 * 0.95 GROWTH FACTOR BOUND // PROTEIN 2 1cj1_J 1gri_B // 4h1o_A 4h1o_A 560
271 * jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25 // 92.4921493530273
272 * 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein // phosphatase non-receptor
275 private static String[] _hmmsearchcols = new String[] { "acc", "name", "" };
277 private void readJackhmmerScores(AlignmentI searchResult,
278 FileParse jsonsource) throws IOException, OutOfMemoryError
280 HmmerJSONProcessor hjp = new HmmerJSONProcessor(searchResult);
281 hjp.parseFrom(jsonsource);
283 // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
284 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
285 // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
287 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
288 // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
289 // PROTEIN 2 1cj1_J 1gri_B
290 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
291 // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
292 // phosphatase non-receptor typ 4h1o_A
293 // each line scores a fragment
294 // so for a combined score ?
297 * for a sequence q sort any t against q according to overallScore(q,t)
298 * maxFragment(q,t) in sequence features parlance: for alignment
299 * s.getFeature("overallScore",q) -> range on q and range on s
304 // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
306 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
307 // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
308 // PROTEIN 2 1cj1_J 1gri_B
309 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
310 // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
311 // phosphatase non-receptor typ 4h1o_A
313 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
314 // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
315 // phosphatase non-receptor typ 4h1o_A