1 package jalview.ws.ebi;
3 import jalview.datamodel.AlignmentI;
4 import jalview.io.AppletFormatAdapter;
5 import jalview.io.FileParse;
6 import jalview.io.FormatAdapter;
9 import java.io.IOException;
10 import java.util.regex.Matcher;
12 import org.apache.axis.transport.http.HTTPConstants;
13 import org.apache.http.Header;
14 import org.apache.http.HttpResponse;
15 import org.apache.http.client.methods.HttpGet;
16 import org.apache.http.client.methods.HttpPost;
17 import org.apache.http.entity.StringEntity;
18 import org.apache.http.impl.client.DefaultHttpClient;
19 import org.apache.http.util.EntityUtils;
20 import org.json.JSONArray;
21 import org.json.JSONObject;
23 import compbio.util.FileUtil;
25 public class hmmerClient
30 static String baseUrl = "http://www.ebi.ac.uk/Tools/hmmer",
31 jackH = "/search/jackhmmer", phmmer = "/search/phmmer",
32 hmmscan = "/search/hmmscan", hmmsearch = "/search/hmmsearch";
34 static String edseq = ">2abl_A mol:protein length:163 ABL TYROSINE KINASE\nMGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHS\nWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAE\nLVHHHSTVADGLITTLHYPAP";
36 public static void main(String[] args)
43 instr = FileUtil.readFileToString(new File(args[0]));
50 String res = new hmmerClient().submitJackhmmerSearch(instr,
51 "jackhmmer", "pdb", 5);
54 throw new Error("Failed.");
56 System.out.println("Result\n" + res);
63 * - fasta or other formatted sequence or alignment
67 * - pdb, uniprot, etc.
69 * number of iterations
72 String submitJackhmmerSearch(String input, String algo, String db,
75 JSONObject inparam = new JSONObject();
76 HttpPost jackhp = new HttpPost(baseUrl + jackH);
77 String lastiter = null;
80 inparam.put("algo", algo);
81 inparam.put("seq", input);
82 inparam.put("seqdb", db);
83 inparam.put("iterations", niter);
84 // #Now POST the request and generate the search job.
85 // dumb json post service
86 jackhp.setHeader("content-type", "application/json");
87 jackhp.setEntity(new StringEntity(inparam.toString()));
93 HttpResponse r = null;
96 DefaultHttpClient httpCl = new DefaultHttpClient();
98 r = httpCl.execute(jackhp);
100 } catch (Exception x)
102 System.err.println("Submit failed.");
105 if (r.getStatusLine().getStatusCode() != 201)
107 throw new Error(r.toString());
110 String jobid = null, redir = null;
113 JSONObject res = new JSONObject(EntityUtils.toString(r.getEntity()));
114 jobid = res.getString("job_id");
117 if ((loc = r.getHeaders(HTTPConstants.HEADER_LOCATION)) != null
123 .println("Ignoring additional "
125 + " location(s) provided in response header ( next one is '"
126 + loc[1].getValue() + "' )");
128 redir = loc[0].getValue();
130 } catch (Exception x)
132 System.err.println("job id extraction failed.");
136 boolean finished = false;
137 JSONObject jobstate = null;
142 DefaultHttpClient httpCl = new DefaultHttpClient();
144 HttpGet jackcheck = new HttpGet(redir);
145 jackcheck.setHeader("content-type", "application/json");
146 r = httpCl.execute(jackcheck);
147 switch (r.getStatusLine().getStatusCode())
150 jobstate = new JSONObject(EntityUtils.toString(r.getEntity()));
151 String st = jobstate.getString("status");
152 if ("DONE".equals(st))
156 if ("ERROR".equals(st))
158 System.err.println("Error");
161 if ("PEND".equals(st) || "RUN".equals("st"))
163 JSONArray iters = jobstate.getJSONArray("result");
164 lastiter = iters.getJSONObject(iters.length() - 1).getString(
166 if (lastiter.length() > 0)
168 java.util.regex.Pattern p = java.util.regex.Pattern
169 .compile(".+(\\d+)");
170 Matcher m = p.matcher(lastiter);
173 System.out.println("On iteration " + m.group(1));
183 } catch (Exception q)
188 } while (!finished && tries < 50);
192 System.err.println("Giving up with job " + jobid + " at " + redir);
196 // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
197 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
198 // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
200 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
201 // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
202 // PROTEIN 2 1cj1_J 1gri_B
203 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
204 // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
205 // phosphatase non-receptor typ 4h1o_A
207 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
208 // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
209 // phosphatase non-receptor typ 4h1o_A
211 // $ua->get( $rootUrl."/results/".$lastIteration->{uuid} . "/score"
214 * * #Job should have finished, but we may have converged, so get the last
215 * job. my $results = $json->decode( $response->content ); my $lastIteration
216 * = pop( @{ $results->{result} } ); #Now fetch the results of the last
217 * iteration my $searchResult = $ua->get( $rootUrl."/results/" .
218 * $lastIteration->{uuid} . "/score", 'Accept' => 'application/json' );
219 * unless( $searchResult->status_line eq "200 OK"){ die
220 * "Failed to get search results\n"; }
222 * #Decode the content of the full set of results $results = $json->decode(
223 * $searchResult->content ); print
224 * "Matched ".$results->{'results'}->{'stats'}->{'nincluded'}." sequences
225 * ($lastIteration->{uuid})!\n"; #Now do something more interesting with the
231 * retrieve an alignment annotated with scores from JackHmmer
237 AlignmentI retrieveJackhmmerResult(String jobid, AlignmentI dataset)
238 throws OutOfMemoryError, IOException
240 AlignmentI searchResult = null;
244 searchResult = new AppletFormatAdapter().readFile(baseUrl
245 + "/download/" + jobid + "/score?format=afa&t=.gz",
246 FormatAdapter.URL, "FASTA");
248 // TODO extract gapped columns as '.' - inserts to query profile
250 // TODO match up jackhammer results to dataset.
253 FileParse jsonsource = new FileParse(baseUrl + "/download/" + jobid
254 + "/score?format=json", FormatAdapter.URL);
255 if (!jsonsource.isValid())
257 throw new IOException("Couldn't access scores for Jackhammer results");
259 readJackhmmerScores(searchResult, jsonsource);
264 * // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
266 * // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND
267 * PROTEIN // 2 1cj1_J 1gri_B // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62
268 * 212.4 0.1 2 2 1.6e-17 7.9e-15 // 58.8796501159668 0.01 7 66 157 215 153 216
269 * 0.95 GROWTH FACTOR BOUND // PROTEIN 2 1cj1_J 1gri_B // 4h1o_A 4h1o_A 560
270 * jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25 // 92.4921493530273
271 * 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein // phosphatase non-receptor
274 private static String[] _hmmsearchcols = new String[] { "acc", "name", "" };
276 private void readJackhmmerScores(AlignmentI searchResult,
277 FileParse jsonsource) throws IOException, OutOfMemoryError
279 HmmerJSONProcessor hjp = new HmmerJSONProcessor(searchResult);
280 hjp.parseFrom(jsonsource);
282 // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
283 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
284 // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
286 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
287 // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
288 // PROTEIN 2 1cj1_J 1gri_B
289 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
290 // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
291 // phosphatase non-receptor typ 4h1o_A
292 // each line scores a fragment
293 // so for a combined score ?
296 * for a sequence q sort any t against q according to overallScore(q,t)
297 * maxFragment(q,t) in sequence features parlance: for alignment
298 * s.getFeature("overallScore",q) -> range on q and range on s
303 // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
305 // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
306 // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
307 // PROTEIN 2 1cj1_J 1gri_B
308 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
309 // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
310 // phosphatase non-receptor typ 4h1o_A
312 // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
313 // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
314 // phosphatase non-receptor typ 4h1o_A