From 53bf23b889e1a3d0a51bdbcba18c6d9ac5b2578b Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Wed, 28 Oct 2015 11:38:41 +0000 Subject: [PATCH] JAL-1950 extremely rough prototype submission/poll routine based on the Jackhammer submission/poll/retrieve perl script --- src/jalview/ws/ebi/hmmerClient.java | 222 +++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 src/jalview/ws/ebi/hmmerClient.java diff --git a/src/jalview/ws/ebi/hmmerClient.java b/src/jalview/ws/ebi/hmmerClient.java new file mode 100644 index 0000000..3db35be --- /dev/null +++ b/src/jalview/ws/ebi/hmmerClient.java @@ -0,0 +1,222 @@ +package jalview.ws.ebi; + +import java.io.File; +import java.util.regex.Matcher; + +import org.apache.axis.transport.http.HTTPConstants; +import org.apache.http.Header; +import org.apache.http.HttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.util.EntityUtils; +import org.json.JSONArray; +import org.json.JSONObject; + +import compbio.util.FileUtil; + +public class hmmerClient +{ + /** + * URLs for ebi api + */ + static String baseUrl = "http://www.ebi.ac.uk/Tools/hmmer", + jackH = "/search/jackhmmer", phmmer = "/search/phmmer", + hmmscan = "/search/hmmscan", hmmsearch = "/search/hmmsearch"; + + static String edseq = ">2abl_A mol:protein length:163 ABL TYROSINE KINASE\nMGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHS\nWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAE\nLVHHHSTVADGLITTLHYPAP"; + + public static void main(String[] args) + { + String instr = edseq; + if (args.length > 0) + { + try + { + instr = FileUtil.readFileToString(new File(args[0])); + } catch (Exception f) + { + f.printStackTrace(); + return; + } + } + String res = new hmmerClient().submitJackhmmerSearch(instr, + "jackhmmer", "pdb", 5); + if (res == null) + { + throw new Error("Failed."); + } + System.out.println("Result\n" + res); + return; + } + /** + * + * @param input + * - fasta or other formatted sequence or alignment + * @param algo + * - jackhmmer + * @param db + * - pdb, uniprot, etc. + * @param niter + * number of iterations + * @return job id + */ + String submitJackhmmerSearch(String input, String algo, String db, + int niter) + { + JSONObject inparam = new JSONObject(); + HttpPost jackhp = new HttpPost(baseUrl + jackH); + String lastiter = null; + try + { + inparam.put("algo", algo); + inparam.put("seq", input); + inparam.put("seqdb", db); + inparam.put("iterations", niter); + // #Now POST the request and generate the search job. + // dumb json post service + jackhp.setHeader("content-type", "application/json"); + jackhp.setEntity(new StringEntity(inparam.toString())); + } catch (Exception f) + { + f.printStackTrace(); + return null; + } + HttpResponse r = null; + try + { + DefaultHttpClient httpCl = new DefaultHttpClient(); + + r = httpCl.execute(jackhp); + + } catch (Exception x) + { + System.err.println("Submit failed."); + x.printStackTrace(); + } + if (r.getStatusLine().getStatusCode() != 201) + { + throw new Error(r.toString()); + } + // get uid for job + String jobid = null, redir = null; + try + { + JSONObject res = new JSONObject(EntityUtils.toString(r.getEntity())); + jobid = res.getString("job_id"); + + Header[] loc; + if ((loc = r.getHeaders(HTTPConstants.HEADER_LOCATION)) != null + && loc.length > 0) + { + if (loc.length > 1) + { + System.err + .println("Ignoring additional " + + (loc.length - 1) + + " location(s) provided in response header ( next one is '" + + loc[1].getValue() + "' )"); + } + redir = loc[0].getValue(); + } + } catch (Exception x) + { + System.err.println("job id extraction failed."); + x.printStackTrace(); + } + int tries = 0; + boolean finished = false; + JSONObject jobstate = null; + do + { + try + { + DefaultHttpClient httpCl = new DefaultHttpClient(); + + HttpGet jackcheck = new HttpGet(redir); + jackcheck.setHeader("content-type", "application/json"); + r = httpCl.execute(jackcheck); + switch (r.getStatusLine().getStatusCode()) + { + case 200: + jobstate = new JSONObject(EntityUtils.toString(r.getEntity())); + String st = jobstate.getString("status"); + if ("DONE".equals(st)) + { + finished = true; + } + if ("ERROR".equals(st)) + { + System.err.println("Error"); + finished = true; + } + if ("PEND".equals(st) || "RUN".equals("st")) + { + JSONArray iters = jobstate.getJSONArray("result"); + lastiter = iters.getJSONObject(iters.length() - 1) + .getString("uuid"); + if (lastiter.length() > 0) + { + java.util.regex.Pattern p = java.util.regex.Pattern + .compile(".+(\\d+)"); + Matcher m = p.matcher(lastiter); + if (m.matches()) + { + System.out.println("On iteration " + m.group(1)); + } + } + } + break; + + default: + tries++; + Thread.sleep(2000); + } + } catch (Exception q) + { + q.printStackTrace(); + return null; + } + } while (!finished && tries < 50); + + if (!finished) + { + System.err.println("Giving up with job " + jobid + " at " + redir); + return null; + } + // get results + // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv + // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43 + // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN + // 2 1cj1_J 1gri_B + // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15 + // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND + // PROTEIN 2 1cj1_J 1gri_B + // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25 + // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein + // phosphatase non-receptor typ 4h1o_A + // + // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28 + // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein + // phosphatase non-receptor typ 4h1o_A + // + // $ua->get( $rootUrl."/results/".$lastIteration->{uuid} . "/score" + return lastiter; + /* + * * #Job should have finished, but we may have converged, so get the last + * job. my $results = $json->decode( $response->content ); my $lastIteration + * = pop( @{ $results->{result} } ); #Now fetch the results of the last + * iteration my $searchResult = $ua->get( $rootUrl."/results/" . + * $lastIteration->{uuid} . "/score", 'Accept' => 'application/json' ); + * unless( $searchResult->status_line eq "200 OK"){ die + * "Failed to get search results\n"; } + * + * #Decode the content of the full set of results $results = $json->decode( + * $searchResult->content ); print + * "Matched ".$results->{'results'}->{'stats'}->{'nincluded'}." sequences + * ($lastIteration->{uuid})!\n"; #Now do something more interesting with the + * results...... + */ + } +} -- 1.7.10.2