package jalview.ws.ebi;

import jalview.datamodel.AlignmentI;
import jalview.io.AppletFormatAdapter;
import jalview.io.DataSourceType;
import jalview.io.FileFormat;
import jalview.io.FileParse;

import java.io.File;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.axis.transport.http.HTTPConstants;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONObject;

import compbio.util.FileUtil;

/**
 * Minimal client for the EBI HMMER REST service: submits a jackhmmer search,
 * polls the job's status URL until it completes, and can download the
 * resulting alignment together with its JSON scores.
 */
public class hmmerClient
{
  /**
   * URLs for ebi api
   */
  static String baseUrl = "http://www.ebi.ac.uk/Tools/hmmer",
          jackH = "/search/jackhmmer", phmmer = "/search/phmmer",
          hmmscan = "/search/hmmscan", hmmsearch = "/search/hmmsearch";

  /**
   * Built-in FASTA test sequence used by {@link #main(String[])} when no
   * input file is given.
   */
  static String edseq = ">2abl_A mol:protein length:163 ABL TYROSINE KINASE\n"
          + "MGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHS\n"
          + "WYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAE\n"
          + "LVHHHSTVADGLITTLHYPAP";

  /** Pause between two polls of the job status URL, in milliseconds. */
  private static final long POLL_INTERVAL_MS = 2000;

  /** Number of unexpected (non-200) poll responses tolerated before giving up. */
  private static final int MAX_FAILED_POLLS = 50;

  /**
   * Extracts the iteration number from an iteration uuid of the form
   * <code>60048B38-7CEC-11E5-A230-CED6D26C98AD.5</code>. Anchoring on the
   * final '.' captures the whole number, not just its last digit.
   */
  private static final Pattern ITERATION_SUFFIX = Pattern
          .compile(".+\\.(\\d+)");

  /**
   * Command line test harness: runs a five iteration jackhmmer search against
   * pdb with either the built-in sequence or the contents of the file named
   * by the first argument, and prints the value returned by
   * {@link #submitJackhmmerSearch(String, String, String, int)}.
   * 
   * @param args
   *          optional; args[0] is the path of a file holding the query
   */
  public static void main(String[] args)
  {
    String instr = edseq;
    if (args.length > 0)
    {
      try
      {
        instr = FileUtil.readFileToString(new File(args[0]));
      } catch (Exception f)
      {
        f.printStackTrace();
        return;
      }
    }
    String res = new hmmerClient().submitJackhmmerSearch(instr,
            "jackhmmer", "pdb", 5);
    if (res == null)
    {
      throw new Error("Failed.");
    }
    System.out.println("Result\n" + res);
  }

  /**
   * Submits a jackhmmer job and blocks until the service reports that it has
   * finished.
   * <p>
   * The job is POSTed as JSON; the service is expected to answer with HTTP
   * 201, a JSON body holding <code>job_id</code> and a Location header naming
   * the status URL. That URL is then polled every {@value #POLL_INTERVAL_MS}
   * ms. Polling stops when the status is DONE or ERROR, or after
   * {@value #MAX_FAILED_POLLS} responses with an unexpected HTTP status. A job
   * that stays pending is polled indefinitely.
   * 
   * @param input
   *          - fasta or other formatted sequence or alignment
   * @param algo
   *          - jackhmmer
   * @param db
   *          - pdb, uniprot, etc.
   * @param niter
   *          number of iterations
   * @return uuid of the last iteration listed in the job's <code>result</code>
   *         array, or null if the job could not be submitted, monitored or
   *         completed
   * @throws Error
   *           if the submit request is answered with a status other than 201
   */
  String submitJackhmmerSearch(String input, String algo, String db,
          int niter)
  {
    HttpPost jackhp = new HttpPost(baseUrl + jackH);
    try
    {
      JSONObject inparam = new JSONObject();
      inparam.put("algo", algo);
      inparam.put("seq", input);
      inparam.put("seqdb", db);
      inparam.put("iterations", niter);
      // #Now POST the request and generate the search job.
      // dumb json post service
      jackhp.setHeader("content-type", "application/json");
      // JSON is sent as UTF-8 rather than the entity's default charset
      jackhp.setEntity(new StringEntity(inparam.toString(), "UTF-8"));
    } catch (Exception f)
    {
      f.printStackTrace();
      return null;
    }

    // one client serves the submit and every poll; it is shut down below
    DefaultHttpClient httpCl = new DefaultHttpClient();
    try
    {
      HttpResponse r;
      try
      {
        r = httpCl.execute(jackhp);
      } catch (Exception x)
      {
        System.err.println("Submit failed.");
        x.printStackTrace();
        // there is no response to inspect, so stop here
        return null;
      }
      if (r.getStatusLine().getStatusCode() != 201)
      {
        throw new Error(r.toString());
      }

      // get uid for job
      String jobid = null, redir = null;
      try
      {
        JSONObject res = new JSONObject(
                EntityUtils.toString(r.getEntity()));
        jobid = res.getString("job_id");
        redir = firstLocation(r);
      } catch (Exception x)
      {
        System.err.println("job id extraction failed.");
        x.printStackTrace();
      }
      if (redir == null)
      {
        System.err.println("No status URL available for job " + jobid);
        return null;
      }
      return pollForLastIteration(httpCl, jobid, redir);
    } finally
    {
      httpCl.getConnectionManager().shutdown();
    }
  }

  /**
   * Returns the value of the first Location header of a response, warning on
   * stderr when more than one is present.
   * 
   * @param r
   *          response to the submit request
   * @return the first Location value, or null if the header is absent
   */
  private static String firstLocation(HttpResponse r)
  {
    Header[] loc = r.getHeaders(HTTPConstants.HEADER_LOCATION);
    if (loc == null || loc.length == 0)
    {
      return null;
    }
    if (loc.length > 1)
    {
      System.err.println("Ignoring additional " + (loc.length - 1)
              + " location(s) provided in response header ( next one is '"
              + loc[1].getValue() + "' )");
    }
    return loc[0].getValue();
  }

  /**
   * Polls the job status URL until the job is DONE or ERROR, or until too
   * many unexpected responses have been received.
   * 
   * @param httpCl
   *          client to issue the requests with
   * @param jobid
   *          job id, used for diagnostics only
   * @param redir
   *          status URL taken from the submit response
   * @return uuid of the last iteration seen, or null on error, interruption
   *         or when giving up
   */
  private String pollForLastIteration(DefaultHttpClient httpCl,
          String jobid, String redir)
  {
    String lastiter = null;
    int tries = 0;
    boolean finished = false;
    do
    {
      try
      {
        HttpGet jackcheck = new HttpGet(redir);
        jackcheck.setHeader("content-type", "application/json");
        // the reference Perl client asks for JSON via Accept
        jackcheck.setHeader("Accept", "application/json");
        HttpResponse r = httpCl.execute(jackcheck);
        // always read the body so the connection can be reused
        String body = (r.getEntity() == null) ? null : EntityUtils
                .toString(r.getEntity());
        if (r.getStatusLine().getStatusCode() == 200)
        {
          JSONObject jobstate = new JSONObject(body);
          String st = jobstate.getString("status");
          // Job may have converged early, so always track the last
          // iteration listed - including in the final DONE response.
          String uuid = lastIterationUuid(jobstate);
          if (uuid != null)
          {
            lastiter = uuid;
          }
          if ("DONE".equals(st))
          {
            finished = true;
          }
          else if ("ERROR".equals(st))
          {
            System.err.println("Error");
            return null;
          }
          else if (("PEND".equals(st) || "RUN".equals(st))
                  && uuid != null)
          {
            Matcher m = ITERATION_SUFFIX.matcher(uuid);
            if (m.matches())
            {
              System.out.println("On iteration " + m.group(1));
            }
          }
        }
        else
        {
          tries++;
        }
        if (!finished)
        {
          // wait between polls whatever the response was
          Thread.sleep(POLL_INTERVAL_MS);
        }
      } catch (InterruptedException ie)
      {
        // keep the interrupt visible to the caller's thread
        Thread.currentThread().interrupt();
        return null;
      } catch (Exception q)
      {
        q.printStackTrace();
        return null;
      }
    } while (!finished && tries < MAX_FAILED_POLLS);
    if (!finished)
    {
      System.err.println("Giving up with job " + jobid + " at " + redir);
      return null;
    }
    // get results
    // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
    // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
    // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
    // 2 1cj1_J 1gri_B
    // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
    // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
    // PROTEIN 2 1cj1_J 1gri_B
    // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
    // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
    // phosphatase non-receptor typ 4h1o_A
    //
    // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
    // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
    // phosphatase non-receptor typ 4h1o_A
    //
    // $ua->get( $rootUrl."/results/".$lastIteration->{uuid} . "/score"

    /*
     * Reference (Perl) client logic this method mirrors:
     * 
     * #Job should have finished, but we may have converged, so get the last
     * job. my $results = $json->decode( $response->content ); my $lastIteration
     * = pop( @{ $results->{result} } ); #Now fetch the results of the last
     * iteration my $searchResult = $ua->get( $rootUrl."/results/" .
     * $lastIteration->{uuid} . "/score", 'Accept' => 'application/json' );
     * unless( $searchResult->status_line eq "200 OK"){ die
     * "Failed to get search results\n"; }
     * 
     * #Decode the content of the full set of results $results = $json->decode(
     * $searchResult->content ); print
     * "Matched ".$results->{'results'}->{'stats'}->{'nincluded'}." sequences
     * ($lastIteration->{uuid})!\n"; #Now do something more interesting with the
     * results......
     */
    return lastiter;
  }

  /**
   * Reads the uuid of the last entry of the <code>result</code> array of a
   * job status document.
   * 
   * @param jobstate
   *          parsed job status response
   * @return the uuid, or null if the array is absent or empty, or its last
   *         entry has no non-empty <code>uuid</code>
   */
  private static String lastIterationUuid(JSONObject jobstate)
  {
    JSONArray iters = jobstate.optJSONArray("result");
    if (iters == null || iters.length() == 0)
    {
      return null;
    }
    JSONObject last = iters.optJSONObject(iters.length() - 1);
    if (last == null)
    {
      return null;
    }
    String uuid = last.optString("uuid");
    return uuid.length() > 0 ? uuid : null;
  }

  /**
   * retrieve an alignment annotated with scores from JackHmmer
   * <p>
   * Downloads <code>/download/{jobid}/score</code> twice: once as aligned
   * fasta (format=afa) to build the alignment, and once as JSON, which is
   * passed to a {@link HmmerJSONProcessor} together with the alignment.
   * 
   * @param jobid
   *          identifier placed in the download URL (presumably the iteration
   *          uuid returned by
   *          {@link #submitJackhmmerSearch(String, String, String, int)} -
   *          TODO confirm)
   * @param dataset
   *          currently unused (see TODO on matching results to the dataset)
   * @return the alignment read from the service
   * @throws IOException
   *           if the JSON score source is not valid, or reading fails
   */
  AlignmentI retrieveJackhmmerResult(String jobid, AlignmentI dataset)
          throws OutOfMemoryError, IOException
  {
    AlignmentI searchResult = null;

    // get results
    searchResult = new AppletFormatAdapter().readFile(baseUrl
            + "/download/" + jobid + "/score?format=afa&t=.gz",
            DataSourceType.URL, FileFormat.Fasta);

    // TODO extract gapped columns as '.' - inserts to query profile
    // TODO match up jackhammer results to dataset.

    // do scores
    FileParse jsonsource = new FileParse(baseUrl + "/download/" + jobid
            + "/score?format=json", DataSourceType.URL);
    if (!jsonsource.isValid())
    {
      throw new IOException(
              "Couldn't access scores for Jackhammer results");
    }
    readJackhmmerScores(searchResult, jsonsource);
    return searchResult;
  }

  /**
   * Column names for score rows (currently unused). Example rows:
   * 
   * <pre>
   * 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
   * 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
   * 2 1cj1_J 1gri_B
   * 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
   * 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
   * PROTEIN 2 1cj1_J 1gri_B
   * 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
   * 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
   * phosphatase non-receptor typ 4h1o_A
   * </pre>
   */
  private static String[] _hmmsearchcols = new String[] { "acc", "name",
      "" };

  /**
   * Parses the JSON score document with a {@link HmmerJSONProcessor} created
   * for the given alignment.
   * 
   * @param searchResult
   *          alignment handed to the processor
   * @param jsonsource
   *          source of the JSON score document
   * @throws IOException
   *           declared for failures while parsing the source
   */
  private void readJackhmmerScores(AlignmentI searchResult,
          FileParse jsonsource) throws IOException, OutOfMemoryError
  {
    HmmerJSONProcessor hjp = new HmmerJSONProcessor(searchResult);
    hjp.parseFrom(jsonsource);

    // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
    // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
    // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
    // 2 1cj1_J 1gri_B
    // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
    // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
    // PROTEIN 2 1cj1_J 1gri_B
    // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
    // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
    // phosphatase non-receptor typ 4h1o_A
    //
    // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
    // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
    // phosphatase non-receptor typ 4h1o_A

    // each line scores a fragment
    // so for a combined score ?

    /*
     * for a sequence q sort any t against q according to overallScore(q,t)
     * maxFragment(q,t) in sequence features parlance: for alignment
     * s.getFeature("overallScore",q) -> range on q and range on s
     */
  }
}