--- /dev/null
+package jalview.ws.ebi;
+
+import jalview.datamodel.AlignmentI;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.DataSourceType;
+import jalview.io.FileFormat;
+import jalview.io.FileParse;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.regex.Matcher;
+
+import org.apache.axis.transport.http.HTTPConstants;
+import org.apache.http.Header;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.util.EntityUtils;
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+import compbio.util.FileUtil;
+
+public class hmmerClient
+{
+ /**
+ * URLs for ebi api
+ */
+ static String baseUrl = "http://www.ebi.ac.uk/Tools/hmmer",
+ jackH = "/search/jackhmmer", phmmer = "/search/phmmer",
+ hmmscan = "/search/hmmscan", hmmsearch = "/search/hmmsearch";
+
+ static String edseq = ">2abl_A mol:protein length:163 ABL TYROSINE KINASE\nMGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHS\nWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAE\nLVHHHSTVADGLITTLHYPAP";
+
+ public static void main(String[] args)
+ {
+ String instr = edseq;
+ if (args.length > 0)
+ {
+ try
+ {
+ instr = FileUtil.readFileToString(new File(args[0]));
+ } catch (Exception f)
+ {
+ f.printStackTrace();
+ return;
+ }
+ }
+ String res = new hmmerClient().submitJackhmmerSearch(instr,
+ "jackhmmer", "pdb", 5);
+ if (res == null)
+ {
+ throw new Error("Failed.");
+ }
+ System.out.println("Result\n" + res);
+ return;
+ }
+
+ /**
+ *
+ * @param input
+ * - fasta or other formatted sequence or alignment
+ * @param algo
+ * - jackhmmer
+ * @param db
+ * - pdb, uniprot, etc.
+ * @param niter
+ * number of iterations
+ * @return job id
+ */
+ String submitJackhmmerSearch(String input, String algo, String db,
+ int niter)
+ {
+ JSONObject inparam = new JSONObject();
+ HttpPost jackhp = new HttpPost(baseUrl + jackH);
+ String lastiter = null;
+ try
+ {
+ inparam.put("algo", algo);
+ inparam.put("seq", input);
+ inparam.put("seqdb", db);
+ inparam.put("iterations", niter);
+ // #Now POST the request and generate the search job.
+ // dumb json post service
+ jackhp.setHeader("content-type", "application/json");
+ jackhp.setEntity(new StringEntity(inparam.toString()));
+ } catch (Exception f)
+ {
+ f.printStackTrace();
+ return null;
+ }
+ HttpResponse r = null;
+ try
+ {
+ DefaultHttpClient httpCl = new DefaultHttpClient();
+
+ r = httpCl.execute(jackhp);
+
+ } catch (Exception x)
+ {
+ System.err.println("Submit failed.");
+ x.printStackTrace();
+ }
+ if (r.getStatusLine().getStatusCode() != 201)
+ {
+ throw new Error(r.toString());
+ }
+ // get uid for job
+ String jobid = null, redir = null;
+ try
+ {
+ JSONObject res = new JSONObject(EntityUtils.toString(r.getEntity()));
+ jobid = res.getString("job_id");
+
+ Header[] loc;
+ if ((loc = r.getHeaders(HTTPConstants.HEADER_LOCATION)) != null
+ && loc.length > 0)
+ {
+ if (loc.length > 1)
+ {
+ System.err
+ .println("Ignoring additional "
+ + (loc.length - 1)
+ + " location(s) provided in response header ( next one is '"
+ + loc[1].getValue() + "' )");
+ }
+ redir = loc[0].getValue();
+ }
+ } catch (Exception x)
+ {
+ System.err.println("job id extraction failed.");
+ x.printStackTrace();
+ }
+ int tries = 0;
+ boolean finished = false;
+ JSONObject jobstate = null;
+ do
+ {
+ try
+ {
+ DefaultHttpClient httpCl = new DefaultHttpClient();
+
+ HttpGet jackcheck = new HttpGet(redir);
+ jackcheck.setHeader("content-type", "application/json");
+ r = httpCl.execute(jackcheck);
+ switch (r.getStatusLine().getStatusCode())
+ {
+ case 200:
+ jobstate = new JSONObject(EntityUtils.toString(r.getEntity()));
+ String st = jobstate.getString("status");
+ if ("DONE".equals(st))
+ {
+ finished = true;
+ }
+ if ("ERROR".equals(st))
+ {
+ System.err.println("Error");
+ finished = true;
+ }
+ if ("PEND".equals(st) || "RUN".equals("st"))
+ {
+ JSONArray iters = jobstate.getJSONArray("result");
+ lastiter = iters.getJSONObject(iters.length() - 1).getString(
+ "uuid");
+ if (lastiter.length() > 0)
+ {
+ java.util.regex.Pattern p = java.util.regex.Pattern
+ .compile(".+(\\d+)");
+ Matcher m = p.matcher(lastiter);
+ if (m.matches())
+ {
+ System.out.println("On iteration " + m.group(1));
+ }
+ }
+ }
+ break;
+
+ default:
+ tries++;
+ Thread.sleep(2000);
+ }
+ } catch (Exception q)
+ {
+ q.printStackTrace();
+ return null;
+ }
+ } while (!finished && tries < 50);
+
+ if (!finished)
+ {
+ System.err.println("Giving up with job " + jobid + " at " + redir);
+ return null;
+ }
+ // get results
+ // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
+ // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
+ // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
+ // 2 1cj1_J 1gri_B
+ // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
+ // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
+ // PROTEIN 2 1cj1_J 1gri_B
+ // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
+ // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
+ // phosphatase non-receptor typ 4h1o_A
+ //
+ // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
+ // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
+ // phosphatase non-receptor typ 4h1o_A
+ //
+ // $ua->get( $rootUrl."/results/".$lastIteration->{uuid} . "/score"
+ return lastiter;
+ /*
+ * * #Job should have finished, but we may have converged, so get the last
+ * job. my $results = $json->decode( $response->content ); my $lastIteration
+ * = pop( @{ $results->{result} } ); #Now fetch the results of the last
+ * iteration my $searchResult = $ua->get( $rootUrl."/results/" .
+ * $lastIteration->{uuid} . "/score", 'Accept' => 'application/json' );
+ * unless( $searchResult->status_line eq "200 OK"){ die
+ * "Failed to get search results\n"; }
+ *
+ * #Decode the content of the full set of results $results = $json->decode(
+ * $searchResult->content ); print
+ * "Matched ".$results->{'results'}->{'stats'}->{'nincluded'}." sequences
+ * ($lastIteration->{uuid})!\n"; #Now do something more interesting with the
+ * results......
+ */
+ }
+
+ /**
+ * retrieve an alignment annotated with scores from JackHmmer
+ *
+ * @param jobid
+ * @param dataset
+ * @return
+ */
+ AlignmentI retrieveJackhmmerResult(String jobid, AlignmentI dataset)
+ throws OutOfMemoryError, IOException
+ {
+ AlignmentI searchResult = null;
+
+ // get results
+
+ searchResult = new AppletFormatAdapter().readFile(baseUrl
+ + "/download/" + jobid + "/score?format=afa&t=.gz",
+ DataSourceType.URL, FileFormat.Fasta);
+
+ // TODO extract gapped columns as '.' - inserts to query profile
+
+ // TODO match up jackhammer results to dataset.
+
+ // do scores
+ FileParse jsonsource = new FileParse(baseUrl + "/download/" + jobid
+ + "/score?format=json", DataSourceType.URL);
+ if (!jsonsource.isValid())
+ {
+ throw new IOException("Couldn't access scores for Jackhammer results");
+ }
+ readJackhmmerScores(searchResult, jsonsource);
+ return searchResult;
+ }
+
+ /**
+ * // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
+ *
+ * // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND
+ * PROTEIN // 2 1cj1_J 1gri_B // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62
+ * 212.4 0.1 2 2 1.6e-17 7.9e-15 // 58.8796501159668 0.01 7 66 157 215 153 216
+ * 0.95 GROWTH FACTOR BOUND // PROTEIN 2 1cj1_J 1gri_B // 4h1o_A 4h1o_A 560
+ * jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25 // 92.4921493530273
+ * 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein // phosphatase non-receptor
+ * typ 4h1o_A
+ */
+ private static String[] _hmmsearchcols = new String[] { "acc", "name", "" };
+
+ private void readJackhmmerScores(AlignmentI searchResult,
+ FileParse jsonsource) throws IOException, OutOfMemoryError
+ {
+ HmmerJSONProcessor hjp = new HmmerJSONProcessor(searchResult);
+ hjp.parseFrom(jsonsource);
+
+ // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
+ // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
+ // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
+ // 2 1cj1_J 1gri_B
+ // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
+ // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
+ // PROTEIN 2 1cj1_J 1gri_B
+ // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
+ // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
+ // phosphatase non-receptor typ 4h1o_A
+ // each line scores a fragment
+ // so for a combined score ?
+
+ /**
+ * for a sequence q sort any t against q according to overallScore(q,t)
+ * maxFragment(q,t) in sequence features parlance: for alignment
+ * s.getFeature("overallScore",q) -> range on q and range on s
+ *
+ *
+ */
+
+ // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
+ // 2 1cj1_J 1gri_B
+ // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
+ // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
+ // PROTEIN 2 1cj1_J 1gri_B
+ // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
+ // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
+ // phosphatase non-receptor typ 4h1o_A
+ //
+ // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
+ // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
+ // phosphatase non-receptor typ 4h1o_A
+
+ }
+
+}