package jalview.ws.ebi; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.FileParse; import java.io.IOException; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; public class HmmerJSONProcessor { AlignmentI resultAl; public HmmerJSONProcessor(AlignmentI searchResult) { resultAl = searchResult; } public void parseFrom(FileParse jsonsource) throws IOException, OutOfMemoryError { JSONParser hmmerResultParser = new JSONParser(); Object jsonResults = null; try { jsonResults = hmmerResultParser.parse(jsonsource.getReader()); } catch (Exception p) { throw new IOException("While parsing from " + jsonsource.getInFile(), p); } if (jsonResults == null) { throw new IOException("No data at" + jsonsource.getInFile()); } if (!(jsonResults instanceof JSONObject)) { throw new IOException("Unexpected JSON model at " + jsonsource.getInFile()); } try { JSONObject hmmsearchr = (JSONObject) ((JSONObject) jsonResults) .get("results"); // now process the hits addStatistics((JSONObject) hmmsearchr.get("stats")); JSONArray jsonArray = (JSONArray) hmmsearchr.get("hits"); long p = 1; for (Object hit : jsonArray) { JSONObject hmmhit = (JSONObject) hit; addHit(hmmhit, p++); } } catch (ClassCastException q) { throw new IOException("Unexpected JSON model content at " + jsonsource.getInFile(), q); } } /** * * @param object * - actually a JSONObject key value set of search statistics. */ public void addStatistics(JSONObject stats) { for (Object stat : stats.keySet()) { String key = (String) stat; Object val = stats.get(key); resultAl.setProperty(key, "" + val); } } // encodings for JSON keys /** * score becomes sequence associated AlignmentAnnotation */ private String[] score = { "aliId", "ali_IdCount", "bitscore", "ievalue", "aliSim", "aliSimCount", "aliL", "aliSim", "ievalue", "cevalue" }; /** * attrib becomes numeric or binary attribute for sequence with respect to * this hmmsearch run */ private String[] attrib = { "bias", "oasc", "is_included", "is_reported" }; /** * name of the hmmsearch query */ private String[] label = { "alihmmname" // (query label?)}, }; /** * integer attributes for each */ private String[] ipos = { "alihmmfrom", "alihmmto" }, pos_l = { "alimline", "alimodel", "alirfline" }; /** * positional quantitative annotation encoded as strings. */ private String[] pos_nscore = { "alippline" }; // // mapping of keys to types of property on sequence // public void addHit(JSONObject hmmrhit, long p) { String sname = (String) hmmrhit.get("name"); SequenceI[] hits = resultAl.findSequenceMatch(sname); if (hits == null) { System.err.println("No seq for " + sname); } double pvalue = (Double) hmmrhit.get("pvalue"); double evalue = Double.valueOf("" + hmmrhit.get("evalue")); for (Object domainhit : ((JSONArray) hmmrhit.get("domains"))) { JSONObject dhit = (JSONObject) domainhit; // dhit.get(key) // alihmmfrom,alihmmto alimodel long alihmmfrom = (long) dhit.get("alihmmfrom"), alihmmto = (long) dhit .get("alihmmto"), alisqfrom = (long) dhit.get("alisqfrom"), alisqto = (long) dhit .get("alisqto"); // alisqfrom,alisqto,aliaseq // alippline String aliaseq = (String) dhit.get("aliaseq"), alimodel = (String) dhit .get("alimodel"), ppline = (String) dhit.get("alippline"); // int found = 0; for (SequenceI hitseq : hits) { // match alisqfrom,alisqto,seq if (hitseq.getStart() == alisqfrom && hitseq.getEnd() == alisqto) { found++; // annotated a sequence AlignmentAnnotation alipp = parsePosteriorProb(ppline); AlignmentAnnotation pval = new AlignmentAnnotation("p-value", "hmmer3 pvalue", pvalue); AlignmentAnnotation eval = new AlignmentAnnotation("e-value", "hmmer3 evalue", evalue); pval.setCalcId("HMMER3"); eval.setCalcId("HMMER3"); alipp.setCalcId("HMMER3"); hitseq.addAlignmentAnnotation(pval); hitseq.addAlignmentAnnotation(eval); alipp.createSequenceMapping(hitseq, hitseq.getStart(), false); hitseq.addAlignmentAnnotation(alipp); hitseq.addSequenceFeature(new SequenceFeature( "Pfam Domain Architecture", (hmmrhit.get("archindex")) + " " + (String) hmmrhit.get("arch"), 0, 0, Integer .valueOf((String) hmmrhit.get("archScore")), "HMMER3")); alipp.setScore(Double.valueOf("" + dhit.get("bitscore"))); alipp.adjustForAlignment(); resultAl.addAnnotation(pval); resultAl.addAnnotation(eval); resultAl.addAnnotation(alipp); alipp.validateRangeAndDisplay(); } } if (found == 0) { System.err.println("Warn - no match for json hit " + sname + "/" + alisqfrom + "-" + alisqto); } if (found > 1) { System.err.println("Warn - multiple matches for json hit " + sname + "/" + alisqfrom + "-" + alisqto); } } } private AlignmentAnnotation parsePosteriorProb(String ppline) { Annotation[] ae = new Annotation[ppline.length()]; int spos = 0; for (int i = 0, iSize = ppline.length(); i < iSize; i++) { char pp = ppline.charAt(i); if (pp == '*') { ae[spos++] = new Annotation(10f); } else { if (pp >= '0' && pp <= '9') { ae[spos++] = new Annotation(Integer.valueOf("" + pp)); } } } AlignmentAnnotation pprob = new AlignmentAnnotation( "Posterior Probability", "Likelihood of HMM fit at each hit position.", ae); pprob.graph = pprob.BAR_GRAPH; pprob.visible = false; return pprob; } }