1 package jalview.ws.ebi;
3 import jalview.datamodel.AlignmentAnnotation;
4 import jalview.datamodel.AlignmentI;
5 import jalview.datamodel.Annotation;
6 import jalview.datamodel.SequenceFeature;
7 import jalview.datamodel.SequenceGroup;
8 import jalview.datamodel.SequenceI;
9 import jalview.io.FileParse;
11 import java.io.IOException;
12 import java.util.HashMap;
15 import org.json.simple.JSONArray;
16 import org.json.simple.JSONObject;
17 import org.json.simple.parser.JSONParser;
19 public class HmmerJSONProcessor
/**
 * Creates a processor bound to one alignment. The argument is stored in the
 * resultAl field, which the other methods visible in this class use as the
 * target for properties, annotations and sequence groups.
 *
 * @param searchResult
 *          alignment to be decorated with the search results; it is stored
 *          as given, and no null check is visible here
 */
23 public HmmerJSONProcessor(AlignmentI searchResult)
25 resultAl = searchResult;
/**
 * Reads a JSON document from the given source and applies its content to the
 * result alignment: the "stats" member is handed to addStatistics and the
 * "hits" member is walked element by element.
 *
 * NOTE(review): this listing omits some lines of the method (the embedded
 * line numbers have gaps), so the rest of the throws clause, the try/catch
 * around the parse call, the end of the expression that selects the results
 * object and the statement that consumes each hit are not shown here.
 *
 * @param jsonsource
 *          supplies the reader that is parsed (getReader()) and the name
 *          quoted in the error messages (getInFile())
 * @throws IOException
 *           raised by the visible throw statements: one following the parse
 *           call ("While parsing from ..."), one when the parser yields
 *           null, one when the top-level value is not a JSONObject, and one
 *           wrapping a ClassCastException caught while the parsed content is
 *           processed
 */
28 public void parseFrom(FileParse jsonsource) throws IOException,
31 JSONParser hmmerResultParser = new JSONParser();
32 Object jsonResults = null;
// The whole document is parsed in one call from the source's reader.
35 jsonResults = hmmerResultParser.parse(jsonsource.getReader());
// The clause guarding this throw and the throw's final argument are on lines
// not shown in this listing (presumably a catch that passes the caught
// exception as the cause - TODO confirm).
38 throw new IOException("While parsing from " + jsonsource.getInFile(),
41 if (jsonResults == null)
// NOTE(review): the literal has no space after "at", so the source name is
// appended directly to the word.
43 throw new IOException("No data at" + jsonsource.getInFile());
// Only a JSON object is accepted as the top-level value.
45 if (!(jsonResults instanceof JSONObject))
47 throw new IOException("Unexpected JSON model at "
48 + jsonsource.getInFile());
// The initializer below continues on a line that is not shown (presumably a
// get(...) on the top-level object - TODO confirm which key).
52 JSONObject hmmsearchr = (JSONObject) ((JSONObject) jsonResults)
54 // now process the hits
// Search-wide statistics are copied onto the alignment before the hits.
55 addStatistics((JSONObject) hmmsearchr.get("stats"));
56 JSONArray jsonArray = (JSONArray) hmmsearchr.get("hits");
// Each element of "hits" is expected to be a JSON object. The call that uses
// hmmhit is not visible here (presumably addHit - TODO confirm).
58 for (Object hit : jsonArray)
60 JSONObject hmmhit = (JSONObject) hit;
// A value of an unexpected type anywhere in the guarded region surfaces as a
// ClassCastException; it is rethrown as an IOException with the original
// exception kept as the cause.
63 } catch (ClassCastException q)
65 throw new IOException("Unexpected JSON model content at "
66 + jsonsource.getInFile(), q);
73 * - a JSONObject holding the search statistics as key/value pairs.
/**
 * Copies every entry of the given JSON object onto the result alignment as a
 * property. Each key is cast to String and each value is converted to text by
 * string concatenation, so a null value is stored as the text "null".
 *
 * NOTE(review): stats is dereferenced without a visible null check, so a
 * null argument would fail with a NullPointerException at keySet().
 *
 * @param stats
 *          JSON object whose entries become alignment properties
 */
75 public void addStatistics(JSONObject stats)
77 for (Object stat : stats.keySet())
// Keys are assumed to be strings; the cast fails otherwise.
79 String key = (String) stat;
80 Object val = stats.get(key);
// "" + val turns any value type (number, boolean, nested object) into text.
81 resultAl.setProperty(key, "" + val);
85 // encodings for JSON keys
// NOTE(review): none of the key tables declared below is referenced by the
// lines of this class that are visible in this listing; how (or whether) they
// are consumed has to be confirmed against the full file. The lines starting
// with "*" are fragments of the original block comments, whose opening and
// closing lines are not part of this listing.
87 * score becomes sequence associated AlignmentAnnotation
// Key names grouped as "score". NOTE(review): "aliSim" and "ievalue" each
// appear twice in this initializer.
89 private String[] score = { "aliId", "ali_IdCount", "bitscore", "ievalue",
90 "aliSim", "aliSimCount", "aliL", "aliSim", "ievalue", "cevalue" };
93 * attrib becomes numeric or binary attribute for sequence with respect to
// Key names grouped as "attrib".
96 private String[] attrib = { "bias", "oasc", "is_included", "is_reported" };
99 * name of the hmmsearch query
// Key names grouped as "label". The "}," at the end of the next line sits
// inside the trailing line comment, so the initializer continues on a line
// that is not shown in this listing.
101 private String[] label = { "alihmmname" // (query label?)},
105 * integer attributes for each
// Two arrays are declared in one statement: ipos and pos_l.
107 private String[] ipos = { "alihmmfrom", "alihmmto" }, pos_l = {
108 "alimline", "alimodel", "alirfline" };
111 * positional quantitative annotation encoded as strings.
// "alippline" is the same key that addHit reads and passes to
// parsePosteriorProb.
113 private String[] pos_nscore = { "alippline" };
116 // mapping of keys to types of property on sequence
/**
 * Applies one entry of the "hits" array to the result alignment. The hit's
 * "name" is used to look up sequences in the alignment; then, for every entry
 * of the hit's "domains" array, each looked-up sequence whose start and end
 * equal the domain's "alisqfrom" and "alisqto" receives p-value, e-value and
 * posterior-probability annotations, a "Pfam Domain Architecture" feature and
 * membership of the group named after the hit's "arch" value.
 *
 * NOTE(review): this listing omits some lines of the method (the embedded
 * line numbers have gaps). The declarations of found and arch, the
 * conditions guarding the three System.err messages and the tail of two
 * multi-line expressions are not shown.
 *
 * @param hmmrhit
 *          JSON object describing one hit; the keys read in the visible
 *          lines are "name", "pvalue", "evalue", "domains", "archindex",
 *          "arch" and "archScore"
 * @param p
 *          not referenced in any visible line of this method
 */
118 public void addHit(JSONObject hmmrhit, long p)
120 String sname = (String) hmmrhit.get("name");
121 SequenceI[] hits = resultAl.findSequenceMatch(sname);
// The condition guarding this message is not visible (presumably the lookup
// returned nothing - TODO confirm, and confirm whether processing stops).
124 System.err.println("No seq for " + sname);
// NOTE(review): a direct (Double) cast throws ClassCastException if the
// parser hands back another Number type (e.g. for a value written without a
// decimal point) and NullPointerException on unboxing if the key is absent -
// confirm against the parser's number mapping. The e-value below goes through
// a string conversion instead, which accepts any numeric type but throws
// NumberFormatException for a missing key (the text "null").
126 double pvalue = (Double) hmmrhit.get("pvalue");
128 double evalue = Double.valueOf("" + hmmrhit.get("evalue"));
// One iteration per domain reported for this hit.
129 for (Object domainhit : ((JSONArray) hmmrhit.get("domains")))
131 JSONObject dhit = (JSONObject) domainhit;
134 // alihmmfrom,alihmmto alimodel
// Coordinates are read with a (long) cast on an Object, which requires each
// value to be a non-null Long. The declaration continues on a line that is
// not shown. alihmmfrom and alihmmto are not used in the visible lines.
135 long alihmmfrom = (long) dhit.get("alihmmfrom"), alihmmto = (long) dhit
136 .get("alihmmto"), alisqfrom = (long) dhit.get("alisqfrom"), alisqto = (long) dhit
139 // alisqfrom,alisqto,aliaseq
// Only ppline is used in the visible lines; aliaseq and alimodel are read but
// not referenced again here.
142 String aliaseq = (String) dhit.get("aliaseq"), alimodel = (String) dhit
143 .get("alimodel"), ppline = (String) dhit.get("alippline");
146 for (SequenceI hitseq : hits)
148 // match alisqfrom,alisqto,seq
// A sequence matches the domain only when both its start and its end equal
// the domain's sequence coordinates.
149 if (hitseq.getStart() == alisqfrom && hitseq.getEnd() == alisqto)
151 found++; // annotated a sequence
// Posterior probabilities come from the domain; p-value and e-value come from
// the enclosing hit and are therefore the same for every domain of the hit.
152 AlignmentAnnotation alipp = parsePosteriorProb(ppline);
153 AlignmentAnnotation pval = new AlignmentAnnotation("p-value",
154 "hmmer3 pvalue", pvalue);
155 AlignmentAnnotation eval = new AlignmentAnnotation("e-value",
156 "hmmer3 evalue", evalue);
// All three annotations are tagged with the same calc id.
157 pval.setCalcId("HMMER3");
158 eval.setCalcId("HMMER3");
159 alipp.setCalcId("HMMER3");
160 hitseq.addAlignmentAnnotation(pval);
161 hitseq.addAlignmentAnnotation(eval);
// The per-position annotation is mapped onto the sequence, beginning at the
// sequence's start position, before it is attached.
162 alipp.createSequenceMapping(hitseq, hitseq.getStart(), false);
163 hitseq.addAlignmentAnnotation(alipp);
// The feature description is "<archindex> <arch>"; arch is assigned inside
// the argument expression and reused for the group name below. The remaining
// constructor arguments are on a line that is not shown.
// NOTE(review): "archScore" is cast to String before Integer.valueOf, which
// throws ClassCastException if the parser delivers it as a number - confirm
// the JSON type.
165 hitseq.addSequenceFeature(new SequenceFeature(
166 "Pfam Domain Architecture", (hmmrhit.get("archindex"))
167 + " " + (arch = (String) hmmrhit.get("arch")), 0,
168 0, Integer.valueOf((String) hmmrhit.get("archScore")),
170 addArchGroup(hitseq, arch);
// The domain's "bitscore" becomes the score of the posterior-probability
// annotation, again via string conversion.
171 alipp.setScore(Double.valueOf("" + dhit.get("bitscore")));
172 alipp.adjustForAlignment();
// The annotations are also registered on the alignment itself.
173 resultAl.addAnnotation(pval);
174 resultAl.addAnnotation(eval);
175 resultAl.addAnnotation(alipp);
176 alipp.validateRangeAndDisplay();
// The conditions guarding the two warnings below are not visible (presumably
// they test the found counter - TODO confirm).
181 System.err.println("Warn - no match for json hit " + sname + "/"
182 + alisqfrom + "-" + alisqto);
186 System.err.println("Warn - multiple matches for json hit " + sname
187 + "/" + alisqfrom + "-" + alisqto);
189 // look for other sequences represented by this hit and create
// Sequence groups created so far, keyed by group name; read and filled by
// addArchGroup.
// NOTE(review): Map is used here but no "import java.util.Map" line is
// visible in this listing; the embedded numbering skips lines in the import
// section, so the import may simply not be shown - confirm.
193 Map<String, SequenceGroup> groups = new HashMap<String, SequenceGroup>();
/**
 * Adds a sequence to the group with the given name, creating the group,
 * caching it in the groups map and registering it on the result alignment
 * when it is created.
 *
 * NOTE(review): the lines holding the branch condition are not part of this
 * listing (presumably a test for sg == null, with the last visible statement
 * in the else branch - TODO confirm).
 *
 * @param seqToAdd
 *          sequence to place in the group
 * @param groupNam
 *          group name, also used as the key in the groups map
 */
195 private void addArchGroup(SequenceI seqToAdd, String groupNam)
197 SequenceGroup sg = groups.get(groupNam);
// New-group path: name it, add the sequence, set its end residue to the
// alignment width minus one, then cache and register it.
200 sg = new SequenceGroup();
201 sg.setName(groupNam);
202 sg.addSequence(seqToAdd, false);
204 sg.setEndRes(resultAl.getWidth() - 1);
205 groups.put(groupNam, sg);
206 resultAl.addGroup(sg);
// Other path: only the sequence is added to the group.
210 sg.addSequence(seqToAdd, false);
/**
 * Builds a "Posterior Probability" annotation from a string that holds one
 * character per position. Digit characters '0' to '9' become annotations
 * carrying their numeric value; one other case, whose condition is not
 * visible in this listing, becomes an annotation carrying 10. The resulting
 * annotation is configured as a bar graph and marked not visible.
 *
 * NOTE(review): the declaration of spos, the condition guarding the 10f case
 * and the method's return statement are on lines not shown here. The 10f
 * case presumably handles the '*' character - TODO confirm.
 *
 * @param ppline
 *          posterior-probability string; addHit passes the domain's
 *          "alippline" value. No null check is visible, so a null argument
 *          would fail at ppline.length()
 */
214 private AlignmentAnnotation parsePosteriorProb(String ppline)
// One slot per input character; spos is the write index into this array.
216 Annotation[] ae = new Annotation[ppline.length()];
218 for (int i = 0, iSize = ppline.length(); i < iSize; i++)
220 char pp = ppline.charAt(i);
// Guarding condition not visible - see the note in the method comment.
223 ae[spos++] = new Annotation(10f);
// Decimal digits map to their own numeric value.
227 if (pp >= '0' && pp <= '9')
229 ae[spos++] = new Annotation(Integer.valueOf("" + pp));
233 AlignmentAnnotation pprob = new AlignmentAnnotation(
234 "Posterior Probability",
235 "Likelihood of HMM fit at each hit position.", ae);
// Drawn as a bar graph, but with the visible flag switched off.
236 pprob.graph = AlignmentAnnotation.BAR_GRAPH;
237 pprob.visible = false;