JAL-1950 group hits by architecture
[jalview.git] / src / jalview / ws / ebi / HmmerJSONProcessor.java
index 4d30b3b..d0f3c18 100644 (file)
@@ -2,10 +2,15 @@ package jalview.ws.ebi;
 
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
 import jalview.io.FileParse;
 
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.json.simple.JSONArray;
 import org.json.simple.JSONObject;
@@ -123,30 +128,113 @@ public class HmmerJSONProcessor
     double evalue = Double.valueOf("" + hmmrhit.get("evalue"));
     for (Object domainhit : ((JSONArray) hmmrhit.get("domains")))
     {
-      JSONObject dhit=(JSONObject) domainhit;
+      JSONObject dhit = (JSONObject) domainhit;
       // dhit.get(key)
 
       // alihmmfrom,alihmmto alimodel
+      long alihmmfrom = (long) dhit.get("alihmmfrom"), alihmmto = (long) dhit
+              .get("alihmmto"), alisqfrom = (long) dhit.get("alisqfrom"), alisqto = (long) dhit
+              .get("alisqto");
+
       // alisqfrom,alisqto,aliaseq
+
       // alippline
-      // 
+      String aliaseq = (String) dhit.get("aliaseq"), alimodel = (String) dhit
+              .get("alimodel"), ppline = (String) dhit.get("alippline");
+      //
+      int found = 0;
       for (SequenceI hitseq : hits)
       {
         // match alisqfrom,alisqto,seq
-        // overlay ppline as seqannotation
-        // ievalue
-        // cevalue
-        // 
-
-        AlignmentAnnotation pval = new AlignmentAnnotation("p-value",
-                "hmmer3 pvalue", pvalue);
-        AlignmentAnnotation eval = new AlignmentAnnotation("e-value",
-                "hmmer3 evalue", evalue);
-        hitseq.addAlignmentAnnotation(pval);
-        hitseq.addAlignmentAnnotation(eval);
-        
+        if (hitseq.getStart() == alisqfrom && hitseq.getEnd() == alisqto)
+        {
+          found++; // annotated a sequence
+          AlignmentAnnotation alipp = parsePosteriorProb(ppline);
+          AlignmentAnnotation pval = new AlignmentAnnotation("p-value",
+                  "hmmer3 pvalue", pvalue);
+          AlignmentAnnotation eval = new AlignmentAnnotation("e-value",
+                  "hmmer3 evalue", evalue);
+          pval.setCalcId("HMMER3");
+          eval.setCalcId("HMMER3");
+          alipp.setCalcId("HMMER3");
+          hitseq.addAlignmentAnnotation(pval);
+          hitseq.addAlignmentAnnotation(eval);
+          alipp.createSequenceMapping(hitseq, hitseq.getStart(), false);
+          hitseq.addAlignmentAnnotation(alipp);
+          String arch;
+          hitseq.addSequenceFeature(new SequenceFeature(
+                  "Pfam Domain Architecture", (hmmrhit.get("archindex"))
+                          + " " + (arch = (String) hmmrhit.get("arch")), 0,
+                  0, Integer.valueOf((String) hmmrhit.get("archScore")),
+                  "HMMER3"));
+          addArchGroup(hitseq, arch);
+          alipp.setScore(Double.valueOf("" + dhit.get("bitscore")));
+          alipp.adjustForAlignment();
+          resultAl.addAnnotation(pval);
+          resultAl.addAnnotation(eval);
+          resultAl.addAnnotation(alipp);
+          alipp.validateRangeAndDisplay();
+        }
+      }
+      if (found == 0)
+      {
+        System.err.println("Warn - no match for json hit " + sname + "/"
+                + alisqfrom + "-" + alisqto);
+      }
+      if (found > 1)
+      {
+        System.err.println("Warn - multiple matches for json hit " + sname
+                + "/" + alisqfrom + "-" + alisqto);
+      }
+      // look for other sequences represented by this hit and create
+    }
+  }
+
+  Map<String, SequenceGroup> groups = new HashMap<String, SequenceGroup>();
+
+  /** Adds seqToAdd to the group for groupNam, creating that group if new. */
+  private void addArchGroup(SequenceI seqToAdd, String groupNam)
+  {
+    SequenceGroup archGroup = groups.get(groupNam);
+    if (archGroup != null)
+    {
+      archGroup.addSequence(seqToAdd, false);
+      return;
+    }
+    // first sequence for this name: new group over columns 0..width-1
+    archGroup = new SequenceGroup();
+    archGroup.setName(groupNam);
+    archGroup.addSequence(seqToAdd, false);
+    archGroup.setStartRes(0);
+    archGroup.setEndRes(resultAl.getWidth() - 1);
+    groups.put(groupNam, archGroup);
+    resultAl.addGroup(archGroup);
+  }
 
+  /**
+   * Builds a hidden bar-graph annotation row from a posterior probability line.
+   */
+  private AlignmentAnnotation parsePosteriorProb(String ppline)
+  {
+    Annotation[] probs = new Annotation[ppline.length()];
+    int filled = 0;
+    for (char code : ppline.toCharArray())
+    {
+      // '*' -> 10, digit -> its value; any other character takes no slot
+      if (code == '*')
+      {
+        probs[filled++] = new Annotation(10f);
+      }
+      else if (code >= '0' && code <= '9')
+      {
+        probs[filled++] = new Annotation(Integer.valueOf(code - '0'));
       }
     }
+    AlignmentAnnotation ppAnnot = new AlignmentAnnotation(
+            "Posterior Probability",
+            "Likelihood of HMM fit at each hit position.", probs);
+    ppAnnot.graph = AlignmentAnnotation.BAR_GRAPH;
+    ppAnnot.visible = false;
+    return ppAnnot;
   }
 }