add information content calculations
authorTZVanaalten <TZVanaalten@LS30916.ad.lifesci.dundee.ac.uk>
Thu, 29 Jun 2017 16:17:28 +0000 (17:17 +0100)
committerTZVanaalten <TZVanaalten@LS30916.ad.lifesci.dundee.ac.uk>
Thu, 29 Jun 2017 16:17:28 +0000 (17:17 +0100)
src/jalview/datamodel/HiddenMarkovModel.java

index 5331f3d..7c8d075 100644 (file)
@@ -1,5 +1,7 @@
 package jalview.datamodel;
 
+import jalview.schemes.ResidueProperties;
+
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -32,6 +34,8 @@ public class HiddenMarkovModel
   //contains the symbol index for each symbol
   Map<Character, Integer> symbolIndexLookup = new HashMap<>();
 
+  Map<Character, Double> backgroundFrequencies = new HashMap();
+
 
   final static String YES = "yes";
 
@@ -299,7 +303,6 @@ public class HiddenMarkovModel
     {
       nodeIndex = nodeLookup.get(alignColumn + 1);
       probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
-      probability = Math.pow(Math.E, -probability);
       return probability;
     }
     else
@@ -332,7 +335,6 @@ public class HiddenMarkovModel
       nodeIndex = nodeLookup.get(alignColumn + 1);
       probability = getNode(nodeIndex).getInsertEmissions()
               .get(symbolIndex);
-      probability = Math.pow(Math.E, -probability);
       return probability;
     }
     else
@@ -362,7 +364,6 @@ public class HiddenMarkovModel
       nodeIndex = nodeLookup.get(alignColumn + 1);
       probability = getNode(nodeIndex).getStateTransitions()
             .get(transitionIndex);
-      probability = Math.pow(Math.E, -probability);
       return probability;
     }
     else
@@ -384,6 +385,18 @@ public class HiddenMarkovModel
    return value;
   }
   
+  public char getConsensus(int columnIndex)
+  {
+    char value;
+    Integer index = findNodeIndex(columnIndex + 1);
+    if (index == null)
+    {
+      return '-';
+    }
+    value = getNodes().get(index).getConsensusResidue();
+    return value;
+  }
+
   public char getReferenceAnnotation(int nodeIndex)
   {
    char value = nodes.get(nodeIndex).getReferenceAnnotation();
@@ -748,5 +761,59 @@ public class HiddenMarkovModel
       return NO;
     }
   }
+
+  /**
+   * creates the HMM annotation
+   * 
+   * @return
+   */
+  public AlignmentAnnotation createAnnotation(int length)
+  {
+    Annotation[] annotations = new Annotation[length];
+    float max = 0f;
+    for (int i = 0; i < length; i++)
+    {
+      Float content = getInformationContent(i);
+      if (content > max)
+      {
+        max = content;
+      }
+      String description = String.format("%.3f", content);
+      description += " bits";
+      annotations[i] = new Annotation(null, description, ' ', content);
+
+    }
+    AlignmentAnnotation annotation = new AlignmentAnnotation(
+            "Information Content",
+            "The information content of each column, measured in bits",
+            annotations,
+            0f, max, AlignmentAnnotation.BAR_GRAPH);
+    return annotation;
+  }
+
+  public float getInformationContent(int column)
+  {
+    float informationContent = 0f;
+
+    for (char symbol : symbols)
+    {
+      float freq = 0f;
+      if (symbols.size() == 20)
+      {
+        freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
+      }
+      if (symbols.size() == 4)
+      {
+        freq = ResidueProperties.nucleotideBackgroundFrequencies
+                .get(symbol);
+      }
+      Double hmmProb = getMatchEmissionProbability(column, symbol);
+      float prob = hmmProb.floatValue();
+      informationContent += prob * (Math.log(prob / freq) / Math.log(2));
+
+    }
+
+    return informationContent;
+  }
 }