JAL-2629 tidy unit tests, constants etc

[jalview.git] / src / jalview / datamodel / HiddenMarkovModel.java
diff --git a/src/jalview/datamodel/HiddenMarkovModel.java b/src/jalview/datamodel/HiddenMarkovModel.java

index c96ad8b..1b12945 100644 (file)
--- a/src/jalview/datamodel/HiddenMarkovModel.java
+++ b/src/jalview/datamodel/HiddenMarkovModel.java
@@ -1,12 +1,11 @@
  package jalview.datamodel;
  
-import jalview.gui.AlignFrame;
+import jalview.schemes.ResidueProperties;
  
  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
-import java.util.Scanner;
  
  /**
   * Data structure which stores a hidden Markov model. Currently contains file
@@ -18,7 +17,7 @@ import java.util.Scanner;
   */
  public class HiddenMarkovModel
  {
-
+  private static final double LOG2 = Math.log(2);
  
    // Stores file properties. Do not directly access this field as it contains
    // only string value - use the getter methods. For example, to find the length
@@ -43,8 +42,6 @@ public class HiddenMarkovModel
    final static String YES = "yes";
  
    final static String NO = "no";
-
-  int numberOfSymbols;
    
    // keys for file properties hashmap
    private final String NAME = "NAME";
@@ -117,9 +114,11 @@ public class HiddenMarkovModel
  
    String fileHeader;
  
+  /**
+   * Constructor
+   */
    public HiddenMarkovModel()
    {
-
    }
  
    public HiddenMarkovModel(HiddenMarkovModel hmm)
@@ -131,12 +130,40 @@ public class HiddenMarkovModel
      this.nodeLookup = new HashMap<>(hmm.nodeLookup);
      this.symbolIndexLookup = new HashMap<>(
              hmm.symbolIndexLookup);
-    this.numberOfSymbols = hmm.numberOfSymbols;
      this.fileHeader = new String(hmm.fileHeader);
    }
  
    /**
-   * Gets the file header of the .hmm file this model came from.
+   * Returns the information content at a specified column, calculated as the
+   * sum (over possible symbols) of the probability-weighted log ratio
+   * 
+   * <pre>
+   *  P(emission) * log(P(emission) / P(background)) / log(2)
+   * </pre>
+   * 
+   * @param column
+   *          column position (base 0)
+   * @return the information content of the column, in bits
+   */
+  public float getInformationContent(int column)
+  {
+    float informationContent = 0f;
+    for (char symbol : getSymbols())
+    {
+      float prob = (float) getMatchEmissionProbability(column, symbol);
+      if (prob > 0f)
+      {
+        // skip zero probabilities: 0 * log(0) would evaluate to NaN
+        float freq = ResidueProperties.backgroundFrequencies
+                .get(getAlphabetType()).get(symbol);
+        informationContent += prob * Math.log(prob / freq);
+      }
+    }
+    return informationContent / (float) LOG2;
+  }
+
+  /**
+   * Gets the file header of the .hmm file this model came from
     * 
     * @return
     */
@@ -382,11 +409,11 @@ public class HiddenMarkovModel
     * @return
     * 
     */
-  public Double getMatchEmissionProbability(int alignColumn, char symbol)
+  public double getMatchEmissionProbability(int alignColumn, char symbol)
    {
      int symbolIndex;
      int nodeIndex;
-    Double probability;
+    double probability;
      if (!symbolIndexLookup.containsKey(symbol))
      {
        return 0d;
@@ -402,7 +429,6 @@ public class HiddenMarkovModel
      {
        return 0d;
      }
-
    }
  
    /**
@@ -417,11 +443,11 @@ public class HiddenMarkovModel
     * @return
     * 
     */
-  public Double getInsertEmissionProbability(int alignColumn, char symbol)
+  public double getInsertEmissionProbability(int alignColumn, char symbol)
    {
      int symbolIndex;
      int nodeIndex;
-    Double probability;
+    double probability;
      if (!symbolIndexLookup.containsKey(symbol))
      {
        return 0d;
@@ -456,7 +482,6 @@ public class HiddenMarkovModel
    public Double getStateTransitionProbability(int alignColumn,
            int transition)
    {
-    int transitionIndex;
      int nodeIndex;
      Double probability;
      if (nodeLookup.containsKey(alignColumn))
@@ -501,7 +526,8 @@ public class HiddenMarkovModel
    }
    
    /**
-   * Returns the consensus at a given alignment column.
+   * Returns the consensus at a given alignment column. If the character is
+   * lower case, its emission probability is less than 0.5.
     * 
     * @param columnIndex
     *          The index of the column in the alignment for which the consensus
@@ -534,6 +560,10 @@ public class HiddenMarkovModel
            mostLikely = character;
          }
        }
+      if (highestProb < 0.5)
+      {
+        mostLikely = Character.toLowerCase(mostLikely);
+      }
        return mostLikely;
      }
  
@@ -599,28 +629,7 @@ public class HiddenMarkovModel
     */
    public int getNumberOfSymbols()
    {
-    return numberOfSymbols;
-  }
-
-  /**
-   * Fills symbol array and whilst doing so, updates the value of the number of
-   * symbols.
-   * 
-   * @param parser
-   *          The scanner scanning the symbol line in the file.
-   */
-  public void fillSymbols(Scanner parser)
-  {
-    int i = 0;
-    while (parser.hasNext())
-    {
-      String strSymbol = parser.next();
-      char[] symbol = strSymbol.toCharArray();
-      symbols.add(symbol[0]);
-      symbolIndexLookup.put(symbol[0], i);
-      i++;
-    }
-    numberOfSymbols = symbols.size();
+    return symbols.size();
    }
  
    /**
@@ -772,9 +781,19 @@ public class HiddenMarkovModel
    public void setAlignmentColumn(int nodeIndex, int column)
    {
      nodes.get(nodeIndex).setAlignmentColumn(column);
+    nodeLookup.put(column, nodeIndex);
    }
  
    /**
+   * Clears all data in the node lookup map
+   */
+  public void emptyNodeLookup()
+  {
+    nodeLookup = new HashMap<>();
+  }
+
+
+  /**
     * Sets the reference annotation at a given node.
     * 
     * @param nodeIndex
@@ -985,7 +1004,7 @@ public class HiddenMarkovModel
     *          The length of the longest sequence in the existing alignment.
     * @return
     */
-  public Sequence getConsensusSequence(int length)
+  public Sequence getConsensusSequence()
    {
      int start;
      int end;
@@ -993,8 +1012,8 @@ public class HiddenMarkovModel
      start = getNodeAlignmentColumn(1);
      modelLength = getLength();
      end = getNodeAlignmentColumn(modelLength);
-    char[] sequence = new char[length];
-    for (int index = 0; index < length; index++)
+    char[] sequence = new char[end + 1];
+    for (int index = 0; index < end + 1; index++)
      {
        Character character;
  
@@ -1011,128 +1030,33 @@ public class HiddenMarkovModel
        }
  
  
-    Sequence seq = new Sequence(getName() + "_HMM", sequence, start, end);
+    Sequence seq = new Sequence(getName(), sequence, start,
+            end);
      return seq;
    }
  
  
    /**
-   * Maps the nodes of the hidden Markov model to the reference annotation and
-   * then deletes this annotation.
+   * Creates this model's consensus sequence, flagged as an HMM consensus
+   * sequence and holding a reference to this model
+   * @return the new HMM consensus sequence
     */
-  public void mapToReferenceAnnotation(AlignFrame af, SequenceI seq)
+  public SequenceI initHMMSequence()
    {
-    AlignmentAnnotation annotArray[] = af.getViewport().getAlignment()
-            .getAlignmentAnnotation();
-
-    AlignmentAnnotation reference = null;
-    for (AlignmentAnnotation annot : annotArray)
-    {
-      if (annot.label.contains("Reference"))
-      {
-        reference = annot;
-      }
-    }
-
-    if (reference == null)
-    {
-      return;
-    }
-
-    mapToReferenceAnnotation(reference, seq);
-    af.getViewport().getAlignment().deleteAnnotation(reference);
-  }
-
-  public void mapToReferenceAnnotation(AlignmentAnnotation reference,
-          SequenceI seq)
-  {
-    HiddenMarkovModel hmm = seq.getHMM();
-    Annotation[] annots = reference.annotations;
-    {
-      int nodeIndex = 0;
-      for (int col = 0; col < annots.length; col++)
-      {
-        String character = annots[col].displayCharacter;
-        if ("x".equals(character) || "X".equals(character))
-        {
-          nodeIndex++;
-          if (nodeIndex < hmm.getNodes().size())
-          {
-            HMMNode node = hmm.getNode(nodeIndex);
-            int alignPos = getNodeAlignmentColumn(nodeIndex);
-            char seqCharacter = seq.getCharAt(alignPos);
-            if (alignPos >= seq.getLength() || col >= seq.getLength())
-            {
-              seq.insertCharAt(seq.getLength(),
-                      (alignPos + 1) - seq.getLength(),
-                      '-');
-            }
-            seq.getSequence()[alignPos] = '-';
-            seq.getSequence()[col] = seqCharacter;
-            node.setAlignmentColumn(col);
-            hmm.nodeLookup.put(col, nodeIndex);
-          }
-          else
-          {
-            System.out.println(
-                    "The reference annotation contains more consensus columns than the hidden Markov model");
-            break;
-          }
-        }
-        else
-        {
-          hmm.nodeLookup.remove(col);
-        }
-      }
-
-    }
-
+    Sequence consensus = getConsensusSequence();
+    consensus.setIsHMMConsensusSequence(true);
+    consensus.setHMM(this);
+    return consensus;
    }
  
-  public void mapToReferenceAnnotation(AlignmentAnnotation reference)
+  public int getSymbolIndex(char c)
    {
-    Annotation[] annots = reference.annotations;
-    {
-      int nodeIndex = 0;
-      for (int col = 0; col < annots.length; col++)
-      {
-        String character = annots[col].displayCharacter;
-        if ("x".equals(character) || "X".equals(character))
-        {
-          nodeIndex++;
-          if (nodeIndex < nodes.size())
-          {
-            HMMNode node = nodes.get(nodeIndex);
-            node.setAlignmentColumn(col + 1);
-            nodeLookup.put(col, nodeIndex);
-          }
-          else
-          {
-            System.out.println(
-                    "The reference annotation contains more consensus columns than the hidden Markov model");
-            break;
-          }
-        }
-        else
-        {
-          nodeLookup.remove(col);
-        }
-      }
-
-    }
-
+    return symbolIndexLookup.get(c);
    }
  
-  public SequenceI initHMMSequence(AlignFrame af, int position)
+  public void setSymbolIndex(Character c, Integer i)
    {
-    AlignmentI alignment = af.getViewport().getAlignment();
-    int length = alignment.getWidth();
-    Sequence consensus = getConsensusSequence(length);
-    consensus.setIsHMMConsensusSequence(true);
-    consensus.setHMM(this);
-    SequenceI[] consensusArr = new Sequence[] { consensus };
-    alignment.getSequences().add(position, consensus);
-    return consensus;
+    symbolIndexLookup.put(c, i);
    }