JAL-2599 HMMs can now be dropped onto the desktop

[jalview.git] / src / jalview / datamodel / HiddenMarkovModel.java
diff --git a/src/jalview/datamodel/HiddenMarkovModel.java b/src/jalview/datamodel/HiddenMarkovModel.java

index 879968f..e74d826 100644 (file)
--- a/src/jalview/datamodel/HiddenMarkovModel.java
+++ b/src/jalview/datamodel/HiddenMarkovModel.java
@@ -1,6 +1,6 @@
  package jalview.datamodel;
  
-import jalview.schemes.ResidueProperties;
+import jalview.gui.AlignFrame;
  
  import java.util.ArrayList;
  import java.util.HashMap;
@@ -33,7 +33,8 @@ public class HiddenMarkovModel
    // 0. Node 0 contains average emission probabilities for each symbol
    List<HMMNode> nodes = new ArrayList<>();
  
-  // contains the HMM node for each alignment column
+  // contains the HMM node for each alignment column, alignment columns start at
+  // index 0;
    Map<Integer, Integer> nodeLookup = new HashMap<>();
    
    // contains the symbol index for each symbol
@@ -114,6 +115,46 @@ public class HiddenMarkovModel
  
    public static final int DELETETODELETE = 6;
  
+  String fileHeader;
+
+  public HiddenMarkovModel()
+  {
+
+  }
+
+  public HiddenMarkovModel(HiddenMarkovModel hmm)
+  {
+    super();
+    this.fileProperties = new HashMap<>(hmm.fileProperties);
+    this.symbols = new ArrayList<>(hmm.symbols);
+    this.nodes = new ArrayList<>(hmm.nodes);
+    this.nodeLookup = new HashMap<>(hmm.nodeLookup);
+    this.symbolIndexLookup = new HashMap<>(
+            hmm.symbolIndexLookup);
+    this.numberOfSymbols = hmm.numberOfSymbols;
+    this.fileHeader = new String(hmm.fileHeader);
+  }
+
+  /**
+   * Gets the file header of the .hmm file this model came from.
+   * 
+   * @return
+   */
+  public String getFileHeader()
+  {
+    return fileHeader;
+  }
+
+  /**
+   * Sets the file header of this model.
+   * 
+   * @param header
+   */
+  public void setFileHeader(String header)
+  {
+    fileHeader = header;
+  }
+
    /**
     * Returns the map containing the matches between nodes and alignment column
     * indexes.
@@ -351,9 +392,9 @@ public class HiddenMarkovModel
        return 0d;
      }
      symbolIndex = symbolIndexLookup.get(symbol);
-    if (nodeLookup.containsKey(alignColumn + 1))
+    if (nodeLookup.containsKey(alignColumn))
      {
-      nodeIndex = nodeLookup.get(alignColumn + 1);
+      nodeIndex = nodeLookup.get(alignColumn);
        probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
        return probability;
      }
@@ -386,9 +427,9 @@ public class HiddenMarkovModel
        return 0d;
      }
      symbolIndex = symbolIndexLookup.get(symbol);
-    if (nodeLookup.containsKey(alignColumn + 1))
+    if (nodeLookup.containsKey(alignColumn))
      {
-      nodeIndex = nodeLookup.get(alignColumn + 1);
+      nodeIndex = nodeLookup.get(alignColumn);
        probability = getNode(nodeIndex).getInsertEmissions()
                .get(symbolIndex);
        return probability;
@@ -418,9 +459,9 @@ public class HiddenMarkovModel
      int transitionIndex;
      int nodeIndex;
      Double probability;
-    if (nodeLookup.containsKey(alignColumn + 1))
+    if (nodeLookup.containsKey(alignColumn))
      {
-      nodeIndex = nodeLookup.get(alignColumn + 1);
+      nodeIndex = nodeLookup.get(alignColumn);
        probability = getNode(nodeIndex).getStateTransitions()
                .get(transition);
        return probability;
@@ -443,7 +484,7 @@ public class HiddenMarkovModel
    public Integer getNodeAlignmentColumn(int nodeIndex)
    {
      Integer value = nodes.get(nodeIndex).getAlignmentColumn();
-    return value - 1;
+    return value;
    }
    
    /**
@@ -469,14 +510,33 @@ public class HiddenMarkovModel
     */
    public char getConsensusAtAlignColumn(int columnIndex)
    {
-    char value;
+    char mostLikely = '-';
+    if (consensusResidueIsActive())
+    {
+
      Integer index = findNodeIndex(columnIndex);
      if (index == null)
      {
        return '-';
      }
-    value = getNodes().get(index).getConsensusResidue();
-    return value;
+      mostLikely = getNodes().get(index).getConsensusResidue();
+      return mostLikely;
+    }
+    else
+    {
+      double highestProb = 0;
+      for (char character : symbols)
+      {
+        Double prob = getMatchEmissionProbability(columnIndex, character);
+        if (prob > highestProb)
+        {
+          highestProb = prob;
+          mostLikely = character;
+        }
+      }
+      return mostLikely;
+    }
+
    }
  
    /**
@@ -892,7 +952,7 @@ public class HiddenMarkovModel
    public Integer findNodeIndex(int alignmentColumn)
    {
      Integer index;
-    index = nodeLookup.get(alignmentColumn + 1);
+    index = nodeLookup.get(alignmentColumn);
      return index;
    }
  
@@ -914,78 +974,7 @@ public class HiddenMarkovModel
      }
    }
  
-  /**
-   * Creates the HMM Logo alignment annotation, and populates it with
-   * information content data.
-   * 
-   * @return The alignment annotation.
-   */
-  public AlignmentAnnotation createAnnotation(int length)
-  {
-    Annotation[] annotations = new Annotation[length];
-    float max = 0f;
-    for (int alignPos = 0; alignPos < length; alignPos++)
-    {
-      Float content = getInformationContent(alignPos);
-      if (content > max)
-      {
-        max = content;
-      }
  
-      Character cons;
-      cons = getConsensusAtAlignColumn(alignPos);
-      cons = Character.toUpperCase(cons);
-
-      String description = String.format("%.3f", content);
-      description += " bits";
-      annotations[alignPos] = new Annotation(cons.toString(), description,
-              ' ',
-              content);
-
-    }
-    AlignmentAnnotation annotation = new AlignmentAnnotation(
-            "Information Content",
-            "The information content of each column, measured in bits",
-            annotations,
-            0f, max, AlignmentAnnotation.BAR_GRAPH);
-    return annotation;
-  }
-
-  /**
-   * Returns the information content at a specified column.
-   * 
-   * @param column
-   *          Index of the column, starting from 0.
-   * @return
-   */
-  public float getInformationContent(int column)
-  {
-    float informationContent = 0f;
-
-    for (char symbol : symbols)
-    {
-      float freq = 0f;
-      if ("amino".equals(getAlphabetType()))
-      {
-        freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
-      }
-      if ("DNA".equals(getAlphabetType()))
-      {
-        freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol);
-      }
-      if ("RNA".equals(getAlphabetType()))
-      {
-        freq = ResidueProperties.rnaBackgroundFrequencies
-                .get(symbol);
-      }
-      Double hmmProb = getMatchEmissionProbability(column, symbol);
-      float prob = hmmProb.floatValue();
-      informationContent += prob * (Math.log(prob / freq) / Math.log(2));
-
-    }
-
-    return informationContent;
-  }
  
    /**
     * Returns the consensus sequence based on the most probable symbol at each
@@ -996,7 +985,7 @@ public class HiddenMarkovModel
     *          The length of the longest sequence in the existing alignment.
     * @return
     */
-  public Sequence getConsensusSequence(int length)
+  public Sequence getConsensusSequence()
    {
      int start;
      int end;
@@ -1004,18 +993,13 @@ public class HiddenMarkovModel
      start = getNodeAlignmentColumn(1);
      modelLength = getLength();
      end = getNodeAlignmentColumn(modelLength);
-    char[] sequence = new char[length];
-    for (int index = 0; index < length; index++)
+    char[] sequence = new char[end];
+    for (int index = 0; index < end; index++)
      {
        Character character;
-      if (consensusResidueIsActive())
-      {
+
          character = getConsensusAtAlignColumn(index);
-      }
-      else
-      {
-        character = findConsensusCharacter(index);
-      }
+
        if (character == null || character == '-')
        {
          sequence[index] = '-';
@@ -1027,32 +1011,126 @@ public class HiddenMarkovModel
        }
  
  
-    Sequence seq = new Sequence("HMM CONSENSUS", sequence, start, end);
+    Sequence seq = new Sequence(getName() + "_HMM", sequence, start, end);
      return seq;
    }
  
+
    /**
-   * Finds the most probable character at a column in an alignment based on the
-   * HMM.
-   * 
-   * @param nodeIndex
-   *          The index of the node.
-   * @return
+   * Maps the nodes of the hidden Markov model to the reference annotation and
+   * then deletes this annotation.
     */
-  Character findConsensusCharacter(int column)
+  public void mapToReferenceAnnotation(AlignFrame af, SequenceI seq)
+  {
+    AlignmentAnnotation annotArray[] = af.getViewport().getAlignment()
+            .getAlignmentAnnotation();
+
+    AlignmentAnnotation reference = null;
+    for (AlignmentAnnotation annot : annotArray)
+    {
+      if (annot.label.contains("Reference"))
+      {
+        reference = annot;
+      }
+    }
+
+    if (reference == null)
+    {
+      return;
+    }
+
+    mapToReferenceAnnotation(reference, seq);
+    af.getViewport().getAlignment().deleteAnnotation(reference);
+  }
+
+  public void mapToReferenceAnnotation(AlignmentAnnotation reference,
+          SequenceI seq)
    {
-    Character mostLikely = null;
-    double highestProb = 0;
-    for (char character : symbols)
+    HiddenMarkovModel hmm = seq.getHMM();
+    Annotation[] annots = reference.annotations;
      {
-      Double prob = getMatchEmissionProbability(column, character);
-      if (prob > highestProb)
+      int nodeIndex = 0;
+      for (int col = 0; col < annots.length; col++)
        {
-        highestProb = prob;
-        mostLikely = character;
+        String character = annots[col].displayCharacter;
+        if ("x".equals(character) || "X".equals(character))
+        {
+          nodeIndex++;
+          if (nodeIndex < hmm.getNodes().size())
+          {
+            HMMNode node = hmm.getNode(nodeIndex);
+            int alignPos = getNodeAlignmentColumn(nodeIndex);
+            char seqCharacter = seq.getCharAt(alignPos);
+            if (alignPos >= seq.getLength() || col >= seq.getLength())
+            {
+              seq.insertCharAt(seq.getLength(),
+                      (alignPos + 1) - seq.getLength(),
+                      '-');
+            }
+            seq.getSequence()[alignPos] = '-';
+            seq.getSequence()[col] = seqCharacter;
+            node.setAlignmentColumn(col);
+            hmm.nodeLookup.put(col, nodeIndex);
+          }
+          else
+          {
+            System.out.println(
+                    "The reference annotation contains more consensus columns than the hidden Markov model");
+            break;
+          }
+        }
+        else
+        {
+          hmm.nodeLookup.remove(col);
+        }
        }
+
      }
-    return mostLikely;
+
    }
+
+  public void mapToReferenceAnnotation(AlignmentAnnotation reference)
+  {
+    Annotation[] annots = reference.annotations;
+    {
+      int nodeIndex = 0;
+      for (int col = 0; col < annots.length; col++)
+      {
+        String character = annots[col].displayCharacter;
+        if ("x".equals(character) || "X".equals(character))
+        {
+          nodeIndex++;
+          if (nodeIndex < nodes.size())
+          {
+            HMMNode node = nodes.get(nodeIndex);
+            node.setAlignmentColumn(col + 1);
+            nodeLookup.put(col, nodeIndex);
+          }
+          else
+          {
+            System.out.println(
+                    "The reference annotation contains more consensus columns than the hidden Markov model");
+            break;
+          }
+        }
+        else
+        {
+          nodeLookup.remove(col);
+        }
+      }
+
+    }
+
+  }
+
+  public SequenceI initHMMSequence()
+  {
+    Sequence consensus = getConsensusSequence();
+    consensus.setIsHMMConsensusSequence(true);
+    consensus.setHMM(this);
+    return consensus;
+  }
+
+
  }