package jalview.datamodel;
-import jalview.gui.AlignFrame;
+import jalview.schemes.ResidueProperties;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Scanner;
/**
* Data structure which stores a hidden Markov model. Currently contains file
*/
public class HiddenMarkovModel
{
-
+ private static final double LOG2 = Math.log(2);
// Stores file properties. Do not directly access this field as it contains
// only string value - use the getter methods. For example, to find the length
final static String YES = "yes";
final static String NO = "no";
-
- int numberOfSymbols;
// keys for file properties hashmap
private final String NAME = "NAME";
String fileHeader;
+ /**
+ * Default constructor. The constructor body is empty, so a new model holds
+ * only whatever state the field declarations themselves provide.
+ */
public HiddenMarkovModel()
{
-
}
public HiddenMarkovModel(HiddenMarkovModel hmm)
this.nodeLookup = new HashMap<>(hmm.nodeLookup);
this.symbolIndexLookup = new HashMap<>(
hmm.symbolIndexLookup);
- this.numberOfSymbols = hmm.numberOfSymbols;
this.fileHeader = new String(hmm.fileHeader);
}
/**
- * Gets the file header of the .hmm file this model came from.
+ * Returns the information content at a specified column, calculated as the
+ * sum (over possible symbols) of the match emission probability multiplied
+ * by the log ratio of emission to background probability, in bits:
+ *
+ * <pre>
+ * sum( p * log(p / background) ) / log(2)
+ * </pre>
+ *
+ * Symbols whose match emission probability is not positive are skipped. Such
+ * a term tends to zero in the limit, whereas evaluating 0 * log(0) in
+ * floating point yields NaN and would corrupt the whole sum.
+ *
+ * @param column
+ * column position (base 0)
+ * @return the information content of the column, in bits
+ */
+ public float getInformationContent(int column)
+ {
+ float informationContent = 0f;
+
+ for (char symbol : getSymbols())
+ {
+ float prob = (float) getMatchEmissionProbability(column, symbol);
+ if (prob <= 0f)
+ {
+ // p * log(p / q) tends to 0 as p tends to 0; skip to avoid NaN
+ continue;
+ }
+ float freq = ResidueProperties.backgroundFrequencies
+ .get(getAlphabetType()).get(symbol);
+ informationContent += prob * Math.log(prob / freq);
+ }
+
+ // convert from natural log units to bits
+ informationContent = informationContent / (float) LOG2;
+
+ return informationContent;
+ }
+
+ /**
+ * Gets the file header of the .hmm file this model came from
*
* @return
*/
* @return
*
*/
- public Double getMatchEmissionProbability(int alignColumn, char symbol)
+ public double getMatchEmissionProbability(int alignColumn, char symbol)
{
int symbolIndex;
int nodeIndex;
- Double probability;
+ double probability;
if (!symbolIndexLookup.containsKey(symbol))
{
return 0d;
{
return 0d;
}
-
}
/**
* @return
*
*/
- public Double getInsertEmissionProbability(int alignColumn, char symbol)
+ public double getInsertEmissionProbability(int alignColumn, char symbol)
{
int symbolIndex;
int nodeIndex;
- Double probability;
+ double probability;
if (!symbolIndexLookup.containsKey(symbol))
{
return 0d;
public Double getStateTransitionProbability(int alignColumn,
int transition)
{
- int transitionIndex;
int nodeIndex;
Double probability;
if (nodeLookup.containsKey(alignColumn))
}
/**
- * Returns the consensus at a given alignment column.
+ * Returns the consensus at a given alignment column. If the character is
+ * lower case, its emission probability is less than 0.5.
*
* @param columnIndex
* The index of the column in the alignment for which the consensus
mostLikely = character;
}
}
+ if (highestProb < 0.5)
+ {
+ mostLikely = Character.toLowerCase(mostLikely);
+ }
return mostLikely;
}
*/
public int getNumberOfSymbols()
{
- return numberOfSymbols;
- }
-
- /**
- * Fills symbol array and whilst doing so, updates the value of the number of
- * symbols.
- *
- * @param parser
- * The scanner scanning the symbol line in the file.
- */
- public void fillSymbols(Scanner parser)
- {
- int i = 0;
- while (parser.hasNext())
- {
- String strSymbol = parser.next();
- char[] symbol = strSymbol.toCharArray();
- symbols.add(symbol[0]);
- symbolIndexLookup.put(symbol[0], i);
- i++;
- }
- numberOfSymbols = symbols.size();
+ // the count is read from the symbols list itself, not a separately stored field
+ return symbols.size();
}
/**
public void setAlignmentColumn(int nodeIndex, int column)
{
nodes.get(nodeIndex).setAlignmentColumn(column);
+ // also record the reverse mapping (column -> node index) in the lookup map
+ // NOTE(review): an entry for the node's previous column, if any, is not
+ // removed here - confirm callers reset the lookup when remapping
+ nodeLookup.put(column, nodeIndex);
}
/**
+ * Discards every entry in the node lookup by replacing the map with a new,
+ * empty HashMap (the previous map object itself is left untouched)
+ */
+ public void emptyNodeLookup()
+ {
+ nodeLookup = new HashMap<>();
+ }
+
+
+ /**
* Sets the reference annotation at a given node.
*
* @param nodeIndex
* The length of the longest sequence in the existing alignment.
* @return
*/
- public Sequence getConsensusSequence(int length)
+ public Sequence getConsensusSequence()
{
int start;
int end;
start = getNodeAlignmentColumn(1);
modelLength = getLength();
end = getNodeAlignmentColumn(modelLength);
- char[] sequence = new char[length];
- for (int index = 0; index < length; index++)
+ char[] sequence = new char[end + 1];
+ for (int index = 0; index < end + 1; index++)
{
Character character;
}
- Sequence seq = new Sequence(getName() + "_HMM", sequence, start, end);
+ Sequence seq = new Sequence(getName(), sequence, start,
+ end);
return seq;
}
/**
- * Maps the nodes of the hidden Markov model to the reference annotation and
- * then deletes this annotation.
+ * Builds the consensus sequence for this model, flags it as an HMM consensus
+ * sequence and attaches this model to it
+ *
+ * @return the newly built consensus sequence, linked to this model
*/
- public void mapToReferenceAnnotation(AlignFrame af, SequenceI seq)
+ public SequenceI initHMMSequence()
{
- AlignmentAnnotation annotArray[] = af.getViewport().getAlignment()
- .getAlignmentAnnotation();
-
- AlignmentAnnotation reference = null;
- for (AlignmentAnnotation annot : annotArray)
- {
- if (annot.label.contains("Reference"))
- {
- reference = annot;
- }
- }
-
- if (reference == null)
- {
- return;
- }
-
- mapToReferenceAnnotation(reference, seq);
- af.getViewport().getAlignment().deleteAnnotation(reference);
- }
-
- public void mapToReferenceAnnotation(AlignmentAnnotation reference,
- SequenceI seq)
- {
- HiddenMarkovModel hmm = seq.getHMM();
- Annotation[] annots = reference.annotations;
- {
- int nodeIndex = 0;
- for (int col = 0; col < annots.length; col++)
- {
- String character = annots[col].displayCharacter;
- if ("x".equals(character) || "X".equals(character))
- {
- nodeIndex++;
- if (nodeIndex < hmm.getNodes().size())
- {
- HMMNode node = hmm.getNode(nodeIndex);
- int alignPos = getNodeAlignmentColumn(nodeIndex);
- char seqCharacter = seq.getCharAt(alignPos);
- if (alignPos >= seq.getLength() || col >= seq.getLength())
- {
- seq.insertCharAt(seq.getLength(),
- (alignPos + 1) - seq.getLength(),
- '-');
- }
- seq.getSequence()[alignPos] = '-';
- seq.getSequence()[col] = seqCharacter;
- node.setAlignmentColumn(col);
- hmm.nodeLookup.put(col, nodeIndex);
- }
- else
- {
- System.out.println(
- "The reference annotation contains more consensus columns than the hidden Markov model");
- break;
- }
- }
- else
- {
- hmm.nodeLookup.remove(col);
- }
- }
-
- }
-
+ Sequence hmmSequence = getConsensusSequence();
+ hmmSequence.setIsHMMConsensusSequence(true);
+ hmmSequence.setHMM(this);
+ return hmmSequence;
}
- public void mapToReferenceAnnotation(AlignmentAnnotation reference)
+ /**
+ * Looks up the index registered for a symbol in the symbol index lookup
+ *
+ * @param c
+ * the symbol to look up
+ * @return the index stored for the symbol
+ * @throws NullPointerException
+ * if no index has been stored for the symbol
+ */
+ public int getSymbolIndex(char c)
{
- Annotation[] annots = reference.annotations;
- {
- int nodeIndex = 0;
- for (int col = 0; col < annots.length; col++)
- {
- String character = annots[col].displayCharacter;
- if ("x".equals(character) || "X".equals(character))
- {
- nodeIndex++;
- if (nodeIndex < nodes.size())
- {
- HMMNode node = nodes.get(nodeIndex);
- node.setAlignmentColumn(col + 1);
- nodeLookup.put(col, nodeIndex);
- }
- else
- {
- System.out.println(
- "The reference annotation contains more consensus columns than the hidden Markov model");
- break;
- }
- }
- else
- {
- nodeLookup.remove(col);
- }
- }
-
- }
-
+ Integer index = symbolIndexLookup.get(c);
+ return index.intValue();
}
- public SequenceI initHMMSequence(AlignFrame af, int position)
+ /**
+ * Stores the index for a symbol in the symbol index lookup, replacing any
+ * index previously stored for that symbol
+ *
+ * @param c
+ * the symbol
+ * @param i
+ * the index to associate with the symbol
+ */
+ public void setSymbolIndex(Character c, Integer i)
{
- AlignmentI alignment = af.getViewport().getAlignment();
- int length = alignment.getWidth();
- Sequence consensus = getConsensusSequence(length);
- consensus.setIsHMMConsensusSequence(true);
- consensus.setHMM(this);
- SequenceI[] consensusArr = new Sequence[] { consensus };
- alignment.getSequences().add(position, consensus);
- return consensus;
+ symbolIndexLookup.put(c, i);
}