package jalview.datamodel;
-import jalview.schemes.ResidueProperties;
+import jalview.gui.AlignFrame;
import java.util.ArrayList;
import java.util.HashMap;
// 0. Node 0 contains average emission probabilities for each symbol
List<HMMNode> nodes = new ArrayList<>();
- // contains the HMM node for each alignment column
+ // contains the HMM node for each alignment column; alignment columns start at
+ // index 0
Map<Integer, Integer> nodeLookup = new HashMap<>();
// contains the symbol index for each symbol
public static final int DELETETODELETE = 6;
+ String fileHeader;
+
+ /**
+ * Constructs an empty model. No fields are assigned here.
+ */
+ public HiddenMarkovModel()
+ {
+
+ }
+
+ /**
+  * Copy constructor. The property map, symbol list, node list and both lookup
+  * maps are copied into new collections, so adding to or removing from the
+  * copy's collections does not affect the source model.
+  * <p>
+  * NOTE(review): the collection copies are shallow - the node list holds the
+  * same HMMNode instances as the source, so a change made to a node through
+  * one model is visible through the other. Confirm this is intended.
+  *
+  * @param hmm
+  *          the model to copy; must not be null
+  */
+ public HiddenMarkovModel(HiddenMarkovModel hmm)
+ {
+   super();
+   this.fileProperties = new HashMap<>(hmm.fileProperties);
+   this.symbols = new ArrayList<>(hmm.symbols);
+   this.nodes = new ArrayList<>(hmm.nodes);
+   this.nodeLookup = new HashMap<>(hmm.nodeLookup);
+   this.symbolIndexLookup = new HashMap<>(hmm.symbolIndexLookup);
+   this.numberOfSymbols = hmm.numberOfSymbols;
+   // String is immutable, so sharing the reference is safe. Assigning it
+   // directly also avoids the NullPointerException that new String(null)
+   // throws when the source model's header was never set.
+   this.fileHeader = hmm.fileHeader;
+ }
+
+ /**
+ * Gets the file header of the .hmm file this model came from.
+ *
+ * @return the stored file header, or null if no header has been set
+ */
+ public String getFileHeader()
+ {
+ return fileHeader;
+ }
+
+ /**
+ * Sets the file header of this model.
+ *
+ * @param header
+ *          the header text to store; it is stored as given, without
+ *          validation
+ */
+ public void setFileHeader(String header)
+ {
+ fileHeader = header;
+ }
+
/**
* Returns the map containing the matches between nodes and alignment column
* indexes.
return 0d;
}
symbolIndex = symbolIndexLookup.get(symbol);
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
return probability;
}
return 0d;
}
symbolIndex = symbolIndexLookup.get(symbol);
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getInsertEmissions()
.get(symbolIndex);
return probability;
int transitionIndex;
int nodeIndex;
Double probability;
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getStateTransitions()
.get(transition);
return probability;
public Integer getNodeAlignmentColumn(int nodeIndex)
{
Integer value = nodes.get(nodeIndex).getAlignmentColumn();
- return value - 1;
+ // the column stored on the node is returned as-is; the former "- 1" offset
+ // is no longer applied
+ return value;
}
/**
*/
public char getConsensusAtAlignColumn(int columnIndex)
{
- char value;
+ // '-' is the result when no residue can be determined
+ char mostLikely = '-';
+ if (consensusResidueIsActive())
+ {
+
Integer index = findNodeIndex(columnIndex);
if (index == null)
{
return '-';
}
- value = getNodes().get(index).getConsensusResidue();
- return value;
+ mostLikely = getNodes().get(index).getConsensusResidue();
+ return mostLikely;
+ }
+ else
+ {
+ // no stored consensus residue in use: pick the symbol with the highest
+ // match emission probability at this column; mostLikely stays '-' when
+ // no probability is greater than 0
+ double highestProb = 0;
+ for (char character : symbols)
+ {
+ Double prob = getMatchEmissionProbability(columnIndex, character);
+ if (prob > highestProb)
+ {
+ highestProb = prob;
+ mostLikely = character;
+ }
+ }
+ return mostLikely;
+ }
+
}
/**
public Integer findNodeIndex(int alignmentColumn)
{
Integer index;
- index = nodeLookup.get(alignmentColumn + 1);
+ // nodeLookup is queried with the column index itself (no "+ 1" offset);
+ // the result is null when the column has no entry in the map
+ index = nodeLookup.get(alignmentColumn);
return index;
}
}
}
- /**
- * Creates the HMM Logo alignment annotation, and populates it with
- * information content data.
- *
- * @return The alignment annotation.
- */
- public AlignmentAnnotation createAnnotation(int length)
- {
- Annotation[] annotations = new Annotation[length];
- float max = 0f;
- for (int alignPos = 0; alignPos < length; alignPos++)
- {
- Float content = getInformationContent(alignPos);
- if (content > max)
- {
- max = content;
- }
- Character cons;
- cons = getConsensusAtAlignColumn(alignPos);
- cons = Character.toUpperCase(cons);
-
- String description = String.format("%.3f", content);
- description += " bits";
- annotations[alignPos] = new Annotation(cons.toString(), description,
- ' ',
- content);
-
- }
- AlignmentAnnotation annotation = new AlignmentAnnotation(
- "Information Content",
- "The information content of each column, measured in bits",
- annotations,
- 0f, max, AlignmentAnnotation.BAR_GRAPH);
- return annotation;
- }
-
- /**
- * Returns the information content at a specified column.
- *
- * @param column
- * Index of the column, starting from 0.
- * @return
- */
- public float getInformationContent(int column)
- {
- float informationContent = 0f;
-
- for (char symbol : symbols)
- {
- float freq = 0f;
- if ("amino".equals(getAlphabetType()))
- {
- freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
- }
- if ("DNA".equals(getAlphabetType()))
- {
- freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol);
- }
- if ("RNA".equals(getAlphabetType()))
- {
- freq = ResidueProperties.rnaBackgroundFrequencies
- .get(symbol);
- }
- Double hmmProb = getMatchEmissionProbability(column, symbol);
- float prob = hmmProb.floatValue();
- informationContent += prob * (Math.log(prob / freq) / Math.log(2));
-
- }
-
- return informationContent;
- }
/**
* Returns the consensus sequence based on the most probable symbol at each
* The length of the longest sequence in the existing alignment.
* @return
*/
- public Sequence getConsensusSequence(int length)
+ public Sequence getConsensusSequence()
{
int start;
int end;
start = getNodeAlignmentColumn(1);
modelLength = getLength();
end = getNodeAlignmentColumn(modelLength);
- char[] sequence = new char[length];
- for (int index = 0; index < length; index++)
+ char[] sequence = new char[end];
+ for (int index = 0; index < end; index++)
{
Character character;
- if (consensusResidueIsActive())
- {
+
character = getConsensusAtAlignColumn(index);
- }
- else
- {
- character = findConsensusCharacter(index);
- }
+
if (character == null || character == '-')
{
sequence[index] = '-';
}
- Sequence seq = new Sequence("HMM CONSENSUS", sequence, start, end);
+ Sequence seq = new Sequence(getName() + "_HMM", sequence, start, end);
return seq;
}
+
/**
- * Finds the most probable character at a column in an alignment based on the
- * HMM.
- *
- * @param nodeIndex
- * The index of the node.
- * @return
+ * Maps the nodes of the hidden Markov model to the reference annotation and
+ * then deletes this annotation.
*/
- Character findConsensusCharacter(int column)
+ public void mapToReferenceAnnotation(AlignFrame af, SequenceI seq)
+ {
+   // Looks up the annotation row whose label contains "Reference" in the
+   // frame's alignment, maps the HMM nodes of seq onto it, then removes that
+   // row from the alignment. Does nothing when no such row exists.
+   AlignmentAnnotation[] annotArray = af.getViewport().getAlignment()
+           .getAlignmentAnnotation();
+   if (annotArray == null)
+   {
+     // the alignment carries no annotation rows, so there is nothing to map
+     return;
+   }
+
+   // The loop deliberately does not stop at the first hit: when several rows
+   // match, the last one is used.
+   AlignmentAnnotation reference = null;
+   for (AlignmentAnnotation annot : annotArray)
+   {
+     if (annot != null && annot.label != null
+             && annot.label.contains("Reference"))
+     {
+       reference = annot;
+     }
+   }
+
+   if (reference == null)
+   {
+     return;
+   }
+
+   mapToReferenceAnnotation(reference, seq);
+   af.getViewport().getAlignment().deleteAnnotation(reference);
+ }
+
+ /**
+ * Maps the nodes of the model attached to the given sequence (seq.getHMM())
+ * onto the columns of a reference annotation. Columns whose display
+ * character is "x" or "X" are assigned to nodes 1, 2, 3, ... in order; for
+ * each one the sequence character at the node's previous column is moved to
+ * the marked column, and the node's alignment column and the model's
+ * nodeLookup are updated. Every other column is removed from nodeLookup.
+ * Mapping stops, with a message on standard output, when the annotation
+ * marks more columns than the model has nodes.
+ *
+ * @param reference
+ *          the annotation row whose "x"/"X" entries mark the mapped columns
+ * @param seq
+ *          the sequence that carries the model and is edited in place
+ */
+ public void mapToReferenceAnnotation(AlignmentAnnotation reference,
+ SequenceI seq)
{
- Character mostLikely = null;
- double highestProb = 0;
- for (char character : symbols)
+ // NOTE(review): hmm is dereferenced below without a null check - confirm
+ // seq always carries a model here
+ HiddenMarkovModel hmm = seq.getHMM();
+ Annotation[] annots = reference.annotations;
{
- Double prob = getMatchEmissionProbability(column, character);
- if (prob > highestProb)
+ // incremented before use, so the first node assigned is index 1
+ int nodeIndex = 0;
+ for (int col = 0; col < annots.length; col++)
{
- highestProb = prob;
- mostLikely = character;
+ // NOTE(review): annots[col] is dereferenced without a null check -
+ // confirm the array cannot contain empty slots
+ String character = annots[col].displayCharacter;
+ if ("x".equals(character) || "X".equals(character))
+ {
+ nodeIndex++;
+ if (nodeIndex < hmm.getNodes().size())
+ {
+ HMMNode node = hmm.getNode(nodeIndex);
+ // NOTE(review): this reads the column from this model's nodes while
+ // every other access goes through hmm - confirm both are the same
+ // model
+ int alignPos = getNodeAlignmentColumn(nodeIndex);
+ // NOTE(review): the character is read before the length check below;
+ // verify getCharAt tolerates an out-of-range position
+ char seqCharacter = seq.getCharAt(alignPos);
+ if (alignPos >= seq.getLength() || col >= seq.getLength())
+ {
+ // NOTE(review): the pad count is derived from alignPos only, although
+ // the condition also fires for col - verify the col case
+ seq.insertCharAt(seq.getLength(),
+ (alignPos + 1) - seq.getLength(),
+ '-');
+ }
+ // blank the old position, then place the character at the marked column
+ seq.getSequence()[alignPos] = '-';
+ seq.getSequence()[col] = seqCharacter;
+ node.setAlignmentColumn(col);
+ hmm.nodeLookup.put(col, nodeIndex);
+ }
+ else
+ {
+ System.out.println(
+ "The reference annotation contains more consensus columns than the hidden Markov model");
+ break;
+ }
+ }
+ else
+ {
+ // unmarked column: it no longer maps to any node
+ hmm.nodeLookup.remove(col);
+ }
}
+
}
- return mostLikely;
+
}
+
+ /**
+  * Maps the nodes of this model onto the columns of a reference annotation.
+  * Columns whose display character is "x" or "X" are assigned to nodes 1, 2,
+  * 3, ... in order; every other column (including an empty annotation slot)
+  * is removed from nodeLookup. Mapping stops, with a message on standard
+  * output, when the annotation marks more columns than the model has nodes.
+  *
+  * @param reference
+  *          the annotation row whose "x"/"X" entries mark the mapped columns
+  */
+ public void mapToReferenceAnnotation(AlignmentAnnotation reference)
+ {
+   Annotation[] annots = reference.annotations;
+   // incremented before use, so the first node assigned is index 1
+   int nodeIndex = 0;
+   for (int col = 0; col < annots.length; col++)
+   {
+     // an empty slot cannot mark a consensus column; treat it as unmarked
+     // instead of failing with a NullPointerException
+     String character = (annots[col] == null) ? null
+             : annots[col].displayCharacter;
+     if (!"x".equalsIgnoreCase(character))
+     {
+       nodeLookup.remove(col);
+       continue;
+     }
+
+     nodeIndex++;
+     if (nodeIndex >= nodes.size())
+     {
+       System.out.println(
+               "The reference annotation contains more consensus columns than the hidden Markov model");
+       break;
+     }
+
+     // NOTE(review): the node is given col + 1 here, whereas the
+     // (AlignmentAnnotation, SequenceI) overload stores col - confirm which
+     // base getNodeAlignmentColumn callers expect
+     nodes.get(nodeIndex).setAlignmentColumn(col + 1);
+     nodeLookup.put(col, nodeIndex);
+   }
+ }
+
+ /**
+  * Builds the consensus sequence for this model, flags it as an HMM
+  * consensus sequence and attaches this model to it.
+  *
+  * @return the consensus sequence, carrying a reference to this model
+  */
+ public SequenceI initHMMSequence()
+ {
+   final Sequence hmmSequence = getConsensusSequence();
+   hmmSequence.setIsHMMConsensusSequence(true);
+   hmmSequence.setHMM(this);
+   return hmmSequence;
+ }
+
+
}