package jalview.datamodel;
-import jalview.schemes.ResidueProperties;
-
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Scanner;
/**
* Data structure which stores a hidden Markov model. Currently contains file
// 0. Node 0 contains average emission probabilities for each symbol
List<HMMNode> nodes = new ArrayList<>();
- // contains the HMM node for each alignment column
+ // maps each alignment column to the index of its HMM node; alignment columns
+ // start at index 0
Map<Integer, Integer> nodeLookup = new HashMap<>();
// contains the symbol index for each symbol
final static String YES = "yes";
final static String NO = "no";
-
- int numberOfSymbols;
// keys for file properties hashmap
private final String NAME = "NAME";
public static final int DELETETODELETE = 6;
+ String fileHeader;
+
+  /**
+   * Creates an empty model; every field keeps the value given by its
+   * declaration.
+   */
+  public HiddenMarkovModel()
+  {
+  }
+
+  /**
+   * Copy constructor. Builds a new model whose lists and maps are fresh copies
+   * of those in the given model, so adding to or removing from one model's
+   * collections does not affect the other.
+   *
+   * NOTE(review): only the containers are copied; the node objects held in the
+   * node list are shared between both models, so a change made to a node
+   * through one model (e.g. via setAlignmentColumn) is visible through the
+   * other. Confirm this is intended.
+   *
+   * @param hmm
+   *          the model to copy; must not be null
+   */
+  public HiddenMarkovModel(HiddenMarkovModel hmm)
+  {
+    this.fileProperties = new HashMap<>(hmm.fileProperties);
+    this.symbols = new ArrayList<>(hmm.symbols);
+    this.nodes = new ArrayList<>(hmm.nodes);
+    this.nodeLookup = new HashMap<>(hmm.nodeLookup);
+    this.symbolIndexLookup = new HashMap<>(hmm.symbolIndexLookup);
+    // Strings are immutable, so the reference can be shared; this also avoids
+    // the NullPointerException that new String(null) throws when the source
+    // model has no file header set
+    this.fileHeader = hmm.fileHeader;
+  }
+
+  /**
+   * Returns the file header of the .hmm file this model came from.
+   *
+   * @return the header, or null if none has been set
+   */
+  public String getFileHeader()
+  {
+    return this.fileHeader;
+  }
+
+  /**
+   * Stores the file header of this model, replacing any header held before.
+   *
+   * @param header
+   *          the header text to keep
+   */
+  public void setFileHeader(String header)
+  {
+    this.fileHeader = header;
+  }
+
/**
* Returns the map containing the matches between nodes and alignment column
* indexes.
return 0d;
}
symbolIndex = symbolIndexLookup.get(symbol);
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
return probability;
}
return 0d;
}
symbolIndex = symbolIndexLookup.get(symbol);
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getInsertEmissions()
.get(symbolIndex);
return probability;
public Double getStateTransitionProbability(int alignColumn,
int transition)
{
- int transitionIndex;
int nodeIndex;
Double probability;
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getStateTransitions()
.get(transition);
return probability;
public Integer getNodeAlignmentColumn(int nodeIndex)
{
Integer value = nodes.get(nodeIndex).getAlignmentColumn();
- return value - 1;
+ // the node's stored column is returned as is, with no offset applied
+ return value;
}
/**
}
/**
- * Returns the consensus at a given alignment column.
+ * Returns the consensus at a given alignment column. When the consensus is
+ * derived from the match emission probabilities, a lower case character means
+ * its emission probability is less than 0.5.
*
* @param columnIndex
* The index of the column in the alignment for which the consensus
*/
public char getConsensusAtAlignColumn(int columnIndex)
{
- char value;
+ // gap character, kept when no symbol has a positive match emission
+ // probability for this column
+ char mostLikely = '-';
+ if (consensusResidueIsActive())
+ {
+
Integer index = findNodeIndex(columnIndex);
if (index == null)
{
return '-';
}
- value = getNodes().get(index).getConsensusResidue();
- return value;
+ mostLikely = getNodes().get(index).getConsensusResidue();
+ return mostLikely;
+ }
+ else
+ {
+ // consensus residues are not active: pick the symbol with the highest
+ // match emission probability for this column
+ double highestProb = 0;
+ for (char character : symbols)
+ {
+ Double prob = getMatchEmissionProbability(columnIndex, character);
+ if (prob > highestProb)
+ {
+ highestProb = prob;
+ mostLikely = character;
+ }
+ }
+ // a winner with probability below 0.5 is reported in lower case
+ if (highestProb < 0.5)
+ {
+ mostLikely = Character.toLowerCase(mostLikely);
+ }
+ return mostLikely;
+ }
+
}
/**
*/
public int getNumberOfSymbols()
{
- return numberOfSymbols;
- }
-
- /**
- * Fills symbol array and whilst doing so, updates the value of the number of
- * symbols.
- *
- * @param parser
- * The scanner scanning the symbol line in the file.
- */
- public void fillSymbols(Scanner parser)
- {
- int i = 0;
- while (parser.hasNext())
- {
- String strSymbol = parser.next();
- char[] symbol = strSymbol.toCharArray();
- symbols.add(symbol[0]);
- symbolIndexLookup.put(symbol[0], i);
- i++;
- }
- numberOfSymbols = symbols.size();
+ // read straight from the symbol list; no separate counter is kept
+ return symbols.size();
}
/**
*/
public void setAlignmentColumn(int nodeIndex, int column)
{
+ // drop the stale column -> node entry before recording the new one;
+ // getNodeAlignmentColumn returns a boxed Integer, so keep it boxed and test
+ // for null rather than unboxing a column that was never set
+ Integer currentCol = getNodeAlignmentColumn(nodeIndex);
+ if (currentCol != null)
+ {
+ nodeLookup.remove(currentCol);
+ }
nodes.get(nodeIndex).setAlignmentColumn(column);
+ // keep the lookup in step with the column now stored on the node
+ nodeLookup.put(column, nodeIndex);
+ }
+
+ /**
+ * Discards every column to node mapping by replacing the node lookup with a
+ * new, empty map.
+ */
+ public void emptyNodeLookup()
+ {
+ this.nodeLookup = new HashMap<>();
}
+
/**
* Sets the reference annotation at a given node.
*
public Integer findNodeIndex(int alignmentColumn)
{
Integer index;
- index = nodeLookup.get(alignmentColumn + 1);
+ // keys are alignment columns; get() yields null when no node maps here
+ index = nodeLookup.get(alignmentColumn);
return index;
}
}
}
- /**
- * Creates the HMM Logo alignment annotation, and populates it with
- * information content data.
- *
- * @return The alignment annotation.
- */
- public AlignmentAnnotation createAnnotation(int length)
- {
- Annotation[] annotations = new Annotation[length];
- float max = 0f;
- for (int alignPos = 0; alignPos < length; alignPos++)
- {
- Float content = getInformationContent(alignPos);
- if (content > max)
- {
- max = content;
- }
-
- Character cons;
- cons = getConsensusAtAlignColumn(alignPos);
- cons = Character.toUpperCase(cons);
-
- String description = String.format("%.3f", content);
- description += " bits";
- annotations[alignPos] = new Annotation(cons.toString(), description,
- ' ',
- content);
-
- }
- AlignmentAnnotation annotation = new AlignmentAnnotation(
- "Information Content",
- "The information content of each column, measured in bits",
- annotations,
- 0f, max, AlignmentAnnotation.BAR_GRAPH);
- return annotation;
- }
-
- /**
- * Returns the information content at a specified column.
- *
- * @param column
- * Index of the column, starting from 0.
- * @return
- */
- public float getInformationContent(int column)
- {
- float informationContent = 0f;
-
- for (char symbol : symbols)
- {
- float freq = 0f;
- if ("amino".equals(getAlphabetType()))
- {
- freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
- }
- if ("DNA".equals(getAlphabetType()))
- {
- freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol);
- }
- if ("RNA".equals(getAlphabetType()))
- {
- freq = ResidueProperties.rnaBackgroundFrequencies
- .get(symbol);
- }
- Double hmmProb = getMatchEmissionProbability(column, symbol);
- float prob = hmmProb.floatValue();
- informationContent += prob * (Math.log(prob / freq) / Math.log(2));
-
- }
- return informationContent;
- }
/**
* Returns the consensus sequence based on the most probable symbol at each
* The length of the longest sequence in the existing alignment.
* @return
*/
- public Sequence getConsensusSequence(int length)
+ public Sequence getConsensusSequence()
{
int start;
int end;
start = getNodeAlignmentColumn(1);
modelLength = getLength();
end = getNodeAlignmentColumn(modelLength);
- char[] sequence = new char[length];
- for (int index = 0; index < length; index++)
+ char[] sequence = new char[end + 1];
+ for (int index = 0; index < end + 1; index++)
{
Character character;
- if (consensusResidueIsActive())
- {
+
character = getConsensusAtAlignColumn(index);
- }
- else
- {
- character = findConsensusCharacter(index);
- }
+
if (character == null || character == '-')
{
sequence[index] = '-';
}
- Sequence seq = new Sequence("HMM CONSENSUS", sequence, start, end);
+ Sequence seq = new Sequence(getName(), sequence, start,
+ end);
return seq;
}
+
/**
- * Finds the most probable character at a column in an alignment based on the
- * HMM.
+ * Creates the consensus sequence for this model, flags it as an HMM consensus
+ * sequence and attaches this model to it
*
- * @param nodeIndex
- * The index of the node.
- * @return
+ * @return A new HMM consensus sequence
*/
- Character findConsensusCharacter(int column)
+ public SequenceI initHMMSequence()
{
- Character mostLikely = null;
- double highestProb = 0;
- for (char character : symbols)
- {
- Double prob = getMatchEmissionProbability(column, character);
- if (prob > highestProb)
- {
- highestProb = prob;
- mostLikely = character;
- }
- }
- return mostLikely;
+ Sequence consensus = getConsensusSequence();
+ // flag the sequence as an HMM consensus and link it back to this model
+ consensus.setIsHMMConsensusSequence(true);
+ consensus.setHMM(this);
+ return consensus;
+ }
+
+  /**
+   * Returns the index recorded for the given symbol in the symbol index
+   * lookup.
+   *
+   * @param c
+   *          the symbol to look up
+   * @return the index stored for the symbol
+   * @throws NullPointerException
+   *           if the lookup holds no entry for the symbol, as the missing
+   *           value cannot be unboxed to an int
+   */
+  public int getSymbolIndex(char c)
+  {
+    Integer symbolIndex = symbolIndexLookup.get(c);
+    return symbolIndex;
+  }
+
+  /**
+   * Records the index of a symbol in the symbol index lookup, replacing any
+   * index previously stored for that symbol.
+   *
+   * @param c
+   *          the symbol
+   * @param i
+   *          the index to associate with the symbol
+   */
+  public void setSymbolIndex(Character c, Integer i)
+  {
+    this.symbolIndexLookup.put(c, i);
+  }
+
}