package jalview.datamodel;
-import jalview.schemes.ResidueProperties;
-
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Scanner;
/**
* Data structure which stores a hidden Markov model. Currently contains file
// 0. Node 0 contains average emission probabilities for each symbol
List<HMMNode> nodes = new ArrayList<>();
- // contains the HMM node for each alignment column
+ // maps each alignment column (columns are numbered from 0) to the index of
+ // the HMM node assigned to that column
Map<Integer, Integer> nodeLookup = new HashMap<>();
// contains the symbol index for each symbol
final static String YES = "yes";
final static String NO = "no";
-
- int numberOfSymbols;
// keys for file properties hashmap
private final String NAME = "NAME";
public static final int DELETETODELETE = 6;
+ String fileHeader;
+
+ public HiddenMarkovModel()
+ {
+ // intentionally empty: this constructor assigns no fields itself
+ }
+
+ /**
+  * Copy constructor. Builds a new model whose property map, symbol list, node
+  * list and lookup maps are fresh containers filled from the given model.
+  * The copies are shallow: the node objects held in the node list are shared
+  * with the original model, not duplicated.
+  *
+  * @param hmm
+  *          the model to copy; must not be null
+  */
+ public HiddenMarkovModel(HiddenMarkovModel hmm)
+ {
+   super();
+   this.fileProperties = new HashMap<>(hmm.fileProperties);
+   this.symbols = new ArrayList<>(hmm.symbols);
+   this.nodes = new ArrayList<>(hmm.nodes);
+   this.nodeLookup = new HashMap<>(hmm.nodeLookup);
+   this.symbolIndexLookup = new HashMap<>(hmm.symbolIndexLookup);
+   // String is immutable, so sharing the reference is safe. Assigning it
+   // directly also keeps the copy working when the source model has no
+   // header (null); new String(hmm.fileHeader) would throw a
+   // NullPointerException in that case.
+   this.fileHeader = hmm.fileHeader;
+ }
+
+ /**
+ * Gets the file header of the .hmm file this model came from.
+ *
+ * @return the header text currently stored in this model, or null if none
+ *         has been set
+ */
+ public String getFileHeader()
+ {
+ return fileHeader;
+ }
+
+ /**
+ * Sets the file header of this model.
+ *
+ * @param header
+ *          the header text to store; it is kept exactly as given, with no
+ *          validation
+ */
+ public void setFileHeader(String header)
+ {
+ fileHeader = header;
+ }
+
/**
* Returns the map containing the matches between nodes and alignment column
* indexes.
return 0d;
}
symbolIndex = symbolIndexLookup.get(symbol);
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
return probability;
}
return 0d;
}
symbolIndex = symbolIndexLookup.get(symbol);
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getInsertEmissions()
.get(symbolIndex);
return probability;
public Double getStateTransitionProbability(int alignColumn,
int transition)
{
- int transitionIndex;
int nodeIndex;
Double probability;
- if (nodeLookup.containsKey(alignColumn + 1))
+ if (nodeLookup.containsKey(alignColumn))
{
- nodeIndex = nodeLookup.get(alignColumn + 1);
+ nodeIndex = nodeLookup.get(alignColumn);
probability = getNode(nodeIndex).getStateTransitions()
.get(transition);
return probability;
public Integer getNodeAlignmentColumn(int nodeIndex)
{
Integer value = nodes.get(nodeIndex).getAlignmentColumn();
- return value - 1;
+ return value; // returned exactly as the node stores it, with no offset applied
}
/**
}
/**
- * Returns the consensus at a given alignment column.
+ * Returns the consensus at a given alignment column. If the character is
+ * lower case, its emission probability is less than 0.5.
*
* @param columnIndex
* The index of the column in the alignment for which the consensus
*/
public char getConsensusAtAlignColumn(int columnIndex)
{
- char value;
+ char mostLikely = '-';
+ if (consensusResidueIsActive())
+ {
+ // use the consensus residue stored on the node mapped to this column
Integer index = findNodeIndex(columnIndex);
if (index == null)
{
return '-';
}
- value = getNodes().get(index).getConsensusResidue();
- return value;
+ mostLikely = getNodes().get(index).getConsensusResidue();
+ return mostLikely;
+ }
+ else
+ {
+ double highestProb = 0; // best match emission probability seen so far
+ for (char character : symbols)
+ {
+ Double prob = getMatchEmissionProbability(columnIndex, character);
+ if (prob > highestProb) // strict: with no probability above 0 the result stays '-'
+ {
+ highestProb = prob;
+ mostLikely = character;
+ }
+ }
+ if (highestProb < 0.5) // a lower-case result marks a best probability below 0.5
+ {
+ mostLikely = Character.toLowerCase(mostLikely);
+ }
+ return mostLikely;
+ }
+
}
/**
*/
public int getNumberOfSymbols()
{
- return numberOfSymbols;
- }
-
- /**
- * Fills symbol array and whilst doing so, updates the value of the number of
- * symbols.
- *
- * @param parser
- * The scanner scanning the symbol line in the file.
- */
- public void fillSymbols(Scanner parser)
- {
- int i = 0;
- while (parser.hasNext())
- {
- String strSymbol = parser.next();
- char[] symbol = strSymbol.toCharArray();
- symbols.add(symbol[0]);
- symbolIndexLookup.put(symbol[0], i);
- i++;
- }
- numberOfSymbols = symbols.size();
+ return symbols.size(); // read from the symbol list itself; no separate counter is kept
}
/**
*/
public void setAlignmentColumn(int nodeIndex, int column)
{
+ // Held as Integer rather than int, so a node that reports no column (null)
+ // does not fail on unboxing.
+ Integer currentCol = getNodeAlignmentColumn(nodeIndex);
+ if (currentCol != null)
+ {
+   // Drop the old column's entry only while it still points at this node.
+   // When several nodes are re-mapped one after another, this node's old
+   // column may already have been assigned to another node; removing it
+   // unconditionally would erase that node's new mapping.
+   Integer mappedNode = nodeLookup.get(currentCol);
+   if (mappedNode != null && mappedNode.intValue() == nodeIndex)
+   {
+     nodeLookup.remove(currentCol);
+   }
+ }
nodes.get(nodeIndex).setAlignmentColumn(column);
+ nodeLookup.put(column, nodeIndex);
}
/**
+ * Clears all data in the node lookup map by replacing it with a new, empty
+ * map. The previous map object is not modified, and the alignment columns
+ * stored on the nodes themselves are left as they are.
+ */
+ public void emptyNodeLookup()
+ {
+ nodeLookup = new HashMap<>();
+ }
+
+
+ /**
* Sets the reference annotation at a given node.
*
* @param nodeIndex
*/
public void setMAPStatus(boolean status)
{
- if (status == true)
- {
- fileProperties.put(MAP, YES);
- }
- else
- {
- fileProperties.put(MAP, NO);
- }
+ fileProperties.put(MAP, status ? YES : NO); // true is stored as "yes", false as "no"
}
/**
*/
public void setReferenceAnnotationStatus(boolean status)
{
- if (status == true)
- {
- fileProperties.put(REFERENCE_ANNOTATION, YES);
- }
- else
- {
- fileProperties.put(REFERENCE_ANNOTATION, NO);
- }
+ fileProperties.put(REFERENCE_ANNOTATION, status ? YES : NO); // true is stored as "yes", false as "no"
}
/**
*/
public void setMaskedValueStatus(boolean status)
{
- if (status == true)
- {
- fileProperties.put(MASKED_VALUE, YES);
- }
- else
- {
- fileProperties.put(MASKED_VALUE, NO);
- }
+ fileProperties.put(MASKED_VALUE, status ? YES : NO); // true is stored as "yes", false as "no"
}
/**
*/
public void setConsensusResidueStatus(boolean status)
{
- if (status == true)
- {
- fileProperties.put(CONSENSUS_RESIDUE, YES);
- }
- else
- {
- fileProperties.put(CONSENSUS_RESIDUE, NO);
- }
+ fileProperties.put(CONSENSUS_RESIDUE, status ? YES : NO); // true is stored as "yes", false as "no"
}
/**
*/
public void setConsensusStructureStatus(boolean status)
{
- if (status == true)
- {
- fileProperties.put(CONSENSUS_STRUCTURE, YES);
- }
- else
- {
- fileProperties.put(CONSENSUS_STRUCTURE, NO);
- }
+ fileProperties.put(CONSENSUS_STRUCTURE, status ? YES : NO); // true is stored as "yes", false as "no"
}
/**
public Integer findNodeIndex(int alignmentColumn)
{
Integer index;
- index = nodeLookup.get(alignmentColumn + 1);
+ index = nodeLookup.get(alignmentColumn); // null when no node is mapped to this column
return index;
}
}
}
+
+
/**
- * Creates the HMM Logo alignment annotation, and populates it with
- * information content data.
+ * Returns the consensus sequence for this model, taking one character from
+ * getConsensusAtAlignColumn for every alignment column from 0 up to the
+ * column of the node at index getLength(). Columns whose consensus is a gap
+ * are filled with '-'; every other character is upper-cased.
*
- * @return The alignment annotation.
+ * @return a new Sequence built from this model's name, the consensus
+ *         characters, and the alignment columns of node 1 and of node
+ *         getLength() as its start and end
*/
- public AlignmentAnnotation createAnnotation(int length)
+ public Sequence getConsensusSequence()
{
- Annotation[] annotations = new Annotation[length];
- float max = 0f;
- for (int alignPos = 0; alignPos < length; alignPos++)
+ int start;
+ int end;
+ int modelLength;
+ start = getNodeAlignmentColumn(1); // alignment column of node 1
+ modelLength = getLength();
+ end = getNodeAlignmentColumn(modelLength); // alignment column of node getLength()
+ char[] sequence = new char[end + 1]; // one slot for each column 0..end inclusive
+ for (int index = 0; index < end + 1; index++)
{
- Float content = getInformationContent(alignPos);
- if (content > max)
+ Character character;
+
+ character = getConsensusAtAlignColumn(index);
+ // the value above is boxed from a primitive char, so the null test below
+ // can never be true
+ if (character == null || character == '-')
{
- max = content;
+ sequence[index] = '-';
+ }
+ else
+ {
+ sequence[index] = Character.toUpperCase(character); // the consensus may be lower case; store it upper case
+ }
}
- Character cons;
- cons = getConsensusAtAlignColumn(alignPos);
- cons = Character.toUpperCase(cons);
-
- String description = String.format("%.3f", content);
- description += " bits";
- annotations[alignPos] = new Annotation(cons.toString(), description,
- ' ',
- content);
- }
- AlignmentAnnotation annotation = new AlignmentAnnotation(
- "Information Content",
- "The information content of each column, measured in bits",
- annotations,
- 0f, max, AlignmentAnnotation.BAR_GRAPH);
- return annotation;
+ Sequence seq = new Sequence(getName(), sequence, start,
+ end);
+ return seq;
}
+
/**
- * Returns the information content at a specified column.
+ * Initiates a HMM consensus sequence
*
- * @param column
- * Index of the column, starting from 0.
- * @return
+ * @return A new HMM consensus sequence
*/
- public float getInformationContent(int column)
+ public SequenceI initHMMSequence()
{
- float informationContent = 0f;
-
- for (char symbol : symbols)
- {
- float freq = 0f;
- if (symbols.size() == 20)
- {
- freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
- }
- if (symbols.size() == 4)
- {
- freq = ResidueProperties.nucleotideBackgroundFrequencies
- .get(symbol);
- }
- Double hmmProb = getMatchEmissionProbability(column, symbol);
- float prob = hmmProb.floatValue();
- informationContent += prob * (Math.log(prob / freq) / Math.log(2));
+ Sequence consensus = getConsensusSequence(); // a new sequence is built on every call
+ consensus.setIsHMMConsensusSequence(true); // flag it as an HMM consensus sequence
+ consensus.setHMM(this); // attach this model to the sequence
+ return consensus;
+ }
- }
+ public int getSymbolIndex(char c)
+ {
+ return symbolIndexLookup.get(c); // unboxed to int: a symbol with no entry makes this throw NullPointerException
+ }
- return informationContent;
+ public void setSymbolIndex(Character c, Integer i)
+ {
+ symbolIndexLookup.put(c, i); // adds the entry, or replaces the index already stored for c
}
}