X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FHiddenMarkovModel.java;h=1b1294504993a22188e8d92b5ec36f6d90f123a6;hb=7d950017bb9262f2eff563192071b5ed9ccc76b4;hp=c96ad8b1b5d13f590a5da953fcce2deaed5f6613;hpb=b48749857263e9c85c93fce54f764453fbce9696;p=jalview.git diff --git a/src/jalview/datamodel/HiddenMarkovModel.java b/src/jalview/datamodel/HiddenMarkovModel.java index c96ad8b..1b12945 100644 --- a/src/jalview/datamodel/HiddenMarkovModel.java +++ b/src/jalview/datamodel/HiddenMarkovModel.java @@ -1,12 +1,11 @@ package jalview.datamodel; -import jalview.gui.AlignFrame; +import jalview.schemes.ResidueProperties; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Scanner; /** * Data structure which stores a hidden Markov model. Currently contains file @@ -18,7 +17,7 @@ import java.util.Scanner; */ public class HiddenMarkovModel { - + private static final double LOG2 = Math.log(2); // Stores file properties. Do not directly access this field as it contains // only string value - use the getter methods. For example, to find the length @@ -43,8 +42,6 @@ public class HiddenMarkovModel final static String YES = "yes"; final static String NO = "no"; - - int numberOfSymbols; // keys for file properties hashmap private final String NAME = "NAME"; @@ -117,9 +114,11 @@ public class HiddenMarkovModel String fileHeader; + /** + * Constructor + */ public HiddenMarkovModel() { - } public HiddenMarkovModel(HiddenMarkovModel hmm) @@ -131,12 +130,40 @@ public class HiddenMarkovModel this.nodeLookup = new HashMap<>(hmm.nodeLookup); this.symbolIndexLookup = new HashMap<>( hmm.symbolIndexLookup); - this.numberOfSymbols = hmm.numberOfSymbols; this.fileHeader = new String(hmm.fileHeader); } /** - * Gets the file header of the .hmm file this model came from. + * Returns the information content at a specified column, calculated as the + * sum (over possible symbols) of the log ratio + * + *
+   *  log(emission probability / background probability) / log(2)
+   * 
+ * + * @param column + * column position (base 0) + * @return + */ + public float getInformationContent(int column) + { + float informationContent = 0f; + + for (char symbol : getSymbols()) + { + float freq = ResidueProperties.backgroundFrequencies + .get(getAlphabetType()).get(symbol); + float prob = (float) getMatchEmissionProbability(column, symbol); + informationContent += prob * Math.log(prob / freq); + } + + informationContent = informationContent / (float) LOG2; + + return informationContent; + } + + /** + * Gets the file header of the .hmm file this model came from * * @return */ @@ -382,11 +409,11 @@ public class HiddenMarkovModel * @return * */ - public Double getMatchEmissionProbability(int alignColumn, char symbol) + public double getMatchEmissionProbability(int alignColumn, char symbol) { int symbolIndex; int nodeIndex; - Double probability; + double probability; if (!symbolIndexLookup.containsKey(symbol)) { return 0d; @@ -402,7 +429,6 @@ public class HiddenMarkovModel { return 0d; } - } /** @@ -417,11 +443,11 @@ public class HiddenMarkovModel * @return * */ - public Double getInsertEmissionProbability(int alignColumn, char symbol) + public double getInsertEmissionProbability(int alignColumn, char symbol) { int symbolIndex; int nodeIndex; - Double probability; + double probability; if (!symbolIndexLookup.containsKey(symbol)) { return 0d; @@ -456,7 +482,6 @@ public class HiddenMarkovModel public Double getStateTransitionProbability(int alignColumn, int transition) { - int transitionIndex; int nodeIndex; Double probability; if (nodeLookup.containsKey(alignColumn)) @@ -501,7 +526,8 @@ public class HiddenMarkovModel } /** - * Returns the consensus at a given alignment column. + * Returns the consensus at a given alignment column. If the character is + * lower case, its emission probability is less than 0.5. * * @param columnIndex * The index of the column in the alignment for which the consensus @@ -534,6 +560,10 @@ public class HiddenMarkovModel mostLikely = character; } } + if (highestProb < 0.5) + { + mostLikely = Character.toLowerCase(mostLikely); + } return mostLikely; } @@ -599,28 +629,7 @@ public class HiddenMarkovModel */ public int getNumberOfSymbols() { - return numberOfSymbols; - } - - /** - * Fills symbol array and whilst doing so, updates the value of the number of - * symbols. - * - * @param parser - * The scanner scanning the symbol line in the file. - */ - public void fillSymbols(Scanner parser) - { - int i = 0; - while (parser.hasNext()) - { - String strSymbol = parser.next(); - char[] symbol = strSymbol.toCharArray(); - symbols.add(symbol[0]); - symbolIndexLookup.put(symbol[0], i); - i++; - } - numberOfSymbols = symbols.size(); + return symbols.size(); } /** @@ -772,9 +781,19 @@ public class HiddenMarkovModel public void setAlignmentColumn(int nodeIndex, int column) { nodes.get(nodeIndex).setAlignmentColumn(column); + nodeLookup.put(column, nodeIndex); } /** + * Clears all data in the node lookup map + */ + public void emptyNodeLookup() + { + nodeLookup = new HashMap<>(); + } + + + /** * Sets the reference annotation at a given node. * * @param nodeIndex @@ -985,7 +1004,7 @@ public class HiddenMarkovModel * The length of the longest sequence in the existing alignment. * @return */ - public Sequence getConsensusSequence(int length) + public Sequence getConsensusSequence() { int start; int end; @@ -993,8 +1012,8 @@ public class HiddenMarkovModel start = getNodeAlignmentColumn(1); modelLength = getLength(); end = getNodeAlignmentColumn(modelLength); - char[] sequence = new char[length]; - for (int index = 0; index < length; index++) + char[] sequence = new char[end + 1]; + for (int index = 0; index < end + 1; index++) { Character character; @@ -1011,128 +1030,33 @@ public class HiddenMarkovModel } - Sequence seq = new Sequence(getName() + "_HMM", sequence, start, end); + Sequence seq = new Sequence(getName(), sequence, start, + end); return seq; } /** - * Maps the nodes of the hidden Markov model to the reference annotation and - * then deletes this annotation. + * Initiates a HMM consensus sequence + * + * @return A new HMM consensus sequence */ - public void mapToReferenceAnnotation(AlignFrame af, SequenceI seq) + public SequenceI initHMMSequence() { - AlignmentAnnotation annotArray[] = af.getViewport().getAlignment() - .getAlignmentAnnotation(); - - AlignmentAnnotation reference = null; - for (AlignmentAnnotation annot : annotArray) - { - if (annot.label.contains("Reference")) - { - reference = annot; - } - } - - if (reference == null) - { - return; - } - - mapToReferenceAnnotation(reference, seq); - af.getViewport().getAlignment().deleteAnnotation(reference); - } - - public void mapToReferenceAnnotation(AlignmentAnnotation reference, - SequenceI seq) - { - HiddenMarkovModel hmm = seq.getHMM(); - Annotation[] annots = reference.annotations; - { - int nodeIndex = 0; - for (int col = 0; col < annots.length; col++) - { - String character = annots[col].displayCharacter; - if ("x".equals(character) || "X".equals(character)) - { - nodeIndex++; - if (nodeIndex < hmm.getNodes().size()) - { - HMMNode node = hmm.getNode(nodeIndex); - int alignPos = getNodeAlignmentColumn(nodeIndex); - char seqCharacter = seq.getCharAt(alignPos); - if (alignPos >= seq.getLength() || col >= seq.getLength()) - { - seq.insertCharAt(seq.getLength(), - (alignPos + 1) - seq.getLength(), - '-'); - } - seq.getSequence()[alignPos] = '-'; - seq.getSequence()[col] = seqCharacter; - node.setAlignmentColumn(col); - hmm.nodeLookup.put(col, nodeIndex); - } - else - { - System.out.println( - "The reference annotation contains more consensus columns than the hidden Markov model"); - break; - } - } - else - { - hmm.nodeLookup.remove(col); - } - } - - } - + Sequence consensus = getConsensusSequence(); + consensus.setIsHMMConsensusSequence(true); + consensus.setHMM(this); + return consensus; } - public void mapToReferenceAnnotation(AlignmentAnnotation reference) + public int getSymbolIndex(char c) { - Annotation[] annots = reference.annotations; - { - int nodeIndex = 0; - for (int col = 0; col < annots.length; col++) - { - String character = annots[col].displayCharacter; - if ("x".equals(character) || "X".equals(character)) - { - nodeIndex++; - if (nodeIndex < nodes.size()) - { - HMMNode node = nodes.get(nodeIndex); - node.setAlignmentColumn(col + 1); - nodeLookup.put(col, nodeIndex); - } - else - { - System.out.println( - "The reference annotation contains more consensus columns than the hidden Markov model"); - break; - } - } - else - { - nodeLookup.remove(col); - } - } - - } - + return symbolIndexLookup.get(c); } - public SequenceI initHMMSequence(AlignFrame af, int position) + public void setSymbolIndex(Character c, Integer i) { - AlignmentI alignment = af.getViewport().getAlignment(); - int length = alignment.getWidth(); - Sequence consensus = getConsensusSequence(length); - consensus.setIsHMMConsensusSequence(true); - consensus.setHMM(this); - SequenceI[] consensusArr = new Sequence[] { consensus }; - alignment.getSequences().add(position, consensus); - return consensus; + symbolIndexLookup.put(c, i); }