X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fdatamodel%2FHiddenMarkovModel.java;h=a837b4fd2ed2c7e85ad343a86642a7e0aa0a2786;hb=dce53361c4b6bb56dc8567ac77e85a2300a5458d;hp=0aee77a655b88c8bdd5b829afc2d7036382fe839;hpb=2e6deb5d089d95ce34b15828766479b4f7803f5c;p=jalview.git diff --git a/src/jalview/datamodel/HiddenMarkovModel.java b/src/jalview/datamodel/HiddenMarkovModel.java index 0aee77a..a837b4f 100644 --- a/src/jalview/datamodel/HiddenMarkovModel.java +++ b/src/jalview/datamodel/HiddenMarkovModel.java @@ -1,12 +1,9 @@ package jalview.datamodel; -import jalview.schemes.ResidueProperties; - import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Scanner; /** * Data structure which stores a hidden Markov model. Currently contains file @@ -33,7 +30,8 @@ public class HiddenMarkovModel // 0. Node 0 contains average emission probabilities for each symbol List nodes = new ArrayList<>(); - // contains the HMM node for each alignment column + // contains the HMM node for each alignment column, alignment columns start at + // index 0; Map nodeLookup = new HashMap<>(); // contains the symbol index for each symbol @@ -42,8 +40,6 @@ public class HiddenMarkovModel final static String YES = "yes"; final static String NO = "no"; - - int numberOfSymbols; // keys for file properties hashmap private final String NAME = "NAME"; @@ -114,6 +110,45 @@ public class HiddenMarkovModel public static final int DELETETODELETE = 6; + String fileHeader; + + public HiddenMarkovModel() + { + + } + + public HiddenMarkovModel(HiddenMarkovModel hmm) + { + super(); + this.fileProperties = new HashMap<>(hmm.fileProperties); + this.symbols = new ArrayList<>(hmm.symbols); + this.nodes = new ArrayList<>(hmm.nodes); + this.nodeLookup = new HashMap<>(hmm.nodeLookup); + this.symbolIndexLookup = new HashMap<>( + hmm.symbolIndexLookup); + this.fileHeader = new String(hmm.fileHeader); + } + + /** + * Gets the file header of the .hmm file this model came from. + * + * @return + */ + public String getFileHeader() + { + return fileHeader; + } + + /** + * Sets the file header of this model. + * + * @param header + */ + public void setFileHeader(String header) + { + fileHeader = header; + } + /** * Returns the map containing the matches between nodes and alignment column * indexes. @@ -351,9 +386,9 @@ public class HiddenMarkovModel return 0d; } symbolIndex = symbolIndexLookup.get(symbol); - if (nodeLookup.containsKey(alignColumn + 1)) + if (nodeLookup.containsKey(alignColumn)) { - nodeIndex = nodeLookup.get(alignColumn + 1); + nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex); return probability; } @@ -386,9 +421,9 @@ public class HiddenMarkovModel return 0d; } symbolIndex = symbolIndexLookup.get(symbol); - if (nodeLookup.containsKey(alignColumn + 1)) + if (nodeLookup.containsKey(alignColumn)) { - nodeIndex = nodeLookup.get(alignColumn + 1); + nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getInsertEmissions() .get(symbolIndex); return probability; @@ -415,12 +450,11 @@ public class HiddenMarkovModel public Double getStateTransitionProbability(int alignColumn, int transition) { - int transitionIndex; int nodeIndex; Double probability; - if (nodeLookup.containsKey(alignColumn + 1)) + if (nodeLookup.containsKey(alignColumn)) { - nodeIndex = nodeLookup.get(alignColumn + 1); + nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getStateTransitions() .get(transition); return probability; @@ -443,7 +477,7 @@ public class HiddenMarkovModel public Integer getNodeAlignmentColumn(int nodeIndex) { Integer value = nodes.get(nodeIndex).getAlignmentColumn(); - return value - 1; + return value; } /** @@ -460,7 +494,8 @@ public class HiddenMarkovModel } /** - * Returns the consensus at a given alignment column. + * Returns the consensus at a given alignment column. If the character is + * lower case, its emission probability is less than 0.5. * * @param columnIndex * The index of the column in the alignment for which the consensus @@ -469,14 +504,37 @@ public class HiddenMarkovModel */ public char getConsensusAtAlignColumn(int columnIndex) { - char value; + char mostLikely = '-'; + if (consensusResidueIsActive()) + { + Integer index = findNodeIndex(columnIndex); if (index == null) { return '-'; } - value = getNodes().get(index).getConsensusResidue(); - return value; + mostLikely = getNodes().get(index).getConsensusResidue(); + return mostLikely; + } + else + { + double highestProb = 0; + for (char character : symbols) + { + Double prob = getMatchEmissionProbability(columnIndex, character); + if (prob > highestProb) + { + highestProb = prob; + mostLikely = character; + } + } + if (highestProb < 0.5) + { + mostLikely = Character.toLowerCase(mostLikely); + } + return mostLikely; + } + } /** @@ -539,28 +597,7 @@ public class HiddenMarkovModel */ public int getNumberOfSymbols() { - return numberOfSymbols; - } - - /** - * Fills symbol array and whilst doing so, updates the value of the number of - * symbols. - * - * @param parser - * The scanner scanning the symbol line in the file. - */ - public void fillSymbols(Scanner parser) - { - int i = 0; - while (parser.hasNext()) - { - String strSymbol = parser.next(); - char[] symbol = strSymbol.toCharArray(); - symbols.add(symbol[0]); - symbolIndexLookup.put(symbol[0], i); - i++; - } - numberOfSymbols = symbols.size(); + return symbols.size(); } /** @@ -711,10 +748,22 @@ public class HiddenMarkovModel */ public void setAlignmentColumn(int nodeIndex, int column) { + int currentCol = getNodeAlignmentColumn(nodeIndex); + nodeLookup.remove(currentCol); nodes.get(nodeIndex).setAlignmentColumn(column); + nodeLookup.put(column, nodeIndex); } /** + * Clears all data in the node lookup map + */ + public void emptyNodeLookup() + { + nodeLookup = new HashMap<>(); + } + + + /** * Sets the reference annotation at a given node. * * @param nodeIndex @@ -837,14 +886,7 @@ public class HiddenMarkovModel */ public void setMAPStatus(boolean status) { - if (status == true) - { - fileProperties.put(MAP, YES); - } - else - { - fileProperties.put(MAP, NO); - } + fileProperties.put(MAP, status ? YES : NO); } /** @@ -854,14 +896,7 @@ public class HiddenMarkovModel */ public void setReferenceAnnotationStatus(boolean status) { - if (status == true) - { - fileProperties.put(REFERENCE_ANNOTATION, YES); - } - else - { - fileProperties.put(REFERENCE_ANNOTATION, NO); - } + fileProperties.put(REFERENCE_ANNOTATION, status ? YES : NO); } /** @@ -871,14 +906,7 @@ public class HiddenMarkovModel */ public void setMaskedValueStatus(boolean status) { - if (status == true) - { - fileProperties.put(MASKED_VALUE, YES); - } - else - { - fileProperties.put(MASKED_VALUE, NO); - } + fileProperties.put(MASKED_VALUE, status ? YES : NO); } /** @@ -888,14 +916,7 @@ public class HiddenMarkovModel */ public void setConsensusResidueStatus(boolean status) { - if (status == true) - { - fileProperties.put(CONSENSUS_RESIDUE, YES); - } - else - { - fileProperties.put(CONSENSUS_RESIDUE, NO); - } + fileProperties.put(CONSENSUS_RESIDUE, status ? YES : NO); } /** @@ -905,14 +926,7 @@ public class HiddenMarkovModel */ public void setConsensusStructureStatus(boolean status) { - if (status == true) - { - fileProperties.put(CONSENSUS_STRUCTURE, YES); - } - else - { - fileProperties.put(CONSENSUS_STRUCTURE, NO); - } + fileProperties.put(CONSENSUS_STRUCTURE, status ? YES : NO); } /** @@ -927,7 +941,7 @@ public class HiddenMarkovModel public Integer findNodeIndex(int alignmentColumn) { Integer index; - index = nodeLookup.get(alignmentColumn + 1); + index = nodeLookup.get(alignmentColumn); return index; } @@ -949,73 +963,70 @@ public class HiddenMarkovModel } } + + /** - * Creates the HMM Logo alignment annotation, and populates it with - * information content data. + * Returns the consensus sequence based on the most probable symbol at each + * position. The sequence is adjusted to match the length of the existing + * sequence alignment. Gap characters are used as padding. * - * @return The alignment annotation. + * @param length + * The length of the longest sequence in the existing alignment. + * @return */ - public AlignmentAnnotation createAnnotation(int length) + public Sequence getConsensusSequence() { - Annotation[] annotations = new Annotation[length]; - float max = 0f; - for (int alignPos = 0; alignPos < length; alignPos++) + int start; + int end; + int modelLength; + start = getNodeAlignmentColumn(1); + modelLength = getLength(); + end = getNodeAlignmentColumn(modelLength); + char[] sequence = new char[end + 1]; + for (int index = 0; index < end + 1; index++) { - Float content = getInformationContent(alignPos); - if (content > max) + Character character; + + character = getConsensusAtAlignColumn(index); + + if (character == null || character == '-') { - max = content; + sequence[index] = '-'; + } + else + { + sequence[index] = Character.toUpperCase(character); + } } - Character cons; - cons = getConsensusAtAlignColumn(alignPos); - cons = Character.toUpperCase(cons); - - String description = String.format("%.3f", content); - description += " bits"; - annotations[alignPos] = new Annotation(cons.toString(), description, - ' ', - content); - } - AlignmentAnnotation annotation = new AlignmentAnnotation( - "Information Content", - "The information content of each column, measured in bits", - annotations, - 0f, max, AlignmentAnnotation.BAR_GRAPH); - return annotation; + Sequence seq = new Sequence(getName(), sequence, start, + end); + return seq; } + /** - * Returns the information content at a specified column. + * Initiates a HMM consensus sequence * - * @param column - * Index of the column, starting from 0. - * @return + * @return A new HMM consensus sequence */ - public float getInformationContent(int column) + public SequenceI initHMMSequence() { - float informationContent = 0f; - - for (char symbol : symbols) - { - float freq = 0f; - if (symbols.size() == 20) - { - freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol); - } - if (symbols.size() == 4) - { - freq = ResidueProperties.nucleotideBackgroundFrequencies - .get(symbol); - } - Double hmmProb = getMatchEmissionProbability(column, symbol); - float prob = hmmProb.floatValue(); - informationContent += prob * (Math.log(prob / freq) / Math.log(2)); + Sequence consensus = getConsensusSequence(); + consensus.setIsHMMConsensusSequence(true); + consensus.setHMM(this); + return consensus; + } - } + public int getSymbolIndex(char c) + { + return symbolIndexLookup.get(c); + } - return informationContent; + public void setSymbolIndex(Character c, Integer i) + { + symbolIndexLookup.put(c, i); } }