X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FHiddenMarkovModel.java;h=e74d8261693634fadb0a5d2627d16343ccfb8fd2;hb=d6cace53173ae859bfd93f5e8a13be427864afd1;hp=879968f687d9e489087eab5fa84cadc6611011c5;hpb=4c5919d5393ad7fb00139b38cea93d1c270afa84;p=jalview.git diff --git a/src/jalview/datamodel/HiddenMarkovModel.java b/src/jalview/datamodel/HiddenMarkovModel.java index 879968f..e74d826 100644 --- a/src/jalview/datamodel/HiddenMarkovModel.java +++ b/src/jalview/datamodel/HiddenMarkovModel.java @@ -1,6 +1,6 @@ package jalview.datamodel; -import jalview.schemes.ResidueProperties; +import jalview.gui.AlignFrame; import java.util.ArrayList; import java.util.HashMap; @@ -33,7 +33,8 @@ public class HiddenMarkovModel // 0. Node 0 contains average emission probabilities for each symbol List nodes = new ArrayList<>(); - // contains the HMM node for each alignment column + // contains the HMM node for each alignment column, alignment columns start at + // index 0; Map nodeLookup = new HashMap<>(); // contains the symbol index for each symbol @@ -114,6 +115,46 @@ public class HiddenMarkovModel public static final int DELETETODELETE = 6; + String fileHeader; + + public HiddenMarkovModel() + { + + } + + public HiddenMarkovModel(HiddenMarkovModel hmm) + { + super(); + this.fileProperties = new HashMap<>(hmm.fileProperties); + this.symbols = new ArrayList<>(hmm.symbols); + this.nodes = new ArrayList<>(hmm.nodes); + this.nodeLookup = new HashMap<>(hmm.nodeLookup); + this.symbolIndexLookup = new HashMap<>( + hmm.symbolIndexLookup); + this.numberOfSymbols = hmm.numberOfSymbols; + this.fileHeader = new String(hmm.fileHeader); + } + + /** + * Gets the file header of the .hmm file this model came from. + * + * @return + */ + public String getFileHeader() + { + return fileHeader; + } + + /** + * Sets the file header of this model. + * + * @param header + */ + public void setFileHeader(String header) + { + fileHeader = header; + } + /** * Returns the map containing the matches between nodes and alignment column * indexes. @@ -351,9 +392,9 @@ public class HiddenMarkovModel return 0d; } symbolIndex = symbolIndexLookup.get(symbol); - if (nodeLookup.containsKey(alignColumn + 1)) + if (nodeLookup.containsKey(alignColumn)) { - nodeIndex = nodeLookup.get(alignColumn + 1); + nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex); return probability; } @@ -386,9 +427,9 @@ public class HiddenMarkovModel return 0d; } symbolIndex = symbolIndexLookup.get(symbol); - if (nodeLookup.containsKey(alignColumn + 1)) + if (nodeLookup.containsKey(alignColumn)) { - nodeIndex = nodeLookup.get(alignColumn + 1); + nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getInsertEmissions() .get(symbolIndex); return probability; @@ -418,9 +459,9 @@ public class HiddenMarkovModel int transitionIndex; int nodeIndex; Double probability; - if (nodeLookup.containsKey(alignColumn + 1)) + if (nodeLookup.containsKey(alignColumn)) { - nodeIndex = nodeLookup.get(alignColumn + 1); + nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getStateTransitions() .get(transition); return probability; @@ -443,7 +484,7 @@ public class HiddenMarkovModel public Integer getNodeAlignmentColumn(int nodeIndex) { Integer value = nodes.get(nodeIndex).getAlignmentColumn(); - return value - 1; + return value; } /** @@ -469,14 +510,33 @@ public class HiddenMarkovModel */ public char getConsensusAtAlignColumn(int columnIndex) { - char value; + char mostLikely = '-'; + if (consensusResidueIsActive()) + { + Integer index = findNodeIndex(columnIndex); if (index == null) { return '-'; } - value = getNodes().get(index).getConsensusResidue(); - return value; + mostLikely = getNodes().get(index).getConsensusResidue(); + return mostLikely; + } + else + { + double highestProb = 0; + for (char character : symbols) + { + Double prob = getMatchEmissionProbability(columnIndex, character); + if (prob > highestProb) + { + highestProb = prob; + mostLikely = character; + } + } + return mostLikely; + } + } /** @@ -892,7 +952,7 @@ public class HiddenMarkovModel public Integer findNodeIndex(int alignmentColumn) { Integer index; - index = nodeLookup.get(alignmentColumn + 1); + index = nodeLookup.get(alignmentColumn); return index; } @@ -914,78 +974,7 @@ public class HiddenMarkovModel } } - /** - * Creates the HMM Logo alignment annotation, and populates it with - * information content data. - * - * @return The alignment annotation. - */ - public AlignmentAnnotation createAnnotation(int length) - { - Annotation[] annotations = new Annotation[length]; - float max = 0f; - for (int alignPos = 0; alignPos < length; alignPos++) - { - Float content = getInformationContent(alignPos); - if (content > max) - { - max = content; - } - Character cons; - cons = getConsensusAtAlignColumn(alignPos); - cons = Character.toUpperCase(cons); - - String description = String.format("%.3f", content); - description += " bits"; - annotations[alignPos] = new Annotation(cons.toString(), description, - ' ', - content); - - } - AlignmentAnnotation annotation = new AlignmentAnnotation( - "Information Content", - "The information content of each column, measured in bits", - annotations, - 0f, max, AlignmentAnnotation.BAR_GRAPH); - return annotation; - } - - /** - * Returns the information content at a specified column. - * - * @param column - * Index of the column, starting from 0. - * @return - */ - public float getInformationContent(int column) - { - float informationContent = 0f; - - for (char symbol : symbols) - { - float freq = 0f; - if ("amino".equals(getAlphabetType())) - { - freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol); - } - if ("DNA".equals(getAlphabetType())) - { - freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol); - } - if ("RNA".equals(getAlphabetType())) - { - freq = ResidueProperties.rnaBackgroundFrequencies - .get(symbol); - } - Double hmmProb = getMatchEmissionProbability(column, symbol); - float prob = hmmProb.floatValue(); - informationContent += prob * (Math.log(prob / freq) / Math.log(2)); - - } - - return informationContent; - } /** * Returns the consensus sequence based on the most probable symbol at each @@ -996,7 +985,7 @@ public class HiddenMarkovModel * The length of the longest sequence in the existing alignment. * @return */ - public Sequence getConsensusSequence(int length) + public Sequence getConsensusSequence() { int start; int end; @@ -1004,18 +993,13 @@ public class HiddenMarkovModel start = getNodeAlignmentColumn(1); modelLength = getLength(); end = getNodeAlignmentColumn(modelLength); - char[] sequence = new char[length]; - for (int index = 0; index < length; index++) + char[] sequence = new char[end]; + for (int index = 0; index < end; index++) { Character character; - if (consensusResidueIsActive()) - { + character = getConsensusAtAlignColumn(index); - } - else - { - character = findConsensusCharacter(index); - } + if (character == null || character == '-') { sequence[index] = '-'; @@ -1027,32 +1011,126 @@ public class HiddenMarkovModel } - Sequence seq = new Sequence("HMM CONSENSUS", sequence, start, end); + Sequence seq = new Sequence(getName() + "_HMM", sequence, start, end); return seq; } + /** - * Finds the most probable character at a column in an alignment based on the - * HMM. - * - * @param nodeIndex - * The index of the node. - * @return + * Maps the nodes of the hidden Markov model to the reference annotation and + * then deletes this annotation. */ - Character findConsensusCharacter(int column) + public void mapToReferenceAnnotation(AlignFrame af, SequenceI seq) + { + AlignmentAnnotation annotArray[] = af.getViewport().getAlignment() + .getAlignmentAnnotation(); + + AlignmentAnnotation reference = null; + for (AlignmentAnnotation annot : annotArray) + { + if (annot.label.contains("Reference")) + { + reference = annot; + } + } + + if (reference == null) + { + return; + } + + mapToReferenceAnnotation(reference, seq); + af.getViewport().getAlignment().deleteAnnotation(reference); + } + + public void mapToReferenceAnnotation(AlignmentAnnotation reference, + SequenceI seq) { - Character mostLikely = null; - double highestProb = 0; - for (char character : symbols) + HiddenMarkovModel hmm = seq.getHMM(); + Annotation[] annots = reference.annotations; { - Double prob = getMatchEmissionProbability(column, character); - if (prob > highestProb) + int nodeIndex = 0; + for (int col = 0; col < annots.length; col++) { - highestProb = prob; - mostLikely = character; + String character = annots[col].displayCharacter; + if ("x".equals(character) || "X".equals(character)) + { + nodeIndex++; + if (nodeIndex < hmm.getNodes().size()) + { + HMMNode node = hmm.getNode(nodeIndex); + int alignPos = getNodeAlignmentColumn(nodeIndex); + char seqCharacter = seq.getCharAt(alignPos); + if (alignPos >= seq.getLength() || col >= seq.getLength()) + { + seq.insertCharAt(seq.getLength(), + (alignPos + 1) - seq.getLength(), + '-'); + } + seq.getSequence()[alignPos] = '-'; + seq.getSequence()[col] = seqCharacter; + node.setAlignmentColumn(col); + hmm.nodeLookup.put(col, nodeIndex); + } + else + { + System.out.println( + "The reference annotation contains more consensus columns than the hidden Markov model"); + break; + } + } + else + { + hmm.nodeLookup.remove(col); + } } + } - return mostLikely; + } + + public void mapToReferenceAnnotation(AlignmentAnnotation reference) + { + Annotation[] annots = reference.annotations; + { + int nodeIndex = 0; + for (int col = 0; col < annots.length; col++) + { + String character = annots[col].displayCharacter; + if ("x".equals(character) || "X".equals(character)) + { + nodeIndex++; + if (nodeIndex < nodes.size()) + { + HMMNode node = nodes.get(nodeIndex); + node.setAlignmentColumn(col + 1); + nodeLookup.put(col, nodeIndex); + } + else + { + System.out.println( + "The reference annotation contains more consensus columns than the hidden Markov model"); + break; + } + } + else + { + nodeLookup.remove(col); + } + } + + } + + } + + public SequenceI initHMMSequence() + { + Sequence consensus = getConsensusSequence(); + consensus.setIsHMMConsensusSequence(true); + consensus.setHMM(this); + return consensus; + } + + }