package jalview.datamodel; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Scanner; /** * Data structure which stores a hidden Markov model. Currently contains file properties as well, not sure whether these should be transferred to the HMMFile class * * @author TZVanaalten * */ public class HiddenMarkovModel { // Stores file properties. Do not directly access this field as it contains // only string value - use the getter methods. For example, to find the length // of theHMM, use getModelLength()to return an int value Map fileProperties = new HashMap<>(); //contains all of the symbols used in this model. The index of each symbol represents its lookup value List symbols = new ArrayList<>(); // contains information for each node in the model. The begin node is at index // 0. Node 0 contains average emission probabilities for each symbol List nodes = new ArrayList<>(); // contains the HMM node for each alignment column Map nodeLookup = new HashMap<>(); //contains the symbol index for each symbol Map symbolIndexLookup = new HashMap<>(); final static String YES = "yes"; final static String NO = "no"; int numberOfSymbols; //keys for file properties hashmap private final String NAME = "NAME"; private final String ACCESSION_NUMBER = "ACC"; private final String DESCRIPTION = "DESC"; private final String LENGTH = "LENG"; private final String MAX_LENGTH = "MAXL"; private final String ALPHABET = "ALPH"; private final String DATE = "DATE"; private final String COMMAND_LOG = "COM"; private final String NUMBER_OF_SEQUENCES = "NSEQ"; private final String EFF_NUMBER_OF_SEQUENCES = "EFFN"; private final String CHECK_SUM = "CKSUM"; private final String GATHERING_THRESHOLDS = "GA"; private final String TRUSTED_CUTOFFS = "TC"; private final String NOISE_CUTOFFS = "NC"; private final String STATISTICS = "STATS"; private final String COMPO = "COMPO"; private final String GATHERING_THRESHOLD = "GA"; private final String TRUSTED_CUTOFF = "TC"; private final String NOISE_CUTOFF = "NC"; private final String VITERBI = "VITERBI"; private final String MSV = "MSV"; private final String FORWARD = "FORWARD"; private final String MAP = "MAP"; private final String REFERENCE_ANNOTATION = "RF"; private final String CONSENSUS_RESIDUE = "CONS"; private final String CONSENSUS_STRUCTURE = "CS"; private final String MASKED_VALUE = "MM"; final static String[] TRANSITION_TYPES = new String[] { "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d" }; public String getTransitionType(int index) { return TRANSITION_TYPES[index]; } public Map getNodeLookup() { return nodeLookup; } public void setNodeLookup(Map nodeLookup) { this.nodeLookup = nodeLookup; } public String[] getTransitionTypes() { return TRANSITION_TYPES; } public List getSymbols() { return symbols; } public Map getFileProperties() { return fileProperties; } public HMMNode getNode(int nodeIndex) { return getNodes().get(nodeIndex); } public void setSymbols(List symbolsL) { this.symbols = symbolsL; } public String getName() { return fileProperties.get(NAME); } public String getAccessionNumber() { return fileProperties.get(ACCESSION_NUMBER); } public void setAccessionNumber(String value) { fileProperties.put(ACCESSION_NUMBER, value); } public String getDescription() { return fileProperties.get(DESCRIPTION); } public void setDescription(String value) { fileProperties.put(DESCRIPTION, value); } public Integer getLength() { if (fileProperties.get(LENGTH) == null) { return null; } return Integer.parseInt(fileProperties.get(LENGTH)); } public void setLength(int value) { fileProperties.put(LENGTH, String.valueOf(value)); } public Integer getMaxInstanceLength() { if (fileProperties.get(MAX_LENGTH) == null) { return null; } return Integer.parseInt(fileProperties.get(MAX_LENGTH)); } public void setMaxInstanceLength(int value) { fileProperties.put(MAX_LENGTH, String.valueOf(value)); } // gets type of symbol alphabet - "amino", "DNA", "RNA" public String getAlphabetType() { return fileProperties.get(ALPHABET); } public void setAlphabetType(String value) { fileProperties.put(ALPHABET, value); } // not sure whether to implement this with Date object public String getDate() { return fileProperties.get(DATE); } public void setDate(String value) { fileProperties.put(DATE, value); } // not sure whether to implement this public String getCommandLineLog() { return fileProperties.get(COMMAND_LOG); } public void setCommandLineLog(String value) { fileProperties.put(COMMAND_LOG, value); } // gets the number of sequences that the HMM was trained on public Integer getNumberOfSequences() { if (fileProperties.get(NUMBER_OF_SEQUENCES) == null) { return null; } return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES)); } public void setNumberOfSequences(int value) { fileProperties.put(NUMBER_OF_SEQUENCES, String.valueOf(value)); } // gets the effective number determined during sequence weighting public Double getEffectiveNumberOfSequences() { if (fileProperties.get(LENGTH) == null) { return null; } return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES)); } public void setEffectiveNumberOfSequences(double value) { fileProperties.put(EFF_NUMBER_OF_SEQUENCES, String.valueOf(value)); } public Long getCheckSum() { if (fileProperties.get(LENGTH) == null) { return null; } return Long.parseLong(fileProperties.get(CHECK_SUM)); } public void setCheckSum(long value) { fileProperties.put(CHECK_SUM, String.valueOf(value)); } public List getNodes() { return nodes; } public void setNodes(List nodes) { this.nodes = nodes; } /** * get match emission probability for a given symbol at a column in the * alignment * * @param alignColumn * @param symbol * @return * */ public Double getMatchEmissionProbability(int alignColumn, char symbol) { int symbolIndex; int nodeIndex; Double probability; symbolIndex = symbolIndexLookup.get(symbol); nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex); return probability; } /** * get insert emission probability for a given symbol at a column in the * alignment * * @param alignColumn * @param symbol * @return */ public Double getInsertEmissionProbability(int alignColumn, char symbol) { int symbolIndex; int nodeIndex; Double probability; symbolIndex = symbolIndexLookup.get(symbol); nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getInsertEmissions().get(symbolIndex); return probability; } /** * get state transition probability for a given transition type at a column in * the alignment * * @param alignColumn * @param transition * @return */ public Double getStateTransitionProbability(int alignColumn, String transition) { int transitionIndex; int nodeIndex; Double probability; transitionIndex = getTransitionType(transition); nodeIndex = nodeLookup.get(alignColumn); probability = getNode(nodeIndex).getStateTransitions() .get(transitionIndex); return probability; } public Integer getNodeAlignmentColumn(int nodeIndex) { Integer value = nodes.get(nodeIndex).getAlignmentColumn(); return value; } public char getConsensusResidue(int nodeIndex) { char value = nodes.get(nodeIndex).getConsensusResidue(); return value; } public char getReferenceAnnotation(int nodeIndex) { char value = nodes.get(nodeIndex).getReferenceAnnotation(); return value; } public char getMaskedValue(int nodeIndex) { char value = nodes.get(nodeIndex).getMaskValue(); return value; } public char getConsensusStructure(int nodeIndex) { char value = nodes.get(nodeIndex).getConsensusStructure(); return value; } /** * returns the average match emission for a given symbol * @param symbolIndex * index of symbol * @return * average negative log propbability of a match emission of the given symbol */ public double getAverageMatchEmission(int symbolIndex) { double value = nodes.get(0).getMatchEmissions().get(symbolIndex); return value; } public int getNumberOfSymbols() { return numberOfSymbols; } public void setNumberOfSymbols(int numberOfSymbols) { this.numberOfSymbols = numberOfSymbols; } /** * fills symbol array and also finds numberOfSymbols * * @param parser * scanner scanning symbol line in file */ public void fillSymbols(Scanner parser) { int i = 0; while (parser.hasNext()) { String strSymbol = parser.next(); char[] symbol = strSymbol.toCharArray(); symbols.add(symbol[0]); symbolIndexLookup.put(symbol[0], i); i++; } numberOfSymbols = symbols.size(); } /** * adds file property * * @param key * @param value */ public void addFileProperty(String key, String value) { fileProperties.put(key, value); } public boolean referenceAnnotationIsActive() { String status; status = fileProperties.get(REFERENCE_ANNOTATION); if (status == null) { return false; } switch (status) { case YES: return true; case NO: return false; default: return false; } } public boolean maskValueIsActive() { String status; status = fileProperties.get(MASKED_VALUE); if (status == null) { return false; } switch (status) { case YES: return true; case NO: return false; default: return false; } } public boolean consensusResidueIsActive() { String status; status = fileProperties.get(CONSENSUS_RESIDUE); if (status == null) { return false; } switch (status) { case YES: return true; case NO: return false; default: return false; } } public boolean consensusStructureIsActive() { String status; status = fileProperties.get(CONSENSUS_STRUCTURE); if (status == null) { return false; } switch (status) { case YES: return true; case NO: return false; default: return false; } } public boolean mapIsActive() { String status; status = fileProperties.get(MAP); if (status == null) { return false; } switch (status) { case YES: return true; case NO: return false; default: return false; } } public void setAlignmentColumn(int nodeIndex, int column) { nodes.get(nodeIndex).setAlignmentColumn(column); } public void setReferenceAnnotation(int nodeIndex, char value) { nodes.get(nodeIndex).setReferenceAnnotation(value); } public void setConsensusResidue(int nodeIndex, char value) { nodes.get(nodeIndex).setConsensusResidue(value); } public void setConsensusStructure(int nodeIndex, char value) { nodes.get(nodeIndex).setConsensusStructure(value); } public void setMaskValue(int nodeIndex, char value) { nodes.get(nodeIndex).setMaskValue(value); } public String getGatheringThreshold() { String value; value = fileProperties.get("GA"); return value; } public String getNoiseCutoff() { String value; value = fileProperties.get("NC"); return value; } public String getTrustedCutoff() { String value; value = fileProperties.get("TC"); return value; } public String getViterbi() { String value; value = fileProperties.get(VITERBI); return value; } public String getMSV() { String value; value = fileProperties.get(MSV); return value; } public String getForward() { String value; value = fileProperties.get(FORWARD); return value; } public void setMAPStatus(boolean status) { if (status == true) { fileProperties.put(MAP, YES); } else { fileProperties.put(MAP, NO); } } public void setReferenceAnnotationStatus(boolean status) { if (status == true) { fileProperties.put(REFERENCE_ANNOTATION, YES); } else { fileProperties.put(REFERENCE_ANNOTATION, NO); } } public void setMaskedValueStatus(boolean status) { if (status == true) { fileProperties.put(MASKED_VALUE, YES); } else { fileProperties.put(MASKED_VALUE, NO); } } public void setConsensusResidueStatus(boolean status) { if (status == true) { fileProperties.put(CONSENSUS_RESIDUE, YES); } else { fileProperties.put(CONSENSUS_RESIDUE, NO); } } public void setConsensusStructureStatus(boolean status) { if (status == true) { fileProperties.put(CONSENSUS_STRUCTURE, YES); } else { fileProperties.put(CONSENSUS_STRUCTURE, NO); } } /** * * @param transition * type of transition occuring * @return index value representing position along stateTransition array. */ public Integer getTransitionType(String transition) { Integer index; switch (transition) { case "mm": index = 0; break; case "mi": index = 1; break; case "md": index = 2; break; case "im": index = 3; break; case "ii": index = 4; break; case "dm": index = 5; break; case "dd": index = 6; break; default: index = null; } return index; } /** * find the index of the node in a hidden Markov model based on the column in * the alignment * * @param alignmentColumn */ public Integer findNodeIndex(int alignmentColumn) { Integer index; index = nodeLookup.get(alignmentColumn); return index; } public static String findStringFromBoolean(boolean value) { if (value) { return YES; } else { return NO; } } }