1 package jalview.datamodel;
3 import java.util.ArrayList;
4 import java.util.HashMap;
7 import java.util.Scanner;
 * Data structure which stores a hidden Markov model. It currently also holds the file properties; whether these should be moved to the HMMFile class is still undecided.
15 public class HiddenMarkovModel
// Stores file properties. Do not access this field directly, as it contains
// only string values - use the getter methods. For example, to find the length
// of the HMM, use getLength() to obtain an Integer value.
20 Map<String, String> fileProperties = new HashMap<>();
22 //contains all of the symbols used in this model. The index of each symbol represents its lookup value
23 List<Character> symbols = new ArrayList<>();
25 // contains information for each node in the model. The begin node is at index
26 // 0. Node 0 contains average emission probabilities for each symbol
27 List<HMMNode> nodes = new ArrayList<>();
// maps each alignment column to the index of its HMM node
30 Map<Integer, Integer> nodeLookup = new HashMap<>();
// maps each symbol to its index in the symbols list
33 Map<Character, Integer> symbolIndexLookup = new HashMap<>();
36 final static String YES = "yes";
38 final static String NO = "no";
42 //keys for file properties hashmap
43 private final String NAME = "NAME";
45 private final String ACCESSION_NUMBER = "ACC";
47 private final String DESCRIPTION = "DESC";
49 private final String LENGTH = "LENG";
51 private final String MAX_LENGTH = "MAXL";
53 private final String ALPHABET = "ALPH";
55 private final String DATE = "DATE";
57 private final String COMMAND_LOG = "COM";
59 private final String NUMBER_OF_SEQUENCES = "NSEQ";
61 private final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
63 private final String CHECK_SUM = "CKSUM";
65 private final String GATHERING_THRESHOLDS = "GA";
67 private final String TRUSTED_CUTOFFS = "TC";
69 private final String NOISE_CUTOFFS = "NC";
71 private final String STATISTICS = "STATS";
73 private final String COMPO = "COMPO";
75 private final String GATHERING_THRESHOLD = "GA";
77 private final String TRUSTED_CUTOFF = "TC";
79 private final String NOISE_CUTOFF = "NC";
81 private final String VITERBI = "VITERBI";
83 private final String MSV = "MSV";
85 private final String FORWARD = "FORWARD";
87 private final String MAP = "MAP";
89 private final String REFERENCE_ANNOTATION = "RF";
91 private final String CONSENSUS_RESIDUE = "CONS";
93 private final String CONSENSUS_STRUCTURE = "CS";
95 private final String MASKED_VALUE = "MM";
97 final static String[] TRANSITION_TYPES = new String[] { "m->m", "m->i",
98 "m->d", "i->m", "i->i", "d->m", "d->d" };
100 public String getTransitionType(int index)
102 return TRANSITION_TYPES[index];
105 public Map<Integer, Integer> getNodeLookup()
110 public void setNodeLookup(Map<Integer, Integer> nodeLookup)
112 this.nodeLookup = nodeLookup;
115 public String[] getTransitionTypes()
117 return TRANSITION_TYPES;
120 public List<Character> getSymbols()
125 public Map<String, String> getFileProperties()
127 return fileProperties;
130 public HMMNode getNode(int nodeIndex)
132 return getNodes().get(nodeIndex);
135 public void setSymbols(List<Character> symbolsL)
137 this.symbols = symbolsL;
140 public String getName()
142 return fileProperties.get(NAME);
144 public String getAccessionNumber()
146 return fileProperties.get(ACCESSION_NUMBER);
149 public void setAccessionNumber(String value)
151 fileProperties.put(ACCESSION_NUMBER, value);
154 public String getDescription()
156 return fileProperties.get(DESCRIPTION);
159 public void setDescription(String value)
161 fileProperties.put(DESCRIPTION, value);
164 public Integer getLength()
166 if (fileProperties.get(LENGTH) == null)
170 return Integer.parseInt(fileProperties.get(LENGTH));
173 public void setLength(int value)
175 fileProperties.put(LENGTH, String.valueOf(value));
178 public Integer getMaxInstanceLength()
180 if (fileProperties.get(MAX_LENGTH) == null)
184 return Integer.parseInt(fileProperties.get(MAX_LENGTH));
187 public void setMaxInstanceLength(int value)
189 fileProperties.put(MAX_LENGTH, String.valueOf(value));
192 // gets type of symbol alphabet - "amino", "DNA", "RNA"
193 public String getAlphabetType()
195 return fileProperties.get(ALPHABET);
198 public void setAlphabetType(String value)
200 fileProperties.put(ALPHABET, value);
203 // not sure whether to implement this with Date object
204 public String getDate()
206 return fileProperties.get(DATE);
209 public void setDate(String value)
211 fileProperties.put(DATE, value);
214 // not sure whether to implement this
215 public String getCommandLineLog()
217 return fileProperties.get(COMMAND_LOG);
220 public void setCommandLineLog(String value)
222 fileProperties.put(COMMAND_LOG, value);
225 // gets the number of sequences that the HMM was trained on
226 public Integer getNumberOfSequences()
228 if (fileProperties.get(NUMBER_OF_SEQUENCES) == null)
232 return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES));
235 public void setNumberOfSequences(int value)
237 fileProperties.put(NUMBER_OF_SEQUENCES, String.valueOf(value));
240 // gets the effective number determined during sequence weighting
241 public Double getEffectiveNumberOfSequences()
243 if (fileProperties.get(LENGTH) == null)
247 return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES));
250 public void setEffectiveNumberOfSequences(double value)
252 fileProperties.put(EFF_NUMBER_OF_SEQUENCES, String.valueOf(value));
255 public Long getCheckSum()
257 if (fileProperties.get(LENGTH) == null)
261 return Long.parseLong(fileProperties.get(CHECK_SUM));
264 public void setCheckSum(long value)
266 fileProperties.put(CHECK_SUM, String.valueOf(value));
269 public List<HMMNode> getNodes()
274 public void setNodes(List<HMMNode> nodes)
280 * get match emission probability for a given symbol at a column in the
288 public Double getMatchEmissionProbability(int alignColumn, char symbol)
293 symbolIndex = symbolIndexLookup.get(symbol);
294 nodeIndex = nodeLookup.get(alignColumn);
295 probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
301 * get insert emission probability for a given symbol at a column in the
308 public Double getInsertEmissionProbability(int alignColumn, char symbol)
313 symbolIndex = symbolIndexLookup.get(symbol);
314 nodeIndex = nodeLookup.get(alignColumn);
315 probability = getNode(nodeIndex).getInsertEmissions().get(symbolIndex);
321 * get state transition probability for a given transition type at a column in
328 public Double getStateTransitionProbability(int alignColumn,
334 transitionIndex = getTransitionType(transition);
335 nodeIndex = nodeLookup.get(alignColumn);
336 probability = getNode(nodeIndex).getStateTransitions()
337 .get(transitionIndex);
342 public Integer getNodeAlignmentColumn(int nodeIndex)
344 Integer value = nodes.get(nodeIndex).getAlignmentColumn();
348 public char getConsensusResidue(int nodeIndex)
350 char value = nodes.get(nodeIndex).getConsensusResidue();
354 public char getReferenceAnnotation(int nodeIndex)
356 char value = nodes.get(nodeIndex).getReferenceAnnotation();
360 public char getMaskedValue(int nodeIndex)
362 char value = nodes.get(nodeIndex).getMaskValue();
366 public char getConsensusStructure(int nodeIndex)
368 char value = nodes.get(nodeIndex).getConsensusStructure();
373 * returns the average match emission for a given symbol
377 * average negative log propbability of a match emission of the given symbol
379 public double getAverageMatchEmission(int symbolIndex)
381 double value = nodes.get(0).getMatchEmissions().get(symbolIndex);
385 public int getNumberOfSymbols()
387 return numberOfSymbols;
390 public void setNumberOfSymbols(int numberOfSymbols)
392 this.numberOfSymbols = numberOfSymbols;
398 * fills symbol array and also finds numberOfSymbols
401 * scanner scanning symbol line in file
403 public void fillSymbols(Scanner parser)
406 while (parser.hasNext())
408 String strSymbol = parser.next();
409 char[] symbol = strSymbol.toCharArray();
410 symbols.add(symbol[0]);
411 symbolIndexLookup.put(symbol[0], i);
414 numberOfSymbols = symbols.size();
423 public void addFileProperty(String key, String value)
425 fileProperties.put(key, value);
428 public boolean referenceAnnotationIsActive()
431 status = fileProperties.get(REFERENCE_ANNOTATION);
448 public boolean maskValueIsActive()
451 status = fileProperties.get(MASKED_VALUE);
468 public boolean consensusResidueIsActive()
471 status = fileProperties.get(CONSENSUS_RESIDUE);
488 public boolean consensusStructureIsActive()
491 status = fileProperties.get(CONSENSUS_STRUCTURE);
508 public boolean mapIsActive()
511 status = fileProperties.get(MAP);
528 public void setAlignmentColumn(int nodeIndex, int column)
530 nodes.get(nodeIndex).setAlignmentColumn(column);
533 public void setReferenceAnnotation(int nodeIndex, char value)
535 nodes.get(nodeIndex).setReferenceAnnotation(value);
538 public void setConsensusResidue(int nodeIndex, char value)
540 nodes.get(nodeIndex).setConsensusResidue(value);
543 public void setConsensusStructure(int nodeIndex, char value)
545 nodes.get(nodeIndex).setConsensusStructure(value);
548 public void setMaskValue(int nodeIndex, char value)
550 nodes.get(nodeIndex).setMaskValue(value);
553 public String getGatheringThreshold()
556 value = fileProperties.get("GA");
560 public String getNoiseCutoff()
563 value = fileProperties.get("NC");
567 public String getTrustedCutoff()
570 value = fileProperties.get("TC");
574 public String getViterbi()
577 value = fileProperties.get(VITERBI);
581 public String getMSV()
584 value = fileProperties.get(MSV);
588 public String getForward()
591 value = fileProperties.get(FORWARD);
595 public void setMAPStatus(boolean status)
599 fileProperties.put(MAP, YES);
603 fileProperties.put(MAP, NO);
607 public void setReferenceAnnotationStatus(boolean status)
611 fileProperties.put(REFERENCE_ANNOTATION, YES);
615 fileProperties.put(REFERENCE_ANNOTATION, NO);
619 public void setMaskedValueStatus(boolean status)
623 fileProperties.put(MASKED_VALUE, YES);
627 fileProperties.put(MASKED_VALUE, NO);
631 public void setConsensusResidueStatus(boolean status)
635 fileProperties.put(CONSENSUS_RESIDUE, YES);
639 fileProperties.put(CONSENSUS_RESIDUE, NO);
643 public void setConsensusStructureStatus(boolean status)
647 fileProperties.put(CONSENSUS_STRUCTURE, YES);
651 fileProperties.put(CONSENSUS_STRUCTURE, NO);
 * type of transition occurring
659 * @return index value representing position along stateTransition array.
661 public Integer getTransitionType(String transition)
694 * find the index of the node in a hidden Markov model based on the column in
697 * @param alignmentColumn
700 public Integer findNodeIndex(int alignmentColumn)
703 index = nodeLookup.get(alignmentColumn);
707 public static String findStringFromBoolean(boolean value)