1 package jalview.datamodel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
10 * Data structure which stores a hidden Markov model. Currently contains file properties as well, not sure whether these should be transferred to the HMMFile class
15 public class HiddenMarkovModel
17 // Stores file properties. Do not directly access this field as it contains
18 // only string value - use the getter methods. For example, to find the length
19 // of theHMM, use getModelLength()to return an int value
20 Map<String, String> fileProperties = new HashMap<>();
22 //contains all of the symbols used in this model. The index of each symbol represents its lookup value
23 List<Character> symbols = new ArrayList<>();
25 // contains information for each node in the model. The begin node is at index
26 // 0. Node 0 contains average emission probabilities for each symbol
27 List<HMMNode> nodes = new ArrayList<>();
29 final String YES = "yes";
31 final String NO = "no";
35 //keys for file properties hashmap
36 private final String NAME = "NAME";
38 private final String ACCESSION_NUMBER = "ACC";
40 private final String DESCRIPTION = "DESC";
42 private final String LENGTH = "LENG";
44 private final String MAX_LENGTH = "MAXL";
46 private final String ALPHABET = "ALPH";
48 private final String DATE = "DATE";
50 private final String COMMAND_LOG = "COM";
52 private final String NUMBER_OF_SEQUENCES = "NSEQ";
54 private final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
56 private final String CHECK_SUM = "CKSUM";
58 private final String GATHERING_THRESHOLDS = "GA";
60 private final String TRUSTED_CUTOFFS = "TC";
62 private final String NOISE_CUTOFFS = "NC";
64 private final String STATISTICS = "STATS";
66 private final String COMPO = "COMPO";
68 private final String GATHERING_THRESHOLD = "GA";
70 private final String TRUSTED_CUTOFF = "TC";
72 private final String NOISE_CUTOFF = "NC";
74 private final String VITERBI = "VITERBI";
76 private final String MSV = "MSV";
78 private final String FORWARD = "FORWARD";
80 private final String MAP = "MAP";
82 private final String REFERENCE_ANNOTATION = "RF";
84 private final String CONSENSUS_RESIDUE = "CONS";
86 private final String CONSENSUS_STRUCTURE = "CS";
88 private final String MASKED_VALUE = "MM";
90 final static String[] TRANSITION_TYPES = new String[] { "m->m", "m->i",
91 "m->d", "i->m", "i->i", "d->m", "d->d" };
93 public String getTransitionType(int index)
95 return TRANSITION_TYPES[index];
98 public String[] getTransitionTypes()
100 return TRANSITION_TYPES;
102 public char getSymbol(int index)
104 return getSymbols().get(index);
106 public Map<String, String> getFileProperties()
108 return fileProperties;
111 public HMMNode getNode(int nodeIndex)
113 return getNodes().get(nodeIndex);
116 public void setSymbols(List<Character> symbolsL)
118 this.symbols = symbolsL;
121 public String getName()
123 return fileProperties.get(NAME);
125 public String getAccessionNumber()
127 return fileProperties.get(ACCESSION_NUMBER);
130 public void setAccessionNumber(String value)
132 fileProperties.put(ACCESSION_NUMBER, value);
135 public String getDescription()
137 return fileProperties.get(DESCRIPTION);
140 public void setDescription(String value)
142 fileProperties.put(DESCRIPTION, value);
145 public Integer getLength()
147 if (fileProperties.get(LENGTH) == null)
151 return Integer.parseInt(fileProperties.get(LENGTH));
154 public void setLength(int value)
156 fileProperties.put(LENGTH, String.valueOf(value));
159 public Integer getMaxInstanceLength()
161 if (fileProperties.get(MAX_LENGTH) == null)
165 return Integer.parseInt(fileProperties.get(MAX_LENGTH));
168 public void setMaxInstanceLength(int value)
170 fileProperties.put(MAX_LENGTH, String.valueOf(value));
173 // gets type of symbol alphabet - "amino", "DNA", "RNA"
174 public String getAlphabetType()
176 return fileProperties.get(ALPHABET);
179 public void setAlphabetType(String value)
181 fileProperties.put(ALPHABET, value);
184 // not sure whether to implement this with Date object
185 public String getDate()
187 return fileProperties.get(DATE);
190 public void setDate(String value)
192 fileProperties.put(DATE, value);
195 // not sure whether to implement this
196 public String getCommandLineLog()
198 return fileProperties.get(COMMAND_LOG);
201 public void setCommandLineLog(String value)
203 fileProperties.put(COMMAND_LOG, value);
206 // gets the number of sequences that the HMM was trained on
207 public Integer getNumberOfSequences()
209 if (fileProperties.get(NUMBER_OF_SEQUENCES) == null)
213 return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES));
216 public void setNumberOfSequences(int value)
218 fileProperties.put(NUMBER_OF_SEQUENCES, String.valueOf(value));
221 // gets the effective number determined during sequence weighting
222 public Double getEffectiveNumberOfSequences()
224 if (fileProperties.get(LENGTH) == null)
228 return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES));
231 public void setEffectiveNumberOfSequences(double value)
233 fileProperties.put(EFF_NUMBER_OF_SEQUENCES, String.valueOf(value));
236 public Long getCheckSum()
238 if (fileProperties.get(LENGTH) == null)
242 return Long.parseLong(fileProperties.get(CHECK_SUM));
245 public void setCheckSum(long value)
247 fileProperties.put(CHECK_SUM, String.valueOf(value));
250 public List<HMMNode> getNodes()
255 public void setNodes(List<HMMNode> nodes)
261 * gets the match emission at a node for a symbol
263 * position of node in model
265 * index of symbol being searched
267 * negative log probability of a match emission of the given symbol
269 public double getMatchEmission(int nodeIndex, int symbolIndex)
271 double value = nodes.get(nodeIndex).getMatchEmissions().get(symbolIndex);
276 * gets the insert emission at a node for a symbol
278 * position of node in model
280 * index of symbol being searched
282 * negative log probability of an insert emission of the given symbol
284 public double getInsertEmission(int nodeIndex, int symbolIndex)
286 double value = nodes.get(nodeIndex).getInsertEmissions().get(symbolIndex);
291 * gets the state transition at a node for a specific transition
293 * position of node in model
294 * @param transitionIndex
295 * index of stransition being searched
297 * negative log probability of a state transition of the given type
299 public double getStateTransition(int nodeIndex, int transitionIndex)
301 double value = nodes.get(nodeIndex).getStateTransitions()
302 .get(transitionIndex);
306 public Integer getNodeAlignmentColumn(int nodeIndex)
308 Integer value = nodes.get(nodeIndex).getAlignmentColumn();
312 public char getConsensusResidue(int nodeIndex)
314 char value = nodes.get(nodeIndex).getConsensusResidue();
318 public char getReferenceAnnotation(int nodeIndex)
320 char value = nodes.get(nodeIndex).getReferenceAnnotation();
324 public char getMaskedValue(int nodeIndex)
326 char value = nodes.get(nodeIndex).getMaskValue();
330 public char getConsensusStructure(int nodeIndex)
332 char value = nodes.get(nodeIndex).getConsensusStructure();
337 * returns the average match emission for a given symbol
341 * average negative log propbability of a match emission of the given symbol
343 public double getAverageMatchEmission(int symbolIndex)
345 double value = nodes.get(0).getMatchEmissions().get(symbolIndex);
349 public int getNumberOfSymbols()
351 return numberOfSymbols;
354 public void setNumberOfSymbols(int numberOfSymbols)
356 this.numberOfSymbols = numberOfSymbols;
359 public List<Character> getSymbols()
366 * fills symbol array and also finds numberOfSymbols
369 * scanner scanning symbol line in file
371 public void fillSymbols(Scanner parser)
373 while (parser.hasNext())
375 String strSymbol = parser.next();
376 char[] symbol = strSymbol.toCharArray();
377 symbols.add(symbol[0]);
379 numberOfSymbols = symbols.size();
388 public void addFileProperty(String key, String value)
390 fileProperties.put(key, value);
393 public boolean referenceAnnotationIsActive()
396 status = fileProperties.get(REFERENCE_ANNOTATION);
413 public boolean maskValueIsActive()
416 status = fileProperties.get(MASKED_VALUE);
433 public boolean consensusResidueIsActive()
436 status = fileProperties.get(CONSENSUS_RESIDUE);
453 public boolean consensusStructureIsActive()
456 status = fileProperties.get(CONSENSUS_STRUCTURE);
473 public boolean mapIsActive()
476 status = fileProperties.get(MAP);
493 public void setAlignmentColumn(int nodeIndex, int column)
495 nodes.get(nodeIndex).setAlignmentColumn(column);
498 public void setReferenceAnnotation(int nodeIndex, char value)
500 nodes.get(nodeIndex).setReferenceAnnotation(value);
503 public void setConsensusResidue(int nodeIndex, char value)
505 nodes.get(nodeIndex).setConsensusResidue(value);
508 public void setConsensusStructure(int nodeIndex, char value)
510 nodes.get(nodeIndex).setConsensusStructure(value);
513 public void setMaskValue(int nodeIndex, char value)
515 nodes.get(nodeIndex).setMaskValue(value);
518 public String getGatheringThreshold()
521 value = fileProperties.get("GA");
525 public String getNoiseCutoff()
528 value = fileProperties.get("NC");
532 public String getTrustedCutoff()
535 value = fileProperties.get("TC");
539 public String getViterbi()
542 value = fileProperties.get(VITERBI);
546 public String getMSV()
549 value = fileProperties.get(MSV);
553 public String getForward()
556 value = fileProperties.get(FORWARD);
560 public void setMAPStatus(boolean status)
564 fileProperties.put(MAP, YES);
568 fileProperties.put(MAP, NO);
572 public void setReferenceAnnotationStatus(boolean status)
576 fileProperties.put(REFERENCE_ANNOTATION, YES);
580 fileProperties.put(REFERENCE_ANNOTATION, NO);
584 public void setMaskedValueStatus(boolean status)
588 fileProperties.put(MASKED_VALUE, YES);
592 fileProperties.put(MASKED_VALUE, NO);
596 public void setConsensusResidueStatus(boolean status)
600 fileProperties.put(CONSENSUS_RESIDUE, YES);
604 fileProperties.put(CONSENSUS_RESIDUE, NO);
608 public void setConsensusStructureStatus(boolean status)
612 fileProperties.put(CONSENSUS_STRUCTURE, YES);
616 fileProperties.put(CONSENSUS_STRUCTURE, NO);