1 package jalview.datamodel;
3 import java.util.ArrayList;
4 import java.util.HashMap;
7 import java.util.Scanner;
/**
 * Data structure which stores a hidden Markov model. It currently holds the
 * file properties as well; it is not yet decided whether these should be
 * moved to the HMMFile class.
 */
15 public class HiddenMarkovModel
17 // Stores file properties. Do not directly access this field as it contains
18 // only string value - use the getter methods. For example, to find the length
19 // of theHMM, use getModelLength()to return an int value
20 Map<String, String> fileProperties = new HashMap<>();
22 // contains the average emission probabilities for each symbol
23 List<Double> averageMatchStateEmissionProbabilities = new ArrayList<>();
25 // contains the probabilities of insert 0 emissions for each symbol
26 List<Double> insertZeroEmissions = new ArrayList<>();
28 // contains the probabilities of transitions from the begin state and insert
29 // state 0. These are bm, bi, bd, im, ii, dm and dd in order (0th position in
30 // the array indicates the probability of a bm transition)
32 List<Double> beginStateTransitions = new ArrayList<>();
34 // contains the alignment column index for each node
35 List<Integer> alignmentColumnIndexes = new ArrayList<>();
37 // contains all other annotations for each node. These can be the
38 // consensus(CONS), reference annotation(RF), mask value(MM) or consensus
40 List<HashMap<String, Character>> annotations = new ArrayList<>();
42 // contains the match emission for each symbol at each node
43 List<List<Double>> matchEmissions = new ArrayList<>();
45 // contains the insert emission for each symbol at each node
46 List<List<Double>> insertEmissions = new ArrayList<>();
48 // contains the state transition for each state transition. See
49 // beginStateTransitions field for transition possibilities.
50 List<List<Double>> stateTransitions = new ArrayList<>();
52 // contains cutoffs and thresholds from PFAM
53 Map<String, Double[]> pfamData = new HashMap<>();
55 // contains e-value statistic objects which contain the alignment mode
56 // configuration, and the slope and location of each distribution
57 Map<String, EValueStatistic> eValueStatistics = new HashMap<>();
59 final String yes = "yes";
61 final String no = "no";
63 List<Character> symbols = new ArrayList<>();
65 public List<Double> getBeginStateTransitions()
67 return beginStateTransitions;
70 public void setBeginStateTransitions(List<Double> beginStateTransitionsL)
72 this.beginStateTransitions = beginStateTransitionsL;
75 public List<List<Double>> getStateTransitions()
77 return stateTransitions;
80 public void setStateTransitions(List<List<Double>> stateTransitionsL)
82 this.stateTransitions = stateTransitionsL;
85 public List<Character> getSymbols()
90 public void setSymbols(List<Character> symbolsL)
92 this.symbols = symbolsL;
95 public List<Double> getAverageMatchStateEmissionProbabilities()
97 return averageMatchStateEmissionProbabilities;
100 public void setAverageMatchStateEmissionProbabilities(
101 List<Double> averageMatchStateEmissionProbabilitiesL)
103 this.averageMatchStateEmissionProbabilities = averageMatchStateEmissionProbabilitiesL;
107 public List<Double> getInsertZeroEmissions()
109 return insertZeroEmissions;
112 public void setInsertZeroEmissions(List<Double> insertZeroEmissionsL)
114 this.insertZeroEmissions = insertZeroEmissionsL;
117 public List<List<Double>> getMatchEmissions()
119 return matchEmissions;
122 public void setMatchEmissions(List<List<Double>> matchEmissionsL)
124 this.matchEmissions = matchEmissionsL;
127 public List<List<Double>> getInsertEmissions()
129 return insertEmissions;
132 public void setInsertEmissions(List<List<Double>> insertEmissionsL)
134 this.insertEmissions = insertEmissionsL;
136 public void fillSymbols(String line)
138 Scanner scanner = new Scanner(line);
140 while (scanner.hasNext())
142 symbols.add(scanner.next().charAt(0));
147 public String getName()
149 return fileProperties.get("NAME");
151 public String getAccessionNumber()
153 return fileProperties.get("ACC");
156 public void setAccessionNumber(String value)
158 fileProperties.put("ACC", value);
161 public String getDescription()
163 return fileProperties.get("DESC");
166 public void setDescription(String value)
168 fileProperties.put("DESC", value);
171 public Integer getLength()
173 if (fileProperties.get("LENG") == null)
177 return Integer.parseInt(fileProperties.get("LENG"));
180 public void setLength(int value)
182 fileProperties.put("LENG", String.valueOf(value));
185 public Integer getMaxInstanceLength()
187 if (fileProperties.get("MAXL") == null)
191 return Integer.parseInt(fileProperties.get("MAXL"));
194 public void setMaxInstanceLength(int value)
196 fileProperties.put("MAXL", String.valueOf(value));
199 // gets type of symbol alphabet - "amino", "DNA", "RNA"
200 public String getAlphabetType()
202 return fileProperties.get("ALPH");
205 public void setAlphabetType(String value)
207 fileProperties.put("ALPH", value);
210 // returns boolean indicating whether the reference annotation character field
211 // for each match state is valid or ignored
212 public boolean getReferenceAnnotationFlag()
214 if (fileProperties.get("RF") != null)
216 if (fileProperties.get("RF").equals(yes))
224 public void setReferenceAnnotationFlag(boolean value)
228 fileProperties.put("RF", yes);
232 fileProperties.put("RF", no);
237 // returns boolean indicating whether the model mask annotation character
239 // for each match state is valid or ignored
240 public boolean getModelMaskedFlag()
242 if (fileProperties.get("MM") != null)
244 if (fileProperties.get("MM").equals(yes))
252 public void setModelMaskedFlag(boolean value)
256 fileProperties.put("MM", yes);
260 fileProperties.put("MM", no);
264 // returns boolean indicating whether the consensus residue field
265 // for each match state is valid or ignored
266 public boolean getConsensusResidueAnnotationFlag()
268 if (fileProperties.get("CONS") != null)
270 if (fileProperties.get("CONS").equals(yes))
278 public void setConsensusResidueeAnnotationFlag(boolean value)
282 fileProperties.put("CONS", yes);
286 fileProperties.put("CONS", no);
290 // returns boolean indicating whether the consensus structure character field
291 // for each match state is valid or ignored
292 public boolean getConsensusStructureAnnotationFlag()
294 if (fileProperties.get("CS") != null)
296 if (fileProperties.get("CS").equals(yes))
304 public void setConsensusStructureAnnotationFlag(boolean value)
308 fileProperties.put("CS", yes);
312 fileProperties.put("CS", no);
316 // returns boolean indicating whether the model mask annotation character
318 // for each match state is valid or ignored
319 public boolean getMapAnnotationFlag()
321 if (fileProperties.get("MAP") != null)
323 if (fileProperties.get("MAP").equals(yes))
331 public void setMapAnnotationFlag(boolean value)
335 fileProperties.put("MAP", yes);
339 fileProperties.put("MAP", no);
343 // not sure whether to implement this with Date object
344 public String getDate()
346 return fileProperties.get("DATE");
349 public void setDate(String value)
351 fileProperties.put("DATE", value);
354 // not sure whether to implement this
355 public String getCommandLineLog()
357 return fileProperties.get("COM");
360 public void setCommandLineLog(String value)
362 fileProperties.put("COM", value);
365 // gets the number of sequences that the HMM was trained on
366 public Integer getSequenceNumber()
368 if (fileProperties.get("NSEQ") == null)
372 return Integer.parseInt(fileProperties.get("NSEQ"));
375 public void setSequenceNumber(int value)
377 fileProperties.put("NSEQ", String.valueOf(value));
380 // gets the effective number determined during sequence weighting
381 public Double getEffectiveSequenceNumber()
383 if (fileProperties.get("LENG") == null)
387 return Double.parseDouble(fileProperties.get("EFFN"));
390 public void setEffectiveSequenceNumber(double value)
392 fileProperties.put("EFFN", String.valueOf(value));
395 public Long getCheckSum()
397 if (fileProperties.get("LENG") == null)
401 return Long.parseLong(fileProperties.get("CKSUM"));
404 public void setCheckSum(long value)
406 fileProperties.put("CKSUM", String.valueOf(value));
409 public Double getGatheringThreshold1()
413 return pfamData.get("GA")[0];
414 } catch (NullPointerException e)
420 public void setPFAMData(String key, Double[] data)
422 pfamData.put(key, data);
425 public Double getGatheringThreshold2()
429 return pfamData.get("GA")[1];
430 } catch (NullPointerException e)
437 public Double getTrustedCutoff1()
441 return pfamData.get("TC")[0];
442 } catch (NullPointerException e)
449 public Double getTrustedCutoff2()
453 return pfamData.get("TC")[1];
454 } catch (NullPointerException e)
461 public Double getNoiseCutoff1()
465 return pfamData.get("NC")[0];
466 } catch (NullPointerException e)
473 public Double getNoiseCutoff2()
477 return pfamData.get("NC")[1];
478 } catch (NullPointerException e)
485 public String getAlignmentModeConfiguration(String key)
487 return eValueStatistics.get(key).alignmentModeConfiguration;
490 public Double getSlopeOfDistribution(String scoreDistribution)
494 return eValueStatistics.get(scoreDistribution).slopeOfDistribution;
495 } catch (NullPointerException e)
501 public Double getLocationOfDistribution(String scoreDistribution)
505 return eValueStatistics.get(scoreDistribution).locationOfDistribution;
506 } catch (NullPointerException e)
512 public void addStatistic(String name, EValueStatistic stats)
514 eValueStatistics.put(name, stats);
/*
 * Disabled accessor, retained for reference:
 *
 * public double getBeginStateTransitions(Character symbol) { return
 * beginStateTransitions.get(symbol); }
 */
522 public void put(String key, String value)
524 fileProperties.put(key, value);
527 public Map<String, EValueStatistic> getEValueStatistics()
529 return eValueStatistics;
532 public void setEValueStatistics(
533 Map<String, EValueStatistic> eValueStatisticsM)
535 this.eValueStatistics = eValueStatisticsM;
538 public List<Integer> getAlignmentColumnIndexes()
540 return alignmentColumnIndexes;
543 public void setAlignmentColumnIndexes(
544 List<Integer> alignmentColumnIndexesL)
546 this.alignmentColumnIndexes = alignmentColumnIndexesL;
549 public List<HashMap<String, Character>> getAnnotations()
554 public void setAnnotations(List<HashMap<String, Character>> annotationsL)
556 this.annotations = annotationsL;
559 public Map<String, String> getFileProperties()
561 return fileProperties;
564 public void setFileProperties(Map<String, String> fileProperties)
566 this.fileProperties = fileProperties;