package jalview.datamodel;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
+import java.util.Scanner;
/**
- * Data structure to hold a HMM file
- */
-/**
+ * Data structure which stores a hidden Markov model. Currently contains file properties as well, not sure whether these should be transferred to the HMMFile class
+ *
* @author TZVanaalten
*
*/
public class HiddenMarkovModel
{
+ // Stores file properties. Do not directly access this field as it contains
+ // only string value - use the getter methods. For example, to find the length
+ // of theHMM, use getModelLength()to return an int value
+ Map<String, String> fileProperties = new HashMap<>();
+
+ // contains the average emission probabilities for each symbol
+ List<Double> averageMatchStateEmissionProbabilities = new ArrayList<>();
+
+ // contains the probabilities of insert 0 emissions for each symbol
+ List<Double> insertZeroEmissions = new ArrayList<>();
+
+ // contains the probabilities of transitions from the begin state and insert
+ // state 0. These are bm, bi, bd, im, ii, dm and dd in order (0th position in
+ // the array indicates the probability of a bm transition)
+
+ List<Double> beginStateTransitions = new ArrayList<>();
+
+ // contains the alignment column index for each node
+ List<Integer> alignmentColumnIndexes = new ArrayList<>();
+
+ // contains all other annotations for each node. These can be the
+ // consensus(CONS), reference annotation(RF), mask value(MM) or consensus
+ // structure(CS)
+ List<HashMap<String, Character>> annotations = new ArrayList<>();
+
+ // contains the match emission for each symbol at each node
+ List<List<Double>> matchEmissions = new ArrayList<>();
+
+ // contains the insert emission for each symbol at each node
+ List<List<Double>> insertEmissions = new ArrayList<>();
+
+ // contains the state transition for each state transition. See
+ // beginStateTransitions field for transition possibilities.
+ List<List<Double>> stateTransitions = new ArrayList<>();
+
+ // contains cutoffs and thresholds from PFAM
+ Map<String, Double[]> pfamData = new HashMap<>();
+
+ // contains e-value statistic objects which contain the alignment mode
+ // configuration, and the slope and location of each distribution
+ Map<String, EValueStatistic> eValueStatistics = new HashMap<>();
+
+ final String yes = "yes";
+
+ final String no = "no";
+
+ List<Character> symbols = new ArrayList<>();
+
+ public List<Double> getBeginStateTransitions()
+ {
+ return beginStateTransitions;
+ }
+
+ public void setBeginStateTransitions(List<Double> beginStateTransitionsL)
+ {
+ this.beginStateTransitions = beginStateTransitionsL;
+ }
+
+ public List<List<Double>> getStateTransitions()
+ {
+ return stateTransitions;
+ }
+
+ public void setStateTransitions(List<List<Double>> stateTransitionsL)
+ {
+ this.stateTransitions = stateTransitionsL;
+ }
+
+ public List<Character> getSymbols()
+ {
+ return symbols;
+ }
+
+ public void setSymbols(List<Character> symbolsL)
+ {
+ this.symbols = symbolsL;
+ }
+
+ public List<Double> getAverageMatchStateEmissionProbabilities()
+ {
+ return averageMatchStateEmissionProbabilities;
+ }
+
+ public void setAverageMatchStateEmissionProbabilities(
+ List<Double> averageMatchStateEmissionProbabilitiesL)
+ {
+ this.averageMatchStateEmissionProbabilities = averageMatchStateEmissionProbabilitiesL;
+ }
+
+
+ public List<Double> getInsertZeroEmissions()
+ {
+ return insertZeroEmissions;
+ }
+
+ public void setInsertZeroEmissions(List<Double> insertZeroEmissionsL)
+ {
+ this.insertZeroEmissions = insertZeroEmissionsL;
+ }
+
+ public List<List<Double>> getMatchEmissions()
+ {
+ return matchEmissions;
+ }
+
+ public void setMatchEmissions(List<List<Double>> matchEmissionsL)
+ {
+ this.matchEmissions = matchEmissionsL;
+ }
+
+ public List<List<Double>> getInsertEmissions()
+ {
+ return insertEmissions;
+ }
- // Stores file properties
- private Map<String, String> fileProperties = new HashMap<>();
+ public void setInsertEmissions(List<List<Double>> insertEmissionsL)
+ {
+ this.insertEmissions = insertEmissionsL;
+ }
+ public void fillSymbols(String line)
+ {
+ Scanner scanner = new Scanner(line);
+ scanner.next();
+ while (scanner.hasNext())
+ {
+ symbols.add(scanner.next().charAt(0));
+ }
+ scanner.close();
+ }
+ public String getName()
+ {
+ return fileProperties.get("NAME");
+ }
public String getAccessionNumber()
{
return fileProperties.get("ACC");
}
+ public void setAccessionNumber(String value)
+ {
+ fileProperties.put("ACC", value);
+ }
+
public String getDescription()
{
return fileProperties.get("DESC");
}
- public int getModelLength()
+ public void setDescription(String value)
{
+ fileProperties.put("DESC", value);
+ }
+
+ public Integer getLength()
+ {
+ if (fileProperties.get("LENG") == null)
+ {
+ return null;
+ }
return Integer.parseInt(fileProperties.get("LENG"));
}
- public int getMaxInstanceLength()
+ public void setLength(int value)
+ {
+ fileProperties.put("LENG", String.valueOf(value));
+ }
+
+ public Integer getMaxInstanceLength()
{
+ if (fileProperties.get("MAXL") == null)
+ {
+ return null;
+ }
return Integer.parseInt(fileProperties.get("MAXL"));
}
+ public void setMaxInstanceLength(int value)
+ {
+ fileProperties.put("MAXL", String.valueOf(value));
+ }
+
// gets type of symbol alphabet - "amino", "DNA", "RNA"
public String getAlphabetType()
{
return fileProperties.get("ALPH");
}
+ public void setAlphabetType(String value)
+ {
+ fileProperties.put("ALPH", value);
+ }
+
// returns boolean indicating whether the reference annotation character field
// for each match state is valid or ignored
public boolean getReferenceAnnotationFlag()
{
- if (fileProperties.get("RF") == "yes")
+ if (fileProperties.get("RF") != null)
{
- return true;
+ if (fileProperties.get("RF").equals(yes))
+ {
+ return true;
+ }
}
return false;
}
+ public void setReferenceAnnotationFlag(boolean value)
+ {
+ if (value)
+ {
+ fileProperties.put("RF", yes);
+ }
+ else
+ {
+ fileProperties.put("RF", no);
+ }
+
+ }
+
// returns boolean indicating whether the model mask annotation character
// field
// for each match state is valid or ignored
public boolean getModelMaskedFlag()
{
- if (fileProperties.get("MM") == "yes")
+ if (fileProperties.get("MM") != null)
{
- return true;
+ if (fileProperties.get("MM").equals(yes))
+ {
+ return true;
+ }
}
return false;
}
+ public void setModelMaskedFlag(boolean value)
+ {
+ if (value)
+ {
+ fileProperties.put("MM", yes);
+ }
+ else
+ {
+ fileProperties.put("MM", no);
+ }
+ }
+
// returns boolean indicating whether the consensus residue field
// for each match state is valid or ignored
public boolean getConsensusResidueAnnotationFlag()
{
- if (fileProperties.get("CONS") == "yes")
+ if (fileProperties.get("CONS") != null)
{
- return true;
+ if (fileProperties.get("CONS").equals(yes))
+ {
+ return true;
+ }
}
return false;
}
+ public void setConsensusResidueeAnnotationFlag(boolean value)
+ {
+ if (value)
+ {
+ fileProperties.put("CONS", yes);
+ }
+ else
+ {
+ fileProperties.put("CONS", no);
+ }
+ }
+
// returns boolean indicating whether the consensus structure character field
// for each match state is valid or ignored
public boolean getConsensusStructureAnnotationFlag()
{
- if (fileProperties.get("CS") == "yes")
+ if (fileProperties.get("CS") != null)
{
- return true;
+ if (fileProperties.get("CS").equals(yes))
+ {
+ return true;
+ }
}
return false;
}
+ public void setConsensusStructureAnnotationFlag(boolean value)
+ {
+ if (value)
+ {
+ fileProperties.put("CS", yes);
+ }
+ else
+ {
+ fileProperties.put("CS", no);
+ }
+ }
+
// returns boolean indicating whether the model mask annotation character
// field
// for each match state is valid or ignored
public boolean getMapAnnotationFlag()
{
- if (fileProperties.get("MAP") == "yes")
+ if (fileProperties.get("MAP") != null)
{
- return true;
+ if (fileProperties.get("MAP").equals(yes))
+ {
+ return true;
+ }
}
return false;
}
- // not sure whether to implement this
- // public Date getDate()
- // {
+ public void setMapAnnotationFlag(boolean value)
+ {
+ if (value)
+ {
+ fileProperties.put("MAP", yes);
+ }
+ else
+ {
+ fileProperties.put("MAP", no);
+ }
+ }
- // }
+ // not sure whether to implement this with Date object
+ public String getDate()
+ {
+ return fileProperties.get("DATE");
+ }
+
+ public void setDate(String value)
+ {
+ fileProperties.put("DATE", value);
+ }
// not sure whether to implement this
- // public String getCommandLineLog()
- // {
+ public String getCommandLineLog()
+ {
+ return fileProperties.get("COM");
+ }
- // }
+ public void setCommandLineLog(String value)
+ {
+ fileProperties.put("COM", value);
+ }
// gets the number of sequences that the HMM was trained on
- public int getSequenceNumber()
+ public Integer getSequenceNumber()
{
+ if (fileProperties.get("NSEQ") == null)
+ {
+ return null;
+ }
return Integer.parseInt(fileProperties.get("NSEQ"));
}
+ public void setSequenceNumber(int value)
+ {
+ fileProperties.put("NSEQ", String.valueOf(value));
+ }
+
// gets the effective number determined during sequence weighting
- public int getEffectiveSequenceNumber()
+ public Double getEffectiveSequenceNumber()
+ {
+ if (fileProperties.get("LENG") == null)
+ {
+ return null;
+ }
+ return Double.parseDouble(fileProperties.get("EFFN"));
+ }
+
+ public void setEffectiveSequenceNumber(double value)
+ {
+ fileProperties.put("EFFN", String.valueOf(value));
+ }
+
+ public Long getCheckSum()
+ {
+ if (fileProperties.get("LENG") == null)
+ {
+ return null;
+ }
+ return Long.parseLong(fileProperties.get("CKSUM"));
+ }
+
+ public void setCheckSum(long value)
+ {
+ fileProperties.put("CKSUM", String.valueOf(value));
+ }
+
+ public Double getGatheringThreshold1()
+ {
+ try
+ {
+ return pfamData.get("GA")[0];
+ } catch (NullPointerException e)
+ {
+ return null;
+ }
+ }
+
+ public void setPFAMData(String key, Double[] data)
+ {
+ pfamData.put(key, data);
+ }
+
+ public Double getGatheringThreshold2()
+ {
+ try
+ {
+ return pfamData.get("GA")[1];
+ } catch (NullPointerException e)
+ {
+ return null;
+ }
+
+ }
+
+ public Double getTrustedCutoff1()
+ {
+ try
+ {
+ return pfamData.get("TC")[0];
+ } catch (NullPointerException e)
+ {
+ return null;
+ }
+
+ }
+
+ public Double getTrustedCutoff2()
{
- return Integer.parseInt(fileProperties.get("EFFN"));
+ try
+ {
+ return pfamData.get("TC")[1];
+ } catch (NullPointerException e)
+ {
+ return null;
+ }
+
}
- public int getCheckSum()
+ public Double getNoiseCutoff1()
{
- return Integer.parseInt(fileProperties.get("CKSUM"));
+ try
+ {
+ return pfamData.get("NC")[0];
+ } catch (NullPointerException e)
+ {
+ return null;
+ }
+
}
- // need to ask if BigDecimal is best decimal type for this purpose
- // and how to limit number of decimals
- public double getGatheringThresholdGA1()
+ public Double getNoiseCutoff2()
{
- return Double.parseDouble((fileProperties.get("GA1")));
+ try
+ {
+ return pfamData.get("NC")[1];
+ } catch (NullPointerException e)
+ {
+ return null;
+ }
+
+ }
+
+ public String getAlignmentModeConfiguration(String key)
+ {
+ return eValueStatistics.get(key).alignmentModeConfiguration;
+ }
+
+ public Double getSlopeOfDistribution(String scoreDistribution)
+ {
+ try
+ {
+ return eValueStatistics.get(scoreDistribution).slopeOfDistribution;
+ } catch (NullPointerException e)
+ {
+ return null;
+ }
+ }
+
+ public Double getLocationOfDistribution(String scoreDistribution)
+ {
+ try
+ {
+ return eValueStatistics.get(scoreDistribution).locationOfDistribution;
+ } catch (NullPointerException e)
+ {
+ return null;
+ }
}
+ public void addStatistic(String name, EValueStatistic stats)
+ {
+ eValueStatistics.put(name, stats);
+ }
+
+ /**
+ * public double getBeginStateTransitions(Character symbol) { return
+ * beginStateTransitions.get(symbol); }
+ **/
+
public void put(String key, String value)
{
fileProperties.put(key, value);
}
+ public Map<String, EValueStatistic> getEValueStatistics()
+ {
+ return eValueStatistics;
+ }
+
+ public void setEValueStatistics(
+ Map<String, EValueStatistic> eValueStatisticsM)
+ {
+ this.eValueStatistics = eValueStatisticsM;
+ }
+
+ public List<Integer> getAlignmentColumnIndexes()
+ {
+ return alignmentColumnIndexes;
+ }
+
+ public void setAlignmentColumnIndexes(
+ List<Integer> alignmentColumnIndexesL)
+ {
+ this.alignmentColumnIndexes = alignmentColumnIndexesL;
+ }
+
+ public List<HashMap<String, Character>> getAnnotations()
+ {
+ return annotations;
+ }
+
+ public void setAnnotations(List<HashMap<String, Character>> annotationsL)
+ {
+ this.annotations = annotationsL;
+ }
+
+ public Map<String, String> getFileProperties()
+ {
+ return fileProperties;
+ }
+
+ public void setFileProperties(Map<String, String> fileProperties)
+ {
+ this.fileProperties = fileProperties;
+ }
}
+