package jalview.datamodel;
+import jalview.io.HMMFile;
+import jalview.schemes.ResidueProperties;
+import jalview.util.Comparison;
+import jalview.util.MapList;
+
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Scanner;
/**
- * Data structure which stores a hidden Markov model. Currently contains file properties as well, not sure whether these should be transferred to the HMMFile class
+ * Data structure which stores a hidden Markov model
*
* @author TZVanaalten
*
*/
public class HiddenMarkovModel
{
- // Stores file properties. Do not directly access this field as it contains
- // only string value - use the getter methods. For example, to find the length
- // of theHMM, use getModelLength()to return an int value
- Map<String, String> fileProperties = new HashMap<>();
-
- // contains the average emission probabilities for each symbol
- List<Double> averageMatchStateEmissionProbabilities = new ArrayList<>();
-
- // contains the probabilities of insert 0 emissions for each symbol
- List<Double> insertZeroEmissions = new ArrayList<>();
-
- // contains the probabilities of transitions from the begin state and insert
- // state 0. These are bm, bi, bd, im, ii, dm and dd in order (0th position in
- // the array indicates the probability of a bm transition)
+ private static final char GAP_DASH = '-';
- List<Double> beginStateTransitions = new ArrayList<>();
+ public final static String YES = "yes";
- // contains the alignment column index for each node
- List<Integer> alignmentColumnIndexes = new ArrayList<>();
+ public final static String NO = "no";
- // contains all other annotations for each node. These can be the
- // consensus(CONS), reference annotation(RF), mask value(MM) or consensus
- // structure(CS)
- List<HashMap<String, Character>> annotations = new ArrayList<>();
+ public static final int MATCHTOMATCH = 0;
- // contains the match emission for each symbol at each node
- List<List<Double>> matchEmissions = new ArrayList<>();
+ public static final int MATCHTOINSERT = 1;
- // contains the insert emission for each symbol at each node
- List<List<Double>> insertEmissions = new ArrayList<>();
+ public static final int MATCHTODELETE = 2;
- // contains the state transition for each state transition. See
- // beginStateTransitions field for transition possibilities.
- List<List<Double>> stateTransitions = new ArrayList<>();
+ public static final int INSERTTOMATCH = 3;
- // contains cutoffs and thresholds from PFAM
- Map<String, Double[]> pfamData = new HashMap<>();
+ public static final int INSERTTOINSERT = 4;
- // contains e-value statistic objects which contain the alignment mode
- // configuration, and the slope and location of each distribution
- Map<String, EValueStatistic> eValueStatistics = new HashMap<>();
+ public static final int DELETETOMATCH = 5;
- final String yes = "yes";
+ public static final int DELETETODELETE = 6;
- final String no = "no";
-
- List<Character> symbols = new ArrayList<>();
-
- public List<Double> getBeginStateTransitions()
- {
- return beginStateTransitions;
- }
+ private static final double LOG2 = Math.log(2);
- public void setBeginStateTransitions(List<Double> beginStateTransitionsL)
- {
- this.beginStateTransitions = beginStateTransitionsL;
- }
+ /*
+ * properties read from HMM file header lines
+ */
+ private Map<String, String> fileProperties = new HashMap<>();
- public List<List<Double>> getStateTransitions()
- {
- return stateTransitions;
- }
+ private String fileHeader;
+
+ /*
+ * the symbols used in this model e.g. "ACGT"
+ */
+ private String alphabet;
- public void setStateTransitions(List<List<Double>> stateTransitionsL)
- {
- this.stateTransitions = stateTransitionsL;
- }
+ /*
+ * symbol lookup index into the alphabet for 'A' to 'Z'
+ */
+ private int[] symbolIndexLookup = new int['Z' - 'A' + 1];
- public List<Character> getSymbols()
- {
- return symbols;
- }
-
- public void setSymbols(List<Character> symbolsL)
- {
- this.symbols = symbolsL;
- }
-
- public List<Double> getAverageMatchStateEmissionProbabilities()
- {
- return averageMatchStateEmissionProbabilities;
- }
-
- public void setAverageMatchStateEmissionProbabilities(
- List<Double> averageMatchStateEmissionProbabilitiesL)
- {
- this.averageMatchStateEmissionProbabilities = averageMatchStateEmissionProbabilitiesL;
- }
-
-
- public List<Double> getInsertZeroEmissions()
- {
- return insertZeroEmissions;
- }
-
- public void setInsertZeroEmissions(List<Double> insertZeroEmissionsL)
- {
- this.insertZeroEmissions = insertZeroEmissionsL;
- }
+ /*
+ * Nodes in the model. The begin node is at index 0, and contains
+ * average emission probabilities for each symbol.
+ */
+ private List<HMMNode> nodes = new ArrayList<>();
- public List<List<Double>> getMatchEmissions()
- {
- return matchEmissions;
- }
+ /*
+ * the aligned HMM consensus sequence extracted from the HMM profile
+ */
+ private SequenceI hmmSeq;
- public void setMatchEmissions(List<List<Double>> matchEmissionsL)
- {
- this.matchEmissions = matchEmissionsL;
- }
+ /*
+ * mapping from HMM nodes to residues of the hmm consensus sequence
+ */
+ private Mapping mapToHmmConsensus;
- public List<List<Double>> getInsertEmissions()
+ /**
+ * Default constructor. Creates an empty model with no file properties, no
+ * nodes and no alphabet.
+ */
+ public HiddenMarkovModel()
{
- return insertEmissions;
+ /*
+ * mark every letter as 'not in the alphabet' until setAlphabet is called;
+ * a freshly allocated int[] holds zeros, which getSymbolIndex would
+ * otherwise report as index 0 (a valid symbol) for any letter
+ */
+ Arrays.fill(symbolIndexLookup, -1);
}
- public void setInsertEmissions(List<List<Double>> insertEmissionsL)
- {
- this.insertEmissions = insertEmissionsL;
- }
- public void fillSymbols(String line)
- {
- Scanner scanner = new Scanner(line);
- scanner.next();
- while (scanner.hasNext())
+ /**
+ * Copy constructor given a new aligned sequence with which to associate the
+ * HMM profile. File properties and the node list are copied into new
+ * collections (the nodes themselves are shared); the alphabet, symbol lookup
+ * and file header are shared with the source model.
+ *
+ * @param hmm
+ * the model to copy
+ * @param sq
+ * the aligned sequence to associate with the new model
+ */
+ public HiddenMarkovModel(HiddenMarkovModel hmm, SequenceI sq)
+ {
+ this.fileProperties = new HashMap<>(hmm.fileProperties);
+ this.alphabet = hmm.alphabet;
+ this.nodes = new ArrayList<>(hmm.nodes);
+ this.symbolIndexLookup = hmm.symbolIndexLookup;
+ // Strings are immutable so the reference can be shared; this also avoids
+ // a NullPointerException when the source model has no file header
+ this.fileHeader = hmm.fileHeader;
+ this.hmmSeq = sq;
+ if (hmm.mapToHmmConsensus == null)
+ {
+ // source model has no node mapping (it is only built by setNodes)
+ this.mapToHmmConsensus = null;
+ }
+ else if (sq.getDatasetSequence() == hmm.mapToHmmConsensus.getTo())
+ {
+ // same dataset sequence e.g. after realigning search results
+ this.mapToHmmConsensus = hmm.mapToHmmConsensus;
+ }
+ else
{
- symbols.add(scanner.next().charAt(0));
+ // different dataset sequence e.g. after loading HMM from project
+ this.mapToHmmConsensus = new Mapping(sq.getDatasetSequence(),
+ hmm.mapToHmmConsensus.getMap());
}
- scanner.close();
}
- public String getName()
- {
- return fileProperties.get("NAME");
- }
- public String getAccessionNumber()
- {
- return fileProperties.get("ACC");
- }
-
- public void setAccessionNumber(String value)
- {
- fileProperties.put("ACC", value);
- }
-
- public String getDescription()
- {
- return fileProperties.get("DESC");
- }
+ /**
+ * Returns the information content at a specified column, calculated as the
+ * sum (over possible symbols) of
+ *
+ * <pre>
+ * p * log(p / background probability) / log(2)
+ * </pre>
+ *
+ * where p is the match emission probability of the symbol at the column.
+ * Symbols with a zero emission probability contribute nothing, so a column
+ * with no corresponding node has an information content of zero.
+ *
+ * @param column
+ * column position (base 0)
+ * @return the information content of the column, in bits
+ */
+ public float getInformationContent(int column)
+ {
+ float informationContent = 0f;
+
+ for (char symbol : getSymbols().toCharArray())
+ {
+ float prob = (float) getMatchEmissionProbability(column, symbol);
+ if (prob <= 0f)
+ {
+ /*
+ * by convention 0 * log(0) contributes nothing; evaluating it
+ * directly gives NaN (0 * -Infinity), which would poison the sum
+ */
+ continue;
+ }
+ float freq = ResidueProperties.backgroundFrequencies
+ .get(getAlphabetType()).get(symbol);
+ informationContent += prob * Math.log(prob / freq);
+ }
- public void setDescription(String value)
- {
- fileProperties.put("DESC", value);
- }
+ informationContent = informationContent / (float) LOG2;
- public Integer getLength()
- {
- if (fileProperties.get("LENG") == null)
- {
- return null;
- }
- return Integer.parseInt(fileProperties.get("LENG"));
+ return informationContent;
}
- public void setLength(int value)
+ /**
+ * Gets the file header of the .hmm file this model came from
+ *
+ * @return the header as last set by setFileHeader or copied from another
+ * model, or null if none has been set
+ */
+ public String getFileHeader()
{
- fileProperties.put("LENG", String.valueOf(value));
+ return fileHeader;
}
- public Integer getMaxInstanceLength()
+ /**
+ * Sets the file header of this model. The value is stored as given, without
+ * any parsing or validation.
+ *
+ * @param header
+ * the header text to store (may be null)
+ */
+ public void setFileHeader(String header)
{
- if (fileProperties.get("MAXL") == null)
- {
- return null;
- }
- return Integer.parseInt(fileProperties.get("MAXL"));
+ fileHeader = header;
}
- public void setMaxInstanceLength(int value)
+ /**
+ * Returns the symbols used in this hidden Markov model
+ *
+ * @return the alphabet string as stored by setAlphabet (upper case, with
+ * whitespace removed), or null if no alphabet has been set
+ */
+ public String getSymbols()
{
- fileProperties.put("MAXL", String.valueOf(value));
+ return alphabet;
}
-
- // gets type of symbol alphabet - "amino", "DNA", "RNA"
- public String getAlphabetType()
- {
- return fileProperties.get("ALPH");
+
+ /**
+ * Gets the node in the hidden Markov model at the specified position.
+ *
+ * @param nodeIndex
+ * The index of the node requested. Node 0 optionally contains the
+ * average match emission probabilities across the entire model, and
+ * always contains the insert emission probabilities and state
+ * transition probabilities for the begin node. Node 1 contains the
+ * first node in the HMM that can correspond to a column in the
+ * alignment.
+ * @return the node held at that index of the node list
+ * @throws IndexOutOfBoundsException
+ * if the index is outside the node list
+ */
+ public HMMNode getNode(int nodeIndex)
+ {
+ return nodes.get(nodeIndex);
}
- public void setAlphabetType(String value)
+ /**
+ * Returns the name of the sequence alignment on which the HMM is based.
+ *
+ * @return the value of the file property keyed by HMMFile.NAME, or null if
+ * that property has not been set
+ */
+ public String getName()
{
- fileProperties.put("ALPH", value);
+ return fileProperties.get(HMMFile.NAME);
}
-
- // returns boolean indicating whether the reference annotation character field
- // for each match state is valid or ignored
- public boolean getReferenceAnnotationFlag()
+
+ /**
+ * Answers the string value of the property (parsed from an HMM file) for the
+ * given key, or null if the property is not present
+ *
+ * @param key
+ * the property name to look up
+ * @return the stored value, or null if there is no entry for the key
+ */
+ public String getProperty(String key)
{
- if (fileProperties.get("RF") != null)
- {
- if (fileProperties.get("RF").equals(yes))
- {
- return true;
- }
- }
- return false;
+ return fileProperties.get(key);
}
- public void setReferenceAnnotationFlag(boolean value)
+ /**
+ * Answers true if the property with the given key is present with a value of
+ * "yes" (not case-sensitive), else false. A missing property, or any other
+ * value, gives false.
+ *
+ * @param key
+ * the property name to look up
+ * @return true only if the stored value equals "yes", ignoring case
+ */
+ public boolean getBooleanProperty(String key)
{
- if (value)
- {
- fileProperties.put("RF", yes);
- }
- else
- {
- fileProperties.put("RF", no);
- }
-
+ return YES.equalsIgnoreCase(fileProperties.get(key));
}
- // returns boolean indicating whether the model mask annotation character
- // field
- // for each match state is valid or ignored
- public boolean getModelMaskedFlag()
+ /**
+ * Returns the length of the hidden Markov model. The value returned is the
+ * LENG property if specified, else the number of nodes, excluding the begin
+ * node (which should be the same thing).
+ * <p>
+ * Note that with no LENG property and an empty node list the result is -1.
+ *
+ * @return the parsed LENG property, or the node count minus one
+ * @throws NumberFormatException
+ * if the LENG property is present but is not a valid integer
+ */
+ public int getLength()
{
- if (fileProperties.get("MM") != null)
+ if (fileProperties.get(HMMFile.LENGTH) == null)
{
- if (fileProperties.get("MM").equals(yes))
- {
- return true;
- }
+ return nodes.size() - 1; // not counting BEGIN node
}
- return false;
+ return Integer.parseInt(fileProperties.get(HMMFile.LENGTH));
}
- public void setModelMaskedFlag(boolean value)
+ /**
+ * Returns the value of mandatory property "ALPH" - "amino", "DNA", "RNA" are
+ * the options. Other alphabets may be added.
+ *
+ * @return the value of the file property keyed by HMMFile.ALPHABET, or null
+ * if that property has not been set
+ */
+ public String getAlphabetType()
{
- if (value)
- {
- fileProperties.put("MM", yes);
- }
- else
- {
- fileProperties.put("MM", no);
- }
+ return fileProperties.get(HMMFile.ALPHABET);
}
- // returns boolean indicating whether the consensus residue field
- // for each match state is valid or ignored
- public boolean getConsensusResidueAnnotationFlag()
- {
- if (fileProperties.get("CONS") != null)
- {
- if (fileProperties.get("CONS").equals(yes))
+ /**
+ * Sets the model alphabet to the symbols in the given string (ignoring any
+ * whitespace), and returns the number of symbols. The string is converted to
+ * upper case before use, and the lookup of each symbol's position (its index
+ * in the stored alphabet string) is rebuilt from scratch.
+ * <p>
+ * Characters outside 'A' to 'Z', and repeats of a symbol already seen, are
+ * reported on System.err and excluded from the returned count; they are
+ * nevertheless retained in the stored alphabet string.
+ *
+ * @param symbols
+ * the alphabet symbols, optionally separated by whitespace
+ * @return the number of distinct 'A' to 'Z' symbols accepted
+ */
+ public int setAlphabet(String symbols)
+ {
+ String trimmed = symbols.toUpperCase().replaceAll("\\s", "");
+ int count = trimmed.length();
+ alphabet = trimmed;
+ symbolIndexLookup = new int['Z' - 'A' + 1];
+ Arrays.fill(symbolIndexLookup, -1);
+ int ignored = 0;
+
+ /*
+ * save the symbols in order, and a quick lookup of symbol position;
+ * only the first occurrence of a letter is recorded in the lookup
+ */
+ for (short i = 0; i < count; i++)
+ {
+ char symbol = trimmed.charAt(i);
+ if (symbol >= 'A' && symbol <= 'Z'
+ && symbolIndexLookup[symbol - 'A'] == -1)
{
- return true;
+ symbolIndexLookup[symbol - 'A'] = i;
}
- }
- return false;
- }
-
- public void setConsensusResidueeAnnotationFlag(boolean value)
- {
- if (value)
- {
- fileProperties.put("CONS", yes);
- }
- else
- {
- fileProperties.put("CONS", no);
- }
- }
-
- // returns boolean indicating whether the consensus structure character field
- // for each match state is valid or ignored
- public boolean getConsensusStructureAnnotationFlag()
- {
- if (fileProperties.get("CS") != null)
- {
- if (fileProperties.get("CS").equals(yes))
+ else
{
- return true;
+ System.err
+ .println(
+ "Unexpected or duplicated character in HMM ALPHabet: "
+ + symbol);
+ ignored++;
}
}
- return false;
+ return count - ignored;
}
- public void setConsensusStructureAnnotationFlag(boolean value)
- {
- if (value)
- {
- fileProperties.put("CS", yes);
- }
- else
+ /**
+ * Answers the node of the model corresponding to an aligned column position
+ * (0...), or null if there is no such node. Null is also returned if the
+ * model has no consensus sequence or no node mapping yet.
+ *
+ * @param column
+ * aligned column position (base 0)
+ * @return the node mapped to the consensus residue at the column, or null
+ */
+ HMMNode getNodeForColumn(int column)
+ {
+ /*
+ * without a consensus sequence and its node mapping (e.g. before
+ * setNodes has been called) no column can be resolved to a node
+ */
+ if (hmmSeq == null || mapToHmmConsensus == null)
+ {
+ return null;
+ }
+
+ /*
+ * if the hmm consensus is gapped at the column,
+ * there is no corresponding node
+ */
+ if (Comparison.isGap(hmmSeq.getCharAt(column)))
{
- fileProperties.put("CS", no);
+ return null;
}
- }
- // returns boolean indicating whether the model mask annotation character
- // field
- // for each match state is valid or ignored
- public boolean getMapAnnotationFlag()
- {
- if (fileProperties.get("MAP") != null)
+ /*
+ * find the node (if any) that is mapped to the
+ * consensus sequence residue position at the column
+ */
+ int seqPos = hmmSeq.findPosition(column);
+ int[] nodeNo = mapToHmmConsensus.getMap().locateInFrom(seqPos, seqPos);
+ if (nodeNo != null)
{
- if (fileProperties.get("MAP").equals(yes))
- {
- return true;
- }
+ return getNode(nodeNo[0]);
}
- return false;
+ return null;
}
- public void setMapAnnotationFlag(boolean value)
- {
- if (value)
- {
- fileProperties.put("MAP", yes);
- }
- else
- {
- fileProperties.put("MAP", no);
+ /**
+ * Answers the match emission probability of a symbol at an alignment column,
+ * or zero if the column has no corresponding node or the symbol is not in
+ * the model's alphabet.
+ *
+ * @param alignColumn
+ * The index of the alignment column, starting at index 0. Index 0
+ * usually corresponds to index 1 in the HMM.
+ * @param symbol
+ * The symbol for which the desired probability is being requested.
+ * @return the node's match emission value for the symbol, or 0
+ */
+ public double getMatchEmissionProbability(int alignColumn, char symbol)
+ {
+ final HMMNode columnNode = getNodeForColumn(alignColumn);
+ final int alphabetIndex = getSymbolIndex(symbol);
+ if (columnNode == null || alphabetIndex == -1)
+ {
+ // nothing to look up
+ return 0D;
}
+ return columnNode.getMatchEmission(alphabetIndex);
}
- // not sure whether to implement this with Date object
- public String getDate()
- {
- return fileProperties.get("DATE");
- }
-
- public void setDate(String value)
- {
- fileProperties.put("DATE", value);
- }
-
- // not sure whether to implement this
- public String getCommandLineLog()
- {
- return fileProperties.get("COM");
- }
-
- public void setCommandLineLog(String value)
- {
- fileProperties.put("COM", value);
- }
-
- // gets the number of sequences that the HMM was trained on
- public Integer getSequenceNumber()
- {
- if (fileProperties.get("NSEQ") == null)
- {
- return null;
+ /**
+ * Answers the insert emission probability of a symbol at an alignment
+ * column, or zero if the column has no corresponding node or the symbol is
+ * not in the model's alphabet.
+ *
+ * @param alignColumn
+ * The index of the alignment column, starting at index 0. Index 0
+ * usually corresponds to index 1 in the HMM.
+ * @param symbol
+ * The symbol for which the desired probability is being requested.
+ * @return the node's insert emission value for the symbol, or 0
+ */
+ public double getInsertEmissionProbability(int alignColumn, char symbol)
+ {
+ final HMMNode columnNode = getNodeForColumn(alignColumn);
+ final int alphabetIndex = getSymbolIndex(symbol);
+ if (columnNode == null || alphabetIndex == -1)
+ {
+ // nothing to look up
+ return 0D;
}
- return Integer.parseInt(fileProperties.get("NSEQ"));
- }
-
- public void setSequenceNumber(int value)
- {
- fileProperties.put("NSEQ", String.valueOf(value));
+ return columnNode.getInsertEmission(alphabetIndex);
}
-
- // gets the effective number determined during sequence weighting
- public Double getEffectiveSequenceNumber()
- {
- if (fileProperties.get("LENG") == null)
- {
- return null;
+
+ /**
+ * Gets the probability of a given state transition at a column in the
+ * alignment, or zero if no node corresponds to the column.
+ *
+ * @param alignColumn
+ * The index of the alignment column, starting at index 0. Index 0
+ * usually corresponds to index 1 in the HMM.
+ * @param transition
+ * The transition for which the probability is requested; passed
+ * unchanged to the node (presumably one of the transition constants
+ * MATCHTOMATCH to DELETETODELETE - confirm against HMMNode).
+ * @return the node's state transition value, or 0 if the column has no node
+ *
+ */
+ public double getStateTransitionProbability(int alignColumn,
+ int transition)
+ {
+ HMMNode node = getNodeForColumn(alignColumn);
+ if (node != null)
+ {
+ return node.getStateTransition(transition);
}
- return Double.parseDouble(fileProperties.get("EFFN"));
- }
-
- public void setEffectiveSequenceNumber(double value)
- {
- fileProperties.put("EFFN", String.valueOf(value));
+ return 0D;
}
-
- public Long getCheckSum()
+
+ /**
+ * Answers the residue number recorded on the node at the given index, i.e.
+ * the sequence position linked to that node. This corresponds to an aligned
+ * column position (counting from 1).
+ *
+ * @param nodeIndex
+ * The index of the node, starting from index 1. Index 0 is the begin
+ * node, which does not correspond to a column in the alignment.
+ * @return the node's residue number
+ */
+ public int getNodeMapPosition(int nodeIndex)
+ {
+ HMMNode mappedNode = nodes.get(nodeIndex);
+ return mappedNode.getResidueNumber();
+ }
+
+ /**
+ * Answers the consensus residue held by the node at the given index.
+ *
+ * @param nodeIndex
+ * The index of the specified node.
+ * @return the node's consensus residue character
+ */
+ public char getConsensusResidue(int nodeIndex)
+ {
+ return nodes.get(nodeIndex).getConsensusResidue();
+ }
+
+ /**
+ * Answers the reference annotation held by the node at the given index.
+ *
+ * @param nodeIndex
+ * The index of the specified node.
+ * @return the node's reference annotation character
+ */
+ public char getReferenceAnnotation(int nodeIndex)
+ {
+ return nodes.get(nodeIndex).getReferenceAnnotation();
+ }
+
+ /**
+ * Answers the mask value held by the node at the given index.
+ *
+ * @param nodeIndex
+ * The index of the specified node.
+ * @return the node's mask value character
+ */
+ public char getMaskedValue(int nodeIndex)
+ {
+ return nodes.get(nodeIndex).getMaskValue();
+ }
+
+ /**
+ * Answers the consensus structure held by the node at the given index.
+ *
+ * @param nodeIndex
+ * The index of the specified node.
+ * @return the node's consensus structure character
+ */
+ public char getConsensusStructure(int nodeIndex)
+ {
+ return nodes.get(nodeIndex).getConsensusStructure();
+ }
+
+ /**
+ * Sets a property read from an HMM file. Any value already stored for the
+ * key is replaced.
+ *
+ * @param key
+ * the property name
+ * @param value
+ * the property value, stored as given
+ */
+ public void setProperty(String key, String value)
{
- if (fileProperties.get("LENG") == null)
- {
- return null;
- }
- return Long.parseLong(fileProperties.get("CKSUM"));
+ fileProperties.put(key, value);
}
- public void setCheckSum(long value)
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return the value of the file property keyed by HMMFile.VITERBI, or null
+ * if it has not been set
+ */
+ public String getViterbi()
{
- fileProperties.put("CKSUM", String.valueOf(value));
+ return fileProperties.get(HMMFile.VITERBI);
}
- public Double getGatheringThreshold1()
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return the value of the file property keyed by HMMFile.MSV, or null if it
+ * has not been set
+ */
+ public String getMSV()
{
- try
- {
- return pfamData.get("GA")[0];
- } catch (NullPointerException e)
- {
- return null;
- }
+ return fileProperties.get(HMMFile.MSV);
}
- public void setPFAMData(String key, Double[] data)
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return the value of the file property keyed by HMMFile.FORWARD, or null
+ * if it has not been set
+ */
+ public String getForward()
{
- pfamData.put(key, data);
+ return fileProperties.get(HMMFile.FORWARD);
}
- public Double getGatheringThreshold2()
- {
- try
- {
- return pfamData.get("GA")[1];
- } catch (NullPointerException e)
- {
- return null;
+ /**
+ * Constructs the consensus sequence based on the most probable symbol at each
+ * position. Gap characters are inserted for discontinuities in the node map
+ * numbering (if provided), else an ungapped sequence is generated.
+ * <p>
+ * A node's own consensus residue is used when it is not the gap character;
+ * otherwise the alphabet symbol with the highest match emission is taken,
+ * in lower case if that emission is below 0.5.
+ * <p>
+ * A mapping between the HMM nodes and residue positions of the sequence is
+ * also built and saved. The method returns nothing: the new sequence (with a
+ * dataset sequence created for it, and this model set on it) and the mapping
+ * are stored in this object's fields.
+ * <p>
+ * NOTE(review): the character array is allocated with endResNo + 1 elements
+ * but the loops appear to write only indices 0 to endResNo - 1, which would
+ * leave one unset trailing element - confirm whether this is intended.
+ */
+ void buildConsensusSequence()
+ {
+ List<int[]> toResidues = new ArrayList<>();
+
+ /*
+ * if the HMM provided a map to sequence, use those start/end values,
+ * else just treat it as for a contiguous sequence numbered from 1
+ */
+ boolean hasMap = getBooleanProperty(HMMFile.MAP);
+ int start = hasMap ? getNode(1).getResidueNumber() : 1;
+ int endResNo = hasMap ? getNode(nodes.size() - 1).getResidueNumber()
+ : (start + getLength() - 1);
+ char[] sequence = new char[endResNo + 1];
+
+ int lastResNo = start - 1;
+ int seqOffset = -1;
+ int gapCount = 0;
+
+ for (int seqN = 0; seqN < start; seqN++)
+ {
+ sequence[seqN] = GAP_DASH;
+ seqOffset++;
}
- }
-
- public Double getTrustedCutoff1()
- {
- try
- {
- return pfamData.get("TC")[0];
- } catch (NullPointerException e)
+ for (int nodeNo = 1; nodeNo < nodes.size(); nodeNo++)
{
- return null;
- }
+ HMMNode node = nodes.get(nodeNo);
+ final int resNo = hasMap ? node.getResidueNumber() : lastResNo + 1;
- }
-
- public Double getTrustedCutoff2()
- {
- try
- {
- return pfamData.get("TC")[1];
- } catch (NullPointerException e)
- {
- return null;
+ /*
+ * insert gaps if map numbering is not continuous
+ */
+ while (resNo > lastResNo + 1)
+ {
+ sequence[seqOffset++] = GAP_DASH;
+ lastResNo++;
+ gapCount++;
+ }
+ char consensusResidue = node.getConsensusResidue();
+ if (GAP_DASH == consensusResidue)
+ {
+ /*
+ * no residue annotation in HMM - scan for the symbol
+ * with the highest match emission probability
+ */
+ int symbolIndex = node.getMaxMatchEmissionIndex();
+ consensusResidue = alphabet.charAt(symbolIndex);
+ if (node.getMatchEmission(symbolIndex) < 0.5D)
+ {
+ // follow convention of lower case if match emission prob < 0.5
+ consensusResidue = Character.toLowerCase(consensusResidue);
+ }
+ }
+ sequence[seqOffset++] = consensusResidue;
+ lastResNo = resNo;
}
- }
-
- public Double getNoiseCutoff1()
- {
- try
- {
- return pfamData.get("NC")[0];
- } catch (NullPointerException e)
- {
- return null;
- }
+ Sequence seq = new Sequence(getName(), sequence, start,
+ lastResNo - gapCount);
+ seq.createDatasetSequence();
+ seq.setHMM(this);
+ this.hmmSeq = seq;
+ /*
+ * construct and store Mapping of nodes to residues
+ * note as constructed this is just an identity mapping,
+ * but it allows for greater flexibility in future
+ */
+ List<int[]> fromNodes = new ArrayList<>();
+ fromNodes.add(new int[] { 1, getLength() });
+ toResidues.add(new int[] { seq.getStart(), seq.getEnd() });
+ MapList mapList = new MapList(fromNodes, toResidues, 1, 1);
+ mapToHmmConsensus = new Mapping(seq.getDatasetSequence(), mapList);
}
- public Double getNoiseCutoff2()
- {
- try
- {
- return pfamData.get("NC")[1];
- } catch (NullPointerException e)
- {
- return null;
- }
-
- }
- public String getAlignmentModeConfiguration(String key)
+ /**
+ * Answers the aligned consensus sequence for the profile. Note this will
+ * return null if called before <code>setNodes</code> has been called, unless
+ * a sequence has been supplied through the copy constructor or
+ * <code>setHmmSeq</code>. <code>setNodes</code> only builds the sequence when
+ * given more than one node.
+ *
+ * @return the consensus sequence, or null if none has been built or set
+ */
+ public SequenceI getConsensusSequence()
{
- return eValueStatistics.get(key).alignmentModeConfiguration;
+ return hmmSeq;
}
- public Double getSlopeOfDistribution(String scoreDistribution)
- {
- try
- {
- return eValueStatistics.get(scoreDistribution).slopeOfDistribution;
- } catch (NullPointerException e)
- {
- return null;
+ /**
+ * Answers the index position (0...) of the given symbol, or -1 if not a valid
+ * symbol for this HMM
+ *
+ * @param symbol
+ * @return
+ */
+ private int getSymbolIndex(char symbol)
+ {
+ /*
+ * symbolIndexLookup holds the index for 'A' to 'Z'
+ */
+ char c = Character.toUpperCase(symbol);
+ if ('A' <= c && c <= 'Z')
+ {
+ return symbolIndexLookup[c - 'A'];
}
+ return -1;
}
- public Double getLocationOfDistribution(String scoreDistribution)
+ /**
+ * Sets the nodes of this HMM, and also extracts the HMM consensus sequence
+ * and a mapping between node numbers and sequence positions. The list is
+ * stored by reference, not copied. The consensus sequence and mapping are
+ * only rebuilt when the list holds more than one node (i.e. something other
+ * than the begin node); otherwise any existing ones are left unchanged.
+ *
+ * @param nodeList
+ * the nodes of the model, with the begin node at index 0
+ */
+ public void setNodes(List<HMMNode> nodeList)
{
- try
- {
- return eValueStatistics.get(scoreDistribution).locationOfDistribution;
- } catch (NullPointerException e)
+ nodes = nodeList;
+ if (nodes.size() > 1)
{
- return null;
+ buildConsensusSequence();
}
}
- public void addStatistic(String name, EValueStatistic stats)
- {
- eValueStatistics.put(name, stats);
- }
-
/**
- * public double getBeginStateTransitions(Character symbol) { return
- * beginStateTransitions.get(symbol); }
- **/
-
- public void put(String key, String value)
- {
- fileProperties.put(key, value);
- }
-
- public Map<String, EValueStatistic> getEValueStatistics()
- {
- return eValueStatistics;
- }
-
- public void setEValueStatistics(
- Map<String, EValueStatistic> eValueStatisticsM)
- {
- this.eValueStatistics = eValueStatisticsM;
- }
-
- public List<Integer> getAlignmentColumnIndexes()
- {
- return alignmentColumnIndexes;
- }
-
- public void setAlignmentColumnIndexes(
- List<Integer> alignmentColumnIndexesL)
- {
- this.alignmentColumnIndexes = alignmentColumnIndexesL;
- }
-
- public List<HashMap<String, Character>> getAnnotations()
- {
- return annotations;
- }
-
- public void setAnnotations(List<HashMap<String, Character>> annotationsL)
- {
- this.annotations = annotationsL;
- }
-
- public Map<String, String> getFileProperties()
- {
- return fileProperties;
- }
-
- public void setFileProperties(Map<String, String> fileProperties)
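+ * Sets the aligned consensus sequence this HMM is the model for. Only the
+ * sequence reference is replaced; the node-to-residue mapping is left as is.
+ *
+ * @param hmmSeq
+ * the aligned consensus sequence to associate with this model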
+ */
+ public void setHmmSeq(SequenceI hmmSeq)
{
- this.fileProperties = fileProperties;
+ this.hmmSeq = hmmSeq;
}
}