boolean removeBelowBackground, boolean infoLetterHeight)
{
ProfileI[] result = new ProfileI[width];
- int symbolCount = hmm.getNumberOfSymbols();
+ char[] symbols = hmm.getSymbols().toCharArray();
+ int symbolCount = symbols.length;
for (int column = start; column < end; column++)
{
ResidueCount counts = new ResidueCount();
- for (char symbol : hmm.getSymbols())
+ for (char symbol : symbols)
{
int value = getAnalogueCount(hmm, column, symbol,
removeBelowBackground, infoLetterHeight);
{
return null;
}
- int size = hmm.getNumberOfSymbols();
+ String alphabet = hmm.getSymbols();
+ int size = alphabet.length();
char symbols[] = new char[size];
int values[] = new int[size];
- List<Character> charList = hmm.getSymbols();
int totalCount = 0;
for (int i = 0; i < size; i++)
{
- char symbol = charList.get(i);
+ char symbol = alphabet.charAt(i);
symbols[i] = symbol;
int value = getAnalogueCount(hmm, column, symbol,
removeBelowBackground, infoHeight);
package jalview.datamodel;
-import java.util.ArrayList;
-import java.util.List;
-
/**
* stores data for each node in the hmm model
* @author TZVanaalten
public class HMMNode
{
//contains the match emissions for each symbol
- List<Double> matchEmissions = new ArrayList<>();
+ double[] matchEmissions;
+
//contains the insert emissions for each symbol
- List<Double> insertEmissions = new ArrayList<>();
- //contains the state transitions for each possible transition. These are bm, bi, bd, im, ii, dm and dd in order (0th position in
- // the array indicates the probability of a bm transition)
- List<Double> stateTransitions = new ArrayList<>();
+ double[] insertEmissions;
+
+ // contains the state transitions for each possible transition. These are mm,
+ // mi, md, im, ii, dm and dd in order
+ double[] stateTransitions;
//annotations
Integer alignmentColumn = null;
char maskValue;
char consensusStructure;
+ /**
+ * Constructor
+ */
public HMMNode()
{
}
- public HMMNode(HMMNode node)
+ public double[] getMatchEmissions()
{
- matchEmissions = new ArrayList<>(node.getMatchEmissions());
- insertEmissions = new ArrayList<>(node.getInsertEmissions());
- stateTransitions = new ArrayList<>(node.getStateTransitions());
- alignmentColumn = new Integer(node.getAlignmentColumn());
- consensusResidue = node.getConsensusResidue();
- referenceAnnotation = node.getReferenceAnnotation();
- maskValue = node.getMaskValue();
- consensusStructure = node.getConsensusStructure();
+ return matchEmissions;
}
- public List<Double> getMatchEmissions()
+ public double getMatchEmission(int symbolIndex)
{
- return matchEmissions;
+ return matchEmissions[symbolIndex];
}
- public void setMatchEmissions(List<Double> matchEmissionsL)
+ public void setMatchEmissions(double[] matches)
{
- this.matchEmissions = matchEmissionsL;
+ this.matchEmissions = matches;
}
- public List<Double> getInsertEmissions()
+
+ public double[] getInsertEmissions()
{
return insertEmissions;
}
- public void setInsertEmissions(List<Double> insertEmissionsL)
+ public double getInsertEmission(int symbolIndex)
+ {
+ return insertEmissions[symbolIndex];
+ }
+
+ public void setInsertEmissions(double[] insertEmissionsL)
{
this.insertEmissions = insertEmissionsL;
}
- public List<Double> getStateTransitions()
+ public double[] getStateTransitions()
{
return stateTransitions;
}
- public void setStateTransitions(List<Double> stateTransitionsM)
+ public double getStateTransition(int transition)
+ {
+ return stateTransitions[transition];
+ }
+
+ public void setStateTransitions(double[] stateTransitionsM)
{
this.stateTransitions = stateTransitionsM;
}
package jalview.datamodel;
+import jalview.io.HMMFile;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
- * Data structure which stores a hidden Markov model. Currently contains file
- * properties as well, not sure whether these should be transferred to the
- * HMMFile class
+ * Data structure which stores a hidden Markov model
*
* @author TZVanaalten
*
*/
public class HiddenMarkovModel
{
- private static final double LOG2 = Math.log(2);
-
- // Stores file properties. Do not directly access this field as it contains
- // only string value - use the getter methods. For example, to find the length
- // of theHMM, use getModelLength()to return an int value
- Map<String, String> fileProperties = new HashMap<>();
-
- // contains all of the symbols used in this model. The index of each symbol
- // represents its lookup value
- List<Character> symbols = new ArrayList<>();
-
- // contains information for each node in the model. The begin node is at index
- // 0. Node 0 contains average emission probabilities for each symbol
- List<HMMNode> nodes = new ArrayList<>();
-
- // contains the HMM node for each alignment column, alignment columns start at
- // index 0;
- Map<Integer, HMMNode> nodeLookup = new HashMap<>();
-
- // contains the symbol index for each symbol
- Map<Character, Integer> symbolIndexLookup = new HashMap<>();
-
- final static String YES = "yes";
-
- final static String NO = "no";
-
- // keys for file properties hashmap
- private static final String NAME = "NAME";
-
- private static final String ACCESSION_NUMBER = "ACC";
-
- private static final String DESCRIPTION = "DESC";
-
- private static final String LENGTH = "LENG";
-
- private static final String MAX_LENGTH = "MAXL";
-
- private static final String ALPHABET = "ALPH";
-
- private static final String DATE = "DATE";
-
- private static final String COMMAND_LOG = "COM";
+ public final static String YES = "yes";
- private static final String NUMBER_OF_SEQUENCES = "NSEQ";
+ public final static String NO = "no";
- private static final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
-
- private static final String CHECK_SUM = "CKSUM";
-
- private static final String GATHERING_THRESHOLDS = "GA";
-
- private static final String TRUSTED_CUTOFFS = "TC";
-
- private static final String NOISE_CUTOFFS = "NC";
-
- private static final String STATISTICS = "STATS";
-
- private static final String COMPO = "COMPO";
-
- private static final String GATHERING_THRESHOLD = "GA";
-
- private static final String TRUSTED_CUTOFF = "TC";
-
- private final String NOISE_CUTOFF = "NC";
-
- private static final String VITERBI = "VITERBI";
-
- private static final String MSV = "MSV";
-
- private static final String FORWARD = "FORWARD";
-
- private static final String MAP = "MAP";
-
- private static final String REFERENCE_ANNOTATION = "RF";
-
- private static final String CONSENSUS_RESIDUE = "CONS";
-
- private static final String CONSENSUS_STRUCTURE = "CS";
-
- private static final String MASKED_VALUE = "MM";
-
public static final int MATCHTOMATCH = 0;
public static final int MATCHTOINSERT = 1;
public static final int DELETETODELETE = 6;
+ private static final double LOG2 = Math.log(2);
+
+ /*
+ * properties read from HMM file header lines
+ */
+ Map<String, String> fileProperties = new HashMap<>();
+
String fileHeader;
+
+ /*
+ * the symbols used in this model e.g. "ACGT"
+ */
+ String alphabet;
+
+ /*
+ * symbol lookup index into the alphabet for 'A' to 'Z'
+ */
+ int[] symbolIndexLookup = new int['Z' - 'A' + 1];
+
+ /*
+ * Nodes in the model. The begin node is at index 0, and contains
+ * average emission probabilities for each symbol.
+ */
+ List<HMMNode> nodes = new ArrayList<>();
+
+ /*
+ * lookup of the HMM node for each alignment column (from 0)
+ */
+ Map<Integer, HMMNode> nodeLookup = new HashMap<>();
/**
* Constructor
{
super();
this.fileProperties = new HashMap<>(hmm.fileProperties);
- this.symbols = new ArrayList<>(hmm.symbols);
+ this.alphabet = hmm.alphabet;
this.nodes = new ArrayList<>(hmm.nodes);
this.nodeLookup = new HashMap<>(hmm.nodeLookup);
- this.symbolIndexLookup = new HashMap<>(
- hmm.symbolIndexLookup);
+ this.symbolIndexLookup = hmm.symbolIndexLookup;
this.fileHeader = new String(hmm.fileHeader);
}
{
float informationContent = 0f;
- for (char symbol : getSymbols())
+ for (char symbol : getSymbols().toCharArray())
{
float freq = ResidueProperties.backgroundFrequencies
.get(getAlphabetType()).get(symbol);
}
/**
- * Returns the map containing the matches between nodes and alignment column
- * indexes.
+ * Returns the symbols used in this hidden Markov model
*
* @return
- *
*/
- public Map<Integer, HMMNode> getNodeLookup()
+ public String getSymbols()
{
- return nodeLookup;
- }
-
- /**
- * Returns the list of symbols used in this hidden Markov model.
- *
- * @return
- */
- public List<Character> getSymbols()
- {
- return symbols;
+ return alphabet;
}
/**
- * Returns the file properties.
- *
- * @return
- */
- public Map<String, String> getFileProperties()
- {
- return fileProperties;
- }
-
- /**
* Gets the node in the hidden Markov model at the specified position.
*
* @param nodeIndex
*/
public HMMNode getNode(int nodeIndex)
{
- return getNodes().get(nodeIndex);
- }
-
- /**
- * Sets the list of symbols used in the hidden Markov model to the list
- * specified.
- *
- * @param symbolsL
- * The list of symbols to which the current list is to be changed.
- *
- */
- public void setSymbols(List<Character> symbolsL)
- {
- this.symbols = symbolsL;
+ return nodes.get(nodeIndex);
}
/**
*/
public String getName()
{
- return fileProperties.get(NAME);
+ return fileProperties.get(HMMFile.NAME);
}
/**
- * Returns the accession number.
+ * Answers the string value of the property (parsed from an HMM file) for the
+ * given key, or null if the property is not present
+ *
+ * @param key
* @return
*/
- public String getAccessionNumber()
+ public String getProperty(String key)
{
- return fileProperties.get(ACCESSION_NUMBER);
+ return fileProperties.get(key);
}
/**
- * Returns a description of the sequence alignment on which the hidden Markov
- * model is based.
+ * Answers true if the property with the given key is present with a value of
+ * "yes" (not case-sensitive), else false
*
+ * @param key
* @return
*/
- public String getDescription()
+ public boolean getBooleanProperty(String key)
{
- return fileProperties.get(DESCRIPTION);
+ return YES.equalsIgnoreCase(fileProperties.get(key));
}
/**
*/
public Integer getLength()
{
- if (fileProperties.get(LENGTH) == null)
- {
- return null;
- }
- return Integer.parseInt(fileProperties.get(LENGTH));
+ // look the LENG property up once; null means it was not present
+ String length = fileProperties.get(HMMFile.LENGTH);
+ return length == null ? null : Integer.valueOf(length);
}
/**
- * Returns the max instance length within the hidden Markov model.
- *
- * @return
- */
- public Integer getMaxInstanceLength()
- {
- if (fileProperties.get(MAX_LENGTH) == null)
- {
- return null;
- }
- return Integer.parseInt(fileProperties.get(MAX_LENGTH));
- }
-
- /**
- * Returns the type of symbol alphabet - "amino", "DNA", "RNA" are the
- * options. Other alphabets may be added.
+ * Returns the value of mandatory property "ALPH" - "amino", "DNA", "RNA" are
+ * the options. Other alphabets may be added.
*
* @return
*/
public String getAlphabetType()
{
- return fileProperties.get(ALPHABET);
+ return fileProperties.get(HMMFile.ALPHABET);
}
/**
- * Returns the date as a String.
+ * Sets the model alphabet to the symbols in the given string (ignoring any
+ * whitespace), and returns the number of symbols
*
- * @return
+ * @param symbols
*/
- public String getDate()
+ public int setAlphabet(String symbols)
{
- return fileProperties.get(DATE);
- }
+ // upper-case with a fixed locale so the result does not depend on the
+ // platform default (a Turkish locale would turn 'i' into U+0130, which
+ // would then be rejected below as an unexpected character)
+ String trimmed = symbols.toUpperCase(java.util.Locale.ROOT)
+ .replaceAll("\\s", "");
+ int count = trimmed.length();
+ // NOTE(review): rejected characters stay in the stored alphabet, so
+ // getNumberOfSymbols() can exceed the count returned here - confirm
+ alphabet = trimmed;
+ symbolIndexLookup = new int['Z' - 'A' + 1];
+ Arrays.fill(symbolIndexLookup, -1);
+ int ignored = 0;
- /**
- * Returns the command line log.
- *
- * @return
- */
- public String getCommandLineLog()
- {
- return fileProperties.get(COMMAND_LOG);
- }
-
- /**
- * Returns the number of sequences on which the HMM was trained.
- *
- * @return
- */
- public Integer getNumberOfSequences()
- {
- if (fileProperties.get(NUMBER_OF_SEQUENCES) == null)
- {
- return null;
- }
- return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES));
- }
-
- /**
- * Returns the effective number of sequences on which the HMM was based.
- *
- * @param value
- */
- public Double getEffectiveNumberOfSequences()
- {
- if (fileProperties.get(LENGTH) == null)
+ /*
+ * save the symbols in order, and a quick lookup of symbol position
+ */
+ for (int i = 0; i < count; i++)
{
- return null;
- }
- return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES));
- }
-
- /**
- * Returns the checksum.
- *
- * @return
- */
- public Long getCheckSum()
- {
- if (fileProperties.get(LENGTH) == null)
- {
- return null;
+ char symbol = trimmed.charAt(i);
+ // accept only the first occurrence of each letter 'A' to 'Z'
+ if (symbol >= 'A' && symbol <= 'Z'
+ && symbolIndexLookup[symbol - 'A'] == -1)
+ {
+ symbolIndexLookup[symbol - 'A'] = i;
+ }
+ else
+ {
+ System.err
+ .println(
+ "Unexpected or duplicated character in HMM ALPHabet: "
+ + symbol);
+ ignored++;
+ }
}
- return Long.parseLong(fileProperties.get(CHECK_SUM));
- }
-
- /**
- * Returns the list of nodes in this HMM.
- *
- * @return
- */
- public List<HMMNode> getNodes()
- {
- return nodes;
+ return count - ignored;
}
/**
*/
public double getMatchEmissionProbability(int alignColumn, char symbol)
{
- if (!symbolIndexLookup.containsKey(symbol))
- {
- return 0d;
- }
- int symbolIndex = symbolIndexLookup.get(symbol);
+ int symbolIndex = getSymbolIndex(symbol);
double probability = 0d;
- if (nodeLookup.containsKey(alignColumn))
+ if (symbolIndex != -1 && nodeLookup.containsKey(alignColumn))
{
HMMNode node = nodeLookup.get(alignColumn);
- probability = node.getMatchEmissions().get(symbolIndex);
+ probability = node.getMatchEmission(symbolIndex);
}
return probability;
}
*/
public double getInsertEmissionProbability(int alignColumn, char symbol)
{
- if (!symbolIndexLookup.containsKey(symbol))
- {
- return 0d;
- }
- int symbolIndex = symbolIndexLookup.get(symbol);
+ int symbolIndex = getSymbolIndex(symbol);
double probability = 0d;
- if (nodeLookup.containsKey(alignColumn))
+ if (symbolIndex != -1 && nodeLookup.containsKey(alignColumn))
{
HMMNode node = nodeLookup.get(alignColumn);
- probability = node.getInsertEmissions().get(symbolIndex);
+ probability = node.getInsertEmission(symbolIndex);
}
return probability;
}
if (nodeLookup.containsKey(alignColumn))
{
HMMNode node = nodeLookup.get(alignColumn);
- probability = node.getStateTransitions().get(transition);
+ probability = node.getStateTransition(transition);
}
return probability;
}
public char getConsensusAtAlignColumn(int columnIndex)
{
char mostLikely = '-';
- if (consensusResidueIsActive())
+ if (getBooleanProperty(HMMFile.CONSENSUS_RESIDUE))
{
HMMNode node = nodeLookup.get(columnIndex);
if (node == null)
else
{
double highestProb = 0;
- for (char character : symbols)
+ for (char character : alphabet.toCharArray())
{
double prob = getMatchEmissionProbability(columnIndex, character);
if (prob > highestProb)
}
/**
- * Returns the average match emission probability for a given symbol
- *
- * @param symbolIndex
- * The index of the symbol.
- * @return
- *
- */
- public double getAverageMatchEmission(int symbolIndex)
- {
- double value = nodes.get(0).getMatchEmissions().get(symbolIndex);
- return value;
- }
-
- /**
* Returns the number of symbols in the alphabet used in this HMM.
*
* @return
*/
public int getNumberOfSymbols()
{
- return symbols.size();
+ return alphabet.length();
}
/**
- * Adds a file property.
+ * Sets a property read from an HMM file
*
* @param key
* @param value
*/
- public void addFileProperty(String key, String value)
+ public void setProperty(String key, String value)
{
fileProperties.put(key, value);
}
/**
- * Returns a boolean indicating whether the reference annotation is active.
- *
- * @return
- */
- public boolean referenceAnnotationIsActive()
- {
- String status;
- status = fileProperties.get(REFERENCE_ANNOTATION);
- if (status == null)
- {
- return false;
- }
- switch (status)
- {
- case YES:
- return true;
- case NO:
- return false;
- default:
- return false;
- }
-
- }
-
- /**
- * Returns a boolean indicating whether the mask value annotation is active.
- *
- * @return
- */
- public boolean maskValueIsActive()
- {
- String status;
- status = fileProperties.get(MASKED_VALUE);
- if (status == null)
- {
- return false;
- }
- switch (status)
- {
- case YES:
- return true;
- case NO:
- return false;
- default:
- return false;
- }
-
- }
-
- /**
- * Returns a boolean indicating whether the consensus residue annotation is
- * active.
- *
- * @return
- */
- public boolean consensusResidueIsActive()
- {
- String status;
- status = fileProperties.get(CONSENSUS_RESIDUE);
- if (status == null)
- {
- return false;
- }
- switch (status)
- {
- case YES:
- return true;
- case NO:
- return false;
- default:
- return false;
- }
-
- }
-
- /**
- * Returns a boolean indicating whether the consensus structure annotation is
- * active.
- *
- * @return
- */
- public boolean consensusStructureIsActive()
- {
- String status;
- status = fileProperties.get(CONSENSUS_STRUCTURE);
- if (status == null)
- {
- return false;
- }
- switch (status)
- {
- case YES:
- return true;
- case NO:
- return false;
- default:
- return false;
- }
-
- }
-
- /**
- * Returns a boolean indicating whether the MAP annotation is active.
- *
- * @return
- */
- public boolean mapIsActive()
- {
- String status;
- status = fileProperties.get(MAP);
- if (status == null)
- {
- return false;
- }
- switch (status)
- {
- case YES:
- return true;
- case NO:
- return false;
- default:
- return false;
- }
-
- }
-
- /**
* Sets the alignment column of the specified node
*
* @param nodeIndex
}
/**
- * Sets the reference annotation at a given node.
+ * Sets the reference annotation at a given node
*
* @param nodeIndex
* @param value
}
/**
- * Sets the consensus residue at a given node.
+ * Sets the consensus residue at a given node
*
* @param nodeIndex
* @param value
}
/**
- * Sets the consensus structure at a given node.
+ * Sets the consensus structure at a given node
*
* @param nodeIndex
* @param value
}
/**
- * Sets the mask value at a given node.
+ * Sets the mask value at a given node
*
* @param nodeIndex
* @param value
*
* @return
*/
- public String getGatheringThreshold()
- {
- String value;
- value = fileProperties.get("GA");
- return value;
- }
-
- /**
- * Temporary implementation, should not be used.
- *
- * @return
- */
- public String getNoiseCutoff()
- {
- String value;
- value = fileProperties.get("NC");
- return value;
- }
-
- /**
- * Temporary implementation, should not be used.
- *
- * @return
- */
- public String getTrustedCutoff()
- {
- String value;
- value = fileProperties.get("TC");
- return value;
- }
-
- /**
- * Temporary implementation, should not be used.
- *
- * @return
- */
public String getViterbi()
{
String value;
- value = fileProperties.get(VITERBI);
+ value = fileProperties.get(HMMFile.VITERBI);
return value;
}
public String getMSV()
{
String value;
- value = fileProperties.get(MSV);
+ value = fileProperties.get(HMMFile.MSV);
return value;
}
public String getForward()
{
String value;
- value = fileProperties.get(FORWARD);
+ value = fileProperties.get(HMMFile.FORWARD);
return value;
}
/**
- * Sets the activation status of the MAP annotation.
- *
- * @param status
- */
- public void setMAPStatus(boolean status)
- {
- fileProperties.put(MAP, status ? YES : NO);
- }
-
- /**
- * Sets the activation status of the reference annotation.
- *
- * @param status
- */
- public void setReferenceAnnotationStatus(boolean status)
- {
- fileProperties.put(REFERENCE_ANNOTATION, status ? YES : NO);
- }
-
- /**
- * Sets the activation status of the mask value annotation.
- *
- * @param status
- */
- public void setMaskedValueStatus(boolean status)
- {
- fileProperties.put(MASKED_VALUE, status ? YES : NO);
- }
-
- /**
- * Sets the activation status of the consensus residue annotation.
- *
- * @param status
- */
- public void setConsensusResidueStatus(boolean status)
- {
- fileProperties.put(CONSENSUS_RESIDUE, status ? YES : NO);
- }
-
- /**
- * Sets the activation status of the consensus structure annotation.
- *
- * @param status
- */
- public void setConsensusStructureStatus(boolean status)
- {
- fileProperties.put(CONSENSUS_STRUCTURE, status ? YES : NO);
- }
-
- /**
* Answers the HMMNode mapped to the given alignment column (base 0), or null
* if none is mapped
*
}
/**
- * Finds the String values of a boolean. "yes" for true and "no" for false.
- *
- * @param value
- * @return
- */
- public static String findStringFromBoolean(boolean value)
- {
- if (value)
- {
- return YES;
- }
- else
- {
- return NO;
- }
- }
-
-
-
- /**
* Returns the consensus sequence based on the most probable symbol at each
* position. The sequence is adjusted to match the length of the existing
* sequence alignment. Gap characters are used as padding.
*
- * @param length
- * The length of the longest sequence in the existing alignment.
* @return
*/
public Sequence getConsensusSequence()
return consensus;
}
- public int getSymbolIndex(char c)
+ /**
+ * Answers the index position (0...) of the given symbol, or -1 if not a valid
+ * symbol for this HMM. The lookup is not case-sensitive.
+ *
+ * @param symbol
+ * @return
+ */
+ public int getSymbolIndex(char symbol)
{
- return symbolIndexLookup.get(c);
+ /*
+ * symbolIndexLookup holds the index for 'A' to 'Z'
+ */
+ char c = Character.toUpperCase(symbol);
+ if ('A' <= c && c <= 'Z')
+ {
+ // index with the upper-cased character that was range-checked above;
+ // using the raw symbol would run off the array for lower-case input
+ return symbolIndexLookup[c - 'A'];
+ }
+ return -1;
}
- public void setSymbolIndex(Character c, Integer i)
+ public void addNode(HMMNode node)
{
- symbolIndexLookup.put(c, i);
+ nodes.add(node);
}
-
-
}
public class HMMFile extends AlignFile
implements AlignmentFileReaderI, AlignmentFileWriterI
{
- private static final int NUMBER_OF_TRANSITIONS = 7;
+ /*
+ * keys to data in HMM file, used to store as properties of the HiddenMarkovModel
+ */
+ private static final String HMM = "HMM";
- private static final String SPACE = " ";
+ public static final String NAME = "NAME";
+
+ public static final String ACCESSION_NUMBER = "ACC";
+
+ public static final String DESCRIPTION = "DESC";
+
+ public static final String LENGTH = "LENG";
+
+ public static final String MAX_LENGTH = "MAXL";
+
+ public static final String ALPHABET = "ALPH";
+
+ private static final String ALPH_AMINO = "amino";
+
+ private static final String ALPH_DNA = "DNA";
+
+ private static final String ALPH_RNA = "RNA";
+
+ private static final String ALPHABET_AMINO = "ACDEFGHIKLMNPQRSTVWY";
+
+ private static final String ALPHABET_DNA = "ACGT";
+
+ private static final String ALPHABET_RNA = "ACGU";
+
+ public static final String DATE = "DATE";
+
+ public static final String COMMAND_LOG = "COM";
+
+ public static final String NUMBER_OF_SEQUENCES = "NSEQ";
+
+ public static final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
+
+ public static final String CHECK_SUM = "CKSUM";
- private static final String COMPO = "COMPO";
+ public static final String STATISTICS = "STATS";
- private static final String EMPTY = "";
+ public static final String COMPO = "COMPO";
+
+ public static final String GATHERING_THRESHOLD = "GA";
+
+ public static final String TRUSTED_CUTOFF = "TC";
+
+ public static final String NOISE_CUTOFF = "NC";
+
+ public static final String VITERBI = "VITERBI";
+
+ public static final String MSV = "MSV";
+
+ public static final String FORWARD = "FORWARD";
+
+ public static final String MAP = "MAP";
+
+ public static final String REFERENCE_ANNOTATION = "RF";
+
+ public static final String CONSENSUS_RESIDUE = "CONS";
+
+ public static final String CONSENSUS_STRUCTURE = "CS";
+
+ public static final String MASKED_VALUE = "MM";
+
+ private static final int NUMBER_OF_TRANSITIONS = 7;
+
+ private static final String SPACE = " ";
/*
- * guide line added to an output HMMER file, purely for readability
+ * optional guide line added to an output HMMER file, purely for readability
*/
private static final String TRANSITIONTYPELINE = " m->m m->i m->d i->m i->i d->m d->d";
- private static String NL = "\n";
+ private static String NL = System.lineSeparator();
private HiddenMarkovModel hmm;
// number of symbols in the alphabet used in the hidden Markov model
- int numberOfSymbols;
+ private int numberOfSymbols;
/**
- * Parses immediately.
+ * Constructor that parses immediately
*
* @param inFile
* @param type
}
/**
- * Parses immediately.
+ * Constructor that parses immediately
*
* @param source
* @throws IOException
}
/**
- * Default constructor, do not use!
+ * Default constructor
*/
public HMMFile()
{
-
}
/**
- * Constructor for HMMFile used for exporting.
+ * Constructor for HMMFile used for exporting
*
* @param hmm
* @param exportImmediately
}
/**
- * For testing, do not use.
- *
- * @param br
- */
- HMMFile(BufferedReader br)
- {
- dataIn = br;
- }
-
- /**
- * Returns the HMM produced by reading in a HMMER3 file.
+ * Returns the HMM produced by parsing a HMMER3 file
*
* @return
*/
}
/**
- * Sets the HMM used in this file.
- *
- * @param model
- */
- public void setHMM(HiddenMarkovModel model)
- {
- this.hmm = model;
- }
-
- /**
- * Gets the name of the hidden Markov model.
+ * Gets the name of the hidden Markov model
*
* @return
*/
}
/**
- * Reads the data from HMM file into the HMM field on this object.
- *
- * @throws IOException
+ * Reads the data from HMM file into the HMM model
*/
@Override
- public void parse() throws IOException
+ public void parse()
{
try
{
hmm = new HiddenMarkovModel();
- parseFileProperties(dataIn);
+ parseHeaderLines(dataIn);
parseModel(dataIn);
} catch (Exception e)
{
}
/**
- * Reads the data from HMM file into the HMM field on this object.
- *
- * @throws IOException
- */
-
- public void parse(BufferedReader br) throws IOException
- {
- hmm = new HiddenMarkovModel();
- parseFileProperties(br);
- parseModel(br);
- }
-
-
-
- /**
- * Imports the file properties from a HMMER3 file.
+ * Reads the header properties from a HMMER3 file and saves them in the
+ * HiddenMarkovModel. This method exits after reading the next line after the
+ * HMM line.
*
* @param input
- * The buffered reader used to read in the file.
* @throws IOException
*/
- void parseFileProperties(BufferedReader input) throws IOException
+ void parseHeaderLines(BufferedReader input) throws IOException
{
- boolean readingFile = true;
+ boolean readingHeaders = true;
hmm.setFileHeader(input.readLine());
String line = input.readLine();
- while (readingFile)
+ while (readingHeaders && line != null)
{
- if (line != null)
+ Scanner parser = new Scanner(line);
+ if (!parser.hasNext())
+ {
+ // blank line: there is no key to read, so skip it rather than let
+ // parser.next() throw NoSuchElementException
+ parser.close();
+ line = input.readLine();
+ continue;
+ }
+ String next = parser.next();
+ if (ALPHABET.equals(next))
{
- Scanner parser = new Scanner(line);
- String next = parser.next();
- if ("HMM".equals(next)) // indicates start of HMM data (end of file
- // properties)
- {
- readingFile = false;
- fillSymbols(parser);
- numberOfSymbols = hmm.getNumberOfSymbols();
- }
- else if ("STATS".equals(next))
- {
- parser.next();
- String key;
- String value;
- key = parser.next();
- value = parser.next() + SPACE + SPACE + parser.next();
- hmm.addFileProperty(key, value);
- }
- else
+ String alphabetType = parser.next();
+ hmm.setProperty(ALPHABET, alphabetType);
+ String alphabet = ALPH_DNA.equalsIgnoreCase(alphabetType)
+ ? ALPHABET_DNA
+ : (ALPH_RNA.equalsIgnoreCase(alphabetType) ? ALPHABET_RNA
+ : ALPHABET_AMINO);
+ numberOfSymbols = hmm.setAlphabet(alphabet);
+ }
+ else if (HMM.equals(next))
+ {
+ readingHeaders = false;
+ String symbols = line.substring(line.indexOf(HMM) + HMM.length());
+ numberOfSymbols = hmm.setAlphabet(symbols);
+ }
+ else if (STATISTICS.equals(next))
+ {
+ parser.next();
+ String key;
+ String value;
+ key = parser.next();
+ value = parser.next() + SPACE + SPACE + parser.next();
+ hmm.setProperty(key, value);
+ }
+ else
+ {
+ String key = next;
+ String value = parser.next();
+ while (parser.hasNext())
{
- String key = next;
- String value = parser.next();
- while (parser.hasNext())
- {
- value = value + SPACE + parser.next();
- }
- hmm.addFileProperty(key, value);
+ value = value + SPACE + parser.next();
}
- parser.close();
+ hmm.setProperty(key, value);
}
+ parser.close();
line = input.readLine();
- if (line == null)
- {
- readingFile = false;
- }
}
-
}
/**
* Parses the model data from the HMMER3 file
*
* @param input
- * The buffered reader used to read the file.
* @throws IOException
*/
void parseModel(BufferedReader input) throws IOException
{
boolean first = true;
+ // specification says there must always be an HMM header
+ // and one more header which is skipped here
String line = input.readLine();
while (!"//".equals(line))
{
HMMNode node = new HMMNode();
- hmm.getNodes().add(node);
+ hmm.addNode(node);
Scanner matchReader = new Scanner(line);
String next = matchReader.next();
if (next.equals(COMPO) || !first)
{
// stores match emission line in list
- List<Double> matches = new ArrayList<>();
- matches = fillList(matchReader, numberOfSymbols);
+ double[] matches = parseDoubles(matchReader, numberOfSymbols);
node.setMatchEmissions(matches);
if (!first)
{
+ // TODO handle files with no column map (make our own)
int column = parseAnnotations(matchReader, node);
hmm.setAlignmentColumn(node, column - 1);
}
// stores insert emission line in list
line = input.readLine();
Scanner insertReader = new Scanner(line);
- List<Double> inserts = new ArrayList<>();
- inserts = fillList(insertReader, numberOfSymbols);
+ double[] inserts = parseDoubles(insertReader, numberOfSymbols);
node.setInsertEmissions(inserts);
insertReader.close();
// stores state transition line in list
line = input.readLine();
Scanner transitionReader = new Scanner(line);
- List<Double> transitions = new ArrayList<>();
- transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
+ double[] transitions = parseDoubles(transitionReader,
+ NUMBER_OF_TRANSITIONS);
node.setStateTransitions(transitions);
transitionReader.close();
line = input.readLine();
* HMM counts columns from 1, convert to base 0 for Jalview
*/
int column = 0;
- if (hmm.mapIsActive() && scanner.hasNext())
+ if (hmm.getBooleanProperty(MAP) && scanner.hasNext())
{
column = scanner.nextInt();
node.setAlignmentColumn(column - 1);
}
/**
- * Fills a list of doubles from an input line
+ * Fills an array of doubles parsed from an input line
*
* @param input
- * The scanner for the line containing the data to be transferred to
- * the list.
* @param numberOfElements
- * The number of elements in the list to be filled.
- * @return filled list Returns the list of doubles.
+ * @return
* @throws IOException
*/
- static List<Double> fillList(Scanner input,
+ static double[] parseDoubles(Scanner input,
int numberOfElements) throws IOException
{
- List<Double> list = new ArrayList<>();
+ double[] values = new double[numberOfElements];
for (int i = 0; i < numberOfElements; i++)
{
-
+ if (!input.hasNext())
+ {
+ throw new IOException("Incomplete data");
+ }
String next = input.next();
- if (next.contains("*")) // state transitions to or from delete states
- // occasionally have values of -infinity. These
- // values are represented by an * in the .hmm
- // file.
+ // an '*' in the .hmm file stands for a value of -infinity (seen for
+ // state transitions to or from delete states)
+ if (next.contains("*"))
{
- list.add(Double.NEGATIVE_INFINITY);
+ values[i] = Double.NEGATIVE_INFINITY;
}
else
{
- double prob = Double.valueOf(next);
+ // parse straight to a primitive (no boxing); the file value x is
+ // stored as e^(-x)
+ double prob = Double.parseDouble(next);
prob = Math.pow(Math.E, -prob);
- list.add(prob);
+ values[i] = prob;
}
}
- if (list.size() < numberOfElements)
- {
- throw new IOException("Incomplete data");
- }
- return list;
+ return values;
}
/**
* @param columnSeparation
* The separation between subsequent data entries.
* @param data
- * The list fo data to be added to the String.
+ * The list of data to be added to the String.
* @return
*/
String addData(int initialColumnSeparation,
int columnSeparation, List<String> data)
{
- String line = EMPTY;
- int index = 0;
+ // accumulate in a builder instead of re-copying a String for each value
+ StringBuilder line = new StringBuilder();
+ boolean first = true;
for (String value : data)
{
- if (index == 0)
- {
- line += String.format("%" + initialColumnSeparation + "s", value);
- }
- else
- {
- line += String.format("%" + columnSeparation + "s", value);
- }
- index++;
+ // the first value is right-aligned in its own (initial) column width
+ int sep = first ? initialColumnSeparation : columnSeparation;
+ line.append(String.format("%" + sep + "s", value));
+ first = false;
}
- return line;
+ return line.toString();
}
}
/**
- * Converts a list of doubles into a list of Strings, rounded to the nearest
- * 5th decimal place.
+ * Converts an array of doubles into a list of Strings, rounded to the nearest
+ * 5th decimal place
*
- * @param list
+ * @param doubles
* @param noOfDecimals
* @return
*/
- List<String> doubleListToStringList(List<Double> list)
+ List<String> doublesToStringList(double[] doubles)
{
List<String> strList = new ArrayList<>();
- for (double value : list)
+ for (double value : doubles)
{
String strValue;
if (value > 0)
{
strValue = String.format("%.5f", value);
-
}
else if (value == -0.00000d)
{
{
strValue = "*";
}
-
strList.add(strValue);
}
return strList;
}
/**
- * Converts a primitive array of Strings to a list of Strings.
+ * Appends model data in string format to the string builder
*
- * @param array
- * @return
+ * @param output
*/
- List<String> stringArrayToStringList(String[] array)
+ void appendModelAsString(StringBuilder output)
{
- List<String> list = new ArrayList<>();
- for (String value : array)
+ output.append(HMM).append(" ");
+ String charSymbols = hmm.getSymbols();
+ for (char c : charSymbols.toCharArray())
{
- list.add(value);
+ output.append(String.format("%9s", c));
}
-
- return list;
- }
-
- /**
- * Returns a string containing the model data.
- */
- String getModelAsString()
- {
- StringBuilder output = new StringBuilder();
- String symbolLine = "HMM";
- List<Character> charSymbols = hmm.getSymbols();
- List<String> strSymbols;
- strSymbols = charListToStringList(charSymbols);
- symbolLine += addData(11, 9, strSymbols);
- output.append(symbolLine);
output.append(NL).append(TRANSITIONTYPELINE);
int length = hmm.getLength();
- for (int node = 0; node <= length; node++)
+ for (int nodeNo = 0; nodeNo <= length; nodeNo++)
{
- String matchLine;
- if (node == 0)
- {
- matchLine = String.format("%7s", "COMPO");
- }
- else
- {
- matchLine = String.format("%7s", node);
- }
+ String matchLine = String.format("%7s",
+ nodeNo == 0 ? "COMPO" : Integer.toString(nodeNo));
- List<String> strMatches;
- List<Double> doubleMatches;
- doubleMatches = convertListToLogSpace(
- hmm.getNode(node).getMatchEmissions());
- strMatches = doubleListToStringList(doubleMatches);
+ double[] doubleMatches = convertToLogSpace(
+ hmm.getNode(nodeNo).getMatchEmissions());
+ List<String> strMatches = doublesToStringList(doubleMatches);
matchLine += addData(10, 9, strMatches);
-
- if (node != 0)
+ if (nodeNo != 0)
{
- matchLine += SPACE + (hmm.getNodeAlignmentColumn(node) + 1);
- matchLine += SPACE + hmm.getConsensusResidue(node);
- matchLine += SPACE + hmm.getReferenceAnnotation(node);
+ matchLine += SPACE + (hmm.getNodeAlignmentColumn(nodeNo) + 1);
+ matchLine += SPACE + hmm.getConsensusResidue(nodeNo);
+ matchLine += SPACE + hmm.getReferenceAnnotation(nodeNo);
if (hmm.getFileHeader().contains("HMMER3/f"))
{
- matchLine += SPACE + hmm.getMaskedValue(node);
- matchLine += SPACE + hmm.getConsensusStructure(node);
+ matchLine += SPACE + hmm.getMaskedValue(nodeNo);
+ matchLine += SPACE + hmm.getConsensusStructure(nodeNo);
}
-
}
output.append(NL).append(matchLine);
- String insertLine = EMPTY;
- List<String> strInserts;
- List<Double> doubleInserts;
- doubleInserts = convertListToLogSpace(
- hmm.getNode(node).getInsertEmissions());
- strInserts = doubleListToStringList(doubleInserts);
+ String insertLine = "";
+
+ double[] doubleInserts = convertToLogSpace(
+ hmm.getNode(nodeNo).getInsertEmissions());
+ List<String> strInserts = doublesToStringList(doubleInserts);
insertLine += addData(17, 9, strInserts);
output.append(NL).append(insertLine);
- String transitionLine = EMPTY;
- List<String> strTransitions;
- List<Double> doubleTransitions;
- doubleTransitions = convertListToLogSpace(
- hmm.getNode(node).getStateTransitions());
- strTransitions = doubleListToStringList(doubleTransitions);
+ String transitionLine = "";
+ double[] doubleTransitions = convertToLogSpace(
+ hmm.getNode(nodeNo).getStateTransitions());
+ List<String> strTransitions = doublesToStringList(
+ doubleTransitions);
transitionLine += addData(17, 9, strTransitions);
output.append(NL).append(transitionLine);
}
- return output.toString();
}
/**
- * Returns a String containing the HMM file properties
+ * Appends formatted HMM file properties to the string builder
+ *
+ * @param output
*/
- String getFilePropertiesAsString()
+ void appendProperties(StringBuilder output)
{
- StringBuffer output = new StringBuffer();
- String line;
-
output.append(hmm.getFileHeader());
-
- line = String.format("%-5s %1s", "NAME", hmm.getName());
- output.append(NL + line);
- if (hmm.getAccessionNumber() != null)
- {
- line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
- output.append(NL + line);
- }
+ String format = "%n%-5s %1s";
+ appendProperty(output, format, NAME);
+ appendProperty(output, format, ACCESSION_NUMBER);
+ appendProperty(output, format, DESCRIPTION);
+ appendProperty(output, format, LENGTH);
+ appendProperty(output, format, MAX_LENGTH);
+ appendProperty(output, format, ALPHABET);
+ appendBooleanProperty(output, format, REFERENCE_ANNOTATION);
+ appendBooleanProperty(output, format, MASKED_VALUE);
+ appendBooleanProperty(output, format, CONSENSUS_RESIDUE);
+ appendBooleanProperty(output, format, CONSENSUS_STRUCTURE);
+ appendBooleanProperty(output, format, MAP);
+ appendProperty(output, format, DATE);
+ appendProperty(output, format, NUMBER_OF_SEQUENCES);
+ appendProperty(output, format, EFF_NUMBER_OF_SEQUENCES);
+ appendProperty(output, format, CHECK_SUM);
+ appendProperty(output, format, GATHERING_THRESHOLD);
+ appendProperty(output, format, TRUSTED_CUTOFF);
+ appendProperty(output, format, NOISE_CUTOFF);
- if (hmm.getDescription() != null)
+ if (hmm.getMSV() != null)
{
- line = String.format("%-5s %1s", "DESC", hmm.getDescription());
- output.append(NL + line);
- }
- line = String.format("%-5s %1s", "LENG", hmm.getLength());
- output.append(NL + line);
+ output.append(String.format("%n%-19s %18s", "STATS LOCAL MSV",
+ hmm.getMSV()));
- if (hmm.getMaxInstanceLength() != null)
- {
- line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
- output.append(NL + line);
- }
- line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
- output.append(NL + line);
-
- boolean status;
- String statusStr;
-
- status = hmm.referenceAnnotationIsActive();
- statusStr = HiddenMarkovModel.findStringFromBoolean(status);
- line = String.format("%-5s %1s", "RF",
- statusStr);
- output.append(NL + line);
-
- status = hmm.maskValueIsActive();
- statusStr = HiddenMarkovModel.findStringFromBoolean(status);
- line = String.format("%-5s %1s", "MM",
- statusStr);
- output.append(NL + line);
-
- status = hmm.consensusResidueIsActive();
- statusStr = HiddenMarkovModel.findStringFromBoolean(status);
- line = String.format("%-5s %1s", "CONS",
- statusStr);
- output.append(NL + line);
-
- status = hmm.consensusStructureIsActive();
- statusStr = HiddenMarkovModel.findStringFromBoolean(status);
- line = String.format("%-5s %1s", "CS",
- statusStr);
- output.append(NL + line);
-
- status = hmm.mapIsActive();
- statusStr = HiddenMarkovModel.findStringFromBoolean(status);
- line = String.format("%-5s %1s", "MAP",
- statusStr);
- output.append(NL + line);
-
-
- if (hmm.getDate() != null)
- {
- line = String.format("%-5s %1s", "DATE", hmm.getDate());
- output.append(NL + line);
- }
- if (hmm.getNumberOfSequences() != null)
- {
- line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
- output.append(NL + line);
- }
- if (hmm.getEffectiveNumberOfSequences() != null)
- {
- line = String.format("%-5s %1s", "EFFN",
- hmm.getEffectiveNumberOfSequences());
- output.append(NL + line);
- }
- if (hmm.getCheckSum() != null)
- {
- line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
- output.append(NL + line);
- }
- if (hmm.getGatheringThreshold() != null)
- {
- line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
- output.append(NL + line);
- }
+ output.append(String.format("%n%-19s %18s", "STATS LOCAL VITERBI",
+ hmm.getViterbi()));
- if (hmm.getTrustedCutoff() != null)
- {
- line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
- output.append(NL + line);
+ output.append(String.format("%n%-19s %18s", "STATS LOCAL FORWARD",
+ hmm.getForward()));
}
- if (hmm.getNoiseCutoff() != null)
- {
- line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
- output.append(NL + line);
- }
- if (hmm.getMSV() != null)
+ }
+
+ /**
+ * Appends 'yes' or 'no' for the given property, according to whether or not
+ * it is set in the HMM
+ *
+ * @param output
+ * @param format
+ * @param propertyName
+ */
+ private void appendBooleanProperty(StringBuilder output, String format,
+ String propertyName)
+ {
+ boolean set = hmm.getBooleanProperty(propertyName);
+ output.append(String.format(format, propertyName,
+ set ? HiddenMarkovModel.YES : HiddenMarkovModel.NO));
+ }
+
+ /**
+ * Appends the value of the given property to the output, if not null
+ *
+ * @param output
+ * @param format
+ * @param propertyName
+ */
+ private void appendProperty(StringBuilder output, String format,
+ String propertyName)
+ {
+ String value = hmm.getProperty(propertyName);
+ if (value != null)
{
- line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
- output.append(NL + line);
-
- line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
- hmm.getViterbi());
- output.append(NL + line);
-
- line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
- hmm.getForward());
- output.append(NL + line);
+ output.append(String.format(format, propertyName, value));
}
- return output.toString();
}
-
/**
* Returns the char value of a single lettered String.
*
char character;
character = string.charAt(0);
return character;
-
}
@Override
*/
public String print()
{
- StringBuffer output = new StringBuffer();
- output.append(getFilePropertiesAsString());
+ StringBuilder output = new StringBuilder();
+ appendProperties(output);
output.append(NL);
- output.append(getModelAsString());
+ appendModelAsString(output);
output.append(NL + "//");
return output.toString();
}
/**
- * Converts the probabilities contained in a list into log space.
+ * Converts the probabilities contained in an array into log space
*
- * @param list
+ * @param ds
*/
- List<Double> convertListToLogSpace(List<Double> list)
+ double[] convertToLogSpace(double[] ds)
{
-
- List<Double> convertedList = new ArrayList<>();
- for (int i = 0; i < list.size(); i++)
+ double[] converted = new double[ds.length];
+ for (int i = 0; i < ds.length; i++)
{
- double prob = list.get(i);
+ double prob = ds[i];
double logProb = -1 * Math.log(prob);
- convertedList.add(logProb);
+ converted[i] = logProb;
}
- return convertedList;
-
-
+ return converted;
}
/**
SequenceI[] seq = new SequenceI[1];
seq[0] = hmmSeq;
return seq;
-
- }
-
- /**
- * Fills symbol array and adds each symbol to an index lookup
- *
- * @param parser
- * The scanner scanning the symbol line in the file.
- */
- public void fillSymbols(Scanner parser)
- {
- int i = 0;
- while (parser.hasNext())
- {
- String strSymbol = parser.next();
- char[] symbol = strSymbol.toCharArray();
- hmm.getSymbols().add(symbol[0]);
- hmm.setSymbolIndex(symbol[0], i);
- i++;
- }
}
@Override
{
assertEquals(hmm.getConsensusAtAlignColumn(10), 's');
assertEquals(hmm.getConsensusAtAlignColumn(50), 'k');
- hmm.setConsensusResidueStatus(false);
+ hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "no");
assertEquals(hmm.getConsensusAtAlignColumn(100), 'l');
assertEquals(hmm.getConsensusAtAlignColumn(400), 'k');
}
.get("amino");
int col = 4;
float expected = 0f;
- for (char aa : hmm.getSymbols())
+ for (char aa : hmm.getSymbols().toCharArray())
{
double mep = hmm.getMatchEmissionProbability(col, aa);
float background = uniprotFreqs.get(aa);
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
import jalview.gui.Desktop;
+import jalview.io.HMMFile;
import jalview.ws.params.ArgumentI;
import java.io.IOException;
assertEquals(hmm.getLength().intValue(), 148);
assertEquals(hmm.getAlphabetType(), "amino");
assertEquals(hmm.getName(), "Alignment");
- assertEquals(hmm.getEffectiveNumberOfSequences(), 0.648193, 0.0001);
+ assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
+ "0.648193");
assertEquals(hmm.getConsensusAtAlignColumn(15), 's');
}
package jalview.io;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
import jalview.datamodel.HMMNode;
import jalview.datamodel.HiddenMarkovModel;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
import java.util.Scanner;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
+import junit.extensions.PA;
+
public class HMMFileTest {
HMMFile fn3;
HMMFile made1;
@BeforeClass(alwaysRun = true)
- public void setUp() throws FileNotFoundException
+ public void setUp() throws IOException
{
- fn3 = new HMMFile(new BufferedReader(
- new FileReader(("test/jalview/io/test_fn3_hmm.txt"))));
+ fn3 = new HMMFile("test/jalview/io/test_fn3_hmm.txt",
+ DataSourceType.FILE);
- pKinase = new HMMFile(new BufferedReader(
- new FileReader(("test/jalview/io/test_PKinase_hmm.txt"))));
+ pKinase = new HMMFile("test/jalview/io/test_PKinase_hmm.txt",
+ DataSourceType.FILE);
- made1 = new HMMFile(new BufferedReader(
- new FileReader(("test/jalview/io/test_MADE1_hmm.txt"))));
+ made1 = new HMMFile("test/jalview/io/test_MADE1_hmm.txt",
+ DataSourceType.FILE);
}
@Test(groups = "Functional")
public void testParse() throws IOException
{
- pKinase.parse();
HiddenMarkovModel hmm = pKinase.getHMM();
assertEquals(hmm.getName(), "Pkinase");
- assertEquals(hmm.getAccessionNumber(), "PF00069.17");
- assertEquals(hmm.getDescription(), "Protein kinase domain");
+ assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17");
+ assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
+ "Protein kinase domain");
assertEquals(hmm.getLength().intValue(), 260);
- assertNull(hmm.getMaxInstanceLength());
+ assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
assertEquals(hmm.getAlphabetType(), "amino");
- assertEquals(hmm.referenceAnnotationIsActive(), false);
- assertEquals(hmm.maskValueIsActive(), false);
- assertEquals(hmm.consensusResidueIsActive(), true);
- assertEquals(hmm.consensusStructureIsActive(),
- true);
- assertEquals(hmm.mapIsActive(), true);
- assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
- assertNull(hmm.getCommandLineLog());
- assertEquals(hmm.getNumberOfSequences().intValue(), 54);
- assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 4d);
- assertEquals(hmm.getCheckSum().longValue(), 3106786190l);
- assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
- assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
- assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");
-
- List<Character> symbols = Arrays
- .asList(new Character[]
- { 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N',
- 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y' });
- assertEquals(hmm.getSymbols(), symbols);
+ assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
+ assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
+ assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
+ assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
+ assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
+ assertEquals(hmm.getProperty(HMMFile.DATE), "Thu Jun 16 11:44:06 2011");
+ assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
+ assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "54");
+ assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
+ "3.358521");
+ assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3106786190");
+ assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
+ "70.30 70.30");
+ assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "70.30 70.30");
+ assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "70.20 70.20");
+
+ assertEquals(hmm.getSymbols(), "ACDEFGHIKLMNPQRSTVWY");
assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
assertEquals(hmm.getConsensusResidue(145), 'a');
assertEquals(hmm.getMaskedValue(183), '-');
assertEquals(hmm.getConsensusStructure(240), 'H');
-
}
- @Test(priority = 0)
- public void testParseFileProperties() throws IOException
+ @Test(groups = "Functional")
+ public void testParseHeaderLines_amino() throws IOException
{
FileReader fr = new FileReader(
new File("test/jalview/io/test_fn3_hmm.txt"));
BufferedReader br = new BufferedReader(fr);
- fn3.setHMM(new HiddenMarkovModel());
- fn3.parseFileProperties(br);
- fn3.parseModel(br); // this is for a later test
- HiddenMarkovModel testHMM = new HiddenMarkovModel();
- testHMM = fn3.getHMM();
+ HiddenMarkovModel hmm = new HiddenMarkovModel();
+ HMMFile testee = new HMMFile();
+ PA.setValue(testee, "hmm", hmm);
+ testee.parseHeaderLines(br);
br.close();
fr.close();
- assertEquals(testHMM.getName(), "fn3");
- assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
- assertEquals(testHMM.getDescription(),
+ assertEquals(hmm.getName(), "fn3");
+ assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00041.13");
+ assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
"Fibronectin type III domain");
- assertEquals(testHMM.getLength().intValue(), 86);
- assertNull(testHMM.getMaxInstanceLength());
- assertEquals(testHMM.getAlphabetType(), "amino");
- assertEquals(testHMM.referenceAnnotationIsActive(), false);
- assertEquals(testHMM.maskValueIsActive(), false);
- assertEquals(testHMM.consensusResidueIsActive(), true);
- assertEquals(testHMM.consensusStructureIsActive(), true);
- assertEquals(testHMM.mapIsActive(), true);
- assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
- assertNull(testHMM.getCommandLineLog());
- assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
- assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d);
- assertEquals(testHMM.getCheckSum().longValue(), 3564431818l);
- assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
- assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
- assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
- assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
- assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
- assertEquals(testHMM.getForward(), "-3.8341 0.71847");
-
-
- FileReader fr3 = new FileReader(
+ assertEquals(hmm.getProperty(HMMFile.LENGTH), "86");
+ assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
+ assertEquals(hmm.getAlphabetType(), "amino");
+ assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
+ assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
+ assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
+ assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
+
+ assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
+ assertEquals(hmm.getProperty(HMMFile.DATE), "Fri Jun 20 08:22:31 2014");
+ assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
+ assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "106");
+ assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
+ "11.415833");
+ assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3564431818");
+ assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD), "8.00 7.20");
+ assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "8.00 7.20");
+ assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "7.90 7.90");
+ assertEquals(hmm.getViterbi(), "-9.7737 0.71847");
+ assertEquals(hmm.getMSV(), "-9.4043 0.71847");
+ assertEquals(hmm.getForward(), "-3.8341 0.71847");
+ }
+
+ @Test(groups = "Functional")
+ public void testParseHeaderLines_dna() throws IOException
+ {
+ FileReader fr = new FileReader(
new File("test/jalview/io/test_MADE1_hmm.txt"));
- BufferedReader br3 = new BufferedReader(fr3);
- made1.setHMM(new HiddenMarkovModel());
- made1.parseFileProperties(br3);
- testHMM = made1.getHMM();
- br3.close();
- fr3.close();
+ BufferedReader br = new BufferedReader(fr);
+ HiddenMarkovModel hmm = new HiddenMarkovModel();
+ HMMFile testee = new HMMFile();
+ PA.setValue(testee, "hmm", hmm);
+ testee.parseHeaderLines(br);
+ br.close();
+ fr.close();
- assertEquals(testHMM.getName(), "MADE1");
- assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
- assertEquals(testHMM.getDescription(),
+ assertEquals(hmm.getName(), "MADE1");
+ assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER),
+ "DF0000629.2");
+ assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
"MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
- assertEquals(testHMM.getLength().intValue(), 80);
- assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
- assertEquals(testHMM.getAlphabetType(), "DNA");
- assertEquals(testHMM.referenceAnnotationIsActive(), true);
- assertEquals(testHMM.maskValueIsActive(), false);
- assertEquals(testHMM.consensusResidueIsActive(), true);
- assertEquals(testHMM.consensusStructureIsActive(), false);
- assertEquals(testHMM.mapIsActive(), true);
- assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
- assertNull(testHMM.getCommandLineLog());
- assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
- assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d);
- assertEquals(testHMM.getCheckSum().longValue(), 3015610723l);
- assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
- assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
- assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
- assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
- assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
- assertEquals(testHMM.getForward(), "-3.4823 0.71858");
-
-
+ assertEquals(hmm.getProperty(HMMFile.LENGTH), "80");
+ assertEquals(hmm.getProperty(HMMFile.MAX_LENGTH), "426");
+ assertEquals(hmm.getAlphabetType(), "DNA");
+ assertTrue(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
+ assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
+ assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
+ assertFalse(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
+ assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
+ assertEquals(hmm.getProperty(HMMFile.DATE), "Tue Feb 19 20:33:41 2013");
+ assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
+ assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "1997");
+ assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES), "3.911818");
+ assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3015610723");
+ assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
+ "2.324 4.234");
+ assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "2.343 1.212");
+ assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "2.354 5.456");
+ assertEquals(hmm.getViterbi(), "-9.3632 0.71858");
+ assertEquals(hmm.getMSV(), "-8.5786 0.71858");
+ assertEquals(hmm.getForward(), "-3.4823 0.71858");
}
- @Test
+ @Test(groups = "Functional")
public void testFillList() throws IOException
{
Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
filledArray.add(0.10026);
filledArray.add(0.001);
- List<Double> testList = HMMFile.fillList(scanner1, 9);
+ double[] testList = HMMFile.parseDoubles(scanner1, 9);
for (int i = 0; i < 9; i++)
{
- assertEquals(testList.get(i), filledArray.get(i), 0.001d);
-
+ assertEquals(testList[i], filledArray.get(i), 0.001d);
}
filledArray.clear();
filledArray.add(0.00355);
filledArray.add(0.2466);
- testList = HMMFile.fillList(scanner2, 5);
+ testList = HMMFile.parseDoubles(scanner2, 5);
for (int i = 0; i < 5; i++)
{
- assertEquals(testList.get(i), filledArray.get(i), 0.001d);
+ assertEquals(testList[i], filledArray.get(i), 0.001d);
}
-
}
- @Test
+ @Test(groups = "Functional")
public void testParseModel() throws IOException
{
FileReader fr = new FileReader(
0.001d);
assertEquals(testHMM.getStateTransitionProbability(1111, 6),
Double.NEGATIVE_INFINITY);
-
}
- @Test
+ /**
+ * Test that if no mapping of nodes to aligned columns is provided by the HMM
+ * file, we construct one
+ *
+ * @throws IOException
+ */
+ @Test(groups = "Functional")
+ public void testParseModel_noMap() throws IOException
+ {
+ fail("test to be written");
+ }
+
+ @Test(groups = "Functional")
public void testParseAnnotations()
{
HMMFile testFile = new HMMFile();
HiddenMarkovModel hmm = new HiddenMarkovModel();
- testFile.setHMM(hmm);
- hmm.getNodes().add(new HMMNode());
-
- hmm.setConsensusResidueStatus(true);
- hmm.setMAPStatus(true);
- hmm.setReferenceAnnotationStatus(true);
- hmm.setConsensusStructureStatus(true);
- hmm.setMaskedValueStatus(true);
+ PA.setValue(testFile, "hmm", hmm);
+ hmm.addNode(new HMMNode());
+
+ hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
+ hmm.setProperty(HMMFile.MAP, "yes");
+ hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
+ hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "yes");
+ hmm.setProperty(HMMFile.MASKED_VALUE, "yes");
Scanner scanner = new Scanner("1345 t t t t");
HMMNode node = new HMMNode();
- hmm.getNodes().add(node);
+ hmm.addNode(node);
testFile.parseAnnotations(scanner, node);
- hmm.setConsensusResidueStatus(true);
- hmm.setMAPStatus(false);
- hmm.setReferenceAnnotationStatus(true);
- hmm.setConsensusStructureStatus(false);
- hmm.setMaskedValueStatus(false);
+ hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
+ hmm.setProperty(HMMFile.MAP, "no");
+ hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
+ hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "no");
+ hmm.setProperty(HMMFile.MASKED_VALUE, "no");
Scanner scanner2 = new Scanner("- y x - -");
node = new HMMNode();
- hmm.getNodes().add(node);
+ hmm.addNode(node);
testFile.parseAnnotations(scanner2, node);
assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
*
* @throws IOException
*/
-
-
- @Test(priority = 3)
+ @Test(groups = "Functional")
public void testPrint() throws IOException
{
PrintWriter writer = new PrintWriter(
for (int i = 0; i < pKinaseHMM.getLength(); i++)
{
- List<Double> list1;
- List<Double> list2;
- boolean result;
+ double[] list1;
+ double[] list2;
list1 = pKinaseHMM.getNode(i).getMatchEmissions();
list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
- result = checkIfListsAreIdentical(list1, list2);
- assertEquals(result, true);
+ assertEquals(list1, list2);
list1 = pKinaseHMM.getNode(i).getInsertEmissions();
list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
- result = checkIfListsAreIdentical(list1, list2);
- assertEquals(result, true);
+ assertEquals(list1, list2);
list1 = pKinaseHMM.getNode(i).getStateTransitions();
list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
- result = checkIfListsAreIdentical(list1, list2);
- assertEquals(result, true);
+ assertEquals(list1, list2);
if (i > 0)
{
assertEquals(annotation1, annotation2);
}
-
}
-
}
- @Test(priority = 1)
- public void testGetFilePropertiesAsString() throws FileNotFoundException
+ @Test(groups = "Functional")
+ public void testAppendProperties() throws FileNotFoundException
{
- String string = fn3.getFilePropertiesAsString();
+ StringBuilder sb = new StringBuilder();
+ fn3.appendProperties(sb);
- Scanner testScanner = new Scanner(string);
+ Scanner testScanner = new Scanner(sb.toString());
String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
"NAME fn3", "ACC PF00041.13",
testScanner.close();
}
- @Test(priority = 2)
- public void testGetModelAsString() throws FileNotFoundException
+ @Test(groups = "Functional")
+ public void testAppendModelAsString() throws FileNotFoundException
{
- String string = fn3.getModelAsString();
+ StringBuilder sb = new StringBuilder();
+ fn3.appendModelAsString(sb);
+ String string = sb.toString();
assertEquals(findValue(2, 2, 2, string), "4.42225");
assertEquals(findValue(12, 14, 1, string), "2.79307");
assertEquals(findValue(16, 65, 1, string), "2.81003");
assertEquals(findValue(14, 3, 1, string), "2.69012");
assertEquals(findValue(11, 32, 1, string), "4.34805");
-
}
/**
+ * A helper method to find a token in the model string
*
* @param symbolIndex
* index of symbol being searched. First symbol has index 1.
* string model being searched
* @return value at specified position
*/
-
- public String findValue(int symbolIndex, int nodeIndex, int line,
+ private String findValue(int symbolIndex, int nodeIndex, int line,
String model)
{
-
String value = "";
- String current;
Scanner scanner = new Scanner(model);
- current = scanner.nextLine();
- current = scanner.nextLine();
+ scanner.nextLine();
+ scanner.nextLine();
for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
{
- current = scanner.nextLine();
+ scanner.nextLine();
}
for (int node = 0; node < nodeIndex; node++)
{
- current = scanner.nextLine();
- current = scanner.nextLine();
- current = scanner.nextLine();
+ scanner.nextLine();
+ scanner.nextLine();
+ scanner.nextLine();
}
for (int symbol = 0; symbol < symbolIndex; symbol++)
value = scanner.next();
if ("COMPO".equals(value))
{
- current = scanner.next();
+ scanner.next();
}
else if (value.length() < 7)
{
- current = scanner.next();
+ scanner.next();
}
-
}
scanner.close();
return value;
-
}
-
- public boolean checkIfListsAreIdentical(List<Double> list1,
- List<Double> list2)
- {
- boolean isDifferent = false;
- for (int i = 0; i < list1.size(); i++)
- {
- Double entry1;
- Double entry2;
- entry1 = list1.get(i);
- entry2 = list2.get(i);
- if (!(entry1 == entry2))
- {
- isDifferent = true;
- }
- }
- return isDifferent;
- }
-
}
{
analyser.sequences = new Vector<>();
analyser.hmm = new HiddenMarkovModel();
- analyser.hmm.addFileProperty("LENG", "8");
+ analyser.hmm.setProperty("LENG", "8");
List<HMMNode> nodes = new ArrayList<>();
nodes.add(new HMMNode());