import java.util.Scanner;
/**
- * Data structure which stores a hidden Markov model. Currently contains file properties as well, not sure whether these should be transferred to the HMMFile class
+ * Data structure which stores a hidden Markov model. Currently contains file
+ * properties as well, not sure whether these should be transferred to the
+ * HMMFile class
*
* @author TZVanaalten
*
// of the HMM, use getModelLength() to return an int value
Map<String, String> fileProperties = new HashMap<>();
- //contains all of the symbols used in this model. The index of each symbol represents its lookup value
+ // contains all of the symbols used in this model. The index of each symbol
+ // represents its lookup value
List<Character> symbols = new ArrayList<>();
// contains information for each node in the model. The begin node is at index
// contains the HMM node for each alignment column
Map<Integer, Integer> nodeLookup = new HashMap<>();
- //contains the symbol index for each symbol
+ // contains the symbol index for each symbol
Map<Character, Integer> symbolIndexLookup = new HashMap<>();
- Map<Character, Double> backgroundFrequencies = new HashMap();
-
- ProfilesI profiles;
-
-
final static String YES = "yes";
final static String NO = "no";
int numberOfSymbols;
- //keys for file properties hashmap
+ // keys for file properties hashmap
private final String NAME = "NAME";
private final String ACCESSION_NUMBER = "ACC";
public static final int DELETETODELETE = 6;
+ /**
+ * Returns the map containing the matches between nodes and alignment column
+ * indexes.
+ *
+ * @return
+ *
+ */
public Map<Integer, Integer> getNodeLookup()
{
  // The lookup map itself is handed back; no copy is made.
  return this.nodeLookup;
}
- public void setNodeLookup(Map<Integer, Integer> nodeLookup)
- {
- this.nodeLookup = nodeLookup;
- }
-
+ /**
+ * Returns the list of symbols used in this hidden Markov model.
+ *
+ * @return
+ */
public List<Character> getSymbols()
{
  // The symbol list itself is handed back; no copy is made.
  return this.symbols;
}
-
+
+ /**
+ * Returns the file properties.
+ *
+ * @return
+ */
public Map<String, String> getFileProperties()
{
  // The property map itself is handed back; no copy is made.
  return this.fileProperties;
}
+ /**
+ * Gets the node in the hidden Markov model at the specified position.
+ *
+ * @param nodeIndex
+ * The index of the node requested. Node 0 optionally contains the
+ * average match emission probabilities across the entire model, and
+ * always contains the insert emission probabilities and state
+ * transition probabilities for the begin node. Node 1 contains the
+ * first node in the HMM that can correspond to a column in the
+ * alignment.
+ * @return
+ */
public HMMNode getNode(int nodeIndex)
{
  // Goes through getNodes() rather than touching the field directly.
  final HMMNode requested = getNodes().get(nodeIndex);
  return requested;
}
+ /**
+ * Sets the list of symbols used in the hidden Markov model to the list
+ * specified.
+ *
+ * @param symbolsL
+ * The list of symbols to which the current list is to be changed.
+ *
+ */
public void setSymbols(List<Character> symbolsL)
{
  // The caller's list reference is stored as-is; no copy is made.
  symbols = symbolsL;
}
+ /**
+ * Returns the name of the sequence alignment on which the HMM is based.
+ *
+ * @return
+ */
public String getName()
{
  // Looked up in the file properties under the NAME key.
  final String name = fileProperties.get(NAME);
  return name;
}
+
+ /**
+ * Returns the accession number.
+ * @return
+ */
public String getAccessionNumber()
{
  // Looked up in the file properties under the ACCESSION_NUMBER key.
  final String accession = fileProperties.get(ACCESSION_NUMBER);
  return accession;
}
- public void setAccessionNumber(String value)
- {
- fileProperties.put(ACCESSION_NUMBER, value);
- }
-
+ /**
+ * Returns a description of the sequence alignment on which the hidden Markov
+ * model is based.
+ *
+ * @return
+ */
public String getDescription()
{
  // Looked up in the file properties under the DESCRIPTION key.
  final String description = fileProperties.get(DESCRIPTION);
  return description;
}
- public void setDescription(String value)
- {
- fileProperties.put(DESCRIPTION, value);
- }
-
+ /**
+ * Returns the length of the hidden Markov model.
+ *
+ * @return
+ */
public Integer getLength()
{
  // Fetch the stored text once. As previously written, a missing entry fell
  // straight into Integer.parseInt(null), which throws, and a present entry
  // reached the end of the method without returning anything.
  String length = fileProperties.get(LENGTH);
  if (length == null)
  {
    // Property absent: report "unknown" to the caller instead of throwing.
    return null;
  }
  return Integer.parseInt(length);
}
- public void setLength(int value)
- {
- fileProperties.put(LENGTH, String.valueOf(value));
- }
-
+ /**
+ * Returns the max instance length within the hidden Markov model.
+ *
+ * @return
+ */
public Integer getMaxInstanceLength()
{
  // Fetch the stored text once. As previously written, a missing entry fell
  // straight into Integer.parseInt(null), which throws, and a present entry
  // reached the end of the method without returning anything.
  String maxLength = fileProperties.get(MAX_LENGTH);
  if (maxLength == null)
  {
    // Property absent: report "unknown" to the caller instead of throwing.
    return null;
  }
  return Integer.parseInt(maxLength);
}
- public void setMaxInstanceLength(int value)
- {
- fileProperties.put(MAX_LENGTH, String.valueOf(value));
- }
-
- // gets type of symbol alphabet - "amino", "DNA", "RNA"
+ /**
+ * Returns the type of symbol alphabet - "amino", "DNA", "RNA" are the
+ * options. Other alphabets may be added.
+ *
+ * @return
+ */
public String getAlphabetType()
{
  // Looked up in the file properties under the ALPHABET key.
  final String alphabet = fileProperties.get(ALPHABET);
  return alphabet;
}
- public void setAlphabetType(String value)
- {
- fileProperties.put(ALPHABET, value);
- }
-
- // not sure whether to implement this with Date object
+ /**
+ * Returns the date as a String.
+ *
+ * @return
+ */
public String getDate()
{
  // The date is kept as the raw String stored under the DATE key.
  final String date = fileProperties.get(DATE);
  return date;
}
- public void setDate(String value)
- {
- fileProperties.put(DATE, value);
- }
-
- // not sure whether to implement this
+ /**
+ * Returns the command line log.
+ *
+ * @return
+ */
public String getCommandLineLog()
{
  // Looked up in the file properties under the COMMAND_LOG key.
  final String commandLog = fileProperties.get(COMMAND_LOG);
  return commandLog;
}
- public void setCommandLineLog(String value)
- {
- fileProperties.put(COMMAND_LOG, value);
- }
-
- // gets the number of sequences that the HMM was trained on
+ /**
+ * Returns the number of sequences on which the HMM was trained.
+ *
+ * @return
+ */
public Integer getNumberOfSequences()
{
  // Fetch the stored text once. As previously written, a missing entry fell
  // straight into Integer.parseInt(null), which throws, and a present entry
  // reached the end of the method without returning anything.
  String sequenceCount = fileProperties.get(NUMBER_OF_SEQUENCES);
  if (sequenceCount == null)
  {
    // Property absent: report "unknown" to the caller instead of throwing.
    return null;
  }
  return Integer.parseInt(sequenceCount);
}
- public void setNumberOfSequences(int value)
- {
- fileProperties.put(NUMBER_OF_SEQUENCES, String.valueOf(value));
- }
-
- // gets the effective number determined during sequence weighting
+ /**
+ * Returns the effective number of sequences on which the HMM was based.
+ *
+ * @return
+ */
public Double getEffectiveNumberOfSequences()
{
  // Test the property that is actually parsed below. The previous guard
  // inspected LENGTH, which says nothing about whether this value exists.
  String effectiveCount = fileProperties.get(EFF_NUMBER_OF_SEQUENCES);
  if (effectiveCount == null)
  {
    // Property absent: report "unknown" to the caller instead of throwing.
    return null;
  }
  return Double.parseDouble(effectiveCount);
}
- public void setEffectiveNumberOfSequences(double value)
- {
- fileProperties.put(EFF_NUMBER_OF_SEQUENCES, String.valueOf(value));
- }
-
+ /**
+ * Returns the checksum.
+ *
+ * @return
+ */
public Long getCheckSum()
{
  // Test the property that is actually parsed below. The previous guard
  // inspected LENGTH, which says nothing about whether this value exists.
  String checkSum = fileProperties.get(CHECK_SUM);
  if (checkSum == null)
  {
    // Property absent: report "unknown" to the caller instead of throwing.
    return null;
  }
  return Long.parseLong(checkSum);
}
- public void setCheckSum(long value)
- {
- fileProperties.put(CHECK_SUM, String.valueOf(value));
- }
-
+ /**
+ * Returns the list of nodes in this HMM.
+ *
+ * @return
+ */
public List<HMMNode> getNodes()
{
  // The node list itself is handed back; no copy is made.
  return this.nodes;
}
+ /**
+ * Sets the list of nodes in this HMM to the given list.
+ *
+ * @param nodes
+ * The list of nodes to which the current list of nodes is being
+ * changed.
+ */
public void setNodes(List<HMMNode> nodes)
{
// The parameter shadows the field, hence the explicit 'this' qualifier.
// The caller's list reference is stored directly; no copy is made.
this.nodes = nodes;
}
/**
- * get match emission probability for a given symbol at a column in the
- * alignment
+ * Gets the match emission probability for a given symbol at a column in the
+ * alignment.
*
* @param alignColumn
+ * The index of the alignment column, starting at index 0. Index 0
+ * usually corresponds to index 1 in the HMM.
* @param symbol
+ * The symbol for which the desired probability is being requested.
* @return
*
*/
}
/**
- * get insert emission probability for a given symbol at a column in the
- * alignment
+ * Gets the insert emission probability for a given symbol at a column in the
+ * alignment.
*
* @param alignColumn
+ * The index of the alignment column, starting at index 0. Index 0
+ * usually corresponds to index 1 in the HMM.
* @param symbol
+ * The symbol for which the desired probability is being requested.
* @return
+ *
*/
public Double getInsertEmissionProbability(int alignColumn, char symbol)
{
}
/**
- * get state transition probability for a given transition type at a column in
- * the alignment
+ * Gets the state transition probability for a given transition type at a
+ * column in the alignment.
*
* @param alignColumn
- * @param transition
+ * The index of the alignment column, starting at index 0. Index 0
+ * usually corresponds to index 1 in the HMM.
+ * @param transition
+ * The transition type for which the desired probability is being requested.
* @return
+ *
*/
public Double getStateTransitionProbability(int alignColumn,
int transition)
}
+ /**
+ * Returns the alignment column linked to the node at the given index.
+ *
+ * @param nodeIndex
+ * The index of the node, starting from index 1. Index 0 is the begin
+ * node, which does not correspond to a column in the alignment.
+ * @return
+ */
public Integer getNodeAlignmentColumn(int nodeIndex)
{
  // The column stored on the node is one higher than the value handed back.
  return nodes.get(nodeIndex).getAlignmentColumn() - 1;
}
+ /**
+ * Returns the consensus residue at the specified node.
+ *
+ * @param nodeIndex
+ * The index of the specified node.
+ * @return
+ */
public char getConsensusResidue(int nodeIndex)
{
  // Straight delegation to the node at the given index.
  return nodes.get(nodeIndex).getConsensusResidue();
}
+ /**
+ * Returns the consensus at a given alignment column.
+ *
+ * @param columnIndex
+ * The index of the column in the alignment for which the consensus
+ * is desired. The list of columns starts at index 0.
+ * @return
+ */
public char getConsensusAtAlignColumn(int columnIndex)
{
char value;
- Integer index = findNodeIndex(columnIndex + 1);
+ Integer index = findNodeIndex(columnIndex);
if (index == null)
{
return '-';
return value;
}
+ /**
+ * Returns the reference annotation at the specified node.
+ *
+ * @param nodeIndex
+ * The index of the specified node.
+ * @return
+ */
public char getReferenceAnnotation(int nodeIndex)
{
  // Straight delegation to the node at the given index.
  return nodes.get(nodeIndex).getReferenceAnnotation();
}
+ /**
+ * Returns the mask value at the specified node.
+ *
+ * @param nodeIndex
+ * The index of the specified node.
+ * @return
+ */
public char getMaskedValue(int nodeIndex)
{
  // Straight delegation to the node at the given index.
  return nodes.get(nodeIndex).getMaskValue();
}
+ /**
+ * Returns the consensus structure at the specified node.
+ *
+ * @param nodeIndex
+ * The index of the specified node.
+ * @return
+ */
public char getConsensusStructure(int nodeIndex)
{
char value = nodes.get(nodeIndex).getConsensusStructure();
}
/**
- * returns the average match emission for a given symbol
+ * Returns the average match emission probability for a given symbol
+ *
* @param symbolIndex
- * index of symbol
+ * The index of the symbol.
* @return
- * average negative log propbability of a match emission of the given symbol
+ *
*/
public double getAverageMatchEmission(int symbolIndex)
{
return value;
}
+ /**
+ * Returns the number of symbols in the alphabet used in this HMM.
+ *
+ * @return
+ */
public int getNumberOfSymbols()
{
  // Reads the stored count; nothing is recomputed here.
  return this.numberOfSymbols;
}
- public void setNumberOfSymbols(int numberOfSymbols)
- {
- this.numberOfSymbols = numberOfSymbols;
- }
-
-
-
/**
- * fills symbol array and also finds numberOfSymbols
+ * Fills symbol array and whilst doing so, updates the value of the number of
+ * symbols.
*
* @param parser
- * scanner scanning symbol line in file
+ * The scanner scanning the symbol line in the file.
*/
public void fillSymbols(Scanner parser)
{
}
/**
- * adds file property
+ * Adds a file property.
*
* @param key
* @param value
fileProperties.put(key, value);
}
+ /**
+ * Returns a boolean indicating whether the reference annotation is active.
+ *
+ * @return
+ */
public boolean referenceAnnotationIsActive()
{
String status;
}
+ /**
+ * Returns a boolean indicating whether the mask value annotation is active.
+ *
+ * @return
+ */
public boolean maskValueIsActive()
{
String status;
}
+ /**
+ * Returns a boolean indicating whether the consensus residue annotation is
+ * active.
+ *
+ * @return
+ */
public boolean consensusResidueIsActive()
{
String status;
}
+ /**
+ * Returns a boolean indicating whether the consensus structure annotation is
+ * active.
+ *
+ * @return
+ */
public boolean consensusStructureIsActive()
{
String status;
}
+ /**
+ * Returns a boolean indicating whether the MAP annotation is active.
+ *
+ * @return
+ */
public boolean mapIsActive()
{
String status;
}
+ /**
+ * Sets the alignment column of the specified node.
+ *
+ * @param nodeIndex
+ *
+ * @param column
+ *
+ */
public void setAlignmentColumn(int nodeIndex, int column)
{
  // Forwarded unchanged to the node at the given index.
  this.nodes.get(nodeIndex).setAlignmentColumn(column);
}
+ /**
+ * Sets the reference annotation at a given node.
+ *
+ * @param nodeIndex
+ * @param value
+ */
public void setReferenceAnnotation(int nodeIndex, char value)
{
  // Forwarded unchanged to the node at the given index.
  this.nodes.get(nodeIndex).setReferenceAnnotation(value);
}
+ /**
+ * Sets the consensus residue at a given node.
+ *
+ * @param nodeIndex
+ * @param value
+ */
public void setConsensusResidue(int nodeIndex, char value)
{
  // Forwarded unchanged to the node at the given index.
  this.nodes.get(nodeIndex).setConsensusResidue(value);
}
+ /**
+ * Sets the consensus structure at a given node.
+ *
+ * @param nodeIndex
+ * @param value
+ */
public void setConsensusStructure(int nodeIndex, char value)
{
  // Forwarded unchanged to the node at the given index.
  this.nodes.get(nodeIndex).setConsensusStructure(value);
}
+ /**
+ * Sets the mask value at a given node.
+ *
+ * @param nodeIndex
+ * @param value
+ */
public void setMaskValue(int nodeIndex, char value)
{
  // Forwarded unchanged to the node at the given index.
  this.nodes.get(nodeIndex).setMaskValue(value);
}
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return
+ */
public String getGatheringThreshold()
{
String value;
return value;
}
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return
+ */
public String getNoiseCutoff()
{
String value;
return value;
}
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return
+ */
public String getTrustedCutoff()
{
String value;
return value;
}
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return
+ */
public String getViterbi()
{
String value;
return value;
}
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return
+ */
public String getMSV()
{
String value;
return value;
}
+ /**
+ * Temporary implementation, should not be used.
+ *
+ * @return
+ */
public String getForward()
{
String value;
return value;
}
+ /**
+ * Sets the activation status of the MAP annotation.
+ *
+ * @param status
+ */
public void setMAPStatus(boolean status)
{
if (status == true)
}
}
+ /**
+ * Sets the activation status of the reference annotation.
+ *
+ * @param status
+ */
public void setReferenceAnnotationStatus(boolean status)
{
if (status == true)
}
}
+ /**
+ * Sets the activation status of the mask value annotation.
+ *
+ * @param status
+ */
public void setMaskedValueStatus(boolean status)
{
if (status == true)
}
}
+ /**
+ * Sets the activation status of the consensus residue annotation.
+ *
+ * @param status
+ */
public void setConsensusResidueStatus(boolean status)
{
if (status == true)
}
}
+ /**
+ * Sets the activation status of the consensus structure annotation.
+ *
+ * @param status
+ */
public void setConsensusStructureStatus(boolean status)
{
if (status == true)
}
/**
- * find the index of the node in a hidden Markov model based on the column in
+ * Finds the index of the node in a hidden Markov model based on the column in
* the alignment
*
* @param alignmentColumn
+ * The index of the column in the alignment, with the indexes
+ * starting from 0.
*/
public Integer findNodeIndex(int alignmentColumn)
{
Integer index;
- index = nodeLookup.get(alignmentColumn);
+ index = nodeLookup.get(alignmentColumn + 1);
return index;
}
+ /**
+ * Finds the String value of a boolean: "yes" for true and "no" for false.
+ *
+ * @param value
+ * @return
+ */
public static String findStringFromBoolean(boolean value)
{
if (value)
}
/**
- * @return
+ * Creates the HMM Logo alignment annotation, and populates it with
+ * information content data.
+ *
+ * @return The alignment annotation.
*/
public AlignmentAnnotation createAnnotation(int length)
{
return annotation;
}
+ /**
+ * Returns the information content at a specified column.
+ *
+ * @param column
+ * Index of the column, starting from 0.
+ * @return
+ */
public float getInformationContent(int column)
{
float informationContent = 0f;
/**
- * reads in and writes out a HMMER standard file
+ * Adds capability to read in and write out HMMER3 files. Currently only supports HMMER3/f.
*
*
* @author TZVanaalten
private final String NEW_LINE = "\n";
-
// file header
String fileHeader;
+ //number of symbols in the alphabet used in the hidden Markov model
int numberOfSymbols;
private final String SPACE = " ";
private final String EMPTY = "";
+ //This is a line that needs to be added to each HMMER3 file. It is purely for readability.
private static final String TRANSITIONTYPELINE = "m->m m->i m->d i->m i->i d->m d->d";
+ /**
+ * Constructor for HMMFile
+ * @param source
+ * @throws IOException
+ */
public HMMFile(FileParse source) throws IOException
{
// All construction work is handed to the superclass constructor; what its
// first (false) argument controls is defined there and is not visible here.
super(false, source);
}
+ /**
+ * Default constructor, do not use!
+ */
public HMMFile()
{
// No initialisation is performed here.
}
+ /**
+ * Returns the HMM produced by reading in a HMMER3 file.
+ *
+ * @return
+ */
public HiddenMarkovModel getHMM()
{
  // Hands back the model currently held by this file object.
  return this.hmm;
}
+ /**
+ * Sets the HMM used in this file.
+ *
+ * @param model
+ */
public void setHMM(HiddenMarkovModel model)
{
  // Replaces the model held by this file object with the one supplied.
  hmm = model;
}
+ /**
+ * Gets the name of the hidden Markov model.
+ *
+ * @return
+ */
public String getName()
{
  // The name is owned by the model; this simply asks it.
  final String modelName = hmm.getName();
  return modelName;
}
/**
- * reads data from HMM file
+ * Reads the data from HMM file into the HMM field on this object.
*
* @throws IOException
*/
/**
- * imports file properties from hmm file
+ * Imports the file properties from a HMMER3 file.
*
* @param input
- * buffered reader used to read in file
+ * The buffered reader used to read in the file.
* @throws IOException
*/
void parseFileProperties(BufferedReader input) throws IOException
}
/**
- * parses the model data from the hmm file
+ * Parses the model data from the HMMER3 file
*
* @param input
- * buffered reader used to read file
+ * The buffered reader used to read the file.
* @throws IOException
*/
void parseModel(BufferedReader input) throws IOException
}
/**
- * parses annotations on match emission line
+ * Parses the annotations on the match emission line.
*
* @param scanner
- * scanner which is processing match emission line
+ * The scanner which is processing the match emission line.
* @param index
- * index of node which is beign scanned
+ * The index of the node which is being scanned.
*/
void parseAnnotations(Scanner scanner, int index)
{
/**
+ * Fills a list of doubles based on an input line.
*
* @param input
- * scanner for line containing data to be transferred to list
+ * The scanner for the line containing the data to be transferred to
+ * the list.
* @param numberOfElements
- * number of elements in the list to be filled
- * @return filled list
+ * The number of elements in the list to be filled.
+ * @return The filled list of doubles.
*/
static List<Double> fillList(Scanner input,
int numberOfElements)
if (next.contains("*")) // state transitions to or from delete states
// occasionally have values of -infinity. These
// values are represented by an * in the .hmm
- // file, and by a null value in the
- // HiddenMarkovModel class
+ // file.
{
list.add(Double.NEGATIVE_INFINITY);
}
/**
- * writes a HiddenMarkovModel to a file
+ * Writes a HMM to a file.
*
* @param exportLocation
- * Filename, URL or Pasted String to write to
+ * Filename, URL or Pasted String to write to.
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
*
}
+ /**
+ * Returns a string to be added to the StringBuilder containing the entire
+ * output String.
+ *
+ * @param initialColumnSeparation
+ * The initial whitespace separation between the left side of the
+ * file and first character.
+ * @param columnSeparation
+ * The separation between subsequent data entries.
+ * @param data
+ * The list of data to be added to the String.
+ * @return
+ */
String addData(int initialColumnSeparation,
int columnSeparation, List<String> data)
{
return line;
}
+ /**
+ * Converts list of characters into a list of Strings.
+ *
+ * @param list
+ * @return Returns the list of Strings.
+ */
List<String> charListToStringList(List<Character> list)
{
List<String> strList = new ArrayList<>();
return strList;
}
- List<String> doubleListToStringList(List<Double> list,
- int noOfDecimals)
+ /**
+ * Converts a list of doubles into a list of Strings, rounded to the nearest
+ * 5th decimal place.
+ *
+ * @param list
+ * The list of doubles to be converted.
+ * @return
+ */
+ List<String> doubleListToStringList(List<Double> list)
{
List<String> strList = new ArrayList<>();
for (double value : list)
return strList;
}
+ /**
+ * Converts a primitive array of Strings to a list of Strings.
+ *
+ * @param array
+ * @return
+ */
List<String> stringArrayToStringList(String[] array)
{
List<String> list = new ArrayList<>();
return list;
}
+ /**
+ * Appends the hidden Markov model data to the StringBuilder containing the
+ * output
+ *
+ * @param file
+ * The StringBuilder containing the output.
+ */
void appendModel(StringBuilder file)
{
String symbolLine = "HMM";
List<Double> doubleMatches;
doubleMatches = hmm.getNode(node).getMatchEmissions();
convertListToLogSpace(doubleMatches);
- strMatches = doubleListToStringList(doubleMatches, 5);
+ strMatches = doubleListToStringList(doubleMatches);
matchLine += addData(10, 9, strMatches);
List<Double> doubleInserts;
doubleInserts = hmm.getNode(node).getInsertEmissions();
convertListToLogSpace(doubleInserts);
- strInserts = doubleListToStringList(doubleInserts, 5);
+ strInserts = doubleListToStringList(doubleInserts);
insertLine += addData(17, 9, strInserts);
file.append(insertLine + NEW_LINE);
List<Double> doubleTransitions;
doubleTransitions = hmm.getNode(node).getStateTransitions();
convertListToLogSpace(doubleTransitions);
- strTransitions = doubleListToStringList(doubleTransitions, 5);
+ strTransitions = doubleListToStringList(doubleTransitions);
transitionLine += addData(17, 9, strTransitions);
file.append(transitionLine + NEW_LINE);
}
}
+ /**
+ * Appends the hidden Markov model file properties to the StringBuilder
+ * containing the output
+ *
+ * @param file
+ * The StringBuilder containing the output.
+ */
void appendFileProperties(StringBuilder file)
{
String line;
}
-
+ /**
+ * Returns the char value of a single lettered String.
+ *
+ * @param string
+ * @return
+ */
char charValue(String string)
{
char character;
character = string.charAt(0);
return character;
+
}
@Override
return null;
}
+ /**
+ * Converts the probabilities contained in a list into log space.
+ *
+ * @param list
+ */
void convertListToLogSpace(List<Double> list)
{