package jalview.io;
+import jalview.api.AlignExportSettingI;
+import jalview.api.AlignmentViewPanel;
import jalview.datamodel.HMMNode;
import jalview.datamodel.HiddenMarkovModel;
+import jalview.datamodel.SequenceI;
import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
- * reads in and writes out a HMMER standard file
+ * Adds capability to read in and write out HMMER3 files.
*
*
* @author TZVanaalten
*
*/
-public class HMMFile extends FileParse
+public class HMMFile extends AlignFile
+ implements AlignmentFileReaderI, AlignmentFileWriterI
{
// HMM to store file data
- HiddenMarkovModel hmm = new HiddenMarkovModel();
+ private HiddenMarkovModel hmm;
+ // number of possible transitions
+ private static final int NUMBER_OF_TRANSITIONS = 7;
- // Source of file
- String dataObject;
+ private String NL = "\n";
- // number of possible transitions
- final static int NUMBER_OF_TRANSITIONS = 7;
+ //number of symbols in the alphabet used in the hidden Markov model
+ int numberOfSymbols;
- final static String NEW_LINE = "\n";
+ private final String SPACE = " ";
+ private final String COMPO = "COMPO";
- // file header
- String fileHeader;
+ private final String EMPTY = "";
- int numberOfSymbols;
+ // This line needs to be added to each HMMER3 file. It is purely for readability.
+ private static final String TRANSITIONTYPELINE = " m->m m->i m->d i->m i->i d->m d->d";
- final static String SPACE = " ";
+ /**
+ * Creates an HMMFile for the given input by handing both arguments to the
+ * superclass constructor. The author's note says this parses immediately;
+ * the parsing itself is done by the superclass, which is not shown in this
+ * file.
+ *
+ * @param inFile
+ *          the input to read, interpreted according to type
+ * @param type
+ *          the kind of data source that inFile refers to
+ * @throws IOException
+ *           if the superclass constructor reports a read failure
+ */
+ public HMMFile(String inFile, DataSourceType type) throws IOException
+ {
+ super(inFile, type);
+ }
- final static String COMPO = "COMPO";
+ /**
+ * Creates an HMMFile on top of an existing FileParse by handing it to the
+ * superclass constructor. The author's note says this parses immediately;
+ * the parsing itself is done by the superclass, which is not shown in this
+ * file.
+ *
+ * @param source
+ *          the already opened source to read from
+ * @throws IOException
+ *           if the superclass constructor reports a read failure
+ */
+ public HMMFile(FileParse source) throws IOException
+ {
+ super(source);
+ }
- final static String EMPTY = "";
+ /**
+ * Default constructor, do not use! It assigns neither a data source nor a
+ * model, so the hmm field is left unset until setHMM or one of the parse
+ * methods is called.
+ */
+ public HMMFile()
+ {
+ }
/**
- * Constructor which contains model to be filled or exported
+ * Constructor for HMMFile used for exporting. It only stores the given
+ * model; nothing is read or written here.
*
- * @param dataSource
- * Filename, URL or Pasted String to read from
+ * @param markov
+ *          the hidden Markov model to keep in the hmm field
*/
- public HMMFile(String dataSource)
+ public HMMFile(HiddenMarkovModel markov)
{
- dataObject = dataSource;
+ hmm = markov;
}
- public HiddenMarkovModel getHmm()
+ /**
+ * For testing, do not use. Stores the reader in the dataIn field, which
+ * parse() reads from; nothing is parsed by this constructor itself.
+ *
+ * @param br
+ *          the reader supplying the text of a HMMER3 file
+ */
+ HMMFile(BufferedReader br)
+ {
+ dataIn = br;
+ }
+
+ /**
+ * Returns the HMM held by this object: the one produced by reading in a
+ * HMMER3 file, or the one supplied through the exporting constructor or
+ * setHMM.
+ *
+ * @return the current value of the hmm field
+ */
+ public HiddenMarkovModel getHMM()
{
return hmm;
}
- public void setHmm(HiddenMarkovModel model)
+ /**
+ * Sets the HMM used in this file, replacing any model already held.
+ *
+ * @param model
+ *          the model to store in the hmm field
+ */
+ public void setHMM(HiddenMarkovModel model)
{
this.hmm = model;
}
/**
- * reads data from HMM file
+ * Gets the name of the hidden Markov model.
*
- * @throws IOException
+ * @return the result of hmm.getName()
*/
- public void parse() throws IOException
+ public String getName()
{
- File file = new File(dataObject);
- FileReader fr = new FileReader(file);
- BufferedReader br = new BufferedReader(fr);
- parseFileProperties(br);
- parseModel(br);
-
+ return hmm.getName(); // NOTE(review): no null check on hmm; confirm every caller has parsed or set a model first
}
- public String getDataObject()
+ /**
+ * Reads the data from HMM file into the HMM field on this object. A new,
+ * empty HiddenMarkovModel replaces any existing one, then the file
+ * properties and the model are read in that order from the dataIn reader.
+ *
+ * @throws IOException
+ *           if parseFileProperties or parseModel fails to read
+ */
+ @Override
+ public void parse() throws IOException
{
- return dataObject;
+ hmm = new HiddenMarkovModel();
+ parseFileProperties(dataIn);
+ parseModel(dataIn);
}
- public void setDataObject(String value)
+ /**
+ * Reads the data from HMM file into the HMM field on this object, taking
+ * the text from the supplied reader instead of the dataIn field. A new,
+ * empty HiddenMarkovModel replaces any existing one before reading starts.
+ *
+ * @param br
+ *          the reader supplying the text of a HMMER3 file
+ * @throws IOException
+ *           if parseFileProperties or parseModel fails to read
+ */
+
+ public void parse(BufferedReader br) throws IOException
{
- this.dataObject = value;
+ hmm = new HiddenMarkovModel();
+ parseFileProperties(br);
+ parseModel(br);
}
+
+
/**
- * imports file properties from hmm file
+ * Imports the file properties from a HMMER3 file.
*
* @param input
- * buffered reader used to read in file
+ * The buffered reader used to read in the file.
* @throws IOException
*/
- public void parseFileProperties(BufferedReader input) throws IOException
+ void parseFileProperties(BufferedReader input) throws IOException
{
boolean readingFile = true;
- fileHeader = input.readLine();
+ hmm.setFileHeader(input.readLine());
String line = input.readLine();
while (readingFile)
{
// properties)
{
readingFile = false;
- hmm.fillSymbols(parser);
+ fillSymbols(parser);
numberOfSymbols = hmm.getNumberOfSymbols();
}
else if ("STATS".equals(next))
}
/**
- * parses the model data from the hmm file
+ * Parses the model data from the HMMER3 file
*
* @param input
- * buffered reader used to read file
+ * The buffered reader used to read the file.
* @throws IOException
*/
- public void parseModel(BufferedReader input) throws IOException
+ void parseModel(BufferedReader input) throws IOException
{
- for (int i = 0; i < hmm.getLength() + 1; i++)
+ String line = input.readLine();
+ int node = 0;
+ while (!"//".equals(line))
{
hmm.getNodes().add(new HMMNode());
String next;
- String line;
- line = input.readLine();
Scanner matchReader = new Scanner(line);
next = matchReader.next();
- if (next.equals(COMPO) || i > 0)
+ if (next.equals(COMPO) || node > 0)
{
// stores match emission line in list
List<Double> matches = new ArrayList<>();
matches = fillList(matchReader, numberOfSymbols);
- hmm.getNodes().get(i).setMatchEmissions(matches);
- if (i > 0)
+ hmm.getNodes().get(node).setMatchEmissions(matches);
+ if (node > 0)
{
- parseAnnotations(matchReader, i);
+ parseAnnotations(matchReader, node);
}
}
matchReader.close();
Scanner insertReader = new Scanner(line);
List<Double> inserts = new ArrayList<>();
inserts = fillList(insertReader, numberOfSymbols);
- hmm.getNodes().get(i).setInsertEmissions(inserts);
+ hmm.getNodes().get(node).setInsertEmissions(inserts);
insertReader.close();
// stores state transition line in list
Scanner transitionReader = new Scanner(line);
List<Double> transitions = new ArrayList<>();
transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
- hmm.getNodes().get(i).setStateTransitions(transitions);
+ hmm.getNodes().get(node).setStateTransitions(transitions);
transitionReader.close();
+ line = input.readLine();
+ node++;
}
}
/**
- * parses annotations on match emission line
+ * Parses the annotations that follow the emission values on a match
+ * emission line. In order these are: the alignment column, the consensus
+ * residue, the reference annotation, the mask value and the consensus
+ * structure. Each one is read only if the scanner still has a token, so a
+ * line that ends early leaves the remaining annotations unset.
*
* @param scanner
- * scanner which is processing match emission line
+ * The scanner which is processing the match emission line.
* @param index
- * index of node which is beign scanned
+ * The index of the node which is being scanned.
*/
- public void parseAnnotations(Scanner scanner, int index)
+ void parseAnnotations(Scanner scanner, int index)
{
- if (hmm.mapIsActive())
+ if (hmm.mapIsActive() && scanner.hasNext())
{
int column;
column = scanner.nextInt();
- hmm.getNodes().get(index).setAlignmentColumn(column);
+ // the column is stored as the file's value minus one; getModelAsString
+ // adds the one back when writing
+ hmm.getNodes().get(index).setAlignmentColumn(column - 1);
+ hmm.getNodeLookup().put(column - 1, index);
}
- else
+ else if (scanner.hasNext())
{
+ // map not active: discard the column token. The hasNext() guard stops a
+ // line with no annotation tokens from throwing NoSuchElementException.
scanner.next();
}
- char consensusR;
- consensusR = charValue(scanner.next());
- hmm.getNodes().get(index).setConsensusResidue(consensusR);
+ if (scanner.hasNext())
+ {
+ char consensusR;
+ consensusR = charValue(scanner.next());
+ hmm.getNodes().get(index).setConsensusResidue(consensusR);
+ }
+ if (scanner.hasNext())
+ {
char reference;
reference = charValue(scanner.next());
hmm.getNodes().get(index).setReferenceAnnotation(reference);
+ }
-
+ if (scanner.hasNext())
+ {
char value;
value = charValue(scanner.next());
hmm.getNodes().get(index).setMaskValue(value);
-
- char consensusS;
- consensusS = charValue(scanner.next());
- hmm.getNodes().get(index).setConsensusStructure(consensusS);
- }
-
- /**
- *
- * @param transition
- * type of transition occuring
- * @return index value representing position along stateTransition array.
- */
- public Integer getTransitionType(String transition)
- {
- Integer index;
- switch (transition)
+ }
+ if (scanner.hasNext())
{
- case "mm":
- index = 0;
- break;
- case "mi":
- index = 1;
- break;
- case "md":
- index = 2;
- break;
- case "im":
- index = 3;
- break;
- case "ii":
- index = 4;
- break;
- case "dm":
- index = 5;
- break;
- case "dd":
- index = 6;
- break;
- default:
- index = null;
+ char consensusS;
+ consensusS = charValue(scanner.next());
+ hmm.getNodes().get(index).setConsensusStructure(consensusS);
}
- return index;
}
+
+
/**
+ * Fills a list of doubles based on an input line.
*
* @param input
- * scanner for line containing data to be transferred to list
+ * The scanner for the line containing the data to be transferred to
+ * the list.
* @param numberOfElements
- * number of elements in the list to be filled
- * @return filled list
+ * The number of elements in the list to be filled.
+ * @return The filled list of doubles.
+ * @throws IOException
*/
- public static List<Double> fillList(Scanner input,
- int numberOfElements)
+ static List<Double> fillList(Scanner input,
+ int numberOfElements) throws IOException
{
List<Double> list = new ArrayList<>();
for (int i = 0; i < numberOfElements; i++)
if (next.contains("*")) // state transitions to or from delete states
// occasionally have values of -infinity. These
// values are represented by an * in the .hmm
- // file, and by a null value in the
- // HiddenMarkovModel class
+ // file.
{
list.add(Double.NEGATIVE_INFINITY);
}
else
{
- list.add(Double.valueOf(next));
+ double prob = Double.valueOf(next);
+ prob = Math.pow(Math.E, -prob);
+ list.add(prob);
}
}
+ if (list.size() < numberOfElements)
+ {
+ throw new IOException("Incomplete data");
+ }
return list;
}
-
/**
- * writes a HiddenMarkovModel to a file
+ * Returns a string to be added to the StringBuilder containing the entire
+ * output String.
*
- * @param exportLocation
- * Filename, URL or Pasted String to write to
- * @throws FileNotFoundException
- * @throws UnsupportedEncodingException
- *
- **/
-
- public void exportFile(String exportLocation) throws IOException
- {
- StringBuilder file = new StringBuilder();
- appendFileProperties(file);
- appendModel(file);
-
- file.append("//");
-
- }
-
- public String addData(int initialColumnSeparation,
+ * @param initialColumnSeparation
+ * The initial whitespace separation between the left side of the
+ * file and first character.
+ * @param columnSeparation
+ * The separation between subsequent data entries.
+ * @param data
+ * The list of data to be added to the String.
+ * @return
+ */
+ String addData(int initialColumnSeparation,
int columnSeparation, List<String> data)
{
String line = EMPTY;
return line;
}
- public static List<String> charListToStringList(List<Character> list)
+ /**
+ * Converts list of characters into a list of Strings.
+ *
+ * @param list
+ * @return Returns the list of Strings.
+ */
+ List<String> charListToStringList(List<Character> list)
{
List<String> strList = new ArrayList<>();
for (char value : list)
return strList;
}
- public static List<String> doubleListToStringList(List<Double> list,
- int noOfDecimals)
+ /**
+ * Converts a list of doubles into a list of Strings, rounded to the nearest
+ * 5th decimal place.
+ *
+ * @param list
+ *          The doubles to convert.
+ * @return The list of formatted Strings.
+ */
+ List<String> doubleListToStringList(List<Double> list)
{
List<String> strList = new ArrayList<>();
for (double value : list)
{
String strValue;
- if (value == Double.NEGATIVE_INFINITY)
+ if (value > 0)
{
- strValue = "*";
+ strValue = String.format("%.5f", value);
+
+ }
+ else if (value == -0.00000d)
+ {
+ strValue = "0.00000";
}
else
{
- strValue = String.format("%.5f", value);
+ strValue = "*";
}
strList.add(strValue);
return strList;
}
- public static List<String> stringArrayToStringList(String[] array)
+ /**
+ * Converts a primitive array of Strings to a list of Strings.
+ *
+ * @param array
+ * @return
+ */
+ List<String> stringArrayToStringList(String[] array)
{
List<String> list = new ArrayList<>();
for (String value : array)
return list;
}
- void appendModel(StringBuilder file)
+ /**
+ * Returns a string containing the model data.
+ */
+ String getModelAsString()
{
+ StringBuffer output = new StringBuffer();
String symbolLine = "HMM";
List<Character> charSymbols = hmm.getSymbols();
List<String> strSymbols;
strSymbols = charListToStringList(charSymbols);
symbolLine += addData(11, 9, strSymbols);
- file.append(symbolLine + NEW_LINE);
-
- String transitionTypeLine = "";
- List<String> transitionTypes;
- transitionTypes = stringArrayToStringList(hmm.getTransitionTypes());
- transitionTypeLine += addData(16, 9, transitionTypes);
- file.append(transitionTypeLine + NEW_LINE);
+ output.append(symbolLine);
+ output.append(NL + TRANSITIONTYPELINE);
int length = hmm.getLength();
List<String> strMatches;
List<Double> doubleMatches;
- doubleMatches = hmm.getNode(node).getMatchEmissions();
- strMatches = doubleListToStringList(doubleMatches, 5);
+ doubleMatches = convertListToLogSpace(
+ hmm.getNode(node).getMatchEmissions());
+ strMatches = doubleListToStringList(doubleMatches);
matchLine += addData(10, 9, strMatches);
if (node != 0)
{
- matchLine += SPACE + hmm.getNodeAlignmentColumn(node);
+ matchLine += SPACE + (hmm.getNodeAlignmentColumn(node) + 1);
matchLine += SPACE + hmm.getConsensusResidue(node);
matchLine += SPACE + hmm.getReferenceAnnotation(node);
- matchLine += SPACE + hmm.getMaskedValue(node);
- matchLine += SPACE + hmm.getConsensusStructure(node);
+ if (hmm.getFileHeader().contains("HMMER3/f"))
+ {
+ matchLine += SPACE + hmm.getMaskedValue(node);
+ matchLine += SPACE + hmm.getConsensusStructure(node);
+ }
}
- file.append(matchLine + NEW_LINE);
+ output.append(NL + matchLine);
String insertLine = EMPTY;
List<String> strInserts;
List<Double> doubleInserts;
- doubleInserts = hmm.getNode(node).getInsertEmissions();
- strInserts = doubleListToStringList(doubleInserts, 5);
+ doubleInserts = convertListToLogSpace(
+ hmm.getNode(node).getInsertEmissions());
+ strInserts = doubleListToStringList(doubleInserts);
insertLine += addData(17, 9, strInserts);
- file.append(insertLine + NEW_LINE);
+ output.append(NL + insertLine);
String transitionLine = EMPTY;
List<String> strTransitions;
List<Double> doubleTransitions;
- doubleTransitions = hmm.getNode(node).getStateTransitions();
- strTransitions = doubleListToStringList(doubleTransitions, 5);
+ doubleTransitions = convertListToLogSpace(
+ hmm.getNode(node).getStateTransitions());
+ strTransitions = doubleListToStringList(doubleTransitions);
transitionLine += addData(17, 9, strTransitions);
- file.append(transitionLine + NEW_LINE);
+ output.append(NL + transitionLine);
}
+ return output.toString();
}
- void appendFileProperties(StringBuilder file)
+ /**
+ * Returns a String containing the HMM file properties. The text starts with
+ * the model's file header and continues with one line per property, each
+ * preceded by the NL string, so the result carries no trailing newline.
+ * Properties whose getter is compared against null below are written only
+ * when a value is present; the VITERBI and FORWARD statistics lines are
+ * written whenever the MSV value is present.
+ *
+ * @return the properties section of the file as text
+ */
+ String getFilePropertiesAsString()
{
+ StringBuffer output = new StringBuffer();
String line;
- file.append(fileHeader + NEW_LINE);
+ output.append(hmm.getFileHeader());
line = String.format("%-5s %1s", "NAME", hmm.getName());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
if (hmm.getAccessionNumber() != null)
{
line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
if (hmm.getDescription() != null)
{
line = String.format("%-5s %1s", "DESC", hmm.getDescription());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
line = String.format("%-5s %1s", "LENG", hmm.getLength());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
if (hmm.getMaxInstanceLength() != null)
{
line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
+
+ boolean status;
+ String statusStr;
+ status = hmm.referenceAnnotationIsActive();
+ statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "RF",
- hmm.getFileProperties().get("RF"));
- file.append((line + NEW_LINE));
+ statusStr);
+ output.append(NL + line);
+ status = hmm.maskValueIsActive();
+ statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "MM",
- hmm.getFileProperties().get("MM"));
- file.append((line + NEW_LINE));
+ statusStr);
+ output.append(NL + line);
+ status = hmm.consensusResidueIsActive();
+ statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "CONS",
- hmm.getFileProperties().get("CONS"));
- file.append((line + NEW_LINE));
+ statusStr);
+ output.append(NL + line);
+ status = hmm.consensusStructureIsActive();
+ statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "CS",
- hmm.getFileProperties().get("CS"));
- file.append((line + NEW_LINE));
+ statusStr);
+ output.append(NL + line);
+ status = hmm.mapIsActive();
+ statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "MAP",
- hmm.getFileProperties().get("MAP"));
- file.append((line + NEW_LINE));
+ statusStr);
+ output.append(NL + line);
+
if (hmm.getDate() != null)
{
line = String.format("%-5s %1s", "DATE", hmm.getDate());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
if (hmm.getNumberOfSequences() != null)
{
line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
if (hmm.getEffectiveNumberOfSequences() != null)
{
line = String.format("%-5s %1s", "EFFN",
hmm.getEffectiveNumberOfSequences());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
if (hmm.getCheckSum() != null)
{
line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
if (hmm.getGatheringThreshold() != null)
{
line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
if (hmm.getTrustedCutoff() != null)
{
line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
if (hmm.getNoiseCutoff() != null)
{
line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
if (hmm.getMSV() != null)
{
line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
hmm.getViterbi());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
hmm.getForward());
- file.append((line + NEW_LINE));
+ output.append(NL + line);
}
+ return output.toString();
}
-
- public static char charValue(String string)
+ /**
+ * Returns the char value of a single lettered String, i.e. its first
+ * character. Any further characters are ignored.
+ *
+ * @param string
+ *          the text to take the first character from; must not be empty
+ * @return the character at position 0
+ * @throws IndexOutOfBoundsException
+ *           from String.charAt if the String is empty
+ */
+ char charValue(String string)
{
char character;
character = string.charAt(0);
return character;
+
+ }
+
+ /**
+ * Prints a .hmm file for the model attached to the first of the given
+ * sequences. If the array is null or empty, or its first sequence is null
+ * or carries no model, the model already held by this object is printed
+ * instead.
+ *
+ * @param seqs
+ *          the sequences to export; only the first one is inspected
+ * @param jvsuffix
+ *          not used by this implementation
+ * @return the text produced by print()
+ */
+ @Override
+ public String print(SequenceI[] seqs, boolean jvsuffix)
+ {
+   // guard the array and its first element before asking for the model, so
+   // that a missing sequence falls back to the current hmm instead of throwing
+   if (seqs != null && seqs.length > 0 && seqs[0] != null
+       && seqs[0].getHMM() != null)
+   {
+     hmm = seqs[0].getHMM();
+   }
+   return print();
+ }
+
+ /**
+ * Renders the current model as the text of a .hmm file: the file
+ * properties, the model data and a closing "//" line, joined by the NL
+ * string.
+ *
+ * @return the complete file content
+ */
+ public String print()
+ {
+   final String properties = getFilePropertiesAsString();
+   final String model = getModelAsString();
+   return properties + NL + model + NL + "//";
+ }
+
+ /**
+ * Builds a new list holding, for each probability in the input, minus one
+ * times its natural logarithm. The input list is not modified.
+ *
+ * @param list
+ *          the probabilities to convert
+ * @return the converted values, in the same order as the input
+ */
+ List<Double> convertListToLogSpace(List<Double> list)
+ {
+   List<Double> negativeLogs = new ArrayList<>(list.size());
+   for (double probability : list)
+   {
+     negativeLogs.add(-1 * Math.log(probability));
+   }
+   return negativeLogs;
}
+
+ /**
+ * Returns the HMM sequence produced by reading a .hmm file, wrapped in a
+ * one-element array.
+ *
+ * @return an array whose only entry is the result of hmm.initHMMSequence()
+ */
+ @Override
+ public SequenceI[] getSeqsAsArray()
+ {
+   final SequenceI hmmSequence = hmm.initHMMSequence();
+   return new SequenceI[] { hmmSequence };
+ }
+
+ /**
+ * Reads the remaining tokens of the symbol line. The first character of
+ * each token is appended to the model's symbol list and registered in its
+ * index lookup under the token's position, counting from zero.
+ *
+ * @param parser
+ *          The scanner scanning the symbol line in the file.
+ */
+ public void fillSymbols(Scanner parser)
+ {
+   for (int position = 0; parser.hasNext(); position++)
+   {
+     final char symbol = parser.next().toCharArray()[0];
+     hmm.getSymbols().add(symbol);
+     hmm.setSymbolIndex(symbol, position);
+   }
+ }
+
+ /**
+ * Sets the line separator placed between lines when this file is printed.
+ *
+ * @param newLine
+ *          the separator to store in the NL field
+ */
+ @Override
+ public void setNewlineString(String newLine)
+ {
+   this.NL = newLine;
+ }
+
+ /**
+ * Does nothing: this implementation ignores the export settings.
+ *
+ * @param exportSettings
+ *          not used
+ */
+ @Override
+ public void setExportSettings(AlignExportSettingI exportSettings)
+ {
+   // no-op
+ }
+
+ /**
+ * Does nothing: this implementation ignores the view panel.
+ *
+ * @param viewpanel
+ *          not used
+ */
+ @Override
+ public void configureForView(AlignmentViewPanel viewpanel)
+ {
+   // no-op
+ }
+
+ /**
+ * Reports whether a warning is available. This implementation never has
+ * one.
+ *
+ * @return always false
+ */
+ @Override
+ public boolean hasWarningMessage()
+ {
+   final boolean warningAvailable = false;
+   return warningAvailable;
+ }
+
+ /**
+ * Returns the warning produced while handling this file. Because
+ * hasWarningMessage() always returns false there is never one to report,
+ * so this returns null rather than placeholder text that a caller could
+ * mistake for a real warning.
+ *
+ * @return null, as no warning is ever recorded
+ */
+ @Override
+ public String getWarningMessage()
+ {
+   return null;
+ }
+
}