package jalview.io;
+import jalview.api.AlignExportSettingI;
+import jalview.api.AlignmentViewPanel;
import jalview.datamodel.HMMNode;
import jalview.datamodel.HiddenMarkovModel;
import jalview.datamodel.SequenceI;
import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
public class HMMFile extends AlignFile
implements AlignmentFileReaderI, AlignmentFileWriterI
{
- // HMM to store file data
- private HiddenMarkovModel hmm = new HiddenMarkovModel();
+ private static final int NUMBER_OF_TRANSITIONS = 7; // count passed to fillList when reading a state transition line
- // number of possible transitions
- private final int NUMBER_OF_TRANSITIONS = 7;
+ private static final String SPACE = " ";
- private final String NEW_LINE = "\n";
+ private static final String COMPO = "COMPO"; // leading token that parseModel compares the first token of a line against
- //number of symbols in the alphabet used in the hidden Markov model
- int numberOfSymbols;
+ private static final String EMPTY = "";
- private final String SPACE = " ";
+ /*
+ * guide line added to an output HMMER file, purely for readability
+ */
+ private static final String TRANSITIONTYPELINE = " m->m m->i m->d i->m i->i d->m d->d";
- private final String COMPO = "COMPO";
+ private static String NL = "\n"; // line separator for generated output; static but not final, because setNewlineString() reassigns it
- private final String EMPTY = "";
+ private HiddenMarkovModel hmm; // no longer created here; assigned in parse(), parse(BufferedReader) and print(SequenceI[], boolean)
- //This is a line that needs to be added to each HMMER� file. It is purely for readability.
- private static final String TRANSITIONTYPELINE = "m->m m->i m->d i->m i->i d->m d->d";
+ // number of symbols in the alphabet used in the hidden Markov model
+ int numberOfSymbols;
/**
- * Constructor for HMMFile, parses immediately
+ * Constructs an HMMFile by passing the given input and data source type
+ * straight to the superclass constructor. The original note says this
+ * parses immediately; no parse call is made in this constructor, so
+ * presumably the superclass constructor triggers it (TODO confirm).
+ *
+ * @param inFile
+ *          forwarded unchanged to the superclass constructor
+ * @param type
+ *          forwarded unchanged to the superclass constructor
+ * @throws IOException
+ *           presumably propagated from the superclass constructor (TODO
+ *           confirm)
+ */
+ public HMMFile(String inFile, DataSourceType type) throws IOException
+ {
+ super(inFile, type);
+ }
+
+ /**
+ * Constructs an HMMFile from an existing FileParse by passing it to the
+ * superclass constructor. The explicit parse() call that used to follow the
+ * super call has been dropped; the original note says this still parses
+ * immediately, presumably via the superclass constructor (TODO confirm).
*
* @param source
* @throws IOException
*/
public HMMFile(FileParse source) throws IOException
{
- super(false, source);
- parse();
+ super(source);
}
/**
}
/**
+ * For testing, do not use.
+ * <p>
+ * The constructor body only stores the supplied reader in dataIn; it does
+ * not call parse().
+ *
+ * @param br
+ *          the reader assigned to dataIn
+ */
+ HMMFile(BufferedReader br)
+ {
+ dataIn = br;
+ }
+
+ /**
* Returns the HMM produced by reading in a HMMER3 file.
*
* @return
@Override
public void parse() throws IOException
{
- parseFileProperties(dataIn);
- parseModel(dataIn);
+ /*
+ * Start from an empty model, then read the file properties followed by
+ * the model data from dataIn.
+ */
+ try
+ {
+ hmm = new HiddenMarkovModel();
+ parseFileProperties(dataIn);
+ parseModel(dataIn);
+ } catch (RuntimeException e)
+ {
+ /*
+ * Unchecked failures raised while scanning the input (for example a
+ * missing token) are reported to the caller as an IOException, with the
+ * cause preserved, instead of being printed and swallowed. IOException
+ * itself is not caught here, so it propagates as declared.
+ */
+ throw new IOException("Failed to parse HMM file: " + e.getMessage(),
+ e);
+ }
}
/**
public void parse(BufferedReader br) throws IOException
{
+ hmm = new HiddenMarkovModel(); // replace any model already held with an empty one before reading from br
parseFileProperties(br);
parseModel(br);
}
// properties)
{
readingFile = false;
- hmm.fillSymbols(parser);
+ fillSymbols(parser);
numberOfSymbols = hmm.getNumberOfSymbols();
}
else if ("STATS".equals(next))
*/
void parseModel(BufferedReader input) throws IOException
{
+ boolean first = true; // true only during the first pass of the loop below; cleared at the end of each pass
String line = input.readLine();
- int node = 0;
while (!"//".equals(line))
{
- hmm.getNodes().add(new HMMNode());
- String next;
+ HMMNode node = new HMMNode();
+ hmm.getNodes().add(node); // the node joins the model's list before it is populated below
Scanner matchReader = new Scanner(line);
- next = matchReader.next();
- if (next.equals(COMPO) || node > 0)
+ String next = matchReader.next();
+ if (next.equals(COMPO) || !first) // on the first pass, match emissions are read only if the line starts with COMPO
{
// stores match emission line in list
List<Double> matches = new ArrayList<>();
matches = fillList(matchReader, numberOfSymbols);
- hmm.getNodes().get(node).setMatchEmissions(matches);
- if (node > 0)
+ node.setMatchEmissions(matches);
+ if (!first) // annotations are parsed for every pass except the first
{
- parseAnnotations(matchReader, node);
+ int column = parseAnnotations(matchReader, node);
+ hmm.setAlignmentColumn(node, column - 1); // NOTE(review): parseAnnotations returns 0 when it read no map column, which makes this -1 - confirm intended
}
}
matchReader.close();
Scanner insertReader = new Scanner(line);
List<Double> inserts = new ArrayList<>();
inserts = fillList(insertReader, numberOfSymbols);
- hmm.getNodes().get(node).setInsertEmissions(inserts);
+ node.setInsertEmissions(inserts);
insertReader.close();
// stores state transition line in list
Scanner transitionReader = new Scanner(line);
List<Double> transitions = new ArrayList<>();
transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
- hmm.getNodes().get(node).setStateTransitions(transitions);
+ node.setStateTransitions(transitions);
transitionReader.close();
line = input.readLine();
- node++;
- }
+ first = false;
+ }
}
/**
- * Parses the annotations on the match emission line.
+ * Parses the annotations on the match emission line and adds them to the
+ * node. (See p109 of the HMMER User Guide (V3.1b2) for the specification.)
+ * Returns the alignment column number (base 1) that the node maps to, if
+ * provided, else zero.
+ * <p>
+ * The tokens are consumed in this order: map column, consensus residue,
+ * reference annotation, mask value, consensus structure. Each of the last
+ * four is only read if the scanner still has a token.
+ * <p>
+ * NOTE(review): when the map is not active, or no token remains, the first
+ * annotation token is skipped with scanner.next() without a hasNext()
+ * check.
*
* @param scanner
- * The scanner which is processing match emission line.
- * @param index
- * The index of node which is being scanned.
+ * @param node
+ *          the node that receives the parsed annotation values
+ * @return the column number read from the line (base 1), or zero if none
+ *         was read
*/
- void parseAnnotations(Scanner scanner, int index)
+ int parseAnnotations(Scanner scanner, HMMNode node)
{
+ /*
+ * map from hmm node to alignment column index, if provided
+ * HMM counts columns from 1, convert to base 0 for Jalview
+ */
+ int column = 0;
if (hmm.mapIsActive() && scanner.hasNext())
{
- int column;
column = scanner.nextInt();
- hmm.getNodes().get(index).setAlignmentColumn(column - 1);
- hmm.getNodeLookup().put(column - 1, index);
+ node.setAlignmentColumn(column - 1);
}
else
{
scanner.next();
}
+ /*
+ * hmm consensus residue if provided, else -
+ */
if (scanner.hasNext())
{
char consensusR;
consensusR = charValue(scanner.next());
- hmm.getNodes().get(index).setConsensusResidue(consensusR);
+ node.setConsensusResidue(consensusR);
}
+ /*
+ * RF reference annotation, if provided, else -
+ */
if (scanner.hasNext())
{
char reference;
reference = charValue(scanner.next());
- hmm.getNodes().get(index).setReferenceAnnotation(reference);
+ node.setReferenceAnnotation(reference);
}
+ /*
+ * 'm' for masked position, if provided, else -
+ */
if (scanner.hasNext())
{
char value;
value = charValue(scanner.next());
- hmm.getNodes().get(index).setMaskValue(value);
+ node.setMaskValue(value);
}
+
+ /*
+ * structure consensus symbol, if provided, else -
+ */
if (scanner.hasNext())
{
char consensusS;
consensusS = charValue(scanner.next());
- hmm.getNodes().get(index).setConsensusStructure(consensusS);
+ node.setConsensusStructure(consensusS);
}
- }
-
+ return column;
+ }
/**
- * Fills a list of doubles based on an input line.
+ * Fills a list of doubles from an input line
*
* @param input
* The scanner for the line containing the data to be transferred to
return list;
}
-
- /**
- * Writes a HMM to a file/
- *
- * @param exportLocation
- * Filename, URL or Pasted String to write to.
- * @throws FileNotFoundException
- * @throws UnsupportedEncodingException
- *
- **/
-
- public void exportFile(String exportLocation) throws IOException
- {
- PrintWriter writer = new PrintWriter(exportLocation);
- appendFileProperties(writer);
- appendModel(writer);
- writer.println("//");
-
- writer.close();
-
- }
-
- /**
- * Writes a HMM to a file/
- *
- * @param exportLocation
- * Filename, URL or Pasted String to write to.
- * @throws FileNotFoundException
- * @throws UnsupportedEncodingException
- *
- **/
-
- public void exportFile(File exportLocation) throws IOException
- {
- PrintWriter writer = new PrintWriter(exportLocation);
- appendFileProperties(writer);
- appendModel(writer);
- writer.println("//");
-
- writer.close();
-
- }
-
/**
* Returns a string to be added to the StringBuilder containing the entire
* output String.
}
/**
- * Appends the hidden Markov model data to the StringBuilder containing the
- * output
- *
- * @param file
- * The StringBuilder containing the output.
+ * Returns a string containing the model data.
+ * <p>
+ * In the appends visible here the text starts with the symbol line, and
+ * every later line (the transition type guide line, then match, insert and
+ * transition lines) is appended with a leading NL rather than a trailing
+ * one.
*/
- void appendModel(PrintWriter writer)
+ String getModelAsString()
{
+ StringBuilder output = new StringBuilder();
String symbolLine = "HMM";
List<Character> charSymbols = hmm.getSymbols();
List<String> strSymbols;
strSymbols = charListToStringList(charSymbols);
symbolLine += addData(11, 9, strSymbols);
- writer.println(symbolLine);
- writer.println(TRANSITIONTYPELINE);
+ output.append(symbolLine);
+ output.append(NL).append(TRANSITIONTYPELINE);
int length = hmm.getLength();
}
- writer.println(matchLine);
+ output.append(NL).append(matchLine);
String insertLine = EMPTY;
List<String> strInserts;
strInserts = doubleListToStringList(doubleInserts);
insertLine += addData(17, 9, strInserts);
- writer.println(insertLine);
+ output.append(NL).append(insertLine);
String transitionLine = EMPTY;
List<String> strTransitions;
strTransitions = doubleListToStringList(doubleTransitions);
transitionLine += addData(17, 9, strTransitions);
- writer.println(transitionLine);
+ output.append(NL).append(transitionLine);
}
+ return output.toString();
}
/**
- * Appends the hidden Markov model file properties to the StringBuilder
- * containing the output
- *
- * @param file
- * The StringBuilder containing the output.
+ * Returns a String containing the HMM file properties
+ * <p>
+ * The text starts with the file header; every later property line is
+ * appended with a leading NL rather than a trailing one. Properties guarded
+ * by a null check below are left out when the model returns null for them.
*/
- void appendFileProperties(PrintWriter writer)
+ String getFilePropertiesAsString()
{
+ StringBuffer output = new StringBuffer(); // NOTE(review): getModelAsString() uses StringBuilder for the same job
String line;
- writer.println(hmm.getFileHeader());
+ output.append(hmm.getFileHeader());
line = String.format("%-5s %1s", "NAME", hmm.getName());
- writer.println((line));
+ output.append(NL + line);
if (hmm.getAccessionNumber() != null)
{
line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
- writer.println((line));
+ output.append(NL + line);
}
if (hmm.getDescription() != null)
{
line = String.format("%-5s %1s", "DESC", hmm.getDescription());
- writer.println((line));
+ output.append(NL + line);
}
line = String.format("%-5s %1s", "LENG", hmm.getLength());
- writer.println((line));
+ output.append(NL + line);
if (hmm.getMaxInstanceLength() != null)
{
line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
- writer.println((line));
+ output.append(NL + line);
}
line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
- writer.println((line));
+ output.append(NL + line);
boolean status;
String statusStr;
statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "RF",
statusStr);
- writer.println((line));
+ output.append(NL + line);
status = hmm.maskValueIsActive();
statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "MM",
statusStr);
- writer.println((line));
+ output.append(NL + line);
status = hmm.consensusResidueIsActive();
statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "CONS",
statusStr);
- writer.println((line));
+ output.append(NL + line);
status = hmm.consensusStructureIsActive();
statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "CS",
statusStr);
- writer.println((line));
+ output.append(NL + line);
status = hmm.mapIsActive();
statusStr = HiddenMarkovModel.findStringFromBoolean(status);
line = String.format("%-5s %1s", "MAP",
statusStr);
- writer.println((line));
+ output.append(NL + line);
if (hmm.getDate() != null)
{
line = String.format("%-5s %1s", "DATE", hmm.getDate());
- writer.println((line));
+ output.append(NL + line);
}
if (hmm.getNumberOfSequences() != null)
{
line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
- writer.println((line));
+ output.append(NL + line);
}
if (hmm.getEffectiveNumberOfSequences() != null)
{
line = String.format("%-5s %1s", "EFFN",
hmm.getEffectiveNumberOfSequences());
- writer.println((line));
+ output.append(NL + line);
}
if (hmm.getCheckSum() != null)
{
line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
- writer.println((line));
+ output.append(NL + line);
}
if (hmm.getGatheringThreshold() != null)
{
line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
- writer.println((line));
+ output.append(NL + line);
}
if (hmm.getTrustedCutoff() != null)
{
line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
- writer.println((line));
+ output.append(NL + line);
}
if (hmm.getNoiseCutoff() != null)
{
line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
- writer.println((line));
+ output.append(NL + line);
}
if (hmm.getMSV() != null)
{
line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
- writer.println((line));
+ output.append(NL + line);
line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
hmm.getViterbi());
- writer.println((line));
+ output.append(NL + line);
line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
hmm.getForward());
- writer.println((line));
+ output.append(NL + line);
}
+ return output.toString();
}
@Override
public String print(SequenceI[] seqs, boolean jvsuffix)
{
+ /*
+ * Use the HMM attached to the first sequence when there is one; otherwise
+ * keep whatever model this object already holds. The array is checked
+ * first so that a null or empty array, or a null first element, does not
+ * throw. The jvsuffix argument is not used.
+ */
+ if (seqs != null && seqs.length > 0 && seqs[0] != null)
+ {
+ HiddenMarkovModel sequenceHmm = seqs[0].getHMM();
+ if (sequenceHmm != null)
+ {
+ hmm = sequenceHmm;
+ }
+ }
+ return print();
+ }
- return null;
+ /**
+ * Prints the .hmm file to a String: the file properties, then the model
+ * data, then the terminating "//" line, each separated by NL.
+ *
+ * @return the text built from the model currently held by this object
+ */
+ public String print()
+ {
+ StringBuilder output = new StringBuilder();
+ output.append(getFilePropertiesAsString());
+ output.append(NL);
+ output.append(getModelAsString());
+ output.append(NL).append("//");
+ return output.toString();
}
/**
}
+ /**
+ * Returns the HMM sequence produced by reading a .hmm file.
+ * <p>
+ * The sequence is obtained from hmm.initHMMSequence() and returned as the
+ * only element of a new array.
+ *
+ * @return an array of length one holding that sequence
+ */
@Override
public SequenceI[] getSeqsAsArray()
{
SequenceI hmmSeq = hmm.initHMMSequence();
SequenceI[] seq = new SequenceI[1];
seq[0] = hmmSeq;
- // view.initInformation();
- // view.updateInformation(view.panel);
- // getViewport().alignmentChanged(alignPanel);
return seq;
}
+ /**
+ * Reads every remaining token from the scanner, taking the first character
+ * of each as an alphabet symbol. Each symbol is appended to the model's
+ * symbol list and registered against its position (counting from zero) in
+ * the model's symbol index lookup.
+ *
+ * @param parser
+ *          The scanner scanning the symbol line in the file.
+ */
+ public void fillSymbols(Scanner parser)
+ {
+ for (int position = 0; parser.hasNext(); position++)
+ {
+ char symbol = parser.next().charAt(0);
+ hmm.getSymbols().add(symbol);
+ hmm.setSymbolIndex(symbol, position);
+ }
+ }
+
+ @Override
+ public void setNewlineString(String newLine)
+ {
+ NL = newLine; // NOTE(review): NL is a static field, so this assignment is shared by every HMMFile instance
+ }
+
+ @Override
+ public void setExportSettings(AlignExportSettingI exportSettings)
+ {
+ // no-op: the supplied export settings are ignored
+ }
+
+ @Override
+ public void configureForView(AlignmentViewPanel viewpanel)
+ {
+ // no-op: the supplied view panel is ignored
+ }
+
+ @Override
+ public boolean hasWarningMessage()
+ {
+ return false; // this implementation never reports a warning
+ }
+
+ @Override
+ public String getWarningMessage()
+ {
+ return "warning message"; // NOTE(review): looks like placeholder text; hasWarningMessage() in this class always returns false
+ }
+
}