public class HMMFile extends AlignFile
implements AlignmentFileReaderI, AlignmentFileWriterI
{
+ private static final String TERMINATOR = "//";
+
/*
* keys to data in HMM file, used to store as properties of the HiddenMarkovModel
*/
- private static final String HMM = "HMM";
+ public static final String HMM = "HMM";
public static final String NAME = "NAME";
public static final String ALPHABET = "ALPH";
- private static final String ALPH_AMINO = "amino";
-
- private static final String ALPH_DNA = "DNA";
-
- private static final String ALPH_RNA = "RNA";
-
- private static final String ALPHABET_AMINO = "ACDEFGHIKLMNPQRSTVWY";
-
- private static final String ALPHABET_DNA = "ACGT";
-
- private static final String ALPHABET_RNA = "ACGU";
-
public static final String DATE = "DATE";
public static final String COMMAND_LOG = "COM";
public static final String MASKED_VALUE = "MM";
+ private static final String ALPH_AMINO = "amino";
+
+ private static final String ALPH_DNA = "DNA";
+
+ private static final String ALPH_RNA = "RNA";
+
+ private static final String ALPHABET_AMINO = "ACDEFGHIKLMNPQRSTVWY";
+
+ private static final String ALPHABET_DNA = "ACGT";
+
+ private static final String ALPHABET_RNA = "ACGU";
+
private static final int NUMBER_OF_TRANSITIONS = 7;
private static final String SPACE = " ";
* Constructor for HMMFile used for exporting
*
- * @param hmm
- * @param exportImmediately
+ * @param markov
*/
public HMMFile(HiddenMarkovModel markov)
{
}
/**
- * Parses the model data from the HMMER3 file
+ * Parses the model data from the HMMER3 file. The input buffer should be
+ * positioned at the (optional) COMPO line if there is one, else at the insert
+ * emissions line for the BEGIN node of the model.
+ * <p>
+ * Lines are consumed until the terminator line or the end of input. Each pass
+ * of the loop builds one node from up to three lines: match emissions (the
+ * COMPO line, or a numbered node line with its annotations), insert emissions
+ * and state transitions. The collected nodes are then set on the model in a
+ * single call once the loop has finished.
*
* @param input
* @throws IOException
*/
void parseModel(BufferedReader input) throws IOException
{
- boolean first = true;
- // specification says there must always be an HMM header
- // and one more header which is skipped here
+ /*
+ * specification says there must always be an HMM header (already read)
+ * and one more header (guide headings) which is skipped here
+ *
+ * NOTE(review): the first line read below is parsed by the loop rather than
+ * skipped, which agrees with the method description; this remark may be
+ * stale - confirm where the guide headings line is consumed
+ */
+ int nodeNo = 0;
String line = input.readLine();
- while (!"//".equals(line))
+ List<HMMNode> nodes = new ArrayList<>();
+
+ while (line != null && !TERMINATOR.equals(line))
{
HMMNode node = new HMMNode();
- hmm.addNode(node);
- Scanner matchReader = new Scanner(line);
- String next = matchReader.next();
- if (next.equals(COMPO) || !first)
+ nodes.add(node);
+ Scanner scanner = new Scanner(line);
+ String next = scanner.next();
+
+ /*
+ * expect COMPO (optional) for average match emissions
+ * or a node number followed by node's match emissions
+ *
+ * on the first pass without a COMPO line this branch is not taken, so the
+ * current line is treated as the begin node's insert emissions instead
+ */
+ if (COMPO.equals(next) || nodeNo > 0)
{
- // stores match emission line in list
- double[] matches = parseDoubles(matchReader, numberOfSymbols);
+ /*
+ * parse match emissions
+ */
+ double[] matches = parseDoubles(scanner, numberOfSymbols);
node.setMatchEmissions(matches);
- if (!first)
+ if (!COMPO.equals(next))
{
- // TODO handle files with no column map (make our own)
- int column = parseAnnotations(matchReader, node);
- hmm.setAlignmentColumn(node, column - 1);
+ int resNo = parseAnnotations(scanner, node);
+ if (resNo == 0)
+ {
+ /*
+ * no MAP annotation provided, just number off from 0 (begin node)
+ */
+ resNo = nodeNo;
+ }
+ node.setResidueNumber(resNo); // covers the fallback case, where parseAnnotations set nothing
}
+ line = input.readLine();
}
- matchReader.close();
- // stores insert emission line in list
- line = input.readLine();
- Scanner insertReader = new Scanner(line);
- double[] inserts = parseDoubles(insertReader, numberOfSymbols);
+ scanner.close();
+
+ /*
+ * parse insert emissions
+ *
+ * NOTE(review): if the input ended right after a match emissions line,
+ * line is null here and new Scanner(line) would throw
+ * NullPointerException - confirm whether truncated files need handling
+ */
+ scanner = new Scanner(line);
+ double[] inserts = parseDoubles(scanner, numberOfSymbols);
node.setInsertEmissions(inserts);
- insertReader.close();
+ scanner.close();
- // stores state transition line in list
+ /*
+ * parse state transitions
+ *
+ * NOTE(review): the line read next is not null-checked either
+ */
line = input.readLine();
- Scanner transitionReader = new Scanner(line);
- double[] transitions = parseDoubles(transitionReader,
+ scanner = new Scanner(line);
+ double[] transitions = parseDoubles(scanner,
NUMBER_OF_TRANSITIONS);
node.setStateTransitions(transitions);
- transitionReader.close();
+ scanner.close();
line = input.readLine();
- first = false;
+ nodeNo++;
}
+
+ hmm.setNodes(nodes);
}
/**
- * Parses the annotations on the match emission line and add them to the node.
+ * Parses the annotations on the match emission line and adds them to the node.
* (See p109 of the HMMER User Guide (V3.1b2) for the specification.) Returns
- * the alignment column number (base 1) that the node maps to, if provided,
- * else zero.
+ * the residue position that the node maps to, if provided, else zero.
+ * <p>
+ * Tokens are read in order, each only if present: map position, consensus
+ * residue, reference annotation, mask value, consensus structure. For the last
+ * four only the first character of the token is stored.
*
* @param scanner
+ *          supplies the tokens that follow the match emission values
* @param node
+ *          the node on which the parsed values are set
+ * @return the parsed map position, or zero if the token was absent, "-" or not
+ *         an integer
*/
int parseAnnotations(Scanner scanner, HMMNode node)
{
+ int mapTo = 0;
+
/*
- * map from hmm node to alignment column index, if provided
- * HMM counts columns from 1, convert to base 0 for Jalview
+ * map from hmm node to sequence position, if provided
+ * (a "-" token, or one that is not an integer, leaves the value at zero)
*/
- int column = 0;
- if (hmm.getBooleanProperty(MAP) && scanner.hasNext())
- {
- column = scanner.nextInt();
- node.setAlignmentColumn(column - 1);
- }
- else
+ if (scanner.hasNext())
{
- scanner.next();
+ String value = scanner.next();
+ if (!"-".equals(value))
+ {
+ try
+ {
+ mapTo = Integer.parseInt(value);
+ node.setResidueNumber(mapTo);
+ } catch (NumberFormatException e)
+ {
+ // ignore: not a number, so mapTo stays zero and no residue number is set here
+ }
+ }
}
/*
- * hmm consensus residue if provided, else -
+ * hmm consensus residue if provided, else '-'
*/
if (scanner.hasNext())
{
- char consensusR;
- consensusR = charValue(scanner.next());
- node.setConsensusResidue(consensusR);
+ node.setConsensusResidue(scanner.next().charAt(0));
}
/*
- * RF reference annotation, if provided, else -
+ * RF reference annotation, if provided, else '-'
*/
if (scanner.hasNext())
{
- char reference;
- reference = charValue(scanner.next());
- node.setReferenceAnnotation(reference);
+ node.setReferenceAnnotation(scanner.next().charAt(0));
}
/*
- * 'm' for masked position, if provided, else -
+ * 'm' for masked position, if provided, else '-'
*/
if (scanner.hasNext())
{
- char value;
- value = charValue(scanner.next());
- node.setMaskValue(value);
+ node.setMaskValue(scanner.next().charAt(0));
}
/*
- * structure consensus symbol, if provided, else -
+ * structure consensus symbol, if provided, else '-'
*/
if (scanner.hasNext())
{
- char consensusS;
- consensusS = charValue(scanner.next());
- node.setConsensusStructure(consensusS);
+ node.setConsensusStructure(scanner.next().charAt(0));
}
- return column;
+ return mapTo;
}
/**
for (int nodeNo = 0; nodeNo <= length; nodeNo++)
{
String matchLine = String.format("%7s",
- nodeNo == 0 ? "COMPO" : Integer.toString(nodeNo));
+ nodeNo == 0 ? COMPO : Integer.toString(nodeNo));
double[] doubleMatches = convertToLogSpace(
hmm.getNode(nodeNo).getMatchEmissions());
if (nodeNo != 0)
{
- matchLine += SPACE + (hmm.getNodeAlignmentColumn(nodeNo) + 1);
+ matchLine += SPACE + (hmm.getNodeMapPosition(nodeNo));
matchLine += SPACE + hmm.getConsensusResidue(nodeNo);
matchLine += SPACE + hmm.getReferenceAnnotation(nodeNo);
if (hmm.getFileHeader().contains("HMMER3/f"))
if (hmm.getMSV() != null)
{
- output.append(String.format("%n%-19s %18s", "STATS LOCAL MSV",
- hmm.getMSV()));
+ format = "%n%-19s %18s";
+ output.append(String.format(format, "STATS LOCAL MSV", hmm.getMSV()));
- output.append(String.format("%n%-19s %18s", "STATS LOCAL VITERBI",
+ output.append(String.format(format, "STATS LOCAL VITERBI",
hmm.getViterbi()));
- output.append(String.format("%n%-19s %18s", "STATS LOCAL FORWARD",
+ output.append(String.format(format, "STATS LOCAL FORWARD",
hmm.getForward()));
}
}
}
}
- /**
- * Returns the char value of a single lettered String.
- *
- * @param string
- * @return
- */
- char charValue(String string)
- {
- char character;
- character = string.charAt(0);
- return character;
- }
-
@Override
- public String print(SequenceI[] seqs, boolean jvsuffix)
+ public String print(SequenceI[] sequences, boolean jvsuffix) // jvsuffix is not referenced in this method body
{
- if (seqs[0].getHMM() != null)
+ if (sequences[0].getHMM() != null) // NOTE(review): sequences[0] is read without a null/empty check - confirm callers always pass one entry
{
- hmm = seqs[0].getHMM();
+ hmm = sequences[0].getHMM(); // adopt the first sequence's model before printing
}
return print();
}
appendProperties(output);
output.append(NL);
appendModelAsString(output);
- output.append(NL + "//");
+ output.append(NL).append(TERMINATOR).append(NL);
return output.toString();
}
@Override
public SequenceI[] getSeqsAsArray()
{
- SequenceI hmmSeq = hmm.initHMMSequence();
+ SequenceI hmmSeq = hmm.getConsensusSequence();
SequenceI[] seq = new SequenceI[1];
seq[0] = hmmSeq;
return seq;