public class HMMFile extends AlignFile
implements AlignmentFileReaderI, AlignmentFileWriterI
{
+ private static final String TERMINATOR = "//";
+
/*
* keys to data in HMM file, used to store as properties of the HiddenMarkovModel
*/
- private static final String HMM = "HMM";
+ public static final String HMM = "HMM";
public static final String NAME = "NAME";
public static final String ALPHABET = "ALPH";
- private static final String ALPH_AMINO = "amino";
-
- private static final String ALPH_DNA = "DNA";
-
- private static final String ALPH_RNA = "RNA";
-
- private static final String ALPHABET_AMINO = "ACDEFGHIKLMNPQRSTVWY";
-
- private static final String ALPHABET_DNA = "ACGT";
-
- private static final String ALPHABET_RNA = "ACGU";
-
public static final String DATE = "DATE";
public static final String COMMAND_LOG = "COM";
public static final String MASKED_VALUE = "MM";
+ private static final String ALPH_AMINO = "amino";
+
+ private static final String ALPH_DNA = "DNA";
+
+ private static final String ALPH_RNA = "RNA";
+
+ private static final String ALPHABET_AMINO = "ACDEFGHIKLMNPQRSTVWY";
+
+ private static final String ALPHABET_DNA = "ACGT";
+
+ private static final String ALPHABET_RNA = "ACGU";
+
private static final int NUMBER_OF_TRANSITIONS = 7;
private static final String SPACE = " ";
}
/**
- * Parses the model data from the HMMER3 file
+ * Parses the model data from the HMMER3 file. The input buffer should be
+ * positioned at the (optional) COMPO line if there is one, else at the insert
+ * emissions line for the BEGIN node of the model.
+ * <p>
+ * One node is added to the model on each pass of the loop, starting with the
+ * BEGIN node (node 0). For each node the lines read are: a match emissions
+ * line (for node 0 only if a COMPO line is present; for later nodes a line
+ * expected to start with the node number and end with annotations), then an
+ * insert emissions line, then a state transitions line. Parsing stops at the
+ * terminator line or at the end of the input, whichever comes first.
*
* @param input
+ *          reader positioned as described above
* @throws IOException
+ *           if a line cannot be read from the input
*/
void parseModel(BufferedReader input) throws IOException
{
- boolean first = true;
- // specification says there must always be an HMM header
- // and one more header which is skipped here
+ /*
+ * specification says there must always be an HMM header and one more
+ * header (guide headings); neither is read or skipped in this method -
+ * the reader is expected to be positioned after both (see method comment)
+ */
+ int nodeNo = 0;
String line = input.readLine();
- while (!"//".equals(line))
+ while (line != null && !TERMINATOR.equals(line))
{
HMMNode node = new HMMNode();
hmm.addNode(node);
- Scanner matchReader = new Scanner(line);
- String next = matchReader.next();
- if (next.equals(COMPO) || !first)
+ Scanner scanner = new Scanner(line);
+ String next = scanner.next();
+
+ /*
+ * expect COMPO (optional) for average match emissions
+ * or a node number followed by node's match emissions
+ * (if neither applies, this is node 0 with no COMPO line, and the current
+ * line is left to be parsed as its insert emissions below)
+ */
+ if (COMPO.equals(next) || nodeNo > 0)
{
- // stores match emission line in list
- double[] matches = parseDoubles(matchReader, numberOfSymbols);
+ /*
+ * parse match emissions
+ */
+ double[] matches = parseDoubles(scanner, numberOfSymbols);
node.setMatchEmissions(matches);
- if (!first)
+ if (!COMPO.equals(next))
{
- // TODO handle files with no column map (make our own)
- int column = parseAnnotations(matchReader, node);
- hmm.setAlignmentColumn(node, column - 1);
+ int column = parseAnnotations(scanner, node);
+ if (column == 0)
+ {
+ /*
+ * no MAP annotation provided, just number off from 0 (begin node)
+ */
+ column = nodeNo;
+ }
+ hmm.setAlignmentColumn(node, column - 1); // node 1 <==> column 0
}
+ line = input.readLine(); // move on to this node's insert emissions line
}
- matchReader.close();
- // stores insert emission line in list
- line = input.readLine();
- Scanner insertReader = new Scanner(line);
- double[] inserts = parseDoubles(insertReader, numberOfSymbols);
+ scanner.close();
+
+ /*
+ * parse insert emissions
+ * NOTE(review): line is not null-checked here or before the transitions
+ * scanner below; input that ends part-way through a node would presumably
+ * fail with a NullPointerException - confirm whether that is acceptable
+ */
+ scanner = new Scanner(line);
+ double[] inserts = parseDoubles(scanner, numberOfSymbols);
node.setInsertEmissions(inserts);
- insertReader.close();
+ scanner.close();
- // stores state transition line in list
+ /*
+ * parse state transitions
+ */
line = input.readLine();
- Scanner transitionReader = new Scanner(line);
- double[] transitions = parseDoubles(transitionReader,
+ scanner = new Scanner(line);
+ double[] transitions = parseDoubles(scanner,
NUMBER_OF_TRANSITIONS);
node.setStateTransitions(transitions);
- transitionReader.close();
+ scanner.close();
line = input.readLine();
- first = false;
+ nodeNo++;
}
}
* HMM counts columns from 1, convert to base 0 for Jalview
*/
int column = 0;
- if (hmm.getBooleanProperty(MAP) && scanner.hasNext())
- {
- column = scanner.nextInt();
- node.setAlignmentColumn(column - 1);
- }
- else
+ String value;
+ if (scanner.hasNext())
{
- scanner.next();
+ value = scanner.next();
+ if (!"-".equals(value))
+ {
+ try
+ {
+ column = Integer.parseInt(value);
+ node.setAlignmentColumn(column - 1);
+ } catch (NumberFormatException e)
+ {
+ // ignore a non-numeric value: column stays 0, the same as for '-'
+ }
+ }
}
/*
- * hmm consensus residue if provided, else -
+ * hmm consensus residue if provided, else '-'
*/
if (scanner.hasNext())
{
- char consensusR;
- consensusR = charValue(scanner.next());
- node.setConsensusResidue(consensusR);
+ node.setConsensusResidue(scanner.next().charAt(0));
}
/*
- * RF reference annotation, if provided, else -
+ * RF reference annotation, if provided, else '-'
*/
if (scanner.hasNext())
{
- char reference;
- reference = charValue(scanner.next());
- node.setReferenceAnnotation(reference);
+ node.setReferenceAnnotation(scanner.next().charAt(0));
}
/*
- * 'm' for masked position, if provided, else -
+ * 'm' for masked position, if provided, else '-'
*/
if (scanner.hasNext())
{
- char value;
- value = charValue(scanner.next());
- node.setMaskValue(value);
+ node.setMaskValue(scanner.next().charAt(0));
}
/*
- * structure consensus symbol, if provided, else -
+ * structure consensus symbol, if provided, else '-'
*/
if (scanner.hasNext())
{
- char consensusS;
- consensusS = charValue(scanner.next());
- node.setConsensusStructure(consensusS);
+ node.setConsensusStructure(scanner.next().charAt(0));
}
return column;
for (int nodeNo = 0; nodeNo <= length; nodeNo++)
{
String matchLine = String.format("%7s",
- nodeNo == 0 ? "COMPO" : Integer.toString(nodeNo));
+ nodeNo == 0 ? COMPO : Integer.toString(nodeNo));
double[] doubleMatches = convertToLogSpace(
hmm.getNode(nodeNo).getMatchEmissions());
if (hmm.getMSV() != null)
{
- output.append(String.format("%n%-19s %18s", "STATS LOCAL MSV",
- hmm.getMSV()));
+ format = "%n%-19s %18s";
+ output.append(String.format(format, "STATS LOCAL MSV", hmm.getMSV()));
- output.append(String.format("%n%-19s %18s", "STATS LOCAL VITERBI",
+ output.append(String.format(format, "STATS LOCAL VITERBI",
hmm.getViterbi()));
- output.append(String.format("%n%-19s %18s", "STATS LOCAL FORWARD",
+ output.append(String.format(format, "STATS LOCAL FORWARD",
hmm.getForward()));
}
}
}
}
- /**
- * Returns the char value of a single lettered String.
- *
- * @param string
- * @return
- */
- char charValue(String string)
- {
- char character;
- character = string.charAt(0);
- return character;
- }
-
@Override
- public String print(SequenceI[] seqs, boolean jvsuffix)
+ public String print(SequenceI[] sequences, boolean jvsuffix)
{
- if (seqs[0].getHMM() != null)
+ if (sequences[0].getHMM() != null) // only the first sequence is consulted; jvsuffix is not read in this method
{
- hmm = seqs[0].getHMM();
+ hmm = sequences[0].getHMM(); // NOTE(review): sequences[0] is read with no null/empty check on the array - confirm callers guarantee one entry
}
return print();
}
appendProperties(output);
output.append(NL);
appendModelAsString(output);
- output.append(NL + "//");
+ output.append(NL).append(TERMINATOR).append(NL);
return output.toString();
}
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
-import static org.testng.Assert.fail;
import jalview.datamodel.HMMNode;
import jalview.datamodel.HiddenMarkovModel;
assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17");
assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
"Protein kinase domain");
- assertEquals(hmm.getLength().intValue(), 260);
+ assertEquals(hmm.getLength(), 260);
assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
assertEquals(hmm.getAlphabetType(), "amino");
assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
assertEquals(hmm.getMaskedValue(183), '-');
assertEquals(hmm.getConsensusStructure(240), 'H');
}
+
+ /**
+ * Test that Jalview can parse an HMM file even with a bunch of 'mandatory'
+ * fields missing (including no MAP annotation or // terminator line)
+ *
+ * @throws IOException
+ */
+ @Test(groups = "Functional")
+ public void testParse_minimalFile() throws IOException
+ {
+ /*
+ * ALPH is absent, alphabet inferred from HMM header line
+ * Optional COMPO line is absent
+ * first line after HMM is a guide line for readability
+ * next line is BEGIN node insert emissions
+ * next line is BEGIN node transitions
+ * next line is first sequence node match emissions 1.1 1.2 1.3
+ * next line is first sequence node insert emissions 1.4 1.5 1.6
+ * next line is first sequence node transitions
+ * last three lines are the same again for the second sequence node
+ * (match emissions 1.01 1.02 1.03, insert emissions 1.04 1.05 1.06)
+ * both match lines carry '-' in place of a MAP value, so the test expects
+ * node n to be mapped to column n-1
+ */
+ //@formatter:off
+ String hmmData =
+ "HMMER3\n" +
+ "HMM P M J\n" +
+ // both spec and parser require a line after the HMM line
+ " m->m m->i m->d i->m i->i d->m d->d\n" +
+ " 0.1 0.2 0.3\n" +
+ " 0.4 0.5 0.6 0.7 0.8 0.9 0.95\n" +
+ " 1 1.1 1.2 1.3 - - - - -\n" +
+ " 1.4 1.5 1.6\n" +
+ " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n" +
+ " 2 1.01 1.02 1.03 - - - - -\n" +
+ " 1.04 1.05 1.06\n" +
+ " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n";
+ //@formatter:on
+ HMMFile parser = new HMMFile(hmmData, DataSourceType.PASTE);
+ HiddenMarkovModel hmm = parser.getHMM();
+ assertNotNull(hmm);
+ assertEquals(hmm.getSymbols(), "PMJ");
+ assertEquals(hmm.getLength(), 0); // no LENG property :-(
+
+ // node 1 (implicitly mapped to column 0); a file value v is expected back as probability e^-v
+ double prob = hmm.getMatchEmissionProbability(0, 'p');
+ assertEquals(prob, Math.pow(Math.E, -1.1));
+ prob = hmm.getInsertEmissionProbability(0, 'J');
+ assertEquals(prob, Math.pow(Math.E, -1.6));
+
+ // node 2 (implicitly mapped to column 1); lower case 'm' is expected to resolve to symbol M
+ prob = hmm.getMatchEmissionProbability(1, 'M');
+ assertEquals(prob, Math.pow(Math.E, -1.02));
+ prob = hmm.getInsertEmissionProbability(1, 'm');
+ assertEquals(prob, Math.pow(Math.E, -1.05));
+ }
@Test(groups = "Functional")
public void testParseHeaderLines_amino() throws IOException
new File("test/jalview/io/test_MADE1_hmm.txt"));
BufferedReader br = new BufferedReader(fr);
HiddenMarkovModel testHMM = new HiddenMarkovModel();
- for (int i = 0; i < 24; i++)
+ String line = null;
+ do
{
- br.readLine();
- }
+ line = br.readLine(); // skip header lines up to HMM plus one
+ } while (!line.startsWith("HMM "));
+ br.readLine();
made1.parseModel(br);
testHMM = made1.getHMM();
Double.NEGATIVE_INFINITY);
}
- /**
- * Test that if no mapping of nodes to aligned columns is provided by the HMM
- * file, we construct one
- *
- * @throws IOException
- */
- @Test(groups = "Functional")
- public void testParseModel_noMap() throws IOException
- {
- fail("test to be written");
- }
-
@Test(groups = "Functional")
public void testParseAnnotations()
{