import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
-import static org.testng.Assert.fail;
import jalview.datamodel.HMMNode;
import jalview.datamodel.HiddenMarkovModel;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
-import java.io.PrintWriter;
import java.util.ArrayList;
+import java.util.List;
import java.util.Scanner;
import org.testng.annotations.BeforeClass;
assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17");
assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
"Protein kinase domain");
- assertEquals(hmm.getLength().intValue(), 260);
+ assertEquals(hmm.getLength(), 260);
assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
assertEquals(hmm.getAlphabetType(), "amino");
assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);
- assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2);
+ assertEquals(hmm.getNodeMapPosition(3), 3);
assertEquals(hmm.getReferenceAnnotation(7), '-');
assertEquals(hmm.getConsensusResidue(23), 't');
assertEquals(hmm.getMaskedValue(30), '-');
assertEquals(hmm.getConsensusStructure(56), 'S');
- assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135);
+ assertEquals(hmm.getNodeMapPosition(78), 136);
assertEquals(hmm.getReferenceAnnotation(93), '-');
assertEquals(hmm.getConsensusResidue(145), 'a');
assertEquals(hmm.getMaskedValue(183), '-');
assertEquals(hmm.getConsensusStructure(240), 'H');
}
+
+ /**
+ * Test that Jalview can parse an HMM file even with a bunch of 'mandatory'
+ * fields missing (including no MAP annotation or // terminator line)
+ *
+ * @throws IOException
+ */
+ @Test(groups = "Functional")
+ public void testParse_minimalFile() throws IOException
+ {
+ /*
+ * ALPH is absent, alphabet inferred from HMM header line
+ * Optional COMPO line is absent
+ * first line after HMM is a guide line for readability
+ * next line is BEGIN node insert emissions
+ * next line is BEGIN node transitions
+ * next line is first sequence node match emissions 1.1 1.2 1.3
+ * next line is first sequence node insert emissions 1.4 1.5 1.6
+ * last line is first sequence node transitions
+ */
+ //@formatter:off
+ String hmmData =
+ "HMMER3\n" +
+ "HMM P M J\n" +
+ // both spec and parser require a line after the HMM line
+ " m->m m->i m->d i->m i->i d->m d->d\n" +
+ " 0.1 0.2 0.3\n" +
+ " 0.4 0.5 0.6 0.7 0.8 0.9 0.95\n" +
+ " 1 1.1 1.2 1.3 - - - - -\n" +
+ " 1.4 1.5 1.6\n" +
+ " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n" +
+ " 2 1.01 1.02 1.03 - - - - -\n" +
+ " 1.04 1.05 1.06\n" +
+ " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n";
+ //@formatter:on
+ HMMFile parser = new HMMFile(hmmData, DataSourceType.PASTE);
+ HiddenMarkovModel hmm = parser.getHMM();
+ assertNotNull(hmm);
+ assertEquals(hmm.getSymbols(), "PMJ");
+ // no LENG property: this should return node count excluding BEGIN node
+ assertEquals(hmm.getLength(), 2);
+
+ // node 1 (implicitly mapped to column 0)
+ double prob = hmm.getMatchEmissionProbability(0, 'p');
+ assertEquals(prob, Math.pow(Math.E, -1.1));
+ prob = hmm.getInsertEmissionProbability(0, 'J');
+ assertEquals(prob, Math.pow(Math.E, -1.6));
+
+ // node 2 (implicitly mapped to column 1)
+ prob = hmm.getMatchEmissionProbability(1, 'M');
+ assertEquals(prob, Math.pow(Math.E, -1.02));
+ prob = hmm.getInsertEmissionProbability(1, 'm');
+ assertEquals(prob, Math.pow(Math.E, -1.05));
+ }
@Test(groups = "Functional")
public void testParseHeaderLines_amino() throws IOException
new File("test/jalview/io/test_MADE1_hmm.txt"));
BufferedReader br = new BufferedReader(fr);
HiddenMarkovModel testHMM = new HiddenMarkovModel();
- for (int i = 0; i < 24; i++)
+ String line = null;
+ do
{
- br.readLine();
- }
+ line = br.readLine(); // skip header lines up to HMM plus one
+ } while (!line.startsWith("HMM "));
+ br.readLine();
made1.parseModel(br);
testHMM = made1.getHMM();
Double.NEGATIVE_INFINITY);
}
- /**
- * Test that if no mapping of nodes to aligned columns is provided by the HMM
- * file, we construct one
- *
- * @throws IOException
- */
- @Test(groups = "Functional")
- public void testParseModel_noMap() throws IOException
- {
- fail("test to be written");
- }
-
@Test(groups = "Functional")
public void testParseAnnotations()
{
HMMFile testFile = new HMMFile();
HiddenMarkovModel hmm = new HiddenMarkovModel();
PA.setValue(testFile, "hmm", hmm);
- hmm.addNode(new HMMNode());
+
+ List<HMMNode> nodes = new ArrayList<>();
+ nodes.add(new HMMNode()); // BEGIN node
hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
hmm.setProperty(HMMFile.MAP, "yes");
hmm.setProperty(HMMFile.MASKED_VALUE, "yes");
Scanner scanner = new Scanner("1345 t t t t");
HMMNode node = new HMMNode();
- hmm.addNode(node);
+ nodes.add(node);
testFile.parseAnnotations(scanner, node);
hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
hmm.setProperty(HMMFile.MASKED_VALUE, "no");
Scanner scanner2 = new Scanner("- y x - -");
node = new HMMNode();
- hmm.addNode(node);
+ nodes.add(node);
testFile.parseAnnotations(scanner2, node);
-
- assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
+
+ hmm.setNodes(nodes);
+
+ assertEquals(hmm.getNodeMapPosition(1), 1345);
assertEquals(hmm.getConsensusResidue(1), 't');
assertEquals(hmm.getReferenceAnnotation(1), 't');
assertEquals(hmm.getMaskedValue(1), 't');
* @throws IOException
*/
@Test(groups = "Functional")
- public void testPrint() throws IOException
+ public void testPrint_roundTrip() throws IOException
{
- PrintWriter writer = new PrintWriter(
- "test/jalview/io/test_export_hmm.txt");
String output = pKinase.print();
- writer.print(output);
- writer.close();
HMMFile pKinaseClone = new HMMFile(
- new FileParse("test/jalview/io/test_export_hmm.txt",
- DataSourceType.FILE));
- HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
- HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
- pKinaseHMM = pKinase.getHMM();
- pKinaseCloneHMM = pKinaseClone.getHMM();
-
- for (int i = 0; i < pKinaseHMM.getLength(); i++)
- {
- double[] list1;
- double[] list2;
-
- list1 = pKinaseHMM.getNode(i).getMatchEmissions();
- list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
-
- assertEquals(list1, list2);
-
- list1 = pKinaseHMM.getNode(i).getInsertEmissions();
- list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
-
- assertEquals(list1, list2);
+ new FileParse(output, DataSourceType.PASTE));
+ HiddenMarkovModel pKinaseHMM = pKinase.getHMM();
+ HiddenMarkovModel pKinaseCloneHMM = pKinaseClone.getHMM();
- list1 = pKinaseHMM.getNode(i).getStateTransitions();
- list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
-
- assertEquals(list1, list2);
+ checkModelsMatch(pKinaseHMM, pKinaseCloneHMM);
+ }
+
+ /**
+ * A helper method to check two HMM models have the same values
+ *
+ * @param model1
+ * @param model2
+ */
+ protected void checkModelsMatch(HiddenMarkovModel model1,
+ HiddenMarkovModel model2)
+ {
+ assertEquals(model1.getLength(), model2.getLength());
+
+ for (int i = 0; i < model1.getLength(); i++)
+ {
+ String msg = "For Node" + i;
+ assertEquals(model1.getNode(i).getMatchEmissions(),
+ model2.getNode(i).getMatchEmissions(), msg);
+ assertEquals(model1.getNode(i).getInsertEmissions(),
+ model2.getNode(i).getInsertEmissions(), msg);
+ assertEquals(model1.getNode(i).getStateTransitions(),
+ model2.getNode(i).getStateTransitions(), msg);
if (i > 0)
{
- int alignColumn1;
- int alignColumn2;
-
- alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
- alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
-
- assertEquals(alignColumn1, alignColumn2);
-
- char annotation1;
- char annotation2;
-
- annotation1 = pKinaseHMM.getReferenceAnnotation(i);
- annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
-
- assertEquals(annotation1, annotation2);
-
- annotation1 = pKinaseHMM.getConsensusResidue(i);
- annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
-
- assertEquals(annotation1, annotation2);
+ assertEquals(model1.getNodeMapPosition(i),
+ model2.getNodeMapPosition(i), msg);
+ assertEquals(model1.getReferenceAnnotation(i),
+ model2.getReferenceAnnotation(i), msg);
+ assertEquals(model1.getConsensusResidue(i),
+ model2.getConsensusResidue(i), msg);
}
}
}