X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fio%2FHMMFileTest.java;h=cf74f5581c78f1cf3c179a53dd1292044b332e87;hb=f5f6af7c725d698fd1f53aca15b9a5e5743ae434;hp=387a915c785789b3d591df0b170229de9697c798;hpb=dad3f91c2f9a38ce8c64a688b6f1ba4f539af9fc;p=jalview.git diff --git a/test/jalview/io/HMMFileTest.java b/test/jalview/io/HMMFileTest.java index 387a915..cf74f55 100644 --- a/test/jalview/io/HMMFileTest.java +++ b/test/jalview/io/HMMFileTest.java @@ -2,9 +2,9 @@ package jalview.io; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; import jalview.datamodel.HMMNode; import jalview.datamodel.HiddenMarkovModel; @@ -14,8 +14,8 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; -import java.io.PrintWriter; import java.util.ArrayList; +import java.util.List; import java.util.Scanner; import org.testng.annotations.BeforeClass; @@ -52,7 +52,7 @@ public class HMMFileTest { assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17"); assertEquals(hmm.getProperty(HMMFile.DESCRIPTION), "Protein kinase domain"); - assertEquals(hmm.getLength().intValue(), 260); + assertEquals(hmm.getLength(), 260); assertNull(hmm.getProperty(HMMFile.MAX_LENGTH)); assertEquals(hmm.getAlphabetType(), "amino"); assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION)); @@ -102,18 +102,72 @@ public class HMMFileTest { assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d); assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d); - assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2); + assertEquals(hmm.getNodeMapPosition(3), 3); assertEquals(hmm.getReferenceAnnotation(7), '-'); assertEquals(hmm.getConsensusResidue(23), 't'); assertEquals(hmm.getMaskedValue(30), '-'); assertEquals(hmm.getConsensusStructure(56), 'S'); - assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135); + assertEquals(hmm.getNodeMapPosition(78), 136); assertEquals(hmm.getReferenceAnnotation(93), '-'); assertEquals(hmm.getConsensusResidue(145), 'a'); assertEquals(hmm.getMaskedValue(183), '-'); assertEquals(hmm.getConsensusStructure(240), 'H'); } + + /** + * Test that Jalview can parse an HMM file even with a bunch of 'mandatory' + * fields missing (including no MAP annotation or // terminator line) + * + * @throws IOException + */ + @Test(groups = "Functional") + public void testParse_minimalFile() throws IOException + { + /* + * ALPH is absent, alphabet inferred from HMM header line + * Optional COMPO line is absent + * first line after HMM is a guide line for readability + * next line is BEGIN node insert emissions + * next line is BEGIN node transitions + * next line is first sequence node match emissions 1.1 1.2 1.3 + * next line is first sequence node insert emissions 1.4 1.5 1.6 + * last line is first sequence node transitions + */ + //@formatter:off + String hmmData = + "HMMER3\n" + + "HMM P M J\n" + + // both spec and parser require a line after the HMM line + " m->m m->i m->d i->m i->i d->m d->d\n" + + " 0.1 0.2 0.3\n" + + " 0.4 0.5 0.6 0.7 0.8 0.9 0.95\n" + + " 1 1.1 1.2 1.3 - - - - -\n" + + " 1.4 1.5 1.6\n" + + " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n" + + " 2 1.01 1.02 1.03 - - - - -\n" + + " 1.04 1.05 1.06\n" + + " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n"; + //@formatter:on + HMMFile parser = new HMMFile(hmmData, DataSourceType.PASTE); + HiddenMarkovModel hmm = parser.getHMM(); + assertNotNull(hmm); + assertEquals(hmm.getSymbols(), "PMJ"); + // no LENG property: this should return node count excluding BEGIN node + assertEquals(hmm.getLength(), 2); + + // node 1 (implicitly mapped to column 0) + double prob = hmm.getMatchEmissionProbability(0, 'p'); + assertEquals(prob, Math.pow(Math.E, -1.1)); + prob = hmm.getInsertEmissionProbability(0, 'J'); + assertEquals(prob, Math.pow(Math.E, -1.6)); + + // node 2 (implicitly mapped to column 1) + prob = hmm.getMatchEmissionProbability(1, 'M'); + assertEquals(prob, Math.pow(Math.E, -1.02)); + prob = hmm.getInsertEmissionProbability(1, 'm'); + assertEquals(prob, Math.pow(Math.E, -1.05)); + } @Test(groups = "Functional") public void testParseHeaderLines_amino() throws IOException @@ -244,10 +298,12 @@ public class HMMFileTest { new File("test/jalview/io/test_MADE1_hmm.txt")); BufferedReader br = new BufferedReader(fr); HiddenMarkovModel testHMM = new HiddenMarkovModel(); - for (int i = 0; i < 24; i++) + String line = null; + do { - br.readLine(); - } + line = br.readLine(); // skip header lines up to HMM plus one + } while (!line.startsWith("HMM ")); + br.readLine(); made1.parseModel(br); testHMM = made1.getHMM(); @@ -287,25 +343,15 @@ public class HMMFileTest { Double.NEGATIVE_INFINITY); } - /** - * Test that if no mapping of nodes to aligned columns is provided by the HMM - * file, we construct one - * - * @throws IOException - */ - @Test(groups = "Functional") - public void testParseModel_noMap() throws IOException - { - fail("test to be written"); - } - @Test(groups = "Functional") public void testParseAnnotations() { HMMFile testFile = new HMMFile(); HiddenMarkovModel hmm = new HiddenMarkovModel(); PA.setValue(testFile, "hmm", hmm); - hmm.addNode(new HMMNode()); + + List nodes = new ArrayList<>(); + nodes.add(new HMMNode()); // BEGIN node hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes"); hmm.setProperty(HMMFile.MAP, "yes"); @@ -314,7 +360,7 @@ public class HMMFileTest { hmm.setProperty(HMMFile.MASKED_VALUE, "yes"); Scanner scanner = new Scanner("1345 t t t t"); HMMNode node = new HMMNode(); - hmm.addNode(node); + nodes.add(node); testFile.parseAnnotations(scanner, node); hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes"); @@ -324,10 +370,12 @@ public class HMMFileTest { hmm.setProperty(HMMFile.MASKED_VALUE, "no"); Scanner scanner2 = new Scanner("- y x - -"); node = new HMMNode(); - hmm.addNode(node); + nodes.add(node); testFile.parseAnnotations(scanner2, node); - - assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344); + + hmm.setNodes(nodes); + + assertEquals(hmm.getNodeMapPosition(1), 1345); assertEquals(hmm.getConsensusResidue(1), 't'); assertEquals(hmm.getReferenceAnnotation(1), 't'); assertEquals(hmm.getMaskedValue(1), 't'); @@ -342,63 +390,46 @@ public class HMMFileTest { * @throws IOException */ @Test(groups = "Functional") - public void testPrint() throws IOException + public void testPrint_roundTrip() throws IOException { - PrintWriter writer = new PrintWriter( - "test/jalview/io/test_export_hmm.txt"); String output = pKinase.print(); - writer.print(output); - writer.close(); HMMFile pKinaseClone = new HMMFile( - new FileParse("test/jalview/io/test_export_hmm.txt", - DataSourceType.FILE)); - HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel(); - HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel(); - pKinaseHMM = pKinase.getHMM(); - pKinaseCloneHMM = pKinaseClone.getHMM(); - - for (int i = 0; i < pKinaseHMM.getLength(); i++) - { - double[] list1; - double[] list2; - - list1 = pKinaseHMM.getNode(i).getMatchEmissions(); - list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions(); - - assertEquals(list1, list2); - - list1 = pKinaseHMM.getNode(i).getInsertEmissions(); - list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions(); - - assertEquals(list1, list2); + new FileParse(output, DataSourceType.PASTE)); + HiddenMarkovModel pKinaseHMM = pKinase.getHMM(); + HiddenMarkovModel pKinaseCloneHMM = pKinaseClone.getHMM(); - list1 = pKinaseHMM.getNode(i).getStateTransitions(); - list2 = pKinaseCloneHMM.getNode(i).getStateTransitions(); - - assertEquals(list1, list2); + checkModelsMatch(pKinaseHMM, pKinaseCloneHMM); + } + + /** + * A helper method to check two HMM models have the same values + * + * @param model1 + * @param model2 + */ + protected void checkModelsMatch(HiddenMarkovModel model1, + HiddenMarkovModel model2) + { + assertEquals(model1.getLength(), model2.getLength()); + + for (int i = 0; i < model1.getLength(); i++) + { + String msg = "For Node" + i; + assertEquals(model1.getNode(i).getMatchEmissions(), + model2.getNode(i).getMatchEmissions(), msg); + assertEquals(model1.getNode(i).getInsertEmissions(), + model2.getNode(i).getInsertEmissions(), msg); + assertEquals(model1.getNode(i).getStateTransitions(), + model2.getNode(i).getStateTransitions(), msg); if (i > 0) { - int alignColumn1; - int alignColumn2; - - alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i); - alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i); - - assertEquals(alignColumn1, alignColumn2); - - char annotation1; - char annotation2; - - annotation1 = pKinaseHMM.getReferenceAnnotation(i); - annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i); - - assertEquals(annotation1, annotation2); - - annotation1 = pKinaseHMM.getConsensusResidue(i); - annotation2 = pKinaseCloneHMM.getConsensusResidue(i); - - assertEquals(annotation1, annotation2); + assertEquals(model1.getNodeMapPosition(i), + model2.getNodeMapPosition(i), msg); + assertEquals(model1.getReferenceAnnotation(i), + model2.getReferenceAnnotation(i), msg); + assertEquals(model1.getConsensusResidue(i), + model2.getConsensusResidue(i), msg); } } }