3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertFalse;
5 import static org.testng.Assert.assertNotNull;
6 import static org.testng.Assert.assertNull;
7 import static org.testng.Assert.assertTrue;
9 import jalview.datamodel.HMMNode;
10 import jalview.datamodel.HiddenMarkovModel;
12 import java.io.BufferedReader;
14 import java.io.FileNotFoundException;
15 import java.io.FileReader;
16 import java.io.IOException;
17 import java.util.ArrayList;
18 import java.util.List;
19 import java.util.Scanner;
21 import org.testng.annotations.BeforeClass;
22 import org.testng.annotations.Test;
24 import junit.extensions.PA;
26 public class HMMFileTest {
/**
 * Creates the three HMMFile fixtures used by the tests in this class (fn3,
 * pKinase and made1), one for each HMM test data file under test/jalview/io.
 * The alwaysRun flag is set on the annotation.
 *
 * @throws IOException
 */
34 @BeforeClass(alwaysRun = true)
35 public void setUp() throws IOException
37 fn3 = new HMMFile("test/jalview/io/test_fn3_hmm.txt",
40 pKinase = new HMMFile("test/jalview/io/test_PKinase_hmm.txt",
43 made1 = new HMMFile("test/jalview/io/test_MADE1_hmm.txt",
/**
 * Checks the model held by the pKinase fixture: header properties, the
 * symbol list, a sample of match emission, insert emission and state
 * transition probabilities, and per-position annotation values.
 *
 * @throws IOException
 */
47 @Test(groups = "Functional")
48 public void testParse() throws IOException
50 HiddenMarkovModel hmm = pKinase.getHMM();
// header properties; MAX_LENGTH and COMMAND_LOG are expected to be unset
51 assertEquals(hmm.getName(), "Pkinase");
52 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17");
53 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
54 "Protein kinase domain");
55 assertEquals(hmm.getLength(), 260);
56 assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
57 assertEquals(hmm.getAlphabetType(), "amino");
58 assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
59 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
60 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
61 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
62 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
63 assertEquals(hmm.getProperty(HMMFile.DATE), "Thu Jun 16 11:44:06 2011");
64 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
65 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "54");
66 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
68 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3106786190");
69 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
71 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "70.30 70.30");
72 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "70.20 70.20");
// the twenty amino acid symbols, in the order reported by the model
74 assertEquals(hmm.getSymbols(), "ACDEFGHIKLMNPQRSTVWY");
// match emission probabilities, compared with a tolerance of 0.001
// NOTE(review): several position arguments below exceed the model length
// of 260 asserted above, so they are presumably alignment columns rather
// than node numbers - confirm against HiddenMarkovModel
76 assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
77 assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
78 assertEquals(hmm.getMatchEmissionProbability(24, 'I'), 0.02583, 0.001d);
79 assertEquals(hmm.getMatchEmissionProbability(83, 'C'), 0.008549,
81 assertEquals(hmm.getMatchEmissionProbability(332, 'E'), 0.07998,
83 assertEquals(hmm.getMatchEmissionProbability(381, 'D'), 0.014465,
85 assertEquals(hmm.getMatchEmissionProbability(475, 'Y'), 0.02213,
// insert emission probabilities
88 assertEquals(hmm.getInsertEmissionProbability(1, 'C'), 0.012, 0.001d);
89 assertEquals(hmm.getInsertEmissionProbability(14, 'H'), 0.02411,
91 assertEquals(hmm.getInsertEmissionProbability(23, 'L'), 0.06764,
93 assertEquals(hmm.getInsertEmissionProbability(90, 'D'), 0.0623, 0.001d);
94 assertEquals(hmm.getInsertEmissionProbability(374, 'T'), 0.0623,
96 assertEquals(hmm.getInsertEmissionProbability(470, 'P'), 0.0647,
// state transition probabilities; the second argument presumably selects
// one of the seven transition types (values 0 to 6 are used in this
// class) - TODO confirm
99 assertEquals(hmm.getStateTransitionProbability(2, 6), 0.3848, 0.001d);
100 assertEquals(hmm.getStateTransitionProbability(38, 3), 0.5382, 0.001d);
101 assertEquals(hmm.getStateTransitionProbability(305, 3), 0.2916, 0.001d);
102 assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
103 assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);
// map positions and annotation characters for a sample of positions
105 assertEquals(hmm.getNodeMapPosition(3), 3);
106 assertEquals(hmm.getReferenceAnnotation(7), '-');
107 assertEquals(hmm.getConsensusResidue(23), 't');
108 assertEquals(hmm.getMaskedValue(30), '-');
109 assertEquals(hmm.getConsensusStructure(56), 'S');
111 assertEquals(hmm.getNodeMapPosition(78), 136);
112 assertEquals(hmm.getReferenceAnnotation(93), '-');
113 assertEquals(hmm.getConsensusResidue(145), 'a');
114 assertEquals(hmm.getMaskedValue(183), '-');
115 assertEquals(hmm.getConsensusStructure(240), 'H');
119 * Test that Jalview can parse an HMM file even with a bunch of 'mandatory'
120 * fields missing (including no MAP annotation or // terminator line)
122 * @throws IOException
124 @Test(groups = "Functional")
125 public void testParse_minimalFile() throws IOException
128 * ALPH is absent, alphabet inferred from HMM header line
129 * Optional COMPO line is absent
130 * first line after HMM is a guide line for readability
131 * next line is BEGIN node insert emissions
132 * next line is BEGIN node transitions
133 * next line is first sequence node match emissions 1.1 1.2 1.3
134 * next line is first sequence node insert emissions 1.4 1.5 1.6
135 * last line is first sequence node transitions
141 // both spec and parser require a line after the HMM line
142 " m->m m->i m->d i->m i->i d->m d->d\n" +
144 " 0.4 0.5 0.6 0.7 0.8 0.9 0.95\n" +
145 " 1 1.1 1.2 1.3 - - - - -\n" +
147 " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n" +
148 " 2 1.01 1.02 1.03 - - - - -\n" +
149 " 1.04 1.05 1.06\n" +
150 " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n";
// the model text is supplied as pasted data rather than read from a file
152 HMMFile parser = new HMMFile(hmmData, DataSourceType.PASTE);
153 HiddenMarkovModel hmm = parser.getHMM();
// three symbols are expected, taken from the HMM header line since ALPH
// is absent (see the notes on the file layout in this method)
155 assertEquals(hmm.getSymbols(), "PMJ");
156 // no LENG property: this should return node count excluding BEGIN node
157 assertEquals(hmm.getLength(), 2);
// each expected probability is e^(-x) for the value x given in the data;
// symbols are looked up in both upper and lower case and the test expects
// either case to select the same symbol
159 // node 1 (implicitly mapped to column 0)
160 double prob = hmm.getMatchEmissionProbability(0, 'p');
161 assertEquals(prob, Math.pow(Math.E, -1.1));
162 prob = hmm.getInsertEmissionProbability(0, 'J');
163 assertEquals(prob, Math.pow(Math.E, -1.6));
165 // node 2 (implicitly mapped to column 1)
166 prob = hmm.getMatchEmissionProbability(1, 'M');
167 assertEquals(prob, Math.pow(Math.E, -1.02));
168 prob = hmm.getInsertEmissionProbability(1, 'm');
169 assertEquals(prob, Math.pow(Math.E, -1.05));
/**
 * Checks that parseHeaderLines, reading the fn3 (amino acid) test file,
 * populates the HiddenMarkovModel held by the HMMFile with the expected
 * header properties and statistics values.
 *
 * @throws IOException
 */
172 @Test(groups = "Functional")
173 public void testParseHeaderLines_amino() throws IOException
175 FileReader fr = new FileReader(
176 new File("test/jalview/io/test_fn3_hmm.txt"));
177 BufferedReader br = new BufferedReader(fr);
178 HiddenMarkovModel hmm = new HiddenMarkovModel();
179 HMMFile testee = new HMMFile();
// place the empty model in the testee field named hmm via PA, so that the
// values parsed below can be inspected through the local reference
180 PA.setValue(testee, "hmm", hmm);
181 testee.parseHeaderLines(br);
// header properties; MAX_LENGTH and COMMAND_LOG are expected to be unset
185 assertEquals(hmm.getName(), "fn3");
186 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00041.13");
187 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
188 "Fibronectin type III domain");
189 assertEquals(hmm.getProperty(HMMFile.LENGTH), "86");
190 assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
191 assertEquals(hmm.getAlphabetType(), "amino");
192 assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
193 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
194 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
195 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
197 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
198 assertEquals(hmm.getProperty(HMMFile.DATE), "Fri Jun 20 08:22:31 2014");
199 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
200 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "106");
201 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
203 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3564431818");
204 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD), "8.00 7.20");
205 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "8.00 7.20");
206 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "7.90 7.90");
// statistics values, each returned as a single string of two numbers
207 assertEquals(hmm.getViterbi(), "-9.7737 0.71847");
208 assertEquals(hmm.getMSV(), "-9.4043 0.71847");
209 assertEquals(hmm.getForward(), "-3.8341 0.71847");
/**
 * Checks that parseHeaderLines, reading the MADE1 (DNA) test file,
 * populates the HiddenMarkovModel held by the HMMFile with the expected
 * header properties and statistics values.
 *
 * @throws IOException
 */
212 @Test(groups = "Functional")
213 public void testParseHeaderLines_dna() throws IOException
215 FileReader fr = new FileReader(
216 new File("test/jalview/io/test_MADE1_hmm.txt"));
217 BufferedReader br = new BufferedReader(fr);
218 HiddenMarkovModel hmm = new HiddenMarkovModel();
219 HMMFile testee = new HMMFile();
// place the empty model in the testee field named hmm via PA, so that the
// values parsed below can be inspected through the local reference
220 PA.setValue(testee, "hmm", hmm);
221 testee.parseHeaderLines(br);
// header properties; unlike the amino acid case, MAX_LENGTH is expected to
// be set and REFERENCE_ANNOTATION to be true, while COMMAND_LOG is unset
225 assertEquals(hmm.getName(), "MADE1");
226 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER),
228 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
229 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
230 assertEquals(hmm.getProperty(HMMFile.LENGTH), "80");
231 assertEquals(hmm.getProperty(HMMFile.MAX_LENGTH), "426");
232 assertEquals(hmm.getAlphabetType(), "DNA");
233 assertTrue(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
234 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
235 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
236 assertFalse(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
237 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
238 assertEquals(hmm.getProperty(HMMFile.DATE), "Tue Feb 19 20:33:41 2013");
239 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
240 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "1997");
241 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES), "3.911818");
242 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3015610723");
243 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
245 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "2.343 1.212");
246 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "2.354 5.456");
// statistics values, each returned as a single string of two numbers
247 assertEquals(hmm.getViterbi(), "-9.3632 0.71858");
248 assertEquals(hmm.getMSV(), "-8.5786 0.71858");
249 assertEquals(hmm.getForward(), "-3.4823 0.71858");
/**
 * Checks the static method HMMFile.parseDoubles against two inputs of
 * space separated numbers. The expected values listed here equal e^(-x)
 * for each input number x, and are compared with a tolerance of 0.001.
 *
 * @throws IOException
 */
252 @Test(groups = "Functional")
253 public void testFillList() throws IOException
// first input: nine numbers
255 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
256 ArrayList<Double> filledArray = new ArrayList<>();
// expected results, in the same order as the input numbers
258 filledArray.add(0.27253);
259 filledArray.add(0.0907);
260 filledArray.add(0.00499);
261 filledArray.add(0.02024);
262 filledArray.add(0.00005);
263 filledArray.add(0.00909);
264 filledArray.add(0.01357);
265 filledArray.add(0.10026);
266 filledArray.add(0.001);
// the second argument is the number of values requested
268 double[] testList = HMMFile.parseDoubles(scanner1, 9);
270 for (int i = 0; i < 9; i++)
272 assertEquals(testList[i], filledArray.get(i), 0.001d);
// second input: five numbers, including one large value (35.345)
278 Scanner scanner2 = new Scanner(
279 "1.346 5.554 35.345 5.64 1.4");
280 filledArray.add(0.2603);
281 filledArray.add(0.00387);
283 filledArray.add(0.00355);
284 filledArray.add(0.2466);
286 testList = HMMFile.parseDoubles(scanner2, 5);
288 for (int i = 0; i < 5; i++)
290 assertEquals(testList[i], filledArray.get(i), 0.001d);
/**
 * Checks parseModel: a reader on the MADE1 test file is advanced past the
 * header lines, the made1 fixture then parses from that reader, and a
 * sample of match emission, insert emission and state transition
 * probabilities is checked on the model it returns.
 *
 * @throws IOException
 */
294 @Test(groups = "Functional")
295 public void testParseModel() throws IOException
297 FileReader fr = new FileReader(
298 new File("test/jalview/io/test_MADE1_hmm.txt"));
299 BufferedReader br = new BufferedReader(fr);
// NOTE(review): this instance is replaced by made1.getHMM() further down
300 HiddenMarkovModel testHMM = new HiddenMarkovModel();
304 line = br.readLine(); // skip header lines up to HMM plus one
305 } while (!line.startsWith("HMM "));
// parse from the current reader position, then fetch the resulting model
308 made1.parseModel(br);
309 testHMM = made1.getHMM();
// match emission probabilities
314 assertEquals(testHMM.getMatchEmissionProbability(1, 'C'), 0.09267,
316 assertEquals(testHMM.getMatchEmissionProbability(25, 'G'), 0.07327,
318 assertEquals(testHMM.getMatchEmissionProbability(1092, 'C'), 0.04184,
320 assertEquals(testHMM.getMatchEmissionProbability(1107, 'G'), 0.07,
// insert emission probabilities
323 assertEquals(testHMM.getInsertEmissionProbability(0, 'G'), 0.25,
325 assertEquals(testHMM.getInsertEmissionProbability(247, 'T'), 0.2776,
327 assertEquals(testHMM.getInsertEmissionProbability(1096, 'T'), 0.25,
329 assertEquals(testHMM.getInsertEmissionProbability(1111, 'T'), 0.25,
// state transition probabilities; the final check expects negative
// infinity rather than a probability between 0 and 1
332 assertEquals(testHMM.getStateTransitionProbability(1, 0), 0.9634,
334 assertEquals(testHMM.getStateTransitionProbability(5, 1), 0.0203,
336 assertEquals(testHMM.getStateTransitionProbability(14, 3), 0.2515,
338 assertEquals(testHMM.getStateTransitionProbability(65, 4), 0.78808,
340 assertEquals(testHMM.getStateTransitionProbability(1080, 2), 0.01845,
342 assertEquals(testHMM.getStateTransitionProbability(1111, 6),
343 Double.NEGATIVE_INFINITY);
/**
 * Exercises parseAnnotations twice, with different annotation flags set on
 * the model each time, then checks the map position and annotation
 * characters reported by the model for position 1.
 */
346 @Test(groups = "Functional")
347 public void testParseAnnotations()
349 HMMFile testFile = new HMMFile();
350 HiddenMarkovModel hmm = new HiddenMarkovModel();
// place the model in the testee field named hmm via PA
351 PA.setValue(testFile, "hmm", hmm);
353 List<HMMNode> nodes = new ArrayList<>();
354 nodes.add(new HMMNode()); // BEGIN node
// first call: all five annotation flags are set to yes, and the scanner
// supplies a map position followed by four annotation characters
356 hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
357 hmm.setProperty(HMMFile.MAP, "yes");
358 hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
359 hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "yes");
360 hmm.setProperty(HMMFile.MASKED_VALUE, "yes");
361 Scanner scanner = new Scanner("1345 t t t t");
362 HMMNode node = new HMMNode();
364 testFile.parseAnnotations(scanner, node);
// second call: only CONSENSUS_RESIDUE and REFERENCE_ANNOTATION remain yes,
// and a fresh node is used
366 hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
367 hmm.setProperty(HMMFile.MAP, "no");
368 hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
369 hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "no");
370 hmm.setProperty(HMMFile.MASKED_VALUE, "no");
371 Scanner scanner2 = new Scanner("- y x - -");
372 node = new HMMNode();
374 testFile.parseAnnotations(scanner2, node);
// the expected values correspond to the tokens given to the first call
378 assertEquals(hmm.getNodeMapPosition(1), 1345);
379 assertEquals(hmm.getConsensusResidue(1), 't');
380 assertEquals(hmm.getReferenceAnnotation(1), 't');
381 assertEquals(hmm.getMaskedValue(1), 't');
382 assertEquals(hmm.getConsensusStructure(1), 't');
388 * Tests that the text produced by print() parses back into a model matching the one read from the input file
390 * @throws IOException
// Round trip: print the pKinase fixture, parse the printed text, compare.
392 @Test(groups = "Functional")
393 public void testPrint_roundTrip() throws IOException
// render the fixture as a String
395 String output = pKinase.print();
// parse that String back in, supplied as pasted data
396 HMMFile pKinaseClone = new HMMFile(
397 new FileParse(output, DataSourceType.PASTE));
398 HiddenMarkovModel pKinaseHMM = pKinase.getHMM();
399 HiddenMarkovModel pKinaseCloneHMM = pKinaseClone.getHMM();
// the original and re-parsed models are compared by the helper
401 checkModelsMatch(pKinaseHMM, pKinaseCloneHMM);
405 * A helper method to check two HMM models have the same values
410 protected void checkModelsMatch(HiddenMarkovModel model1,
411 HiddenMarkovModel model2)
// both models must report the same length before they are compared
413 assertEquals(model1.getLength(), model2.getLength());
// NOTE(review): elsewhere in this class index 0 is called the BEGIN node
// and getLength is described as a node count excluding it; if getNode uses
// that same indexing, this bound leaves the final node unchecked - confirm
415 for (int i = 0; i < model1.getLength(); i++)
// message passed to each assertion so a failure identifies the index
417 String msg = "For Node" + i;
// emission and transition values held by the node at this index
418 assertEquals(model1.getNode(i).getMatchEmissions(),
419 model2.getNode(i).getMatchEmissions(), msg);
420 assertEquals(model1.getNode(i).getInsertEmissions(),
421 model2.getNode(i).getInsertEmissions(), msg);
422 assertEquals(model1.getNode(i).getStateTransitions(),
423 model2.getNode(i).getStateTransitions(), msg);
// map position and annotation characters reported by the model itself
427 assertEquals(model1.getNodeMapPosition(i),
428 model2.getNodeMapPosition(i), msg);
429 assertEquals(model1.getReferenceAnnotation(i),
430 model2.getReferenceAnnotation(i), msg);
431 assertEquals(model1.getConsensusResidue(i),
432 model2.getConsensusResidue(i), msg);
/**
 * Checks that appendProperties, called on the fn3 fixture, writes the
 * expected header lines into the supplied StringBuilder, by reading the
 * output back one line at a time.
 *
 * @throws FileNotFoundException
 */
437 @Test(groups = "Functional")
438 public void testAppendProperties() throws FileNotFoundException
440 StringBuilder sb = new StringBuilder();
441 fn3.appendProperties(sb);
// scan the generated text line by line
443 Scanner testScanner = new Scanner(sb.toString());
// expected lines, in the order they should appear in the output
445 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
446 "NAME fn3", "ACC PF00041.13",
447 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
448 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
449 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
450 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
451 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
452 "STATS LOCAL VITERBI -9.7737 0.71847",
453 "STATS LOCAL FORWARD -3.8341 0.71847" };
// each successive output line must equal the corresponding expected line
455 for (String value : expected)
457 assertEquals(testScanner.nextLine(), value);
/**
 * Checks appendModelAsString for the fn3 fixture by looking up individual
 * values in the generated text with the findValue helper and comparing
 * them with the expected strings.
 *
 * @throws FileNotFoundException
 */
463 @Test(groups = "Functional")
464 public void testAppendModelAsString() throws FileNotFoundException
466 StringBuilder sb = new StringBuilder();
467 fn3.appendModelAsString(sb);
468 String string = sb.toString();
// findValue arguments: symbol index, node index, line within the node,
// then the text to search; values are compared as strings
470 assertEquals(findValue(2, 2, 2, string), "4.42225");
471 assertEquals(findValue(12, 14, 1, string), "2.79307");
472 assertEquals(findValue(6, 24, 3, string), "0.48576");
473 assertEquals(findValue(19, 33, 2, string), "4.58477");
474 assertEquals(findValue(20, 64, 2, string), "3.61505");
475 assertEquals(findValue(3, 72, 3, string), "6.81068");
476 assertEquals(findValue(10, 80, 2, string), "2.69355");
477 assertEquals(findValue(16, 65, 1, string), "2.81003");
478 assertEquals(findValue(14, 3, 1, string), "2.69012");
479 assertEquals(findValue(11, 32, 1, string), "4.34805");
483 * A helper method to find a token in the model string
486 * index of symbol being searched. First symbol has index 1.
488 * index of node being searched. Begin node has index 0. First node
491 * index of line being searched in node. First line has index 1.
493 * string model being searched
494 * @return value at specified position
496 private String findValue(int symbolIndex, int nodeIndex, int line,
500 Scanner scanner = new Scanner(model);
504 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
508 for (int node = 0; node < nodeIndex; node++)
515 for (int symbol = 0; symbol < symbolIndex; symbol++)
517 value = scanner.next();
518 if ("COMPO".equals(value))
522 else if (value.length() < 7)