3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertFalse;
5 import static org.testng.Assert.assertNotNull;
6 import static org.testng.Assert.assertNull;
7 import static org.testng.Assert.assertTrue;
9 import jalview.datamodel.HMMNode;
10 import jalview.datamodel.HiddenMarkovModel;
12 import java.io.BufferedReader;
14 import java.io.FileNotFoundException;
15 import java.io.FileReader;
16 import java.io.IOException;
17 import java.io.PrintWriter;
18 import java.util.ArrayList;
19 import java.util.Scanner;
21 import org.testng.annotations.BeforeClass;
22 import org.testng.annotations.Test;
24 import junit.extensions.PA;
26 public class HMMFileTest {
/**
 * Creates the three HMMFile fixtures (fn3, pKinase, made1) from the HMM text
 * files under test/jalview/io. The annotation sets alwaysRun = true so that
 * this configuration method still runs when tests are selected by group.
 *
 * @throws IOException
 */
34 @BeforeClass(alwaysRun = true)
35 public void setUp() throws IOException
37 fn3 = new HMMFile("test/jalview/io/test_fn3_hmm.txt",
40 pKinase = new HMMFile("test/jalview/io/test_PKinase_hmm.txt",
43 made1 = new HMMFile("test/jalview/io/test_MADE1_hmm.txt",
/**
 * Checks the model held by the pKinase fixture: header properties, the symbol
 * alphabet, a sample of match/insert emission and state transition
 * probabilities, and a sample of per-node annotations.
 *
 * @throws IOException
 */
47 @Test(groups = "Functional")
48 public void testParse() throws IOException
50 HiddenMarkovModel hmm = pKinase.getHMM();
// header properties; MAX_LENGTH and COMMAND_LOG are expected to be unset
51 assertEquals(hmm.getName(), "Pkinase");
52 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17");
53 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
54 "Protein kinase domain");
55 assertEquals(hmm.getLength(), 260);
56 assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
57 assertEquals(hmm.getAlphabetType(), "amino");
58 assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
59 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
60 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
61 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
62 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
63 assertEquals(hmm.getProperty(HMMFile.DATE), "Thu Jun 16 11:44:06 2011");
64 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
65 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "54");
66 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
68 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3106786190");
69 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
71 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "70.30 70.30");
72 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "70.20 70.20");
// the 20 amino acid symbols
74 assertEquals(hmm.getSymbols(), "ACDEFGHIKLMNPQRSTVWY");
// match emission probabilities
// NOTE(review): indices such as 332 and 475 exceed the asserted model length
// of 260, so the first argument is presumably an alignment column rather than
// a node number - confirm against HiddenMarkovModel
76 assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
77 assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
78 assertEquals(hmm.getMatchEmissionProbability(24, 'I'), 0.02583, 0.001d);
79 assertEquals(hmm.getMatchEmissionProbability(83, 'C'), 0.008549,
81 assertEquals(hmm.getMatchEmissionProbability(332, 'E'), 0.07998,
83 assertEquals(hmm.getMatchEmissionProbability(381, 'D'), 0.014465,
85 assertEquals(hmm.getMatchEmissionProbability(475, 'Y'), 0.02213,
// insert emission probabilities
88 assertEquals(hmm.getInsertEmissionProbability(1, 'C'), 0.012, 0.001d);
89 assertEquals(hmm.getInsertEmissionProbability(14, 'H'), 0.02411,
91 assertEquals(hmm.getInsertEmissionProbability(23, 'L'), 0.06764,
93 assertEquals(hmm.getInsertEmissionProbability(90, 'D'), 0.0623, 0.001d);
94 assertEquals(hmm.getInsertEmissionProbability(374, 'T'), 0.0623,
96 assertEquals(hmm.getInsertEmissionProbability(470, 'P'), 0.0647,
// state transition probabilities; the second argument selects the transition
// (values 0 to 6 are used in this class)
99 assertEquals(hmm.getStateTransitionProbability(2, 6), 0.3848, 0.001d);
100 assertEquals(hmm.getStateTransitionProbability(38, 3), 0.5382, 0.001d);
101 assertEquals(hmm.getStateTransitionProbability(305, 3), 0.2916, 0.001d);
102 assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
103 assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);
// per-node annotations: alignment column, RF, consensus residue, mask,
// consensus structure
105 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2);
106 assertEquals(hmm.getReferenceAnnotation(7), '-');
107 assertEquals(hmm.getConsensusResidue(23), 't');
108 assertEquals(hmm.getMaskedValue(30), '-');
109 assertEquals(hmm.getConsensusStructure(56), 'S');
111 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135);
112 assertEquals(hmm.getReferenceAnnotation(93), '-');
113 assertEquals(hmm.getConsensusResidue(145), 'a');
114 assertEquals(hmm.getMaskedValue(183), '-');
115 assertEquals(hmm.getConsensusStructure(240), 'H');
119 * Test that Jalview can parse an HMM file even with a bunch of 'mandatory'
120 * fields missing (including no MAP annotation or // terminator line)
122 * @throws IOException
124 @Test(groups = "Functional")
125 public void testParse_minimalFile() throws IOException
128 * ALPH is absent, alphabet inferred from HMM header line
129 * Optional COMPO line is absent
130 * first line after HMM is a guide line for readability
131 * next line is BEGIN node insert emissions
132 * next line is BEGIN node transitions
133 * next line is first sequence node match emissions 1.1 1.2 1.3
134 * next line is first sequence node insert emissions 1.4 1.5 1.6
135 * last line is first sequence node transitions
141 // both spec and parser require a line after the HMM line
142 " m->m m->i m->d i->m i->i d->m d->d\n" +
144 " 0.4 0.5 0.6 0.7 0.8 0.9 0.95\n" +
145 " 1 1.1 1.2 1.3 - - - - -\n" +
147 " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n" +
148 " 2 1.01 1.02 1.03 - - - - -\n" +
149 " 1.04 1.05 1.06\n" +
150 " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n";
// the model text is supplied directly as a string (DataSourceType.PASTE)
// rather than read from a file
152 HMMFile parser = new HMMFile(hmmData, DataSourceType.PASTE);
153 HiddenMarkovModel hmm = parser.getHMM();
// a three-symbol alphabet is expected even though there is no ALPH line
155 assertEquals(hmm.getSymbols(), "PMJ");
156 assertEquals(hmm.getLength(), 0); // no LENG property :-(
// Each expected probability is e^(-x), where x is the value written in the
// model text. The comparisons are exact (no tolerance argument). Lookups with
// lower-case 'p' and 'm' are expected to match the upper-case symbols.
158 // node 1 (implicitly mapped to column 0)
159 double prob = hmm.getMatchEmissionProbability(0, 'p');
160 assertEquals(prob, Math.pow(Math.E, -1.1));
161 prob = hmm.getInsertEmissionProbability(0, 'J');
162 assertEquals(prob, Math.pow(Math.E, -1.6));
164 // node 2 (implicitly mapped to column 1)
165 prob = hmm.getMatchEmissionProbability(1, 'M');
166 assertEquals(prob, Math.pow(Math.E, -1.02));
167 prob = hmm.getInsertEmissionProbability(1, 'm');
168 assertEquals(prob, Math.pow(Math.E, -1.05));
/**
 * Runs parseHeaderLines over the fn3 (amino acid) test file and checks the
 * header properties and the Viterbi/MSV/Forward values stored on the model.
 *
 * @throws IOException
 */
171 @Test(groups = "Functional")
172 public void testParseHeaderLines_amino() throws IOException
174 FileReader fr = new FileReader(
175 new File("test/jalview/io/test_fn3_hmm.txt"));
176 BufferedReader br = new BufferedReader(fr);
// inject an empty model into the parser's "hmm" field (via PA.setValue) so
// that the properties it sets can be inspected below
177 HiddenMarkovModel hmm = new HiddenMarkovModel();
178 HMMFile testee = new HMMFile();
179 PA.setValue(testee, "hmm", hmm);
180 testee.parseHeaderLines(br);
// header properties; MAX_LENGTH and COMMAND_LOG are expected to be unset
184 assertEquals(hmm.getName(), "fn3");
185 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00041.13");
186 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
187 "Fibronectin type III domain");
188 assertEquals(hmm.getProperty(HMMFile.LENGTH), "86");
189 assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
190 assertEquals(hmm.getAlphabetType(), "amino");
191 assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
192 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
193 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
194 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
196 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
197 assertEquals(hmm.getProperty(HMMFile.DATE), "Fri Jun 20 08:22:31 2014");
198 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
199 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "106");
200 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
202 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3564431818");
203 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD), "8.00 7.20");
204 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "8.00 7.20");
205 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "7.90 7.90");
// the statistics values are held as strings of two space-separated numbers
206 assertEquals(hmm.getViterbi(), "-9.7737 0.71847");
207 assertEquals(hmm.getMSV(), "-9.4043 0.71847");
208 assertEquals(hmm.getForward(), "-3.8341 0.71847");
/**
 * Runs parseHeaderLines over the MADE1 (DNA) test file and checks the header
 * properties and the Viterbi/MSV/Forward values stored on the model. Unlike
 * the fn3 case, MAX_LENGTH is present here, reference annotation is on and
 * consensus structure is off.
 *
 * @throws IOException
 */
211 @Test(groups = "Functional")
212 public void testParseHeaderLines_dna() throws IOException
214 FileReader fr = new FileReader(
215 new File("test/jalview/io/test_MADE1_hmm.txt"));
216 BufferedReader br = new BufferedReader(fr);
// inject an empty model into the parser's "hmm" field (via PA.setValue) so
// that the properties it sets can be inspected below
217 HiddenMarkovModel hmm = new HiddenMarkovModel();
218 HMMFile testee = new HMMFile();
219 PA.setValue(testee, "hmm", hmm);
220 testee.parseHeaderLines(br);
224 assertEquals(hmm.getName(), "MADE1");
225 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER),
227 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
228 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
229 assertEquals(hmm.getProperty(HMMFile.LENGTH), "80");
230 assertEquals(hmm.getProperty(HMMFile.MAX_LENGTH), "426");
231 assertEquals(hmm.getAlphabetType(), "DNA");
232 assertTrue(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
233 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
234 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
235 assertFalse(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
236 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
237 assertEquals(hmm.getProperty(HMMFile.DATE), "Tue Feb 19 20:33:41 2013");
238 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
239 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "1997");
240 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES), "3.911818");
241 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3015610723");
242 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
244 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "2.343 1.212");
245 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "2.354 5.456");
// the statistics values are held as strings of two space-separated numbers
246 assertEquals(hmm.getViterbi(), "-9.3632 0.71858");
247 assertEquals(hmm.getMSV(), "-8.5786 0.71858");
248 assertEquals(hmm.getForward(), "-3.4823 0.71858");
/**
 * Checks HMMFile.parseDoubles: the requested number of tokens is read from
 * the scanner and each value x is expected back as e^(-x), compared with a
 * tolerance of 0.001.
 *
 * @throws IOException
 */
251 @Test(groups = "Functional")
252 public void testFillList() throws IOException
254 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
255 ArrayList<Double> filledArray = new ArrayList<>();
// expected values: e^(-x) for each of the nine inputs above
257 filledArray.add(0.27253);
258 filledArray.add(0.0907);
259 filledArray.add(0.00499);
260 filledArray.add(0.02024);
261 filledArray.add(0.00005);
262 filledArray.add(0.00909);
263 filledArray.add(0.01357);
264 filledArray.add(0.10026);
265 filledArray.add(0.001);
267 double[] testList = HMMFile.parseDoubles(scanner1, 9);
269 for (int i = 0; i < 9; i++)
271 assertEquals(testList[i], filledArray.get(i), 0.001d);
// second case: five values
// NOTE(review): the loop below compares from index 0, so the expected list
// must be emptied before these adds - confirm that this happens
277 Scanner scanner2 = new Scanner(
278 "1.346 5.554 35.345 5.64 1.4");
279 filledArray.add(0.2603);
280 filledArray.add(0.00387);
282 filledArray.add(0.00355);
283 filledArray.add(0.2466);
285 testList = HMMFile.parseDoubles(scanner2, 5);
287 for (int i = 0; i < 5; i++)
289 assertEquals(testList[i], filledArray.get(i), 0.001d);
/**
 * Positions a reader just past the "HMM " line of the MADE1 test file, has
 * the made1 fixture parse the model section from it, and checks a sample of
 * emission and transition probabilities on the resulting model.
 *
 * @throws IOException
 */
293 @Test(groups = "Functional")
294 public void testParseModel() throws IOException
296 FileReader fr = new FileReader(
297 new File("test/jalview/io/test_MADE1_hmm.txt"));
298 BufferedReader br = new BufferedReader(fr);
// this empty model is only a placeholder; testHMM is reassigned to
// made1.getHMM() after parsing
299 HiddenMarkovModel testHMM = new HiddenMarkovModel();
303 line = br.readLine(); // skip header lines up to HMM plus one
304 } while (!line.startsWith("HMM "));
307 made1.parseModel(br);
308 testHMM = made1.getHMM();
// match emission probabilities
// NOTE(review): indices up to 1111 are used although the MADE1 header gives
// LENG 80, so the first argument is presumably an alignment column - confirm
313 assertEquals(testHMM.getMatchEmissionProbability(1, 'C'), 0.09267,
315 assertEquals(testHMM.getMatchEmissionProbability(25, 'G'), 0.07327,
317 assertEquals(testHMM.getMatchEmissionProbability(1092, 'C'), 0.04184,
319 assertEquals(testHMM.getMatchEmissionProbability(1107, 'G'), 0.07,
// insert emission probabilities
322 assertEquals(testHMM.getInsertEmissionProbability(0, 'G'), 0.25,
324 assertEquals(testHMM.getInsertEmissionProbability(247, 'T'), 0.2776,
326 assertEquals(testHMM.getInsertEmissionProbability(1096, 'T'), 0.25,
328 assertEquals(testHMM.getInsertEmissionProbability(1111, 'T'), 0.25,
// state transition probabilities
331 assertEquals(testHMM.getStateTransitionProbability(1, 0), 0.9634,
333 assertEquals(testHMM.getStateTransitionProbability(5, 1), 0.0203,
335 assertEquals(testHMM.getStateTransitionProbability(14, 3), 0.2515,
337 assertEquals(testHMM.getStateTransitionProbability(65, 4), 0.78808,
339 assertEquals(testHMM.getStateTransitionProbability(1080, 2), 0.01845,
// this transition is expected to be reported as negative infinity rather
// than as a probability
341 assertEquals(testHMM.getStateTransitionProbability(1111, 6),
342 Double.NEGATIVE_INFINITY);
/**
 * Checks parseAnnotations: with all five annotation flags set to "yes" on the
 * model, the line "1345 t t t t" is parsed into a node, and node 1 of the
 * model is then expected to map to alignment column 1344 with 't' for each
 * of the four character annotations.
 */
345 @Test(groups = "Functional")
346 public void testParseAnnotations()
348 HMMFile testFile = new HMMFile();
// inject a fresh model into the parser's "hmm" field (via PA.setValue), then
// add a node at index 0 so that the node under test is node 1
349 HiddenMarkovModel hmm = new HiddenMarkovModel();
350 PA.setValue(testFile, "hmm", hmm);
351 hmm.addNode(new HMMNode());
353 hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
354 hmm.setProperty(HMMFile.MAP, "yes");
355 hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
356 hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "yes");
357 hmm.setProperty(HMMFile.MASKED_VALUE, "yes");
358 Scanner scanner = new Scanner("1345 t t t t");
359 HMMNode node = new HMMNode();
361 testFile.parseAnnotations(scanner, node);
// second parse with MAP, CS and MM switched off
// NOTE(review): none of the assertions visible below inspects the node
// produced by this second parse - confirm whether that is intended
363 hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
364 hmm.setProperty(HMMFile.MAP, "no");
365 hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
366 hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "no");
367 hmm.setProperty(HMMFile.MASKED_VALUE, "no");
368 Scanner scanner2 = new Scanner("- y x - -");
369 node = new HMMNode();
371 testFile.parseAnnotations(scanner2, node);
// the MAP value 1345 in the input is expected back as column 1344
373 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
374 assertEquals(hmm.getConsensusResidue(1), 't');
375 assertEquals(hmm.getReferenceAnnotation(1), 't');
376 assertEquals(hmm.getMaskedValue(1), 't');
377 assertEquals(hmm.getConsensusStructure(1), 't');
383 * Tests that the model written out by print() and parsed back in matches the model parsed from the original input file
385 * @throws IOException
387 @Test(groups = "Functional")
388 public void testPrint() throws IOException
// write the pKinase fixture's printed form to a scratch file in the test
// directory
390 PrintWriter writer = new PrintWriter(
391 "test/jalview/io/test_export_hmm.txt");
392 String output = pKinase.print();
393 writer.print(output);
// parse the exported file back in as a second HMMFile
395 HMMFile pKinaseClone = new HMMFile(
396 new FileParse("test/jalview/io/test_export_hmm.txt",
397 DataSourceType.FILE));
// the two empty models created here are immediately replaced by the parsed
// ones
398 HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
399 HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
400 pKinaseHMM = pKinase.getHMM();
401 pKinaseCloneHMM = pKinaseClone.getHMM();
// compare original and re-parsed models index by index, up to the original
// model's length
403 for (int i = 0; i < pKinaseHMM.getLength(); i++)
408 list1 = pKinaseHMM.getNode(i).getMatchEmissions();
409 list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
411 assertEquals(list1, list2);
413 list1 = pKinaseHMM.getNode(i).getInsertEmissions();
414 list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
416 assertEquals(list1, list2);
418 list1 = pKinaseHMM.getNode(i).getStateTransitions();
419 list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
421 assertEquals(list1, list2);
// alignment column, reference annotation and consensus residue are compared
// as well
428 alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
429 alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
431 assertEquals(alignColumn1, alignColumn2);
436 annotation1 = pKinaseHMM.getReferenceAnnotation(i);
437 annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
439 assertEquals(annotation1, annotation2);
441 annotation1 = pKinaseHMM.getConsensusResidue(i);
442 annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
444 assertEquals(annotation1, annotation2);
/**
 * Checks that appendProperties on the fn3 fixture writes the expected header
 * lines, in the expected order, by reading the output back one line at a
 * time.
 *
 * @throws FileNotFoundException
 */
449 @Test(groups = "Functional")
450 public void testAppendProperties() throws FileNotFoundException
452 StringBuilder sb = new StringBuilder();
453 fn3.appendProperties(sb);
455 Scanner testScanner = new Scanner(sb.toString());
// expected output: format/version line first, then one line per property,
// ending with the three STATS LOCAL lines
457 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
458 "NAME fn3", "ACC PF00041.13",
459 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
460 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
461 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
462 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
463 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
464 "STATS LOCAL VITERBI -9.7737 0.71847",
465 "STATS LOCAL FORWARD -3.8341 0.71847" };
467 for (String value : expected)
469 assertEquals(testScanner.nextLine(), value);
/**
 * Checks that appendModelAsString on the fn3 fixture writes the expected
 * values, by spot-checking individual tokens of the output with findValue.
 *
 * @throws FileNotFoundException
 */
475 @Test(groups = "Functional")
476 public void testAppendModelAsString() throws FileNotFoundException
478 StringBuilder sb = new StringBuilder();
479 fn3.appendModelAsString(sb);
480 String string = sb.toString();
// findValue arguments are (symbolIndex, nodeIndex, line, model); values are
// compared as the exact text written to the output
482 assertEquals(findValue(2, 2, 2, string), "4.42225");
483 assertEquals(findValue(12, 14, 1, string), "2.79307");
484 assertEquals(findValue(6, 24, 3, string), "0.48576");
485 assertEquals(findValue(19, 33, 2, string), "4.58477");
486 assertEquals(findValue(20, 64, 2, string), "3.61505");
487 assertEquals(findValue(3, 72, 3, string), "6.81068");
488 assertEquals(findValue(10, 80, 2, string), "2.69355");
489 assertEquals(findValue(16, 65, 1, string), "2.81003");
490 assertEquals(findValue(14, 3, 1, string), "2.69012");
491 assertEquals(findValue(11, 32, 1, string), "4.34805");
495 * A helper method to find a token in the model string
498 * index of symbol being searched. First symbol has index 1.
500 * index of node being searched. Begin node has index 0. First node
503 * index of line being searched in node. First line has index 1.
505 * string model being searched
506 * @return value at specified position
508 private String findValue(int symbolIndex, int nodeIndex, int line,
512 Scanner scanner = new Scanner(model);
516 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
520 for (int node = 0; node < nodeIndex; node++)
527 for (int symbol = 0; symbol < symbolIndex; symbol++)
529 value = scanner.next();
530 if ("COMPO".equals(value))
534 else if (value.length() < 7)