3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertFalse;
5 import static org.testng.Assert.assertNull;
6 import static org.testng.Assert.assertTrue;
7 import static org.testng.Assert.fail;
9 import jalview.datamodel.HMMNode;
10 import jalview.datamodel.HiddenMarkovModel;
12 import java.io.BufferedReader;
14 import java.io.FileNotFoundException;
15 import java.io.FileReader;
16 import java.io.IOException;
17 import java.io.PrintWriter;
18 import java.util.ArrayList;
19 import java.util.Scanner;
21 import org.testng.annotations.BeforeClass;
22 import org.testng.annotations.Test;
24 import junit.extensions.PA;
26 public class HMMFileTest {
/**
 * TestNG class-level fixture (alwaysRun = true): builds the fn3, pKinase and
 * made1 HMMFile instances from the test HMM files under test/jalview/io.
 *
 * @throws IOException
 */
34 @BeforeClass(alwaysRun = true)
35 public void setUp() throws IOException
37 fn3 = new HMMFile("test/jalview/io/test_fn3_hmm.txt",
40 pKinase = new HMMFile("test/jalview/io/test_PKinase_hmm.txt",
43 made1 = new HMMFile("test/jalview/io/test_MADE1_hmm.txt",
/**
 * Checks the HiddenMarkovModel held by the pKinase fixture: header properties
 * (name, accession, description, length, flags, date, cutoffs), the symbol
 * alphabet, a sample of match/insert emission and state transition
 * probabilities, and a sample of per-node alignment columns and annotation
 * characters.
 *
 * @throws IOException
 */
47 @Test(groups = "Functional")
48 public void testParse() throws IOException
50 HiddenMarkovModel hmm = pKinase.getHMM();
51 assertEquals(hmm.getName(), "Pkinase");
52 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17");
53 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
54 "Protein kinase domain");
55 assertEquals(hmm.getLength().intValue(), 260);
56 assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
57 assertEquals(hmm.getAlphabetType(), "amino");
58 assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
59 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
60 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
61 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
62 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
63 assertEquals(hmm.getProperty(HMMFile.DATE), "Thu Jun 16 11:44:06 2011");
64 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
65 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "54");
66 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
68 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3106786190");
69 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
71 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "70.30 70.30");
72 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "70.20 70.20");
74 assertEquals(hmm.getSymbols(), "ACDEFGHIKLMNPQRSTVWY");
// spot checks of match emissions, then insert emissions, then state
// transitions; the visible comparisons use a delta of 0.001
76 assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
77 assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
78 assertEquals(hmm.getMatchEmissionProbability(24, 'I'), 0.02583, 0.001d);
79 assertEquals(hmm.getMatchEmissionProbability(83, 'C'), 0.008549,
81 assertEquals(hmm.getMatchEmissionProbability(332, 'E'), 0.07998,
83 assertEquals(hmm.getMatchEmissionProbability(381, 'D'), 0.014465,
85 assertEquals(hmm.getMatchEmissionProbability(475, 'Y'), 0.02213,
88 assertEquals(hmm.getInsertEmissionProbability(1, 'C'), 0.012, 0.001d);
89 assertEquals(hmm.getInsertEmissionProbability(14, 'H'), 0.02411,
91 assertEquals(hmm.getInsertEmissionProbability(23, 'L'), 0.06764,
93 assertEquals(hmm.getInsertEmissionProbability(90, 'D'), 0.0623, 0.001d);
94 assertEquals(hmm.getInsertEmissionProbability(374, 'T'), 0.0623,
96 assertEquals(hmm.getInsertEmissionProbability(470, 'P'), 0.0647,
99 assertEquals(hmm.getStateTransitionProbability(2, 6), 0.3848, 0.001d);
100 assertEquals(hmm.getStateTransitionProbability(38, 3), 0.5382, 0.001d);
101 assertEquals(hmm.getStateTransitionProbability(305, 3), 0.2916, 0.001d);
102 assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
103 assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);
// per-node alignment column and annotation characters
105 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2);
106 assertEquals(hmm.getReferenceAnnotation(7), '-');
107 assertEquals(hmm.getConsensusResidue(23), 't');
108 assertEquals(hmm.getMaskedValue(30), '-');
109 assertEquals(hmm.getConsensusStructure(56), 'S');
111 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135);
112 assertEquals(hmm.getReferenceAnnotation(93), '-');
113 assertEquals(hmm.getConsensusResidue(145), 'a');
114 assertEquals(hmm.getMaskedValue(183), '-');
115 assertEquals(hmm.getConsensusStructure(240), 'H');
/**
 * Runs parseHeaderLines on the fn3 test file, using a fresh HMMFile whose
 * "hmm" field is set (via PA.setValue) to a new HiddenMarkovModel, and checks
 * the header properties and the Viterbi/MSV/Forward values stored on that
 * model.
 *
 * @throws IOException
 */
118 @Test(groups = "Functional")
119 public void testParseHeaderLines_amino() throws IOException
121 FileReader fr = new FileReader(
122 new File("test/jalview/io/test_fn3_hmm.txt"));
123 BufferedReader br = new BufferedReader(fr);
124 HiddenMarkovModel hmm = new HiddenMarkovModel();
125 HMMFile testee = new HMMFile();
126 PA.setValue(testee, "hmm", hmm);
127 testee.parseHeaderLines(br);
// header values expected for the amino-acid model in test_fn3_hmm.txt
131 assertEquals(hmm.getName(), "fn3");
132 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00041.13");
133 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
134 "Fibronectin type III domain");
135 assertEquals(hmm.getProperty(HMMFile.LENGTH), "86");
136 assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
137 assertEquals(hmm.getAlphabetType(), "amino");
138 assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
139 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
140 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
141 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
143 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
144 assertEquals(hmm.getProperty(HMMFile.DATE), "Fri Jun 20 08:22:31 2014");
145 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
146 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "106");
147 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
149 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3564431818");
150 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD), "8.00 7.20");
151 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "8.00 7.20");
152 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "7.90 7.90");
// statistics values are compared as the raw two-number strings
153 assertEquals(hmm.getViterbi(), "-9.7737 0.71847");
154 assertEquals(hmm.getMSV(), "-9.4043 0.71847");
155 assertEquals(hmm.getForward(), "-3.8341 0.71847");
/**
 * Runs parseHeaderLines on the MADE1 test file, using a fresh HMMFile whose
 * "hmm" field is set (via PA.setValue) to a new HiddenMarkovModel, and checks
 * the header properties and the Viterbi/MSV/Forward values stored on that
 * model. Unlike the fn3 case, this file is expected to supply MAX_LENGTH and
 * to report alphabet type "DNA".
 *
 * @throws IOException
 */
158 @Test(groups = "Functional")
159 public void testParseHeaderLines_dna() throws IOException
161 FileReader fr = new FileReader(
162 new File("test/jalview/io/test_MADE1_hmm.txt"));
163 BufferedReader br = new BufferedReader(fr);
164 HiddenMarkovModel hmm = new HiddenMarkovModel();
165 HMMFile testee = new HMMFile();
166 PA.setValue(testee, "hmm", hmm);
167 testee.parseHeaderLines(br);
// header values expected for the DNA model in test_MADE1_hmm.txt
171 assertEquals(hmm.getName(), "MADE1");
172 assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER),
174 assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
175 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
176 assertEquals(hmm.getProperty(HMMFile.LENGTH), "80");
177 assertEquals(hmm.getProperty(HMMFile.MAX_LENGTH), "426");
178 assertEquals(hmm.getAlphabetType(), "DNA");
179 assertTrue(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
180 assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
181 assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
182 assertFalse(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
183 assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
184 assertEquals(hmm.getProperty(HMMFile.DATE), "Tue Feb 19 20:33:41 2013");
185 assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
186 assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "1997");
187 assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES), "3.911818");
188 assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3015610723");
189 assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
191 assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "2.343 1.212");
192 assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "2.354 5.456");
// statistics values are compared as the raw two-number strings
193 assertEquals(hmm.getViterbi(), "-9.3632 0.71858");
194 assertEquals(hmm.getMSV(), "-8.5786 0.71858");
195 assertEquals(hmm.getForward(), "-3.4823 0.71858");
/**
 * Exercises HMMFile.parseDoubles (despite the method name, no "fillList"
 * method is called here): feeds whitespace-separated numbers through a Scanner
 * and compares the returned double[] with expected values to a delta of 0.001.
 * The expected values equal exp(-x) for each input token x, e.g.
 * exp(-1.3) = 0.27253 and exp(-2.4) = 0.0907.
 *
 * @throws IOException
 */
198 @Test(groups = "Functional")
199 public void testFillList() throws IOException
201 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
202 ArrayList<Double> filledArray = new ArrayList<>();
// expected results for the nine tokens of scanner1, in order
204 filledArray.add(0.27253);
205 filledArray.add(0.0907);
206 filledArray.add(0.00499);
207 filledArray.add(0.02024);
208 filledArray.add(0.00005);
209 filledArray.add(0.00909);
210 filledArray.add(0.01357);
211 filledArray.add(0.10026);
212 filledArray.add(0.001);
214 double[] testList = HMMFile.parseDoubles(scanner1, 9);
216 for (int i = 0; i < 9; i++)
218 assertEquals(testList[i], filledArray.get(i), 0.001d);
224 Scanner scanner2 = new Scanner(
225 "1.346 5.554 35.345 5.64 1.4");
// NOTE(review): no reset of filledArray is visible before these additions; if
// the list is not cleared, get(i) below still reads the first batch - confirm
226 filledArray.add(0.2603);
227 filledArray.add(0.00387);
229 filledArray.add(0.00355);
230 filledArray.add(0.2466);
232 testList = HMMFile.parseDoubles(scanner2, 5);
234 for (int i = 0; i < 5; i++)
236 assertEquals(testList[i], filledArray.get(i), 0.001d);
/**
 * Opens test_MADE1_hmm.txt, passes the reader to made1.parseModel and checks a
 * sample of match emission, insert emission and state transition
 * probabilities on the model returned by made1.getHMM(). The final visible
 * check expects Double.NEGATIVE_INFINITY for transition (1111, 6).
 *
 * @throws IOException
 */
240 @Test(groups = "Functional")
241 public void testParseModel() throws IOException
243 FileReader fr = new FileReader(
244 new File("test/jalview/io/test_MADE1_hmm.txt"));
245 BufferedReader br = new BufferedReader(fr);
246 HiddenMarkovModel testHMM = new HiddenMarkovModel();
// NOTE(review): presumably this loop advances the reader past the header
// lines before parseModel is called; the loop body is not shown here - confirm
247 for (int i = 0; i < 24; i++)
252 made1.parseModel(br);
253 testHMM = made1.getHMM();
// spot checks: match emissions, insert emissions, then state transitions
258 assertEquals(testHMM.getMatchEmissionProbability(1, 'C'), 0.09267,
260 assertEquals(testHMM.getMatchEmissionProbability(25, 'G'), 0.07327,
262 assertEquals(testHMM.getMatchEmissionProbability(1092, 'C'), 0.04184,
264 assertEquals(testHMM.getMatchEmissionProbability(1107, 'G'), 0.07,
267 assertEquals(testHMM.getInsertEmissionProbability(0, 'G'), 0.25,
269 assertEquals(testHMM.getInsertEmissionProbability(247, 'T'), 0.2776,
271 assertEquals(testHMM.getInsertEmissionProbability(1096, 'T'), 0.25,
273 assertEquals(testHMM.getInsertEmissionProbability(1111, 'T'), 0.25,
276 assertEquals(testHMM.getStateTransitionProbability(1, 0), 0.9634,
278 assertEquals(testHMM.getStateTransitionProbability(5, 1), 0.0203,
280 assertEquals(testHMM.getStateTransitionProbability(14, 3), 0.2515,
282 assertEquals(testHMM.getStateTransitionProbability(65, 4), 0.78808,
284 assertEquals(testHMM.getStateTransitionProbability(1080, 2), 0.01845,
286 assertEquals(testHMM.getStateTransitionProbability(1111, 6),
287 Double.NEGATIVE_INFINITY);
291 * Test that if no mapping of nodes to aligned columns is provided by the HMM
292 * file, we construct one
294 * @throws IOException
296 @Test(groups = "Functional")
297 public void testParseModel_noMap() throws IOException
// Placeholder: fails unconditionally until the test is written.
299 fail("test to be written");
/**
 * Calls parseAnnotations twice on an HMMFile whose "hmm" field is set (via
 * PA.setValue) to a new HiddenMarkovModel: first with all five annotation
 * flags set to "yes" and input "1345 t t t t", then with MAP,
 * CONSENSUS_STRUCTURE and MASKED_VALUE set to "no" and input "- y x - -".
 * The visible assertions check node 1: the alignment column is 1344 (one less
 * than the parsed 1345) and every annotation character is 't'.
 */
302 @Test(groups = "Functional")
303 public void testParseAnnotations()
305 HMMFile testFile = new HMMFile();
306 HiddenMarkovModel hmm = new HiddenMarkovModel();
307 PA.setValue(testFile, "hmm", hmm);
308 hmm.addNode(new HMMNode());
// first pass: every annotation flag enabled
310 hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
311 hmm.setProperty(HMMFile.MAP, "yes");
312 hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
313 hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "yes");
314 hmm.setProperty(HMMFile.MASKED_VALUE, "yes");
315 Scanner scanner = new Scanner("1345 t t t t");
316 HMMNode node = new HMMNode();
318 testFile.parseAnnotations(scanner, node);
// second pass: only consensus residue and reference annotation enabled
320 hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
321 hmm.setProperty(HMMFile.MAP, "no");
322 hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
323 hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "no");
324 hmm.setProperty(HMMFile.MASKED_VALUE, "no");
325 Scanner scanner2 = new Scanner("- y x - -");
326 node = new HMMNode();
328 testFile.parseAnnotations(scanner2, node);
// the assertions visible here inspect node 1 only
330 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
331 assertEquals(hmm.getConsensusResidue(1), 't');
332 assertEquals(hmm.getReferenceAnnotation(1), 't');
333 assertEquals(hmm.getMaskedValue(1), 't');
334 assertEquals(hmm.getConsensusStructure(1), 't');
340 * Tests that the model written out by print() can be read back from the exported file and matches the model it was printed from
342 * @throws IOException
344 @Test(groups = "Functional")
345 public void testPrint() throws IOException
347 PrintWriter writer = new PrintWriter(
348 "test/jalview/io/test_export_hmm.txt");
// write the printed pKinase model to the export file
349 String output = pKinase.print();
350 writer.print(output);
// NOTE(review): no writer.close()/flush() is visible before the file is read
// back below - confirm the output is flushed before it is parsed
352 HMMFile pKinaseClone = new HMMFile(
353 new FileParse("test/jalview/io/test_export_hmm.txt",
354 DataSourceType.FILE));
// NOTE(review): these two new HiddenMarkovModel() instances are replaced by
// the getHMM() results on the following lines
355 HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
356 HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
357 pKinaseHMM = pKinase.getHMM();
358 pKinaseCloneHMM = pKinaseClone.getHMM();
360 for (int i = 0; i < pKinaseHMM.getLength(); i++)
365 list1 = pKinaseHMM.getNode(i).getMatchEmissions();
366 list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
368 assertEquals(list1, list2);
// insert emissions of node i must match
370 list1 = pKinaseHMM.getNode(i).getInsertEmissions();
371 list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
373 assertEquals(list1, list2);
// state transitions of node i must match
375 list1 = pKinaseHMM.getNode(i).getStateTransitions();
376 list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
378 assertEquals(list1, list2);
385 alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
386 alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
388 assertEquals(alignColumn1, alignColumn2);
393 annotation1 = pKinaseHMM.getReferenceAnnotation(i);
394 annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
396 assertEquals(annotation1, annotation2);
// consensus residue of node i must match
398 annotation1 = pKinaseHMM.getConsensusResidue(i);
399 annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
401 assertEquals(annotation1, annotation2);
/**
 * Calls fn3.appendProperties on an empty StringBuilder and checks the result
 * line by line, in order, against the expected header lines listed below.
 *
 * @throws FileNotFoundException
 */
406 @Test(groups = "Functional")
407 public void testAppendProperties() throws FileNotFoundException
409 StringBuilder sb = new StringBuilder();
410 fn3.appendProperties(sb);
412 Scanner testScanner = new Scanner(sb.toString());
// expected output, one array element per line
414 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
415 "NAME fn3", "ACC PF00041.13",
416 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
417 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
418 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
419 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
420 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
421 "STATS LOCAL VITERBI -9.7737 0.71847",
422 "STATS LOCAL FORWARD -3.8341 0.71847" };
424 for (String value : expected)
426 assertEquals(testScanner.nextLine(), value);
/**
 * Calls fn3.appendModelAsString on an empty StringBuilder and spot-checks
 * individual tokens of the resulting text. Each check calls
 * findValue(symbolIndex, nodeIndex, line, model) and compares the returned
 * token with the expected string.
 *
 * @throws FileNotFoundException
 */
432 @Test(groups = "Functional")
433 public void testAppendModelAsString() throws FileNotFoundException
435 StringBuilder sb = new StringBuilder();
436 fn3.appendModelAsString(sb);
437 String string = sb.toString();
// arguments are (symbol index, node index, line within node, model text)
439 assertEquals(findValue(2, 2, 2, string), "4.42225");
440 assertEquals(findValue(12, 14, 1, string), "2.79307");
441 assertEquals(findValue(6, 24, 3, string), "0.48576");
442 assertEquals(findValue(19, 33, 2, string), "4.58477");
443 assertEquals(findValue(20, 64, 2, string), "3.61505");
444 assertEquals(findValue(3, 72, 3, string), "6.81068");
445 assertEquals(findValue(10, 80, 2, string), "2.69355");
446 assertEquals(findValue(16, 65, 1, string), "2.81003");
447 assertEquals(findValue(14, 3, 1, string), "2.69012");
448 assertEquals(findValue(11, 32, 1, string), "4.34805");
452 * A helper method to find a token in the model string
455 * index of symbol being searched. First symbol has index 1.
457 * index of node being searched. Begin node has index 0. First node
460 * index of line being searched in node. First line has index 1.
462 * string model being searched
463 * @return value at specified position
465 private String findValue(int symbolIndex, int nodeIndex, int line,
469 Scanner scanner = new Scanner(model);
473 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
477 for (int node = 0; node < nodeIndex; node++)
484 for (int symbol = 0; symbol < symbolIndex; symbol++)
486 value = scanner.next();
487 if ("COMPO".equals(value))
491 else if (value.length() < 7)