3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNull;
6 import jalview.datamodel.HMMNode;
7 import jalview.datamodel.HiddenMarkovModel;
9 import java.io.BufferedReader;
11 import java.io.FileReader;
12 import java.io.IOException;
13 import java.util.ArrayList;
14 import java.util.List;
15 import java.util.Scanner;
17 import org.testng.annotations.Test;
19 public class HMMFileTest {
// Shared fixture: HMMFile reading test/jalview/io/test_fn3_hmm.txt from disk.
23 HMMFile fn3 = new HMMFile(
24 new FileParse("test/jalview/io/test_fn3_hmm.txt",
25 DataSourceType.FILE));
// Shared fixture: HMMFile reading test/jalview/io/test_PKinase_hmm.txt from disk.
27 HMMFile pKinase = new HMMFile(
28 new FileParse("test/jalview/io/test_PKinase_hmm.txt",
29 DataSourceType.FILE));
// Shared fixture: HMMFile reading test/jalview/io/test_MADE1_hmm.txt from disk.
31 HMMFile made1 = new HMMFile(
32 new FileParse("test/jalview/io/test_MADE1_hmm.txt",
33 DataSourceType.FILE));
// Constructor; declared to throw IOException.
// NOTE(review): presumably parses the fixture files so that tests such as
// testParse can call getHMM() without parsing first — confirm.
35 HMMFileTest() throws IOException
// Checks the model returned by pKinase.getHMM(): header properties, the symbol
// list, sampled match/insert emissions, sampled state transitions and
// per-node annotation values.
44 public void testParse() throws IOException
48 HiddenMarkovModel hmm = pKinase.getHMM();
// Header properties.
49 assertEquals(hmm.getName(), "Pkinase");
50 assertEquals(hmm.getAccessionNumber(), "PF00069.17");
51 assertEquals(hmm.getDescription(), "Protein kinase domain");
52 assertEquals(hmm.getLength().intValue(), 260);
53 assertNull(hmm.getMaxInstanceLength());
54 assertEquals(hmm.getAlphabetType(), "amino");
55 assertEquals(hmm.referenceAnnotationIsActive(), false);
56 assertEquals(hmm.maskValueIsActive(), false);
57 assertEquals(hmm.consensusResidueIsActive(), true);
58 assertEquals(hmm.consensusStructureIsActive(),
60 assertEquals(hmm.mapIsActive(), true);
61 assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
62 assertNull(hmm.getCommandLineLog());
63 assertEquals(hmm.getNumberOfSequences().intValue(), 54);
// NOTE(review): the third argument is the permitted delta; 4d accepts any
// value within +/-4 of 3.358521, so this check is very weak. A small
// tolerance was probably intended — confirm and tighten.
64 assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 4d);
65 assertEquals(hmm.getCheckSum().longValue(), 3106786190l);
66 assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
67 assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
68 assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");
// Expected symbol list, compared as a whole with hmm.getSymbols().
70 List<Character> symbols = new ArrayList<>();
92 assertEquals(hmm.getSymbols(), symbols);
// Sampled match emissions: (node index, symbol index), tolerance 0.001.
94 assertEquals(getMatchEmission(0, 19, hmm), 0.032298, 0.001d);
95 assertEquals(getMatchEmission(12, 12, hmm), 0.0130, 0.001d);
96 assertEquals(getMatchEmission(23, 7, hmm), 0.02583, 0.001d);
97 assertEquals(getMatchEmission(54, 1, hmm), 0.008549, 0.001d);
98 assertEquals(getMatchEmission(178, 3, hmm), 0.07998, 0.001d);
99 assertEquals(getMatchEmission(210, 2, hmm), 0.014465, 0.001d);
100 assertEquals(getMatchEmission(260, 19, hmm), 0.02213, 0.001d);
// Sampled insert emissions: (node index, symbol index), tolerance 0.001.
102 assertEquals(getInsertEmission(2, 1, hmm), 0.012, 0.001d);
103 assertEquals(getInsertEmission(15, 6, hmm), 0.02411, 0.001d);
104 assertEquals(getInsertEmission(22, 9, hmm), 0.06764, 0.001d);
105 assertEquals(getInsertEmission(57, 2, hmm), 0.0623, 0.001d);
106 assertEquals(getInsertEmission(203, 16, hmm), 0.0623, 0.001d);
107 assertEquals(getInsertEmission(255, 12, hmm), 0.0647, 0.001d);
// Sampled state transitions: (node index, transition index). The first check
// has no delta and expects exactly Double.NEGATIVE_INFINITY.
109 assertEquals(getStateTransition(0, 6, hmm),
110 Double.NEGATIVE_INFINITY);
111 assertEquals(getStateTransition(3, 6, hmm), 0.3848, 0.001d);
112 assertEquals(getStateTransition(29, 3, hmm), 0.5382, 0.001d);
113 assertEquals(getStateTransition(169, 3, hmm), 0.2916, 0.001d);
114 assertEquals(getStateTransition(209, 0, hmm), 0.99, 0.001d);
115 assertEquals(getStateTransition(243, 1, hmm), 0.0066, 0.001d);
// Per-node annotations, each read by node index.
117 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 3);
118 assertEquals(hmm.getReferenceAnnotation(7), '-');
119 assertEquals(hmm.getConsensusResidue(23), 't');
120 assertEquals(hmm.getMaskedValue(30), '-');
121 assertEquals(hmm.getConsensusStructure(56), 'S');
123 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 136);
124 assertEquals(hmm.getReferenceAnnotation(93), '-');
125 assertEquals(hmm.getConsensusResidue(145), 'a');
126 assertEquals(hmm.getMaskedValue(183), '-');
127 assertEquals(hmm.getConsensusStructure(240), 'H');
// Feeds the fn3 and MADE1 fixture files to parseFileProperties through a
// BufferedReader and checks the header properties stored on each model.
132 public void testParseFileProperties() throws IOException
// NOTE(review): no close() call for fr/br is visible in this method —
// confirm the readers are released (try-with-resources would guarantee it).
134 FileReader fr = new FileReader(
135 new File("test/jalview/io/test_fn3_hmm.txt"));
136 BufferedReader br = new BufferedReader(fr);
137 fn3.parseFileProperties(br);
138 fn3.parseModel(br); // this is for a later test
// NOTE(review): the HiddenMarkovModel created here is replaced on the next
// line by fn3.getHMM(), so the allocation is unused.
139 HiddenMarkovModel testHMM = new HiddenMarkovModel();
140 testHMM = fn3.getHMM();
// Header properties expected for the fn3 file.
144 assertEquals(testHMM.getName(), "fn3");
145 assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
146 assertEquals(testHMM.getDescription(),
147 "Fibronectin type III domain");
148 assertEquals(testHMM.getLength().intValue(), 86);
149 assertNull(testHMM.getMaxInstanceLength());
150 assertEquals(testHMM.getAlphabetType(), "amino");
151 assertEquals(testHMM.referenceAnnotationIsActive(), false);
152 assertEquals(testHMM.maskValueIsActive(), false);
153 assertEquals(testHMM.consensusResidueIsActive(), true);
154 assertEquals(testHMM.consensusStructureIsActive(), true);
155 assertEquals(testHMM.mapIsActive(), true);
156 assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
157 assertNull(testHMM.getCommandLineLog());
158 assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
// NOTE(review): delta 4d accepts anything within +/-4 of the expected value;
// a much smaller tolerance was probably intended — confirm.
159 assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d);
160 assertEquals(testHMM.getCheckSum().longValue(), 3564431818l);
161 assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
162 assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
163 assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
164 assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
165 assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
166 assertEquals(testHMM.getForward(), "-3.8341 0.71847");
// Second fixture: MADE1. Only parseFileProperties is called for this file.
169 FileReader fr3 = new FileReader(
170 new File("test/jalview/io/test_MADE1_hmm.txt"));
171 BufferedReader br3 = new BufferedReader(fr3);
172 made1.parseFileProperties(br3);
173 testHMM = made1.getHMM();
// Header properties expected for the MADE1 file.
177 assertEquals(testHMM.getName(), "MADE1");
178 assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
179 assertEquals(testHMM.getDescription(),
180 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
181 assertEquals(testHMM.getLength().intValue(), 80);
182 assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
183 assertEquals(testHMM.getAlphabetType(), "DNA");
184 assertEquals(testHMM.referenceAnnotationIsActive(), true);
185 assertEquals(testHMM.maskValueIsActive(), false);
186 assertEquals(testHMM.consensusResidueIsActive(), true);
187 assertEquals(testHMM.consensusStructureIsActive(), false);
188 assertEquals(testHMM.mapIsActive(), true);
189 assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
190 assertNull(testHMM.getCommandLineLog());
191 assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
// NOTE(review): same over-wide 4d delta as above.
192 assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d);
193 assertEquals(testHMM.getCheckSum().longValue(), 3015610723l);
194 assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
195 assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
196 assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
197 assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
198 assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
199 assertEquals(testHMM.getForward(), "-3.4823 0.71858");
// Checks the integer code getTransitionType returns for each two-letter
// transition name ("mm" -> 0 through "dd" -> 6), and that a name outside that
// set ("df") yields null.
205 public void testGetTransitionType()
207 HiddenMarkovModel hmm = fn3.getHMM();
208 assertEquals(hmm.getTransitionType("mm").intValue(), 0);
209 assertEquals(hmm.getTransitionType("mi").intValue(), 1);
210 assertEquals(hmm.getTransitionType("md").intValue(), 2);
211 assertEquals(hmm.getTransitionType("im").intValue(), 3);
212 assertEquals(hmm.getTransitionType("ii").intValue(), 4);
213 assertEquals(hmm.getTransitionType("dm").intValue(), 5);
214 assertEquals(hmm.getTransitionType("dd").intValue(), 6);
215 assertNull(hmm.getTransitionType("df"));
// Checks HMMFile.fillList(scanner, n) against hand-computed expected values.
// The expected numbers equal exp(-x) for each input token
// (e.g. exp(-1.3) ~ 0.27253, exp(-2.4) ~ 0.0907).
220 public void testFillList()
222 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
// Expected results for scanner1, in input order.
223 ArrayList<Double> filledArray = new ArrayList<>();
225 filledArray.add(0.27253);
226 filledArray.add(0.0907);
227 filledArray.add(0.00499);
228 filledArray.add(0.02024);
229 filledArray.add(0.00005);
230 filledArray.add(0.00909);
231 filledArray.add(0.01357);
232 filledArray.add(0.10026);
233 filledArray.add(0.001);
235 List<Double> testList = HMMFile.fillList(scanner1, 9);
// Element-wise comparison with tolerance 0.001.
237 for (int i = 0; i < 9; i++)
239 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
// Second case: five input tokens.
246 Scanner scanner2 = new Scanner(
247 "1.346 5.554 35.345 5.64 1.4");
// NOTE(review): these values are appended to filledArray, while the loop
// below reads indices 0-4 — this only works if the list was emptied after the
// first case; confirm.
248 filledArray.add(0.2603);
249 filledArray.add(0.00387);
251 filledArray.add(0.00355);
252 filledArray.add(0.2466);
254 testList = HMMFile.fillList(scanner2, 5);
256 for (int i = 0; i < 5; i++)
258 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
// Runs made1.parseModel on a reader over the MADE1 fixture file and
// spot-checks emissions and transitions of the resulting model.
264 public void testParseModel() throws IOException
// NOTE(review): no close() call for fr/br is visible in this method — confirm.
266 FileReader fr = new FileReader(
267 new File("test/jalview/io/test_MADE1_hmm.txt"));
268 BufferedReader br = new BufferedReader(fr);
// NOTE(review): this allocation is replaced below by made1.getHMM().
269 HiddenMarkovModel testHMM = new HiddenMarkovModel();
// NOTE(review): 24 is hard-coded — presumably the number of header lines to
// skip before the model section of the MADE1 file; confirm against the file.
270 for (int i = 0; i < 24; i++)
274 made1.parseModel(br);
275 testHMM = made1.getHMM();
// Match emissions: (node index, symbol index), tolerance 0.001.
279 assertEquals(getMatchEmission(0, 2, testHMM), 0.1961, 0.001d);
280 assertEquals(getMatchEmission(2, 1, testHMM), 0.09267, 0.001d);
281 assertEquals(getMatchEmission(12, 2, testHMM), 0.07327, 0.001d);
282 assertEquals(getMatchEmission(69, 1, testHMM), 0.04184, 0.001d);
283 assertEquals(getMatchEmission(76, 2, testHMM), 0.07, 0.001d);
// Insert emissions: (node index, symbol index), tolerance 0.001.
285 assertEquals(getInsertEmission(0, 1, testHMM), 0.25, 0.001d);
286 assertEquals(getInsertEmission(1, 2, testHMM), 0.25, 0.001d);
287 assertEquals(getInsertEmission(31, 3, testHMM), 0.2776, 0.001d);
288 assertEquals(getInsertEmission(70, 3, testHMM), 0.25, 0.001d);
289 assertEquals(getInsertEmission(80, 3, testHMM), 0.25, 0.001d);
// State transitions: (node index, transition index). The final check has no
// delta and expects exactly Double.NEGATIVE_INFINITY.
291 assertEquals(getStateTransition(2, 0, testHMM), 0.9634, 0.001d);
292 assertEquals(getStateTransition(6, 1, testHMM), 0.0203, 0.001d);
293 assertEquals(getStateTransition(9, 3, testHMM), 0.2515, 0.001d);
294 assertEquals(getStateTransition(20, 4, testHMM), 0.78808, 0.001d);
295 assertEquals(getStateTransition(68, 2, testHMM), 0.01845, 0.001d);
296 assertEquals(getStateTransition(80, 6, testHMM),
297 Double.NEGATIVE_INFINITY);
// Builds an HMMFile holding three empty nodes, runs parseAnnotations for
// node 1 and node 2 under different status-flag settings, and checks the
// values stored on each node.
302 public void testParseAnnotations()
304 HMMFile testFile = new HMMFile();
305 testFile.getHMM().getNodes().add(new HMMNode());
306 testFile.getHMM().getNodes().add(new HMMNode());
307 testFile.getHMM().getNodes().add(new HMMNode());
// Case 1: all five status flags on; input carries a column number followed
// by four single-character annotations.
310 testFile.getHMM().setConsensusResidueStatus(true);
311 testFile.getHMM().setMAPStatus(true);
312 testFile.getHMM().setReferenceAnnotationStatus(true);
313 testFile.getHMM().setConsensusStructureStatus(true);
314 testFile.getHMM().setMaskedValueStatus(true);
315 Scanner scanner = new Scanner("1345 t t t t");
316 testFile.parseAnnotations(scanner, 1);
// Case 2: only consensus-residue and reference-annotation flags on; the
// remaining fields in the input are '-'.
318 testFile.getHMM().setConsensusResidueStatus(true);
319 testFile.getHMM().setMAPStatus(false);
320 testFile.getHMM().setReferenceAnnotationStatus(true);
321 testFile.getHMM().setConsensusStructureStatus(false);
322 testFile.getHMM().setMaskedValueStatus(false);
323 Scanner scanner2 = new Scanner("- y x - -");
324 testFile.parseAnnotations(scanner2, 2);
326 HiddenMarkovModel hmm = testFile.getHMM();
// Node 1: column 1345 and 't' for every annotation.
328 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1345);
329 assertEquals(hmm.getConsensusResidue(1), 't');
330 assertEquals(hmm.getReferenceAnnotation(1), 't');
331 assertEquals(hmm.getMaskedValue(1), 't');
332 assertEquals(hmm.getConsensusStructure(1), 't');
// Reverse lookup: alignment column 1345 resolves to node 1.
334 assertEquals(hmm.findNodeIndex(1345).intValue(), 1);
// Node 2: no alignment column; 'y' and 'x' for the two active annotations,
// '-' for the inactive ones.
338 assertNull(hmm.getNodeAlignmentColumn(2));
339 assertEquals(hmm.getConsensusResidue(2), 'y');
340 assertEquals(hmm.getReferenceAnnotation(2), 'x');
341 assertEquals(hmm.getMaskedValue(2), '-');
342 assertEquals(hmm.getConsensusStructure(2), '-');
// No node is registered for alignment column 2.
344 assertNull(hmm.findNodeIndex(2));
350 * Tests that the file produced by the export matches the model parsed from the input file
352 * @throws IOException
// Exports pKinase to test/jalview/io/test_export_hmm.txt, parses that file
// into a second HMMFile, and compares the two models node by node.
// NOTE(review): no clean-up of the exported file is visible here — confirm
// whether it should be deleted after the test.
357 public void testExportFile() throws IOException
359 pKinase.exportFile("test/jalview/io/test_export_hmm.txt");
360 HMMFile pKinaseClone = new HMMFile(
361 new FileParse("test/jalview/io/test_export_hmm.txt",
362 DataSourceType.FILE));
363 pKinaseClone.parse();
// NOTE(review): both new HiddenMarkovModel() instances are replaced by the
// getHMM() assignments two lines further down.
364 HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
365 HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
366 pKinaseHMM = pKinase.getHMM();
367 pKinaseCloneHMM = pKinaseClone.getHMM();
// NOTE(review): the loop covers node indices 0 .. getLength() - 1, yet
// testParse reads node index 260 of this length-260 model — the final node
// may not be compared here; confirm the intended bound.
369 for (int i = 0; i < pKinaseHMM.getLength(); i++)
// Match emissions of node i.
375 list1 = pKinaseHMM.getNode(i).getMatchEmissions();
376 list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
378 result = checkIfListsAreIdentical(list1, list2);
379 assertEquals(result, true);
// Insert emissions of node i.
381 list1 = pKinaseHMM.getNode(i).getInsertEmissions();
382 list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
384 result = checkIfListsAreIdentical(list1, list2);
385 assertEquals(result, true);
// State transitions of node i.
387 list1 = pKinaseHMM.getNode(i).getStateTransitions();
388 list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
390 result = checkIfListsAreIdentical(list1, list2);
391 assertEquals(result, true);
// Alignment column mapped to node i.
398 alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
399 alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
401 assertEquals(alignColumn1, alignColumn2);
// Reference annotation, then consensus residue, of node i.
406 annotation1 = pKinaseHMM.getReferenceAnnotation(i);
407 annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
409 assertEquals(annotation1, annotation2);
411 annotation1 = pKinaseHMM.getConsensusResidue(i);
412 annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
414 assertEquals(annotation1, annotation2);
// Appends the fn3 header properties to a StringBuilder and checks the
// resulting text line by line against the expected header lines.
422 public void testAppendFileProperties()
424 StringBuilder testBuilder = new StringBuilder();
425 fn3.appendFileProperties(testBuilder);
// Scanner over the generated text, consumed one line per expected entry.
426 Scanner testScanner = new Scanner(testBuilder.toString());
// Expected output lines, in order.
428 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
429 "NAME fn3", "ACC PF00041.13",
430 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
431 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
432 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
433 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
434 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
435 "STATS LOCAL VITERBI -9.7737 0.71847",
436 "STATS LOCAL FORWARD -3.8341 0.71847" };
438 for (String value : expected)
440 assertEquals(testScanner.nextLine(), value);
// Appends the fn3 model section to a StringBuilder and spot-checks individual
// values in the text. Each findValue call takes
// (symbolIndex, nodeIndex, line, model) and returns the token as a String.
447 public void testAppendModel()
449 StringBuilder testBuilder = new StringBuilder();
450 fn3.appendModel(testBuilder);
451 String string = testBuilder.toString();
452 assertEquals(findValue(2, 2, 2, string), "4.42225");
453 assertEquals(findValue(12, 14, 1, string), "2.79307");
454 assertEquals(findValue(6, 24, 3, string), "0.48576");
455 assertEquals(findValue(19, 33, 2, string), "4.58477");
456 assertEquals(findValue(20, 64, 2, string), "3.61505");
457 assertEquals(findValue(3, 72, 3, string), "6.81068");
458 assertEquals(findValue(10, 80, 2, string), "2.69355");
459 assertEquals(findValue(16, 65, 1, string), "2.81003");
460 assertEquals(findValue(14, 3, 1, string), "2.69012");
461 assertEquals(findValue(11, 32, 1, string), "4.34805");
468 * index of symbol being searched. First symbol has index 1.
470 * index of node being searched. Begin node has index 0. First node
473 * index of line being searched in node. First line has index 1.
475 * string model being searched
476 * @return value at specified position
// Test helper: walks the model text with a Scanner to reach one value,
// addressed by symbol index, node index and line-within-node.
479 public String findValue(int symbolIndex, int nodeIndex, int line,
// NOTE(review): no close() for this Scanner is visible — confirm.
484 Scanner scanner = new Scanner(model);
// Runs (line - 1) times.
488 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
// Runs nodeIndex times.
492 for (int node = 0; node < nodeIndex; node++)
// Reads symbolIndex tokens; "COMPO" and tokens shorter than 7 characters are
// special-cased below.
// NOTE(review): presumably those are row labels / node numbers that must not
// be counted as values — confirm.
499 for (int symbol = 0; symbol < symbolIndex; symbol++)
501 value = scanner.next();
502 if ("COMPO".equals(value))
506 else if (value.length() < 7)
// Test helper: compares two lists of doubles position by position.
516 public boolean checkIfListsAreIdentical(List<Double> list1,
519 boolean isDifferent = false;
// Iterates over list1's length only: a shorter list2 makes list2.get(i)
// throw, and extra trailing entries in list2 are never inspected.
520 for (int i = 0; i < list1.size(); i++)
524 entry1 = list1.get(i);
525 entry2 = list2.get(i);
// NOTE(review): if entry1/entry2 are declared as boxed Double, == compares
// object references rather than numeric values — confirm the declared type
// and use equals()/primitive comparison if boxed.
526 if (!(entry1 == entry2))
535 * gets the match emission at a node for a symbol
538 * position of node in model
540 * index of symbol being searched
541 * @return negative log probability of a match emission of the given symbol
// Test helper: takes the node at nodeIndex from hmm.getNodes() and reads from
// its match-emission list.
// NOTE(review): the values asserted through this helper in the tests
// (e.g. 0.032298) look like probabilities; confirm whether the
// "negative log probability" wording in the Javadoc is still accurate.
543 public double getMatchEmission(int nodeIndex, int symbolIndex,
544 HiddenMarkovModel hmm)
546 double value = hmm.getNodes().get(nodeIndex).getMatchEmissions()
552 * gets the insert emission at a node for a symbol
555 * position of node in model
557 * index of symbol being searched
558 * @return negative log probability of an insert emission of the given symbol
// Test helper: takes the node at nodeIndex from hmm.getNodes() and reads from
// its insert-emission list.
560 public double getInsertEmission(int nodeIndex, int symbolIndex,
561 HiddenMarkovModel hmm)
563 double value = hmm.getNodes().get(nodeIndex).getInsertEmissions()
569 * gets the state transition at a node for a specific transition
572 * position of node in model
573 * @param transitionIndex
574 * index of transition being searched
575 * @return negative log probability of a state transition of the given type
// Test helper: takes the node at nodeIndex from hmm.getNodes() and reads the
// entry at transitionIndex from its state-transition list.
577 public double getStateTransition(int nodeIndex, int transitionIndex,
578 HiddenMarkovModel hmm)
580 double value = hmm.getNodes().get(nodeIndex).getStateTransitions()
581 .get(transitionIndex);