3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNull;
6 import jalview.datamodel.HMMNode;
7 import jalview.datamodel.HiddenMarkovModel;
9 import java.io.BufferedReader;
11 import java.io.FileReader;
12 import java.io.IOException;
13 import java.util.ArrayList;
14 import java.util.List;
15 import java.util.Scanner;
17 import org.testng.annotations.Test;
19 public class HMMFileTest {
// Shared fixtures used by the tests below. Each one is an HMMFile built on a
// FileParse for a profile file under test/jalview/io, opened as
// DataSourceType.FILE.

// fn3 profile fixture
23 HMMFile fn3 = new HMMFile(
24 new FileParse("test/jalview/io/test_fn3_hmm.txt",
25 DataSourceType.FILE));
// PKinase profile fixture
27 HMMFile pKinase = new HMMFile(
28 new FileParse("test/jalview/io/test_PKinase_hmm.txt",
29 DataSourceType.FILE));
// MADE1 profile fixture
31 HMMFile made1 = new HMMFile(
32 new FileParse("test/jalview/io/test_MADE1_hmm.txt",
33 DataSourceType.FILE));
// NOTE(review): the constructor declares IOException, presumably because the
// field initialisers above open files - confirm against the constructor body.
35 HMMFileTest() throws IOException
// Verifies the HiddenMarkovModel exposed by the pKinase fixture: header
// properties first, then the symbol list, then sampled match emissions,
// insert emissions and state transitions, and finally per-node annotations.
44 public void testParse() throws IOException
48 HiddenMarkovModel hmm = pKinase.getHMM();
// header properties
49 assertEquals(hmm.getName(), "Pkinase");
50 assertEquals(hmm.getAccessionNumber(), "PF00069.17");
51 assertEquals(hmm.getDescription(), "Protein kinase domain");
52 assertEquals(hmm.getLength().intValue(), 260);
53 assertNull(hmm.getMaxInstanceLength());
54 assertEquals(hmm.getAlphabetType(), "amino");
55 assertEquals(hmm.referenceAnnotationIsActive(), false);
56 assertEquals(hmm.maskValueIsActive(), false);
57 assertEquals(hmm.consensusResidueIsActive(), true);
58 assertEquals(hmm.consensusStructureIsActive(),
60 assertEquals(hmm.mapIsActive(), true);
61 assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
62 assertNull(hmm.getCommandLineLog());
63 assertEquals(hmm.getNumberOfSequences().intValue(), 54);
// The expected value is quoted to six decimal places, so the tolerance is
// set to match; a delta of 4 would accept almost any value.
64 assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 0.000001d);
// Upper-case L suffix so the long literal cannot be misread as ending in 1.
65 assertEquals(hmm.getCheckSum().longValue(), 3106786190L);
66 assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
67 assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
68 assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");
// NOTE(review): symbols is compared as-is below; whatever populates it lies
// outside the lines reviewed - confirm the expected alphabet.
70 List<Character> symbols = new ArrayList<>();
92 assertEquals(hmm.getSymbols(), symbols);
// sampled match emissions: getMatchEmission(nodeIndex, symbolIndex, hmm);
// node indices run from 0 up to the model length (260)
94 assertEquals(getMatchEmission(0, 19, hmm), 3.43274);
95 assertEquals(getMatchEmission(12, 12, hmm), 4.33979);
96 assertEquals(getMatchEmission(23, 7, hmm), 3.65600);
97 assertEquals(getMatchEmission(54, 1, hmm), 4.76187);
98 assertEquals(getMatchEmission(79, 0, hmm), 2.81579);
99 assertEquals(getMatchEmission(100, 0, hmm), 1.86496);
100 assertEquals(getMatchEmission(112, 14, hmm), 2.77179);
101 assertEquals(getMatchEmission(143, 17, hmm), 5.10478);
102 assertEquals(getMatchEmission(156, 4, hmm), 4.69372);
103 assertEquals(getMatchEmission(178, 3, hmm), 2.52594);
104 assertEquals(getMatchEmission(210, 2, hmm), 4.23598);
105 assertEquals(getMatchEmission(260, 19, hmm), 3.81122);
// sampled insert emissions: getInsertEmission(nodeIndex, symbolIndex, hmm)
107 assertEquals(getInsertEmission(2, 1, hmm), 4.42225);
108 assertEquals(getInsertEmission(15, 6, hmm), 3.72501);
109 assertEquals(getInsertEmission(22, 9, hmm), 2.69355);
110 assertEquals(getInsertEmission(57, 2, hmm), 2.77519);
111 assertEquals(getInsertEmission(62, 14, hmm), 2.89801);
112 assertEquals(getInsertEmission(95, 17, hmm), 2.98532);
113 assertEquals(getInsertEmission(105, 4, hmm), 3.46354);
114 assertEquals(getInsertEmission(134, 1, hmm), 4.42225);
115 assertEquals(getInsertEmission(143, 0, hmm), 2.68618);
116 assertEquals(getInsertEmission(152, 16, hmm), 2.77519);
117 assertEquals(getInsertEmission(203, 16, hmm), 2.77519);
118 assertEquals(getInsertEmission(255, 12, hmm), 2.73739);
// sampled state transitions: getStateTransition(nodeIndex, transitionIndex,
// hmm); transition 6 of node 0 is expected to be negative infinity
120 assertEquals(getStateTransition(0, 6, hmm),
121 Double.NEGATIVE_INFINITY);
122 assertEquals(getStateTransition(3, 6, hmm), 0.95510);
123 assertEquals(getStateTransition(29, 3, hmm), 0.61958);
124 assertEquals(getStateTransition(46, 4, hmm), 0.77255);
125 assertEquals(getStateTransition(53, 1, hmm), 5.01631);
126 assertEquals(getStateTransition(79, 2, hmm), 5.73865);
127 assertEquals(getStateTransition(101, 2, hmm), 5.73865);
128 assertEquals(getStateTransition(120, 5, hmm), 0.48576);
129 assertEquals(getStateTransition(146, 5, hmm), 0.70219);
130 assertEquals(getStateTransition(169, 3, hmm), 1.23224);
131 assertEquals(getStateTransition(209, 0, hmm), 0.01003);
132 assertEquals(getStateTransition(243, 1, hmm), 5.01631);
// per-node annotations: alignment column, reference annotation, consensus
// residue, masked value and consensus structure
134 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 3);
135 assertEquals(hmm.getReferenceAnnotation(7), '-');
136 assertEquals(hmm.getConsensusResidue(23), 't');
137 assertEquals(hmm.getMaskedValue(30), '-');
138 assertEquals(hmm.getConsensusStructure(56), 'S');
140 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 136);
141 assertEquals(hmm.getReferenceAnnotation(93), '-');
142 assertEquals(hmm.getConsensusResidue(145), 'a');
143 assertEquals(hmm.getMaskedValue(183), '-');
144 assertEquals(hmm.getConsensusStructure(240), 'H');
// Drives parseFileProperties directly, using BufferedReaders opened on the
// fn3 and MADE1 test files, and checks the header properties held by each
// fixture's HiddenMarkovModel.
// NOTE(review): no close() call on the readers is visible in the lines
// reviewed - confirm they are released.
149 public void testParseFileProperties() throws IOException
151 FileReader fr = new FileReader(
152 new File("test/jalview/io/test_fn3_hmm.txt"));
153 BufferedReader br = new BufferedReader(fr);
154 fn3.parseFileProperties(br);
// NOTE(review): parsing the model here means later tests rely on this test
// having run first - confirm that ordering is guaranteed.
155 fn3.parseModel(br); // this is for a later test
// NOTE(review): this new instance is replaced by the assignment on the next
// line.
156 HiddenMarkovModel testHMM = new HiddenMarkovModel();
157 testHMM = fn3.getHMM();
// header properties of the fn3 (amino alphabet) profile
161 assertEquals(testHMM.getName(), "fn3");
162 assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
163 assertEquals(testHMM.getDescription(),
164 "Fibronectin type III domain");
165 assertEquals(testHMM.getLength().intValue(), 86);
166 assertNull(testHMM.getMaxInstanceLength());
167 assertEquals(testHMM.getAlphabetType(), "amino");
168 assertEquals(testHMM.referenceAnnotationIsActive(), false);
169 assertEquals(testHMM.maskValueIsActive(), false);
170 assertEquals(testHMM.consensusResidueIsActive(), true);
171 assertEquals(testHMM.consensusStructureIsActive(), true);
172 assertEquals(testHMM.mapIsActive(), true);
173 assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
174 assertNull(testHMM.getCommandLineLog());
175 assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
// The expected value is quoted to six decimal places, so the tolerance is
// set to match; a delta of 4 would accept almost any value.
176 assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 0.000001d);
// Upper-case L suffix so the long literal cannot be misread as ending in 1.
177 assertEquals(testHMM.getCheckSum().longValue(), 3564431818L);
178 assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
179 assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
180 assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
181 assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
182 assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
183 assertEquals(testHMM.getForward(), "-3.8341 0.71847");
// same checks against the MADE1 (DNA alphabet) profile; only the file
// properties are parsed for it here
186 FileReader fr3 = new FileReader(
187 new File("test/jalview/io/test_MADE1_hmm.txt"));
188 BufferedReader br3 = new BufferedReader(fr3);
189 made1.parseFileProperties(br3);
190 testHMM = made1.getHMM();
194 assertEquals(testHMM.getName(), "MADE1");
195 assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
196 assertEquals(testHMM.getDescription(),
197 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
198 assertEquals(testHMM.getLength().intValue(), 80);
199 assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
200 assertEquals(testHMM.getAlphabetType(), "DNA");
201 assertEquals(testHMM.referenceAnnotationIsActive(), true);
202 assertEquals(testHMM.maskValueIsActive(), false);
203 assertEquals(testHMM.consensusResidueIsActive(), true);
204 assertEquals(testHMM.consensusStructureIsActive(), false);
205 assertEquals(testHMM.mapIsActive(), true);
206 assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
207 assertNull(testHMM.getCommandLineLog());
208 assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
// tolerance matches the six quoted decimal places (see above)
209 assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 0.000001d);
210 assertEquals(testHMM.getCheckSum().longValue(), 3015610723L);
211 assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
212 assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
213 assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
214 assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
215 assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
216 assertEquals(testHMM.getForward(), "-3.4823 0.71858");
// Checks the mapping from two-letter transition codes to integer indices
// ("mm" -> 0 through "dd" -> 6) on the fn3 model, and that an unrecognised
// code ("df") yields null.
222 public void testGetTransitionType()
224 HiddenMarkovModel hmm = fn3.getHMM();
225 assertEquals(hmm.getTransitionType("mm").intValue(), 0);
226 assertEquals(hmm.getTransitionType("mi").intValue(), 1);
227 assertEquals(hmm.getTransitionType("md").intValue(), 2);
228 assertEquals(hmm.getTransitionType("im").intValue(), 3);
229 assertEquals(hmm.getTransitionType("ii").intValue(), 4);
230 assertEquals(hmm.getTransitionType("dm").intValue(), 5);
231 assertEquals(hmm.getTransitionType("dd").intValue(), 6);
// a code that is not one of the seven above
232 assertNull(hmm.getTransitionType("df"));
// Checks that HMMFile.fillList(scanner, n) returns the first n doubles read
// from the scanner, in order, by comparing against a hand-built list.
237 public void testFillList()
239 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
240 ArrayList<Double> filledArray = new ArrayList<>();
// expected contents for the first scanner, in input order
242 filledArray.add(1.3);
243 filledArray.add(2.4);
244 filledArray.add(5.3);
245 filledArray.add(3.9);
246 filledArray.add(9.8);
247 filledArray.add(4.7);
248 filledArray.add(4.3);
249 filledArray.add(2.3);
250 filledArray.add(6.9);
252 assertEquals(HMMFile.fillList(scanner1, 9), filledArray);
// second case: values with more digits and a larger magnitude
// NOTE(review): filledArray still holds the nine values above unless it is
// cleared before these additions - confirm it is reset in between.
256 Scanner scanner2 = new Scanner(
257 "1.346554 5.58756754 35.3523645 12345.3564 1.4");
258 filledArray.add(1.346554);
259 filledArray.add(5.58756754);
260 filledArray.add(35.3523645);
261 filledArray.add(12345.3564);
262 filledArray.add(1.4);
264 assertEquals(HMMFile.fillList(scanner2, 5), filledArray);
// Runs made1.parseModel on a reader opened over the MADE1 test file, then
// spot-checks match emissions, insert emissions and state transitions of the
// resulting model.
270 public void testParseModel() throws IOException
272 FileReader fr = new FileReader(
273 new File("test/jalview/io/test_MADE1_hmm.txt"));
274 BufferedReader br = new BufferedReader(fr);
// NOTE(review): this new instance is replaced by the assignment from
// made1.getHMM() further down.
275 HiddenMarkovModel testHMM = new HiddenMarkovModel();
// NOTE(review): presumably this loop consumes the 24 lines that precede the
// model section of the file - confirm against the loop body.
276 for (int i = 0; i < 24; i++)
280 made1.parseModel(br);
281 testHMM = made1.getHMM();
// sampled match emissions: getMatchEmission(nodeIndex, symbolIndex, hmm)
285 assertEquals(getMatchEmission(0, 2, testHMM), 1.62906);
286 assertEquals(getMatchEmission(2, 1, testHMM), 2.37873);
287 assertEquals(getMatchEmission(12, 2, testHMM), 2.61355);
288 assertEquals(getMatchEmission(26, 0, testHMM), 1.86925);
289 assertEquals(getMatchEmission(32, 3, testHMM), 2.58263);
290 assertEquals(getMatchEmission(59, 3, testHMM), 2.20507);
291 assertEquals(getMatchEmission(63, 0, testHMM), 0.41244);
292 assertEquals(getMatchEmission(69, 1, testHMM), 3.17398);
293 assertEquals(getMatchEmission(76, 2, testHMM), 2.65861);
// sampled insert emissions: getInsertEmission(nodeIndex, symbolIndex, hmm)
295 assertEquals(getInsertEmission(0, 1, testHMM), 1.38629);
296 assertEquals(getInsertEmission(1, 2, testHMM), 1.38629);
297 assertEquals(getInsertEmission(31, 3, testHMM), 1.28150);
298 assertEquals(getInsertEmission(43, 0, testHMM), 1.32290);
299 assertEquals(getInsertEmission(48, 2, testHMM), 1.52606);
300 assertEquals(getInsertEmission(52, 1, testHMM), 1.62259);
301 assertEquals(getInsertEmission(67, 0, testHMM), 1.38141);
302 assertEquals(getInsertEmission(70, 3, testHMM), 1.38629);
303 assertEquals(getInsertEmission(80, 3, testHMM), 1.38629);
// sampled state transitions: getStateTransition(nodeIndex, transitionIndex,
// hmm); transition 6 of node 80 is expected to be negative infinity
305 assertEquals(getStateTransition(2, 0, testHMM), 0.03725);
306 assertEquals(getStateTransition(6, 1, testHMM), 3.89715);
307 assertEquals(getStateTransition(9, 3, testHMM), 1.38021);
308 assertEquals(getStateTransition(20, 4, testHMM), 0.23815);
309 assertEquals(getStateTransition(34, 6, testHMM), 0.33363);
310 assertEquals(getStateTransition(46, 5, testHMM), 1.05474);
311 assertEquals(getStateTransition(57, 6, testHMM), 0.31164);
312 assertEquals(getStateTransition(68, 2, testHMM), 3.99242);
313 assertEquals(getStateTransition(80, 6, testHMM),
314 Double.NEGATIVE_INFINITY);
// Feeds two annotation lines through parseAnnotations on an HMMFile that
// holds three empty nodes, changing which annotation kinds are switched on
// between the two calls, then checks what was stored on nodes 1 and 2.
319 public void testParseAnnotations()
321 HMMFile testFile = new HMMFile();
322 testFile.getHMM().getNodes().add(new HMMNode());
323 testFile.getHMM().getNodes().add(new HMMNode());
324 testFile.getHMM().getNodes().add(new HMMNode());
// first line: all five annotation kinds switched on, parsed for node 1
327 testFile.getHMM().setConsensusResidueStatus(true);
328 testFile.getHMM().setMAPStatus(true);
329 testFile.getHMM().setReferenceAnnotationStatus(true);
330 testFile.getHMM().setConsensusStructureStatus(true);
331 testFile.getHMM().setMaskedValueStatus(true);
332 Scanner scanner = new Scanner("1345 t t t t");
333 testFile.parseAnnotations(scanner, 1);
// second line: only consensus residue and reference annotation switched on,
// parsed for node 2
335 testFile.getHMM().setConsensusResidueStatus(true);
336 testFile.getHMM().setMAPStatus(false);
337 testFile.getHMM().setReferenceAnnotationStatus(true);
338 testFile.getHMM().setConsensusStructureStatus(false);
339 testFile.getHMM().setMaskedValueStatus(false);
340 Scanner scanner2 = new Scanner("- y x - -");
341 testFile.parseAnnotations(scanner2, 2);
343 HiddenMarkovModel hmm = testFile.getHMM();
// node 1: every value from the first line is expected to be stored
345 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1345);
346 assertEquals(hmm.getConsensusResidue(1), 't');
347 assertEquals(hmm.getReferenceAnnotation(1), 't');
348 assertEquals(hmm.getMaskedValue(1), 't');
349 assertEquals(hmm.getConsensusStructure(1), 't');
// alignment column 1345 is expected to resolve back to node 1
351 assertEquals(hmm.findNodeIndex(1345).intValue(), 1);
// node 2: no alignment column; 'y' and 'x' stored for the two active kinds,
// '-' for masked value and consensus structure
355 assertNull(hmm.getNodeAlignmentColumn(2));
356 assertEquals(hmm.getConsensusResidue(2), 'y');
357 assertEquals(hmm.getReferenceAnnotation(2), 'x');
358 assertEquals(hmm.getMaskedValue(2), '-');
359 assertEquals(hmm.getConsensusStructure(2), '-');
361 assertNull(hmm.findNodeIndex(2));
367 * tests that the file written by exportFile yields the same model values as the file it was read from
369 * @throws IOException
// Writes the fn3 fixture to test/jalview/io/test_export_hmm.txt, wraps that
// file in a second HMMFile, and compares the two models node by node: match
// emissions, insert emissions, state transitions, alignment column,
// reference annotation and consensus residue.
374 public void testExportFile() throws IOException
376 fn3.exportFile("test/jalview/io/test_export_hmm.txt");
377 HMMFile fn3Clone = new HMMFile(
378 new FileParse("test/jalview/io/test_export_hmm.txt",
379 DataSourceType.FILE));
// NOTE(review): both new instances are replaced by the assignments that
// follow.
381 HiddenMarkovModel fn3HMM = new HiddenMarkovModel();
382 HiddenMarkovModel fn3CloneHMM = new HiddenMarkovModel();
383 fn3HMM = fn3.getHMM();
384 fn3CloneHMM = fn3Clone.getHMM();
// NOTE(review): the bound excludes index getLength(); other tests in this
// class read a node at index == length (node 260 of a length-260 model) -
// confirm whether the last node should be compared as well.
386 for (int i = 0; i < fn3HMM.getLength(); i++)
// per-node numeric lists must match element for element
392 list1 = fn3HMM.getNode(i).getMatchEmissions();
393 list2 = fn3CloneHMM.getNode(i).getMatchEmissions();
395 result = checkIfListsAreIdentical(list1, list2);
396 assertEquals(result, true);
398 list1 = fn3HMM.getNode(i).getInsertEmissions();
399 list2 = fn3CloneHMM.getNode(i).getInsertEmissions();
401 result = checkIfListsAreIdentical(list1, list2);
402 assertEquals(result, true);
404 list1 = fn3HMM.getNode(i).getStateTransitions();
405 list2 = fn3CloneHMM.getNode(i).getStateTransitions();
407 result = checkIfListsAreIdentical(list1, list2);
408 assertEquals(result, true);
// per-node alignment column
415 alignColumn1 = fn3HMM.getNodeAlignmentColumn(i);
416 alignColumn2 = fn3CloneHMM.getNodeAlignmentColumn(i);
418 assertEquals(alignColumn1, alignColumn2);
// per-node reference annotation, then consensus residue
423 annotation1 = fn3HMM.getReferenceAnnotation(i);
424 annotation2 = fn3CloneHMM.getReferenceAnnotation(i);
426 assertEquals(annotation1, annotation2);
428 annotation1 = fn3HMM.getConsensusResidue(i);
429 annotation2 = fn3CloneHMM.getConsensusResidue(i);
431 assertEquals(annotation1, annotation2);
// Renders the fn3 header with appendFileProperties into a StringBuilder and
// compares it, line by line, with the expected header text.
// Only as many lines as there are expected entries are read, so any extra
// trailing output is not examined by this test.
439 public void testAppendFileProperties()
441 StringBuilder testBuilder = new StringBuilder();
442 fn3.appendFileProperties(testBuilder);
443 Scanner testScanner = new Scanner(testBuilder.toString());
// expected header lines, in output order
445 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
446 "NAME fn3", "ACC PF00041.13",
447 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
448 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
449 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
450 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
451 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
452 "STATS LOCAL VITERBI -9.7737 0.71847",
453 "STATS LOCAL FORWARD -3.8341 0.71847" };
// each rendered line must equal the expected entry at the same position
455 for (String value : expected)
457 assertEquals(testScanner.nextLine(), value);
// Renders the fn3 model section with appendModel into a StringBuilder and
// spot-checks individual values, each located in the text with
// findValue(symbolIndex, nodeIndex, line, model).
464 public void testAppendModel()
466 StringBuilder testBuilder = new StringBuilder();
467 fn3.appendModel(testBuilder);
468 String string = testBuilder.toString();
// expected values are compared as the strings that appear in the output
469 assertEquals(findValue(2, 2, 2, string), "4.42225");
470 assertEquals(findValue(12, 14, 1, string), "2.79307");
471 assertEquals(findValue(6, 24, 3, string), "0.48576");
472 assertEquals(findValue(19, 33, 2, string), "4.58477");
473 assertEquals(findValue(20, 64, 2, string), "3.61505");
474 assertEquals(findValue(3, 72, 3, string), "6.81068");
475 assertEquals(findValue(10, 80, 2, string), "2.69355");
476 assertEquals(findValue(16, 65, 1, string), "2.81003");
477 assertEquals(findValue(14, 3, 1, string), "2.69012");
478 assertEquals(findValue(11, 32, 1, string), "4.34805");
485 * index of symbol being searched. First symbol has index 1.
487 * index of node being searched. Begin node has index 0. First node
490 * index of line being searched in node. First line has index 1.
492 * string model being searched
493 * @return value at specified position
// Looks up one value in a textual model by scanning it with a Scanner. The
// three loops below are bounded by the requested line within the node, the
// node index and the symbol index respectively.
// NOTE(review): "model" is presumably the fourth parameter, declared on the
// continuation of the signature - confirm.
496 public String findValue(int symbolIndex, int nodeIndex, int line,
501 Scanner scanner = new Scanner(model);
// iterates line - 1 times, i.e. once per line preceding the requested one
505 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
// iterates once per node preceding the requested node
509 for (int node = 0; node < nodeIndex; node++)
// reads symbolIndex tokens; the last token read is held in value
516 for (int symbol = 0; symbol < symbolIndex; symbol++)
518 value = scanner.next();
// the literal "COMPO" token and tokens shorter than 7 characters are
// special-cased
// NOTE(review): presumably these are row labels rather than values - confirm
// how each branch handles them.
519 if ("COMPO".equals(value))
523 else if (value.length() < 7)
// Compares two lists of Double position by position, iterating over the
// indices of list1.
// NOTE(review): list2 is indexed with list1's indices, so a shorter list2
// throws IndexOutOfBoundsException and extra elements in a longer list2 are
// never examined.
// NOTE(review): the flag is named isDifferent while callers assert that the
// result is true for matching lists - confirm which sense is returned.
533 public boolean checkIfListsAreIdentical(List<Double> list1,
536 boolean isDifferent = false;
537 for (int i = 0; i < list1.size(); i++)
541 entry1 = list1.get(i);
542 entry2 = list2.get(i);
// NOTE(review): the list elements are Double objects; if entry1 and entry2
// are declared as Double, == compares references rather than numeric values
// - confirm the declared types.
543 if (!(entry1 == entry2))
552 * gets the match emission at a node for a symbol
555 * position of node in model
557 * index of symbol being searched
558 * @return negative log probability of a match emission of the given symbol
// Reads a value from the match-emission list of the node at nodeIndex in
// hmm.getNodes().
// NOTE(review): presumably the list is then indexed by symbolIndex and the
// value returned - confirm against the rest of the statement.
560 public double getMatchEmission(int nodeIndex, int symbolIndex,
561 HiddenMarkovModel hmm)
563 double value = hmm.getNodes().get(nodeIndex).getMatchEmissions()
569 * gets the insert emission at a node for a symbol
572 * position of node in model
574 * index of symbol being searched
575 * @return negative log probability of an insert emission of the given symbol
// Reads a value from the insert-emission list of the node at nodeIndex in
// hmm.getNodes().
// NOTE(review): presumably the list is then indexed by symbolIndex and the
// value returned - confirm against the rest of the statement.
577 public double getInsertEmission(int nodeIndex, int symbolIndex,
578 HiddenMarkovModel hmm)
580 double value = hmm.getNodes().get(nodeIndex).getInsertEmissions()
586 * gets the state transition at a node for a specific transition
589 * position of node in model
590 * @param transitionIndex
591 * index of transition being searched
592 * @return negative log probability of a state transition of the given type
// Reads the entry at transitionIndex from the state-transition list of the
// node at nodeIndex in hmm.getNodes().
// NOTE(review): presumably value is then returned - confirm.
594 public double getStateTransition(int nodeIndex, int transitionIndex,
595 HiddenMarkovModel hmm)
597 double value = hmm.getNodes().get(nodeIndex).getStateTransitions()
598 .get(transitionIndex);