3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNull;
6 import jalview.datamodel.HMMNode;
7 import jalview.datamodel.HiddenMarkovModel;
9 import java.io.BufferedReader;
11 import java.io.FileReader;
12 import java.io.IOException;
13 import java.util.ArrayList;
14 import java.util.List;
15 import java.util.Scanner;
17 import org.testng.annotations.Test;
19 public class HMMFileTest {
// Shared fixtures used by the tests below. Each HMMFile is built from a
// FileParse over a hard-coded absolute path under H:/, read as DataSourceType.FILE.
23 HMMFile fn3 = new HMMFile(
24 new FileParse("H:/fn3.hmm", DataSourceType.FILE));
26 HMMFile pKinase = new HMMFile(
27 new FileParse("H:/Pkinase.hmm", DataSourceType.FILE));
29 HMMFile made1 = new HMMFile(
30 new FileParse("H:/MADE1.hmm", DataSourceType.FILE));
// Package-private constructor that declares IOException.
// NOTE(review): presumably declared because the fixture field initialisers can throw it - confirm.
32 HMMFileTest() throws IOException
// Checks the model held by the pKinase fixture (built from H:/Pkinase.hmm):
// file-level properties, the symbol list, sampled match/insert emissions,
// sampled state transitions and per-node annotations.
41 public void testParse() throws IOException
45 HiddenMarkovModel hmm = pKinase.getHMM();
// file-level properties
46 assertEquals(hmm.getName(), "Pkinase");
47 assertEquals(hmm.getAccessionNumber(), "PF00069.17");
48 assertEquals(hmm.getDescription(), "Protein kinase domain");
49 assertEquals(hmm.getLength().intValue(), 260);
50 assertNull(hmm.getMaxInstanceLength());
51 assertEquals(hmm.getAlphabetType(), "amino");
// annotation flags
52 assertEquals(hmm.referenceAnnotationIsActive(), false);
53 assertEquals(hmm.maskValueIsActive(), false);
54 assertEquals(hmm.consensusResidueIsActive(), true);
55 assertEquals(hmm.consensusStructureIsActive(),
57 assertEquals(hmm.mapIsActive(), true);
58 assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
59 assertNull(hmm.getCommandLineLog());
60 assertEquals(hmm.getNumberOfSequences().intValue(), 54);
// NOTE(review): the third argument is the permitted delta. 4d accepts any value
// within 4 of 3.358521, so this assertion is very loose - confirm the intended tolerance.
61 assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 4d);
62 assertEquals(hmm.getCheckSum().longValue(), 3106786190l);
// cutoffs are compared as the raw strings returned by the model
63 assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
64 assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
65 assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");
// expected symbol list, compared against hmm.getSymbols()
67 List<Character> symbols = new ArrayList<>();
89 assertEquals(hmm.getSymbols(), symbols);
// match emissions, addressed as (nodeIndex, symbolIndex); compared with no delta
91 assertEquals(getMatchEmission(0, 19, hmm), 3.43274);
92 assertEquals(getMatchEmission(12, 12, hmm), 4.33979);
93 assertEquals(getMatchEmission(23, 7, hmm), 3.65600);
94 assertEquals(getMatchEmission(54, 1, hmm), 4.76187);
95 assertEquals(getMatchEmission(79, 0, hmm), 2.81579);
96 assertEquals(getMatchEmission(100, 0, hmm), 1.86496);
97 assertEquals(getMatchEmission(112, 14, hmm), 2.77179);
98 assertEquals(getMatchEmission(143, 17, hmm), 5.10478);
99 assertEquals(getMatchEmission(156, 4, hmm), 4.69372);
100 assertEquals(getMatchEmission(178, 3, hmm), 2.52594);
101 assertEquals(getMatchEmission(210, 2, hmm), 4.23598);
102 assertEquals(getMatchEmission(260, 19, hmm), 3.81122);
// insert emissions, addressed as (nodeIndex, symbolIndex)
104 assertEquals(getInsertEmission(2, 1, hmm), 4.42225);
105 assertEquals(getInsertEmission(15, 6, hmm), 3.72501);
106 assertEquals(getInsertEmission(22, 9, hmm), 2.69355);
107 assertEquals(getInsertEmission(57, 2, hmm), 2.77519);
108 assertEquals(getInsertEmission(62, 14, hmm), 2.89801);
109 assertEquals(getInsertEmission(95, 17, hmm), 2.98532);
110 assertEquals(getInsertEmission(105, 4, hmm), 3.46354);
111 assertEquals(getInsertEmission(134, 1, hmm), 4.42225);
112 assertEquals(getInsertEmission(143, 0, hmm), 2.68618);
113 assertEquals(getInsertEmission(152, 16, hmm), 2.77519);
114 assertEquals(getInsertEmission(203, 16, hmm), 2.77519);
115 assertEquals(getInsertEmission(255, 12, hmm), 2.73739);
// state transitions, addressed as (nodeIndex, transitionIndex)
117 assertEquals(getStateTransition(0, 6, hmm),
118 Double.NEGATIVE_INFINITY);
119 assertEquals(getStateTransition(3, 6, hmm), 0.95510);
120 assertEquals(getStateTransition(29, 3, hmm), 0.61958);
121 assertEquals(getStateTransition(46, 4, hmm), 0.77255);
122 assertEquals(getStateTransition(53, 1, hmm), 5.01631);
123 assertEquals(getStateTransition(79, 2, hmm), 5.73865);
124 assertEquals(getStateTransition(101, 2, hmm), 5.73865);
125 assertEquals(getStateTransition(120, 5, hmm), 0.48576);
126 assertEquals(getStateTransition(146, 5, hmm), 0.70219);
127 assertEquals(getStateTransition(169, 3, hmm), 1.23224);
128 assertEquals(getStateTransition(209, 0, hmm), 0.01003);
129 assertEquals(getStateTransition(243, 1, hmm), 5.01631);
// per-node annotations: alignment column, reference annotation, consensus
// residue, masked value and consensus structure
131 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 3);
132 assertEquals(hmm.getReferenceAnnotation(7), '-');
133 assertEquals(hmm.getConsensusResidue(23), 't');
134 assertEquals(hmm.getMaskedValue(30), '-');
135 assertEquals(hmm.getConsensusStructure(56), 'S');
137 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 136);
138 assertEquals(hmm.getReferenceAnnotation(93), '-');
139 assertEquals(hmm.getConsensusResidue(145), 'a');
140 assertEquals(hmm.getMaskedValue(183), '-');
141 assertEquals(hmm.getConsensusStructure(240), 'H');
// Feeds H:/fn3.hmm and then H:/MADE1.hmm to parseFileProperties through a
// BufferedReader and checks every file-level property exposed by the resulting model.
146 public void testParseFileProperties() throws IOException
148 FileReader fr = new FileReader(new File("H:/fn3.hmm"));
149 BufferedReader br = new BufferedReader(fr);
150 fn3.parseFileProperties(br);
151 fn3.parseModel(br); // this is for a later test
// NOTE(review): the instance created here is replaced by fn3.getHMM() on the next line.
152 HiddenMarkovModel testHMM = new HiddenMarkovModel();
153 testHMM = fn3.getHMM();
// fn3 (amino alphabet) properties
157 assertEquals(testHMM.getName(), "fn3");
158 assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
159 assertEquals(testHMM.getDescription(),
160 "Fibronectin type III domain");
161 assertEquals(testHMM.getLength().intValue(), 86);
162 assertNull(testHMM.getMaxInstanceLength());
163 assertEquals(testHMM.getAlphabetType(), "amino");
164 assertEquals(testHMM.referenceAnnotationIsActive(), false);
165 assertEquals(testHMM.maskValueIsActive(), false);
166 assertEquals(testHMM.consensusResidueIsActive(), true);
167 assertEquals(testHMM.consensusStructureIsActive(), true);
168 assertEquals(testHMM.mapIsActive(), true);
169 assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
170 assertNull(testHMM.getCommandLineLog());
171 assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
// NOTE(review): a delta of 4d accepts any value within 4 of the expected one - confirm tolerance.
172 assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d);
173 assertEquals(testHMM.getCheckSum().longValue(), 3564431818l);
174 assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
175 assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
176 assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
177 assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
178 assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
179 assertEquals(testHMM.getForward(), "-3.8341 0.71847");
// second file: MADE1 (DNA alphabet); testHMM is re-pointed at made1's model
182 FileReader fr3 = new FileReader(new File("H:/MADE1.hmm"));
183 BufferedReader br3 = new BufferedReader(fr3);
184 made1.parseFileProperties(br3);
185 testHMM = made1.getHMM();
189 assertEquals(testHMM.getName(), "MADE1");
190 assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
191 assertEquals(testHMM.getDescription(),
192 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
193 assertEquals(testHMM.getLength().intValue(), 80);
194 assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
195 assertEquals(testHMM.getAlphabetType(), "DNA");
196 assertEquals(testHMM.referenceAnnotationIsActive(), true);
197 assertEquals(testHMM.maskValueIsActive(), false);
198 assertEquals(testHMM.consensusResidueIsActive(), true);
199 assertEquals(testHMM.consensusStructureIsActive(), false);
200 assertEquals(testHMM.mapIsActive(), true);
201 assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
202 assertNull(testHMM.getCommandLineLog());
203 assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
// NOTE(review): same very loose 4d delta as above - confirm tolerance.
204 assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d);
205 assertEquals(testHMM.getCheckSum().longValue(), 3015610723l);
206 assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
207 assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
208 assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
209 assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
210 assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
211 assertEquals(testHMM.getForward(), "-3.4823 0.71858");
// Checks that getTransitionType maps the seven two-letter transition codes
// to the indices 0-6 and gives null for a code outside that set ("df").
217 public void testGetTransitionType()
219 HiddenMarkovModel hmm = fn3.getHMM();
220 assertEquals(hmm.getTransitionType("mm").intValue(), 0);
221 assertEquals(hmm.getTransitionType("mi").intValue(), 1);
222 assertEquals(hmm.getTransitionType("md").intValue(), 2);
223 assertEquals(hmm.getTransitionType("im").intValue(), 3);
224 assertEquals(hmm.getTransitionType("ii").intValue(), 4);
225 assertEquals(hmm.getTransitionType("dm").intValue(), 5);
226 assertEquals(hmm.getTransitionType("dd").intValue(), 6);
227 assertNull(hmm.getTransitionType("df"));
// Checks the static HMMFile.fillList(scanner, count) against a hand-built
// list of the doubles contained in the scanner's input string.
232 public void testFillList()
234 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
235 ArrayList<Double> filledArray = new ArrayList<>();
// expected values, in the same order as scanner1's input
237 filledArray.add(1.3);
238 filledArray.add(2.4);
239 filledArray.add(5.3);
240 filledArray.add(3.9);
241 filledArray.add(9.8);
242 filledArray.add(4.7);
243 filledArray.add(4.3);
244 filledArray.add(2.3);
245 filledArray.add(6.9);
247 assertEquals(HMMFile.fillList(scanner1, 9), filledArray);
// second case: five values with more decimal places and a larger magnitude
251 Scanner scanner2 = new Scanner(
252 "1.346554 5.58756754 35.3523645 12345.3564 1.4");
253 filledArray.add(1.346554);
254 filledArray.add(5.58756754);
255 filledArray.add(35.3523645);
256 filledArray.add(12345.3564);
257 filledArray.add(1.4);
259 assertEquals(HMMFile.fillList(scanner2, 5), filledArray);
// Runs made1.parseModel over a reader on H:/MADE1.hmm and spot-checks match
// emissions, insert emissions and state transitions of the resulting model.
265 public void testParseModel() throws IOException
267 FileReader fr = new FileReader(new File("H:/MADE1.hmm"));
268 BufferedReader br = new BufferedReader(fr);
// NOTE(review): the instance created here is replaced by made1.getHMM() further down.
269 HiddenMarkovModel testHMM = new HiddenMarkovModel();
// NOTE(review): presumably advances br past 24 header lines so parseModel starts at the model section - confirm.
270 for (int i = 0; i < 24; i++)
274 made1.parseModel(br);
275 testHMM = made1.getHMM();
// match emissions, addressed as (nodeIndex, symbolIndex)
279 assertEquals(getMatchEmission(0, 2, testHMM), 1.62906);
280 assertEquals(getMatchEmission(2, 1, testHMM), 2.37873);
281 assertEquals(getMatchEmission(12, 2, testHMM), 2.61355);
282 assertEquals(getMatchEmission(26, 0, testHMM), 1.86925);
283 assertEquals(getMatchEmission(32, 3, testHMM), 2.58263);
284 assertEquals(getMatchEmission(59, 3, testHMM), 2.20507);
285 assertEquals(getMatchEmission(63, 0, testHMM), 0.41244);
286 assertEquals(getMatchEmission(69, 1, testHMM), 3.17398);
287 assertEquals(getMatchEmission(76, 2, testHMM), 2.65861);
// insert emissions, addressed as (nodeIndex, symbolIndex)
289 assertEquals(getInsertEmission(0, 1, testHMM), 1.38629);
290 assertEquals(getInsertEmission(1, 2, testHMM), 1.38629);
291 assertEquals(getInsertEmission(31, 3, testHMM), 1.28150);
292 assertEquals(getInsertEmission(43, 0, testHMM), 1.32290);
293 assertEquals(getInsertEmission(48, 2, testHMM), 1.52606);
294 assertEquals(getInsertEmission(52, 1, testHMM), 1.62259);
295 assertEquals(getInsertEmission(67, 0, testHMM), 1.38141);
296 assertEquals(getInsertEmission(70, 3, testHMM), 1.38629);
297 assertEquals(getInsertEmission(80, 3, testHMM), 1.38629);
// state transitions, addressed as (nodeIndex, transitionIndex)
299 assertEquals(getStateTransition(2, 0, testHMM), 0.03725);
300 assertEquals(getStateTransition(6, 1, testHMM), 3.89715);
301 assertEquals(getStateTransition(9, 3, testHMM), 1.38021);
302 assertEquals(getStateTransition(20, 4, testHMM), 0.23815);
303 assertEquals(getStateTransition(34, 6, testHMM), 0.33363);
304 assertEquals(getStateTransition(46, 5, testHMM), 1.05474);
305 assertEquals(getStateTransition(57, 6, testHMM), 0.31164);
306 assertEquals(getStateTransition(68, 2, testHMM), 3.99242);
307 assertEquals(getStateTransition(80, 6, testHMM),
308 Double.NEGATIVE_INFINITY);
// Exercises parseAnnotations on a fresh HMMFile with three empty nodes:
// once with every annotation flag enabled (node 1) and once with only the
// consensus-residue and reference-annotation flags enabled (node 2).
313 public void testParseAnnotations()
315 HMMFile testFile = new HMMFile();
316 testFile.getHMM().getNodes().add(new HMMNode());
317 testFile.getHMM().getNodes().add(new HMMNode());
318 testFile.getHMM().getNodes().add(new HMMNode());
// first pass: all five annotation flags on, parsed into node 1
321 testFile.getHMM().setConsensusResidueStatus(true);
322 testFile.getHMM().setMAPStatus(true);
323 testFile.getHMM().setReferenceAnnotationStatus(true);
324 testFile.getHMM().setConsensusStructureStatus(true);
325 testFile.getHMM().setMaskedValueStatus(true);
326 Scanner scanner = new Scanner("1345 t t t t");
327 testFile.parseAnnotations(scanner, 1);
// second pass: MAP, consensus structure and masked value switched off, parsed into node 2
329 testFile.getHMM().setConsensusResidueStatus(true);
330 testFile.getHMM().setMAPStatus(false);
331 testFile.getHMM().setReferenceAnnotationStatus(true);
332 testFile.getHMM().setConsensusStructureStatus(false);
333 testFile.getHMM().setMaskedValueStatus(false);
334 Scanner scanner2 = new Scanner("- y x - -");
335 testFile.parseAnnotations(scanner2, 2);
337 HiddenMarkovModel hmm = testFile.getHMM();
// node 1: every field takes the value from the first input line
339 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1345);
340 assertEquals(hmm.getConsensusResidue(1), 't');
341 assertEquals(hmm.getReferenceAnnotation(1), 't');
342 assertEquals(hmm.getMaskedValue(1), 't');
343 assertEquals(hmm.getConsensusStructure(1), 't');
// alignment column 1345 resolves back to node 1
345 assertEquals(hmm.findNodeIndex(1345).intValue(), 1);
// node 2: no alignment column is expected, and the disabled fields are expected to be '-'
349 assertNull(hmm.getNodeAlignmentColumn(2));
350 assertEquals(hmm.getConsensusResidue(2), 'y');
351 assertEquals(hmm.getReferenceAnnotation(2), 'x');
352 assertEquals(hmm.getMaskedValue(2), '-');
353 assertEquals(hmm.getConsensusStructure(2), '-');
355 assertNull(hmm.findNodeIndex(2));
361 * Tests that the file written on export matches the file that was read as input.
363 * @throws IOException
// Writes the fn3 fixture to H:/WriteFileTest.txt with exportFile, reads that
// file back as a second HMMFile, and compares the two models node by node.
368 public void testExportFile() throws IOException
370 fn3.exportFile("H:/WriteFileTest.txt");
371 HMMFile fn3Clone = new HMMFile(
372 new FileParse("H:/WriteFileTest.txt", DataSourceType.FILE));
// NOTE(review): both instances created here are replaced by the getHMM() results just below.
374 HiddenMarkovModel fn3HMM = new HiddenMarkovModel();
375 HiddenMarkovModel fn3CloneHMM = new HiddenMarkovModel();
376 fn3HMM = fn3.getHMM();
377 fn3CloneHMM = fn3Clone.getHMM();
// i runs from 0 up to, but not including, the model length
379 for (int i = 0; i < fn3HMM.getLength(); i++)
// match emissions of node i in both models
385 list1 = fn3HMM.getNode(i).getMatchEmissions();
386 list2 = fn3CloneHMM.getNode(i).getMatchEmissions();
388 result = checkIfListsAreIdentical(list1, list2);
389 assertEquals(result, true);
// insert emissions of node i
391 list1 = fn3HMM.getNode(i).getInsertEmissions();
392 list2 = fn3CloneHMM.getNode(i).getInsertEmissions();
394 result = checkIfListsAreIdentical(list1, list2);
395 assertEquals(result, true);
// state transitions of node i
397 list1 = fn3HMM.getNode(i).getStateTransitions();
398 list2 = fn3CloneHMM.getNode(i).getStateTransitions();
400 result = checkIfListsAreIdentical(list1, list2);
401 assertEquals(result, true);
// alignment column of node i
408 alignColumn1 = fn3HMM.getNodeAlignmentColumn(i);
409 alignColumn2 = fn3CloneHMM.getNodeAlignmentColumn(i);
411 assertEquals(alignColumn1, alignColumn2);
// reference annotation, then consensus residue, of node i
416 annotation1 = fn3HMM.getReferenceAnnotation(i);
417 annotation2 = fn3CloneHMM.getReferenceAnnotation(i);
419 assertEquals(annotation1, annotation2);
421 annotation1 = fn3HMM.getConsensusResidue(i);
422 annotation2 = fn3CloneHMM.getConsensusResidue(i);
424 assertEquals(annotation1, annotation2);
// Has fn3 append its file properties to a StringBuilder and compares the
// result, line by line, with the expected header lines.
432 public void testAppendFileProperties()
434 StringBuilder testBuilder = new StringBuilder();
435 fn3.appendFileProperties(testBuilder);
436 Scanner testScanner = new Scanner(testBuilder.toString());
// expected output, one array element per line, in output order
438 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
439 "NAME fn3", "ACC PF00041.13",
440 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
441 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
442 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
443 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
444 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
445 "STATS LOCAL VITERBI -9.7737 0.71847",
446 "STATS LOCAL FORWARD -3.8341 0.71847" };
// each scanner line must equal the corresponding expected entry
448 for (String value : expected)
450 assertEquals(testScanner.nextLine(), value);
// Has fn3 append its model section to a StringBuilder and spot-checks
// individual values located with findValue(symbolIndex, nodeIndex, line, model).
457 public void testAppendModel()
459 StringBuilder testBuilder = new StringBuilder();
460 fn3.appendModel(testBuilder);
461 String string = testBuilder.toString();
// values are compared as the text tokens found in the output
462 assertEquals(findValue(2, 2, 2, string), "4.42225");
463 assertEquals(findValue(12, 14, 1, string), "2.79307");
464 assertEquals(findValue(6, 24, 3, string), "0.48576");
465 assertEquals(findValue(19, 33, 2, string), "4.58477");
466 assertEquals(findValue(20, 64, 2, string), "3.61505");
467 assertEquals(findValue(3, 72, 3, string), "6.81068");
468 assertEquals(findValue(10, 80, 2, string), "2.69355");
469 assertEquals(findValue(16, 65, 1, string), "2.81003");
470 assertEquals(findValue(14, 3, 1, string), "2.69012");
471 assertEquals(findValue(11, 32, 1, string), "4.34805");
478 * index of symbol being searched. First symbol has index 1.
480 * index of node being searched. Begin node has index 0. First node
483 * index of line being searched in node. First line has index 1.
485 * string model being searched
486 * @return value at specified position
// Helper that walks the model text with a Scanner to pick out a single token.
489 public String findValue(int symbolIndex, int nodeIndex, int line,
494 Scanner scanner = new Scanner(model);
// runs (line - 1) times
498 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
// runs nodeIndex times
502 for (int node = 0; node < nodeIndex; node++)
// reads up to symbolIndex tokens; each read overwrites value
509 for (int symbol = 0; symbol < symbolIndex; symbol++)
511 value = scanner.next();
// the literal "COMPO" token and tokens shorter than 7 characters get special handling
// NOTE(review): presumably these are labels rather than numeric values - confirm.
512 if ("COMPO".equals(value))
516 else if (value.length() < 7)
// Helper that compares two lists of doubles element by element.
// NOTE(review): the local flag is named isDifferent, while the method name and
// the callers (assertEquals(result, true)) suggest true means identical - verify what is returned.
526 public boolean checkIfListsAreIdentical(List<Double> list1,
529 boolean isDifferent = false;
// Only list1's size bounds the loop: surplus elements in list2 are never
// examined, and a shorter list2 makes list2.get(i) fail.
530 for (int i = 0; i < list1.size(); i++)
534 entry1 = list1.get(i);
535 entry2 = list2.get(i);
// NOTE(review): if entry1/entry2 are declared as Double rather than double,
// == compares object references, not numeric values - confirm the declared type.
536 if (!(entry1 == entry2))
545 * gets the match emission at a node for a symbol
548 * position of node in model
550 * index of symbol being searched
551 * @return negative log probability of a match emission of the given symbol
// Reads from the match emissions of the node at nodeIndex in hmm.getNodes().
553 public double getMatchEmission(int nodeIndex, int symbolIndex,
554 HiddenMarkovModel hmm)
556 double value = hmm.getNodes().get(nodeIndex).getMatchEmissions()
562 * gets the insert emission at a node for a symbol
565 * position of node in model
567 * index of symbol being searched
568 * @return negative log probability of an insert emission of the given symbol
// Reads from the insert emissions of the node at nodeIndex in hmm.getNodes().
570 public double getInsertEmission(int nodeIndex, int symbolIndex,
571 HiddenMarkovModel hmm)
573 double value = hmm.getNodes().get(nodeIndex).getInsertEmissions()
579 * gets the state transition at a node for a specific transition
582 * position of node in model
583 * @param transitionIndex
584 * index of transition being searched
585 * @return negative log probability of a state transition of the given type
// Reads the entry at transitionIndex from the state transitions of the node
// at nodeIndex in hmm.getNodes().
587 public double getStateTransition(int nodeIndex, int transitionIndex,
588 HiddenMarkovModel hmm)
590 double value = hmm.getNodes().get(nodeIndex).getStateTransitions()
591 .get(transitionIndex);