3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNull;
6 import jalview.datamodel.HMMNode;
7 import jalview.datamodel.HiddenMarkovModel;
9 import java.io.BufferedReader;
11 import java.io.FileReader;
12 import java.io.IOException;
13 import java.util.ArrayList;
14 import java.util.List;
15 import java.util.Scanner;
17 import org.testng.annotations.Test;
19 public class HMMFileTest {
// Shared fixtures: one HMMFile per sample profile file, each built from a
// FileParse over a path relative to the working directory.
// NOTE(review): the leading numbers on these lines look like extraction
// residue rather than Java source - confirm against the real file.

// Fixture over test/jalview/io/test_fn3_hmm.txt.
23 HMMFile fn3 = new HMMFile(
24 new FileParse("test/jalview/io/test_fn3_hmm.txt",
25 DataSourceType.FILE));
// Fixture over test/jalview/io/test_PKinase_hmm.txt.
27 HMMFile pKinase = new HMMFile(
28 new FileParse("test/jalview/io/test_PKinase_hmm.txt",
29 DataSourceType.FILE));
// Fixture over test/jalview/io/test_MADE1_hmm.txt.
31 HMMFile made1 = new HMMFile(
32 new FileParse("test/jalview/io/test_MADE1_hmm.txt",
33 DataSourceType.FILE));
// Constructor with no access modifier (package-private), declared to throw
// IOException.
// NOTE(review): the constructor body is not visible in this excerpt - confirm
// what, if anything, it does with the fixtures.
35 HMMFileTest() throws IOException
// Checks the header fields, symbol list, emission/transition values and
// per-node annotations exposed by the pKinase fixture through getHMM().
// NOTE(review): several original lines are not visible in this excerpt (braces,
// the calls that populate `symbols`, and whatever precedes the getHMM() call),
// so read the comments below as describing only the visible statements.
44 public void testParse() throws IOException
48 HiddenMarkovModel hmm = pKinase.getHMM();
// Header properties of the model.
49 assertEquals(hmm.getName(), "Pkinase");
50 assertEquals(hmm.getAccessionNumber(), "PF00069.17");
51 assertEquals(hmm.getDescription(), "Protein kinase domain");
52 assertEquals(hmm.getLength().intValue(), 260);
53 assertNull(hmm.getMaxInstanceLength());
54 assertEquals(hmm.getAlphabetType(), "amino");
// Flags saying which per-node annotation kinds are active.
55 assertEquals(hmm.referenceAnnotationIsActive(), false);
56 assertEquals(hmm.maskValueIsActive(), false);
57 assertEquals(hmm.consensusResidueIsActive(), true);
// NOTE(review): the expected-value argument of the next call sits on a line
// that is missing from this excerpt.
58 assertEquals(hmm.consensusStructureIsActive(),
60 assertEquals(hmm.mapIsActive(), true);
61 assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
62 assertNull(hmm.getCommandLineLog());
63 assertEquals(hmm.getNumberOfSequences().intValue(), 54);
// NOTE(review): a delta of 4d accepts any value within +/-4.0 of the expected
// number, so this assertion checks very little; a small tolerance such as
// 0.0001d was probably intended - confirm and tighten.
64 assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 4d);
// NOTE(review): the lowercase 'l' suffix is easy to misread as the digit 1.
65 assertEquals(hmm.getCheckSum().longValue(), 3106786190l);
66 assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
67 assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
68 assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");
// Expected symbol list.
// NOTE(review): the statements that fill this list are not visible here.
70 List<Character> symbols = new ArrayList<>();
92 assertEquals(hmm.getSymbols(), symbols);
// Spot checks of match emissions, addressed as (node index, symbol index),
// each compared with a tolerance of 0.001.
94 assertEquals(getMatchEmission(0, 19, hmm), 0.032298, 0.001d);
95 assertEquals(getMatchEmission(12, 12, hmm), 0.0130, 0.001d);
96 assertEquals(getMatchEmission(23, 7, hmm), 0.02583, 0.001d);
97 assertEquals(getMatchEmission(54, 1, hmm), 0.008549, 0.001d);
98 assertEquals(getMatchEmission(178, 3, hmm), 0.07998, 0.001d);
99 assertEquals(getMatchEmission(210, 2, hmm), 0.014465, 0.001d);
100 assertEquals(getMatchEmission(260, 19, hmm), 0.02213, 0.001d);
// Spot checks of insert emissions, addressed the same way.
102 assertEquals(getInsertEmission(2, 1, hmm), 0.012, 0.001d);
103 assertEquals(getInsertEmission(15, 6, hmm), 0.02411, 0.001d);
104 assertEquals(getInsertEmission(22, 9, hmm), 0.06764, 0.001d);
105 assertEquals(getInsertEmission(57, 2, hmm), 0.0623, 0.001d);
106 assertEquals(getInsertEmission(203, 16, hmm), 0.0623, 0.001d);
107 assertEquals(getInsertEmission(255, 12, hmm), 0.0647, 0.001d);
// Spot checks of state transitions, addressed as (node index, transition
// index). The first one is compared with negative infinity and no tolerance.
109 assertEquals(getStateTransition(0, 6, hmm),
110 Double.NEGATIVE_INFINITY);
111 assertEquals(getStateTransition(3, 6, hmm), 0.3848, 0.001d);
112 assertEquals(getStateTransition(29, 3, hmm), 0.5382, 0.001d);
113 assertEquals(getStateTransition(169, 3, hmm), 0.2916, 0.001d);
114 assertEquals(getStateTransition(209, 0, hmm), 0.99, 0.001d);
115 assertEquals(getStateTransition(243, 1, hmm), 0.0066, 0.001d);
// Per-node annotation lookups by node index.
117 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 3);
118 assertEquals(hmm.getReferenceAnnotation(7), '-');
119 assertEquals(hmm.getConsensusResidue(23), 't');
120 assertEquals(hmm.getMaskedValue(30), '-');
121 assertEquals(hmm.getConsensusStructure(56), 'S');
123 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 136);
124 assertEquals(hmm.getReferenceAnnotation(93), '-');
125 assertEquals(hmm.getConsensusResidue(145), 'a');
126 assertEquals(hmm.getMaskedValue(183), '-');
127 assertEquals(hmm.getConsensusStructure(240), 'H');
132 public void testParseFileProperties() throws IOException
134 FileReader fr = new FileReader(
135 new File("test/jalview/io/test_fn3_hmm.txt"));
136 BufferedReader br = new BufferedReader(fr);
137 fn3.parseFileProperties(br);
138 fn3.parseModel(br); // this is for a later test
139 HiddenMarkovModel testHMM = new HiddenMarkovModel();
140 testHMM = fn3.getHMM();
144 assertEquals(testHMM.getName(), "fn3");
145 assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
146 assertEquals(testHMM.getDescription(),
147 "Fibronectin type III domain");
148 assertEquals(testHMM.getLength().intValue(), 86);
149 assertNull(testHMM.getMaxInstanceLength());
150 assertEquals(testHMM.getAlphabetType(), "amino");
151 assertEquals(testHMM.referenceAnnotationIsActive(), false);
152 assertEquals(testHMM.maskValueIsActive(), false);
153 assertEquals(testHMM.consensusResidueIsActive(), true);
154 assertEquals(testHMM.consensusStructureIsActive(), true);
155 assertEquals(testHMM.mapIsActive(), true);
156 assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
157 assertNull(testHMM.getCommandLineLog());
158 assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
159 assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d);
160 assertEquals(testHMM.getCheckSum().longValue(), 3564431818l);
161 assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
162 assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
163 assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
164 assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
165 assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
166 assertEquals(testHMM.getForward(), "-3.8341 0.71847");
169 FileReader fr3 = new FileReader(
170 new File("test/jalview/io/test_MADE1_hmm.txt"));
171 BufferedReader br3 = new BufferedReader(fr3);
172 made1.parseFileProperties(br3);
173 testHMM = made1.getHMM();
177 assertEquals(testHMM.getName(), "MADE1");
178 assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
179 assertEquals(testHMM.getDescription(),
180 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
181 assertEquals(testHMM.getLength().intValue(), 80);
182 assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
183 assertEquals(testHMM.getAlphabetType(), "DNA");
184 assertEquals(testHMM.referenceAnnotationIsActive(), true);
185 assertEquals(testHMM.maskValueIsActive(), false);
186 assertEquals(testHMM.consensusResidueIsActive(), true);
187 assertEquals(testHMM.consensusStructureIsActive(), false);
188 assertEquals(testHMM.mapIsActive(), true);
189 assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
190 assertNull(testHMM.getCommandLineLog());
191 assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
192 assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d);
193 assertEquals(testHMM.getCheckSum().longValue(), 3015610723l);
194 assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
195 assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
196 assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
197 assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
198 assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
199 assertEquals(testHMM.getForward(), "-3.4823 0.71858");
// Exercises the static HMMFile.fillList(scanner, count) with two inputs and
// compares each returned element with an expected value (tolerance 0.001).
// The visible expected values appear to be e^(-x) of the corresponding input
// token (for example e^-1.3 is about 0.27253).
// NOTE(review): some original lines are missing from this excerpt, including
// one expected value of the second case and whatever resets `filledArray`
// between the two cases - confirm against the real file.
205 public void testFillList()
// First case: nine whitespace-separated numbers.
207 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
208 ArrayList<Double> filledArray = new ArrayList<>();
210 filledArray.add(0.27253);
211 filledArray.add(0.0907);
212 filledArray.add(0.00499);
213 filledArray.add(0.02024);
214 filledArray.add(0.00005);
215 filledArray.add(0.00909);
216 filledArray.add(0.01357);
217 filledArray.add(0.10026);
218 filledArray.add(0.001);
220 List<Double> testList = HMMFile.fillList(scanner1, 9);
222 for (int i = 0; i < 9; i++)
224 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
// Second case: five numbers, one of them large (35.345).
231 Scanner scanner2 = new Scanner(
232 "1.346 5.554 35.345 5.64 1.4");
233 filledArray.add(0.2603);
234 filledArray.add(0.00387);
// NOTE(review): only four expected values are visible for five inputs; the
// value for 35.345 is presumably on the missing line.
236 filledArray.add(0.00355);
237 filledArray.add(0.2466);
239 testList = HMMFile.fillList(scanner2, 5);
241 for (int i = 0; i < 5; i++)
243 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
// Feeds the MADE1 sample file to made1.parseModel(br) and spot-checks match
// emissions, insert emissions and state transitions of the resulting model.
// NOTE(review): the reader is not closed in any visible line.
249 public void testParseModel() throws IOException
251 FileReader fr = new FileReader(
252 new File("test/jalview/io/test_MADE1_hmm.txt"));
253 BufferedReader br = new BufferedReader(fr);
// NOTE(review): this instance is replaced by made1.getHMM() further down, so
// the allocation looks redundant.
254 HiddenMarkovModel testHMM = new HiddenMarkovModel();
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// advances the reader past 24 lines before the model section - confirm.
255 for (int i = 0; i < 24; i++)
259 made1.parseModel(br);
260 testHMM = made1.getHMM();
// Match emissions, addressed as (node index, symbol index), tolerance 0.001.
264 assertEquals(getMatchEmission(0, 2, testHMM), 0.1961, 0.001d);
265 assertEquals(getMatchEmission(2, 1, testHMM), 0.09267, 0.001d);
266 assertEquals(getMatchEmission(12, 2, testHMM), 0.07327, 0.001d);
267 assertEquals(getMatchEmission(69, 1, testHMM), 0.04184, 0.001d);
268 assertEquals(getMatchEmission(76, 2, testHMM), 0.07, 0.001d);
// Insert emissions, addressed the same way.
270 assertEquals(getInsertEmission(0, 1, testHMM), 0.25, 0.001d);
271 assertEquals(getInsertEmission(1, 2, testHMM), 0.25, 0.001d);
272 assertEquals(getInsertEmission(31, 3, testHMM), 0.2776, 0.001d);
273 assertEquals(getInsertEmission(70, 3, testHMM), 0.25, 0.001d);
274 assertEquals(getInsertEmission(80, 3, testHMM), 0.25, 0.001d);
// State transitions, addressed as (node index, transition index). The last
// one is compared with negative infinity and no tolerance.
276 assertEquals(getStateTransition(2, 0, testHMM), 0.9634, 0.001d);
277 assertEquals(getStateTransition(6, 1, testHMM), 0.0203, 0.001d);
278 assertEquals(getStateTransition(9, 3, testHMM), 0.2515, 0.001d);
279 assertEquals(getStateTransition(20, 4, testHMM), 0.78808, 0.001d);
280 assertEquals(getStateTransition(68, 2, testHMM), 0.01845, 0.001d);
281 assertEquals(getStateTransition(80, 6, testHMM),
282 Double.NEGATIVE_INFINITY);
287 public void testParseAnnotations()
289 HMMFile testFile = new HMMFile();
290 testFile.getHMM().getNodes().add(new HMMNode());
291 testFile.getHMM().getNodes().add(new HMMNode());
292 testFile.getHMM().getNodes().add(new HMMNode());
295 testFile.getHMM().setConsensusResidueStatus(true);
296 testFile.getHMM().setMAPStatus(true);
297 testFile.getHMM().setReferenceAnnotationStatus(true);
298 testFile.getHMM().setConsensusStructureStatus(true);
299 testFile.getHMM().setMaskedValueStatus(true);
300 Scanner scanner = new Scanner("1345 t t t t");
301 testFile.parseAnnotations(scanner, 1);
303 testFile.getHMM().setConsensusResidueStatus(true);
304 testFile.getHMM().setMAPStatus(false);
305 testFile.getHMM().setReferenceAnnotationStatus(true);
306 testFile.getHMM().setConsensusStructureStatus(false);
307 testFile.getHMM().setMaskedValueStatus(false);
308 Scanner scanner2 = new Scanner("- y x - -");
309 testFile.parseAnnotations(scanner2, 2);
311 HiddenMarkovModel hmm = testFile.getHMM();
313 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1345);
314 assertEquals(hmm.getConsensusResidue(1), 't');
315 assertEquals(hmm.getReferenceAnnotation(1), 't');
316 assertEquals(hmm.getMaskedValue(1), 't');
317 assertEquals(hmm.getConsensusStructure(1), 't');
319 assertEquals(hmm.findNodeIndex(1345).intValue(), 1);
323 assertNull(hmm.getNodeAlignmentColumn(2));
324 assertEquals(hmm.getConsensusResidue(2), 'y');
325 assertEquals(hmm.getReferenceAnnotation(2), 'x');
326 assertEquals(hmm.getMaskedValue(2), '-');
327 assertEquals(hmm.getConsensusStructure(2), '-');
329 assertNull(hmm.findNodeIndex(2));
335 * tests to see if file produced by the output matches the file from the input
337 * @throws IOException
// Round-trip check: exports the pKinase fixture to a file, parses that file
// with a second HMMFile, then compares the two models node by node.
// NOTE(review): the declarations of list1, list2, result, alignColumn1/2 and
// annotation1/2, plus the braces that delimit the loop(s), are not visible in
// this excerpt; the grouping described below is inferred from statement order.
342 public void testExportFile() throws IOException
// Writes into the test source tree.
// NOTE(review): no visible line deletes this file afterwards.
344 pKinase.exportFile("test/jalview/io/test_export_hmm.txt");
345 HMMFile pKinaseClone = new HMMFile(
346 new FileParse("test/jalview/io/test_export_hmm.txt",
347 DataSourceType.FILE));
348 pKinaseClone.parse();
// NOTE(review): both fresh instances are replaced two lines later, so these
// allocations look redundant.
349 HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
350 HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
351 pKinaseHMM = pKinase.getHMM();
352 pKinaseCloneHMM = pKinaseClone.getHMM();
// Iterates node indices 0 .. getLength()-1.
// NOTE(review): pKinase.getHMM() is used here with no parse call visible in
// this method, so the test may depend on state set up elsewhere - confirm.
354 for (int i = 0; i < pKinaseHMM.getLength(); i++)
// Match emissions of node i in both models.
360 list1 = pKinaseHMM.getNode(i).getMatchEmissions();
361 list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
363 result = checkIfListsAreIdentical(list1, list2);
364 assertEquals(result, true);
// Insert emissions of node i in both models.
366 list1 = pKinaseHMM.getNode(i).getInsertEmissions();
367 list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
369 result = checkIfListsAreIdentical(list1, list2);
370 assertEquals(result, true);
// State transitions of node i in both models.
372 list1 = pKinaseHMM.getNode(i).getStateTransitions();
373 list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
375 result = checkIfListsAreIdentical(list1, list2);
376 assertEquals(result, true);
// Alignment column mapped to node i in both models.
383 alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
384 alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
386 assertEquals(alignColumn1, alignColumn2);
// Reference annotation of node i in both models.
391 annotation1 = pKinaseHMM.getReferenceAnnotation(i);
392 annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
394 assertEquals(annotation1, annotation2);
// Consensus residue of node i in both models.
396 annotation1 = pKinaseHMM.getConsensusResidue(i);
397 annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
399 assertEquals(annotation1, annotation2);
407 public void testAppendFileProperties()
409 StringBuilder testBuilder = new StringBuilder();
410 fn3.appendFileProperties(testBuilder);
411 Scanner testScanner = new Scanner(testBuilder.toString());
413 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
414 "NAME fn3", "ACC PF00041.13",
415 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
416 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
417 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
418 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
419 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
420 "STATS LOCAL VITERBI -9.7737 0.71847",
421 "STATS LOCAL FORWARD -3.8341 0.71847" };
423 for (String value : expected)
425 assertEquals(testScanner.nextLine(), value);
432 public void testAppendModel()
434 StringBuilder testBuilder = new StringBuilder();
435 fn3.appendModel(testBuilder);
436 String string = testBuilder.toString();
437 assertEquals(findValue(2, 2, 2, string), "4.42225");
438 assertEquals(findValue(12, 14, 1, string), "2.79307");
439 assertEquals(findValue(6, 24, 3, string), "0.48576");
440 assertEquals(findValue(19, 33, 2, string), "4.58477");
441 assertEquals(findValue(20, 64, 2, string), "3.61505");
442 assertEquals(findValue(3, 72, 3, string), "6.81068");
443 assertEquals(findValue(10, 80, 2, string), "2.69355");
444 assertEquals(findValue(16, 65, 1, string), "2.81003");
445 assertEquals(findValue(14, 3, 1, string), "2.69012");
446 assertEquals(findValue(11, 32, 1, string), "4.34805");
453 * index of symbol being searched. First symbol has index 1.
455 * index of node being searched. Begin node has index 0. First node
458 * index of line being searched in node. First line has index 1.
460 * string model being searched
461 * @return value at specified position
// Helper that scans the text of a generated model and returns one token from
// it, addressed by symbol index, node index and line-within-node.
// NOTE(review): most of this method's body (the remaining parameter, the loop
// bodies, both branch bodies and the return) is not visible in this excerpt,
// so the comments below describe only the visible statements.
464 public String findValue(int symbolIndex, int nodeIndex, int line,
// Tokenises the model text.
469 Scanner scanner = new Scanner(model);
// Runs (line - 1) times.
473 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
// Runs nodeIndex times.
477 for (int node = 0; node < nodeIndex; node++)
// Runs symbolIndex times, reading one whitespace-delimited token per pass.
484 for (int symbol = 0; symbol < symbolIndex; symbol++)
486 value = scanner.next();
// The literal token "COMPO" and tokens shorter than seven characters are
// handled specially.
// NOTE(review): what each branch does is not visible here.
487 if ("COMPO".equals(value))
491 else if (value.length() < 7)
501 public boolean checkIfListsAreIdentical(List<Double> list1,
504 boolean isDifferent = false;
505 for (int i = 0; i < list1.size(); i++)
509 entry1 = list1.get(i);
510 entry2 = list2.get(i);
511 if (!(entry1 == entry2))
520 * gets the match emission at a node for a symbol
523 * position of node in model
525 * index of symbol being searched
526 * @return negative log probability of a match emission of the given symbol
528 public double getMatchEmission(int nodeIndex, int symbolIndex,
529 HiddenMarkovModel hmm)
531 double value = hmm.getNodes().get(nodeIndex).getMatchEmissions()
537 * gets the insert emission at a node for a symbol
540 * position of node in model
542 * index of symbol being searched
543 * @return negative log probability of an insert emission of the given symbol
545 public double getInsertEmission(int nodeIndex, int symbolIndex,
546 HiddenMarkovModel hmm)
548 double value = hmm.getNodes().get(nodeIndex).getInsertEmissions()
554 * gets the state transition at a node for a specific transition
557 * position of node in model
558 * @param transitionIndex
559 * index of stransition being searched
560 * @return negative log probability of a state transition of the given type
562 public double getStateTransition(int nodeIndex, int transitionIndex,
563 HiddenMarkovModel hmm)
565 double value = hmm.getNodes().get(nodeIndex).getStateTransitions()
566 .get(transitionIndex);