3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNull;
6 import jalview.datamodel.HMMNode;
7 import jalview.datamodel.HiddenMarkovModel;
9 import java.io.BufferedReader;
11 import java.io.FileNotFoundException;
12 import java.io.FileReader;
13 import java.io.IOException;
14 import java.io.PrintWriter;
15 import java.util.ArrayList;
16 import java.util.List;
17 import java.util.Scanner;
19 import org.testng.annotations.Test;
21 public class HMMFileTest {
// Shared fixtures: one HMMFile per test data file, each opened as a local
// file (DataSourceType.FILE). They are instance fields, so state produced by
// one test method is visible to the others (testParseFileProperties parses
// the fn3 model "for a later test").

// fn3 model; used by testParseFileProperties, testAppendFileProperties and
// testAppendModel.
25 HMMFile fn3 = new HMMFile(
26 new FileParse("test/jalview/io/test_fn3_hmm.txt",
27 DataSourceType.FILE));
// PKinase model; used by testParse and testExportFile.
29 HMMFile pKinase = new HMMFile(
30 new FileParse("test/jalview/io/test_PKinase_hmm.txt",
31 DataSourceType.FILE));
// MADE1 model; used by testParseFileProperties and testParseModel.
33 HMMFile made1 = new HMMFile(
34 new FileParse("test/jalview/io/test_MADE1_hmm.txt",
35 DataSourceType.FILE));
// Package-private constructor declared to throw IOException. Its body is not
// visible in this excerpt.
// NOTE(review): presumably the throws clause is needed because the field
// initialisers above construct FileParse/HMMFile objects - confirm.
37 HMMFileTest() throws IOException
// Checks the model held by the pKinase fixture: header properties first,
// then sampled match/insert emissions and state transitions, then per-node
// annotations. The statement that triggers parsing is not visible in this
// excerpt.
46 public void testParse() throws IOException
50 HiddenMarkovModel hmm = pKinase.getHMM();
// --- header properties ---
51 assertEquals(hmm.getName(), "Pkinase");
52 assertEquals(hmm.getAccessionNumber(), "PF00069.17");
53 assertEquals(hmm.getDescription(), "Protein kinase domain");
54 assertEquals(hmm.getLength().intValue(), 260);
55 assertNull(hmm.getMaxInstanceLength());
56 assertEquals(hmm.getAlphabetType(), "amino");
57 assertEquals(hmm.referenceAnnotationIsActive(), false);
58 assertEquals(hmm.maskValueIsActive(), false);
59 assertEquals(hmm.consensusResidueIsActive(), true);
60 assertEquals(hmm.consensusStructureIsActive(),
62 assertEquals(hmm.mapIsActive(), true);
63 assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
64 assertNull(hmm.getCommandLineLog());
65 assertEquals(hmm.getNumberOfSequences().intValue(), 54);
// NOTE(review): the third argument is the permitted delta. 4d accepts any
// value within +/-4 of 3.358521, so this assertion can hardly fail -
// confirm whether a small tolerance (e.g. four decimal places) was intended.
66 assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 4d);
67 assertEquals(hmm.getCheckSum().longValue(), 3106786190l);
68 assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
69 assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
70 assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");
// --- alphabet ---
// The statements that fill the expected symbol list are not visible in this
// excerpt.
72 List<Character> symbols = new ArrayList<>();
94 assertEquals(hmm.getSymbols(), symbols);
// --- match emissions: getMatchEmission(nodeIndex, symbolIndex, hmm),
// compared to within 0.001 ---
96 assertEquals(getMatchEmission(0, 19, hmm), 0.032298, 0.001d);
97 assertEquals(getMatchEmission(12, 12, hmm), 0.0130, 0.001d);
98 assertEquals(getMatchEmission(23, 7, hmm), 0.02583, 0.001d);
99 assertEquals(getMatchEmission(54, 1, hmm), 0.008549, 0.001d);
100 assertEquals(getMatchEmission(178, 3, hmm), 0.07998, 0.001d);
101 assertEquals(getMatchEmission(210, 2, hmm), 0.014465, 0.001d);
102 assertEquals(getMatchEmission(260, 19, hmm), 0.02213, 0.001d);
// --- insert emissions: getInsertEmission(nodeIndex, symbolIndex, hmm) ---
104 assertEquals(getInsertEmission(2, 1, hmm), 0.012, 0.001d);
105 assertEquals(getInsertEmission(15, 6, hmm), 0.02411, 0.001d);
106 assertEquals(getInsertEmission(22, 9, hmm), 0.06764, 0.001d);
107 assertEquals(getInsertEmission(57, 2, hmm), 0.0623, 0.001d);
108 assertEquals(getInsertEmission(203, 16, hmm), 0.0623, 0.001d);
109 assertEquals(getInsertEmission(255, 12, hmm), 0.0647, 0.001d);
// --- state transitions: getStateTransition(nodeIndex, transitionIndex, hmm);
// the first check expects exactly Double.NEGATIVE_INFINITY (no delta) ---
111 assertEquals(getStateTransition(0, 6, hmm),
112 Double.NEGATIVE_INFINITY);
113 assertEquals(getStateTransition(3, 6, hmm), 0.3848, 0.001d);
114 assertEquals(getStateTransition(29, 3, hmm), 0.5382, 0.001d);
115 assertEquals(getStateTransition(169, 3, hmm), 0.2916, 0.001d);
116 assertEquals(getStateTransition(209, 0, hmm), 0.99, 0.001d);
117 assertEquals(getStateTransition(243, 1, hmm), 0.0066, 0.001d);
// --- per-node annotations, one sample of each kind ---
119 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2);
120 assertEquals(hmm.getReferenceAnnotation(7), '-');
121 assertEquals(hmm.getConsensusResidue(23), 't');
122 assertEquals(hmm.getMaskedValue(30), '-');
123 assertEquals(hmm.getConsensusStructure(56), 'S');
// --- second sample of each annotation kind, at later nodes ---
125 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135);
126 assertEquals(hmm.getReferenceAnnotation(93), '-');
127 assertEquals(hmm.getConsensusResidue(145), 'a');
128 assertEquals(hmm.getMaskedValue(183), '-');
129 assertEquals(hmm.getConsensusStructure(240), 'H');
134 public void testParseFileProperties() throws IOException
136 FileReader fr = new FileReader(
137 new File("test/jalview/io/test_fn3_hmm.txt"));
138 BufferedReader br = new BufferedReader(fr);
139 fn3.parseFileProperties(br);
140 fn3.parseModel(br); // this is for a later test
141 HiddenMarkovModel testHMM = new HiddenMarkovModel();
142 testHMM = fn3.getHMM();
146 assertEquals(testHMM.getName(), "fn3");
147 assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
148 assertEquals(testHMM.getDescription(),
149 "Fibronectin type III domain");
150 assertEquals(testHMM.getLength().intValue(), 86);
151 assertNull(testHMM.getMaxInstanceLength());
152 assertEquals(testHMM.getAlphabetType(), "amino");
153 assertEquals(testHMM.referenceAnnotationIsActive(), false);
154 assertEquals(testHMM.maskValueIsActive(), false);
155 assertEquals(testHMM.consensusResidueIsActive(), true);
156 assertEquals(testHMM.consensusStructureIsActive(), true);
157 assertEquals(testHMM.mapIsActive(), true);
158 assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
159 assertNull(testHMM.getCommandLineLog());
160 assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
161 assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d);
162 assertEquals(testHMM.getCheckSum().longValue(), 3564431818l);
163 assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
164 assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
165 assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
166 assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
167 assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
168 assertEquals(testHMM.getForward(), "-3.8341 0.71847");
171 FileReader fr3 = new FileReader(
172 new File("test/jalview/io/test_MADE1_hmm.txt"));
173 BufferedReader br3 = new BufferedReader(fr3);
174 made1.parseFileProperties(br3);
175 testHMM = made1.getHMM();
179 assertEquals(testHMM.getName(), "MADE1");
180 assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
181 assertEquals(testHMM.getDescription(),
182 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
183 assertEquals(testHMM.getLength().intValue(), 80);
184 assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
185 assertEquals(testHMM.getAlphabetType(), "DNA");
186 assertEquals(testHMM.referenceAnnotationIsActive(), true);
187 assertEquals(testHMM.maskValueIsActive(), false);
188 assertEquals(testHMM.consensusResidueIsActive(), true);
189 assertEquals(testHMM.consensusStructureIsActive(), false);
190 assertEquals(testHMM.mapIsActive(), true);
191 assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
192 assertNull(testHMM.getCommandLineLog());
193 assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
194 assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d);
195 assertEquals(testHMM.getCheckSum().longValue(), 3015610723l);
196 assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
197 assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
198 assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
199 assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
200 assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
201 assertEquals(testHMM.getForward(), "-3.4823 0.71858");
// Runs whitespace-separated numbers through HMMFile.fillList(scanner, count)
// and compares each returned entry with a precomputed expectation, to within
// 0.001.
207 public void testFillList()
209 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
210 ArrayList<Double> filledArray = new ArrayList<>();
// Expected results for scanner1, in input order. Each literal equals
// exp(-x) of the corresponding input (e.g. exp(-1.3) = 0.27253).
// NOTE(review): presumably fillList converts negative natural-log values to
// probabilities - confirm against HMMFile.fillList.
212 filledArray.add(0.27253);
213 filledArray.add(0.0907);
214 filledArray.add(0.00499);
215 filledArray.add(0.02024);
216 filledArray.add(0.00005);
217 filledArray.add(0.00909);
218 filledArray.add(0.01357);
219 filledArray.add(0.10026);
220 filledArray.add(0.001);
// request nine values, one per input token
222 List<Double> testList = HMMFile.fillList(scanner1, 9);
224 for (int i = 0; i < 9; i++)
226 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
// Second case: five inputs.
// NOTE(review): only four expected values are visible below (none for
// 35.345), and no visible statement empties filledArray before they are
// appended. The embedded numbering skips lines here, so those statements
// are probably just missing from this excerpt - confirm in the full file.
233 Scanner scanner2 = new Scanner(
234 "1.346 5.554 35.345 5.64 1.4");
235 filledArray.add(0.2603);
236 filledArray.add(0.00387);
238 filledArray.add(0.00355);
239 filledArray.add(0.2466);
241 testList = HMMFile.fillList(scanner2, 5);
243 for (int i = 0; i < 5; i++)
245 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
// Reads the MADE1 test file through made1.parseModel and spot-checks match
// emissions, insert emissions and state transitions of the resulting model.
251 public void testParseModel() throws IOException
253 FileReader fr = new FileReader(
254 new File("test/jalview/io/test_MADE1_hmm.txt"));
255 BufferedReader br = new BufferedReader(fr);
// NOTE(review): this freshly constructed model is replaced by
// made1.getHMM() below before it is ever read.
256 HiddenMarkovModel testHMM = new HiddenMarkovModel();
// The body of this loop is not visible in this excerpt.
// NOTE(review): presumably it skips the first 24 lines of the file so that
// parseModel starts at the model section - confirm.
257 for (int i = 0; i < 24; i++)
261 made1.parseModel(br);
262 testHMM = made1.getHMM();
// match emissions: getMatchEmission(nodeIndex, symbolIndex, hmm)
266 assertEquals(getMatchEmission(0, 2, testHMM), 0.1961, 0.001d);
267 assertEquals(getMatchEmission(2, 1, testHMM), 0.09267, 0.001d);
268 assertEquals(getMatchEmission(12, 2, testHMM), 0.07327, 0.001d);
269 assertEquals(getMatchEmission(69, 1, testHMM), 0.04184, 0.001d);
270 assertEquals(getMatchEmission(76, 2, testHMM), 0.07, 0.001d);
// insert emissions: getInsertEmission(nodeIndex, symbolIndex, hmm)
272 assertEquals(getInsertEmission(0, 1, testHMM), 0.25, 0.001d);
273 assertEquals(getInsertEmission(1, 2, testHMM), 0.25, 0.001d);
274 assertEquals(getInsertEmission(31, 3, testHMM), 0.2776, 0.001d);
275 assertEquals(getInsertEmission(70, 3, testHMM), 0.25, 0.001d);
276 assertEquals(getInsertEmission(80, 3, testHMM), 0.25, 0.001d);
// state transitions: getStateTransition(nodeIndex, transitionIndex, hmm);
// the last check expects exactly Double.NEGATIVE_INFINITY (no delta)
278 assertEquals(getStateTransition(2, 0, testHMM), 0.9634, 0.001d);
279 assertEquals(getStateTransition(6, 1, testHMM), 0.0203, 0.001d);
280 assertEquals(getStateTransition(9, 3, testHMM), 0.2515, 0.001d);
281 assertEquals(getStateTransition(20, 4, testHMM), 0.78808, 0.001d);
282 assertEquals(getStateTransition(68, 2, testHMM), 0.01845, 0.001d);
283 assertEquals(getStateTransition(80, 6, testHMM),
284 Double.NEGATIVE_INFINITY);
289 public void testParseAnnotations()
291 HMMFile testFile = new HMMFile();
292 testFile.getHMM().getNodes().add(new HMMNode());
293 testFile.getHMM().getNodes().add(new HMMNode());
294 testFile.getHMM().getNodes().add(new HMMNode());
297 testFile.getHMM().setConsensusResidueStatus(true);
298 testFile.getHMM().setMAPStatus(true);
299 testFile.getHMM().setReferenceAnnotationStatus(true);
300 testFile.getHMM().setConsensusStructureStatus(true);
301 testFile.getHMM().setMaskedValueStatus(true);
302 Scanner scanner = new Scanner("1345 t t t t");
303 testFile.parseAnnotations(scanner, 1);
305 testFile.getHMM().setConsensusResidueStatus(true);
306 testFile.getHMM().setMAPStatus(false);
307 testFile.getHMM().setReferenceAnnotationStatus(true);
308 testFile.getHMM().setConsensusStructureStatus(false);
309 testFile.getHMM().setMaskedValueStatus(false);
310 Scanner scanner2 = new Scanner("- y x - -");
311 testFile.parseAnnotations(scanner2, 2);
313 HiddenMarkovModel hmm = testFile.getHMM();
315 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
316 assertEquals(hmm.getConsensusResidue(1), 't');
317 assertEquals(hmm.getReferenceAnnotation(1), 't');
318 assertEquals(hmm.getMaskedValue(1), 't');
319 assertEquals(hmm.getConsensusStructure(1), 't');
321 assertEquals(hmm.findNodeIndex(1344).intValue(), 1);
328 * Tests that the file written by exportFile, once parsed again, yields the same model as the original input file.
330 * @throws IOException
// Round trip: exports the pKinase fixture to test_export_hmm.txt, parses
// that file into a second HMMFile, then compares the two models node by
// node (emissions, transitions, alignment column and two annotation kinds).
// The declarations of list1/list2, result, alignColumn1/2 and
// annotation1/2 are not visible in this excerpt.
335 public void testExportFile() throws IOException
337 pKinase.exportFile("test/jalview/io/test_export_hmm.txt");
338 HMMFile pKinaseClone = new HMMFile(
339 new FileParse("test/jalview/io/test_export_hmm.txt",
340 DataSourceType.FILE));
341 pKinaseClone.parse();
// NOTE(review): both freshly constructed models are replaced by the
// getHMM() results two lines further down, before they are ever read.
342 HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
343 HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
344 pKinaseHMM = pKinase.getHMM();
345 pKinaseCloneHMM = pKinaseClone.getHMM();
// NOTE(review): the loop stops before index getLength(), yet testParse
// reads node 260 of this same model whose length is 260 - so the last node
// is apparently never compared. Confirm whether "<=" was intended.
347 for (int i = 0; i < pKinaseHMM.getLength(); i++)
// match emissions
// NOTE(review): checkIfListsAreIdentical tracks a flag named isDifferent and
// its return statement is not visible in this excerpt - confirm that a
// result of true really means the two lists matched.
353 list1 = pKinaseHMM.getNode(i).getMatchEmissions();
354 list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
356 result = checkIfListsAreIdentical(list1, list2);
357 assertEquals(result, true);
// insert emissions
359 list1 = pKinaseHMM.getNode(i).getInsertEmissions();
360 list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
362 result = checkIfListsAreIdentical(list1, list2);
363 assertEquals(result, true);
// state transitions
365 list1 = pKinaseHMM.getNode(i).getStateTransitions();
366 list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
368 result = checkIfListsAreIdentical(list1, list2);
369 assertEquals(result, true);
// alignment column of the node
376 alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
377 alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
379 assertEquals(alignColumn1, alignColumn2);
// reference annotation
384 annotation1 = pKinaseHMM.getReferenceAnnotation(i);
385 annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
387 assertEquals(annotation1, annotation2);
// consensus residue
389 annotation1 = pKinaseHMM.getConsensusResidue(i);
390 annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
392 assertEquals(annotation1, annotation2);
400 public void testAppendFileProperties() throws FileNotFoundException
402 PrintWriter writer = new PrintWriter(
403 "test/jalview/io/test_export_hmm.txt");
404 fn3.appendFileProperties(writer);
407 File file = new File("test/jalview/io/test_export_hmm.txt");
409 Scanner testScanner = new Scanner(file);
410 testScanner.useDelimiter("\\Z");
412 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
413 "NAME fn3", "ACC PF00041.13",
414 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
415 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
416 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
417 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
418 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
419 "STATS LOCAL VITERBI -9.7737 0.71847",
420 "STATS LOCAL FORWARD -3.8341 0.71847" };
422 for (String value : expected)
424 assertEquals(testScanner.nextLine(), value);
// Writes the model section of the fn3 fixture to a scratch file, reads the
// file back as one string and spot-checks individual values with findValue.
431 public void testAppendModel() throws FileNotFoundException
433 PrintWriter writer = new PrintWriter(
434 "test/jalview/io/test_export_hmm.txt");
435 fn3.appendModel(writer);
// NOTE(review): no close()/flush() of writer is visible before the file is
// reopened below. The embedded numbering skips lines here, so it may simply
// be missing from this excerpt - confirm, since unflushed output would make
// the read-back incomplete.
438 File file = new File("test/jalview/io/test_export_hmm.txt");
439 Scanner scanner = new Scanner(file);
// "\\Z" matches the end of input (before a final line terminator), so the
// single next() call returns the file content as one token
440 scanner.useDelimiter("\\Z");
// NOTE(review): the declaration of 'string' is not visible in this excerpt.
441 string = scanner.next();
// arguments are (symbolIndex, nodeIndex, line, model); values are compared
// as text, so the exported formatting must match exactly
443 assertEquals(findValue(2, 2, 2, string), "4.42225");
444 assertEquals(findValue(12, 14, 1, string), "2.79307");
445 assertEquals(findValue(6, 24, 3, string), "0.48576");
446 assertEquals(findValue(19, 33, 2, string), "4.58477");
447 assertEquals(findValue(20, 64, 2, string), "3.61505");
448 assertEquals(findValue(3, 72, 3, string), "6.81068");
449 assertEquals(findValue(10, 80, 2, string), "2.69355");
450 assertEquals(findValue(16, 65, 1, string), "2.81003");
451 assertEquals(findValue(14, 3, 1, string), "2.69012");
452 assertEquals(findValue(11, 32, 1, string), "4.34805");
459 * index of symbol being searched. First symbol has index 1.
461 * index of node being searched. Begin node has index 0. First node
464 * index of line being searched in node. First line has index 1.
466 * string model being searched
467 * @return value at specified position
// Helper for testAppendModel: scans the text of an exported model section
// for the value at a given symbol / node / line position and returns it as
// a string. Only the skeleton of the method is visible in this excerpt: the
// loop bodies, the end of the parameter list and the return statement are
// missing.
470 public String findValue(int symbolIndex, int nodeIndex, int line,
// tokenises the model text passed in as 'model'
475 Scanner scanner = new Scanner(model);
// runs (line - 1) times; body not visible in this excerpt
479 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
// runs nodeIndex times; body not visible in this excerpt
483 for (int node = 0; node < nodeIndex; node++)
// runs symbolIndex times, reading at least one token per pass into 'value'
490 for (int symbol = 0; symbol < symbolIndex; symbol++)
492 value = scanner.next();
// the literal token "COMPO" is special-cased; handling not visible here
493 if ("COMPO".equals(value))
// tokens shorter than seven characters are special-cased as well; handling
// not visible here
497 else if (value.length() < 7)
// Helper for testExportFile: walks list1 by index and compares each entry
// with the entry at the same index in list2. The second parameter, the
// declarations of entry1/entry2 and the return statement are not visible in
// this excerpt.
// NOTE(review): the flag is called isDifferent although the method name
// promises "identical" - confirm which meaning the returned value has.
// NOTE(review): list2 is indexed up to list1.size() without a visible size
// check, so a shorter list2 would throw rather than report a mismatch.
507 public boolean checkIfListsAreIdentical(List<Double> list1,
510 boolean isDifferent = false;
511 for (int i = 0; i < list1.size(); i++)
515 entry1 = list1.get(i);
516 entry2 = list2.get(i);
// NOTE(review): the lists hold boxed Double values. If entry1/entry2 are
// declared as Double, == compares object references, not numeric values,
// and will report "different" for equal numbers - confirm the declared
// types and consider equals() or a primitive comparison.
517 if (!(entry1 == entry2))
526 * gets the match emission at a node for a symbol
529 * position of node in model
531 * index of symbol being searched
532 * @return negative log probability of a match emission of the given symbol
// Reads from the match-emission list of node nodeIndex in hmm.getNodes().
// The end of this expression and the return statement are not visible in
// this excerpt.
// NOTE(review): the Javadoc above describes the result as a negative log
// probability, while testParse compares it with values such as 0.032298 -
// confirm which scale the model actually stores.
534 public double getMatchEmission(int nodeIndex, int symbolIndex,
535 HiddenMarkovModel hmm)
537 double value = hmm.getNodes().get(nodeIndex).getMatchEmissions()
543 * gets the insert emission at a node for a symbol
546 * position of node in model
548 * index of symbol being searched
549 * @return negative log probability of an insert emission of the given symbol
// Reads from the insert-emission list of node nodeIndex in hmm.getNodes().
// The end of this expression and the return statement are not visible in
// this excerpt.
// NOTE(review): the Javadoc above describes the result as a negative log
// probability, while testParse compares it with values such as 0.0623 -
// confirm which scale the model actually stores.
551 public double getInsertEmission(int nodeIndex, int symbolIndex,
552 HiddenMarkovModel hmm)
554 double value = hmm.getNodes().get(nodeIndex).getInsertEmissions()
560 * gets the state transition at a node for a specific transition
563 * position of node in model
564 * @param transitionIndex
565 * index of transition being searched
566 * @return negative log probability of a state transition of the given type
568 public double getStateTransition(int nodeIndex, int transitionIndex,
569 HiddenMarkovModel hmm)
571 double value = hmm.getNodes().get(nodeIndex).getStateTransitions()
572 .get(transitionIndex);