3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNull;
6 import jalview.datamodel.HMMNode;
7 import jalview.datamodel.HiddenMarkovModel;
9 import java.io.BufferedReader;
11 import java.io.FileNotFoundException;
12 import java.io.FileReader;
13 import java.io.IOException;
14 import java.io.PrintWriter;
15 import java.util.ArrayList;
16 import java.util.List;
17 import java.util.Scanner;
19 import org.testng.annotations.Test;
// TestNG test class for HMMFile: parsing of HMM files and writing them back out.
21 public class HMMFileTest {
// Shared fixtures. Each HMMFile is built from a BufferedReader over a test
// resource; the paths are relative, so they resolve against the working
// directory the tests are launched from.
// Fixture for the fn3 test file.
25 HMMFile fn3 = new HMMFile(new BufferedReader(
26 new FileReader(("test/jalview/io/test_fn3_hmm.txt"))));
// Fixture for the PKinase test file.
28 HMMFile pKinase = new HMMFile(new BufferedReader(
29 new FileReader(("test/jalview/io/test_PKinase_hmm.txt"))));
// Fixture for the MADE1 test file.
31 HMMFile made1 = new HMMFile(new BufferedReader(
32 new FileReader(("test/jalview/io/test_MADE1_hmm.txt"))));
// The constructor declares IOException because the field initialisers above
// create FileReaders, which throw FileNotFoundException if a file is missing.
// NOTE(review): the constructor body is not visible in this view.
34 HMMFileTest() throws IOException
// Checks the model exposed by the shared pKinase fixture: header properties,
// symbol list, emission and transition probabilities, and per-node annotations.
// NOTE(review): several assertions below end with a trailing comma and their
// continuation is not visible in this view — confirm against the full file.
43 public void testParse() throws IOException
47 HiddenMarkovModel hmm = pKinase.getHMM();
// Header properties of the model.
48 assertEquals(hmm.getName(), "Pkinase");
49 assertEquals(hmm.getAccessionNumber(), "PF00069.17");
50 assertEquals(hmm.getDescription(), "Protein kinase domain");
51 assertEquals(hmm.getLength().intValue(), 260);
52 assertNull(hmm.getMaxInstanceLength());
53 assertEquals(hmm.getAlphabetType(), "amino");
54 assertEquals(hmm.referenceAnnotationIsActive(), false);
55 assertEquals(hmm.maskValueIsActive(), false);
56 assertEquals(hmm.consensusResidueIsActive(), true);
57 assertEquals(hmm.consensusStructureIsActive(),
59 assertEquals(hmm.mapIsActive(), true);
60 assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
61 assertNull(hmm.getCommandLineLog());
62 assertEquals(hmm.getNumberOfSequences().intValue(), 54);
// NOTE(review): a delta of 4d accepts any value within +/-4 of the expected
// figure, so this assertion checks almost nothing; a much smaller tolerance
// is probably intended.
63 assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 4d);
64 assertEquals(hmm.getCheckSum().longValue(), 3106786190l);
65 assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
66 assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
67 assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");
// Expected symbol list.
// NOTE(review): the statements that fill this list are not visible here.
69 List<Character> symbols = new ArrayList<>();
91 assertEquals(hmm.getSymbols(), symbols);
// Match emission probabilities.
// NOTE(review): some first arguments (332, 381, 475) exceed the model length
// of 260 asserted above, so they are presumably alignment columns rather
// than node indices — TODO confirm.
93 assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
94 assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
95 assertEquals(hmm.getMatchEmissionProbability(24, 'I'), 0.02583, 0.001d);
96 assertEquals(hmm.getMatchEmissionProbability(83, 'C'), 0.008549,
98 assertEquals(hmm.getMatchEmissionProbability(332, 'E'), 0.07998,
100 assertEquals(hmm.getMatchEmissionProbability(381, 'D'), 0.014465,
102 assertEquals(hmm.getMatchEmissionProbability(475, 'Y'), 0.02213,
// Insert emission probabilities.
105 assertEquals(hmm.getInsertEmissionProbability(1, 'C'), 0.012, 0.001d);
106 assertEquals(hmm.getInsertEmissionProbability(14, 'H'), 0.02411,
108 assertEquals(hmm.getInsertEmissionProbability(23, 'L'), 0.06764,
110 assertEquals(hmm.getInsertEmissionProbability(90, 'D'), 0.0623, 0.001d);
111 assertEquals(hmm.getInsertEmissionProbability(374, 'T'), 0.0623,
113 assertEquals(hmm.getInsertEmissionProbability(470, 'P'), 0.0647,
// State transition probabilities, compared to three decimal places.
116 assertEquals(hmm.getStateTransitionProbability(2, 6), 0.3848, 0.001d);
117 assertEquals(hmm.getStateTransitionProbability(38, 3), 0.5382, 0.001d);
118 assertEquals(hmm.getStateTransitionProbability(305, 3), 0.2916, 0.001d);
119 assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
120 assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);
// Per-node annotations: alignment column, reference annotation, consensus
// residue, mask value and consensus structure.
122 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2);
123 assertEquals(hmm.getReferenceAnnotation(7), '-');
124 assertEquals(hmm.getConsensusResidue(23), 't');
125 assertEquals(hmm.getMaskedValue(30), '-');
126 assertEquals(hmm.getConsensusStructure(56), 'S');
128 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135);
129 assertEquals(hmm.getReferenceAnnotation(93), '-');
130 assertEquals(hmm.getConsensusResidue(145), 'a');
131 assertEquals(hmm.getMaskedValue(183), '-');
132 assertEquals(hmm.getConsensusStructure(240), 'H');
137 public void testParseFileProperties() throws IOException
139 FileReader fr = new FileReader(
140 new File("test/jalview/io/test_fn3_hmm.txt"));
141 BufferedReader br = new BufferedReader(fr);
142 fn3.setHMM(new HiddenMarkovModel());
143 fn3.parseFileProperties(br);
144 fn3.parseModel(br); // this is for a later test
145 HiddenMarkovModel testHMM = new HiddenMarkovModel();
146 testHMM = fn3.getHMM();
150 assertEquals(testHMM.getName(), "fn3");
151 assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
152 assertEquals(testHMM.getDescription(),
153 "Fibronectin type III domain");
154 assertEquals(testHMM.getLength().intValue(), 86);
155 assertNull(testHMM.getMaxInstanceLength());
156 assertEquals(testHMM.getAlphabetType(), "amino");
157 assertEquals(testHMM.referenceAnnotationIsActive(), false);
158 assertEquals(testHMM.maskValueIsActive(), false);
159 assertEquals(testHMM.consensusResidueIsActive(), true);
160 assertEquals(testHMM.consensusStructureIsActive(), true);
161 assertEquals(testHMM.mapIsActive(), true);
162 assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
163 assertNull(testHMM.getCommandLineLog());
164 assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
165 assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d);
166 assertEquals(testHMM.getCheckSum().longValue(), 3564431818l);
167 assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
168 assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
169 assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
170 assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
171 assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
172 assertEquals(testHMM.getForward(), "-3.8341 0.71847");
175 FileReader fr3 = new FileReader(
176 new File("test/jalview/io/test_MADE1_hmm.txt"));
177 BufferedReader br3 = new BufferedReader(fr3);
178 made1.setHMM(new HiddenMarkovModel());
179 made1.parseFileProperties(br3);
180 testHMM = made1.getHMM();
184 assertEquals(testHMM.getName(), "MADE1");
185 assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
186 assertEquals(testHMM.getDescription(),
187 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
188 assertEquals(testHMM.getLength().intValue(), 80);
189 assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
190 assertEquals(testHMM.getAlphabetType(), "DNA");
191 assertEquals(testHMM.referenceAnnotationIsActive(), true);
192 assertEquals(testHMM.maskValueIsActive(), false);
193 assertEquals(testHMM.consensusResidueIsActive(), true);
194 assertEquals(testHMM.consensusStructureIsActive(), false);
195 assertEquals(testHMM.mapIsActive(), true);
196 assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
197 assertNull(testHMM.getCommandLineLog());
198 assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
199 assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d);
200 assertEquals(testHMM.getCheckSum().longValue(), 3015610723l);
201 assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
202 assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
203 assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
204 assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
205 assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
206 assertEquals(testHMM.getForward(), "-3.4823 0.71858");
212 public void testFillList() throws IOException
214 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
215 ArrayList<Double> filledArray = new ArrayList<>();
217 filledArray.add(0.27253);
218 filledArray.add(0.0907);
219 filledArray.add(0.00499);
220 filledArray.add(0.02024);
221 filledArray.add(0.00005);
222 filledArray.add(0.00909);
223 filledArray.add(0.01357);
224 filledArray.add(0.10026);
225 filledArray.add(0.001);
227 List<Double> testList = HMMFile.fillList(scanner1, 9);
229 for (int i = 0; i < 9; i++)
231 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
238 Scanner scanner2 = new Scanner(
239 "1.346 5.554 35.345 5.64 1.4");
240 filledArray.add(0.2603);
241 filledArray.add(0.00387);
243 filledArray.add(0.00355);
244 filledArray.add(0.2466);
246 testList = HMMFile.fillList(scanner2, 5);
248 for (int i = 0; i < 5; i++)
250 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
// Parses the model section of the MADE1 test file into the shared made1
// fixture and spot-checks emission and transition probabilities.
// NOTE(review): the body of the 24-iteration loop and the continuation of most
// assertions are not visible in this view; the loop presumably advances the
// reader past the header lines before parseModel is called — TODO confirm.
256 public void testParseModel() throws IOException
258 FileReader fr = new FileReader(
259 new File("test/jalview/io/test_MADE1_hmm.txt"));
260 BufferedReader br = new BufferedReader(fr);
// This freshly constructed model is replaced by made1.getHMM() further down.
261 HiddenMarkovModel testHMM = new HiddenMarkovModel();
262 for (int i = 0; i < 24; i++)
267 made1.parseModel(br);
268 testHMM = made1.getHMM();
// Match emission probabilities.
273 assertEquals(testHMM.getMatchEmissionProbability(1, 'C'), 0.09267,
275 assertEquals(testHMM.getMatchEmissionProbability(25, 'G'), 0.07327,
277 assertEquals(testHMM.getMatchEmissionProbability(1092, 'C'), 0.04184,
279 assertEquals(testHMM.getMatchEmissionProbability(1107, 'G'), 0.07,
// Insert emission probabilities.
282 assertEquals(testHMM.getInsertEmissionProbability(0, 'G'), 0.25,
284 assertEquals(testHMM.getInsertEmissionProbability(247, 'T'), 0.2776,
286 assertEquals(testHMM.getInsertEmissionProbability(1096, 'T'), 0.25,
288 assertEquals(testHMM.getInsertEmissionProbability(1111, 'T'), 0.25,
// State transition probabilities.
291 assertEquals(testHMM.getStateTransitionProbability(1, 0), 0.9634,
293 assertEquals(testHMM.getStateTransitionProbability(5, 1), 0.0203,
295 assertEquals(testHMM.getStateTransitionProbability(14, 3), 0.2515,
297 assertEquals(testHMM.getStateTransitionProbability(65, 4), 0.78808,
299 assertEquals(testHMM.getStateTransitionProbability(1080, 2), 0.01845,
// The last transition checked is expected to be exactly negative infinity
// (two-argument assertEquals, so no tolerance is applied).
301 assertEquals(testHMM.getStateTransitionProbability(1111, 6),
302 Double.NEGATIVE_INFINITY);
307 public void testParseAnnotations()
309 HMMFile testFile = new HMMFile();
310 testFile.setHMM(new HiddenMarkovModel());
311 testFile.getHMM().getNodes().add(new HMMNode());
312 testFile.getHMM().getNodes().add(new HMMNode());
313 testFile.getHMM().getNodes().add(new HMMNode());
316 testFile.getHMM().setConsensusResidueStatus(true);
317 testFile.getHMM().setMAPStatus(true);
318 testFile.getHMM().setReferenceAnnotationStatus(true);
319 testFile.getHMM().setConsensusStructureStatus(true);
320 testFile.getHMM().setMaskedValueStatus(true);
321 Scanner scanner = new Scanner("1345 t t t t");
322 testFile.parseAnnotations(scanner, 1);
324 testFile.getHMM().setConsensusResidueStatus(true);
325 testFile.getHMM().setMAPStatus(false);
326 testFile.getHMM().setReferenceAnnotationStatus(true);
327 testFile.getHMM().setConsensusStructureStatus(false);
328 testFile.getHMM().setMaskedValueStatus(false);
329 Scanner scanner2 = new Scanner("- y x - -");
330 testFile.parseAnnotations(scanner2, 2);
332 HiddenMarkovModel hmm = testFile.getHMM();
334 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
335 assertEquals(hmm.getConsensusResidue(1), 't');
336 assertEquals(hmm.getReferenceAnnotation(1), 't');
337 assertEquals(hmm.getMaskedValue(1), 't');
338 assertEquals(hmm.getConsensusStructure(1), 't');
340 assertEquals(hmm.findNodeIndex(1344).intValue(), 1);
347 * Tests whether the file produced by the output matches the file from the input.
349 * @throws IOException
// Writes pKinase.print() to test_export_hmm.txt, reads that file back as a
// second HMMFile, and compares the two models node by node.
// NOTE(review): the declarations of list1, list2, result, alignColumn1/2 and
// annotation1/2, and the loop braces, are not visible in this view.
354 public void testPrint() throws IOException
356 PrintWriter writer = new PrintWriter(
357 "test/jalview/io/test_export_hmm.txt");
358 String output = pKinase.print();
359 writer.print(output);
// NOTE(review): no close() or flush() on the writer is visible before the
// file is read back. PrintWriter(String) buffers its output, so confirm the
// writer is closed at this point in the full file.
361 HMMFile pKinaseClone = new HMMFile(
362 new FileParse("test/jalview/io/test_export_hmm.txt",
363 DataSourceType.FILE));
// The two freshly constructed models are replaced straight away by the
// models obtained from the fixtures.
364 HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
365 HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
366 pKinaseHMM = pKinase.getHMM();
367 pKinaseCloneHMM = pKinaseClone.getHMM();
// Compare every index below the model length.
369 for (int i = 0; i < pKinaseHMM.getLength(); i++)
// Match emissions.
375 list1 = pKinaseHMM.getNode(i).getMatchEmissions();
376 list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
378 result = checkIfListsAreIdentical(list1, list2);
379 assertEquals(result, true);
// Insert emissions.
381 list1 = pKinaseHMM.getNode(i).getInsertEmissions();
382 list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
384 result = checkIfListsAreIdentical(list1, list2);
385 assertEquals(result, true);
// State transitions.
387 list1 = pKinaseHMM.getNode(i).getStateTransitions();
388 list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
390 result = checkIfListsAreIdentical(list1, list2);
391 assertEquals(result, true);
// Alignment column of the node.
398 alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
399 alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
401 assertEquals(alignColumn1, alignColumn2);
// Reference annotation.
406 annotation1 = pKinaseHMM.getReferenceAnnotation(i);
407 annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
409 assertEquals(annotation1, annotation2);
// Consensus residue.
411 annotation1 = pKinaseHMM.getConsensusResidue(i);
412 annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
414 assertEquals(annotation1, annotation2);
422 public void testGetFilePropertiesAsString() throws FileNotFoundException
424 String string = fn3.getFilePropertiesAsString();
426 Scanner testScanner = new Scanner(string);
428 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
429 "NAME fn3", "ACC PF00041.13",
430 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
431 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
432 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
433 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
434 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
435 "STATS LOCAL VITERBI -9.7737 0.71847",
436 "STATS LOCAL FORWARD -3.8341 0.71847" };
438 for (String value : expected)
440 assertEquals(testScanner.nextLine(), value);
447 public void testGetModelAsString() throws FileNotFoundException
449 String string = fn3.getModelAsString();
451 assertEquals(findValue(2, 2, 2, string), "4.42225");
452 assertEquals(findValue(12, 14, 1, string), "2.79307");
453 assertEquals(findValue(6, 24, 3, string), "0.48576");
454 assertEquals(findValue(19, 33, 2, string), "4.58477");
455 assertEquals(findValue(20, 64, 2, string), "3.61505");
456 assertEquals(findValue(3, 72, 3, string), "6.81068");
457 assertEquals(findValue(10, 80, 2, string), "2.69355");
458 assertEquals(findValue(16, 65, 1, string), "2.81003");
459 assertEquals(findValue(14, 3, 1, string), "2.69012");
460 assertEquals(findValue(11, 32, 1, string), "4.34805");
467 * index of symbol being searched. First symbol has index 1.
469 * index of node being searched. Begin node has index 0. First node
472 * index of line being searched in node. First line has index 1.
474 * string model being searched
475 * @return value at specified position
// Walks the model text with a Scanner to reach the requested line of the
// requested node, then reads tokens up to the requested symbol.
// NOTE(review): the rest of the signature, the declarations of value and
// current, the braces and the return statement are not visible in this view.
478 public String findValue(int symbolIndex, int nodeIndex, int line,
484 Scanner scanner = new Scanner(model);
// Skip the first two lines of the model text.
485 current = scanner.nextLine();
486 current = scanner.nextLine();
// Advance to the requested line within a node (line 1 needs no skip).
488 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
490 current = scanner.nextLine();
// Advance three lines for every node before the requested one.
492 for (int node = 0; node < nodeIndex; node++)
494 current = scanner.nextLine();
495 current = scanner.nextLine();
496 current = scanner.nextLine();
// Read tokens until the requested symbol has been reached.
499 for (int symbol = 0; symbol < symbolIndex; symbol++)
501 value = scanner.next();
// A "COMPO" token, or any token shorter than 7 characters, is followed by an
// extra scanner.next(); presumably this steps over row labels — TODO confirm.
502 if ("COMPO".equals(value))
504 current = scanner.next();
506 else if (value.length() < 7)
508 current = scanner.next();
517 public boolean checkIfListsAreIdentical(List<Double> list1,
520 boolean isDifferent = false;
521 for (int i = 0; i < list1.size(); i++)
525 entry1 = list1.get(i);
526 entry2 = list2.get(i);
527 if (!(entry1 == entry2))