3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNull;
6 import jalview.datamodel.HMMNode;
7 import jalview.datamodel.HiddenMarkovModel;
9 import java.io.BufferedReader;
11 import java.io.FileNotFoundException;
12 import java.io.FileReader;
13 import java.io.IOException;
14 import java.io.PrintWriter;
15 import java.util.ArrayList;
16 import java.util.Arrays;
17 import java.util.List;
18 import java.util.Scanner;
20 import org.testng.annotations.BeforeClass;
21 import org.testng.annotations.Test;
23 public class HMMFileTest {
31 @BeforeClass(alwaysRun = true)
32 public void setUp() throws FileNotFoundException
34 fn3 = new HMMFile(new BufferedReader(
35 new FileReader(("test/jalview/io/test_fn3_hmm.txt"))));
37 pKinase = new HMMFile(new BufferedReader(
38 new FileReader(("test/jalview/io/test_PKinase_hmm.txt"))));
40 made1 = new HMMFile(new BufferedReader(
41 new FileReader(("test/jalview/io/test_MADE1_hmm.txt"))));
44 @Test(groups = "Functional")
45 public void testParse() throws IOException
48 HiddenMarkovModel hmm = pKinase.getHMM();
49 assertEquals(hmm.getName(), "Pkinase");
50 assertEquals(hmm.getAccessionNumber(), "PF00069.17");
51 assertEquals(hmm.getDescription(), "Protein kinase domain");
52 assertEquals(hmm.getLength().intValue(), 260);
53 assertNull(hmm.getMaxInstanceLength());
54 assertEquals(hmm.getAlphabetType(), "amino");
55 assertEquals(hmm.referenceAnnotationIsActive(), false);
56 assertEquals(hmm.maskValueIsActive(), false);
57 assertEquals(hmm.consensusResidueIsActive(), true);
58 assertEquals(hmm.consensusStructureIsActive(),
60 assertEquals(hmm.mapIsActive(), true);
61 assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
62 assertNull(hmm.getCommandLineLog());
63 assertEquals(hmm.getNumberOfSequences().intValue(), 54);
64 assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 4d);
65 assertEquals(hmm.getCheckSum().longValue(), 3106786190l);
66 assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
67 assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
68 assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");
70 List<Character> symbols = Arrays
71 .asList(new Character[]
72 { 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N',
73 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y' });
74 assertEquals(hmm.getSymbols(), symbols);
76 assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
77 assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
78 assertEquals(hmm.getMatchEmissionProbability(24, 'I'), 0.02583, 0.001d);
79 assertEquals(hmm.getMatchEmissionProbability(83, 'C'), 0.008549,
81 assertEquals(hmm.getMatchEmissionProbability(332, 'E'), 0.07998,
83 assertEquals(hmm.getMatchEmissionProbability(381, 'D'), 0.014465,
85 assertEquals(hmm.getMatchEmissionProbability(475, 'Y'), 0.02213,
88 assertEquals(hmm.getInsertEmissionProbability(1, 'C'), 0.012, 0.001d);
89 assertEquals(hmm.getInsertEmissionProbability(14, 'H'), 0.02411,
91 assertEquals(hmm.getInsertEmissionProbability(23, 'L'), 0.06764,
93 assertEquals(hmm.getInsertEmissionProbability(90, 'D'), 0.0623, 0.001d);
94 assertEquals(hmm.getInsertEmissionProbability(374, 'T'), 0.0623,
96 assertEquals(hmm.getInsertEmissionProbability(470, 'P'), 0.0647,
99 assertEquals(hmm.getStateTransitionProbability(2, 6), 0.3848, 0.001d);
100 assertEquals(hmm.getStateTransitionProbability(38, 3), 0.5382, 0.001d);
101 assertEquals(hmm.getStateTransitionProbability(305, 3), 0.2916, 0.001d);
102 assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
103 assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);
105 assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2);
106 assertEquals(hmm.getReferenceAnnotation(7), '-');
107 assertEquals(hmm.getConsensusResidue(23), 't');
108 assertEquals(hmm.getMaskedValue(30), '-');
109 assertEquals(hmm.getConsensusStructure(56), 'S');
111 assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135);
112 assertEquals(hmm.getReferenceAnnotation(93), '-');
113 assertEquals(hmm.getConsensusResidue(145), 'a');
114 assertEquals(hmm.getMaskedValue(183), '-');
115 assertEquals(hmm.getConsensusStructure(240), 'H');
120 public void testParseFileProperties() throws IOException
122 FileReader fr = new FileReader(
123 new File("test/jalview/io/test_fn3_hmm.txt"));
124 BufferedReader br = new BufferedReader(fr);
125 fn3.setHMM(new HiddenMarkovModel());
126 fn3.parseFileProperties(br);
127 fn3.parseModel(br); // this is for a later test
128 HiddenMarkovModel testHMM = new HiddenMarkovModel();
129 testHMM = fn3.getHMM();
133 assertEquals(testHMM.getName(), "fn3");
134 assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
135 assertEquals(testHMM.getDescription(),
136 "Fibronectin type III domain");
137 assertEquals(testHMM.getLength().intValue(), 86);
138 assertNull(testHMM.getMaxInstanceLength());
139 assertEquals(testHMM.getAlphabetType(), "amino");
140 assertEquals(testHMM.referenceAnnotationIsActive(), false);
141 assertEquals(testHMM.maskValueIsActive(), false);
142 assertEquals(testHMM.consensusResidueIsActive(), true);
143 assertEquals(testHMM.consensusStructureIsActive(), true);
144 assertEquals(testHMM.mapIsActive(), true);
145 assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
146 assertNull(testHMM.getCommandLineLog());
147 assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
148 assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d);
149 assertEquals(testHMM.getCheckSum().longValue(), 3564431818l);
150 assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
151 assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
152 assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
153 assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
154 assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
155 assertEquals(testHMM.getForward(), "-3.8341 0.71847");
158 FileReader fr3 = new FileReader(
159 new File("test/jalview/io/test_MADE1_hmm.txt"));
160 BufferedReader br3 = new BufferedReader(fr3);
161 made1.setHMM(new HiddenMarkovModel());
162 made1.parseFileProperties(br3);
163 testHMM = made1.getHMM();
167 assertEquals(testHMM.getName(), "MADE1");
168 assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
169 assertEquals(testHMM.getDescription(),
170 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
171 assertEquals(testHMM.getLength().intValue(), 80);
172 assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
173 assertEquals(testHMM.getAlphabetType(), "DNA");
174 assertEquals(testHMM.referenceAnnotationIsActive(), true);
175 assertEquals(testHMM.maskValueIsActive(), false);
176 assertEquals(testHMM.consensusResidueIsActive(), true);
177 assertEquals(testHMM.consensusStructureIsActive(), false);
178 assertEquals(testHMM.mapIsActive(), true);
179 assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
180 assertNull(testHMM.getCommandLineLog());
181 assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
182 assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d);
183 assertEquals(testHMM.getCheckSum().longValue(), 3015610723l);
184 assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
185 assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
186 assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
187 assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
188 assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
189 assertEquals(testHMM.getForward(), "-3.4823 0.71858");
195 public void testFillList() throws IOException
197 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
198 ArrayList<Double> filledArray = new ArrayList<>();
200 filledArray.add(0.27253);
201 filledArray.add(0.0907);
202 filledArray.add(0.00499);
203 filledArray.add(0.02024);
204 filledArray.add(0.00005);
205 filledArray.add(0.00909);
206 filledArray.add(0.01357);
207 filledArray.add(0.10026);
208 filledArray.add(0.001);
210 List<Double> testList = HMMFile.fillList(scanner1, 9);
212 for (int i = 0; i < 9; i++)
214 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
221 Scanner scanner2 = new Scanner(
222 "1.346 5.554 35.345 5.64 1.4");
223 filledArray.add(0.2603);
224 filledArray.add(0.00387);
226 filledArray.add(0.00355);
227 filledArray.add(0.2466);
229 testList = HMMFile.fillList(scanner2, 5);
231 for (int i = 0; i < 5; i++)
233 assertEquals(testList.get(i), filledArray.get(i), 0.001d);
239 public void testParseModel() throws IOException
241 FileReader fr = new FileReader(
242 new File("test/jalview/io/test_MADE1_hmm.txt"));
243 BufferedReader br = new BufferedReader(fr);
244 HiddenMarkovModel testHMM = new HiddenMarkovModel();
245 for (int i = 0; i < 24; i++)
250 made1.parseModel(br);
251 testHMM = made1.getHMM();
256 assertEquals(testHMM.getMatchEmissionProbability(1, 'C'), 0.09267,
258 assertEquals(testHMM.getMatchEmissionProbability(25, 'G'), 0.07327,
260 assertEquals(testHMM.getMatchEmissionProbability(1092, 'C'), 0.04184,
262 assertEquals(testHMM.getMatchEmissionProbability(1107, 'G'), 0.07,
265 assertEquals(testHMM.getInsertEmissionProbability(0, 'G'), 0.25,
267 assertEquals(testHMM.getInsertEmissionProbability(247, 'T'), 0.2776,
269 assertEquals(testHMM.getInsertEmissionProbability(1096, 'T'), 0.25,
271 assertEquals(testHMM.getInsertEmissionProbability(1111, 'T'), 0.25,
274 assertEquals(testHMM.getStateTransitionProbability(1, 0), 0.9634,
276 assertEquals(testHMM.getStateTransitionProbability(5, 1), 0.0203,
278 assertEquals(testHMM.getStateTransitionProbability(14, 3), 0.2515,
280 assertEquals(testHMM.getStateTransitionProbability(65, 4), 0.78808,
282 assertEquals(testHMM.getStateTransitionProbability(1080, 2), 0.01845,
284 assertEquals(testHMM.getStateTransitionProbability(1111, 6),
285 Double.NEGATIVE_INFINITY);
290 public void testParseAnnotations()
292 HMMFile testFile = new HMMFile();
293 HiddenMarkovModel hmm = new HiddenMarkovModel();
294 testFile.setHMM(hmm);
295 hmm.getNodes().add(new HMMNode());
297 hmm.setConsensusResidueStatus(true);
298 hmm.setMAPStatus(true);
299 hmm.setReferenceAnnotationStatus(true);
300 hmm.setConsensusStructureStatus(true);
301 hmm.setMaskedValueStatus(true);
302 Scanner scanner = new Scanner("1345 t t t t");
303 HMMNode node = new HMMNode();
304 hmm.getNodes().add(node);
305 testFile.parseAnnotations(scanner, node);
307 hmm.setConsensusResidueStatus(true);
308 hmm.setMAPStatus(false);
309 hmm.setReferenceAnnotationStatus(true);
310 hmm.setConsensusStructureStatus(false);
311 hmm.setMaskedValueStatus(false);
312 Scanner scanner2 = new Scanner("- y x - -");
313 node = new HMMNode();
314 hmm.getNodes().add(node);
315 testFile.parseAnnotations(scanner2, node);
317 assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
318 assertEquals(hmm.getConsensusResidue(1), 't');
319 assertEquals(hmm.getReferenceAnnotation(1), 't');
320 assertEquals(hmm.getMaskedValue(1), 't');
321 assertEquals(hmm.getConsensusStructure(1), 't');
327 * tests to see if file produced by the output matches the file from the input
329 * @throws IOException
334 public void testPrint() throws IOException
336 PrintWriter writer = new PrintWriter(
337 "test/jalview/io/test_export_hmm.txt");
338 String output = pKinase.print();
339 writer.print(output);
341 HMMFile pKinaseClone = new HMMFile(
342 new FileParse("test/jalview/io/test_export_hmm.txt",
343 DataSourceType.FILE));
344 HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
345 HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
346 pKinaseHMM = pKinase.getHMM();
347 pKinaseCloneHMM = pKinaseClone.getHMM();
349 for (int i = 0; i < pKinaseHMM.getLength(); i++)
355 list1 = pKinaseHMM.getNode(i).getMatchEmissions();
356 list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
358 result = checkIfListsAreIdentical(list1, list2);
359 assertEquals(result, true);
361 list1 = pKinaseHMM.getNode(i).getInsertEmissions();
362 list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
364 result = checkIfListsAreIdentical(list1, list2);
365 assertEquals(result, true);
367 list1 = pKinaseHMM.getNode(i).getStateTransitions();
368 list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
370 result = checkIfListsAreIdentical(list1, list2);
371 assertEquals(result, true);
378 alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
379 alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
381 assertEquals(alignColumn1, alignColumn2);
386 annotation1 = pKinaseHMM.getReferenceAnnotation(i);
387 annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
389 assertEquals(annotation1, annotation2);
391 annotation1 = pKinaseHMM.getConsensusResidue(i);
392 annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
394 assertEquals(annotation1, annotation2);
402 public void testGetFilePropertiesAsString() throws FileNotFoundException
404 String string = fn3.getFilePropertiesAsString();
406 Scanner testScanner = new Scanner(string);
408 String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
409 "NAME fn3", "ACC PF00041.13",
410 "DESC Fibronectin type III domain", "LENG 86", "ALPH amino",
411 "RF no", "MM no", "CONS yes", "CS yes", "MAP yes",
412 "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833",
413 "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20",
414 "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
415 "STATS LOCAL VITERBI -9.7737 0.71847",
416 "STATS LOCAL FORWARD -3.8341 0.71847" };
418 for (String value : expected)
420 assertEquals(testScanner.nextLine(), value);
427 public void testGetModelAsString() throws FileNotFoundException
429 String string = fn3.getModelAsString();
431 assertEquals(findValue(2, 2, 2, string), "4.42225");
432 assertEquals(findValue(12, 14, 1, string), "2.79307");
433 assertEquals(findValue(6, 24, 3, string), "0.48576");
434 assertEquals(findValue(19, 33, 2, string), "4.58477");
435 assertEquals(findValue(20, 64, 2, string), "3.61505");
436 assertEquals(findValue(3, 72, 3, string), "6.81068");
437 assertEquals(findValue(10, 80, 2, string), "2.69355");
438 assertEquals(findValue(16, 65, 1, string), "2.81003");
439 assertEquals(findValue(14, 3, 1, string), "2.69012");
440 assertEquals(findValue(11, 32, 1, string), "4.34805");
447 * index of symbol being searched. First symbol has index 1.
449 * index of node being searched. Begin node has index 0. First node
452 * index of line being searched in node. First line has index 1.
454 * string model being searched
455 * @return value at specified position
458 public String findValue(int symbolIndex, int nodeIndex, int line,
464 Scanner scanner = new Scanner(model);
465 current = scanner.nextLine();
466 current = scanner.nextLine();
468 for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
470 current = scanner.nextLine();
472 for (int node = 0; node < nodeIndex; node++)
474 current = scanner.nextLine();
475 current = scanner.nextLine();
476 current = scanner.nextLine();
479 for (int symbol = 0; symbol < symbolIndex; symbol++)
481 value = scanner.next();
482 if ("COMPO".equals(value))
484 current = scanner.next();
486 else if (value.length() < 7)
488 current = scanner.next();
497 public boolean checkIfListsAreIdentical(List<Double> list1,
500 boolean isDifferent = false;
501 for (int i = 0; i < list1.size(); i++)
505 entry1 = list1.get(i);
506 entry2 = list2.get(i);
507 if (!(entry1 == entry2))