3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNull;
6 import java.io.BufferedReader;
8 import java.io.FileReader;
9 import java.io.IOException;
10 import java.util.ArrayList;
11 import java.util.List;
12 import java.util.Scanner;
14 import org.testng.annotations.Test;
16 public class HMMFileTest {
// Shared fixtures used by the tests below. Each HMMFile is constructed from a
// path string, and the File declared after it is built from the same path so a
// test can open its own reader on it.
// All paths are absolute locations on drive H:.
// testFile and testFile4 are separate instances pointing at the same path.
18 HMMFile testFile = new HMMFile("H:/HMMERFile.txt");
20 File file = new File("H:/HMMERFile.txt");
22 HMMFile testFile2 = new HMMFile("H:/EmptyFile.txt");
24 File file2 = new File("H:/EmptyFile.txt");
26 HMMFile testFile3 = new HMMFile("H:/HMMERFile2.txt");
28 File file3 = new File("H:/HMMERFile2.txt");
30 HMMFile testFile4 = new HMMFile("H:/HMMERFile.txt");
32 File file4 = new File("H:/HMMERFile.txt");
// End-to-end check of HMMFile.parse(): builds an HMMFile for
// H:/HMMTutorialExample.hmm, parses it, and then inspects the resulting hmm
// object: header fields, threshold/cutoff values, distribution slope and
// location per statistic, symbols, emission and transition values, alignment
// column indexes, and per-position annotations.
35 public void testParse() throws IOException
37 HMMFile integrationTestFile = new HMMFile("H:/HMMTutorialExample.hmm");
38 integrationTestFile.parse();
// Header fields of the parsed model.
41 assertEquals(integrationTestFile.hmm.getName(), "MADE1");
42 assertEquals(integrationTestFile.hmm.getAccessionNumber(),
44 assertEquals(integrationTestFile.hmm.getDescription(),
45 "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
46 assertEquals(integrationTestFile.hmm.getLength().intValue(), 80);
47 assertEquals(integrationTestFile.hmm.getMaxInstanceLength().intValue(),
49 assertEquals(integrationTestFile.hmm.getAlphabetType(), "DNA");
50 assertEquals(integrationTestFile.hmm.getReferenceAnnotationFlag(),
52 assertEquals(integrationTestFile.hmm.getModelMaskedFlag(), false);
54 integrationTestFile.hmm.getConsensusResidueAnnotationFlag(),
57 integrationTestFile.hmm.getConsensusStructureAnnotationFlag(),
59 assertEquals(integrationTestFile.hmm.getMapAnnotationFlag(), true);
60 assertEquals(integrationTestFile.hmm.getDate(),
61 "Tue Feb 19 20:33:41 2013");
62 assertNull(integrationTestFile.hmm.getCommandLineLog());
63 assertEquals(integrationTestFile.hmm.getSequenceNumber().intValue(),
65 assertEquals(integrationTestFile.hmm.getEffectiveSequenceNumber(),
67 assertEquals(integrationTestFile.hmm.getCheckSum().longValue(),
// Gathering thresholds, trusted cutoffs and noise cutoffs are all expected to
// be null for this fixture.
69 assertNull(integrationTestFile.hmm.getGatheringThreshold1());
70 assertNull(integrationTestFile.hmm.getGatheringThreshold2());
71 assertNull(integrationTestFile.hmm.getTrustedCutoff1());
72 assertNull(integrationTestFile.hmm.getTrustedCutoff2());
73 assertNull(integrationTestFile.hmm.getNoiseCutoff1());
74 assertNull(integrationTestFile.hmm.getNoiseCutoff2());
// Slope and location of the distribution, looked up by the statistic names
// "MSV", "VITERBI" and "FORWARD".
75 assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("MSV"),
77 assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("VITERBI"),
79 assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("FORWARD"),
81 assertEquals(integrationTestFile.hmm.getLocationOfDistribution("MSV"),
84 integrationTestFile.hmm.getLocationOfDistribution("VITERBI"),
87 integrationTestFile.hmm.getLocationOfDistribution("FORWARD"),
// getSymbols() is compared against an expected List<Character>.
90 List<Character> symbols = new ArrayList<>();
96 assertEquals(integrationTestFile.hmm.getSymbols(), symbols);
// Expected average match state emission probabilities (four values).
98 List<Double> averages = new ArrayList<>();
99 averages.add(1.24257);
100 averages.add(1.59430);
101 averages.add(1.62906);
102 averages.add(1.16413);
104 assertEquals(integrationTestFile.hmm
105 .getAverageMatchStateEmissionProbabilities(), averages);
// Spot checks on individual entries of the insert-zero emissions and the
// begin state transitions.
107 assertEquals(integrationTestFile.hmm.getInsertZeroEmissions().get(2),
109 assertEquals(integrationTestFile.hmm.getInsertZeroEmissions().get(3),
112 assertEquals(integrationTestFile.hmm.getBeginStateTransitions().get(1),
114 assertEquals(integrationTestFile.hmm.getBeginStateTransitions().get(4),
// Spot checks on match emissions, addressed by two list indexes.
117 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(1).get(1),
119 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(8).get(0),
121 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(12).get(2),
123 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(43).get(3),
125 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(54).get(2),
127 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(23).get(2),
129 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(56).get(1),
131 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(65).get(0),
133 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(21).get(0),
135 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(79).get(3),
137 assertEquals(integrationTestFile.hmm.getMatchEmissions().get(76).get(3),
// Spot checks on insert emissions, addressed the same way; the state
// transition checks follow them.
140 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(23).get(0),
142 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(54).get(3),
144 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(65).get(3),
146 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(57).get(2),
148 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(42).get(1),
150 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(12).get(3),
152 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(6).get(1),
154 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(59).get(0),
156 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(78).get(0),
158 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(17).get(2),
160 assertEquals(integrationTestFile.hmm.getInsertEmissions().get(0).get(2),
164 integrationTestFile.hmm.getStateTransitions().get(13).get(1),
167 integrationTestFile.hmm.getStateTransitions().get(64).get(2),
170 integrationTestFile.hmm.getStateTransitions().get(45).get(6),
173 integrationTestFile.hmm.getStateTransitions().get(71).get(4),
176 integrationTestFile.hmm.getStateTransitions().get(32).get(5),
179 integrationTestFile.hmm.getStateTransitions().get(9).get(0),
182 integrationTestFile.hmm.getStateTransitions().get(0).get(3),
185 integrationTestFile.hmm.getStateTransitions().get(31).get(6),
188 integrationTestFile.hmm.getStateTransitions().get(79).get(2));
190 integrationTestFile.hmm.getStateTransitions().get(3).get(1),
193 integrationTestFile.hmm.getStateTransitions().get(16).get(4),
// Alignment column indexes at selected positions.
196 assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(0)
198 assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(9)
200 assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(12)
202 assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(56)
204 assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(79)
// Annotations at selected positions, looked up by the keys "RF", "CS",
// "CONS" and "MM".
207 assertEquals(integrationTestFile.hmm.getAnnotations().get(0).get("RF")
209 assertEquals(integrationTestFile.hmm.getAnnotations().get(3).get("CS")
211 assertEquals(integrationTestFile.hmm.getAnnotations().get(65)
212 .get("CONS").charValue(), 't');
213 assertEquals(integrationTestFile.hmm.getAnnotations().get(23).get("MM")
215 assertEquals(integrationTestFile.hmm.getAnnotations().get(56).get("MM")
217 assertEquals(integrationTestFile.hmm.getAnnotations().get(76).get("RF")
219 assertEquals(integrationTestFile.hmm.getAnnotations().get(79)
220 .get("CONS").charValue(), 'a');
// Exercises parseFileProperties(BufferedReader) on three fixtures, each read
// through a FileReader wrapped in a BufferedReader:
//   1. testFile  (H:/HMMERFile.txt)  - populated header,
//   2. testFile2 (H:/EmptyFile.txt)  - value getters are expected to be null,
//   3. testFile3 (H:/HMMERFile2.txt) - a second populated header in which the
//      noise cutoffs and the slope/location statistics are expected to be null.
// NOTE(review): the third argument of the three-argument assertEquals calls
// (2d, 4d) is presumably TestNG's absolute delta, not a number of decimal
// places, so those checks tolerate differences of up to 2.0 / 4.0 - confirm
// and tighten if that was not intended.
225 public void testParseFileProperties() throws IOException
227 FileReader fr = new FileReader(file);
228 BufferedReader br = new BufferedReader(fr);
229 testFile.parseFileProperties(br);
// Fixture 1: populated header.
232 assertEquals(testFile.hmm.getName(), "fn3");
233 assertEquals(testFile.hmm.getAccessionNumber(), "PF00041.13");
234 assertEquals(testFile.hmm.getDescription(),
235 "Fibronectin type III domain");
236 assertEquals(testFile.hmm.getLength().intValue(), 4);
237 assertNull(testFile.hmm.getMaxInstanceLength());
238 assertEquals(testFile.hmm.getAlphabetType(), "amino");
239 assertEquals(testFile.hmm.getReferenceAnnotationFlag(), false);
240 assertEquals(testFile.hmm.getModelMaskedFlag(), false);
241 assertEquals(testFile.hmm.getConsensusResidueAnnotationFlag(), true);
242 assertEquals(testFile.hmm.getConsensusStructureAnnotationFlag(), true);
243 assertEquals(testFile.hmm.getMapAnnotationFlag(), true);
244 assertEquals(testFile.hmm.getDate(), "Fri Feb 15 06:04:13 2013");
245 assertNull(testFile.hmm.getCommandLineLog());
246 assertEquals(testFile.hmm.getSequenceNumber().intValue(), 106);
247 assertEquals(testFile.hmm.getEffectiveSequenceNumber(), 11.415833, 4d);
248 assertEquals(testFile.hmm.getCheckSum().longValue(), 3564431818l);
249 assertEquals(testFile.hmm.getGatheringThreshold1(), 8.00, 2d);
250 assertEquals(testFile.hmm.getGatheringThreshold2(), 7.20, 2d);
251 assertEquals(testFile.hmm.getTrustedCutoff1(), 8.00, 2d);
252 assertEquals(testFile.hmm.getTrustedCutoff2(), 7.20, 2d);
253 assertEquals(testFile.hmm.getNoiseCutoff1(), 7.90, 2d);
254 assertEquals(testFile.hmm.getNoiseCutoff2(), 7.90, 2d);
255 assertEquals(testFile.hmm.getSlopeOfDistribution("MSV"), -9.4043, 4d);
256 assertEquals(testFile.hmm.getSlopeOfDistribution("VITERBI"), -9.7737,
258 assertEquals(testFile.hmm.getSlopeOfDistribution("FORWARD"), -3.8341,
260 assertEquals(testFile.hmm.getLocationOfDistribution("MSV"), 0.71847,
262 assertEquals(testFile.hmm.getLocationOfDistribution("VITERBI"), 0.71847,
264 assertEquals(testFile.hmm.getLocationOfDistribution("FORWARD"), 0.71847,
// Fixture 2: H:/EmptyFile.txt.
267 FileReader fr2 = new FileReader(file2);
268 BufferedReader br2 = new BufferedReader(fr2);
269 testFile2.parseFileProperties(br2);
273 assertNull(testFile2.hmm.getName());
274 assertNull(testFile2.hmm.getAccessionNumber());
275 assertNull(testFile2.hmm.getDescription());
276 assertNull(testFile2.hmm.getLength());
277 assertNull(testFile2.hmm.getMaxInstanceLength());
278 assertNull(testFile2.hmm.getAlphabetType());
279 assertEquals(testFile2.hmm.getReferenceAnnotationFlag(), false);
280 assertEquals(testFile2.hmm.getModelMaskedFlag(), false);
281 assertEquals(testFile2.hmm.getConsensusResidueAnnotationFlag(), false);
282 assertEquals(testFile2.hmm.getConsensusStructureAnnotationFlag(),
284 assertEquals(testFile2.hmm.getMapAnnotationFlag(), false);
285 assertNull(testFile2.hmm.getDate());
286 assertNull(testFile2.hmm.getCommandLineLog());
287 assertNull(testFile2.hmm.getSequenceNumber());
288 assertNull(testFile2.hmm.getEffectiveSequenceNumber());
289 assertNull(testFile2.hmm.getCheckSum());
290 assertNull(testFile2.hmm.getGatheringThreshold1());
291 assertNull(testFile2.hmm.getGatheringThreshold2());
292 assertNull(testFile2.hmm.getTrustedCutoff1());
293 assertNull(testFile2.hmm.getTrustedCutoff2());
294 assertNull(testFile2.hmm.getNoiseCutoff1());
295 assertNull(testFile2.hmm.getNoiseCutoff2());
296 assertNull(testFile2.hmm.getSlopeOfDistribution("MSV"));
297 assertNull(testFile2.hmm.getSlopeOfDistribution("VITERBI"));
298 assertNull(testFile2.hmm.getSlopeOfDistribution("FORWARD"));
299 assertNull(testFile2.hmm.getLocationOfDistribution("MSV"));
300 assertNull(testFile2.hmm.getLocationOfDistribution("VITERBI"));
301 assertNull(testFile2.hmm.getLocationOfDistribution("FORWARD"));
// Fixture 3: H:/HMMERFile2.txt.
303 FileReader fr3 = new FileReader(file3);
304 BufferedReader br3 = new BufferedReader(fr3);
305 testFile3.parseFileProperties(br3);
309 assertEquals(testFile3.hmm.getName(), "th4");
310 assertEquals(testFile3.hmm.getAccessionNumber(), "PF99041.16");
311 assertEquals(testFile3.hmm.getDescription(),
312 "Fibronectin type I domain");
313 assertEquals(testFile3.hmm.getLength().intValue(), 10);
314 assertEquals(testFile3.hmm.getMaxInstanceLength().intValue(), 6);
315 assertEquals(testFile3.hmm.getAlphabetType(), "amino");
316 assertEquals(testFile3.hmm.getReferenceAnnotationFlag(), true);
317 assertEquals(testFile3.hmm.getModelMaskedFlag(), false);
318 assertEquals(testFile3.hmm.getConsensusResidueAnnotationFlag(), false);
319 assertEquals(testFile3.hmm.getConsensusStructureAnnotationFlag(),
321 assertEquals(testFile3.hmm.getMapAnnotationFlag(), false);
322 assertEquals(testFile3.hmm.getDate(), "Tue Jan 01 11:02:59 2000");
323 assertEquals(testFile3.hmm.getCommandLineLog(), "this is the log");
324 assertEquals(testFile3.hmm.getSequenceNumber().intValue(), 567);
325 assertEquals(testFile3.hmm.getEffectiveSequenceNumber(), 15.964683, 4d);
326 assertEquals(testFile3.hmm.getCheckSum().longValue(), 9485949654l);
327 assertEquals(testFile3.hmm.getGatheringThreshold1(), 6.40, 2d);
328 assertEquals(testFile3.hmm.getGatheringThreshold2(), 7.20, 2d);
329 assertEquals(testFile3.hmm.getTrustedCutoff1(), 2.40, 2d);
330 assertEquals(testFile3.hmm.getTrustedCutoff2(), 7.00, 2d);
331 assertNull(testFile3.hmm.getNoiseCutoff1());
332 assertNull(testFile3.hmm.getNoiseCutoff2());
333 assertNull(testFile3.hmm.getSlopeOfDistribution("MSV"));
334 assertNull(testFile3.hmm.getSlopeOfDistribution("VITERBI"));
335 assertNull(testFile3.hmm.getSlopeOfDistribution("FORWARD"));
336 assertNull(testFile3.hmm.getLocationOfDistribution("MSV"));
337 assertNull(testFile3.hmm.getLocationOfDistribution("VITERBI"));
338 assertNull(testFile3.hmm.getLocationOfDistribution("FORWARD"));
342 * @Test public void testParseModel() throws IOException { HiddenMarkovModel
343 * hmm = new HiddenMarkovModel(); HMMFile testFile = new HMMFile(hmm,
344 * "H:/HMMERFile.txt"); File file = new File("H:/HMMERFile.txt");
345 * FileReader fr = new FileReader(file); BufferedReader br = new
346 * BufferedReader(fr); testFile.parseFileProperties(br);
347 * testFile.parseModel(br); br.close(); fr.close();
// Checks getTransitionType's mapping from two-letter transition names to
// integer codes: "mm"=0, "mi"=1, "md"=2, "im"=3, "ii"=4, "dm"=5, "dd"=6.
// A name outside that set ("df") is expected to yield null.
353 public void testGetTransitionType()
356 assertEquals(testFile.getTransitionType("mm").intValue(), 0);
357 assertEquals(testFile.getTransitionType("mi").intValue(), 1);
358 assertEquals(testFile.getTransitionType("md").intValue(), 2);
359 assertEquals(testFile.getTransitionType("im").intValue(), 3);
360 assertEquals(testFile.getTransitionType("ii").intValue(), 4);
361 assertEquals(testFile.getTransitionType("dm").intValue(), 5);
362 assertEquals(testFile.getTransitionType("dd").intValue(), 6);
363 assertNull(testFile.getTransitionType("df"));
// Feeds readStats a Scanner over a single statistics line and then reads the
// entry stored in getEValueStatistics() under the statistic's name.
// As the second case shows, the tokens are: alignment mode, statistic name,
// slope, location.
// NOTE(review): the trailing 2d / 4d arguments are presumably TestNG's
// absolute delta, so these comparisons accept differences of up to 2.0 / 4.0 -
// confirm and tighten if that was not intended.
368 public void testReadStats()
370 Scanner scanner = new Scanner("LOCAL MSV 5.6943 6.2313");
371 testFile.readStats(scanner);
372 assertEquals(testFile.hmm.getEValueStatistics().get("MSV")
373 .getAlignmentModeConfiguration(), "LOCAL");
375 testFile.hmm.getEValueStatistics().get("MSV")
376 .getSlopeOfDistribution(),
378 assertEquals(testFile.hmm.getEValueStatistics().get("MSV")
379 .getLocationOfDistribution(), 6.2313, 4d);
// Second case: integer-formatted slope and a negative location.
382 Scanner scanner2 = new Scanner("GLOBAL VITERBI 3 -0.234");
383 testFile.readStats(scanner2);
384 assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
385 .getAlignmentModeConfiguration(), "GLOBAL");
386 assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
387 .getSlopeOfDistribution(), 3, 2d);
388 assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
389 .getLocationOfDistribution(), -0.234, 4d);
// Opens file4, skips its first 24 lines, sets the model's symbols from the
// line "HMM A B C D E F G H I", and calls parseBeginNodeData on the reader.
// The resulting insert-zero emissions and begin state transitions are then
// compared with the expected lists built below.
395 public void testParseBeginNodeData() throws IOException
397 FileReader fr = new FileReader(file4);
398 BufferedReader br = new BufferedReader(fr);
399 for (int i = 0; i < 24; i++)
401 br.readLine(); // this is done to reach the begin node
405 testFile4.hmm.fillSymbols("HMM A B C D E F G H I");
406 testFile4.parseBeginNodeData(br);
407 ArrayList<Double> emissions = new ArrayList<>();
408 ArrayList<Double> transitions = new ArrayList<>();
// Nine expected emission values, one per symbol passed to fillSymbols above.
410 emissions.add(2.68618);
411 emissions.add(4.42225);
412 emissions.add(2.77519);
413 emissions.add(2.73123);
414 emissions.add(3.46354);
415 emissions.add(2.40513);
416 emissions.add(3.72494);
417 emissions.add(3.29354);
418 emissions.add(3.61503);
// Seven expected transition values; the last entry is expected to be null.
420 transitions.add(0.00338);
421 transitions.add(6.08833);
422 transitions.add(6.81068);
423 transitions.add(0.61958);
424 transitions.add(0.77255);
425 transitions.add(0.00000);
426 transitions.add(null);
428 assertEquals(testFile4.hmm.getInsertZeroEmissions(), emissions);
429 assertEquals(testFile4.hmm.getBeginStateTransitions(), transitions);
// Checks the static HMMFile.fillList(scanner, count): the returned list is
// compared against an expected list of doubles, first for a nine-value input
// and then for a five-value input with more decimal places.
434 public void testFillList()
436 Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
437 ArrayList<Double> filledArray = new ArrayList<>();
439 filledArray.add(1.3);
440 filledArray.add(2.4);
441 filledArray.add(5.3);
442 filledArray.add(3.9);
443 filledArray.add(9.8);
444 filledArray.add(4.7);
445 filledArray.add(4.3);
446 filledArray.add(2.3);
447 filledArray.add(6.9);
449 assertEquals(HMMFile.fillList(scanner1, 9), filledArray);
// Second input: five values.
453 Scanner scanner2 = new Scanner(
454 "1.346554 5.58756754 35.3523645 12345.3564 1.4");
455 filledArray.add(1.346554);
456 filledArray.add(5.58756754);
457 filledArray.add(35.3523645);
458 filledArray.add(12345.3564);
459 filledArray.add(1.4);
460 assertEquals(HMMFile.fillList(scanner2, 5), filledArray);
// Opens the shared fixture file, skips its first 23 lines, and calls
// parseModel on testFile with the reader. Individual match emissions and
// insert emissions are then spot-checked, each addressed by two list indexes.
// NOTE(review): these two-argument assertEquals calls compare against double
// literals with no tolerance - confirm exact equality is intended.
466 public void testParseModel() throws IOException
468 FileReader fr = new FileReader(file);
469 BufferedReader br = new BufferedReader(fr);
470 for (int i = 0; i < 23; i++)
472 br.readLine(); // this is done to reach the begin node
476 testFile.parseModel(br);
// Match emissions.
477 assertEquals(testFile.hmm.getMatchEmissions().get(0).get(0), 3.16986);
478 assertEquals(testFile.hmm.getMatchEmissions().get(0).get(3), 3.29953);
479 assertEquals(testFile.hmm.getMatchEmissions().get(1).get(2), 2.24744);
480 assertEquals(testFile.hmm.getMatchEmissions().get(1).get(8), 4.25623);
481 assertEquals(testFile.hmm.getMatchEmissions().get(2).get(5), 3.48010);
482 assertEquals(testFile.hmm.getMatchEmissions().get(2).get(6), 4.51877);
483 assertEquals(testFile.hmm.getMatchEmissions().get(3).get(4), 5.26587);
484 assertEquals(testFile.hmm.getMatchEmissions().get(3).get(8), 4.99111);
// Insert emissions.
485 assertEquals(testFile.hmm.getInsertEmissions().get(0).get(3), 2.73088);
486 assertEquals(testFile.hmm.getInsertEmissions().get(0).get(6), 3.72505);
487 assertEquals(testFile.hmm.getInsertEmissions().get(1).get(2), 2.77519);
488 assertEquals(testFile.hmm.getInsertEmissions().get(1).get(8), 3.61503);
489 assertEquals(testFile.hmm.getInsertEmissions().get(2).get(0), 2.68618);
490 assertEquals(testFile.hmm.getInsertEmissions().get(2).get(8), 3.61503);
491 assertEquals(testFile.hmm.getInsertEmissions().get(3).get(2), 2.77519);
492 assertEquals(testFile.hmm.getInsertEmissions().get(3).get(3), 2.73123);
// Calls parseAnnotations(scanner, 0) on testFile4 twice:
//   1. with the map annotation flag set to true and the input "1 t - - -",
//      after which the alignment column index at position 0 is expected to
//      be 1;
//   2. with the flag set to false and the input "- S g C Y", after clearing
//      the alignment column indexes and annotations left by the first call.
// In both cases the "CONS", "RF", "MM" and "CS" annotation characters at
// position 0 are checked.
496 public void testParseAnnotations()
498 testFile4.hmm.setMapAnnotationFlag(true);
499 Scanner scanner = new Scanner("1 t - - -");
500 testFile4.parseAnnotations(scanner, 0);
503 testFile4.hmm.getAlignmentColumnIndexes().get(0).intValue(), 1);
505 testFile4.hmm.getAnnotations().get(0).get("CONS").charValue(),
508 testFile4.hmm.getAnnotations().get(0).get("RF").charValue(),
511 testFile4.hmm.getAnnotations().get(0).get("MM").charValue(),
514 testFile4.hmm.getAnnotations().get(0).get("CS").charValue(),
// Reset the state written by the first call before parsing the second input.
517 testFile4.hmm.setMapAnnotationFlag(false);
518 testFile4.hmm.getAlignmentColumnIndexes().clear();
519 testFile4.hmm.getAnnotations().clear();
520 Scanner scanner2 = new Scanner("- S g C Y");
521 testFile4.parseAnnotations(scanner2, 0);
524 testFile4.hmm.getAnnotations().get(0).get("CONS").charValue(),
527 testFile4.hmm.getAnnotations().get(0).get("RF").charValue(),
530 testFile4.hmm.getAnnotations().get(0).get("MM").charValue(),
533 testFile4.hmm.getAnnotations().get(0).get("CS").charValue(),
// Parses H:/HMMTutorialExample.hmm and then writes the model back out by
// calling exportFile with the target path H:/WriteTestFile.hmm.
538 public void testExportFile() throws IOException
540 HMMFile exportTestFile = new HMMFile("H:/HMMTutorialExample.hmm");
541 exportTestFile.parse();
542 exportTestFile.exportFile("H:/WriteTestFile.hmm");