package jalview.io;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

import org.testng.annotations.Test;

/**
 * TestNG tests for {@link HMMFile}: parsing of the file header properties,
 * the begin node, the model body and per-node annotations, plus the small
 * helpers {@code getTransitionType}, {@code readStats} and {@code fillList}.
 *
 * <p>
 * NOTE(review): all fixtures are read from hard-coded paths on an
 * {@code H:/} drive, so these tests only run on a machine that has those
 * files.
 *
 * <p>
 * NOTE(review): the third argument of the floating point
 * {@code assertEquals} calls is an absolute tolerance. The values used here
 * ({@code 4d}, {@code 2d}) look as if they were meant as "number of decimal
 * places"; as written they accept any value within +/-4.0 (or +/-2.0) of the
 * expectation. They are left unchanged because the fixture files are not
 * available to confirm the exact values - TODO tighten once verified.
 *
 * <p>
 * NOTE(review): {@code testFile} and {@code testFile4} are shared, mutable
 * fixtures used by several test methods, so results may depend on the order
 * in which the tests execute.
 */
public class HMMFileTest {

  HMMFile testFile = new HMMFile("H:/HMMERFile.txt");

  File file = new File("H:/HMMERFile.txt");

  HMMFile testFile2 = new HMMFile("H:/EmptyFile.txt");

  File file2 = new File("H:/EmptyFile.txt");

  HMMFile testFile3 = new HMMFile("H:/HMMERFile2.txt");

  File file3 = new File("H:/HMMERFile2.txt");

  HMMFile testFile4 = new HMMFile("H:/HMMERFile.txt");

  File file4 = new File("H:/HMMERFile.txt");

  /**
   * Opens a buffered reader on the given fixture file. The caller is
   * responsible for closing it (closing the returned reader also closes the
   * underlying {@link FileReader}).
   *
   * @param source
   *          the file to read
   * @return a new reader positioned at the start of the file
   * @throws IOException
   *           if the file cannot be opened
   */
  private static BufferedReader openReader(File source) throws IOException {
    return new BufferedReader(new FileReader(source));
  }

  /**
   * Reads and discards the given number of lines from the reader.
   *
   * @param reader
   *          the reader to advance
   * @param count
   *          number of {@code readLine()} calls to make
   * @throws IOException
   *           if reading fails
   */
  private static void skipLines(BufferedReader reader, int count)
          throws IOException {
    for (int i = 0; i < count; i++) {
      reader.readLine();
    }
  }

  /**
   * Parses the complete tutorial example file and spot-checks header
   * properties, emissions, transitions, alignment column indexes and
   * annotations.
   *
   * @throws IOException
   *           if the fixture file cannot be read
   */
  @Test
  public void testParse() throws IOException {
    HMMFile parsed = new HMMFile("H:/HMMTutorialExample.hmm");
    parsed.parse();

    // file properties
    assertEquals(parsed.hmm.getName(), "MADE1");
    assertEquals(parsed.hmm.getAccessionNumber(), "DF0000629.2");
    assertEquals(parsed.hmm.getDescription(),
            "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
    assertEquals(parsed.hmm.getLength().intValue(), 80);
    assertEquals(parsed.hmm.getMaxInstanceLength().intValue(), 426);
    assertEquals(parsed.hmm.getAlphabetType(), "DNA");
    assertEquals(parsed.hmm.getReferenceAnnotationFlag(), true);
    assertEquals(parsed.hmm.getModelMaskedFlag(), false);
    assertEquals(parsed.hmm.getConsensusResidueAnnotationFlag(), true);
    assertEquals(parsed.hmm.getConsensusStructureAnnotationFlag(), false);
    assertEquals(parsed.hmm.getMapAnnotationFlag(), true);
    assertEquals(parsed.hmm.getDate(), "Tue Feb 19 20:33:41 2013");
    assertNull(parsed.hmm.getCommandLineLog());
    assertEquals(parsed.hmm.getSequenceNumber().intValue(), 1997);
    assertEquals(parsed.hmm.getEffectiveSequenceNumber(), 3.911818, 4d);
    assertEquals(parsed.hmm.getCheckSum().longValue(), 3015610723L);
    assertNull(parsed.hmm.getGatheringThreshold1());
    assertNull(parsed.hmm.getGatheringThreshold2());
    assertNull(parsed.hmm.getTrustedCutoff1());
    assertNull(parsed.hmm.getTrustedCutoff2());
    assertNull(parsed.hmm.getNoiseCutoff1());
    assertNull(parsed.hmm.getNoiseCutoff2());
    assertEquals(parsed.hmm.getSlopeOfDistribution("MSV"), -8.5786, 4d);
    assertEquals(parsed.hmm.getSlopeOfDistribution("VITERBI"), -9.3632, 4d);
    assertEquals(parsed.hmm.getSlopeOfDistribution("FORWARD"), -3.4823, 4d);
    assertEquals(parsed.hmm.getLocationOfDistribution("MSV"), 0.71858, 4d);
    assertEquals(parsed.hmm.getLocationOfDistribution("VITERBI"), 0.71858,
            4d);
    assertEquals(parsed.hmm.getLocationOfDistribution("FORWARD"), 0.71858,
            4d);

    // alphabet and average match state emissions
    List<Character> symbols = new ArrayList<>();
    symbols.add('A');
    symbols.add('C');
    symbols.add('G');
    symbols.add('T');
    assertEquals(parsed.hmm.getSymbols(), symbols);

    List<Double> averages = new ArrayList<>();
    averages.add(1.24257);
    averages.add(1.59430);
    averages.add(1.62906);
    averages.add(1.16413);
    assertEquals(parsed.hmm.getAverageMatchStateEmissionProbabilities(),
            averages);

    // begin node
    assertEquals(parsed.hmm.getInsertZeroEmissions().get(2), 1.38629);
    assertEquals(parsed.hmm.getInsertZeroEmissions().get(3), 1.38629);
    assertEquals(parsed.hmm.getBeginStateTransitions().get(1), 3.94183);
    assertEquals(parsed.hmm.getBeginStateTransitions().get(4), 0.26236);

    // match emissions
    assertEquals(parsed.hmm.getMatchEmissions().get(1).get(1), 2.37873);
    assertEquals(parsed.hmm.getMatchEmissions().get(8).get(0), 2.16916);
    assertEquals(parsed.hmm.getMatchEmissions().get(12).get(2), 2.32214);
    assertEquals(parsed.hmm.getMatchEmissions().get(43).get(3), 2.60783);
    assertEquals(parsed.hmm.getMatchEmissions().get(54).get(2), 2.46442);
    assertEquals(parsed.hmm.getMatchEmissions().get(23).get(2), 2.50691);
    assertEquals(parsed.hmm.getMatchEmissions().get(56).get(1), 2.32720);
    assertEquals(parsed.hmm.getMatchEmissions().get(65).get(0), 2.79349);
    assertEquals(parsed.hmm.getMatchEmissions().get(21).get(0), 2.54484);
    assertEquals(parsed.hmm.getMatchEmissions().get(79).get(3), 2.88183);
    assertEquals(parsed.hmm.getMatchEmissions().get(76).get(3), 1.84373);

    // insert emissions
    assertEquals(parsed.hmm.getInsertEmissions().get(23).get(0), 1.35803);
    assertEquals(parsed.hmm.getInsertEmissions().get(54).get(3), 1.46331);
    assertEquals(parsed.hmm.getInsertEmissions().get(65).get(3), 1.39101);
    assertEquals(parsed.hmm.getInsertEmissions().get(57).get(2), 1.38112);
    assertEquals(parsed.hmm.getInsertEmissions().get(42).get(1), 1.58747);
    assertEquals(parsed.hmm.getInsertEmissions().get(12).get(3), 1.38740);
    assertEquals(parsed.hmm.getInsertEmissions().get(6).get(1), 1.38524);
    assertEquals(parsed.hmm.getInsertEmissions().get(59).get(0), 1.03649);
    assertEquals(parsed.hmm.getInsertEmissions().get(78).get(0), 1.38629);
    assertEquals(parsed.hmm.getInsertEmissions().get(17).get(2), 1.39937);
    assertEquals(parsed.hmm.getInsertEmissions().get(0).get(2), 1.38629);

    // state transitions
    assertEquals(parsed.hmm.getStateTransitions().get(13).get(1), 4.02482);
    assertEquals(parsed.hmm.getStateTransitions().get(64).get(2), 4.03073);
    assertEquals(parsed.hmm.getStateTransitions().get(45).get(6), 0.42814);
    assertEquals(parsed.hmm.getStateTransitions().get(71).get(4), 0.28542);
    assertEquals(parsed.hmm.getStateTransitions().get(32).get(5), 1.18729);
    assertEquals(parsed.hmm.getStateTransitions().get(9).get(0), 0.03536);
    assertEquals(parsed.hmm.getStateTransitions().get(0).get(3), 1.46634);
    assertEquals(parsed.hmm.getStateTransitions().get(31).get(6), 0.44749);
    assertNull(parsed.hmm.getStateTransitions().get(79).get(2));
    assertEquals(parsed.hmm.getStateTransitions().get(3).get(1), 4.05203);
    assertEquals(parsed.hmm.getStateTransitions().get(16).get(4), 0.26771);

    // alignment column indexes
    assertEquals(parsed.hmm.getAlignmentColumnIndexes().get(0).intValue(),
            1);
    assertEquals(parsed.hmm.getAlignmentColumnIndexes().get(9).intValue(),
            18);
    assertEquals(parsed.hmm.getAlignmentColumnIndexes().get(12).intValue(),
            28);
    assertEquals(parsed.hmm.getAlignmentColumnIndexes().get(56).intValue(),
            999);
    assertEquals(parsed.hmm.getAlignmentColumnIndexes().get(79).intValue(),
            1112);

    // annotations
    assertEquals(parsed.hmm.getAnnotations().get(0).get("RF").charValue(),
            'x');
    assertEquals(parsed.hmm.getAnnotations().get(3).get("CS").charValue(),
            '-');
    assertEquals(
            parsed.hmm.getAnnotations().get(65).get("CONS").charValue(),
            't');
    assertEquals(parsed.hmm.getAnnotations().get(23).get("MM").charValue(),
            '-');
    assertEquals(parsed.hmm.getAnnotations().get(56).get("MM").charValue(),
            '-');
    assertEquals(parsed.hmm.getAnnotations().get(76).get("RF").charValue(),
            'x');
    assertEquals(
            parsed.hmm.getAnnotations().get(79).get("CONS").charValue(),
            'a');
  }

  /**
   * Checks {@code parseFileProperties} against three fixtures: a populated
   * file, an empty file (everything expected to be null/false) and a second
   * populated file with a different set of properties present.
   *
   * @throws IOException
   *           if a fixture file cannot be read
   */
  @Test
  public void testParseFileProperties() throws IOException {
    // try-with-resources so the readers are released even when parsing throws
    try (BufferedReader br = openReader(file)) {
      testFile.parseFileProperties(br);
    }

    assertEquals(testFile.hmm.getName(), "fn3");
    assertEquals(testFile.hmm.getAccessionNumber(), "PF00041.13");
    assertEquals(testFile.hmm.getDescription(),
            "Fibronectin type III domain");
    assertEquals(testFile.hmm.getLength().intValue(), 4);
    assertNull(testFile.hmm.getMaxInstanceLength());
    assertEquals(testFile.hmm.getAlphabetType(), "amino");
    assertEquals(testFile.hmm.getReferenceAnnotationFlag(), false);
    assertEquals(testFile.hmm.getModelMaskedFlag(), false);
    assertEquals(testFile.hmm.getConsensusResidueAnnotationFlag(), true);
    assertEquals(testFile.hmm.getConsensusStructureAnnotationFlag(), true);
    assertEquals(testFile.hmm.getMapAnnotationFlag(), true);
    assertEquals(testFile.hmm.getDate(), "Fri Feb 15 06:04:13 2013");
    assertNull(testFile.hmm.getCommandLineLog());
    assertEquals(testFile.hmm.getSequenceNumber().intValue(), 106);
    assertEquals(testFile.hmm.getEffectiveSequenceNumber(), 11.415833, 4d);
    assertEquals(testFile.hmm.getCheckSum().longValue(), 3564431818L);
    assertEquals(testFile.hmm.getGatheringThreshold1(), 8.00, 2d);
    assertEquals(testFile.hmm.getGatheringThreshold2(), 7.20, 2d);
    assertEquals(testFile.hmm.getTrustedCutoff1(), 8.00, 2d);
    assertEquals(testFile.hmm.getTrustedCutoff2(), 7.20, 2d);
    assertEquals(testFile.hmm.getNoiseCutoff1(), 7.90, 2d);
    assertEquals(testFile.hmm.getNoiseCutoff2(), 7.90, 2d);
    assertEquals(testFile.hmm.getSlopeOfDistribution("MSV"), -9.4043, 4d);
    assertEquals(testFile.hmm.getSlopeOfDistribution("VITERBI"), -9.7737,
            4d);
    assertEquals(testFile.hmm.getSlopeOfDistribution("FORWARD"), -3.8341,
            4d);
    assertEquals(testFile.hmm.getLocationOfDistribution("MSV"), 0.71847,
            4d);
    assertEquals(testFile.hmm.getLocationOfDistribution("VITERBI"), 0.71847,
            4d);
    assertEquals(testFile.hmm.getLocationOfDistribution("FORWARD"), 0.71847,
            4d);

    // empty file: nothing should have been populated
    try (BufferedReader br2 = openReader(file2)) {
      testFile2.parseFileProperties(br2);
    }

    assertNull(testFile2.hmm.getName());
    assertNull(testFile2.hmm.getAccessionNumber());
    assertNull(testFile2.hmm.getDescription());
    assertNull(testFile2.hmm.getLength());
    assertNull(testFile2.hmm.getMaxInstanceLength());
    assertNull(testFile2.hmm.getAlphabetType());
    assertEquals(testFile2.hmm.getReferenceAnnotationFlag(), false);
    assertEquals(testFile2.hmm.getModelMaskedFlag(), false);
    assertEquals(testFile2.hmm.getConsensusResidueAnnotationFlag(), false);
    assertEquals(testFile2.hmm.getConsensusStructureAnnotationFlag(),
            false);
    assertEquals(testFile2.hmm.getMapAnnotationFlag(), false);
    assertNull(testFile2.hmm.getDate());
    assertNull(testFile2.hmm.getCommandLineLog());
    assertNull(testFile2.hmm.getSequenceNumber());
    assertNull(testFile2.hmm.getEffectiveSequenceNumber());
    assertNull(testFile2.hmm.getCheckSum());
    assertNull(testFile2.hmm.getGatheringThreshold1());
    assertNull(testFile2.hmm.getGatheringThreshold2());
    assertNull(testFile2.hmm.getTrustedCutoff1());
    assertNull(testFile2.hmm.getTrustedCutoff2());
    assertNull(testFile2.hmm.getNoiseCutoff1());
    assertNull(testFile2.hmm.getNoiseCutoff2());
    assertNull(testFile2.hmm.getSlopeOfDistribution("MSV"));
    assertNull(testFile2.hmm.getSlopeOfDistribution("VITERBI"));
    assertNull(testFile2.hmm.getSlopeOfDistribution("FORWARD"));
    assertNull(testFile2.hmm.getLocationOfDistribution("MSV"));
    assertNull(testFile2.hmm.getLocationOfDistribution("VITERBI"));
    assertNull(testFile2.hmm.getLocationOfDistribution("FORWARD"));

    // second populated file
    try (BufferedReader br3 = openReader(file3)) {
      testFile3.parseFileProperties(br3);
    }

    assertEquals(testFile3.hmm.getName(), "th4");
    assertEquals(testFile3.hmm.getAccessionNumber(), "PF99041.16");
    assertEquals(testFile3.hmm.getDescription(),
            "Fibronectin type I domain");
    assertEquals(testFile3.hmm.getLength().intValue(), 10);
    assertEquals(testFile3.hmm.getMaxInstanceLength().intValue(), 6);
    assertEquals(testFile3.hmm.getAlphabetType(), "amino");
    assertEquals(testFile3.hmm.getReferenceAnnotationFlag(), true);
    assertEquals(testFile3.hmm.getModelMaskedFlag(), false);
    assertEquals(testFile3.hmm.getConsensusResidueAnnotationFlag(), false);
    assertEquals(testFile3.hmm.getConsensusStructureAnnotationFlag(),
            false);
    assertEquals(testFile3.hmm.getMapAnnotationFlag(), false);
    assertEquals(testFile3.hmm.getDate(), "Tue Jan 01 11:02:59 2000");
    assertEquals(testFile3.hmm.getCommandLineLog(), "this is the log");
    assertEquals(testFile3.hmm.getSequenceNumber().intValue(), 567);
    assertEquals(testFile3.hmm.getEffectiveSequenceNumber(), 15.964683, 4d);
    assertEquals(testFile3.hmm.getCheckSum().longValue(), 9485949654L);
    assertEquals(testFile3.hmm.getGatheringThreshold1(), 6.40, 2d);
    assertEquals(testFile3.hmm.getGatheringThreshold2(), 7.20, 2d);
    assertEquals(testFile3.hmm.getTrustedCutoff1(), 2.40, 2d);
    assertEquals(testFile3.hmm.getTrustedCutoff2(), 7.00, 2d);
    assertNull(testFile3.hmm.getNoiseCutoff1());
    assertNull(testFile3.hmm.getNoiseCutoff2());
    assertNull(testFile3.hmm.getSlopeOfDistribution("MSV"));
    assertNull(testFile3.hmm.getSlopeOfDistribution("VITERBI"));
    assertNull(testFile3.hmm.getSlopeOfDistribution("FORWARD"));
    assertNull(testFile3.hmm.getLocationOfDistribution("MSV"));
    assertNull(testFile3.hmm.getLocationOfDistribution("VITERBI"));
    assertNull(testFile3.hmm.getLocationOfDistribution("FORWARD"));
  }

  /*
   * Disabled earlier variant of testParseModel, kept for reference (plain
   * block comment so it is not picked up as Javadoc for the next method):
   *
   * @Test public void testParseModel() throws IOException { HiddenMarkovModel
   * hmm = new HiddenMarkovModel(); HMMFile testFile = new HMMFile(hmm,
   * "H:/HMMERFile.txt"); File file = new File("H:/HMMERFile.txt");
   * FileReader fr = new FileReader(file); BufferedReader br = new
   * BufferedReader(fr); testFile.parseFileProperties(br);
   * testFile.parseModel(br); br.close(); fr.close();
   *
   * }
   */

  /**
   * Checks the integer code returned for each known transition label and
   * that an unknown label yields null.
   */
  @Test
  public void testGetTransitionType() {
    assertEquals(testFile.getTransitionType("mm").intValue(), 0);
    assertEquals(testFile.getTransitionType("mi").intValue(), 1);
    assertEquals(testFile.getTransitionType("md").intValue(), 2);
    assertEquals(testFile.getTransitionType("im").intValue(), 3);
    assertEquals(testFile.getTransitionType("ii").intValue(), 4);
    assertEquals(testFile.getTransitionType("dm").intValue(), 5);
    assertEquals(testFile.getTransitionType("dd").intValue(), 6);
    assertNull(testFile.getTransitionType("df"));
  }

  /**
   * Feeds two statistics lines to {@code readStats} and checks the values
   * stored under the corresponding keys of the E-value statistics map.
   */
  @Test
  public void testReadStats() {
    try (Scanner scanner = new Scanner("LOCAL MSV 5.6943 6.2313")) {
      testFile.readStats(scanner);
    }
    assertEquals(testFile.hmm.getEValueStatistics().get("MSV")
            .getAlignmentModeConfiguration(), "LOCAL");
    assertEquals(testFile.hmm.getEValueStatistics().get("MSV")
            .getSlopeOfDistribution(), 5.6943, 4d);
    assertEquals(testFile.hmm.getEValueStatistics().get("MSV")
            .getLocationOfDistribution(), 6.2313, 4d);

    // previously the first scanner was closed twice and this one never was
    try (Scanner scanner2 = new Scanner("GLOBAL VITERBI 3 -0.234")) {
      testFile.readStats(scanner2);
    }
    assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
            .getAlignmentModeConfiguration(), "GLOBAL");
    assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
            .getSlopeOfDistribution(), 3, 2d);
    assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
            .getLocationOfDistribution(), -0.234, 4d);
  }

  /**
   * Skips to the begin node section of the fixture file and checks the
   * insert-zero emissions and begin state transitions that
   * {@code parseBeginNodeData} stores.
   *
   * @throws IOException
   *           if the fixture file cannot be read
   */
  @Test
  public void testParseBeginNodeData() throws IOException {
    try (BufferedReader br = openReader(file4)) {
      // this is done to reach the begin node data in the file
      skipLines(br, 24);

      testFile4.hmm.fillSymbols("HMM A B C D E F G H I");
      testFile4.parseBeginNodeData(br);
    }

    List<Double> emissions = new ArrayList<>();
    emissions.add(2.68618);
    emissions.add(4.42225);
    emissions.add(2.77519);
    emissions.add(2.73123);
    emissions.add(3.46354);
    emissions.add(2.40513);
    emissions.add(3.72494);
    emissions.add(3.29354);
    emissions.add(3.61503);

    List<Double> transitions = new ArrayList<>();
    transitions.add(0.00338);
    transitions.add(6.08833);
    transitions.add(6.81068);
    transitions.add(0.61958);
    transitions.add(0.77255);
    transitions.add(0.00000);
    transitions.add(null);

    assertEquals(testFile4.hmm.getInsertZeroEmissions(), emissions);
    assertEquals(testFile4.hmm.getBeginStateTransitions(), transitions);
  }

  /**
   * Checks that {@code HMMFile.fillList} returns the requested number of
   * values read from the scanner, in order.
   */
  @Test
  public void testFillList() {
    List<Double> filledArray = new ArrayList<>();

    try (Scanner scanner1 = new Scanner(
            "1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9")) {
      filledArray.add(1.3);
      filledArray.add(2.4);
      filledArray.add(5.3);
      filledArray.add(3.9);
      filledArray.add(9.8);
      filledArray.add(4.7);
      filledArray.add(4.3);
      filledArray.add(2.3);
      filledArray.add(6.9);
      assertEquals(HMMFile.fillList(scanner1, 9), filledArray);
    }

    filledArray.clear();

    try (Scanner scanner2 = new Scanner(
            "1.346554 5.58756754 35.3523645 12345.3564 1.4")) {
      filledArray.add(1.346554);
      filledArray.add(5.58756754);
      filledArray.add(35.3523645);
      filledArray.add(12345.3564);
      filledArray.add(1.4);
      assertEquals(HMMFile.fillList(scanner2, 5), filledArray);
    }
  }

  /**
   * Skips the header of the fixture file, runs {@code parseModel} on the
   * remainder and spot-checks match and insert emissions.
   *
   * @throws IOException
   *           if the fixture file cannot be read
   */
  @Test
  public void testParseModel() throws IOException {
    try (BufferedReader br = openReader(file)) {
      // this is done to reach the begin node data in the file
      skipLines(br, 23);

      testFile.parseModel(br);
    }

    assertEquals(testFile.hmm.getMatchEmissions().get(0).get(0), 3.16986);
    assertEquals(testFile.hmm.getMatchEmissions().get(0).get(3), 3.29953);
    assertEquals(testFile.hmm.getMatchEmissions().get(1).get(2), 2.24744);
    assertEquals(testFile.hmm.getMatchEmissions().get(1).get(8), 4.25623);
    assertEquals(testFile.hmm.getMatchEmissions().get(2).get(5), 3.48010);
    assertEquals(testFile.hmm.getMatchEmissions().get(2).get(6), 4.51877);
    assertEquals(testFile.hmm.getMatchEmissions().get(3).get(4), 5.26587);
    assertEquals(testFile.hmm.getMatchEmissions().get(3).get(8), 4.99111);

    assertEquals(testFile.hmm.getInsertEmissions().get(0).get(3), 2.73088);
    assertEquals(testFile.hmm.getInsertEmissions().get(0).get(6), 3.72505);
    assertEquals(testFile.hmm.getInsertEmissions().get(1).get(2), 2.77519);
    assertEquals(testFile.hmm.getInsertEmissions().get(1).get(8), 3.61503);
    assertEquals(testFile.hmm.getInsertEmissions().get(2).get(0), 2.68618);
    assertEquals(testFile.hmm.getInsertEmissions().get(2).get(8), 3.61503);
    assertEquals(testFile.hmm.getInsertEmissions().get(3).get(2), 2.77519);
    assertEquals(testFile.hmm.getInsertEmissions().get(3).get(3), 2.73123);
  }

  /**
   * Checks {@code parseAnnotations} for node 0, first with the map
   * annotation flag set (the alignment column index is also asserted) and
   * then with it cleared.
   */
  @Test
  public void testParseAnnotations() {
    testFile4.hmm.setMapAnnotationFlag(true);
    try (Scanner scanner = new Scanner("1 t - - -")) {
      testFile4.parseAnnotations(scanner, 0);
    }

    assertEquals(
            testFile4.hmm.getAlignmentColumnIndexes().get(0).intValue(), 1);
    assertEquals(
            testFile4.hmm.getAnnotations().get(0).get("CONS").charValue(),
            't');
    assertEquals(
            testFile4.hmm.getAnnotations().get(0).get("RF").charValue(),
            '-');
    assertEquals(
            testFile4.hmm.getAnnotations().get(0).get("MM").charValue(),
            '-');
    assertEquals(
            testFile4.hmm.getAnnotations().get(0).get("CS").charValue(),
            '-');

    // reset the shared fixture before the second scenario
    testFile4.hmm.setMapAnnotationFlag(false);
    testFile4.hmm.getAlignmentColumnIndexes().clear();
    testFile4.hmm.getAnnotations().clear();

    try (Scanner scanner2 = new Scanner("- S g C Y")) {
      testFile4.parseAnnotations(scanner2, 0);
    }

    assertEquals(
            testFile4.hmm.getAnnotations().get(0).get("CONS").charValue(),
            'S');
    assertEquals(
            testFile4.hmm.getAnnotations().get(0).get("RF").charValue(),
            'g');
    assertEquals(
            testFile4.hmm.getAnnotations().get(0).get("MM").charValue(),
            'C');
    assertEquals(
            testFile4.hmm.getAnnotations().get(0).get("CS").charValue(),
            'Y');
  }

  /**
   * Parses the tutorial example and writes it back out via
   * {@code exportFile}.
   *
   * <p>
   * NOTE(review): this test makes no assertions about the written file; it
   * only fails if parsing or exporting throws.
   *
   * @throws IOException
   *           if the fixture cannot be read or the output cannot be written
   */
  @Test
  public void testExportFile() throws IOException {
    HMMFile exportTestFile = new HMMFile("H:/HMMTutorialExample.hmm");
    exportTestFile.parse();
    exportTestFile.exportFile("H:/WriteTestFile.hmm");
  }
}