package jalview.io; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNull; import jalview.datamodel.HMMNode; import jalview.datamodel.HiddenMarkovModel; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Scanner; import org.testng.annotations.Test; public class HMMFileTest { HMMFile fn3 = new HMMFile("H:/fn3.hmm"); HMMFile emptyFile = new HMMFile("H:/EmptyFile.hmm"); HMMFile pKinase = new HMMFile("H:/Pkinase.hmm"); HMMFile made1 = new HMMFile("H:/MADE1.hmm"); @Test public void testParse() throws IOException { pKinase.parse(); assertEquals(pKinase.hmm.getName(), "Pkinase"); assertEquals(pKinase.hmm.getAccessionNumber(), "PF00069.17"); assertEquals(pKinase.hmm.getDescription(), "Protein kinase domain"); assertEquals(pKinase.hmm.getLength().intValue(), 260); assertNull(pKinase.hmm.getMaxInstanceLength()); assertEquals(pKinase.hmm.getAlphabetType(), "amino"); assertEquals(pKinase.hmm.referenceAnnotationIsActive(), false); assertEquals(pKinase.hmm.maskValueIsActive(), false); assertEquals(pKinase.hmm.consensusResidueIsActive(), true); assertEquals(pKinase.hmm.consensusStructureIsActive(), true); assertEquals(pKinase.hmm.mapIsActive(), true); assertEquals(pKinase.hmm.getDate(), "Thu Jun 16 11:44:06 2011"); assertNull(pKinase.hmm.getCommandLineLog()); assertEquals(pKinase.hmm.getNumberOfSequences().intValue(), 54); assertEquals(pKinase.hmm.getEffectiveNumberOfSequences(), 3.358521, 4d); assertEquals(pKinase.hmm.getCheckSum().longValue(), 3106786190l); assertEquals(pKinase.hmm.getGatheringThreshold(), "70.30 70.30"); assertEquals(pKinase.hmm.getTrustedCutoff(), "70.30 70.30"); assertEquals(pKinase.hmm.getNoiseCutoff(), "70.20 70.20"); List symbols = new ArrayList<>(); symbols.add('A'); symbols.add('C'); symbols.add('D'); symbols.add('E'); symbols.add('F'); symbols.add('G'); symbols.add('H'); symbols.add('I'); symbols.add('K'); symbols.add('L'); symbols.add('M'); symbols.add('N'); symbols.add('P'); symbols.add('Q'); symbols.add('R'); symbols.add('S'); symbols.add('T'); symbols.add('V'); symbols.add('W'); symbols.add('Y'); assertEquals(pKinase.hmm.getSymbols(), symbols); assertEquals(pKinase.hmm.getMatchEmission(0, 19), 3.43274); assertEquals(pKinase.hmm.getMatchEmission(12, 12), 4.33979); assertEquals(pKinase.hmm.getMatchEmission(23, 7), 3.65600); assertEquals(pKinase.hmm.getMatchEmission(54, 1), 4.76187); assertEquals(pKinase.hmm.getMatchEmission(79, 0), 2.81579); assertEquals(pKinase.hmm.getMatchEmission(100, 0), 1.86496); assertEquals(pKinase.hmm.getMatchEmission(112, 14), 2.77179); assertEquals(pKinase.hmm.getMatchEmission(143, 17), 5.10478); assertEquals(pKinase.hmm.getMatchEmission(156, 4), 4.69372); assertEquals(pKinase.hmm.getMatchEmission(178, 3), 2.52594); assertEquals(pKinase.hmm.getMatchEmission(210, 2), 4.23598); assertEquals(pKinase.hmm.getMatchEmission(260, 19), 3.81122); assertEquals(pKinase.hmm.getInsertEmission(2, 1), 4.42225); assertEquals(pKinase.hmm.getInsertEmission(15, 6), 3.72501); assertEquals(pKinase.hmm.getInsertEmission(22, 9), 2.69355); assertEquals(pKinase.hmm.getInsertEmission(57, 2), 2.77519); assertEquals(pKinase.hmm.getInsertEmission(62, 14), 2.89801); assertEquals(pKinase.hmm.getInsertEmission(95, 17), 2.98532); assertEquals(pKinase.hmm.getInsertEmission(105, 4), 3.46354); assertEquals(pKinase.hmm.getInsertEmission(134, 1), 4.42225); assertEquals(pKinase.hmm.getInsertEmission(143, 0), 2.68618); assertEquals(pKinase.hmm.getInsertEmission(152, 16), 2.77519); assertEquals(pKinase.hmm.getInsertEmission(203, 16), 2.77519); assertEquals(pKinase.hmm.getInsertEmission(255, 12), 2.73739); assertEquals(pKinase.hmm.getStateTransition(0, 6), Double.NEGATIVE_INFINITY); assertEquals(pKinase.hmm.getStateTransition(3, 6), 0.95510); assertEquals(pKinase.hmm.getStateTransition(29, 3), 0.61958); assertEquals(pKinase.hmm.getStateTransition(46, 4), 0.77255); assertEquals(pKinase.hmm.getStateTransition(53, 1), 5.01631); assertEquals(pKinase.hmm.getStateTransition(79, 2), 5.73865); assertEquals(pKinase.hmm.getStateTransition(101, 2), 5.73865); assertEquals(pKinase.hmm.getStateTransition(120, 5), 0.48576); assertEquals(pKinase.hmm.getStateTransition(146, 5), 0.70219); assertEquals(pKinase.hmm.getStateTransition(169, 3), 1.23224); assertEquals(pKinase.hmm.getStateTransition(209, 0), 0.01003); assertEquals(pKinase.hmm.getStateTransition(243, 1), 5.01631); assertEquals(pKinase.hmm.getNodeAlignmentColumn(3).intValue(), 3); assertEquals(pKinase.hmm.getReferenceAnnotation(7), '-'); assertEquals(pKinase.hmm.getConsensusResidue(23), 't'); assertEquals(pKinase.hmm.getMaskedValue(30), '-'); assertEquals(pKinase.hmm.getConsensusStructure(56), 'S'); assertEquals(pKinase.hmm.getNodeAlignmentColumn(78).intValue(), 136); assertEquals(pKinase.hmm.getReferenceAnnotation(93), '-'); assertEquals(pKinase.hmm.getConsensusResidue(145), 'a'); assertEquals(pKinase.hmm.getMaskedValue(183), '-'); assertEquals(pKinase.hmm.getConsensusStructure(240), 'H'); } @Test public void testParseFileProperties() throws IOException { FileReader fr = new FileReader(fn3.getDataObject()); BufferedReader br = new BufferedReader(fr); fn3.parseFileProperties(br); fn3.parseModel(br); // this is for a later test HiddenMarkovModel testHMM = new HiddenMarkovModel(); testHMM = fn3.getHmm(); br.close(); fr.close(); assertEquals(testHMM.getName(), "fn3"); assertEquals(testHMM.getAccessionNumber(), "PF00041.13"); assertEquals(testHMM.getDescription(), "Fibronectin type III domain"); assertEquals(testHMM.getLength().intValue(), 86); assertNull(testHMM.getMaxInstanceLength()); assertEquals(testHMM.getAlphabetType(), "amino"); assertEquals(testHMM.referenceAnnotationIsActive(), false); assertEquals(testHMM.maskValueIsActive(), false); assertEquals(testHMM.consensusResidueIsActive(), true); assertEquals(testHMM.consensusStructureIsActive(), true); assertEquals(testHMM.mapIsActive(), true); assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014"); assertNull(testHMM.getCommandLineLog()); assertEquals(testHMM.getNumberOfSequences().intValue(), 106); assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d); assertEquals(testHMM.getCheckSum().longValue(), 3564431818l); assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20"); assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20"); assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90"); assertEquals(testHMM.getViterbi(), "-9.7737 0.71847"); assertEquals(testHMM.getMSV(), "-9.4043 0.71847"); assertEquals(testHMM.getForward(), "-3.8341 0.71847"); FileReader fr2 = new FileReader(emptyFile.getDataObject()); BufferedReader br2 = new BufferedReader(fr2); emptyFile.parseFileProperties(br2); testHMM = emptyFile.getHmm(); br2.close(); fr2.close(); assertNull(testHMM.getName()); assertNull(testHMM.getAccessionNumber()); assertNull(testHMM.getDescription()); assertNull(testHMM.getLength()); assertNull(testHMM.getMaxInstanceLength()); assertNull(testHMM.getAlphabetType()); assertEquals(testHMM.referenceAnnotationIsActive(), false); assertEquals(testHMM.maskValueIsActive(), false); assertEquals(testHMM.consensusResidueIsActive(), false); assertEquals(testHMM.consensusStructureIsActive(), false); assertEquals(testHMM.mapIsActive(), false); assertNull(testHMM.getDate()); assertNull(testHMM.getCommandLineLog()); assertNull(testHMM.getNumberOfSequences()); assertNull(testHMM.getEffectiveNumberOfSequences()); assertNull(testHMM.getCheckSum()); assertNull(testHMM.getGatheringThreshold()); assertNull(testHMM.getGatheringThreshold()); assertNull(testHMM.getTrustedCutoff()); assertNull(testHMM.getTrustedCutoff()); assertNull(testHMM.getNoiseCutoff()); assertNull(testHMM.getNoiseCutoff()); assertNull(testHMM.getViterbi()); assertNull(testHMM.getMSV()); assertNull(testHMM.getForward()); FileReader fr3 = new FileReader(made1.getDataObject()); BufferedReader br3 = new BufferedReader(fr3); made1.parseFileProperties(br3); testHMM = made1.getHmm(); br3.close(); fr3.close(); assertEquals(testHMM.getName(), "MADE1"); assertEquals(testHMM.getAccessionNumber(), "DF0000629.2"); assertEquals(testHMM.getDescription(), "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon"); assertEquals(testHMM.getLength().intValue(), 80); assertEquals(testHMM.getMaxInstanceLength().intValue(), 426); assertEquals(testHMM.getAlphabetType(), "DNA"); assertEquals(testHMM.referenceAnnotationIsActive(), true); assertEquals(testHMM.maskValueIsActive(), false); assertEquals(testHMM.consensusResidueIsActive(), true); assertEquals(testHMM.consensusStructureIsActive(), false); assertEquals(testHMM.mapIsActive(), true); assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013"); assertNull(testHMM.getCommandLineLog()); assertEquals(testHMM.getNumberOfSequences().intValue(), 1997); assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d); assertEquals(testHMM.getCheckSum().longValue(), 3015610723l); assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234"); assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212"); assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456"); assertEquals(testHMM.getViterbi(), "-9.3632 0.71858"); assertEquals(testHMM.getMSV(), "-8.5786 0.71858"); assertEquals(testHMM.getForward(), "-3.4823 0.71858"); } @Test public void testGetTransitionType() { assertEquals(fn3.getTransitionType("mm").intValue(), 0); assertEquals(fn3.getTransitionType("mi").intValue(), 1); assertEquals(fn3.getTransitionType("md").intValue(), 2); assertEquals(fn3.getTransitionType("im").intValue(), 3); assertEquals(fn3.getTransitionType("ii").intValue(), 4); assertEquals(fn3.getTransitionType("dm").intValue(), 5); assertEquals(fn3.getTransitionType("dd").intValue(), 6); assertNull(fn3.getTransitionType("df")); } @Test public void testFillList() { Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9"); ArrayList filledArray = new ArrayList<>(); filledArray.add(1.3); filledArray.add(2.4); filledArray.add(5.3); filledArray.add(3.9); filledArray.add(9.8); filledArray.add(4.7); filledArray.add(4.3); filledArray.add(2.3); filledArray.add(6.9); assertEquals(HMMFile.fillList(scanner1, 9), filledArray); filledArray.clear(); scanner1.close(); Scanner scanner2 = new Scanner( "1.346554 5.58756754 35.3523645 12345.3564 1.4"); filledArray.add(1.346554); filledArray.add(5.58756754); filledArray.add(35.3523645); filledArray.add(12345.3564); filledArray.add(1.4); assertEquals(HMMFile.fillList(scanner2, 5), filledArray); scanner2.close(); } @Test public void testParseModel() throws IOException { FileReader fr = new FileReader(made1.getDataObject()); BufferedReader br = new BufferedReader(fr); HiddenMarkovModel testHMM = new HiddenMarkovModel(); for (int i = 0; i < 24; i++) { br.readLine(); } made1.parseModel(br); testHMM = made1.getHmm(); br.close(); fr.close(); assertEquals(testHMM.getMatchEmission(0, 2), 1.62906); assertEquals(testHMM.getMatchEmission(2, 1), 2.37873); assertEquals(testHMM.getMatchEmission(12, 2), 2.61355); assertEquals(testHMM.getMatchEmission(26, 0), 1.86925); assertEquals(testHMM.getMatchEmission(32, 3), 2.58263); assertEquals(testHMM.getMatchEmission(59, 3), 2.20507); assertEquals(testHMM.getMatchEmission(63, 0), 0.41244); assertEquals(testHMM.getMatchEmission(69, 1), 3.17398); assertEquals(testHMM.getMatchEmission(76, 2), 2.65861); assertEquals(testHMM.getInsertEmission(0, 1), 1.38629); assertEquals(testHMM.getInsertEmission(1, 2), 1.38629); assertEquals(testHMM.getInsertEmission(31, 3), 1.28150); assertEquals(testHMM.getInsertEmission(43, 0), 1.32290); assertEquals(testHMM.getInsertEmission(48, 2), 1.52606); assertEquals(testHMM.getInsertEmission(52, 1), 1.62259); assertEquals(testHMM.getInsertEmission(67, 0), 1.38141); assertEquals(testHMM.getInsertEmission(70, 3), 1.38629); assertEquals(testHMM.getInsertEmission(80, 3), 1.38629); assertEquals(testHMM.getStateTransition(2, 0), 0.03725); assertEquals(testHMM.getStateTransition(6, 1), 3.89715); assertEquals(testHMM.getStateTransition(9, 3), 1.38021); assertEquals(testHMM.getStateTransition(20, 4), 0.23815); assertEquals(testHMM.getStateTransition(34, 6), 0.33363); assertEquals(testHMM.getStateTransition(46, 5), 1.05474); assertEquals(testHMM.getStateTransition(57, 6), 0.31164); assertEquals(testHMM.getStateTransition(68, 2), 3.99242); assertEquals(testHMM.getStateTransition(80, 6), Double.NEGATIVE_INFINITY); } @Test public void testParseAnnotations() { HMMFile testFile = new HMMFile("H:/EmptyFile.hmm"); testFile.hmm.getNodes().add(new HMMNode()); testFile.hmm.getNodes().add(new HMMNode()); testFile.hmm.setConsensusResidueStatus(true); testFile.hmm.setMAPStatus(true); testFile.hmm.setReferenceAnnotationStatus(true); testFile.hmm.setConsensusStructureStatus(true); testFile.hmm.setMaskedValueStatus(true); Scanner scanner = new Scanner("1345 t t t t"); testFile.parseAnnotations(scanner, 0); assertEquals(testFile.hmm.getNodeAlignmentColumn(0).intValue(), 1345); assertEquals(testFile.hmm.getConsensusResidue(0), 't'); assertEquals(testFile.hmm.getReferenceAnnotation(0), 't'); assertEquals(testFile.hmm.getMaskedValue(0), 't'); assertEquals(testFile.hmm.getConsensusStructure(0), 't'); scanner.close(); testFile.hmm.setConsensusResidueStatus(true); testFile.hmm.setMAPStatus(false); testFile.hmm.setReferenceAnnotationStatus(true); testFile.hmm.setConsensusStructureStatus(false); testFile.hmm.setMaskedValueStatus(false); Scanner scanner2 = new Scanner("- y x - -"); testFile.parseAnnotations(scanner2, 1); assertNull(testFile.hmm.getNodeAlignmentColumn(1)); assertEquals(testFile.hmm.getConsensusResidue(1), 'y'); assertEquals(testFile.hmm.getReferenceAnnotation(1), 'x'); assertEquals(testFile.hmm.getMaskedValue(1), '-'); assertEquals(testFile.hmm.getConsensusStructure(1), '-'); scanner2.close(); } @Test(priority = 3) public void testExportFile() throws IOException { fn3.exportFile("H:/WriteFileTest.hmm"); } @Test(priority = 1) public void testAppendFileProperties() { StringBuilder testBuilder = new StringBuilder(); fn3.appendFileProperties(testBuilder); Scanner testScanner = new Scanner(testBuilder.toString()); String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]", "NAME fn3", "ACC PF00041.13", "DESC Fibronectin type III domain", "LENG 86", "ALPH amino", "RF no", "MM no", "CONS yes", "CS yes", "MAP yes", "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833", "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20", "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847", "STATS LOCAL VITERBI -9.7737 0.71847", "STATS LOCAL FORWARD -3.8341 0.71847" }; for (String value : expected) { assertEquals(testScanner.nextLine(), value); } testScanner.close(); } @Test(priority = 2) public void testAppendModel() { StringBuilder testBuilder = new StringBuilder(); fn3.appendModel(testBuilder); String string = testBuilder.toString(); assertEquals(findValue(2, 2, 2, string), "4.42225"); assertEquals(findValue(12, 14, 1, string), "2.79307"); } public String findValue(int symbolIndex, int nodeIndex, int line, String model) { String value = ""; Scanner scanner = new Scanner(model); scanner.nextLine(); scanner.nextLine(); for (int lineIndex = 0; lineIndex < line - 1; lineIndex++) { scanner.nextLine(); } for (int node = 0; node < nodeIndex; node++) { scanner.nextLine(); scanner.nextLine(); scanner.nextLine(); } for (int symbol = 0; symbol < symbolIndex; symbol++) { value = scanner.next(); if ("COMPO".equals(value)) { scanner.next(); } else if (value.length() < 7) { scanner.next(); } } return value; } }