package jalview.io; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNull; import jalview.datamodel.HMMNode; import jalview.datamodel.HiddenMarkovModel; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Scanner; import org.testng.annotations.Test; public class HMMFileTest { HMMFile fn3 = new HMMFile( new FileParse("test/jalview/io/test_fn3_hmm.txt", DataSourceType.FILE)); HMMFile pKinase = new HMMFile( new FileParse("test/jalview/io/test_PKinase_hmm.txt", DataSourceType.FILE)); HMMFile made1 = new HMMFile( new FileParse("test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE)); HMMFileTest() throws IOException { } @Test public void testParse() throws IOException { pKinase.parse(); HiddenMarkovModel hmm = pKinase.getHMM(); assertEquals(hmm.getName(), "Pkinase"); assertEquals(hmm.getAccessionNumber(), "PF00069.17"); assertEquals(hmm.getDescription(), "Protein kinase domain"); assertEquals(hmm.getLength().intValue(), 260); assertNull(hmm.getMaxInstanceLength()); assertEquals(hmm.getAlphabetType(), "amino"); assertEquals(hmm.referenceAnnotationIsActive(), false); assertEquals(hmm.maskValueIsActive(), false); assertEquals(hmm.consensusResidueIsActive(), true); assertEquals(hmm.consensusStructureIsActive(), true); assertEquals(hmm.mapIsActive(), true); assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011"); assertNull(hmm.getCommandLineLog()); assertEquals(hmm.getNumberOfSequences().intValue(), 54); assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521, 4d); assertEquals(hmm.getCheckSum().longValue(), 3106786190l); assertEquals(hmm.getGatheringThreshold(), "70.30 70.30"); assertEquals(hmm.getTrustedCutoff(), "70.30 70.30"); assertEquals(hmm.getNoiseCutoff(), "70.20 70.20"); List symbols = new ArrayList<>(); symbols.add('A'); symbols.add('C'); symbols.add('D'); symbols.add('E'); symbols.add('F'); symbols.add('G'); symbols.add('H'); symbols.add('I'); symbols.add('K'); symbols.add('L'); symbols.add('M'); symbols.add('N'); symbols.add('P'); symbols.add('Q'); symbols.add('R'); symbols.add('S'); symbols.add('T'); symbols.add('V'); symbols.add('W'); symbols.add('Y'); assertEquals(hmm.getSymbols(), symbols); assertEquals(getMatchEmission(0, 19, hmm), 3.43274); assertEquals(getMatchEmission(12, 12, hmm), 4.33979); assertEquals(getMatchEmission(23, 7, hmm), 3.65600); assertEquals(getMatchEmission(54, 1, hmm), 4.76187); assertEquals(getMatchEmission(79, 0, hmm), 2.81579); assertEquals(getMatchEmission(100, 0, hmm), 1.86496); assertEquals(getMatchEmission(112, 14, hmm), 2.77179); assertEquals(getMatchEmission(143, 17, hmm), 5.10478); assertEquals(getMatchEmission(156, 4, hmm), 4.69372); assertEquals(getMatchEmission(178, 3, hmm), 2.52594); assertEquals(getMatchEmission(210, 2, hmm), 4.23598); assertEquals(getMatchEmission(260, 19, hmm), 3.81122); assertEquals(getInsertEmission(2, 1, hmm), 4.42225); assertEquals(getInsertEmission(15, 6, hmm), 3.72501); assertEquals(getInsertEmission(22, 9, hmm), 2.69355); assertEquals(getInsertEmission(57, 2, hmm), 2.77519); assertEquals(getInsertEmission(62, 14, hmm), 2.89801); assertEquals(getInsertEmission(95, 17, hmm), 2.98532); assertEquals(getInsertEmission(105, 4, hmm), 3.46354); assertEquals(getInsertEmission(134, 1, hmm), 4.42225); assertEquals(getInsertEmission(143, 0, hmm), 2.68618); assertEquals(getInsertEmission(152, 16, hmm), 2.77519); assertEquals(getInsertEmission(203, 16, hmm), 2.77519); assertEquals(getInsertEmission(255, 12, hmm), 2.73739); assertEquals(getStateTransition(0, 6, hmm), Double.NEGATIVE_INFINITY); assertEquals(getStateTransition(3, 6, hmm), 0.95510); assertEquals(getStateTransition(29, 3, hmm), 0.61958); assertEquals(getStateTransition(46, 4, hmm), 0.77255); assertEquals(getStateTransition(53, 1, hmm), 5.01631); assertEquals(getStateTransition(79, 2, hmm), 5.73865); assertEquals(getStateTransition(101, 2, hmm), 5.73865); assertEquals(getStateTransition(120, 5, hmm), 0.48576); assertEquals(getStateTransition(146, 5, hmm), 0.70219); assertEquals(getStateTransition(169, 3, hmm), 1.23224); assertEquals(getStateTransition(209, 0, hmm), 0.01003); assertEquals(getStateTransition(243, 1, hmm), 5.01631); assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 3); assertEquals(hmm.getReferenceAnnotation(7), '-'); assertEquals(hmm.getConsensusResidue(23), 't'); assertEquals(hmm.getMaskedValue(30), '-'); assertEquals(hmm.getConsensusStructure(56), 'S'); assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 136); assertEquals(hmm.getReferenceAnnotation(93), '-'); assertEquals(hmm.getConsensusResidue(145), 'a'); assertEquals(hmm.getMaskedValue(183), '-'); assertEquals(hmm.getConsensusStructure(240), 'H'); } @Test public void testParseFileProperties() throws IOException { FileReader fr = new FileReader( new File("test/jalview/io/test_fn3_hmm.txt")); BufferedReader br = new BufferedReader(fr); fn3.parseFileProperties(br); fn3.parseModel(br); // this is for a later test HiddenMarkovModel testHMM = new HiddenMarkovModel(); testHMM = fn3.getHMM(); br.close(); fr.close(); assertEquals(testHMM.getName(), "fn3"); assertEquals(testHMM.getAccessionNumber(), "PF00041.13"); assertEquals(testHMM.getDescription(), "Fibronectin type III domain"); assertEquals(testHMM.getLength().intValue(), 86); assertNull(testHMM.getMaxInstanceLength()); assertEquals(testHMM.getAlphabetType(), "amino"); assertEquals(testHMM.referenceAnnotationIsActive(), false); assertEquals(testHMM.maskValueIsActive(), false); assertEquals(testHMM.consensusResidueIsActive(), true); assertEquals(testHMM.consensusStructureIsActive(), true); assertEquals(testHMM.mapIsActive(), true); assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014"); assertNull(testHMM.getCommandLineLog()); assertEquals(testHMM.getNumberOfSequences().intValue(), 106); assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d); assertEquals(testHMM.getCheckSum().longValue(), 3564431818l); assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20"); assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20"); assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90"); assertEquals(testHMM.getViterbi(), "-9.7737 0.71847"); assertEquals(testHMM.getMSV(), "-9.4043 0.71847"); assertEquals(testHMM.getForward(), "-3.8341 0.71847"); FileReader fr3 = new FileReader( new File("test/jalview/io/test_MADE1_hmm.txt")); BufferedReader br3 = new BufferedReader(fr3); made1.parseFileProperties(br3); testHMM = made1.getHMM(); br3.close(); fr3.close(); assertEquals(testHMM.getName(), "MADE1"); assertEquals(testHMM.getAccessionNumber(), "DF0000629.2"); assertEquals(testHMM.getDescription(), "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon"); assertEquals(testHMM.getLength().intValue(), 80); assertEquals(testHMM.getMaxInstanceLength().intValue(), 426); assertEquals(testHMM.getAlphabetType(), "DNA"); assertEquals(testHMM.referenceAnnotationIsActive(), true); assertEquals(testHMM.maskValueIsActive(), false); assertEquals(testHMM.consensusResidueIsActive(), true); assertEquals(testHMM.consensusStructureIsActive(), false); assertEquals(testHMM.mapIsActive(), true); assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013"); assertNull(testHMM.getCommandLineLog()); assertEquals(testHMM.getNumberOfSequences().intValue(), 1997); assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d); assertEquals(testHMM.getCheckSum().longValue(), 3015610723l); assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234"); assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212"); assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456"); assertEquals(testHMM.getViterbi(), "-9.3632 0.71858"); assertEquals(testHMM.getMSV(), "-8.5786 0.71858"); assertEquals(testHMM.getForward(), "-3.4823 0.71858"); } @Test public void testGetTransitionType() { HiddenMarkovModel hmm = fn3.getHMM(); assertEquals(hmm.getTransitionType("mm").intValue(), 0); assertEquals(hmm.getTransitionType("mi").intValue(), 1); assertEquals(hmm.getTransitionType("md").intValue(), 2); assertEquals(hmm.getTransitionType("im").intValue(), 3); assertEquals(hmm.getTransitionType("ii").intValue(), 4); assertEquals(hmm.getTransitionType("dm").intValue(), 5); assertEquals(hmm.getTransitionType("dd").intValue(), 6); assertNull(hmm.getTransitionType("df")); } @Test public void testFillList() { Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9"); ArrayList filledArray = new ArrayList<>(); filledArray.add(1.3); filledArray.add(2.4); filledArray.add(5.3); filledArray.add(3.9); filledArray.add(9.8); filledArray.add(4.7); filledArray.add(4.3); filledArray.add(2.3); filledArray.add(6.9); assertEquals(HMMFile.fillList(scanner1, 9), filledArray); filledArray.clear(); scanner1.close(); Scanner scanner2 = new Scanner( "1.346554 5.58756754 35.3523645 12345.3564 1.4"); filledArray.add(1.346554); filledArray.add(5.58756754); filledArray.add(35.3523645); filledArray.add(12345.3564); filledArray.add(1.4); assertEquals(HMMFile.fillList(scanner2, 5), filledArray); scanner2.close(); } @Test public void testParseModel() throws IOException { FileReader fr = new FileReader( new File("test/jalview/io/test_MADE1_hmm.txt")); BufferedReader br = new BufferedReader(fr); HiddenMarkovModel testHMM = new HiddenMarkovModel(); for (int i = 0; i < 24; i++) { br.readLine(); } made1.parseModel(br); testHMM = made1.getHMM(); br.close(); fr.close(); assertEquals(getMatchEmission(0, 2, testHMM), 1.62906); assertEquals(getMatchEmission(2, 1, testHMM), 2.37873); assertEquals(getMatchEmission(12, 2, testHMM), 2.61355); assertEquals(getMatchEmission(26, 0, testHMM), 1.86925); assertEquals(getMatchEmission(32, 3, testHMM), 2.58263); assertEquals(getMatchEmission(59, 3, testHMM), 2.20507); assertEquals(getMatchEmission(63, 0, testHMM), 0.41244); assertEquals(getMatchEmission(69, 1, testHMM), 3.17398); assertEquals(getMatchEmission(76, 2, testHMM), 2.65861); assertEquals(getInsertEmission(0, 1, testHMM), 1.38629); assertEquals(getInsertEmission(1, 2, testHMM), 1.38629); assertEquals(getInsertEmission(31, 3, testHMM), 1.28150); assertEquals(getInsertEmission(43, 0, testHMM), 1.32290); assertEquals(getInsertEmission(48, 2, testHMM), 1.52606); assertEquals(getInsertEmission(52, 1, testHMM), 1.62259); assertEquals(getInsertEmission(67, 0, testHMM), 1.38141); assertEquals(getInsertEmission(70, 3, testHMM), 1.38629); assertEquals(getInsertEmission(80, 3, testHMM), 1.38629); assertEquals(getStateTransition(2, 0, testHMM), 0.03725); assertEquals(getStateTransition(6, 1, testHMM), 3.89715); assertEquals(getStateTransition(9, 3, testHMM), 1.38021); assertEquals(getStateTransition(20, 4, testHMM), 0.23815); assertEquals(getStateTransition(34, 6, testHMM), 0.33363); assertEquals(getStateTransition(46, 5, testHMM), 1.05474); assertEquals(getStateTransition(57, 6, testHMM), 0.31164); assertEquals(getStateTransition(68, 2, testHMM), 3.99242); assertEquals(getStateTransition(80, 6, testHMM), Double.NEGATIVE_INFINITY); } @Test public void testParseAnnotations() { HMMFile testFile = new HMMFile(); testFile.getHMM().getNodes().add(new HMMNode()); testFile.getHMM().getNodes().add(new HMMNode()); testFile.getHMM().getNodes().add(new HMMNode()); testFile.getHMM().setConsensusResidueStatus(true); testFile.getHMM().setMAPStatus(true); testFile.getHMM().setReferenceAnnotationStatus(true); testFile.getHMM().setConsensusStructureStatus(true); testFile.getHMM().setMaskedValueStatus(true); Scanner scanner = new Scanner("1345 t t t t"); testFile.parseAnnotations(scanner, 1); testFile.getHMM().setConsensusResidueStatus(true); testFile.getHMM().setMAPStatus(false); testFile.getHMM().setReferenceAnnotationStatus(true); testFile.getHMM().setConsensusStructureStatus(false); testFile.getHMM().setMaskedValueStatus(false); Scanner scanner2 = new Scanner("- y x - -"); testFile.parseAnnotations(scanner2, 2); HiddenMarkovModel hmm = testFile.getHMM(); assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1345); assertEquals(hmm.getConsensusResidue(1), 't'); assertEquals(hmm.getReferenceAnnotation(1), 't'); assertEquals(hmm.getMaskedValue(1), 't'); assertEquals(hmm.getConsensusStructure(1), 't'); assertEquals(hmm.findNodeIndex(1345).intValue(), 1); scanner.close(); assertNull(hmm.getNodeAlignmentColumn(2)); assertEquals(hmm.getConsensusResidue(2), 'y'); assertEquals(hmm.getReferenceAnnotation(2), 'x'); assertEquals(hmm.getMaskedValue(2), '-'); assertEquals(hmm.getConsensusStructure(2), '-'); assertNull(hmm.findNodeIndex(2)); scanner2.close(); } /** * tests to see if file produced by the output matches the file from the input * * @throws IOException */ @Test(priority = 3) public void testExportFile() throws IOException { fn3.exportFile("test/jalview/io/test_export_hmm.txt"); HMMFile fn3Clone = new HMMFile( new FileParse("test/jalview/io/test_export_hmm.txt", DataSourceType.FILE)); fn3Clone.parse(); HiddenMarkovModel fn3HMM = new HiddenMarkovModel(); HiddenMarkovModel fn3CloneHMM = new HiddenMarkovModel(); fn3HMM = fn3.getHMM(); fn3CloneHMM = fn3Clone.getHMM(); for (int i = 0; i < fn3HMM.getLength(); i++) { List list1; List list2; boolean result; list1 = fn3HMM.getNode(i).getMatchEmissions(); list2 = fn3CloneHMM.getNode(i).getMatchEmissions(); result = checkIfListsAreIdentical(list1, list2); assertEquals(result, true); list1 = fn3HMM.getNode(i).getInsertEmissions(); list2 = fn3CloneHMM.getNode(i).getInsertEmissions(); result = checkIfListsAreIdentical(list1, list2); assertEquals(result, true); list1 = fn3HMM.getNode(i).getStateTransitions(); list2 = fn3CloneHMM.getNode(i).getStateTransitions(); result = checkIfListsAreIdentical(list1, list2); assertEquals(result, true); if (i > 0) { int alignColumn1; int alignColumn2; alignColumn1 = fn3HMM.getNodeAlignmentColumn(i); alignColumn2 = fn3CloneHMM.getNodeAlignmentColumn(i); assertEquals(alignColumn1, alignColumn2); char annotation1; char annotation2; annotation1 = fn3HMM.getReferenceAnnotation(i); annotation2 = fn3CloneHMM.getReferenceAnnotation(i); assertEquals(annotation1, annotation2); annotation1 = fn3HMM.getConsensusResidue(i); annotation2 = fn3CloneHMM.getConsensusResidue(i); assertEquals(annotation1, annotation2); } } } @Test(priority = 1) public void testAppendFileProperties() { StringBuilder testBuilder = new StringBuilder(); fn3.appendFileProperties(testBuilder); Scanner testScanner = new Scanner(testBuilder.toString()); String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]", "NAME fn3", "ACC PF00041.13", "DESC Fibronectin type III domain", "LENG 86", "ALPH amino", "RF no", "MM no", "CONS yes", "CS yes", "MAP yes", "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833", "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20", "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847", "STATS LOCAL VITERBI -9.7737 0.71847", "STATS LOCAL FORWARD -3.8341 0.71847" }; for (String value : expected) { assertEquals(testScanner.nextLine(), value); } testScanner.close(); } @Test(priority = 2) public void testAppendModel() { StringBuilder testBuilder = new StringBuilder(); fn3.appendModel(testBuilder); String string = testBuilder.toString(); assertEquals(findValue(2, 2, 2, string), "4.42225"); assertEquals(findValue(12, 14, 1, string), "2.79307"); assertEquals(findValue(6, 24, 3, string), "0.48576"); assertEquals(findValue(19, 33, 2, string), "4.58477"); assertEquals(findValue(20, 64, 2, string), "3.61505"); assertEquals(findValue(3, 72, 3, string), "6.81068"); assertEquals(findValue(10, 80, 2, string), "2.69355"); assertEquals(findValue(16, 65, 1, string), "2.81003"); assertEquals(findValue(14, 3, 1, string), "2.69012"); assertEquals(findValue(11, 32, 1, string), "4.34805"); } /** * * @param symbolIndex * index of symbol being searched. First symbol has index 1. * @param nodeIndex * index of node being searched. Begin node has index 0. First node * has index 1. * @param line * index of line being searched in node. First line has index 1. * @param model * string model being searched * @return value at specified position */ public String findValue(int symbolIndex, int nodeIndex, int line, String model) { String value = ""; Scanner scanner = new Scanner(model); scanner.nextLine(); scanner.nextLine(); for (int lineIndex = 0; lineIndex < line - 1; lineIndex++) { scanner.nextLine(); } for (int node = 0; node < nodeIndex; node++) { scanner.nextLine(); scanner.nextLine(); scanner.nextLine(); } for (int symbol = 0; symbol < symbolIndex; symbol++) { value = scanner.next(); if ("COMPO".equals(value)) { scanner.next(); } else if (value.length() < 7) { scanner.next(); } } return value; } public boolean checkIfListsAreIdentical(List list1, List list2) { boolean isDifferent = false; for (int i = 0; i < list1.size(); i++) { Double entry1; Double entry2; entry1 = list1.get(i); entry2 = list2.get(i); if (!(entry1 == entry2)) { isDifferent = true; } } return isDifferent; } /** * gets the match emission at a node for a symbol * * @param nodeIndex * position of node in model * @param symbolIndex * index of symbol being searched * @return negative log probability of a match emission of the given symbol */ public double getMatchEmission(int nodeIndex, int symbolIndex, HiddenMarkovModel hmm) { double value = hmm.getNodes().get(nodeIndex).getMatchEmissions() .get(symbolIndex); return value; } /** * gets the insert emission at a node for a symbol * * @param nodeIndex * position of node in model * @param symbolIndex * index of symbol being searched * @return negative log probability of an insert emission of the given symbol */ public double getInsertEmission(int nodeIndex, int symbolIndex, HiddenMarkovModel hmm) { double value = hmm.getNodes().get(nodeIndex).getInsertEmissions() .get(symbolIndex); return value; } /** * gets the state transition at a node for a specific transition * * @param nodeIndex * position of node in model * @param transitionIndex * index of stransition being searched * @return negative log probability of a state transition of the given type */ public double getStateTransition(int nodeIndex, int transitionIndex, HiddenMarkovModel hmm) { double value = hmm.getNodes().get(nodeIndex).getStateTransitions() .get(transitionIndex); return value; } }