package jalview.io;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;

import jalview.datamodel.HMMNode;
import jalview.datamodel.HiddenMarkovModel;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;

import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

/**
 * Tests for {@link HMMFile}: parsing of the HMMER test fixtures under
 * test/jalview/io and writing a parsed model back out as text.
 * 
 * Several tests share the fn3, pKinase and made1 fixtures and rely on state
 * left behind by earlier tests (see the priority attributes and the comments on
 * the individual tests).
 */
public class HMMFileTest
{
  private static final String FN3_FILE = "test/jalview/io/test_fn3_hmm.txt";

  private static final String PKINASE_FILE = "test/jalview/io/test_PKinase_hmm.txt";

  private static final String MADE1_FILE = "test/jalview/io/test_MADE1_hmm.txt";

  private static final String EXPORT_FILE = "test/jalview/io/test_export_hmm.txt";

  /*
   * number of lines skipped in the MADE1 fixture before parseModel is called;
   * presumably the length of its properties header - TODO confirm against the
   * fixture
   */
  private static final int MADE1_LINES_BEFORE_MODEL = 24;

  /*
   * tolerance for comparing a number that is read verbatim from a file header
   */
  private static final double EXACT_DELTA = 0.000001d;

  HMMFile fn3;

  HMMFile pKinase;

  HMMFile made1;

  /**
   * Creates one HMMFile per fixture. The readers are handed over to the HMMFile
   * instances and are not closed here.
   * 
   * @throws FileNotFoundException
   *           if a fixture file is missing
   */
  @BeforeClass(alwaysRun = true)
  public void setUp() throws FileNotFoundException
  {
    fn3 = new HMMFile(new BufferedReader(new FileReader(FN3_FILE)));
    pKinase = new HMMFile(new BufferedReader(new FileReader(PKINASE_FILE)));
    made1 = new HMMFile(new BufferedReader(new FileReader(MADE1_FILE)));
  }

  /**
   * Parses the PKinase fixture in full and spot-checks file properties,
   * emission and transition probabilities, and per-node annotations.
   * 
   * @throws IOException
   */
  @Test(groups = "Functional")
  public void testParse() throws IOException
  {
    pKinase.parse();
    HiddenMarkovModel hmm = pKinase.getHMM();

    assertEquals(hmm.getName(), "Pkinase");
    assertEquals(hmm.getAccessionNumber(), "PF00069.17");
    assertEquals(hmm.getDescription(), "Protein kinase domain");
    assertEquals(hmm.getLength().intValue(), 260);
    assertNull(hmm.getMaxInstanceLength());
    assertEquals(hmm.getAlphabetType(), "amino");
    assertEquals(hmm.referenceAnnotationIsActive(), false);
    assertEquals(hmm.maskValueIsActive(), false);
    assertEquals(hmm.consensusResidueIsActive(), true);
    assertEquals(hmm.consensusStructureIsActive(), true);
    assertEquals(hmm.mapIsActive(), true);
    assertEquals(hmm.getDate(), "Thu Jun 16 11:44:06 2011");
    assertNull(hmm.getCommandLineLog());
    assertEquals(hmm.getNumberOfSequences().intValue(), 54);
    // was a delta of 4d, which accepted any value within +/- 4
    assertEquals(hmm.getEffectiveNumberOfSequences(), 3.358521,
            EXACT_DELTA);
    assertEquals(hmm.getCheckSum().longValue(), 3106786190L);
    assertEquals(hmm.getGatheringThreshold(), "70.30 70.30");
    assertEquals(hmm.getTrustedCutoff(), "70.30 70.30");
    assertEquals(hmm.getNoiseCutoff(), "70.20 70.20");

    List<Character> symbols = Arrays.asList('A', 'C', 'D', 'E', 'F', 'G',
            'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W',
            'Y');
    assertEquals(hmm.getSymbols(), symbols);

    assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
    assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
    assertEquals(hmm.getMatchEmissionProbability(24, 'I'), 0.02583, 0.001d);
    assertEquals(hmm.getMatchEmissionProbability(83, 'C'), 0.008549,
            0.001d);
    assertEquals(hmm.getMatchEmissionProbability(332, 'E'), 0.07998,
            0.001d);
    assertEquals(hmm.getMatchEmissionProbability(381, 'D'), 0.014465,
            0.001d);
    assertEquals(hmm.getMatchEmissionProbability(475, 'Y'), 0.02213,
            0.001d);

    assertEquals(hmm.getInsertEmissionProbability(1, 'C'), 0.012, 0.001d);
    assertEquals(hmm.getInsertEmissionProbability(14, 'H'), 0.02411,
            0.001d);
    assertEquals(hmm.getInsertEmissionProbability(23, 'L'), 0.06764,
            0.001d);
    assertEquals(hmm.getInsertEmissionProbability(90, 'D'), 0.0623, 0.001d);
    assertEquals(hmm.getInsertEmissionProbability(374, 'T'), 0.0623,
            0.001d);
    assertEquals(hmm.getInsertEmissionProbability(470, 'P'), 0.0647,
            0.001d);

    assertEquals(hmm.getStateTransitionProbability(2, 6), 0.3848, 0.001d);
    assertEquals(hmm.getStateTransitionProbability(38, 3), 0.5382, 0.001d);
    assertEquals(hmm.getStateTransitionProbability(305, 3), 0.2916, 0.001d);
    assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
    assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);

    assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2);
    assertEquals(hmm.getReferenceAnnotation(7), '-');
    assertEquals(hmm.getConsensusResidue(23), 't');
    assertEquals(hmm.getMaskedValue(30), '-');
    assertEquals(hmm.getConsensusStructure(56), 'S');
    assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135);
    assertEquals(hmm.getReferenceAnnotation(93), '-');
    assertEquals(hmm.getConsensusResidue(145), 'a');
    assertEquals(hmm.getMaskedValue(183), '-');
    assertEquals(hmm.getConsensusStructure(240), 'H');
  }

  /**
   * Parses the header properties of the fn3 and MADE1 fixtures and checks every
   * property value. The fn3 model body is parsed as well, because
   * testGetFilePropertiesAsString and testGetModelAsString read fn3 afterwards.
   * 
   * @throws IOException
   */
  @Test(priority = 0)
  public void testParseFileProperties() throws IOException
  {
    // closing the BufferedReader also closes the wrapped FileReader
    try (BufferedReader br = new BufferedReader(
            new FileReader(new File(FN3_FILE))))
    {
      fn3.setHMM(new HiddenMarkovModel());
      fn3.parseFileProperties(br);
      fn3.parseModel(br); // this is for a later test
    }
    HiddenMarkovModel testHMM = fn3.getHMM();

    assertEquals(testHMM.getName(), "fn3");
    assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
    assertEquals(testHMM.getDescription(), "Fibronectin type III domain");
    assertEquals(testHMM.getLength().intValue(), 86);
    assertNull(testHMM.getMaxInstanceLength());
    assertEquals(testHMM.getAlphabetType(), "amino");
    assertEquals(testHMM.referenceAnnotationIsActive(), false);
    assertEquals(testHMM.maskValueIsActive(), false);
    assertEquals(testHMM.consensusResidueIsActive(), true);
    assertEquals(testHMM.consensusStructureIsActive(), true);
    assertEquals(testHMM.mapIsActive(), true);
    assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
    assertNull(testHMM.getCommandLineLog());
    assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
    assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833,
            EXACT_DELTA);
    assertEquals(testHMM.getCheckSum().longValue(), 3564431818L);
    assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
    assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
    assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
    assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
    assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
    assertEquals(testHMM.getForward(), "-3.8341 0.71847");

    try (BufferedReader br3 = new BufferedReader(
            new FileReader(new File(MADE1_FILE))))
    {
      made1.setHMM(new HiddenMarkovModel());
      made1.parseFileProperties(br3);
    }
    testHMM = made1.getHMM();

    assertEquals(testHMM.getName(), "MADE1");
    assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
    assertEquals(testHMM.getDescription(),
            "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
    assertEquals(testHMM.getLength().intValue(), 80);
    assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
    assertEquals(testHMM.getAlphabetType(), "DNA");
    assertEquals(testHMM.referenceAnnotationIsActive(), true);
    assertEquals(testHMM.maskValueIsActive(), false);
    assertEquals(testHMM.consensusResidueIsActive(), true);
    assertEquals(testHMM.consensusStructureIsActive(), false);
    assertEquals(testHMM.mapIsActive(), true);
    assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
    assertNull(testHMM.getCommandLineLog());
    assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
    assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818,
            EXACT_DELTA);
    assertEquals(testHMM.getCheckSum().longValue(), 3015610723L);
    assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
    assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
    assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
    assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
    assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
    assertEquals(testHMM.getForward(), "-3.4823 0.71858");
  }

  /**
   * Checks the values HMMFile.fillList produces for two lines of
   * space-separated numbers.
   * 
   * @throws IOException
   */
  @Test
  public void testFillList() throws IOException
  {
    assertFillList("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9", 0.27253, 0.0907,
            0.00499, 0.02024, 0.00005, 0.00909, 0.01357, 0.10026, 0.001);
    assertFillList("1.346 5.554 35.345 5.64 1.4", 0.2603, 0.00387, 0d,
            0.00355, 0.2466);
  }

  /**
   * Feeds the given text to HMMFile.fillList and compares each returned entry
   * with the expected value, to within 0.001.
   * 
   * @param input
   *          space-separated numbers to scan
   * @param expected
   *          expected list entries, in order; its length is also the number of
   *          entries requested from fillList
   * @throws IOException
   */
  private static void assertFillList(String input, double... expected)
          throws IOException
  {
    try (Scanner scanner = new Scanner(input))
    {
      List<Double> actual = HMMFile.fillList(scanner, expected.length);
      for (int i = 0; i < expected.length; i++)
      {
        assertEquals(actual.get(i).doubleValue(), expected[i], 0.001d);
      }
    }
  }

  /**
   * Parses the model section of the MADE1 fixture and spot-checks emission and
   * transition probabilities.
   * 
   * @throws IOException
   */
  @Test
  public void testParseModel() throws IOException
  {
    try (BufferedReader br = new BufferedReader(
            new FileReader(new File(MADE1_FILE))))
    {
      for (int i = 0; i < MADE1_LINES_BEFORE_MODEL; i++)
      {
        br.readLine();
      }
      made1.parseModel(br);
    }
    HiddenMarkovModel testHMM = made1.getHMM();

    assertEquals(testHMM.getMatchEmissionProbability(1, 'C'), 0.09267,
            0.001d);
    assertEquals(testHMM.getMatchEmissionProbability(25, 'G'), 0.07327,
            0.001d);
    assertEquals(testHMM.getMatchEmissionProbability(1092, 'C'), 0.04184,
            0.001d);
    assertEquals(testHMM.getMatchEmissionProbability(1107, 'G'), 0.07,
            0.001d);

    assertEquals(testHMM.getInsertEmissionProbability(0, 'G'), 0.25,
            0.001d);
    assertEquals(testHMM.getInsertEmissionProbability(247, 'T'), 0.2776,
            0.001d);
    assertEquals(testHMM.getInsertEmissionProbability(1096, 'T'), 0.25,
            0.001d);
    assertEquals(testHMM.getInsertEmissionProbability(1111, 'T'), 0.25,
            0.001d);

    assertEquals(testHMM.getStateTransitionProbability(1, 0), 0.9634,
            0.001d);
    assertEquals(testHMM.getStateTransitionProbability(5, 1), 0.0203,
            0.001d);
    assertEquals(testHMM.getStateTransitionProbability(14, 3), 0.2515,
            0.001d);
    assertEquals(testHMM.getStateTransitionProbability(65, 4), 0.78808,
            0.001d);
    assertEquals(testHMM.getStateTransitionProbability(1080, 2), 0.01845,
            0.001d);
    assertEquals(testHMM.getStateTransitionProbability(1111, 6),
            Double.NEGATIVE_INFINITY);
  }

  /**
   * Parses two annotation lines into fresh nodes, the first with every
   * annotation flag switched on and the second with only some of them, then
   * checks the values recorded for the first parsed node.
   */
  @Test
  public void testParseAnnotations()
  {
    HMMFile testFile = new HMMFile();
    HiddenMarkovModel hmm = new HiddenMarkovModel();
    testFile.setHMM(hmm);

    // placeholder at index 0, so the first parsed node lands at index 1
    hmm.getNodes().add(new HMMNode());

    try (Scanner scanner = new Scanner("1345 t t t t");
            Scanner scanner2 = new Scanner("- y x - -"))
    {
      hmm.setConsensusResidueStatus(true);
      hmm.setMAPStatus(true);
      hmm.setReferenceAnnotationStatus(true);
      hmm.setConsensusStructureStatus(true);
      hmm.setMaskedValueStatus(true);
      HMMNode node = new HMMNode();
      hmm.getNodes().add(node);
      testFile.parseAnnotations(scanner, node);

      hmm.setConsensusResidueStatus(true);
      hmm.setMAPStatus(false);
      hmm.setReferenceAnnotationStatus(true);
      hmm.setConsensusStructureStatus(false);
      hmm.setMaskedValueStatus(false);
      node = new HMMNode();
      hmm.getNodes().add(node);
      testFile.parseAnnotations(scanner2, node);

      // NOTE(review): nothing is asserted about the second node (index 2)
      assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
      assertEquals(hmm.getConsensusResidue(1), 't');
      assertEquals(hmm.getReferenceAnnotation(1), 't');
      assertEquals(hmm.getMaskedValue(1), 't');
      assertEquals(hmm.getConsensusStructure(1), 't');
    }
  }

  /**
   * Tests that a model written out with print() and read back in matches the
   * model it was written from: per-node match emissions, insert emissions and
   * state transitions, plus alignment column, reference annotation and
   * consensus residue for every node after the first.
   * 
   * Uses the model held by pKinase, which testParse populates.
   * 
   * @throws IOException
   */
  @Test(priority = 3)
  public void testPrint() throws IOException
  {
    try (PrintWriter writer = new PrintWriter(EXPORT_FILE))
    {
      writer.print(pKinase.print());
    }

    HMMFile pKinaseClone = new HMMFile(
            new FileParse(EXPORT_FILE, DataSourceType.FILE));
    HiddenMarkovModel pKinaseHMM = pKinase.getHMM();
    HiddenMarkovModel pKinaseCloneHMM = pKinaseClone.getHMM();

    // NOTE(review): the node at index getLength() is not compared - confirm
    // whether the bound should be inclusive
    for (int i = 0; i < pKinaseHMM.getLength(); i++)
    {
      List<Double> list1 = pKinaseHMM.getNode(i).getMatchEmissions();
      List<Double> list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
      assertEquals(checkIfListsAreIdentical(list1, list2), true);

      list1 = pKinaseHMM.getNode(i).getInsertEmissions();
      list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
      assertEquals(checkIfListsAreIdentical(list1, list2), true);

      list1 = pKinaseHMM.getNode(i).getStateTransitions();
      list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
      assertEquals(checkIfListsAreIdentical(list1, list2), true);

      if (i > 0)
      {
        int alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
        int alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
        assertEquals(alignColumn1, alignColumn2);

        char annotation1 = pKinaseHMM.getReferenceAnnotation(i);
        char annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
        assertEquals(annotation1, annotation2);

        annotation1 = pKinaseHMM.getConsensusResidue(i);
        annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
        assertEquals(annotation1, annotation2);
      }
    }
  }

  /**
   * Checks, line by line, the properties header generated for fn3. Relies on
   * testParseFileProperties having populated fn3 first.
   * 
   * @throws FileNotFoundException
   */
  @Test(priority = 1)
  public void testGetFilePropertiesAsString() throws FileNotFoundException
  {
    String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
        "NAME fn3", "ACC PF00041.13", "DESC Fibronectin type III domain",
        "LENG 86", "ALPH amino", "RF no", "MM no", "CONS yes", "CS yes",
        "MAP yes", "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106",
        "EFFN 11.415833", "CKSUM 3564431818", "GA 8.00 7.20",
        "TC 8.00 7.20", "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847",
        "STATS LOCAL VITERBI -9.7737 0.71847",
        "STATS LOCAL FORWARD -3.8341 0.71847" };

    try (Scanner testScanner = new Scanner(fn3.getFilePropertiesAsString()))
    {
      for (String value : expected)
      {
        assertEquals(testScanner.nextLine(), value);
      }
    }
  }

  /**
   * Spot-checks individual values in the model text generated for fn3. Relies
   * on testParseFileProperties having parsed the fn3 model first.
   * 
   * @throws FileNotFoundException
   */
  @Test(priority = 2)
  public void testGetModelAsString() throws FileNotFoundException
  {
    String string = fn3.getModelAsString();

    assertEquals(findValue(2, 2, 2, string), "4.42225");
    assertEquals(findValue(12, 14, 1, string), "2.79307");
    assertEquals(findValue(6, 24, 3, string), "0.48576");
    assertEquals(findValue(19, 33, 2, string), "4.58477");
    assertEquals(findValue(20, 64, 2, string), "3.61505");
    assertEquals(findValue(3, 72, 3, string), "6.81068");
    assertEquals(findValue(10, 80, 2, string), "2.69355");
    assertEquals(findValue(16, 65, 1, string), "2.81003");
    assertEquals(findValue(14, 3, 1, string), "2.69012");
    assertEquals(findValue(11, 32, 1, string), "4.34805");
  }

  /**
   * Looks up one value in a model string. The first two lines of the model are
   * skipped, and every node is taken to occupy three lines.
   * 
   * A token that is "COMPO" or shorter than 7 characters is treated as a row
   * label rather than a value: it is skipped and the token after it is used
   * instead. Previously the label itself was returned when it was the last
   * token read (e.g. symbolIndex 1 on a labelled line). Any other short token
   * is skipped in the same way.
   * 
   * @param symbolIndex
   *          index of symbol being searched. First symbol has index 1.
   * @param nodeIndex
   *          index of node being searched. Begin node has index 0. First node
   *          has index 1.
   * @param line
   *          index of line being searched in node. First line has index 1.
   * @param model
   *          string model being searched
   * @return value at specified position, or an empty string if symbolIndex is
   *         less than 1
   */
  public String findValue(int symbolIndex, int nodeIndex, int line,
          String model)
  {
    String value = "";
    try (Scanner scanner = new Scanner(model))
    {
      // the two lines that precede the first node
      scanner.nextLine();
      scanner.nextLine();

      // move to the requested line of the requested node
      int linesToSkip = (line - 1) + 3 * nodeIndex;
      for (int skipped = 0; skipped < linesToSkip; skipped++)
      {
        scanner.nextLine();
      }

      for (int symbol = 0; symbol < symbolIndex; symbol++)
      {
        value = scanner.next();
        if ("COMPO".equals(value) || value.length() < 7)
        {
          // row label: the value wanted is the token that follows it
          value = scanner.next();
        }
      }
    }
    return value;
  }

  /**
   * Answers whether two lists hold equal values in the same order.
   * 
   * Entries are compared by value (Double.equals). Previously they were
   * compared with ==, which tests object identity for boxed values, and the
   * method returned true when the lists differed.
   * 
   * @param list1
   *          first list, may be null
   * @param list2
   *          second list, may be null
   * @return true if both lists are null, or both have the same size and equal
   *         entries at every index; false otherwise
   */
  public boolean checkIfListsAreIdentical(List<Double> list1,
          List<Double> list2)
  {
    if (list1 == null || list2 == null)
    {
      return list1 == list2;
    }
    // List.equals compares sizes, then entries pairwise with equals()
    return list1.equals(list2);
  }
}