package jalview.datamodel; import static org.testng.Assert.assertEquals; import jalview.io.DataSourceType; import jalview.io.FileParse; import jalview.io.HMMFile; import jalview.schemes.ResidueProperties; import java.io.IOException; import java.net.MalformedURLException; import java.util.Map; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class HiddenMarkovModelTest { HiddenMarkovModel hmm; HiddenMarkovModel alignmentHmm; @BeforeClass(alwaysRun = true) public void setUp() throws MalformedURLException, IOException { /* * load hmm model of a Kinase domain to a HiddenMarkovModel */ HMMFile file = new HMMFile(new FileParse( "test/jalview/io/test_PKinase_hmm.txt", DataSourceType.FILE)); hmm = file.getHMM(); // used to check if consensus sequence is automatically aligned with alignment HMMFile alignmentTest = new HMMFile( new FileParse("test/jalview/io/HMMAlignmentTestHMM.hmm", DataSourceType.FILE)); alignmentHmm = alignmentTest.getHMM(); } @Test(groups = "Functional") public void testGetMatchEmissionProbabilities() throws MalformedURLException, IOException { /* * raw value in file is 3.67403 * saved as probability e^-X = 0.05259 */ double mep = hmm.getMatchEmissionProbability(0, 'R'); assertEquals(mep, 0.02537400637, 0.0001d); assertEquals(mep, Math.pow(Math.E, -3.67403), 0.0001d); mep = hmm.getMatchEmissionProbability(19, 'W'); assertEquals(mep, 0.00588228492, 0.0001d); assertEquals(mep, Math.pow(Math.E, -5.13581), 0.0001d); // column 160 is a gapped region of the model mep = hmm.getMatchEmissionProbability(160, 'G'); assertEquals(mep, 0D, 0.0001d); mep = hmm.getMatchEmissionProbability(475, 'A'); assertEquals(mep, 0.04995163708, 0.0001d); assertEquals(mep, Math.pow(Math.E, -2.99670), 0.0001d); } @Test(groups = "Functional") public void testGetInsertEmissionProbabilities() { double iep = hmm.getInsertEmissionProbability(2, 'A'); assertEquals(iep, Math.pow(Math.E, -2.68618), 0.0001d); // symbol is not case-sensitive assertEquals(iep, hmm.getInsertEmissionProbability(2, 'a')); iep = hmm.getInsertEmissionProbability(5, 'T'); assertEquals(iep, Math.pow(Math.E, -2.77519), 0.0001d); // column 161 is gapped in the hmm iep = hmm.getInsertEmissionProbability(161, 'K'); assertEquals(iep, 0D, 0.0001d); iep = hmm.getInsertEmissionProbability(472, 'L'); assertEquals(iep, Math.pow(Math.E, -2.69355), 0.0001d); } @Test(groups = "Functional") public void testGetStateTransitionProbabilities() { // * in model file treated as negative infinity double stp = hmm.getStateTransitionProbability(475, HiddenMarkovModel.MATCHTODELETE); assertEquals(stp, Double.NEGATIVE_INFINITY); // file value is 5.01631, saved as e^-5.01631 stp = hmm.getStateTransitionProbability(8, HiddenMarkovModel.MATCHTOINSERT); assertEquals(stp, Math.pow(Math.E, -5.01631), 0.0001D); stp = hmm.getStateTransitionProbability(36, HiddenMarkovModel.MATCHTODELETE); assertEquals(stp, Math.pow(Math.E, -5.73865), 0.0001D); stp = hmm.getStateTransitionProbability(22, HiddenMarkovModel.INSERTTOMATCH); assertEquals(stp, Math.pow(Math.E, -0.61958), 0.0001D); stp = hmm.getStateTransitionProbability(80, HiddenMarkovModel.INSERTTOINSERT); assertEquals(stp, Math.pow(Math.E, -0.77255), 0.0001D); stp = hmm.getStateTransitionProbability(475, HiddenMarkovModel.DELETETOMATCH); assertEquals(stp, 1D, 0.0001D); stp = hmm.getStateTransitionProbability(218, HiddenMarkovModel.DELETETODELETE); assertEquals(stp, Math.pow(Math.E, -0.95510), 0.0001D); } @Test(groups = "Functional") public void testGetConsensusSequence() { SequenceI seq = hmm.getConsensusSequence(); String subStr = seq.getSequenceAsString().substring(0, 10); assertEquals(subStr, "yelleklGsG"); subStr = seq.getSequenceAsString().substring(150, 161); assertEquals(subStr, "-dllk------"); // test to see if consensus sequence maps to alignment correctly // see HMMAlignmentTest.sto for corresponding alignment file SequenceI seq2 = alignmentHmm.getConsensusSequence(); assertEquals(seq2.getCharAt(0), '-'); assertEquals(seq2.getCharAt(7), '-'); assertEquals(seq2.getCharAt(8), 's'); } /** * A rather pointless test that reproduces the code implemented and asserts * the result is the same... */ @Test(groups = "Functional") public void testGetInformationContent() { /* * information measure is sum over all symbols of * emissionProb * log(emissionProb / background) / log(2) */ Map uniprotFreqs = ResidueProperties.backgroundFrequencies .get("amino"); int col = 4; float expected = 0f; for (char aa : hmm.getSymbols().toCharArray()) { double mep = hmm.getMatchEmissionProbability(col, aa); float background = uniprotFreqs.get(aa); expected += mep * Math.log(mep / background); } expected /= Math.log(2D); float actual = hmm.getInformationContent(col); assertEquals(actual, expected, 0.0001d); } }