package jalview.datamodel;
+import static org.testng.Assert.assertEquals;
+
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.HMMFile;
+import jalview.schemes.ResidueProperties;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.Map;
+
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
-public class HiddenMarkovModelTest
-{
- HiddenMarkovModel hmm = new HiddenMarkovModel();
+public class HiddenMarkovModelTest {
+
+ HiddenMarkovModel hmm;
+
+ HiddenMarkovModel alignmentHmm;
+
+ @BeforeClass(alwaysRun = true)
+ public void setUp() throws MalformedURLException, IOException
+ {
+ /*
+ * load hmm model of a Kinase domain to a HiddenMarkovModel
+ */
+ HMMFile file = new HMMFile(new FileParse(
+ "test/jalview/io/test_PKinase_hmm.txt", DataSourceType.FILE));
+ hmm = file.getHMM();
+
+ // used to check if consensus sequence is automatically aligned with alignment
+ HMMFile alignmentTest = new HMMFile(
+ new FileParse("test/jalview/io/HMMAlignmentTestHMM.hmm",
+ DataSourceType.FILE));
+ alignmentHmm = alignmentTest.getHMM();
+ }
- @Test
- public void testGetGatheringThresholdGA1()
+ @Test(groups = "Functional")
+ public void testGetMatchEmissionProbabilities()
+ throws MalformedURLException, IOException
{
- hmm.put("GA1", "10.1");
- // assertEquals(hmm.getGatheringThresholdGA1(), 10.1);
+ /*
+ * raw value in file is 3.67403
+ * saved as probability e^-X = 0.05259
+ */
+ double mep = hmm.getMatchEmissionProbability(0, 'R');
+ assertEquals(mep, 0.02537400637, 0.0001d);
+ assertEquals(mep, Math.pow(Math.E, -3.67403), 0.0001d);
+
+ mep = hmm.getMatchEmissionProbability(19, 'W');
+ assertEquals(mep, 0.00588228492, 0.0001d);
+ assertEquals(mep, Math.pow(Math.E, -5.13581), 0.0001d);
+
+ // column 160 is a gapped region of the model
+ mep = hmm.getMatchEmissionProbability(160, 'G');
+ assertEquals(mep, 0D, 0.0001d);
+
+ mep = hmm.getMatchEmissionProbability(475, 'A');
+ assertEquals(mep, 0.04995163708, 0.0001d);
+ assertEquals(mep, Math.pow(Math.E, -2.99670), 0.0001d);
+ }
+
+ @Test(groups = "Functional")
+ public void testGetInsertEmissionProbabilities()
+ {
+ double iep = hmm.getInsertEmissionProbability(2, 'A');
+ assertEquals(iep, Math.pow(Math.E, -2.68618), 0.0001d);
+ // symbol is not case-sensitive
+ assertEquals(iep, hmm.getInsertEmissionProbability(2, 'a'));
+
+ iep = hmm.getInsertEmissionProbability(5, 'T');
+ assertEquals(iep, Math.pow(Math.E, -2.77519), 0.0001d);
+
+ // column 161 is gapped in the hmm
+ iep = hmm.getInsertEmissionProbability(161, 'K');
+ assertEquals(iep, 0D, 0.0001d);
+
+ iep = hmm.getInsertEmissionProbability(472, 'L');
+ assertEquals(iep, Math.pow(Math.E, -2.69355), 0.0001d);
}
-}
\ No newline at end of file
+ @Test(groups = "Functional")
+ public void testGetStateTransitionProbabilities()
+ {
+ // * in model file treated as negative infinity
+ double stp = hmm.getStateTransitionProbability(475,
+ HiddenMarkovModel.MATCHTODELETE);
+ assertEquals(stp, Double.NEGATIVE_INFINITY);
+
+ // file value is 5.01631, saved as e^-5.01631
+ stp = hmm.getStateTransitionProbability(8,
+ HiddenMarkovModel.MATCHTOINSERT);
+ assertEquals(stp, Math.pow(Math.E, -5.01631), 0.0001D);
+
+ stp = hmm.getStateTransitionProbability(36,
+ HiddenMarkovModel.MATCHTODELETE);
+ assertEquals(stp, Math.pow(Math.E, -5.73865), 0.0001D);
+
+ stp = hmm.getStateTransitionProbability(22,
+ HiddenMarkovModel.INSERTTOMATCH);
+ assertEquals(stp, Math.pow(Math.E, -0.61958), 0.0001D);
+
+ stp = hmm.getStateTransitionProbability(80,
+ HiddenMarkovModel.INSERTTOINSERT);
+ assertEquals(stp, Math.pow(Math.E, -0.77255), 0.0001D);
+
+ stp = hmm.getStateTransitionProbability(475,
+ HiddenMarkovModel.DELETETOMATCH);
+ assertEquals(stp, 1D, 0.0001D);
+
+ stp = hmm.getStateTransitionProbability(218,
+ HiddenMarkovModel.DELETETODELETE);
+ assertEquals(stp, Math.pow(Math.E, -0.95510), 0.0001D);
+ }
+
+ @Test(groups = "Functional")
+ public void testGetConsensusSequence()
+ {
+ SequenceI seq = hmm.getConsensusSequence();
+ String subStr = seq.getSequenceAsString().substring(0, 10);
+ assertEquals(subStr, "yelleklGsG");
+ subStr = seq.getSequenceAsString().substring(150, 161);
+ assertEquals(subStr, "-dllk------");
+
+ // test to see if consensus sequence maps to alignment correctly
+ // see HMMAlignmentTest.sto for corresponding alignment file
+ SequenceI seq2 = alignmentHmm.getConsensusSequence();
+ assertEquals(seq2.getCharAt(0), '-');
+ assertEquals(seq2.getCharAt(7), '-');
+ assertEquals(seq2.getCharAt(8), 's');
+
+ }
+
+ /**
+ * A rather pointless test that reproduces the code implemented and asserts
+ * the result is the same...
+ */
+ @Test(groups = "Functional")
+ public void testGetInformationContent()
+ {
+ /*
+ * information measure is sum over all symbols of
+ * emissionProb * log(emissionProb / background) / log(2)
+ */
+ Map<Character, Float> uniprotFreqs = ResidueProperties.backgroundFrequencies
+ .get("amino");
+ int col = 4;
+ float expected = 0f;
+ for (char aa : hmm.getSymbols().toCharArray())
+ {
+ double mep = hmm.getMatchEmissionProbability(col, aa);
+ float background = uniprotFreqs.get(aa);
+ expected += mep * Math.log(mep / background);
+ }
+ expected /= Math.log(2D);
+
+ float actual = hmm.getInformationContent(col);
+ assertEquals(actual, expected, 0.0001d);
+ }
+}