X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAAFrequencyTest.java;h=14cc526459c6de174f53b622ed74e3cd4db5bf47;hb=c794c5033adeee182b03a5ea92c0a7495a29661f;hp=646412f85ae4ad90f267fd5b5b453c52d8abe146;hpb=96356075a122b745936738f478a8ee33ac7e0f0a;p=jalview.git diff --git a/test/jalview/analysis/AAFrequencyTest.java b/test/jalview/analysis/AAFrequencyTest.java index 646412f..14cc526 100644 --- a/test/jalview/analysis/AAFrequencyTest.java +++ b/test/jalview/analysis/AAFrequencyTest.java @@ -26,8 +26,11 @@ import static org.testng.AssertJUnit.assertNull; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.Annotation; import jalview.datamodel.HiddenMarkovModel; +import jalview.datamodel.Profile; import jalview.datamodel.ProfileI; +import jalview.datamodel.Profiles; import jalview.datamodel.ProfilesI; +import jalview.datamodel.ResidueCount; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; import jalview.gui.JvOptionPane; @@ -38,11 +41,14 @@ import jalview.io.HMMFile; import java.io.IOException; import java.net.MalformedURLException; +import java.util.Hashtable; + import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class AAFrequencyTest { + HiddenMarkovModel hmm; @BeforeClass(alwaysRun = true) public void setUpJvOptionPane() @@ -51,6 +57,17 @@ public class AAFrequencyTest JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); } + @BeforeClass(alwaysRun = true) + public void setUp() throws IOException, MalformedURLException + { + /* + * load a dna (ACGT) HMM file to a HiddenMarkovModel + */ + HMMFile hmmFile = new HMMFile(new FileParse( + "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE)); + hmm = hmmFile.getHMM(); + } + @Test(groups = { "Functional" }) public void testCalculate_noProfile() { @@ -240,44 +257,271 @@ public class AAFrequencyTest assertEquals("T", ann.displayCharacter); } + /** + * Test to include rounding down of a non-zero count to 0% (JAL-3202) + */ @Test(groups = { "Functional" }) - public void testGetHMMProfileFor() - throws MalformedURLException, IOException + public void testExtractProfile() { + /* + * 200 sequences of which 30 gapped (170 ungapped) + * max count 70 for modal residue 'G' + */ + ProfileI profile = new Profile(200, 30, 70, "G"); + ResidueCount counts = new ResidueCount(); + counts.put('G', 70); + counts.put('R', 60); + counts.put('L', 38); + counts.put('H', 2); + profile.setCounts(counts); - HMMFile hmmFile = new HMMFile(new FileParse( - "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE)); - hmmFile.parse(); - HiddenMarkovModel hmm = hmmFile.getHMM(); - AlignmentAnnotation aa = hmm.createAnnotation(80); - aa.setHMM(hmm); + /* + * [0, noOfValues, totalPercent, char1, count1, ...] + * G: 70/170 = 41.2 = 41 + * R: 60/170 = 35.3 = 35 + * L: 38/170 = 22.3 = 22 + * H: 2/170 = 1 + * total (rounded) percentages = 99 + */ + int[] extracted = AAFrequency.extractProfile(profile, true); + int[] expected = new int[] { 0, 4, 99, 'G', 41, 'R', 35, 'L', 22, 'H', + 1 }; + org.testng.Assert.assertEquals(extracted, expected); + + /* + * add some counts of 1; these round down to 0% and should be discarded + */ + counts.put('G', 68); // 68/170 = 40% exactly (percentages now total 98) + counts.put('Q', 1); + counts.put('K', 1); + extracted = AAFrequency.extractProfile(profile, true); + expected = new int[] { 0, 4, 98, 'G', 40, 'R', 35, 'L', 22, 'H', 1 }; + org.testng.Assert.assertEquals(extracted, expected); + + } + + /** + * Tests for the profile calculation where gaps are included i.e. the + * denominator is the total number of sequences in the column + */ + @Test(groups = { "Functional" }) + public void testExtractProfile_countGaps() + { + /* + * 200 sequences of which 30 gapped (170 ungapped) + * max count 70 for modal residue 'G' + */ + ProfileI profile = new Profile(200, 30, 70, "G"); + ResidueCount counts = new ResidueCount(); + counts.put('G', 70); + counts.put('R', 60); + counts.put('L', 38); + counts.put('H', 2); + profile.setCounts(counts); + + /* + * [0, noOfValues, totalPercent, char1, count1, ...] + * G: 70/200 = 35% + * R: 60/200 = 30% + * L: 38/200 = 19% + * H: 2/200 = 1% + * total (rounded) percentages = 85 + */ + int[] extracted = AAFrequency.extractProfile(profile, false); + int[] expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, + 85, 'G', 35, 'R', 30, 'L', 19, 'H', + 1 }; + org.testng.Assert.assertEquals(extracted, expected); + + /* + * add some counts of 1; these round down to 0% and should be discarded + */ + counts.put('G', 68); // 68/200 = 34% + counts.put('Q', 1); + counts.put('K', 1); + extracted = AAFrequency.extractProfile(profile, false); + expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, 84, 'G', + 34, 'R', 30, 'L', 19, 'H', 1 }; + org.testng.Assert.assertEquals(extracted, expected); + + } + @Test(groups = { "Functional" }) + public void testExtractCdnaProfile() + { + /* + * 200 sequences of which 30 gapped (170 ungapped) + * max count 70 for modal residue 'G' + */ + Hashtable profile = new Hashtable(); + + /* + * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64} + * where 1..64 positions correspond to encoded codons + * see CodingUtils.encodeCodon() + */ + int[] codonCounts = new int[66]; + char[] codon1 = new char[] { 'G', 'C', 'A' }; + char[] codon2 = new char[] { 'c', 'C', 'A' }; + char[] codon3 = new char[] { 't', 'g', 'A' }; + char[] codon4 = new char[] { 'G', 'C', 't' }; + int encoded1 = CodingUtils.encodeCodon(codon1); + int encoded2 = CodingUtils.encodeCodon(codon2); + int encoded3 = CodingUtils.encodeCodon(codon3); + int encoded4 = CodingUtils.encodeCodon(codon4); + codonCounts[2 + encoded1] = 30; + codonCounts[2 + encoded2] = 70; + codonCounts[2 + encoded3] = 9; + codonCounts[2 + encoded4] = 1; + codonCounts[0] = 120; + codonCounts[1] = 110; + profile.put(AAFrequency.PROFILE, codonCounts); + + /* + * [0, noOfValues, totalPercent, char1, count1, ...] + * codon1: 30/110 = 27.2 = 27% + * codon2: 70/110 = 63.6% = 63% + * codon3: 9/110 = 8.1% = 8% + * codon4: 1/110 = 0.9% = 0% should be discarded + * total (rounded) percentages = 98 + */ + int[] extracted = AAFrequency.extractCdnaProfile(profile, true); + int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 98, + encoded2, 63, encoded1, 27, encoded3, 8 }; + org.testng.Assert.assertEquals(extracted, expected); + } + + @Test(groups = { "Functional" }) + public void testExtractCdnaProfile_countGaps() + { + /* + * 200 sequences of which 30 gapped (170 ungapped) + * max count 70 for modal residue 'G' + */ + Hashtable profile = new Hashtable(); + + /* + * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64} + * where 1..64 positions correspond to encoded codons + * see CodingUtils.encodeCodon() + */ + int[] codonCounts = new int[66]; + char[] codon1 = new char[] { 'G', 'C', 'A' }; + char[] codon2 = new char[] { 'c', 'C', 'A' }; + char[] codon3 = new char[] { 't', 'g', 'A' }; + char[] codon4 = new char[] { 'G', 'C', 't' }; + int encoded1 = CodingUtils.encodeCodon(codon1); + int encoded2 = CodingUtils.encodeCodon(codon2); + int encoded3 = CodingUtils.encodeCodon(codon3); + int encoded4 = CodingUtils.encodeCodon(codon4); + codonCounts[2 + encoded1] = 30; + codonCounts[2 + encoded2] = 70; + codonCounts[2 + encoded3] = 9; + codonCounts[2 + encoded4] = 1; + codonCounts[0] = 120; + codonCounts[1] = 110; + profile.put(AAFrequency.PROFILE, codonCounts); + + /* + * [0, noOfValues, totalPercent, char1, count1, ...] + * codon1: 30/120 = 25% + * codon2: 70/120 = 58.3 = 58% + * codon3: 9/120 = 7.5 = 7% + * codon4: 1/120 = 0.8 = 0% should be discarded + * total (rounded) percentages = 90 + */ + int[] extracted = AAFrequency.extractCdnaProfile(profile, false); + int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 90, + encoded2, 58, encoded1, 25, encoded3, 7 }; + org.testng.Assert.assertEquals(extracted, expected); + } + + @Test(groups = { "Functional" }) + public void testExtractHMMProfile() + throws MalformedURLException, IOException + { int[] expected = { 0, 4, 100, 'T', 71, 'C', 12, 'G', 9, 'A', 9 }; - int[] actual = AAFrequency.getHMMProfileFor(aa, 17, false); + int[] actual = AAFrequency.extractHMMProfile(hmm, 17, false, false); for (int i = 0; i < actual.length; i++) { if (i == 2) { - assertEquals(actual[i], expected[i], 5); + assertEquals(actual[i], expected[i]); } else { - assertEquals(actual[i], expected[i], 1); + assertEquals(actual[i], expected[i]); } } - int[] expected2 = { 0, 4, 85, 'A', 85, 'C', 0, 'G', 0, 'T', 0 }; - int[] actual2 = AAFrequency.getHMMProfileFor(aa, 2, true); - for (int i = 0; i < actual.length; i++) + int[] expected2 = { 0, 4, 100, 'A', 85, 'C', 0, 'G', 0, 'T', 0 }; + int[] actual2 = AAFrequency.extractHMMProfile(hmm, 2, true, false); + for (int i = 0; i < actual2.length; i++) { if (i == 2) { - assertEquals(actual[i], expected[i], 5); + assertEquals(actual[i], expected[i]); } else { - assertEquals(actual[i], expected[i], 1); + assertEquals(actual[i], expected[i]); } } + + assertNull(AAFrequency.extractHMMProfile(null, 98978867, true, false)); + } + + @Test(groups = { "Functional" }) + public void testGetAnalogueCount() + { + /* + * 'T' in column 0 has emission probability 0.7859, scales to 7859 + */ + int count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, false); + assertEquals(7859, count); + + /* + * same with 'use info height': value is multiplied by log ratio + * log(value / background) / log(2) = log(0.7859/0.25)/0.693 + * = log(3.1)/0.693 = 1.145/0.693 = 1.66 + * so value becomes 1.2987 and scales up to 12987 + */ + count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, true); + assertEquals(12987, count); + + /* + * 'G' in column 20 has emission probability 0.75457, scales to 7546 + */ + count = AAFrequency.getAnalogueCount(hmm, 20, 'G', false, false); + assertEquals(7546, count); + + /* + * 'G' in column 1077 has emission probability 0.0533, here + * ignored (set to 0) since below background of 0.25 + */ + count = AAFrequency.getAnalogueCount(hmm, 1077, 'G', true, false); + assertEquals(0, count); + } + + @Test(groups = { "Functional" }) + public void testCompleteInformation() + { + ProfileI prof1 = new Profile(1, 0, 100, "A"); + ProfileI prof2 = new Profile(1, 0, 100, "-"); + + ProfilesI profs = new Profiles(new ProfileI[] { prof1, prof2 }); + Annotation ann1 = new Annotation(6.5f); + Annotation ann2 = new Annotation(0f); + Annotation[] annots = new Annotation[] { ann1, ann2 }; + SequenceI seq = new Sequence("", "AA", 0, 0); + seq.setHMM(hmm); + AlignmentAnnotation annot = new AlignmentAnnotation("", "", annots); + annot.setSequenceRef(seq); + AAFrequency.completeInformation(annot, profs, 0, 1); + float ic = annot.annotations[0].value; + assertEquals(0.91532f, ic, 0.0001f); + ic = annot.annotations[1].value; + assertEquals(0f, ic, 0.0001f); + int i = 0; } }