/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b1)
- * Copyright (C) 2015 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNull;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.Profile;
+import jalview.datamodel.ProfileI;
+import jalview.datamodel.ProfilesI;
+import jalview.datamodel.ResidueCount;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
+import jalview.gui.JvOptionPane;
import java.util.Hashtable;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class AAFrequencyTest
{
- private static final String C = AAFrequency.MAXCOUNT;
- private static final String R = AAFrequency.MAXRESIDUE;
-
- private static final String G = AAFrequency.PID_GAPS;
-
- private static final String N = AAFrequency.PID_NOGAPS;
-
- private static final String P = AAFrequency.PROFILE;
+ @BeforeClass(alwaysRun = true)
+ public void setUpJvOptionPane()
+ {
+ JvOptionPane.setInteractiveMode(false);
+ JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+ }
@Test(groups = { "Functional" })
public void testCalculate_noProfile()
{
- SequenceI seq1 = new Sequence("Seq1", "CAGT");
- SequenceI seq2 = new Sequence("Seq2", "CACT");
- SequenceI seq3 = new Sequence("Seq3", "C--G");
- SequenceI seq4 = new Sequence("Seq4", "CA-t");
+ SequenceI seq1 = new Sequence("Seq1", "CAG-T");
+ SequenceI seq2 = new Sequence("Seq2", "CAC-T");
+ SequenceI seq3 = new Sequence("Seq3", "C---G");
+ SequenceI seq4 = new Sequence("Seq4", "CA--t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Hashtable[] result = new Hashtable[seq1.getLength()];
-
- AAFrequency.calculate(seqs, 0, seq1.getLength(), result, false);
+ int width = seq1.getLength();
+ ProfilesI result = AAFrequency.calculate(seqs, width, 0, width, false);
// col 0 is 100% C
- Hashtable col = result[0];
- assertEquals(100f, (Float) col.get(G), 0.0001f);
- assertEquals(100f, (Float) col.get(N), 0.0001f);
- assertEquals(4, col.get(C));
- assertEquals("C", col.get(R));
- assertNull(col.get(P));
+ ProfileI col = result.get(0);
+ assertEquals(100f, col.getPercentageIdentity(false));
+ assertEquals(100f, col.getPercentageIdentity(true));
+ assertEquals(4, col.getMaxCount());
+ assertEquals("C", col.getModalResidue());
+ assertNull(col.getCounts());
// col 1 is 75% A
- col = result[1];
- assertEquals(75f, (Float) col.get(G), 0.0001f);
- assertEquals(100f, (Float) col.get(N), 0.0001f);
- assertEquals(3, col.get(C));
- assertEquals("A", col.get(R));
+ col = result.get(1);
+ assertEquals(75f, col.getPercentageIdentity(false));
+ assertEquals(100f, col.getPercentageIdentity(true));
+ assertEquals(3, col.getMaxCount());
+ assertEquals("A", col.getModalResidue());
// col 2 is 50% G 50% C or 25/25 counting gaps
- col = result[2];
- assertEquals(25f, (Float) col.get(G), 0.0001f);
- assertEquals(50f, (Float) col.get(N), 0.0001f);
- assertEquals(1, col.get(C));
- assertEquals("CG", col.get(R));
-
- // col 3 is 75% T 25% G
- col = result[3];
- assertEquals(75f, (Float) col.get(G), 0.0001f);
- assertEquals(75f, (Float) col.get(N), 0.0001f);
- assertEquals(3, col.get(C));
- assertEquals("T", col.get(R));
+ col = result.get(2);
+ assertEquals(25f, col.getPercentageIdentity(false));
+ assertEquals(50f, col.getPercentageIdentity(true));
+ assertEquals(1, col.getMaxCount());
+ assertEquals("CG", col.getModalResidue());
+
+ // col 3 is all gaps
+ col = result.get(3);
+ assertEquals(0f, col.getPercentageIdentity(false));
+ assertEquals(0f, col.getPercentageIdentity(true));
+ assertEquals(0, col.getMaxCount());
+ assertEquals("", col.getModalResidue());
+
+ // col 4 is 75% T 25% G
+ col = result.get(4);
+ assertEquals(75f, col.getPercentageIdentity(false));
+ assertEquals(75f, col.getPercentageIdentity(true));
+ assertEquals(3, col.getMaxCount());
+ assertEquals("T", col.getModalResidue());
}
@Test(groups = { "Functional" })
SequenceI seq3 = new Sequence("Seq3", "C--G");
SequenceI seq4 = new Sequence("Seq4", "CA-t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Hashtable[] result = new Hashtable[seq1.getLength()];
-
- AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true);
- int[][] profile = (int[][]) result[0].get(P);
- assertEquals(4, profile[0]['C']);
- assertEquals(4, profile[1][0]); // no of seqs
- assertEquals(4, profile[1][1]); // nongapped in column
-
- profile = (int[][]) result[1].get(P);
- assertEquals(3, profile[0]['A']);
- assertEquals(4, profile[1][0]);
- assertEquals(3, profile[1][1]);
-
- profile = (int[][]) result[2].get(P);
- assertEquals(1, profile[0]['G']);
- assertEquals(1, profile[0]['C']);
- assertEquals(4, profile[1][0]);
- assertEquals(2, profile[1][1]);
-
- profile = (int[][]) result[3].get(P);
- assertEquals(3, profile[0]['T']);
- assertEquals(1, profile[0]['G']);
- assertEquals(4, profile[1][0]);
- assertEquals(4, profile[1][1]);
+ int width = seq1.getLength();
+ ProfilesI result = AAFrequency.calculate(seqs, width, 0, width, true);
+
+ ProfileI profile = result.get(0);
+ assertEquals(4, profile.getCounts().getCount('C'));
+ assertEquals(4, profile.getHeight());
+ assertEquals(4, profile.getNonGapped());
+
+ profile = result.get(1);
+ assertEquals(3, profile.getCounts().getCount('A'));
+ assertEquals(4, profile.getHeight());
+ assertEquals(3, profile.getNonGapped());
+
+ profile = result.get(2);
+ assertEquals(1, profile.getCounts().getCount('C'));
+ assertEquals(1, profile.getCounts().getCount('G'));
+ assertEquals(4, profile.getHeight());
+ assertEquals(2, profile.getNonGapped());
+
+ profile = result.get(3);
+ assertEquals(3, profile.getCounts().getCount('T'));
+ assertEquals(1, profile.getCounts().getCount('G'));
+ assertEquals(4, profile.getHeight());
+ assertEquals(4, profile.getNonGapped());
}
- @Test(groups = { "Functional" })
+ @Test(groups = { "Functional" }, enabled = false)
public void testCalculate_withProfileTiming()
{
SequenceI seq1 = new Sequence("Seq1", "CAGT");
SequenceI seq3 = new Sequence("Seq3", "C--G");
SequenceI seq4 = new Sequence("Seq4", "CA-t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Hashtable[] result = new Hashtable[seq1.getLength()];
- // ensure class loaded and initialized
- AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true);
+ // ensure class loaded and initialised
+ int width = seq1.getLength();
+ AAFrequency.calculate(seqs, width, 0, width, true);
+
int reps = 100000;
long start = System.currentTimeMillis();
for (int i = 0; i < reps; i++)
{
- AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true);
+ AAFrequency.calculate(seqs, width, 0, width, true);
}
System.out.println(System.currentTimeMillis() - start);
}
+ /**
+ * Test generation of consensus annotation with options 'include gaps'
+ * (profile percentages are of all sequences, whether gapped or not), and
+ * 'show logo' (the full profile with all residue percentages is reported in
+ * the description for the tooltip)
+ */
+ @Test(groups = { "Functional" })
+ public void testCompleteConsensus_includeGaps_showLogo()
+ {
+ /*
+ * first compute the profiles
+ */
+ SequenceI seq1 = new Sequence("Seq1", "CAG-T");
+ SequenceI seq2 = new Sequence("Seq2", "CAC-T");
+ SequenceI seq3 = new Sequence("Seq3", "C---G");
+ SequenceI seq4 = new Sequence("Seq4", "CA--t");
+ SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
+ int width = seq1.getLength();
+ ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
+
+ AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
+ "PID", new Annotation[width]);
+ AAFrequency.completeConsensus(consensus, profiles, 0, 5, false, true,
+ 4);
+
+ Annotation ann = consensus.annotations[0];
+ assertEquals("C 100%", ann.description);
+ assertEquals("C", ann.displayCharacter);
+ ann = consensus.annotations[1];
+ assertEquals("A 75%", ann.description);
+ assertEquals("A", ann.displayCharacter);
+ ann = consensus.annotations[2];
+ assertEquals("C 25%; G 25%", ann.description);
+ assertEquals("+", ann.displayCharacter);
+ ann = consensus.annotations[3];
+ assertEquals("", ann.description);
+ assertEquals("-", ann.displayCharacter);
+ ann = consensus.annotations[4];
+ assertEquals("T 75%; G 25%", ann.description);
+ assertEquals("T", ann.displayCharacter);
+ }
+
+ /**
+ * Test generation of consensus annotation with options 'ignore gaps' (profile
+ * percentages are of the non-gapped sequences) and 'no logo' (only the modal
+ * residue[s] percentage is reported in the description for the tooltip)
+ */
+ @Test(groups = { "Functional" })
+ public void testCompleteConsensus_ignoreGaps_noLogo()
+ {
+ /*
+ * first compute the profiles
+ */
+ SequenceI seq1 = new Sequence("Seq1", "CAG-T");
+ SequenceI seq2 = new Sequence("Seq2", "CAC-T");
+ SequenceI seq3 = new Sequence("Seq3", "C---G");
+ SequenceI seq4 = new Sequence("Seq4", "CA--t");
+ SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
+ int width = seq1.getLength();
+ ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
+
+ AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
+ "PID", new Annotation[width]);
+ AAFrequency.completeConsensus(consensus, profiles, 0, 5, true, false,
+ 4);
+
+ Annotation ann = consensus.annotations[0];
+ assertEquals("C 100%", ann.description);
+ assertEquals("C", ann.displayCharacter);
+ ann = consensus.annotations[1];
+ assertEquals("A 100%", ann.description);
+ assertEquals("A", ann.displayCharacter);
+ ann = consensus.annotations[2];
+ assertEquals("[CG] 50%", ann.description);
+ assertEquals("+", ann.displayCharacter);
+ ann = consensus.annotations[3];
+ assertEquals("", ann.description);
+ assertEquals("-", ann.displayCharacter);
+ ann = consensus.annotations[4];
+ assertEquals("T 75%", ann.description);
+ assertEquals("T", ann.displayCharacter);
+ }
+
+ /**
+ * Test to include rounding down of a non-zero count to 0% (JAL-3202)
+ */
+ @Test(groups = { "Functional" })
+ public void testExtractProfile()
+ {
+ /*
+ * 200 sequences of which 30 gapped (170 ungapped)
+ * max count 70 for modal residue 'G'
+ */
+ ProfileI profile = new Profile(200, 30, 70, "G");
+ ResidueCount counts = new ResidueCount();
+ counts.put('G', 70);
+ counts.put('R', 60);
+ counts.put('L', 38);
+ counts.put('H', 2);
+ profile.setCounts(counts);
+
+ /*
+ * [0, noOfValues, totalPercent, char1, count1, ...]
+ * G: 70/170 = 41.2 = 41
+ * R: 60/170 = 35.3 = 35
+ * L: 38/170 = 22.3 = 22
+ * H: 2/170 = 1
+ * total (rounded) percentages = 99
+ */
+ int[] extracted = AAFrequency.extractProfile(profile, true);
+ int[] expected = new int[] { 0, 4, 99, 'G', 41, 'R', 35, 'L', 22, 'H',
+ 1 };
+ org.testng.Assert.assertEquals(extracted, expected);
+
+ /*
+ * add some counts of 1; these round down to 0% and should be discarded
+ */
+ counts.put('G', 68); // 68/170 = 40% exactly (percentages now total 98)
+ counts.put('Q', 1);
+ counts.put('K', 1);
+ extracted = AAFrequency.extractProfile(profile, true);
+ expected = new int[] { 0, 4, 98, 'G', 40, 'R', 35, 'L', 22, 'H', 1 };
+ org.testng.Assert.assertEquals(extracted, expected);
+
+ }
+
+ /**
+ * Tests for the profile calculation where gaps are included i.e. the
+ * denominator is the total number of sequences in the column
+ */
@Test(groups = { "Functional" })
- public void testGetPercentageFormat()
+ public void testExtractProfile_countGaps()
{
- assertNull(AAFrequency.getPercentageFormat(0));
- assertNull(AAFrequency.getPercentageFormat(99));
- assertEquals("%3.1f", AAFrequency.getPercentageFormat(100).toString());
- assertEquals("%3.1f", AAFrequency.getPercentageFormat(999).toString());
- assertEquals("%3.2f", AAFrequency.getPercentageFormat(1000).toString());
- assertEquals("%3.2f", AAFrequency.getPercentageFormat(9999).toString());
+ /*
+ * 200 sequences of which 30 gapped (170 ungapped)
+ * max count 70 for modal residue 'G'
+ */
+ ProfileI profile = new Profile(200, 30, 70, "G");
+ ResidueCount counts = new ResidueCount();
+ counts.put('G', 70);
+ counts.put('R', 60);
+ counts.put('L', 38);
+ counts.put('H', 2);
+ profile.setCounts(counts);
+
+ /*
+ * [0, noOfValues, totalPercent, char1, count1, ...]
+ * G: 70/200 = 35%
+ * R: 60/200 = 30%
+ * L: 38/200 = 19%
+ * H: 2/200 = 1%
+ * total (rounded) percentages = 85
+ */
+ int[] extracted = AAFrequency.extractProfile(profile, false);
+ int[] expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4,
+ 85, 'G', 35, 'R', 30, 'L', 19, 'H', 1 };
+ org.testng.Assert.assertEquals(extracted, expected);
+
+ /*
+ * add some counts of 1; these round down to 0% and should be discarded
+ */
+ counts.put('G', 68); // 68/200 = 34%
+ counts.put('Q', 1);
+ counts.put('K', 1);
+ extracted = AAFrequency.extractProfile(profile, false);
+ expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, 84, 'G',
+ 34, 'R', 30, 'L', 19, 'H', 1 };
+ org.testng.Assert.assertEquals(extracted, expected);
+
+ }
+
+ @Test(groups = { "Functional" })
+ public void testExtractCdnaProfile()
+ {
+ /*
+ * 200 sequences of which 30 gapped (170 ungapped)
+ * max count 70 for modal residue 'G'
+ */
+ Hashtable profile = new Hashtable();
+
+ /*
+ * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
+ * where 1..64 positions correspond to encoded codons
+ * see CodingUtils.encodeCodon()
+ */
+ int[] codonCounts = new int[66];
+ char[] codon1 = new char[] { 'G', 'C', 'A' };
+ char[] codon2 = new char[] { 'c', 'C', 'A' };
+ char[] codon3 = new char[] { 't', 'g', 'A' };
+ char[] codon4 = new char[] { 'G', 'C', 't' };
+ int encoded1 = CodingUtils.encodeCodon(codon1);
+ int encoded2 = CodingUtils.encodeCodon(codon2);
+ int encoded3 = CodingUtils.encodeCodon(codon3);
+ int encoded4 = CodingUtils.encodeCodon(codon4);
+ codonCounts[2 + encoded1] = 30;
+ codonCounts[2 + encoded2] = 70;
+ codonCounts[2 + encoded3] = 9;
+ codonCounts[2 + encoded4] = 1;
+ codonCounts[0] = 120;
+ codonCounts[1] = 110;
+ profile.put(AAFrequency.PROFILE, codonCounts);
+
+ /*
+ * [0, noOfValues, totalPercent, char1, count1, ...]
+ * codon1: 30/110 = 27.2 = 27%
+ * codon2: 70/110 = 63.6% = 63%
+ * codon3: 9/110 = 8.1% = 8%
+ * codon4: 1/110 = 0.9% = 0% should be discarded
+ * total (rounded) percentages = 98
+ */
+ int[] extracted = AAFrequency.extractCdnaProfile(profile, true);
+ int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 98,
+ encoded2, 63, encoded1, 27, encoded3, 8 };
+ org.testng.Assert.assertEquals(extracted, expected);
+ }
+
+ @Test(groups = { "Functional" })
+ public void testExtractCdnaProfile_countGaps()
+ {
+ /*
+ * 200 sequences of which 30 gapped (170 ungapped)
+ * max count 70 for modal residue 'G'
+ */
+ Hashtable profile = new Hashtable();
+
+ /*
+ * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
+ * where 1..64 positions correspond to encoded codons
+ * see CodingUtils.encodeCodon()
+ */
+ int[] codonCounts = new int[66];
+ char[] codon1 = new char[] { 'G', 'C', 'A' };
+ char[] codon2 = new char[] { 'c', 'C', 'A' };
+ char[] codon3 = new char[] { 't', 'g', 'A' };
+ char[] codon4 = new char[] { 'G', 'C', 't' };
+ int encoded1 = CodingUtils.encodeCodon(codon1);
+ int encoded2 = CodingUtils.encodeCodon(codon2);
+ int encoded3 = CodingUtils.encodeCodon(codon3);
+ int encoded4 = CodingUtils.encodeCodon(codon4);
+ codonCounts[2 + encoded1] = 30;
+ codonCounts[2 + encoded2] = 70;
+ codonCounts[2 + encoded3] = 9;
+ codonCounts[2 + encoded4] = 1;
+ codonCounts[0] = 120;
+ codonCounts[1] = 110;
+ profile.put(AAFrequency.PROFILE, codonCounts);
+
+ /*
+ * [0, noOfValues, totalPercent, char1, count1, ...]
+ * codon1: 30/120 = 25%
+ * codon2: 70/120 = 58.3 = 58%
+ * codon3: 9/120 = 7.5 = 7%
+ * codon4: 1/120 = 0.8 = 0% should be discarded
+ * total (rounded) percentages = 90
+ */
+ int[] extracted = AAFrequency.extractCdnaProfile(profile, false);
+ int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 90,
+ encoded2, 58, encoded1, 25, encoded3, 7 };
+ org.testng.Assert.assertEquals(extracted, expected);
}
}