/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
26 import jalview.datamodel.AlignmentAnnotation;
27 import jalview.datamodel.Annotation;
28 import jalview.datamodel.Profile;
29 import jalview.datamodel.ProfileI;
30 import jalview.datamodel.ProfilesI;
31 import jalview.datamodel.ResidueCount;
32 import jalview.datamodel.Sequence;
33 import jalview.datamodel.SequenceI;
34 import jalview.gui.JvOptionPane;
36 import java.util.Hashtable;
38 import org.testng.annotations.BeforeClass;
39 import org.testng.annotations.Test;
41 public class AAFrequencyTest
44 @BeforeClass(alwaysRun = true)
45 public void setUpJvOptionPane()
47 JvOptionPane.setInteractiveMode(false);
48 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
51 @Test(groups = { "Functional" })
52 public void testCalculate_noProfile()
54 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
55 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
56 SequenceI seq3 = new Sequence("Seq3", "C---G");
57 SequenceI seq4 = new Sequence("Seq4", "CA--t");
58 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
59 int width = seq1.getLength();
60 ProfilesI result = AAFrequency.calculate(seqs, width, 0, width, false);
63 ProfileI col = result.get(0);
64 assertEquals(100f, col.getPercentageIdentity(false));
65 assertEquals(100f, col.getPercentageIdentity(true));
66 assertEquals(4, col.getMaxCount());
67 assertEquals("C", col.getModalResidue());
68 assertNull(col.getCounts());
72 assertEquals(75f, col.getPercentageIdentity(false));
73 assertEquals(100f, col.getPercentageIdentity(true));
74 assertEquals(3, col.getMaxCount());
75 assertEquals("A", col.getModalResidue());
77 // col 2 is 50% G 50% C or 25/25 counting gaps
79 assertEquals(25f, col.getPercentageIdentity(false));
80 assertEquals(50f, col.getPercentageIdentity(true));
81 assertEquals(1, col.getMaxCount());
82 assertEquals("CG", col.getModalResidue());
86 assertEquals(0f, col.getPercentageIdentity(false));
87 assertEquals(0f, col.getPercentageIdentity(true));
88 assertEquals(0, col.getMaxCount());
89 assertEquals("", col.getModalResidue());
91 // col 4 is 75% T 25% G
93 assertEquals(75f, col.getPercentageIdentity(false));
94 assertEquals(75f, col.getPercentageIdentity(true));
95 assertEquals(3, col.getMaxCount());
96 assertEquals("T", col.getModalResidue());
99 @Test(groups = { "Functional" })
100 public void testCalculate_withProfile()
102 SequenceI seq1 = new Sequence("Seq1", "CAGT");
103 SequenceI seq2 = new Sequence("Seq2", "CACT");
104 SequenceI seq3 = new Sequence("Seq3", "C--G");
105 SequenceI seq4 = new Sequence("Seq4", "CA-t");
106 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
107 int width = seq1.getLength();
108 ProfilesI result = AAFrequency.calculate(seqs, width, 0, width, true);
110 ProfileI profile = result.get(0);
111 assertEquals(4, profile.getCounts().getCount('C'));
112 assertEquals(4, profile.getHeight());
113 assertEquals(4, profile.getNonGapped());
115 profile = result.get(1);
116 assertEquals(3, profile.getCounts().getCount('A'));
117 assertEquals(4, profile.getHeight());
118 assertEquals(3, profile.getNonGapped());
120 profile = result.get(2);
121 assertEquals(1, profile.getCounts().getCount('C'));
122 assertEquals(1, profile.getCounts().getCount('G'));
123 assertEquals(4, profile.getHeight());
124 assertEquals(2, profile.getNonGapped());
126 profile = result.get(3);
127 assertEquals(3, profile.getCounts().getCount('T'));
128 assertEquals(1, profile.getCounts().getCount('G'));
129 assertEquals(4, profile.getHeight());
130 assertEquals(4, profile.getNonGapped());
133 @Test(groups = { "Functional" }, enabled = false)
134 public void testCalculate_withProfileTiming()
136 SequenceI seq1 = new Sequence("Seq1", "CAGT");
137 SequenceI seq2 = new Sequence("Seq2", "CACT");
138 SequenceI seq3 = new Sequence("Seq3", "C--G");
139 SequenceI seq4 = new Sequence("Seq4", "CA-t");
140 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
142 // ensure class loaded and initialised
143 int width = seq1.getLength();
144 AAFrequency.calculate(seqs, width, 0, width, true);
147 long start = System.currentTimeMillis();
148 for (int i = 0; i < reps; i++)
150 AAFrequency.calculate(seqs, width, 0, width, true);
152 System.out.println(System.currentTimeMillis() - start);
156 * Test generation of consensus annotation with options 'include gaps'
157 * (profile percentages are of all sequences, whether gapped or not), and
158 * 'show logo' (the full profile with all residue percentages is reported in
159 * the description for the tooltip)
161 @Test(groups = { "Functional" })
162 public void testCompleteConsensus_includeGaps_showLogo()
165 * first compute the profiles
167 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
168 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
169 SequenceI seq3 = new Sequence("Seq3", "C---G");
170 SequenceI seq4 = new Sequence("Seq4", "CA--t");
171 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
172 int width = seq1.getLength();
173 ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
175 AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
176 "PID", new Annotation[width]);
177 AAFrequency.completeConsensus(consensus, profiles, 0, 5, false, true,
180 Annotation ann = consensus.annotations[0];
181 assertEquals("C 100%", ann.description);
182 assertEquals("C", ann.displayCharacter);
183 ann = consensus.annotations[1];
184 assertEquals("A 75%", ann.description);
185 assertEquals("A", ann.displayCharacter);
186 ann = consensus.annotations[2];
187 assertEquals("C 25%; G 25%", ann.description);
188 assertEquals("+", ann.displayCharacter);
189 ann = consensus.annotations[3];
190 assertEquals("", ann.description);
191 assertEquals("-", ann.displayCharacter);
192 ann = consensus.annotations[4];
193 assertEquals("T 75%; G 25%", ann.description);
194 assertEquals("T", ann.displayCharacter);
198 * Test generation of consensus annotation with options 'ignore gaps' (profile
199 * percentages are of the non-gapped sequences) and 'no logo' (only the modal
200 * residue[s] percentage is reported in the description for the tooltip)
202 @Test(groups = { "Functional" })
203 public void testCompleteConsensus_ignoreGaps_noLogo()
206 * first compute the profiles
208 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
209 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
210 SequenceI seq3 = new Sequence("Seq3", "C---G");
211 SequenceI seq4 = new Sequence("Seq4", "CA--t");
212 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
213 int width = seq1.getLength();
214 ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
216 AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
217 "PID", new Annotation[width]);
218 AAFrequency.completeConsensus(consensus, profiles, 0, 5, true, false,
221 Annotation ann = consensus.annotations[0];
222 assertEquals("C 100%", ann.description);
223 assertEquals("C", ann.displayCharacter);
224 ann = consensus.annotations[1];
225 assertEquals("A 100%", ann.description);
226 assertEquals("A", ann.displayCharacter);
227 ann = consensus.annotations[2];
228 assertEquals("[CG] 50%", ann.description);
229 assertEquals("+", ann.displayCharacter);
230 ann = consensus.annotations[3];
231 assertEquals("", ann.description);
232 assertEquals("-", ann.displayCharacter);
233 ann = consensus.annotations[4];
234 assertEquals("T 75%", ann.description);
235 assertEquals("T", ann.displayCharacter);
239 * Test to include rounding down of a non-zero count to 0% (JAL-3202)
241 @Test(groups = { "Functional" })
242 public void testExtractProfile()
245 * 200 sequences of which 30 gapped (170 ungapped)
246 * max count 70 for modal residue 'G'
248 ProfileI profile = new Profile(200, 30, 70, "G");
249 ResidueCount counts = new ResidueCount();
254 profile.setCounts(counts);
257 * [0, noOfValues, totalPercent, char1, count1, ...]
258 * G: 70/170 = 41.2 = 41
259 * R: 60/170 = 35.3 = 35
260 * L: 38/170 = 22.3 = 22
262 * total (rounded) percentages = 99
264 int[] extracted = AAFrequency.extractProfile(profile, true);
265 int[] expected = new int[] { 0, 4, 99, 'G', 41, 'R', 35, 'L', 22, 'H',
267 org.testng.Assert.assertEquals(extracted, expected);
270 * add some counts of 1; these round down to 0% and should be discarded
272 counts.put('G', 68); // 68/170 = 40% exactly (percentages now total 98)
275 extracted = AAFrequency.extractProfile(profile, true);
276 expected = new int[] { 0, 4, 98, 'G', 40, 'R', 35, 'L', 22, 'H', 1 };
277 org.testng.Assert.assertEquals(extracted, expected);
282 * Tests for the profile calculation where gaps are included i.e. the
283 * denominator is the total number of sequences in the column
285 @Test(groups = { "Functional" })
286 public void testExtractProfile_countGaps()
289 * 200 sequences of which 30 gapped (170 ungapped)
290 * max count 70 for modal residue 'G'
292 ProfileI profile = new Profile(200, 30, 70, "G");
293 ResidueCount counts = new ResidueCount();
298 profile.setCounts(counts);
301 * [0, noOfValues, totalPercent, char1, count1, ...]
306 * total (rounded) percentages = 85
308 int[] extracted = AAFrequency.extractProfile(profile, false);
309 int[] expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4,
310 85, 'G', 35, 'R', 30, 'L', 19, 'H', 1 };
311 org.testng.Assert.assertEquals(extracted, expected);
314 * add some counts of 1; these round down to 0% and should be discarded
316 counts.put('G', 68); // 68/200 = 34%
319 extracted = AAFrequency.extractProfile(profile, false);
320 expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, 84, 'G',
321 34, 'R', 30, 'L', 19, 'H', 1 };
322 org.testng.Assert.assertEquals(extracted, expected);
326 @Test(groups = { "Functional" })
327 public void testExtractCdnaProfile()
330 * 200 sequences of which 30 gapped (170 ungapped)
331 * max count 70 for modal residue 'G'
333 Hashtable profile = new Hashtable();
336 * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
337 * where 1..64 positions correspond to encoded codons
338 * see CodingUtils.encodeCodon()
340 int[] codonCounts = new int[66];
341 char[] codon1 = new char[] { 'G', 'C', 'A' };
342 char[] codon2 = new char[] { 'c', 'C', 'A' };
343 char[] codon3 = new char[] { 't', 'g', 'A' };
344 char[] codon4 = new char[] { 'G', 'C', 't' };
345 int encoded1 = CodingUtils.encodeCodon(codon1);
346 int encoded2 = CodingUtils.encodeCodon(codon2);
347 int encoded3 = CodingUtils.encodeCodon(codon3);
348 int encoded4 = CodingUtils.encodeCodon(codon4);
349 codonCounts[2 + encoded1] = 30;
350 codonCounts[2 + encoded2] = 70;
351 codonCounts[2 + encoded3] = 9;
352 codonCounts[2 + encoded4] = 1;
353 codonCounts[0] = 120;
354 codonCounts[1] = 110;
355 profile.put(AAFrequency.PROFILE, codonCounts);
358 * [0, noOfValues, totalPercent, char1, count1, ...]
359 * codon1: 30/110 = 27.2 = 27%
360 * codon2: 70/110 = 63.6% = 63%
361 * codon3: 9/110 = 8.1% = 8%
362 * codon4: 1/110 = 0.9% = 0% should be discarded
363 * total (rounded) percentages = 98
365 int[] extracted = AAFrequency.extractCdnaProfile(profile, true);
366 int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 98,
367 encoded2, 63, encoded1, 27, encoded3, 8 };
368 org.testng.Assert.assertEquals(extracted, expected);
371 @Test(groups = { "Functional" })
372 public void testExtractCdnaProfile_countGaps()
375 * 200 sequences of which 30 gapped (170 ungapped)
376 * max count 70 for modal residue 'G'
378 Hashtable profile = new Hashtable();
381 * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
382 * where 1..64 positions correspond to encoded codons
383 * see CodingUtils.encodeCodon()
385 int[] codonCounts = new int[66];
386 char[] codon1 = new char[] { 'G', 'C', 'A' };
387 char[] codon2 = new char[] { 'c', 'C', 'A' };
388 char[] codon3 = new char[] { 't', 'g', 'A' };
389 char[] codon4 = new char[] { 'G', 'C', 't' };
390 int encoded1 = CodingUtils.encodeCodon(codon1);
391 int encoded2 = CodingUtils.encodeCodon(codon2);
392 int encoded3 = CodingUtils.encodeCodon(codon3);
393 int encoded4 = CodingUtils.encodeCodon(codon4);
394 codonCounts[2 + encoded1] = 30;
395 codonCounts[2 + encoded2] = 70;
396 codonCounts[2 + encoded3] = 9;
397 codonCounts[2 + encoded4] = 1;
398 codonCounts[0] = 120;
399 codonCounts[1] = 110;
400 profile.put(AAFrequency.PROFILE, codonCounts);
403 * [0, noOfValues, totalPercent, char1, count1, ...]
404 * codon1: 30/120 = 25%
405 * codon2: 70/120 = 58.3 = 58%
406 * codon3: 9/120 = 7.5 = 7%
407 * codon4: 1/120 = 0.8 = 0% should be discarded
408 * total (rounded) percentages = 90
410 int[] extracted = AAFrequency.extractCdnaProfile(profile, false);
411 int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 90,
412 encoded2, 58, encoded1, 25, encoded3, 7 };
413 org.testng.Assert.assertEquals(extracted, expected);