2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
26 import jalview.datamodel.AlignmentAnnotation;
27 import jalview.datamodel.Annotation;
28 import jalview.datamodel.HiddenMarkovModel;
29 import jalview.datamodel.Profile;
30 import jalview.datamodel.ProfileI;
31 import jalview.datamodel.Profiles;
32 import jalview.datamodel.ProfilesI;
33 import jalview.datamodel.ResidueCount;
34 import jalview.datamodel.Sequence;
35 import jalview.datamodel.SequenceI;
36 import jalview.gui.JvOptionPane;
37 import jalview.io.DataSourceType;
38 import jalview.io.FileParse;
39 import jalview.io.HMMFile;
41 import java.io.IOException;
42 import java.net.MalformedURLException;
44 import java.util.Hashtable;
46 import org.testng.annotations.BeforeClass;
47 import org.testng.annotations.Test;
49 public class AAFrequencyTest
51 HiddenMarkovModel hmm;
53 @BeforeClass(alwaysRun = true)
54 public void setUpJvOptionPane()
56 JvOptionPane.setInteractiveMode(false);
57 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
60 @BeforeClass(alwaysRun = true)
61 public void setUp() throws IOException, MalformedURLException
64 * load a dna (ACGT) HMM file to a HiddenMarkovModel
66 HMMFile hmmFile = new HMMFile(new FileParse(
67 "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE));
68 hmm = hmmFile.getHMM();
71 @Test(groups = { "Functional" })
72 public void testCalculate_noProfile()
74 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
75 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
76 SequenceI seq3 = new Sequence("Seq3", "C---G");
77 SequenceI seq4 = new Sequence("Seq4", "CA--t");
78 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
79 int width = seq1.getLength();
80 ProfilesI result = AAFrequency.calculate(seqs, width, 0, width, false);
83 ProfileI col = result.get(0);
84 assertEquals(100f, col.getPercentageIdentity(false));
85 assertEquals(100f, col.getPercentageIdentity(true));
86 assertEquals(4, col.getMaxCount());
87 assertEquals("C", col.getModalResidue());
88 assertNull(col.getCounts());
92 assertEquals(75f, col.getPercentageIdentity(false));
93 assertEquals(100f, col.getPercentageIdentity(true));
94 assertEquals(3, col.getMaxCount());
95 assertEquals("A", col.getModalResidue());
97 // col 2 is 50% G 50% C or 25/25 counting gaps
99 assertEquals(25f, col.getPercentageIdentity(false));
100 assertEquals(50f, col.getPercentageIdentity(true));
101 assertEquals(1, col.getMaxCount());
102 assertEquals("CG", col.getModalResidue());
106 assertEquals(0f, col.getPercentageIdentity(false));
107 assertEquals(0f, col.getPercentageIdentity(true));
108 assertEquals(0, col.getMaxCount());
109 assertEquals("", col.getModalResidue());
111 // col 4 is 75% T 25% G
113 assertEquals(75f, col.getPercentageIdentity(false));
114 assertEquals(75f, col.getPercentageIdentity(true));
115 assertEquals(3, col.getMaxCount());
116 assertEquals("T", col.getModalResidue());
119 @Test(groups = { "Functional" })
120 public void testCalculate_withProfile()
122 SequenceI seq1 = new Sequence("Seq1", "CAGT");
123 SequenceI seq2 = new Sequence("Seq2", "CACT");
124 SequenceI seq3 = new Sequence("Seq3", "C--G");
125 SequenceI seq4 = new Sequence("Seq4", "CA-t");
126 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
127 int width = seq1.getLength();
128 ProfilesI result = AAFrequency.calculate(seqs, width, 0, width, true);
130 ProfileI profile = result.get(0);
131 assertEquals(4, profile.getCounts().getCount('C'));
132 assertEquals(4, profile.getHeight());
133 assertEquals(4, profile.getNonGapped());
135 profile = result.get(1);
136 assertEquals(3, profile.getCounts().getCount('A'));
137 assertEquals(4, profile.getHeight());
138 assertEquals(3, profile.getNonGapped());
140 profile = result.get(2);
141 assertEquals(1, profile.getCounts().getCount('C'));
142 assertEquals(1, profile.getCounts().getCount('G'));
143 assertEquals(4, profile.getHeight());
144 assertEquals(2, profile.getNonGapped());
146 profile = result.get(3);
147 assertEquals(3, profile.getCounts().getCount('T'));
148 assertEquals(1, profile.getCounts().getCount('G'));
149 assertEquals(4, profile.getHeight());
150 assertEquals(4, profile.getNonGapped());
153 @Test(groups = { "Functional" }, enabled = false)
154 public void testCalculate_withProfileTiming()
156 SequenceI seq1 = new Sequence("Seq1", "CAGT");
157 SequenceI seq2 = new Sequence("Seq2", "CACT");
158 SequenceI seq3 = new Sequence("Seq3", "C--G");
159 SequenceI seq4 = new Sequence("Seq4", "CA-t");
160 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
162 // ensure class loaded and initialised
163 int width = seq1.getLength();
164 AAFrequency.calculate(seqs, width, 0, width, true);
167 long start = System.currentTimeMillis();
168 for (int i = 0; i < reps; i++)
170 AAFrequency.calculate(seqs, width, 0, width, true);
172 System.out.println(System.currentTimeMillis() - start);
176 * Test generation of consensus annotation with options 'include gaps'
177 * (profile percentages are of all sequences, whether gapped or not), and
178 * 'show logo' (the full profile with all residue percentages is reported in
179 * the description for the tooltip)
181 @Test(groups = { "Functional" })
182 public void testCompleteConsensus_includeGaps_showLogo()
185 * first compute the profiles
187 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
188 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
189 SequenceI seq3 = new Sequence("Seq3", "C---G");
190 SequenceI seq4 = new Sequence("Seq4", "CA--t");
191 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
192 int width = seq1.getLength();
193 ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
195 AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
196 "PID", new Annotation[width]);
197 AAFrequency.completeConsensus(consensus, profiles, 0, 5, false, true,
200 Annotation ann = consensus.annotations[0];
201 assertEquals("C 100%", ann.description);
202 assertEquals("C", ann.displayCharacter);
203 ann = consensus.annotations[1];
204 assertEquals("A 75%", ann.description);
205 assertEquals("A", ann.displayCharacter);
206 ann = consensus.annotations[2];
207 assertEquals("C 25%; G 25%", ann.description);
208 assertEquals("+", ann.displayCharacter);
209 ann = consensus.annotations[3];
210 assertEquals("", ann.description);
211 assertEquals("-", ann.displayCharacter);
212 ann = consensus.annotations[4];
213 assertEquals("T 75%; G 25%", ann.description);
214 assertEquals("T", ann.displayCharacter);
218 * Test generation of consensus annotation with options 'ignore gaps' (profile
219 * percentages are of the non-gapped sequences) and 'no logo' (only the modal
220 * residue[s] percentage is reported in the description for the tooltip)
222 @Test(groups = { "Functional" })
223 public void testCompleteConsensus_ignoreGaps_noLogo()
226 * first compute the profiles
228 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
229 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
230 SequenceI seq3 = new Sequence("Seq3", "C---G");
231 SequenceI seq4 = new Sequence("Seq4", "CA--t");
232 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
233 int width = seq1.getLength();
234 ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
236 AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
237 "PID", new Annotation[width]);
238 AAFrequency.completeConsensus(consensus, profiles, 0, 5, true, false,
241 Annotation ann = consensus.annotations[0];
242 assertEquals("C 100%", ann.description);
243 assertEquals("C", ann.displayCharacter);
244 ann = consensus.annotations[1];
245 assertEquals("A 100%", ann.description);
246 assertEquals("A", ann.displayCharacter);
247 ann = consensus.annotations[2];
248 assertEquals("[CG] 50%", ann.description);
249 assertEquals("+", ann.displayCharacter);
250 ann = consensus.annotations[3];
251 assertEquals("", ann.description);
252 assertEquals("-", ann.displayCharacter);
253 ann = consensus.annotations[4];
254 assertEquals("T 75%", ann.description);
255 assertEquals("T", ann.displayCharacter);
259 * Test to include rounding down of a non-zero count to 0% (JAL-3202)
261 @Test(groups = { "Functional" })
262 public void testExtractProfile()
265 * 200 sequences of which 30 gapped (170 ungapped)
266 * max count 70 for modal residue 'G'
268 ProfileI profile = new Profile(200, 30, 70, "G");
269 ResidueCount counts = new ResidueCount();
274 profile.setCounts(counts);
277 * [0, noOfValues, totalPercent, char1, count1, ...]
278 * G: 70/170 = 41.2 = 41
279 * R: 60/170 = 35.3 = 35
280 * L: 38/170 = 22.3 = 22
282 * total (rounded) percentages = 99
284 int[] extracted = AAFrequency.extractProfile(profile, true);
285 int[] expected = new int[] { 0, 4, 99, 'G', 41, 'R', 35, 'L', 22, 'H',
287 org.testng.Assert.assertEquals(extracted, expected);
290 * add some counts of 1; these round down to 0% and should be discarded
292 counts.put('G', 68); // 68/170 = 40% exactly (percentages now total 98)
295 extracted = AAFrequency.extractProfile(profile, true);
296 expected = new int[] { 0, 4, 98, 'G', 40, 'R', 35, 'L', 22, 'H', 1 };
297 org.testng.Assert.assertEquals(extracted, expected);
302 * Tests for the profile calculation where gaps are included i.e. the
303 * denominator is the total number of sequences in the column
305 @Test(groups = { "Functional" })
306 public void testExtractProfile_countGaps()
309 * 200 sequences of which 30 gapped (170 ungapped)
310 * max count 70 for modal residue 'G'
312 ProfileI profile = new Profile(200, 30, 70, "G");
313 ResidueCount counts = new ResidueCount();
318 profile.setCounts(counts);
321 * [0, noOfValues, totalPercent, char1, count1, ...]
326 * total (rounded) percentages = 85
328 int[] extracted = AAFrequency.extractProfile(profile, false);
329 int[] expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4,
330 85, 'G', 35, 'R', 30, 'L', 19, 'H', 1 };
331 org.testng.Assert.assertEquals(extracted, expected);
334 * add some counts of 1; these round down to 0% and should be discarded
336 counts.put('G', 68); // 68/200 = 34%
339 extracted = AAFrequency.extractProfile(profile, false);
340 expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, 84, 'G',
341 34, 'R', 30, 'L', 19, 'H', 1 };
342 org.testng.Assert.assertEquals(extracted, expected);
346 @Test(groups = { "Functional" })
347 public void testExtractCdnaProfile()
350 * 200 sequences of which 30 gapped (170 ungapped)
351 * max count 70 for modal residue 'G'
353 Hashtable profile = new Hashtable();
356 * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
357 * where 1..64 positions correspond to encoded codons
358 * see CodingUtils.encodeCodon()
360 int[] codonCounts = new int[66];
361 char[] codon1 = new char[] { 'G', 'C', 'A' };
362 char[] codon2 = new char[] { 'c', 'C', 'A' };
363 char[] codon3 = new char[] { 't', 'g', 'A' };
364 char[] codon4 = new char[] { 'G', 'C', 't' };
365 int encoded1 = CodingUtils.encodeCodon(codon1);
366 int encoded2 = CodingUtils.encodeCodon(codon2);
367 int encoded3 = CodingUtils.encodeCodon(codon3);
368 int encoded4 = CodingUtils.encodeCodon(codon4);
369 codonCounts[2 + encoded1] = 30;
370 codonCounts[2 + encoded2] = 70;
371 codonCounts[2 + encoded3] = 9;
372 codonCounts[2 + encoded4] = 1;
373 codonCounts[0] = 120;
374 codonCounts[1] = 110;
375 profile.put(AAFrequency.PROFILE, codonCounts);
378 * [0, noOfValues, totalPercent, char1, count1, ...]
379 * codon1: 30/110 = 27.2 = 27%
380 * codon2: 70/110 = 63.6% = 63%
381 * codon3: 9/110 = 8.1% = 8%
382 * codon4: 1/110 = 0.9% = 0% should be discarded
383 * total (rounded) percentages = 98
385 int[] extracted = AAFrequency.extractCdnaProfile(profile, true);
386 int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 98,
387 encoded2, 63, encoded1, 27, encoded3, 8 };
388 org.testng.Assert.assertEquals(extracted, expected);
391 @Test(groups = { "Functional" })
392 public void testExtractCdnaProfile_countGaps()
395 * 200 sequences of which 30 gapped (170 ungapped)
396 * max count 70 for modal residue 'G'
398 Hashtable profile = new Hashtable();
401 * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
402 * where 1..64 positions correspond to encoded codons
403 * see CodingUtils.encodeCodon()
405 int[] codonCounts = new int[66];
406 char[] codon1 = new char[] { 'G', 'C', 'A' };
407 char[] codon2 = new char[] { 'c', 'C', 'A' };
408 char[] codon3 = new char[] { 't', 'g', 'A' };
409 char[] codon4 = new char[] { 'G', 'C', 't' };
410 int encoded1 = CodingUtils.encodeCodon(codon1);
411 int encoded2 = CodingUtils.encodeCodon(codon2);
412 int encoded3 = CodingUtils.encodeCodon(codon3);
413 int encoded4 = CodingUtils.encodeCodon(codon4);
414 codonCounts[2 + encoded1] = 30;
415 codonCounts[2 + encoded2] = 70;
416 codonCounts[2 + encoded3] = 9;
417 codonCounts[2 + encoded4] = 1;
418 codonCounts[0] = 120;
419 codonCounts[1] = 110;
420 profile.put(AAFrequency.PROFILE, codonCounts);
423 * [0, noOfValues, totalPercent, char1, count1, ...]
424 * codon1: 30/120 = 25%
425 * codon2: 70/120 = 58.3 = 58%
426 * codon3: 9/120 = 7.5 = 7%
427 * codon4: 1/120 = 0.8 = 0% should be discarded
428 * total (rounded) percentages = 90
430 int[] extracted = AAFrequency.extractCdnaProfile(profile, false);
431 int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 90,
432 encoded2, 58, encoded1, 25, encoded3, 7 };
433 org.testng.Assert.assertEquals(extracted, expected);
436 @Test(groups = { "Functional" })
437 public void testExtractHMMProfile()
438 throws MalformedURLException, IOException
440 int[] expected = { 0, 4, 100, 'T', 71, 'C', 12, 'G', 9, 'A', 9 };
441 int[] actual = AAFrequency.extractHMMProfile(hmm, 17, false, false);
442 for (int i = 0; i < actual.length; i++)
446 assertEquals(actual[i], expected[i]);
450 assertEquals(actual[i], expected[i]);
454 int[] expected2 = { 0, 4, 100, 'A', 85, 'C', 0, 'G', 0, 'T', 0 };
455 int[] actual2 = AAFrequency.extractHMMProfile(hmm, 2, true, false);
456 for (int i = 0; i < actual2.length; i++)
460 assertEquals(actual[i], expected[i]);
464 assertEquals(actual[i], expected[i]);
468 assertNull(AAFrequency.extractHMMProfile(null, 98978867, true, false));
471 @Test(groups = { "Functional" })
472 public void testGetAnalogueCount()
475 * 'T' in column 0 has emission probability 0.7859, scales to 7859
477 int count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, false);
478 assertEquals(7859, count);
481 * same with 'use info height': value is multiplied by log ratio
482 * log(value / background) / log(2) = log(0.7859/0.25)/0.693
483 * = log(3.1)/0.693 = 1.145/0.693 = 1.66
484 * so value becomes 1.2987 and scales up to 12987
486 count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, true);
487 assertEquals(12987, count);
490 * 'G' in column 20 has emission probability 0.75457, scales to 7546
492 count = AAFrequency.getAnalogueCount(hmm, 20, 'G', false, false);
493 assertEquals(7546, count);
496 * 'G' in column 1077 has emission probability 0.0533, here
497 * ignored (set to 0) since below background of 0.25
499 count = AAFrequency.getAnalogueCount(hmm, 1077, 'G', true, false);
500 assertEquals(0, count);
503 @Test(groups = { "Functional" })
504 public void testCompleteInformation()
506 ProfileI prof1 = new Profile(1, 0, 100, "A");
507 ProfileI prof2 = new Profile(1, 0, 100, "-");
509 ProfilesI profs = new Profiles(new ProfileI[] { prof1, prof2 });
510 Annotation ann1 = new Annotation(6.5f);
511 Annotation ann2 = new Annotation(0f);
512 Annotation[] annots = new Annotation[] { ann1, ann2 };
513 SequenceI seq = new Sequence("", "AA", 0, 0);
515 AlignmentAnnotation annot = new AlignmentAnnotation("", "", annots);
516 annot.setSequenceRef(seq);
517 AAFrequency.completeInformation(annot, profs, 0, 1);
518 float ic = annot.annotations[0].value;
519 assertEquals(0.91532f, ic, 0.0001f);
520 ic = annot.annotations[1].value;
521 assertEquals(0f, ic, 0.0001f);