2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
26 import jalview.datamodel.AlignmentAnnotation;
27 import jalview.datamodel.Annotation;
28 import jalview.datamodel.HiddenMarkovModel;
29 import jalview.datamodel.Profile;
30 import jalview.datamodel.ProfileI;
31 import jalview.datamodel.Profiles;
32 import jalview.datamodel.ProfilesI;
33 import jalview.datamodel.ResidueCount;
34 import jalview.datamodel.Sequence;
35 import jalview.datamodel.SequenceI;
36 import jalview.gui.JvOptionPane;
37 import jalview.io.DataSourceType;
38 import jalview.io.FileParse;
39 import jalview.io.HMMFile;
41 import java.io.IOException;
42 import java.net.MalformedURLException;
44 import java.util.Hashtable;
46 import org.testng.annotations.BeforeClass;
47 import org.testng.annotations.Test;
49 public class AAFrequencyTest
51 HiddenMarkovModel hmm;
53 @BeforeClass(alwaysRun = true)
54 public void setUpJvOptionPane()
56 JvOptionPane.setInteractiveMode(false);
57 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
60 @BeforeClass(alwaysRun = true)
61 public void setUp() throws IOException, MalformedURLException
64 * load a dna (ACGT) HMM file to a HiddenMarkovModel
66 HMMFile hmmFile = new HMMFile(new FileParse(
67 "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE));
68 hmm = hmmFile.getHMM();
71 @Test(groups = { "Functional" })
72 public void testCalculate_noProfile()
74 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
75 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
76 SequenceI seq3 = new Sequence("Seq3", "C---G");
77 SequenceI seq4 = new Sequence("Seq4", "CA--t");
78 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
79 int width = seq1.getLength();
80 ProfilesI result = AAFrequency.calculate(seqs, width, 0, width,
84 ProfileI col = result.get(0);
85 assertEquals(100f, col.getPercentageIdentity(false));
86 assertEquals(100f, col.getPercentageIdentity(true));
87 assertEquals(4, col.getMaxCount());
88 assertEquals("C", col.getModalResidue());
89 assertNull(col.getCounts());
93 assertEquals(75f, col.getPercentageIdentity(false));
94 assertEquals(100f, col.getPercentageIdentity(true));
95 assertEquals(3, col.getMaxCount());
96 assertEquals("A", col.getModalResidue());
98 // col 2 is 50% G 50% C or 25/25 counting gaps
100 assertEquals(25f, col.getPercentageIdentity(false));
101 assertEquals(50f, col.getPercentageIdentity(true));
102 assertEquals(1, col.getMaxCount());
103 assertEquals("CG", col.getModalResidue());
107 assertEquals(0f, col.getPercentageIdentity(false));
108 assertEquals(0f, col.getPercentageIdentity(true));
109 assertEquals(0, col.getMaxCount());
110 assertEquals("", col.getModalResidue());
112 // col 4 is 75% T 25% G
114 assertEquals(75f, col.getPercentageIdentity(false));
115 assertEquals(75f, col.getPercentageIdentity(true));
116 assertEquals(3, col.getMaxCount());
117 assertEquals("T", col.getModalResidue());
120 @Test(groups = { "Functional" })
121 public void testCalculate_withProfile()
123 SequenceI seq1 = new Sequence("Seq1", "CAGT");
124 SequenceI seq2 = new Sequence("Seq2", "CACT");
125 SequenceI seq3 = new Sequence("Seq3", "C--G");
126 SequenceI seq4 = new Sequence("Seq4", "CA-t");
127 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
128 int width = seq1.getLength();
129 ProfilesI result = AAFrequency.calculate(seqs, width, 0, width,
132 ProfileI profile = result.get(0);
133 assertEquals(4, profile.getCounts().getCount('C'));
134 assertEquals(4, profile.getHeight());
135 assertEquals(4, profile.getNonGapped());
137 profile = result.get(1);
138 assertEquals(3, profile.getCounts().getCount('A'));
139 assertEquals(4, profile.getHeight());
140 assertEquals(3, profile.getNonGapped());
142 profile = result.get(2);
143 assertEquals(1, profile.getCounts().getCount('C'));
144 assertEquals(1, profile.getCounts().getCount('G'));
145 assertEquals(4, profile.getHeight());
146 assertEquals(2, profile.getNonGapped());
148 profile = result.get(3);
149 assertEquals(3, profile.getCounts().getCount('T'));
150 assertEquals(1, profile.getCounts().getCount('G'));
151 assertEquals(4, profile.getHeight());
152 assertEquals(4, profile.getNonGapped());
155 @Test(groups = { "Functional" }, enabled = false)
156 public void testCalculate_withProfileTiming()
158 SequenceI seq1 = new Sequence("Seq1", "CAGT");
159 SequenceI seq2 = new Sequence("Seq2", "CACT");
160 SequenceI seq3 = new Sequence("Seq3", "C--G");
161 SequenceI seq4 = new Sequence("Seq4", "CA-t");
162 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
164 // ensure class loaded and initialised
165 int width = seq1.getLength();
166 AAFrequency.calculate(seqs, width, 0, width, true);
169 long start = System.currentTimeMillis();
170 for (int i = 0; i < reps; i++)
172 AAFrequency.calculate(seqs, width, 0, width, true);
174 System.out.println(System.currentTimeMillis() - start);
178 * Test generation of consensus annotation with options 'include gaps'
179 * (profile percentages are of all sequences, whether gapped or not), and
180 * 'show logo' (the full profile with all residue percentages is reported in
181 * the description for the tooltip)
183 @Test(groups = { "Functional" })
184 public void testCompleteConsensus_includeGaps_showLogo()
187 * first compute the profiles
189 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
190 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
191 SequenceI seq3 = new Sequence("Seq3", "C---G");
192 SequenceI seq4 = new Sequence("Seq4", "CA--t");
193 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
194 int width = seq1.getLength();
195 ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
197 AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
198 "PID", new Annotation[width]);
200 .completeConsensus(consensus, profiles, 0, 5, false, true, 4);
202 Annotation ann = consensus.annotations[0];
203 assertEquals("C 100%", ann.description);
204 assertEquals("C", ann.displayCharacter);
205 ann = consensus.annotations[1];
206 assertEquals("A 75%", ann.description);
207 assertEquals("A", ann.displayCharacter);
208 ann = consensus.annotations[2];
209 assertEquals("C 25%; G 25%", ann.description);
210 assertEquals("+", ann.displayCharacter);
211 ann = consensus.annotations[3];
212 assertEquals("", ann.description);
213 assertEquals("-", ann.displayCharacter);
214 ann = consensus.annotations[4];
215 assertEquals("T 75%; G 25%", ann.description);
216 assertEquals("T", ann.displayCharacter);
220 * Test generation of consensus annotation with options 'ignore gaps' (profile
221 * percentages are of the non-gapped sequences) and 'no logo' (only the modal
222 * residue[s] percentage is reported in the description for the tooltip)
224 @Test(groups = { "Functional" })
225 public void testCompleteConsensus_ignoreGaps_noLogo()
228 * first compute the profiles
230 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
231 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
232 SequenceI seq3 = new Sequence("Seq3", "C---G");
233 SequenceI seq4 = new Sequence("Seq4", "CA--t");
234 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
235 int width = seq1.getLength();
236 ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
238 AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
239 "PID", new Annotation[width]);
241 .completeConsensus(consensus, profiles, 0, 5, true, false, 4);
243 Annotation ann = consensus.annotations[0];
244 assertEquals("C 100%", ann.description);
245 assertEquals("C", ann.displayCharacter);
246 ann = consensus.annotations[1];
247 assertEquals("A 100%", ann.description);
248 assertEquals("A", ann.displayCharacter);
249 ann = consensus.annotations[2];
250 assertEquals("[CG] 50%", ann.description);
251 assertEquals("+", ann.displayCharacter);
252 ann = consensus.annotations[3];
253 assertEquals("", ann.description);
254 assertEquals("-", ann.displayCharacter);
255 ann = consensus.annotations[4];
256 assertEquals("T 75%", ann.description);
257 assertEquals("T", ann.displayCharacter);
261 * Test to include rounding down of a non-zero count to 0% (JAL-3202)
263 @Test(groups = { "Functional" })
264 public void testExtractProfile()
267 * 200 sequences of which 30 gapped (170 ungapped)
268 * max count 70 for modal residue 'G'
270 ProfileI profile = new Profile(200, 30, 70, "G");
271 ResidueCount counts = new ResidueCount();
276 profile.setCounts(counts);
279 * [0, noOfValues, totalPercent, char1, count1, ...]
280 * G: 70/170 = 41.2 = 41
281 * R: 60/170 = 35.3 = 35
282 * L: 38/170 = 22.3 = 22
284 * total (rounded) percentages = 99
286 int[] extracted = AAFrequency.extractProfile(profile, true);
287 int[] expected = new int[] { 0, 4, 99, 'G', 41, 'R', 35, 'L', 22, 'H',
289 org.testng.Assert.assertEquals(extracted, expected);
292 * add some counts of 1; these round down to 0% and should be discarded
294 counts.put('G', 68); // 68/170 = 40% exactly (percentages now total 98)
297 extracted = AAFrequency.extractProfile(profile, true);
298 expected = new int[] { 0, 4, 98, 'G', 40, 'R', 35, 'L', 22, 'H', 1 };
299 org.testng.Assert.assertEquals(extracted, expected);
304 * Tests for the profile calculation where gaps are included i.e. the
305 * denominator is the total number of sequences in the column
307 @Test(groups = { "Functional" })
308 public void testExtractProfile_countGaps()
311 * 200 sequences of which 30 gapped (170 ungapped)
312 * max count 70 for modal residue 'G'
314 ProfileI profile = new Profile(200, 30, 70, "G");
315 ResidueCount counts = new ResidueCount();
320 profile.setCounts(counts);
323 * [0, noOfValues, totalPercent, char1, count1, ...]
328 * total (rounded) percentages = 85
330 int[] extracted = AAFrequency.extractProfile(profile, false);
331 int[] expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4,
332 85, 'G', 35, 'R', 30, 'L', 19, 'H',
334 org.testng.Assert.assertEquals(extracted, expected);
337 * add some counts of 1; these round down to 0% and should be discarded
339 counts.put('G', 68); // 68/200 = 34%
342 extracted = AAFrequency.extractProfile(profile, false);
343 expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, 84, 'G',
344 34, 'R', 30, 'L', 19, 'H', 1 };
345 org.testng.Assert.assertEquals(extracted, expected);
349 @Test(groups = { "Functional" })
350 public void testExtractCdnaProfile()
353 * 200 sequences of which 30 gapped (170 ungapped)
354 * max count 70 for modal residue 'G'
356 Hashtable profile = new Hashtable();
359 * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
360 * where 1..64 positions correspond to encoded codons
361 * see CodingUtils.encodeCodon()
363 int[] codonCounts = new int[66];
364 char[] codon1 = new char[] { 'G', 'C', 'A' };
365 char[] codon2 = new char[] { 'c', 'C', 'A' };
366 char[] codon3 = new char[] { 't', 'g', 'A' };
367 char[] codon4 = new char[] { 'G', 'C', 't' };
368 int encoded1 = CodingUtils.encodeCodon(codon1);
369 int encoded2 = CodingUtils.encodeCodon(codon2);
370 int encoded3 = CodingUtils.encodeCodon(codon3);
371 int encoded4 = CodingUtils.encodeCodon(codon4);
372 codonCounts[2 + encoded1] = 30;
373 codonCounts[2 + encoded2] = 70;
374 codonCounts[2 + encoded3] = 9;
375 codonCounts[2 + encoded4] = 1;
376 codonCounts[0] = 120;
377 codonCounts[1] = 110;
378 profile.put(AAFrequency.PROFILE, codonCounts);
381 * [0, noOfValues, totalPercent, char1, count1, ...]
382 * codon1: 30/110 = 27.2 = 27%
383 * codon2: 70/110 = 63.6% = 63%
384 * codon3: 9/110 = 8.1% = 8%
385 * codon4: 1/110 = 0.9% = 0% should be discarded
386 * total (rounded) percentages = 98
388 int[] extracted = AAFrequency.extractCdnaProfile(profile, true);
389 int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 98,
390 encoded2, 63, encoded1, 27, encoded3, 8 };
391 org.testng.Assert.assertEquals(extracted, expected);
394 @Test(groups = { "Functional" })
395 public void testExtractCdnaProfile_countGaps()
398 * 200 sequences of which 30 gapped (170 ungapped)
399 * max count 70 for modal residue 'G'
401 Hashtable profile = new Hashtable();
404 * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
405 * where 1..64 positions correspond to encoded codons
406 * see CodingUtils.encodeCodon()
408 int[] codonCounts = new int[66];
409 char[] codon1 = new char[] { 'G', 'C', 'A' };
410 char[] codon2 = new char[] { 'c', 'C', 'A' };
411 char[] codon3 = new char[] { 't', 'g', 'A' };
412 char[] codon4 = new char[] { 'G', 'C', 't' };
413 int encoded1 = CodingUtils.encodeCodon(codon1);
414 int encoded2 = CodingUtils.encodeCodon(codon2);
415 int encoded3 = CodingUtils.encodeCodon(codon3);
416 int encoded4 = CodingUtils.encodeCodon(codon4);
417 codonCounts[2 + encoded1] = 30;
418 codonCounts[2 + encoded2] = 70;
419 codonCounts[2 + encoded3] = 9;
420 codonCounts[2 + encoded4] = 1;
421 codonCounts[0] = 120;
422 codonCounts[1] = 110;
423 profile.put(AAFrequency.PROFILE, codonCounts);
426 * [0, noOfValues, totalPercent, char1, count1, ...]
427 * codon1: 30/120 = 25%
428 * codon2: 70/120 = 58.3 = 58%
429 * codon3: 9/120 = 7.5 = 7%
430 * codon4: 1/120 = 0.8 = 0% should be discarded
431 * total (rounded) percentages = 90
433 int[] extracted = AAFrequency.extractCdnaProfile(profile, false);
434 int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 90,
435 encoded2, 58, encoded1, 25, encoded3, 7 };
436 org.testng.Assert.assertEquals(extracted, expected);
439 @Test(groups = { "Functional" })
440 public void testExtractHMMProfile()
441 throws MalformedURLException, IOException
443 int[] expected = { 0, 4, 100, 'T', 71, 'C', 12, 'G', 9, 'A', 9 };
444 int[] actual = AAFrequency.extractHMMProfile(hmm, 17, false, false);
445 for (int i = 0; i < actual.length; i++)
449 assertEquals(actual[i], expected[i]);
453 assertEquals(actual[i], expected[i]);
457 int[] expected2 = { 0, 4, 100, 'A', 85, 'C', 0, 'G', 0, 'T', 0 };
458 int[] actual2 = AAFrequency.extractHMMProfile(hmm, 2, true, false);
459 for (int i = 0; i < actual2.length; i++)
463 assertEquals(actual[i], expected[i]);
467 assertEquals(actual[i], expected[i]);
471 assertNull(AAFrequency.extractHMMProfile(null, 98978867, true, false));
474 @Test(groups = { "Functional" })
475 public void testGetAnalogueCount()
478 * 'T' in column 0 has emission probability 0.7859, scales to 7859
480 int count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, false);
481 assertEquals(7859, count);
484 * same with 'use info height': value is multiplied by log ratio
485 * log(value / background) / log(2) = log(0.7859/0.25)/0.693
486 * = log(3.1)/0.693 = 1.145/0.693 = 1.66
487 * so value becomes 1.2987 and scales up to 12987
489 count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, true);
490 assertEquals(12987, count);
493 * 'G' in column 20 has emission probability 0.75457, scales to 7546
495 count = AAFrequency.getAnalogueCount(hmm, 20, 'G', false, false);
496 assertEquals(7546, count);
499 * 'G' in column 1077 has emission probability 0.0533, here
500 * ignored (set to 0) since below background of 0.25
502 count = AAFrequency.getAnalogueCount(hmm, 1077, 'G', true, false);
503 assertEquals(0, count);
506 @Test(groups = { "Functional" })
507 public void testCompleteInformation()
509 ProfileI prof1 = new Profile(1, 0, 100, "A");
510 ProfileI prof2 = new Profile(1, 0, 100, "-");
512 ProfilesI profs = new Profiles(new ProfileI[] { prof1, prof2 });
513 Annotation ann1 = new Annotation(6.5f);
514 Annotation ann2 = new Annotation(0f);
515 Annotation[] annots = new Annotation[] { ann1, ann2 };
516 SequenceI seq = new Sequence("", "AA", 0, 0);
518 AlignmentAnnotation annot = new AlignmentAnnotation("", "", annots);
519 annot.setSequenceRef(seq);
520 AAFrequency.completeInformation(annot, profs, 0, 1);
521 float ic = annot.annotations[0].value;
522 assertEquals(0.91532f, ic, 0.0001f);
523 ic = annot.annotations[1].value;
524 assertEquals(0f, ic, 0.0001f);