2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
26 import jalview.datamodel.AlignmentAnnotation;
27 import jalview.datamodel.Annotation;
28 import jalview.datamodel.HiddenMarkovModel;
29 import jalview.datamodel.Profile;
30 import jalview.datamodel.ProfileI;
31 import jalview.datamodel.Profiles;
32 import jalview.datamodel.ProfilesI;
33 import jalview.datamodel.Sequence;
34 import jalview.datamodel.SequenceI;
35 import jalview.gui.JvOptionPane;
36 import jalview.io.DataSourceType;
37 import jalview.io.FileParse;
38 import jalview.io.HMMFile;
40 import java.io.IOException;
41 import java.net.MalformedURLException;
43 import org.testng.annotations.BeforeClass;
44 import org.testng.annotations.Test;
46 public class AAFrequencyTest
48 HiddenMarkovModel hmm;
50 @BeforeClass(alwaysRun = true)
51 public void setUpJvOptionPane()
53 JvOptionPane.setInteractiveMode(false);
54 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
57 @BeforeClass(alwaysRun = true)
58 public void setUp() throws IOException, MalformedURLException
61 * load a dna (ACGT) HMM file to a HiddenMarkovModel
63 HMMFile hmmFile = new HMMFile(new FileParse(
64 "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE));
65 hmm = hmmFile.getHMM();
68 @Test(groups = { "Functional" })
69 public void testCalculate_noProfile()
71 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
72 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
73 SequenceI seq3 = new Sequence("Seq3", "C---G");
74 SequenceI seq4 = new Sequence("Seq4", "CA--t");
75 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
76 int width = seq1.getLength();
77 ProfilesI result = AAFrequency.calculate(seqs, width, 0, width,
81 ProfileI col = result.get(0);
82 assertEquals(100f, col.getPercentageIdentity(false));
83 assertEquals(100f, col.getPercentageIdentity(true));
84 assertEquals(4, col.getMaxCount());
85 assertEquals("C", col.getModalResidue());
86 assertNull(col.getCounts());
90 assertEquals(75f, col.getPercentageIdentity(false));
91 assertEquals(100f, col.getPercentageIdentity(true));
92 assertEquals(3, col.getMaxCount());
93 assertEquals("A", col.getModalResidue());
95 // col 2 is 50% G 50% C or 25/25 counting gaps
97 assertEquals(25f, col.getPercentageIdentity(false));
98 assertEquals(50f, col.getPercentageIdentity(true));
99 assertEquals(1, col.getMaxCount());
100 assertEquals("CG", col.getModalResidue());
104 assertEquals(0f, col.getPercentageIdentity(false));
105 assertEquals(0f, col.getPercentageIdentity(true));
106 assertEquals(0, col.getMaxCount());
107 assertEquals("", col.getModalResidue());
109 // col 4 is 75% T 25% G
111 assertEquals(75f, col.getPercentageIdentity(false));
112 assertEquals(75f, col.getPercentageIdentity(true));
113 assertEquals(3, col.getMaxCount());
114 assertEquals("T", col.getModalResidue());
117 @Test(groups = { "Functional" })
118 public void testCalculate_withProfile()
120 SequenceI seq1 = new Sequence("Seq1", "CAGT");
121 SequenceI seq2 = new Sequence("Seq2", "CACT");
122 SequenceI seq3 = new Sequence("Seq3", "C--G");
123 SequenceI seq4 = new Sequence("Seq4", "CA-t");
124 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
125 int width = seq1.getLength();
126 ProfilesI result = AAFrequency.calculate(seqs, width, 0, width,
129 ProfileI profile = result.get(0);
130 assertEquals(4, profile.getCounts().getCount('C'));
131 assertEquals(4, profile.getHeight());
132 assertEquals(4, profile.getNonGapped());
134 profile = result.get(1);
135 assertEquals(3, profile.getCounts().getCount('A'));
136 assertEquals(4, profile.getHeight());
137 assertEquals(3, profile.getNonGapped());
139 profile = result.get(2);
140 assertEquals(1, profile.getCounts().getCount('C'));
141 assertEquals(1, profile.getCounts().getCount('G'));
142 assertEquals(4, profile.getHeight());
143 assertEquals(2, profile.getNonGapped());
145 profile = result.get(3);
146 assertEquals(3, profile.getCounts().getCount('T'));
147 assertEquals(1, profile.getCounts().getCount('G'));
148 assertEquals(4, profile.getHeight());
149 assertEquals(4, profile.getNonGapped());
152 @Test(groups = { "Functional" }, enabled = false)
153 public void testCalculate_withProfileTiming()
155 SequenceI seq1 = new Sequence("Seq1", "CAGT");
156 SequenceI seq2 = new Sequence("Seq2", "CACT");
157 SequenceI seq3 = new Sequence("Seq3", "C--G");
158 SequenceI seq4 = new Sequence("Seq4", "CA-t");
159 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
161 // ensure class loaded and initialised
162 int width = seq1.getLength();
163 AAFrequency.calculate(seqs, width, 0, width, true);
166 long start = System.currentTimeMillis();
167 for (int i = 0; i < reps; i++)
169 AAFrequency.calculate(seqs, width, 0, width, true);
171 System.out.println(System.currentTimeMillis() - start);
175 * Test generation of consensus annotation with options 'include gaps'
176 * (profile percentages are of all sequences, whether gapped or not), and
177 * 'show logo' (the full profile with all residue percentages is reported in
178 * the description for the tooltip)
180 @Test(groups = { "Functional" })
181 public void testCompleteConsensus_includeGaps_showLogo()
184 * first compute the profiles
186 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
187 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
188 SequenceI seq3 = new Sequence("Seq3", "C---G");
189 SequenceI seq4 = new Sequence("Seq4", "CA--t");
190 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
191 int width = seq1.getLength();
192 ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
194 AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
195 "PID", new Annotation[width]);
197 .completeConsensus(consensus, profiles, 0, 5, false, true, 4);
199 Annotation ann = consensus.annotations[0];
200 assertEquals("C 100%", ann.description);
201 assertEquals("C", ann.displayCharacter);
202 ann = consensus.annotations[1];
203 assertEquals("A 75%", ann.description);
204 assertEquals("A", ann.displayCharacter);
205 ann = consensus.annotations[2];
206 assertEquals("C 25%; G 25%", ann.description);
207 assertEquals("+", ann.displayCharacter);
208 ann = consensus.annotations[3];
209 assertEquals("", ann.description);
210 assertEquals("-", ann.displayCharacter);
211 ann = consensus.annotations[4];
212 assertEquals("T 75%; G 25%", ann.description);
213 assertEquals("T", ann.displayCharacter);
217 * Test generation of consensus annotation with options 'ignore gaps' (profile
218 * percentages are of the non-gapped sequences) and 'no logo' (only the modal
219 * residue[s] percentage is reported in the description for the tooltip)
221 @Test(groups = { "Functional" })
222 public void testCompleteConsensus_ignoreGaps_noLogo()
225 * first compute the profiles
227 SequenceI seq1 = new Sequence("Seq1", "CAG-T");
228 SequenceI seq2 = new Sequence("Seq2", "CAC-T");
229 SequenceI seq3 = new Sequence("Seq3", "C---G");
230 SequenceI seq4 = new Sequence("Seq4", "CA--t");
231 SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
232 int width = seq1.getLength();
233 ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
235 AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
236 "PID", new Annotation[width]);
238 .completeConsensus(consensus, profiles, 0, 5, true, false, 4);
240 Annotation ann = consensus.annotations[0];
241 assertEquals("C 100%", ann.description);
242 assertEquals("C", ann.displayCharacter);
243 ann = consensus.annotations[1];
244 assertEquals("A 100%", ann.description);
245 assertEquals("A", ann.displayCharacter);
246 ann = consensus.annotations[2];
247 assertEquals("[CG] 50%", ann.description);
248 assertEquals("+", ann.displayCharacter);
249 ann = consensus.annotations[3];
250 assertEquals("", ann.description);
251 assertEquals("-", ann.displayCharacter);
252 ann = consensus.annotations[4];
253 assertEquals("T 75%", ann.description);
254 assertEquals("T", ann.displayCharacter);
257 @Test(groups = { "Functional" })
258 public void testExtractHMMProfile()
259 throws MalformedURLException, IOException
261 int[] expected = { 0, 4, 100, 'T', 71, 'C', 12, 'G', 9, 'A', 9 };
262 int[] actual = AAFrequency.extractHMMProfile(hmm, 17, false, false);
263 for (int i = 0; i < actual.length; i++)
267 assertEquals(actual[i], expected[i]);
271 assertEquals(actual[i], expected[i]);
275 int[] expected2 = { 0, 4, 100, 'A', 85, 'C', 0, 'G', 0, 'T', 0 };
276 int[] actual2 = AAFrequency.extractHMMProfile(hmm, 2, true, false);
277 for (int i = 0; i < actual2.length; i++)
281 assertEquals(actual[i], expected[i]);
285 assertEquals(actual[i], expected[i]);
289 assertNull(AAFrequency.extractHMMProfile(null, 98978867, true, false));
292 @Test(groups = { "Functional" })
293 public void testGetAnalogueCount()
296 * 'T' in column 0 has emission probability 0.7859, scales to 7859
298 int count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, false);
299 assertEquals(7859, count);
302 * same with 'use info height': value is multiplied by log ratio
303 * log(value / background) / log(2) = log(0.7859/0.25)/0.693
304 * = log(3.1)/0.693 = 1.145/0.693 = 1.66
305 * so value becomes 1.2987 and scales up to 12987
307 count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, true);
308 assertEquals(12987, count);
311 * 'G' in column 20 has emission probability 0.75457, scales to 7546
313 count = AAFrequency.getAnalogueCount(hmm, 20, 'G', false, false);
314 assertEquals(7546, count);
317 * 'G' in column 1077 has emission probability 0.0533, here
318 * ignored (set to 0) since below background of 0.25
320 count = AAFrequency.getAnalogueCount(hmm, 1077, 'G', true, false);
321 assertEquals(0, count);
324 @Test(groups = { "Functional" })
325 public void testCompleteInformation()
327 ProfileI prof1 = new Profile(1, 0, 100, "A");
328 ProfileI prof2 = new Profile(1, 0, 100, "-");
330 ProfilesI profs = new Profiles(new ProfileI[] { prof1, prof2 });
331 Annotation ann1 = new Annotation(6.5f);
332 Annotation ann2 = new Annotation(0f);
333 Annotation[] annots = new Annotation[] { ann1, ann2 };
334 SequenceI seq = new Sequence("", "AA", 0, 0);
336 AlignmentAnnotation annot = new AlignmentAnnotation("", "", annots);
337 annot.setSequenceRef(seq);
338 AAFrequency.completeInformation(annot, profs, 0, 1);
339 float ic = annot.annotations[0].value;
340 assertEquals(0.91532f, ic, 0.0001f);
341 ic = annot.annotations[1].value;
342 assertEquals(0f, ic, 0.0001f);