/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
27 import org.testng.annotations.BeforeClass;
28 import org.testng.annotations.BeforeMethod;
29 import org.testng.annotations.Test;
31 import jalview.bin.Cache;
32 import jalview.datamodel.Sequence;
33 import jalview.datamodel.SequenceI;
34 import jalview.gui.JvOptionPane;
36 public class ComparisonTest
39 @BeforeClass(alwaysRun = true)
40 public void setUpJvOptionPane()
42 JvOptionPane.setInteractiveMode(false);
43 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
46 @BeforeMethod(alwaysRun = true)
47 public void loadProperties()
49 Cache.loadProperties("test/jalview/util/comparisonTestProps.jvprops");
52 @Test(groups = { "Functional" })
53 public void testIsGap()
55 assertTrue(Comparison.isGap('-'));
56 assertTrue(Comparison.isGap('.'));
57 assertTrue(Comparison.isGap(' '));
58 assertFalse(Comparison.isGap('X'));
59 assertFalse(Comparison.isGap('x'));
60 assertFalse(Comparison.isGap('*'));
61 assertFalse(Comparison.isGap('G'));
65 * Test for isNucleotide is that sequences in a dataset are more than 85%
66 * AGCTU. Test is not case-sensitive and ignores gaps.
68 @Test(groups = { "Functional" })
69 public void testIsNucleotide_sequences()
71 SequenceI seq = new Sequence("eightypercent+fivepercent", "agctuagcPV");
72 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
74 Comparison.isNucleotide(new SequenceI[][]
75 { new SequenceI[] { seq } }));
77 seq = new Sequence("eightyfivepercent+tenpercent",
78 "agctuagcgVagctuagcuVE");
79 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
81 seq = new Sequence(">nineyfivepercent+0percent",
82 "aagctuagcgEagctuagcua");
83 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
85 seq = new Sequence("nineyfivepercent+0percent", "agctuagcgEagctuagcua");
86 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
88 seq = new Sequence("nineyfivepercent+fivepercent",
89 "agctuagcgWagctuagcua");
90 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
92 seq = new Sequence("nineyfivepercent+tenpercent",
93 "agctuagcgEWWctuagcua");
94 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
96 seq = new Sequence("eightyfivepercent+fifteenpercent",
97 "agctuagcgWWWctuagcua");
98 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
100 seq = new Sequence("eightyfivepercentgapped",
101 "--agc--tuA--GCPV-a---gct-uA-GC---UV");
102 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
104 seq = new Sequence("ninetyfivepercentgapped",
105 "ag--ct-u-a---gc---g----aag--c---tuagcuV");
106 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
108 seq = new Sequence("allgap", "---------");
109 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
111 seq = new Sequence("DNA", "ACTugGCCAG");
112 SequenceI seq2 = new Sequence("Protein", "FLIMVSPTYW");
114 * 90% DNA but one protein sequence - expect false
117 Comparison.isNucleotide(new SequenceI[]
118 { seq, seq, seq, seq, seq, seq, seq, seq, seq, seq2 }));
120 Comparison.isNucleotide(new SequenceI[][]
121 { new SequenceI[] { seq }, new SequenceI[] { seq, seq, seq },
123 { seq, seq, seq, seq, seq, seq2 } }));
125 * 80% DNA but one protein sequence - Expect false
128 Comparison.isNucleotide(new SequenceI[]
129 { seq, seq, seq, seq, seq, seq, seq, seq, seq2, seq2 }));
131 Comparison.isNucleotide(new SequenceI[][]
132 { new SequenceI[] { seq }, new SequenceI[] { seq, seq, seq },
134 { seq, seq, seq, seq, seq2, seq2, null } }));
136 String seqString = "aaatatatatgEcctgagtcgt";
137 seq = new Sequence("ShortProteinThatLooksLikeDNA", seqString);
138 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
139 seq = new Sequence("LongProteinThatLooksLikeDNA", seqString.repeat(10));
140 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
142 assertFalse(Comparison.isNucleotide((SequenceI[]) null));
143 assertFalse(Comparison.isNucleotide((SequenceI[][]) null));
147 * Test the percentage identity calculation for two sequences
149 @Test(groups = { "Functional" })
150 public void testPID_includingGaps()
152 String seq1 = "ABCDEFG"; // extra length here is ignored
153 String seq2 = "abcdef";
154 assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f);
156 // comparison range defaults to length of first sequence
157 seq2 = "abcdefghijklmnopqrstuvwxyz";
158 assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f);
160 // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch
163 int length = seq1.length();
165 // match gap-residue, match gap-gap: 9/10 identical
166 // TODO should gap-gap be included in a PID score? JAL-791
167 assertEquals(90f, Comparison.PID(seq1, seq2, 0, length, true, false),
169 // overloaded version of the method signature above:
170 assertEquals(90f, Comparison.PID(seq1, seq2), 0.001f);
172 // don't match gap-residue, match gap-gap: 7/10 identical
173 // TODO should gap-gap be included in a PID score?
174 assertEquals(70f, Comparison.PID(seq1, seq2, 0, length, false, false),
178 @Test(groups = { "Functional" })
179 public void testIsNucleotide()
181 assertTrue(Comparison.isNucleotide('a'));
182 assertTrue(Comparison.isNucleotide('A'));
183 assertTrue(Comparison.isNucleotide('c'));
184 assertTrue(Comparison.isNucleotide('C'));
185 assertTrue(Comparison.isNucleotide('g'));
186 assertTrue(Comparison.isNucleotide('G'));
187 assertTrue(Comparison.isNucleotide('t'));
188 assertTrue(Comparison.isNucleotide('T'));
189 assertTrue(Comparison.isNucleotide('u'));
190 assertTrue(Comparison.isNucleotide('U'));
191 assertFalse(Comparison.isNucleotide('-'));
192 assertFalse(Comparison.isNucleotide('P'));
195 @Test(groups = { "Functional" })
196 public void testIsNucleotideAmbiguity()
198 assertTrue(Comparison.isNucleotide('b', true));
199 assertTrue(Comparison.isNucleotide('B', true));
200 assertTrue(Comparison.isNucleotide('d', true));
201 assertTrue(Comparison.isNucleotide('V', true));
202 assertTrue(Comparison.isNucleotide('M', true));
203 assertTrue(Comparison.isNucleotide('s', true));
204 assertTrue(Comparison.isNucleotide('W', true));
205 assertTrue(Comparison.isNucleotide('x', true));
206 assertTrue(Comparison.isNucleotide('Y', true));
207 assertTrue(Comparison.isNucleotide('r', true));
208 assertTrue(Comparison.isNucleotide('i', true));
209 assertFalse(Comparison.isNucleotide('-', true));
210 assertFalse(Comparison.isNucleotide('n', true));
211 assertFalse(Comparison.isNucleotide('P', true));
215 * Test the percentage identity calculation for two sequences
217 @Test(groups = { "Functional" })
218 public void testPID_ungappedOnly()
220 // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch
221 // the extra length of seq1 is ignored
222 String seq1 = "a--b-cdefhr";
223 String seq2 = "a---bcdefg";
224 int length = seq1.length();
227 * As currently coded, 'ungappedOnly' ignores gap-residue but counts
228 * gap-gap. Is this a bug - should gap-gap also be ignored, giving a PID of
231 * Note also there is no variant of the calculation that penalises
232 * gap-residue i.e. counts it as a mismatch. This would give a score of 5/8
233 * (if we ignore gap-gap) or 5/10 (if we count gap-gap as a match).
235 // match gap-residue, match gap-gap: 7/8 identical
236 assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, true, true),
239 // don't match gap-residue with 'ungapped only' - same as above
240 assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, false, true),
244 @Test(groups = { "Functional" })
245 public void testIsNucleotideSequence()
247 assertFalse(Comparison.isNucleotideSequence(null, true));
248 assertTrue(Comparison.isNucleotideSequence("", true));
249 assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", true));
250 assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", false));
251 assertFalse(Comparison.isNucleotideSequence("xAgGcCtTuU", false));
252 assertFalse(Comparison.isNucleotideSequence("aAgGcCtTuUx", false));
253 assertTrue(Comparison.isNucleotideSequence("a A-g.GcCtTuU", true));
254 assertFalse(Comparison.isNucleotideSequence("a A-g.GcCtTuU", false));
255 assertFalse(Comparison.isNucleotideSequence("gatactawgataca", false));
256 // including nucleotide ambiguity
258 Comparison.isNucleotideSequence("gatacaWgataca", true, true));
260 Comparison.isNucleotideSequence("gatacaEgataca", true, true));
262 // not quite all nucleotides and ambiguity codes
263 Sequence seq = new Sequence("Ambiguity DNA codes", "gatacagatacabve");
264 assertFalse(Comparison.isNucleotide(seq));
265 // all nucleotide and nucleotide ambiguity codes
266 seq = new Sequence("Ambiguity DNA codes", "gatacagatacabvt");
267 assertFalse(Comparison.isNucleotide(seq));
268 seq = new Sequence("Ambiguity DNA codes", "agatacabb");
269 assertFalse(Comparison.isNucleotide(seq));
270 // 55% nucleotide with only Xs or Ns
271 assertTrue(Comparison
272 .isNucleotide(new Sequence("dnaWithXs", "gatacaXXXX")));
273 assertTrue(Comparison
274 .isNucleotide(new Sequence("dnaWithXs", "gatacaNNNN")));
275 assertFalse(Comparison
276 .isNucleotide(new Sequence("dnaWithXs", "gatacXXXXX")));
277 assertFalse(Comparison
278 .isNucleotide(new Sequence("dnaWithXs", "gatacNNNNN")));
281 @Test(groups = { "Functional" })
282 public void testIsSameResidue()
284 assertTrue(Comparison.isSameResidue('a', 'a', false));
285 assertTrue(Comparison.isSameResidue('a', 'a', true));
286 assertTrue(Comparison.isSameResidue('A', 'a', false));
287 assertTrue(Comparison.isSameResidue('a', 'A', false));
289 assertFalse(Comparison.isSameResidue('a', 'A', true));
290 assertFalse(Comparison.isSameResidue('A', 'a', true));
293 @Test(groups = { "Functional" })
294 public void testNucleotideProportion()
296 assertFalse(Comparison.myShortSequenceNucleotideProportionCount(2, 3));
297 assertTrue(Comparison.myShortSequenceNucleotideProportionCount(3, 3));
298 assertFalse(Comparison.myShortSequenceNucleotideProportionCount(2, 4));
299 assertTrue(Comparison.myShortSequenceNucleotideProportionCount(3, 4));
301 Comparison.myShortSequenceNucleotideProportionCount(17, 20));
302 assertTrue(Comparison.myShortSequenceNucleotideProportionCount(18, 20));
304 Comparison.myShortSequenceNucleotideProportionCount(38, 50));
305 assertTrue(Comparison.myShortSequenceNucleotideProportionCount(39, 50));
307 Comparison.myShortSequenceNucleotideProportionCount(54, 100));
309 Comparison.myShortSequenceNucleotideProportionCount(55, 100));