2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
27 import jalview.datamodel.Sequence;
28 import jalview.datamodel.SequenceI;
29 import jalview.gui.JvOptionPane;
31 import org.testng.Assert;
32 import org.testng.annotations.BeforeClass;
33 import org.testng.annotations.Test;
35 public class ComparisonTest
38 @BeforeClass(alwaysRun = true)
39 public void setUpJvOptionPane()
41 JvOptionPane.setInteractiveMode(false);
42 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
45 @Test(groups = { "Functional" })
46 public void testIsGap()
48 assertTrue(Comparison.isGap('-'));
49 assertTrue(Comparison.isGap('.'));
50 assertTrue(Comparison.isGap(' '));
51 assertFalse(Comparison.isGap('X'));
52 assertFalse(Comparison.isGap('x'));
53 assertFalse(Comparison.isGap('*'));
54 assertFalse(Comparison.isGap('G'));
56 // consistency - test Comparison.isGap covers all gapChars
57 StringBuilder missing = new StringBuilder();
58 for (int i = 0, iSize = Comparison.GapChars.length(); i < iSize; i++)
60 char gc = Comparison.GapChars.charAt(i);
61 if (!Comparison.isGap(gc))
66 if (missing.length() > 0)
69 "Comparison.GapChars contains symbols not covered by Comparison.isGap: '"
70 + missing.toString() + "'");
75 * Test for isNucleotide is that sequences in a dataset are more than 85%
76 * AGCTU. Test is not case-sensitive and ignores gaps.
78 @Test(groups = { "Functional" })
79 public void testIsNucleotide_sequences()
81 SequenceI seq = new Sequence("eightypercent", "agctuAGCPV");
82 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
83 assertFalse(Comparison.isNucleotide(new SequenceI[][] { new SequenceI[]
86 seq = new Sequence("eightyfivepercent", "agctuAGCPVagctuAGCUV");
87 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
89 seq = new Sequence("nineypercent", "agctuAGCgVagctuAGCUV");
90 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
92 seq = new Sequence("eightyfivepercentgapped",
93 "--agc--tuA--GCPV-a---gct-uA-GC---UV");
94 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
96 seq = new Sequence("nineypercentgapped",
97 "ag--ct-u-A---GC---g----Vag--c---tuAGCUV");
98 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
100 seq = new Sequence("allgap", "---------");
101 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
103 seq = new Sequence("DNA", "ACTugGCCAG");
104 SequenceI seq2 = new Sequence("Protein", "FLIMVSPTYW");
108 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq, seq, seq,
109 seq, seq, seq, seq, seq, seq, seq2 }));
110 assertTrue(Comparison.isNucleotide(new SequenceI[][] {
111 new SequenceI[] { seq }, new SequenceI[] { seq, seq, seq },
112 new SequenceI[] { seq, seq, seq, seq, seq, seq2 } }));
116 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq, seq, seq,
117 seq, seq, seq, seq, seq, seq2, seq2 }));
118 assertFalse(Comparison.isNucleotide(new SequenceI[][] { new SequenceI[]
119 { seq }, new SequenceI[] { seq, seq, seq },
120 new SequenceI[] { seq, seq, seq, seq, seq2, seq2, null } }));
122 seq = new Sequence("ProteinThatLooksLikeDNA", "WYATGCCTGAgtcgt");
124 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
126 assertFalse(Comparison.isNucleotide((SequenceI[]) null));
127 assertFalse(Comparison.isNucleotide((SequenceI[][]) null));
131 * Test the percentage identity calculation for two sequences
133 @Test(groups = { "Functional" })
134 public void testPID_includingGaps()
136 String seq1 = "ABCDEFG"; // extra length here is ignored
137 String seq2 = "abcdef";
138 assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f);
140 // comparison range defaults to length of first sequence
141 seq2 = "abcdefghijklmnopqrstuvwxyz";
142 assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f);
144 // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch
147 int length = seq1.length();
149 // match gap-residue, match gap-gap: 9/10 identical
150 // TODO should gap-gap be included in a PID score? JAL-791
151 assertEquals(90f, Comparison.PID(seq1, seq2, 0, length, true, false),
153 // overloaded version of the method signature above:
154 assertEquals(90f, Comparison.PID(seq1, seq2), 0.001f);
156 // don't match gap-residue, match gap-gap: 7/10 identical
157 // TODO should gap-gap be included in a PID score?
158 assertEquals(70f, Comparison.PID(seq1, seq2, 0, length, false, false),
162 @Test(groups = { "Functional" })
163 public void testIsNucleotide()
165 assertTrue(Comparison.isNucleotide('a'));
166 assertTrue(Comparison.isNucleotide('A'));
167 assertTrue(Comparison.isNucleotide('c'));
168 assertTrue(Comparison.isNucleotide('C'));
169 assertTrue(Comparison.isNucleotide('g'));
170 assertTrue(Comparison.isNucleotide('G'));
171 assertTrue(Comparison.isNucleotide('t'));
172 assertTrue(Comparison.isNucleotide('T'));
173 assertTrue(Comparison.isNucleotide('u'));
174 assertTrue(Comparison.isNucleotide('U'));
175 assertFalse(Comparison.isNucleotide('-'));
176 assertFalse(Comparison.isNucleotide('P'));
180 * Test the percentage identity calculation for two sequences
182 @Test(groups = { "Functional" })
183 public void testPID_ungappedOnly()
185 // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch
186 // the extra length of seq1 is ignored
187 String seq1 = "a--b-cdefhr";
188 String seq2 = "a---bcdefg";
189 int length = seq1.length();
192 * As currently coded, 'ungappedOnly' ignores gap-residue but counts
193 * gap-gap. Is this a bug - should gap-gap also be ignored, giving a PID of
196 * Note also there is no variant of the calculation that penalises
197 * gap-residue i.e. counts it as a mismatch. This would give a score of 5/8
198 * (if we ignore gap-gap) or 5/10 (if we count gap-gap as a match).
200 // match gap-residue, match gap-gap: 7/8 identical
201 assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, true, true),
204 // don't match gap-residue with 'ungapped only' - same as above
205 assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, false, true),
209 @Test(groups = { "Functional" })
210 public void testIsNucleotideSequence()
212 assertFalse(Comparison.isNucleotideSequence(null, true));
213 assertTrue(Comparison.isNucleotideSequence("", true));
214 assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", true));
215 assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", false));
216 assertFalse(Comparison.isNucleotideSequence("xAgGcCtTuU", false));
217 assertFalse(Comparison.isNucleotideSequence("aAgGcCtTuUx", false));
218 assertTrue(Comparison.isNucleotideSequence("a A-g.GcCtTuU", true));
219 assertFalse(Comparison.isNucleotideSequence("a A-g.GcCtTuU", false));
222 @Test(groups = { "Functional" })
223 public void testIsSameResidue()
225 assertTrue(Comparison.isSameResidue('a', 'a', false));
226 assertTrue(Comparison.isSameResidue('a', 'a', true));
227 assertTrue(Comparison.isSameResidue('A', 'a', false));
228 assertTrue(Comparison.isSameResidue('a', 'A', false));
230 assertFalse(Comparison.isSameResidue('a', 'A', true));
231 assertFalse(Comparison.isSameResidue('A', 'a', true));