2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
27 import jalview.datamodel.Sequence;
28 import jalview.datamodel.SequenceI;
30 import org.testng.annotations.Test;
32 public class ComparisonTest
35 @Test(groups = { "Functional" })
36 public void testIsGap()
38 assertTrue(Comparison.isGap('-'));
39 assertTrue(Comparison.isGap('.'));
40 assertTrue(Comparison.isGap(' '));
41 assertFalse(Comparison.isGap('X'));
42 assertFalse(Comparison.isGap('x'));
43 assertFalse(Comparison.isGap('*'));
44 assertFalse(Comparison.isGap('G'));
48 * Test for isNucleotide is that sequences in a dataset are more than 85%
49 * AGCTU. Test is not case-sensitive and ignores gaps.
51 @Test(groups = { "Functional" })
52 public void testIsNucleotide()
54 SequenceI seq = new Sequence("eightypercent", "agctuAGCPV");
55 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
56 assertFalse(Comparison.isNucleotide(new SequenceI[][] { new SequenceI[]
59 seq = new Sequence("eightyfivepercent", "agctuAGCPVagctuAGCUV");
60 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
62 seq = new Sequence("nineypercent", "agctuAGCgVagctuAGCUV");
63 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
65 seq = new Sequence("eightyfivepercentgapped",
66 "--agc--tuA--GCPV-a---gct-uA-GC---UV");
67 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
69 seq = new Sequence("nineypercentgapped",
70 "ag--ct-u-A---GC---g----Vag--c---tuAGCUV");
71 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
73 seq = new Sequence("allgap", "---------");
74 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
76 seq = new Sequence("DNA", "ACTugGCCAG");
77 SequenceI seq2 = new Sequence("Protein", "FLIMVSPTYW");
81 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq, seq, seq,
82 seq, seq, seq, seq, seq, seq, seq2 }));
83 assertTrue(Comparison.isNucleotide(new SequenceI[][] {
84 new SequenceI[] { seq }, new SequenceI[] { seq, seq, seq },
85 new SequenceI[] { seq, seq, seq, seq, seq, seq2 } }));
89 assertFalse(Comparison.isNucleotide(new SequenceI[] { seq, seq, seq,
90 seq, seq, seq, seq, seq, seq2, seq2 }));
91 assertFalse(Comparison.isNucleotide(new SequenceI[][] { new SequenceI[]
92 { seq }, new SequenceI[] { seq, seq, seq },
93 new SequenceI[] { seq, seq, seq, seq, seq2, seq2, null } }));
95 seq = new Sequence("ProteinThatLooksLikeDNA", "WYATGCCTGAgtcgt");
97 assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
99 assertFalse(Comparison.isNucleotide((SequenceI[]) null));
100 assertFalse(Comparison.isNucleotide((SequenceI[][]) null));
104 * Test the percentage identity calculation for two sequences
106 @Test(groups = { "Functional" })
107 public void testPID_includingGaps()
109 String seq1 = "ABCDEF";
110 String seq2 = "abcdef";
111 assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f);
113 // comparison range defaults to length of first sequence
114 seq2 = "abcdefghijklmnopqrstuvwxyz";
115 assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f);
117 // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch
120 int length = seq1.length();
122 // match gap-residue, match gap-gap: 9/10 identical
123 assertEquals(90f, Comparison.PID(seq1, seq2, 0, length, true, false),
125 // overloaded version of the method signature above:
126 assertEquals(90f, Comparison.PID(seq1, seq2), 0.001f);
128 // don't match gap-residue, match gap-gap: 7/10 identical
129 assertEquals(70f, Comparison.PID(seq1, seq2, 0, length, false, false),
134 * Test the percentage identity calculation for two sequences
136 @Test(groups = { "Functional" })
137 public void testPID_ungappedOnly()
139 // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch
140 String seq1 = "a--b-cdefh";
141 String seq2 = "a---bcdefg";
142 int length = seq1.length();
145 * As currently coded, 'ungappedOnly' ignores gap-residue but counts
146 * gap-gap. Is this a bug - should gap-gap also be ignored, giving a PID of 5/6?
149 * Note also there is no variant of the calculation that penalises
150 * gap-residue i.e. counts it as a mismatch. This would give a score of 5/8
151 * (if we ignore gap-gap) or 5/10 (if we count gap-gap as a match).
153 // match gap-residue, match gap-gap: 7/8 identical
154 assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, true, true),
157 // don't match gap-residue with 'ungapped only' - same as above
158 assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, false, true),