* Test the percentage identity calculation for two sequences
*/
@Test(groups = { "Functional" })
- public void testPID()
+ public void testPID_includingGaps()
{
String seq1 = "ABCDEF";
String seq2 = "abcdef";
assertEquals(90f, Comparison.PID(seq1, seq2), 0.001f);
// don't match gap-residue, match gap-gap: 7/10 identical
- assertEquals(70f,
- Comparison.PID(seq1, seq2, 0, seq1.length(), false, false),
+ assertEquals(70f, Comparison.PID(seq1, seq2, 0, length, false, false),
0.001f);
+ }
+ /**
+ * Test the percentage identity calculation with the 'ungappedOnly' option
+ */
+ @Test(groups = { "Functional" })
+ public void testPID_ungappedOnly()
+ {
+ // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch
+ String seq1 = "a--b-cdefh";
+ String seq2 = "a---bcdefg";
+ int length = seq1.length();
+
+ /*
+ * As currently coded, 'ungappedOnly' ignores gap-residue but counts
+ * gap-gap. Is this a bug - should gap-gap also be ignored, giving a PID of
+ * 5/6?
+ *
+ * Note also that 'ungappedOnly' has no variant that penalises gap-residue,
+ * i.e. counts it as a mismatch. That would give a score of 5/8 (if we
+ * ignore gap-gap) or 7/10 (if we count gap-gap as a match).
+ */
// match gap-residue, match gap-gap: 7/8 identical
- assertEquals(87.5f,
- Comparison.PID(seq1, seq2, 0, seq1.length(), true, true),
+ assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, true, true),
0.001f);
// don't match gap-residue with 'ungapped only' - same as above
- assertEquals(87.5f,
- Comparison.PID(seq1, seq2, 0, seq1.length(), false, true),
+ assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, false, true),
0.001f);
}
}