1 package jalview.analysis.scoremodels;
3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNotNull;
5 import static org.testng.Assert.assertNotSame;
6 import static org.testng.Assert.assertTrue;
7 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
9 import jalview.api.analysis.SimilarityParamsI;
10 import jalview.io.DataSourceType;
11 import jalview.io.FileParse;
12 import jalview.io.ScoreMatrixFile;
13 import jalview.math.MatrixI;
14 import jalview.schemes.ResidueProperties;
16 import java.io.IOException;
17 import java.net.MalformedURLException;
18 import java.util.Arrays;
20 import org.testng.annotations.Test;
22 public class ScoreMatrixTest
24 @Test(groups = "Functional")
25 public void testConstructor()
27 // note score matrix does not have to be symmetric (though it should be!)
28 float[][] scores = new float[3][];
29 scores[0] = new float[] { 1f, 2f, 3f };
30 scores[1] = new float[] { 4f, 5f, 6f };
31 scores[2] = new float[] { 7f, 8f, 9f };
32 ScoreMatrix sm = new ScoreMatrix("Test", "ABC".toCharArray(), scores);
33 assertEquals(sm.getSize(), 3);
34 assertArrayEquals(scores, sm.getMatrix());
35 assertEquals(sm.getPairwiseScore('A', 'a'), 1f);
36 assertEquals(sm.getPairwiseScore('b', 'c'), 6f);
37 assertEquals(sm.getPairwiseScore('c', 'b'), 8f);
38 assertEquals(sm.getPairwiseScore('A', 'D'), 0f);
39 assertEquals(sm.getMatrixIndex('c'), 2);
40 assertEquals(sm.getMatrixIndex(' '), -1);
42 assertEquals(sm.getGapIndex(), -1); // no gap symbol
46 groups = "Functional",
47 expectedExceptions = { IllegalArgumentException.class })
48 public void testConstructor_matrixTooSmall()
50 float[][] scores = new float[2][];
51 scores[0] = new float[] { 1f, 2f };
52 scores[1] = new float[] { 3f, 4f };
53 new ScoreMatrix("Test", "ABC".toCharArray(), scores);
57 groups = "Functional",
58 expectedExceptions = { IllegalArgumentException.class })
59 public void testConstructor_matrixTooBig()
61 float[][] scores = new float[2][];
62 scores[0] = new float[] { 1f, 2f };
63 scores[1] = new float[] { 3f, 4f };
64 new ScoreMatrix("Test", "A".toCharArray(), scores);
68 groups = "Functional",
69 expectedExceptions = { IllegalArgumentException.class })
70 public void testConstructor_matrixNotSquare()
72 float[][] scores = new float[2][];
73 scores[0] = new float[] { 1f, 2f };
74 scores[1] = new float[] { 3f };
75 new ScoreMatrix("Test", "AB".toCharArray(), scores);
78 @Test(groups = "Functional")
79 public void testBuildSymbolIndex()
81 float[][] scores = new float[2][];
82 scores[0] = new float[] { 1f, 2f };
83 scores[1] = new float[] { 3f, 4f };
84 ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' },
86 short[] index = sm.buildSymbolIndex("AX-yxYp".toCharArray());
88 assertEquals(index.length, 128); // ASCII character set size
90 assertEquals(index['A'], 0);
91 assertEquals(index['a'], 0); // lower-case mapping added
92 assertEquals(index['X'], 1);
93 assertEquals(index['-'], 2);
94 assertEquals(index['y'], 3); // lower-case override
95 assertEquals(index['x'], 4); // lower-case override
96 assertEquals(index['Y'], 5);
97 assertEquals(index['p'], 6);
98 assertEquals(index['P'], -1); // lower-case doesn't map upper-case
101 * check all unmapped symbols have index for unmapped
103 for (int c = 0; c < index.length; c++)
105 if (!"AaXx-. Yyp".contains(String.valueOf((char) c)))
107 assertEquals(index[c], -1);
113 * check that characters not in the basic ASCII set are simply ignored
115 @Test(groups = "Functional")
116 public void testBuildSymbolIndex_nonAscii()
118 float[][] scores = new float[2][];
119 scores[0] = new float[] { 1f, 2f };
120 scores[1] = new float[] { 3f, 4f };
121 ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' },
123 char[] weird = new char[] { 128, 245, 'P' };
124 short[] index = sm.buildSymbolIndex(weird);
125 assertEquals(index.length, 128);
126 assertEquals(index['P'], 2);
127 assertEquals(index['p'], 2);
128 for (int c = 0; c < index.length; c++)
130 if (c != 'P' && c != 'p')
132 assertEquals(index[c], -1);
137 @Test(groups = "Functional")
138 public void testGetMatrix()
140 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
141 float[][] m = sm.getMatrix();
142 assertEquals(m.length, sm.getSize());
143 assertEquals(m[2][4], -3f);
144 // verify a defensive copy is returned
145 float[][] m2 = sm.getMatrix();
146 assertNotSame(m, m2);
147 assertTrue(Arrays.deepEquals(m, m2));
150 @Test(groups = "Functional")
151 public void testGetMatrixIndex()
153 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
154 assertEquals(sm.getMatrixIndex('A'), 0);
155 assertEquals(sm.getMatrixIndex('R'), 1);
156 assertEquals(sm.getMatrixIndex('r'), 1);
157 assertEquals(sm.getMatrixIndex('N'), 2);
158 assertEquals(sm.getMatrixIndex('D'), 3);
159 assertEquals(sm.getMatrixIndex('X'), 22);
160 assertEquals(sm.getMatrixIndex('x'), 22);
161 assertEquals(sm.getMatrixIndex('-'), 23);
162 assertEquals(sm.getMatrixIndex('*'), 24);
163 assertEquals(sm.getMatrixIndex('.'), -1);
164 assertEquals(sm.getMatrixIndex(' '), -1);
165 assertEquals(sm.getMatrixIndex('?'), -1);
166 assertEquals(sm.getMatrixIndex((char) 128), -1);
169 @Test(groups = "Functional")
170 public void testGetGapIndex()
172 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
173 assertEquals(sm.getGapIndex(), 23);
176 @Test(groups = "Functional")
177 public void testGetSize()
179 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
180 assertEquals(sm.getMatrix().length, sm.getSize());
183 @Test(groups = "Functional")
184 public void testComputePairwiseScores()
187 * NB score matrix expects '-' for gap
189 String[] seqs = new String[] { "FKL", "R-D", "QIA", "GWC" };
190 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
192 MatrixI pairwise = sm.findSimilarities(seqs, SimilarityParams.Jalview);
195 * should be NxN where N = number of sequences
197 assertEquals(pairwise.height(), 4);
198 assertEquals(pairwise.width(), 4);
201 * should be symmetrical (because BLOSUM62 is)
203 for (int i = 0; i < pairwise.height(); i++)
205 for (int j = i + 1; j < pairwise.width(); j++)
207 assertEquals(pairwise.getValue(i, j), pairwise.getValue(j, i),
208 String.format("Not symmetric at [%d, %d]", i, j));
212 * verify expected BLOSUM dot product scores
214 // F.F + K.K + L.L = 6 + 5 + 4 = 15
215 assertEquals(pairwise.getValue(0, 0), 15d);
216 // R.R + -.- + D.D = 5 + 1 + 6 = 12
217 assertEquals(pairwise.getValue(1, 1), 12d);
218 // Q.Q + I.I + A.A = 5 + 4 + 4 = 13
219 assertEquals(pairwise.getValue(2, 2), 13d);
220 // G.G + W.W + C.C = 6 + 11 + 9 = 26
221 assertEquals(pairwise.getValue(3, 3), 26d);
222 // F.R + K.- + L.D = -3 + -4 + -4 = -11
223 assertEquals(pairwise.getValue(0, 1), -11d);
224 // F.Q + K.I + L.A = -3 + -3 + -1 = -7
225 assertEquals(pairwise.getValue(0, 2), -7d);
226 // F.G + K.W + L.C = -3 + -3 + -1 = -7
227 assertEquals(pairwise.getValue(0, 3), -7d);
228 // R.Q + -.I + D.A = 1 + -4 + -2 = -5
229 assertEquals(pairwise.getValue(1, 2), -5d);
230 // R.G + -.W + D.C = -2 + -4 + -3 = -9
231 assertEquals(pairwise.getValue(1, 3), -9d);
232 // Q.G + I.W + A.C = -2 + -3 + 0 = -5
233 assertEquals(pairwise.getValue(2, 3), -5d);
237 * Test that the result of outputMatrix can be reparsed to give an identical
240 * @throws IOException
241 * @throws MalformedURLException
243 @Test(groups = "Functional")
244 public void testOutputMatrix_roundTrip() throws MalformedURLException,
247 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
248 String output = sm.outputMatrix(false);
249 FileParse fp = new FileParse(output, DataSourceType.PASTE);
250 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
251 ScoreMatrix sm2 = parser.parseMatrix();
253 assertTrue(sm2.equals(sm));
256 @Test(groups = "Functional")
257 public void testEqualsAndHashCode()
259 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
260 ScoreMatrix sm2 = new ScoreMatrix(sm.getName(), sm.getSymbols()
261 .toCharArray(), sm.getMatrix());
262 assertTrue(sm.equals(sm2));
263 assertEquals(sm.hashCode(), sm2.hashCode());
267 * Tests for scoring options where the longer length of two sequences is used
269 @Test(groups = "Functional")
270 public void testcomputeSimilarity_matchLongestSequence()
273 * ScoreMatrix expects '-' for gaps
275 String s1 = "FR-K-S";
277 ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
280 * score gap-gap and gap-char
281 * shorter sequence treated as if with trailing gaps
282 * score = F^F + R^S + -^- + K^- + -^L + S^-
283 * = 6 + -1 + 1 + -4 + -4 + -4 = -6
285 SimilarityParamsI params = new SimilarityParams(true, true, true, false);
286 assertEquals(blosum.computeSimilarity(s1, s2, params), -6d);
287 // matchGap (arg2) is ignored:
288 params = new SimilarityParams(true, false, true, false);
289 assertEquals(blosum.computeSimilarity(s1, s2, params), -6d);
292 * score gap-char but not gap-gap
293 * score = F^F + R^S + 0 + K^- + -^L + S^-
294 * = 6 + -1 + 0 + -4 + -4 + -4 = -7
296 params = new SimilarityParams(false, true, true, false);
297 assertEquals(blosum.computeSimilarity(s1, s2, params), -7d);
298 // matchGap (arg2) is ignored:
299 params = new SimilarityParams(false, false, true, false);
300 assertEquals(blosum.computeSimilarity(s1, s2, params), -7d);
303 * score gap-gap but not gap-char
304 * score = F^F + R^S + -^- + 0 + 0 + 0
307 params = new SimilarityParams(true, false, false, false);
308 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
309 // matchGap (arg2) is ignored:
310 params = new SimilarityParams(true, true, false, false);
311 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
314 * score neither gap-gap nor gap-char
315 * score = F^F + R^S + 0 + 0 + 0 + 0
318 params = new SimilarityParams(false, false, false, false);
319 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
320 // matchGap (arg2) is ignored:
321 params = new SimilarityParams(false, true, false, false);
322 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
326 * Tests for scoring options where only the shorter length of two sequences is
329 @Test(groups = "Functional")
330 public void testcomputeSimilarity_matchShortestSequence()
333 * ScoreMatrix expects '-' for gaps
335 String s1 = "FR-K-S";
337 ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
340 * score gap-gap and gap-char
341 * match shorter sequence only
342 * score = F^F + R^S + -^- + K^- + -^L
343 * = 6 + -1 + 1 + -4 + -4 = -2
345 SimilarityParamsI params = new SimilarityParams(true, true, true, true);
346 assertEquals(blosum.computeSimilarity(s1, s2, params), -2d);
347 // matchGap (arg2) is ignored:
348 params = new SimilarityParams(true, false, true, true);
349 assertEquals(blosum.computeSimilarity(s1, s2, params), -2d);
352 * score gap-char but not gap-gap
353 * score = F^F + R^S + 0 + K^- + -^L
354 * = 6 + -1 + 0 + -4 + -4 = -3
356 params = new SimilarityParams(false, true, true, true);
357 assertEquals(blosum.computeSimilarity(s1, s2, params), -3d);
358 // matchGap (arg2) is ignored:
359 params = new SimilarityParams(false, false, true, true);
360 assertEquals(blosum.computeSimilarity(s1, s2, params), -3d);
363 * score gap-gap but not gap-char
364 * score = F^F + R^S + -^- + 0 + 0
367 params = new SimilarityParams(true, false, false, true);
368 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
369 // matchGap (arg2) is ignored:
370 params = new SimilarityParams(true, true, false, true);
371 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
374 * score neither gap-gap nor gap-char
375 * score = F^F + R^S + 0 + 0 + 0
378 params = new SimilarityParams(false, false, false, true);
379 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
380 // matchGap (arg2) is ignored:
381 params = new SimilarityParams(false, true, false, true);
382 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
385 @Test(groups = "Functional")
386 public void testSymmetric()
388 verifySymmetric(ScoreModels.getInstance().getBlosum62());
389 verifySymmetric(ScoreModels.getInstance().getPam250());
390 verifySymmetric(ScoreModels.getInstance().getDefaultModel(false)); // dna
393 private void verifySymmetric(ScoreMatrix sm)
395 float[][] m = sm.getMatrix();
397 for (int row = 0; row < rows; row++)
399 assertEquals(m[row].length, rows);
400 for (int col = 0; col < rows; col++)
402 assertEquals(m[row][col], m[col][row], String.format("%s [%s, %s]",
403 sm.getName(), ResidueProperties.aa[row],
404 ResidueProperties.aa[col]));
410 * A test that just asserts the expected values in the Blosum62 score matrix
412 @Test(groups = "Functional")
413 public void testBlosum62_values()
415 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
418 * verify expected scores against ARNDCQEGHILKMFPSTWYVBZX
419 * scraped from https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
421 verifyValues(sm, 'A', new float[] { 4, -1, -2, -2, 0, -1, -1, 0, -2,
423 -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0 });
424 verifyValues(sm, 'R', new float[] { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3,
425 -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1 });
426 verifyValues(sm, 'N', new float[] { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3,
428 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1 });
429 verifyValues(sm, 'D', new float[] { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3,
430 -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1 });
431 verifyValues(sm, 'C', new float[] { 0, -3, -3, -3, 9, -3, -4, -3, -3,
433 -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2 });
434 verifyValues(sm, 'Q', new float[] { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3,
436 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1 });
437 verifyValues(sm, 'E', new float[] { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3,
439 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
440 verifyValues(sm, 'G', new float[] { 0, -2, 0, -1, -3, -2, -2, 6, -2,
442 -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1 });
443 verifyValues(sm, 'H', new float[] { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3,
444 -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1 });
445 verifyValues(sm, 'I', new float[] { -1, -3, -3, -3, -1, -3, -3, -4, -3,
446 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1 });
447 verifyValues(sm, 'L', new float[] { -1, -2, -3, -4, -1, -2, -3, -4, -3,
448 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1 });
449 verifyValues(sm, 'K', new float[] { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3,
450 -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1 });
451 verifyValues(sm, 'M', new float[] { -1, -1, -2, -3, -1, 0, -2, -3, -2,
453 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1 });
454 verifyValues(sm, 'F', new float[] { -2, -3, -3, -3, -2, -3, -3, -3, -1,
455 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1 });
456 verifyValues(sm, 'P', new float[] { -1, -2, -2, -1, -3, -1, -1, -2, -2,
457 -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2 });
458 verifyValues(sm, 'S', new float[] { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2,
460 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0 });
461 verifyValues(sm, 'T', new float[] { 0, -1, 0, -1, -1, -1, -1, -2, -2,
463 -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0 });
464 verifyValues(sm, 'W', new float[] { -3, -3, -4, -4, -2, -2, -3, -2, -2,
465 -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2 });
466 verifyValues(sm, 'Y', new float[] { -2, -2, -2, -3, -2, -1, -2, -3, 2,
467 -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1 });
468 verifyValues(sm, 'V', new float[] { 0, -3, -3, -3, -1, -2, -2, -3, -3,
470 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1 });
471 verifyValues(sm, 'B', new float[] { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3,
472 -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1 });
473 verifyValues(sm, 'Z', new float[] { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3,
475 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
476 verifyValues(sm, 'X', new float[] { 0, -1, -1, -1, -2, -1, -1, -1, -1,
477 -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 });
481 * Helper method to check pairwise scores for one residue
486 * score values against 'res', in ResidueProperties.aaIndex order
488 private void verifyValues(ScoreMatrix sm, char res, float[] expected)
490 for (int j = 0; j < expected.length; j++)
492 char c2 = ResidueProperties.aa[j].charAt(0);
493 assertEquals(sm.getPairwiseScore(res, c2), expected[j],
494 String.format("%s->%s", res, c2));
498 @Test(groups = "Functional")
499 public void testConstructor_gapDash()
501 float[][] scores = new float[2][];
502 scores[0] = new float[] { 1f, 2f };
503 scores[1] = new float[] { 4f, 5f };
504 ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '-' },
506 assertEquals(sm.getSize(), 2);
507 assertArrayEquals(scores, sm.getMatrix());
508 assertEquals(sm.getPairwiseScore('A', 'a'), 1f);
509 assertEquals(sm.getPairwiseScore('A', 'A'), 1f);
510 assertEquals(sm.getPairwiseScore('a', '-'), 2f);
511 assertEquals(sm.getPairwiseScore('-', 'A'), 4f);
512 assertEquals(sm.getMatrixIndex('a'), 0);
513 assertEquals(sm.getMatrixIndex('A'), 0);
514 assertEquals(sm.getMatrixIndex('-'), 1);
515 assertEquals(sm.getMatrixIndex(' '), -1);
516 assertEquals(sm.getMatrixIndex('.'), -1);
518 assertEquals(sm.getGapIndex(), 1);