1 package jalview.analysis.scoremodels;
3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNotNull;
5 import static org.testng.Assert.assertNotSame;
6 import static org.testng.Assert.assertTrue;
7 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
9 import jalview.api.analysis.SimilarityParamsI;
10 import jalview.io.DataSourceType;
11 import jalview.io.FileParse;
12 import jalview.io.ScoreMatrixFile;
13 import jalview.math.MatrixI;
14 import jalview.schemes.ResidueProperties;
16 import java.io.IOException;
17 import java.net.MalformedURLException;
18 import java.util.Arrays;
20 import org.testng.annotations.Test;
22 public class ScoreMatrixTest
24 @Test(groups = "Functional")
25 public void testConstructor()
27 // note score matrix does not have to be symmetric (though it should be!)
28 float[][] scores = new float[3][];
29 scores[0] = new float[] { 1f, 2f, 3f };
30 scores[1] = new float[] { 4f, 5f, 6f };
31 scores[2] = new float[] { 7f, 8f, 9f };
32 ScoreMatrix sm = new ScoreMatrix("Test", "ABC".toCharArray(), scores);
33 assertEquals(sm.getSize(), 3);
34 assertArrayEquals(scores, sm.getMatrix());
35 assertEquals(sm.getPairwiseScore('A', 'a'), 1f);
36 assertEquals(sm.getPairwiseScore('b', 'c'), 6f);
37 assertEquals(sm.getPairwiseScore('c', 'b'), 8f);
38 assertEquals(sm.getPairwiseScore('A', 'D'), 0f);
39 assertEquals(sm.getMatrixIndex('c'), 2);
40 assertEquals(sm.getMatrixIndex(' '), -1);
44 groups = "Functional",
45 expectedExceptions = { IllegalArgumentException.class })
46 public void testConstructor_matrixTooSmall()
48 float[][] scores = new float[2][];
49 scores[0] = new float[] { 1f, 2f };
50 scores[1] = new float[] { 3f, 4f };
51 new ScoreMatrix("Test", "ABC".toCharArray(), scores);
55 groups = "Functional",
56 expectedExceptions = { IllegalArgumentException.class })
57 public void testConstructor_matrixTooBig()
59 float[][] scores = new float[2][];
60 scores[0] = new float[] { 1f, 2f };
61 scores[1] = new float[] { 3f, 4f };
62 new ScoreMatrix("Test", "A".toCharArray(), scores);
66 groups = "Functional",
67 expectedExceptions = { IllegalArgumentException.class })
68 public void testConstructor_matrixNotSquare()
70 float[][] scores = new float[2][];
71 scores[0] = new float[] { 1f, 2f };
72 scores[1] = new float[] { 3f };
73 new ScoreMatrix("Test", "AB".toCharArray(), scores);
76 @Test(groups = "Functional")
77 public void testBuildSymbolIndex()
79 short[] index = ScoreMatrix.buildSymbolIndex("AX-. yxYp".toCharArray());
81 assertEquals(index.length, 128); // ASCII character set size
83 assertEquals(index['A'], 0);
84 assertEquals(index['a'], 0); // lower-case mapping added
85 assertEquals(index['X'], 1);
86 assertEquals(index['-'], 2);
87 assertEquals(index['.'], 3);
88 assertEquals(index[' '], 4);
89 assertEquals(index['y'], 5); // lower-case override
90 assertEquals(index['x'], 6); // lower-case override
91 assertEquals(index['Y'], 7);
92 assertEquals(index['p'], 8);
93 assertEquals(index['P'], -1); // lower-case doesn't map upper-case
96 * check all unmapped symbols have index for unmapped
98 for (int c = 0; c < index.length; c++)
100 if (!"AaXx-. Yyp".contains(String.valueOf((char) c)))
102 assertEquals(index[c], -1);
108 * check that characters not in the basic ASCII set are simply ignored
110 @Test(groups = "Functional")
111 public void testBuildSymbolIndex_nonAscii()
113 char[] weird = new char[] { 128, 245, 'P' };
114 short[] index = ScoreMatrix.buildSymbolIndex(weird);
115 assertEquals(index.length, 128);
116 assertEquals(index['P'], 2);
117 assertEquals(index['p'], 2);
118 for (int c = 0; c < index.length; c++)
120 if (c != 'P' && c != 'p')
122 assertEquals(index[c], -1);
127 @Test(groups = "Functional")
128 public void testGetMatrix()
130 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
131 float[][] m = sm.getMatrix();
132 assertEquals(m.length, sm.getSize());
133 assertEquals(m[2][4], -3f);
134 // verify a defensive copy is returned
135 float[][] m2 = sm.getMatrix();
136 assertNotSame(m, m2);
137 assertTrue(Arrays.deepEquals(m, m2));
140 @Test(groups = "Functional")
141 public void testGetMatrixIndex()
143 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
144 assertEquals(sm.getMatrixIndex('A'), 0);
145 assertEquals(sm.getMatrixIndex('R'), 1);
146 assertEquals(sm.getMatrixIndex('r'), 1);
147 assertEquals(sm.getMatrixIndex('N'), 2);
148 assertEquals(sm.getMatrixIndex('D'), 3);
149 assertEquals(sm.getMatrixIndex('X'), 22);
150 assertEquals(sm.getMatrixIndex('x'), 22);
151 assertEquals(sm.getMatrixIndex(' '), 23);
152 assertEquals(sm.getMatrixIndex('*'), 24);
153 assertEquals(sm.getMatrixIndex('.'), -1);
154 assertEquals(sm.getMatrixIndex('-'), -1);
155 assertEquals(sm.getMatrixIndex('?'), -1);
156 assertEquals(sm.getMatrixIndex((char) 128), -1);
159 @Test(groups = "Functional")
160 public void testGetSize()
162 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
163 assertEquals(sm.getMatrix().length, sm.getSize());
166 @Test(groups = "Functional")
167 public void testComputePairwiseScores()
170 * NB score matrix assumes space for gap - Jalview converts
171 * space to gap before computing PCA or Tree
173 String[] seqs = new String[] { "FKL", "R D", "QIA", "GWC" };
174 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
176 MatrixI pairwise = sm.findSimilarities(seqs, SimilarityParams.Jalview);
179 * should be NxN where N = number of sequences
181 assertEquals(pairwise.height(), 4);
182 assertEquals(pairwise.width(), 4);
185 * should be symmetrical (because BLOSUM62 is)
187 for (int i = 0; i < pairwise.height(); i++)
189 for (int j = i + 1; j < pairwise.width(); j++)
191 assertEquals(pairwise.getValue(i, j), pairwise.getValue(j, i),
192 String.format("Not symmetric at [%d, %d]", i, j));
196 * verify expected BLOSUM dot product scores
198 // F.F + K.K + L.L = 6 + 5 + 4 = 15
199 assertEquals(pairwise.getValue(0, 0), 15d);
200 // R.R + -.- + D.D = 5 + 1 + 6 = 12
201 assertEquals(pairwise.getValue(1, 1), 12d);
202 // Q.Q + I.I + A.A = 5 + 4 + 4 = 13
203 assertEquals(pairwise.getValue(2, 2), 13d);
204 // G.G + W.W + C.C = 6 + 11 + 9 = 26
205 assertEquals(pairwise.getValue(3, 3), 26d);
206 // F.R + K.- + L.D = -3 + -4 + -4 = -11
207 assertEquals(pairwise.getValue(0, 1), -11d);
208 // F.Q + K.I + L.A = -3 + -3 + -1 = -7
209 assertEquals(pairwise.getValue(0, 2), -7d);
210 // F.G + K.W + L.C = -3 + -3 + -1 = -7
211 assertEquals(pairwise.getValue(0, 3), -7d);
212 // R.Q + -.I + D.A = 1 + -4 + -2 = -5
213 assertEquals(pairwise.getValue(1, 2), -5d);
214 // R.G + -.W + D.C = -2 + -4 + -3 = -9
215 assertEquals(pairwise.getValue(1, 3), -9d);
216 // Q.G + I.W + A.C = -2 + -3 + 0 = -5
217 assertEquals(pairwise.getValue(2, 3), -5d);
221 * Test that the result of outputMatrix can be reparsed to give an identical
224 * @throws IOException
225 * @throws MalformedURLException
227 @Test(groups = "Functional")
228 public void testOutputMatrix_roundTrip() throws MalformedURLException,
231 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
232 String output = sm.outputMatrix(false);
233 FileParse fp = new FileParse(output, DataSourceType.PASTE);
234 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
235 ScoreMatrix sm2 = parser.parseMatrix();
237 assertTrue(sm2.equals(sm));
240 @Test(groups = "Functional")
241 public void testEqualsAndHashCode()
243 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
244 ScoreMatrix sm2 = new ScoreMatrix(sm.getName(), sm.getSymbols()
245 .toCharArray(), sm.getMatrix());
246 assertTrue(sm.equals(sm2));
247 assertEquals(sm.hashCode(), sm2.hashCode());
251 * Tests for scoring options where the longer length of two sequences is used
253 @Test(groups = "Functional")
254 public void testcomputeSimilarity_matchLongestSequence()
256 // TODO params.matchGaps() is not used for ScoreMatrix
257 // - includeGaps is sufficient (there is no denominator)
258 // ==> bespoke parameters only 3 booleans?
260 * for now, using space for gap to match callers of
261 * AlignmentView.getSequenceStrings()
262 * may change this to '-' (with corresponding change to matrices)
264 String s1 = "FR K S";
266 ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
269 * score gap-gap and gap-char
270 * shorter sequence treated as if with trailing gaps
271 * score = F^F + R^S + -^- + K^- + -^L + S^-
272 * = 6 + -1 + 1 + -4 + -4 + -4 = -6
274 SimilarityParamsI params = new SimilarityParams(true, true, true, false);
275 assertEquals(blosum.computeSimilarity(s1, s2, params), -6d);
276 // matchGap (arg2) is ignored:
277 params = new SimilarityParams(true, false, true, false);
278 assertEquals(blosum.computeSimilarity(s1, s2, params), -6d);
281 * score gap-char but not gap-gap
282 * score = F^F + R^S + 0 + K^- + -^L + S^-
283 * = 6 + -1 + 0 + -4 + -4 + -4 = -7
285 params = new SimilarityParams(false, true, true, false);
286 assertEquals(blosum.computeSimilarity(s1, s2, params), -7d);
287 // matchGap (arg2) is ignored:
288 params = new SimilarityParams(false, false, true, false);
289 assertEquals(blosum.computeSimilarity(s1, s2, params), -7d);
292 * score gap-gap but not gap-char
293 * score = F^F + R^S + -^- + 0 + 0 + 0
296 params = new SimilarityParams(true, false, false, false);
297 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
298 // matchGap (arg2) is ignored:
299 params = new SimilarityParams(true, true, false, false);
300 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
303 * score neither gap-gap nor gap-char
304 * score = F^F + R^S + 0 + 0 + 0 + 0
307 params = new SimilarityParams(false, false, false, false);
308 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
309 // matchGap (arg2) is ignored:
310 params = new SimilarityParams(false, true, false, false);
311 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
315 * Tests for scoring options where only the shorter length of two sequences is
318 @Test(groups = "Functional")
319 public void testcomputeSimilarity_matchShortestSequence()
321 // TODO params.matchGaps() is not used for ScoreMatrix
322 // - includeGaps is sufficient (there is no denominator)
323 // ==> bespoke parameters only 3 booleans?
325 * for now, using space for gap to match callers of
326 * AlignmentView.getSequenceStrings()
327 * may change this to '-' (with corresponding change to matrices)
329 String s1 = "FR K S";
331 ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
334 * score gap-gap and gap-char
335 * match shorter sequence only
336 * score = F^F + R^S + -^- + K^- + -^L
337 * = 6 + -1 + 1 + -4 + -4 = -2
339 SimilarityParamsI params = new SimilarityParams(true, true, true, true);
340 assertEquals(blosum.computeSimilarity(s1, s2, params), -2d);
341 // matchGap (arg2) is ignored:
342 params = new SimilarityParams(true, false, true, true);
343 assertEquals(blosum.computeSimilarity(s1, s2, params), -2d);
346 * score gap-char but not gap-gap
347 * score = F^F + R^S + 0 + K^- + -^L
348 * = 6 + -1 + 0 + -4 + -4 = -3
350 params = new SimilarityParams(false, true, true, true);
351 assertEquals(blosum.computeSimilarity(s1, s2, params), -3d);
352 // matchGap (arg2) is ignored:
353 params = new SimilarityParams(false, false, true, true);
354 assertEquals(blosum.computeSimilarity(s1, s2, params), -3d);
357 * score gap-gap but not gap-char
358 * score = F^F + R^S + -^- + 0 + 0
361 params = new SimilarityParams(true, false, false, true);
362 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
363 // matchGap (arg2) is ignored:
364 params = new SimilarityParams(true, true, false, true);
365 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
368 * score neither gap-gap nor gap-char
369 * score = F^F + R^S + 0 + 0 + 0
372 params = new SimilarityParams(false, false, false, true);
373 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
374 // matchGap (arg2) is ignored:
375 params = new SimilarityParams(false, true, false, true);
376 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
379 @Test(groups = "Functional")
380 public void testSymmetric()
382 verifySymmetric(ScoreModels.getInstance().getBlosum62());
383 verifySymmetric(ScoreModels.getInstance().getPam250());
384 verifySymmetric(ScoreModels.getInstance().getDefaultModel(false)); // dna
387 private void verifySymmetric(ScoreMatrix sm)
389 float[][] m = sm.getMatrix();
391 for (int row = 0; row < rows; row++)
393 assertEquals(m[row].length, rows);
394 for (int col = 0; col < rows; col++)
396 assertEquals(m[row][col], m[col][row], String.format("%s [%s, %s]",
397 sm.getName(), ResidueProperties.aa[row],
398 ResidueProperties.aa[col]));
404 * A test that just asserts the expected values in the Blosum62 score matrix
406 @Test(groups = "Functional")
407 public void testBlosum62_values()
409 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
412 * verify expected scores against ARNDCQEGHILKMFPSTWYVBZX
413 * scraped from https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
415 verifyValues(sm, 'A', new float[] { 4, -1, -2, -2, 0, -1, -1, 0, -2,
417 -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0 });
418 verifyValues(sm, 'R', new float[] { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3,
419 -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1 });
420 verifyValues(sm, 'N', new float[] { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3,
422 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1 });
423 verifyValues(sm, 'D', new float[] { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3,
424 -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1 });
425 verifyValues(sm, 'C', new float[] { 0, -3, -3, -3, 9, -3, -4, -3, -3,
427 -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2 });
428 verifyValues(sm, 'Q', new float[] { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3,
430 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1 });
431 verifyValues(sm, 'E', new float[] { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3,
433 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
434 verifyValues(sm, 'G', new float[] { 0, -2, 0, -1, -3, -2, -2, 6, -2,
436 -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1 });
437 verifyValues(sm, 'H', new float[] { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3,
438 -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1 });
439 verifyValues(sm, 'I', new float[] { -1, -3, -3, -3, -1, -3, -3, -4, -3,
440 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1 });
441 verifyValues(sm, 'L', new float[] { -1, -2, -3, -4, -1, -2, -3, -4, -3,
442 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1 });
443 verifyValues(sm, 'K', new float[] { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3,
444 -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1 });
445 verifyValues(sm, 'M', new float[] { -1, -1, -2, -3, -1, 0, -2, -3, -2,
447 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1 });
448 verifyValues(sm, 'F', new float[] { -2, -3, -3, -3, -2, -3, -3, -3, -1,
449 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1 });
450 verifyValues(sm, 'P', new float[] { -1, -2, -2, -1, -3, -1, -1, -2, -2,
451 -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2 });
452 verifyValues(sm, 'S', new float[] { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2,
454 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0 });
455 verifyValues(sm, 'T', new float[] { 0, -1, 0, -1, -1, -1, -1, -2, -2,
457 -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0 });
458 verifyValues(sm, 'W', new float[] { -3, -3, -4, -4, -2, -2, -3, -2, -2,
459 -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2 });
460 verifyValues(sm, 'Y', new float[] { -2, -2, -2, -3, -2, -1, -2, -3, 2,
461 -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1 });
462 verifyValues(sm, 'V', new float[] { 0, -3, -3, -3, -1, -2, -2, -3, -3,
464 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1 });
465 verifyValues(sm, 'B', new float[] { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3,
466 -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1 });
467 verifyValues(sm, 'Z', new float[] { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3,
469 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
470 verifyValues(sm, 'X', new float[] { 0, -1, -1, -1, -2, -1, -1, -1, -1,
471 -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 });
475 * Helper method to check pairwise scores for one residue
480 * score values against 'res', in ResidueProperties.aaIndex order
482 private void verifyValues(ScoreMatrix sm, char res, float[] expected)
484 for (int j = 0; j < expected.length; j++)
486 char c2 = ResidueProperties.aa[j].charAt(0);
487 assertEquals(sm.getPairwiseScore(res, c2), expected[j],
488 String.format("%s->%s", res, c2));