1 package jalview.analysis.scoremodels;
3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertNotNull;
5 import static org.testng.Assert.assertNotSame;
6 import static org.testng.Assert.assertTrue;
7 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
9 import jalview.api.analysis.SimilarityParamsI;
10 import jalview.io.DataSourceType;
11 import jalview.io.FileParse;
12 import jalview.io.ScoreMatrixFile;
13 import jalview.math.MatrixI;
15 import java.io.IOException;
16 import java.net.MalformedURLException;
17 import java.util.Arrays;
19 import org.testng.annotations.Test;
21 public class ScoreMatrixTest
23 @Test(groups = "Functional")
24 public void testConstructor()
26 // note score matrix does not have to be symmetric (though it should be!)
27 float[][] scores = new float[3][];
28 scores[0] = new float[] { 1f, 2f, 3f };
29 scores[1] = new float[] { 4f, 5f, 6f };
30 scores[2] = new float[] { 7f, 8f, 9f };
31 ScoreMatrix sm = new ScoreMatrix("Test", "ABC".toCharArray(), scores);
32 assertEquals(sm.getSize(), 3);
33 assertArrayEquals(scores, sm.getMatrix());
34 assertEquals(sm.getPairwiseScore('A', 'a'), 1f);
35 assertEquals(sm.getPairwiseScore('b', 'c'), 6f);
36 assertEquals(sm.getPairwiseScore('c', 'b'), 8f);
37 assertEquals(sm.getPairwiseScore('A', 'D'), 0f);
38 assertEquals(sm.getMatrixIndex('c'), 2);
39 assertEquals(sm.getMatrixIndex(' '), -1);
43 groups = "Functional",
44 expectedExceptions = { IllegalArgumentException.class })
45 public void testConstructor_matrixTooSmall()
47 float[][] scores = new float[2][];
48 scores[0] = new float[] { 1f, 2f };
49 scores[1] = new float[] { 3f, 4f };
50 new ScoreMatrix("Test", "ABC".toCharArray(), scores);
54 groups = "Functional",
55 expectedExceptions = { IllegalArgumentException.class })
56 public void testConstructor_matrixTooBig()
58 float[][] scores = new float[2][];
59 scores[0] = new float[] { 1f, 2f };
60 scores[1] = new float[] { 3f, 4f };
61 new ScoreMatrix("Test", "A".toCharArray(), scores);
65 groups = "Functional",
66 expectedExceptions = { IllegalArgumentException.class })
67 public void testConstructor_matrixNotSquare()
69 float[][] scores = new float[2][];
70 scores[0] = new float[] { 1f, 2f };
71 scores[1] = new float[] { 3f };
72 new ScoreMatrix("Test", "AB".toCharArray(), scores);
75 @Test(groups = "Functional")
76 public void testBuildSymbolIndex()
78 short[] index = ScoreMatrix.buildSymbolIndex("AX-. yxYp".toCharArray());
80 assertEquals(index.length, 128); // ASCII character set size
82 assertEquals(index['A'], 0);
83 assertEquals(index['a'], 0); // lower-case mapping added
84 assertEquals(index['X'], 1);
85 assertEquals(index['-'], 2);
86 assertEquals(index['.'], 3);
87 assertEquals(index[' '], 4);
88 assertEquals(index['y'], 5); // lower-case override
89 assertEquals(index['x'], 6); // lower-case override
90 assertEquals(index['Y'], 7);
91 assertEquals(index['p'], 8);
92 assertEquals(index['P'], -1); // lower-case doesn't map upper-case
95 * check all unmapped symbols have index for unmapped
97 for (int c = 0; c < index.length; c++)
99 if (!"AaXx-. Yyp".contains(String.valueOf((char) c)))
101 assertEquals(index[c], -1);
107 * check that characters not in the basic ASCII set are simply ignored
109 @Test(groups = "Functional")
110 public void testBuildSymbolIndex_nonAscii()
112 char[] weird = new char[] { 128, 245, 'P' };
113 short[] index = ScoreMatrix.buildSymbolIndex(weird);
114 assertEquals(index.length, 128);
115 assertEquals(index['P'], 2);
116 assertEquals(index['p'], 2);
117 for (int c = 0; c < index.length; c++)
119 if (c != 'P' && c != 'p')
121 assertEquals(index[c], -1);
126 @Test(groups = "Functional")
127 public void testGetMatrix()
129 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
130 float[][] m = sm.getMatrix();
131 assertEquals(m.length, sm.getSize());
132 assertEquals(m[2][4], -3f);
133 // verify a defensive copy is returned
134 float[][] m2 = sm.getMatrix();
135 assertNotSame(m, m2);
136 assertTrue(Arrays.deepEquals(m, m2));
139 @Test(groups = "Functional")
140 public void testGetMatrixIndex()
142 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
143 assertEquals(sm.getMatrixIndex('A'), 0);
144 assertEquals(sm.getMatrixIndex('R'), 1);
145 assertEquals(sm.getMatrixIndex('r'), 1);
146 assertEquals(sm.getMatrixIndex('N'), 2);
147 assertEquals(sm.getMatrixIndex('D'), 3);
148 assertEquals(sm.getMatrixIndex('X'), 22);
149 assertEquals(sm.getMatrixIndex('x'), 22);
150 assertEquals(sm.getMatrixIndex(' '), 23);
151 assertEquals(sm.getMatrixIndex('*'), 24);
152 assertEquals(sm.getMatrixIndex('.'), -1);
153 assertEquals(sm.getMatrixIndex('-'), -1);
154 assertEquals(sm.getMatrixIndex('?'), -1);
155 assertEquals(sm.getMatrixIndex((char) 128), -1);
158 @Test(groups = "Functional")
159 public void testGetSize()
161 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
162 assertEquals(sm.getMatrix().length, sm.getSize());
165 @Test(groups = "Functional")
166 public void testComputePairwiseScores()
169 * NB score matrix assumes space for gap - Jalview converts
170 * space to gap before computing PCA or Tree
172 String[] seqs = new String[] { "FKL", "R D", "QIA", "GWC" };
173 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
175 MatrixI pairwise = sm.findSimilarities(seqs, SimilarityParams.Jalview);
178 * should be NxN where N = number of sequences
180 assertEquals(pairwise.height(), 4);
181 assertEquals(pairwise.width(), 4);
184 * should be symmetrical (because BLOSUM62 is)
186 for (int i = 0; i < pairwise.height(); i++)
188 for (int j = i + 1; j < pairwise.width(); j++)
190 assertEquals(pairwise.getValue(i, j), pairwise.getValue(j, i),
191 String.format("Not symmetric at [%d, %d]", i, j));
195 * verify expected BLOSUM dot product scores
197 // F.F + K.K + L.L = 6 + 5 + 4 = 15
198 assertEquals(pairwise.getValue(0, 0), 15d);
199 // R.R + -.- + D.D = 5 + 1 + 6 = 12
200 assertEquals(pairwise.getValue(1, 1), 12d);
201 // Q.Q + I.I + A.A = 5 + 4 + 4 = 13
202 assertEquals(pairwise.getValue(2, 2), 13d);
203 // G.G + W.W + C.C = 6 + 11 + 9 = 26
204 assertEquals(pairwise.getValue(3, 3), 26d);
205 // F.R + K.- + L.D = -3 + -4 + -4 = -11
206 assertEquals(pairwise.getValue(0, 1), -11d);
207 // F.Q + K.I + L.A = -3 + -3 + -1 = -7
208 assertEquals(pairwise.getValue(0, 2), -7d);
209 // F.G + K.W + L.C = -3 + -3 + -1 = -7
210 assertEquals(pairwise.getValue(0, 3), -7d);
211 // R.Q + -.I + D.A = 1 + -4 + -2 = -5
212 assertEquals(pairwise.getValue(1, 2), -5d);
213 // R.G + -.W + D.C = -2 + -4 + -3 = -9
214 assertEquals(pairwise.getValue(1, 3), -9d);
215 // Q.G + I.W + A.C = -2 + -3 + 0 = -5
216 assertEquals(pairwise.getValue(2, 3), -5d);
220 * Test that the result of outputMatrix can be reparsed to give an identical
223 * @throws IOException
224 * @throws MalformedURLException
226 @Test(groups = "Functional")
227 public void testOutputMatrix_roundTrip() throws MalformedURLException,
230 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
231 String output = sm.outputMatrix(false);
232 FileParse fp = new FileParse(output, DataSourceType.PASTE);
233 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
234 ScoreMatrix sm2 = parser.parseMatrix();
236 assertTrue(sm2.equals(sm));
239 @Test(groups = "Functional")
240 public void testEqualsAndHashCode()
242 ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
243 ScoreMatrix sm2 = new ScoreMatrix(sm.getName(), sm.getSymbols()
244 .toCharArray(), sm.getMatrix());
245 assertTrue(sm.equals(sm2));
246 assertEquals(sm.hashCode(), sm2.hashCode());
250 * Tests for scoring options where the longer length of two sequences is used
252 @Test(groups = "Functional")
253 public void testcomputeSimilarity_matchLongestSequence()
255 // TODO params.matchGaps() is not used for ScoreMatrix
256 // - includeGaps is sufficient (there is no denominator)
257 // ==> bespoke parameters only 3 booleans?
259 * for now, using space for gap to match callers of
260 * AlignmentView.getSequenceStrings()
261 * may change this to '-' (with corresponding change to matrices)
263 String s1 = "FR K S";
265 ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
268 * score gap-gap and gap-char
269 * shorter sequence treated as if with trailing gaps
270 * score = F^F + R^S + -^- + K^- + -^L + S^-
271 * = 6 + -1 + 1 + -4 + -4 + -4 = -6
273 SimilarityParamsI params = new SimilarityParams(true, true, true, false);
274 assertEquals(blosum.computeSimilarity(s1, s2, params), -6d);
275 // matchGap (arg2) is ignored:
276 params = new SimilarityParams(true, false, true, false);
277 assertEquals(blosum.computeSimilarity(s1, s2, params), -6d);
280 * score gap-char but not gap-gap
281 * score = F^F + R^S + 0 + K^- + -^L + S^-
282 * = 6 + -1 + 0 + -4 + -4 + -4 = -7
284 params = new SimilarityParams(false, true, true, false);
285 assertEquals(blosum.computeSimilarity(s1, s2, params), -7d);
286 // matchGap (arg2) is ignored:
287 params = new SimilarityParams(false, false, true, false);
288 assertEquals(blosum.computeSimilarity(s1, s2, params), -7d);
291 * score gap-gap but not gap-char
292 * score = F^F + R^S + -^- + 0 + 0 + 0
295 params = new SimilarityParams(true, false, false, false);
296 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
297 // matchGap (arg2) is ignored:
298 params = new SimilarityParams(true, true, false, false);
299 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
302 * score neither gap-gap nor gap-char
303 * score = F^F + R^S + 0 + 0 + 0 + 0
306 params = new SimilarityParams(false, false, false, false);
307 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
308 // matchGap (arg2) is ignored:
309 params = new SimilarityParams(false, true, false, false);
310 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
314 * Tests for scoring options where only the shorter length of two sequences is
317 @Test(groups = "Functional")
318 public void testcomputeSimilarity_matchShortestSequence()
320 // TODO params.matchGaps() is not used for ScoreMatrix
321 // - includeGaps is sufficient (there is no denominator)
322 // ==> bespoke parameters only 3 booleans?
324 * for now, using space for gap to match callers of
325 * AlignmentView.getSequenceStrings()
326 * may change this to '-' (with corresponding change to matrices)
328 String s1 = "FR K S";
330 ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
333 * score gap-gap and gap-char
334 * match shorter sequence only
335 * score = F^F + R^S + -^- + K^- + -^L
336 * = 6 + -1 + 1 + -4 + -4 = -2
338 SimilarityParamsI params = new SimilarityParams(true, true, true, true);
339 assertEquals(blosum.computeSimilarity(s1, s2, params), -2d);
340 // matchGap (arg2) is ignored:
341 params = new SimilarityParams(true, false, true, true);
342 assertEquals(blosum.computeSimilarity(s1, s2, params), -2d);
345 * score gap-char but not gap-gap
346 * score = F^F + R^S + 0 + K^- + -^L
347 * = 6 + -1 + 0 + -4 + -4 = -3
349 params = new SimilarityParams(false, true, true, true);
350 assertEquals(blosum.computeSimilarity(s1, s2, params), -3d);
351 // matchGap (arg2) is ignored:
352 params = new SimilarityParams(false, false, true, true);
353 assertEquals(blosum.computeSimilarity(s1, s2, params), -3d);
356 * score gap-gap but not gap-char
357 * score = F^F + R^S + -^- + 0 + 0
360 params = new SimilarityParams(true, false, false, true);
361 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
362 // matchGap (arg2) is ignored:
363 params = new SimilarityParams(true, true, false, true);
364 assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
367 * score neither gap-gap nor gap-char
368 * score = F^F + R^S + 0 + 0 + 0
371 params = new SimilarityParams(false, false, false, true);
372 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
373 // matchGap (arg2) is ignored:
374 params = new SimilarityParams(false, true, false, true);
375 assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);