package jalview.analysis.scoremodels;
+
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNotSame;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.ScoreMatrixFile;
import jalview.math.MatrixI;
+import jalview.schemes.ResidueProperties;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.Arrays;
import org.testng.annotations.Test;
assertEquals(sm.getPairwiseScore('A', 'D'), 0f);
assertEquals(sm.getMatrixIndex('c'), 2);
assertEquals(sm.getMatrixIndex(' '), -1);
+
+ assertEquals(sm.getGapIndex(), -1); // no gap symbol
}
@Test(
@Test(groups = "Functional")
public void testBuildSymbolIndex()
{
- short[] index = ScoreMatrix.buildSymbolIndex("AX-. yxYp".toCharArray());
+ float[][] scores = new float[2][];
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f, 4f };
+ ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' },
+ scores);
+ short[] index = sm.buildSymbolIndex("AX-yxYp".toCharArray());
assertEquals(index.length, 128); // ASCII character set size
assertEquals(index['a'], 0); // lower-case mapping added
assertEquals(index['X'], 1);
assertEquals(index['-'], 2);
- assertEquals(index['.'], 3);
- assertEquals(index[' '], 4);
- assertEquals(index['y'], 5); // lower-case override
- assertEquals(index['x'], 6); // lower-case override
- assertEquals(index['Y'], 7);
- assertEquals(index['p'], 8);
+ assertEquals(index['y'], 3); // lower-case override
+ assertEquals(index['x'], 4); // lower-case override
+ assertEquals(index['Y'], 5);
+ assertEquals(index['p'], 6);
assertEquals(index['P'], -1); // lower-case doesn't map upper-case
/*
@Test(groups = "Functional")
public void testBuildSymbolIndex_nonAscii()
{
+ float[][] scores = new float[2][]; // 2x2 scores, used here only to construct a ScoreMatrix instance
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f, 4f };
+ ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' },
+ scores);
char[] weird = new char[] { 128, 245, 'P' };
- short[] index = ScoreMatrix.buildSymbolIndex(weird);
+ short[] index = sm.buildSymbolIndex(weird); // now invoked on an instance rather than statically
assertEquals(index.length, 128);
assertEquals(index['P'], 2);
assertEquals(index['p'], 2);
}
@Test(groups = "Functional")
+ public void testGetMatrix()
+ {
+   ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
+
+   /*
+    * the row count matches getSize(), and a sample cell holds the
+    * expected score
+    */
+   float[][] first = blosum.getMatrix();
+   assertEquals(first.length, blosum.getSize());
+   assertEquals(first[2][4], -3f);
+
+   /*
+    * a second call returns a distinct array object with equal contents
+    */
+   float[][] second = blosum.getMatrix();
+   assertNotSame(first, second);
+   assertTrue(Arrays.deepEquals(first, second));
+ }
+
+ @Test(groups = "Functional")
public void testGetMatrixIndex()
{
ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
assertEquals(sm.getMatrixIndex('D'), 3);
assertEquals(sm.getMatrixIndex('X'), 22);
assertEquals(sm.getMatrixIndex('x'), 22);
- assertEquals(sm.getMatrixIndex(' '), 23);
- assertEquals(sm.getMatrixIndex('*'), 24);
+ assertEquals(sm.getMatrixIndex('-'), 23); // '-' now has index 23, previously asserted for space
+ assertEquals(sm.getMatrixIndex('*'), -1); // '*' is no longer expected to have an index
assertEquals(sm.getMatrixIndex('.'), -1);
- assertEquals(sm.getMatrixIndex('-'), -1);
+ assertEquals(sm.getMatrixIndex(' '), -1); // space is no longer expected to have an index
assertEquals(sm.getMatrixIndex('?'), -1);
assertEquals(sm.getMatrixIndex((char) 128), -1);
}
@Test(groups = "Functional")
+ public void testGetGapIndex()
+ {
+   // the Blosum62 matrix is expected to report a gap index of 23
+   assertEquals(ScoreModels.getInstance().getBlosum62().getGapIndex(), 23);
+ }
+
+ @Test(groups = "Functional")
public void testGetSize()
{
ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
public void testComputePairwiseScores()
{
/*
- * NB score matrix assumes space for gap - Jalview converts
- * space to gap before computing PCA or Tree
+ * NB score matrix expects '-' for gap
*/
- String[] seqs = new String[] { "FKL", "R D", "QIA", "GWC" };
+ String[] seqs = new String[] { "FKL", "R-D", "QIA", "GWC" };
ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
- MatrixI pairwise = sm.computePairwiseScores(seqs);
+ MatrixI pairwise = sm.findSimilarities(seqs, SimilarityParams.Jalview);
/*
* should be NxN where N = number of sequences
// Q.G + I.W + A.C = -2 + -3 + 0 = -5
assertEquals(pairwise.getValue(2, 3), -5d);
}
+
+ /**
+  * Verifies that the text produced by outputMatrix(false) can be parsed by
+  * ScoreMatrixFile into a ScoreMatrix that equals the original
+  *
+  * @throws IOException
+  * @throws MalformedURLException
+  */
+ @Test(groups = "Functional")
+ public void testOutputMatrix_roundTrip() throws MalformedURLException,
+         IOException
+ {
+   ScoreMatrix original = ScoreModels.getInstance().getBlosum62();
+
+   /*
+    * write the matrix out (not as html) and parse the text as pasted data
+    */
+   String text = original.outputMatrix(false);
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(text,
+           DataSourceType.PASTE));
+   ScoreMatrix reparsed = parser.parseMatrix();
+
+   assertNotNull(reparsed);
+   assertTrue(reparsed.equals(original));
+ }
+
+ @Test(groups = "Functional")
+ public void testEqualsAndHashCode()
+ {
+ ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
+ ScoreMatrix sm2 = new ScoreMatrix(sm.getName(), sm.getSymbols()
+ .toCharArray(), sm.getMatrix());
+ assertTrue(sm.equals(sm2));
+ assertEquals(sm.hashCode(), sm2.hashCode());
+
+ sm2 = ScoreModels.getInstance().getPam250();
+ assertFalse(sm.equals(sm2));
+ assertNotEquals(sm.hashCode(), sm2.hashCode());
+
+ assertFalse(sm.equals("hello"));
+ }
+
+ /**
+  * Tests for computeSimilarity when the longer of the two sequence lengths is
+  * scored
+  */
+ @Test(groups = "Functional")
+ public void testcomputeSimilarity_matchLongestSequence()
+ {
+   /*
+    * gaps have to be written as '-' for ScoreMatrix
+    */
+   final String seq1 = "FR-K-S";
+   final String seq2 = "FS--L";
+   final ScoreMatrix matrix = ScoreModels.getInstance().getBlosum62();
+   SimilarityParamsI options;
+
+   /*
+    * gap-gap and gap-residue pairs are both scored; the shorter sequence
+    * is scored as if padded with trailing gaps
+    * F^F + R^S + -^- + K^- + -^L + S^-
+    * = 6 + -1 + 1 + -4 + -4 + -4 = -6
+    */
+   options = new SimilarityParams(true, true, true, false);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), -6d);
+   // changing only the second argument (matchGap) gives the same score
+   options = new SimilarityParams(true, false, true, false);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), -6d);
+
+   /*
+    * gap-residue pairs scored, gap-gap pairs not scored
+    * F^F + R^S + 0 + K^- + -^L + S^-
+    * = 6 + -1 + 0 + -4 + -4 + -4 = -7
+    */
+   options = new SimilarityParams(false, true, true, false);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), -7d);
+   // changing only the second argument (matchGap) gives the same score
+   options = new SimilarityParams(false, false, true, false);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), -7d);
+
+   /*
+    * gap-gap pairs scored, gap-residue pairs not scored
+    * F^F + R^S + -^- + 0 + 0 + 0
+    * = 6 + -1 + 1 = 6
+    */
+   options = new SimilarityParams(true, false, false, false);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), 6d);
+   // changing only the second argument (matchGap) gives the same score
+   options = new SimilarityParams(true, true, false, false);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), 6d);
+
+   /*
+    * neither gap-gap nor gap-residue pairs scored
+    * F^F + R^S + 0 + 0 + 0 + 0
+    * = 6 + -1 = 5
+    */
+   options = new SimilarityParams(false, false, false, false);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), 5d);
+   // changing only the second argument (matchGap) gives the same score
+   options = new SimilarityParams(false, true, false, false);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), 5d);
+ }
+
+ /**
+  * Tests for computeSimilarity when only the shorter of the two sequence
+  * lengths is scored
+  */
+ @Test(groups = "Functional")
+ public void testcomputeSimilarity_matchShortestSequence()
+ {
+   /*
+    * gaps have to be written as '-' for ScoreMatrix
+    */
+   final String seq1 = "FR-K-S";
+   final String seq2 = "FS--L";
+   final ScoreMatrix matrix = ScoreModels.getInstance().getBlosum62();
+   SimilarityParamsI options;
+
+   /*
+    * gap-gap and gap-residue pairs are both scored, over the length of
+    * the shorter sequence only
+    * F^F + R^S + -^- + K^- + -^L
+    * = 6 + -1 + 1 + -4 + -4 = -2
+    */
+   options = new SimilarityParams(true, true, true, true);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), -2d);
+   // changing only the second argument (matchGap) gives the same score
+   options = new SimilarityParams(true, false, true, true);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), -2d);
+
+   /*
+    * gap-residue pairs scored, gap-gap pairs not scored
+    * F^F + R^S + 0 + K^- + -^L
+    * = 6 + -1 + 0 + -4 + -4 = -3
+    */
+   options = new SimilarityParams(false, true, true, true);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), -3d);
+   // changing only the second argument (matchGap) gives the same score
+   options = new SimilarityParams(false, false, true, true);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), -3d);
+
+   /*
+    * gap-gap pairs scored, gap-residue pairs not scored
+    * F^F + R^S + -^- + 0 + 0
+    * = 6 + -1 + 1 = 6
+    */
+   options = new SimilarityParams(true, false, false, true);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), 6d);
+   // changing only the second argument (matchGap) gives the same score
+   options = new SimilarityParams(true, true, false, true);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), 6d);
+
+   /*
+    * neither gap-gap nor gap-residue pairs scored
+    * F^F + R^S + 0 + 0 + 0
+    * = 6 + -1 = 5
+    */
+   options = new SimilarityParams(false, false, false, true);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), 5d);
+   // changing only the second argument (matchGap) gives the same score
+   options = new SimilarityParams(false, true, false, true);
+   assertEquals(matrix.computeSimilarity(seq1, seq2, options), 5d);
+ }
+
+ @Test(groups = "Functional")
+ public void testSymmetric()
+ {
+ verifySymmetric(ScoreModels.getInstance().getBlosum62());
+ verifySymmetric(ScoreModels.getInstance().getPam250());
+ verifySymmetric(ScoreModels.getInstance().getDefaultModel(false)); // dna
+ }
+
+ /**
+  * Asserts that the score matrix is square and that every score equals its
+  * transposed counterpart. Failure messages identify the cell using the
+  * matrix's own symbols, so they are also meaningful for matrices that are
+  * not protein matrices.
+  *
+  * @param sm
+  *          the score matrix to check
+  */
+ private void verifySymmetric(ScoreMatrix sm)
+ {
+   float[][] m = sm.getMatrix();
+   String symbols = sm.getSymbols();
+   int rows = m.length;
+   for (int row = 0; row < rows; row++)
+   {
+     assertEquals(m[row].length, rows);
+     /*
+      * fall back to the numeric index if there is no symbol at this
+      * position, so that building the message can never itself fail
+      */
+     String rowLabel = row < symbols.length() ? String.valueOf(symbols
+             .charAt(row)) : String.valueOf(row);
+     for (int col = 0; col < rows; col++)
+     {
+       String colLabel = col < symbols.length() ? String.valueOf(symbols
+               .charAt(col)) : String.valueOf(col);
+       assertEquals(m[row][col], m[col][row], String.format("%s [%s, %s]",
+               sm.getName(), rowLabel, colLabel));
+     }
+   }
+ }
+
+ /**
+  * A test that just asserts the expected values in the Blosum62 score matrix.
+  * The description set during the test is reset to null afterwards, so that
+  * the matrix instance obtained from ScoreModels is left as it was found.
+  */
+ @Test(groups = "Functional")
+ public void testBlosum62_values()
+ {
+   ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
+
+   assertTrue(sm.isProtein());
+   assertFalse(sm.isDNA());
+   assertNull(sm.getDescription());
+   try
+   {
+     sm.setDescription("BLOSUM62");
+     assertEquals(sm.getDescription(), "BLOSUM62");
+   } finally
+   {
+     /*
+      * restore the null description asserted above, so this test can be
+      * re-run and does not affect other tests using the same instance
+      */
+     sm.setDescription(null);
+   }
+
+   /*
+    * verify expected scores against ARNDCQEGHILKMFPSTWYVBZX
+    * scraped from https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
+    */
+   verifyValues(sm, 'A', new float[] { 4, -1, -2, -2, 0, -1, -1, 0, -2,
+       -1,
+       -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0 });
+   verifyValues(sm, 'R', new float[] { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3,
+       -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1 });
+   verifyValues(sm, 'N', new float[] { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3,
+       -3,
+       0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1 });
+   verifyValues(sm, 'D', new float[] { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3,
+       -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1 });
+   verifyValues(sm, 'C', new float[] { 0, -3, -3, -3, 9, -3, -4, -3, -3,
+       -1,
+       -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2 });
+   verifyValues(sm, 'Q', new float[] { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3,
+       -2,
+       1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1 });
+   verifyValues(sm, 'E', new float[] { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3,
+       -3,
+       1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
+   verifyValues(sm, 'G', new float[] { 0, -2, 0, -1, -3, -2, -2, 6, -2,
+       -4,
+       -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1 });
+   verifyValues(sm, 'H', new float[] { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3,
+       -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1 });
+   verifyValues(sm, 'I', new float[] { -1, -3, -3, -3, -1, -3, -3, -4, -3,
+       4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1 });
+   verifyValues(sm, 'L', new float[] { -1, -2, -3, -4, -1, -2, -3, -4, -3,
+       2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1 });
+   verifyValues(sm, 'K', new float[] { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3,
+       -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1 });
+   verifyValues(sm, 'M', new float[] { -1, -1, -2, -3, -1, 0, -2, -3, -2,
+       1,
+       2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1 });
+   verifyValues(sm, 'F', new float[] { -2, -3, -3, -3, -2, -3, -3, -3, -1,
+       0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1 });
+   verifyValues(sm, 'P', new float[] { -1, -2, -2, -1, -3, -1, -1, -2, -2,
+       -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2 });
+   verifyValues(sm, 'S', new float[] { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2,
+       -2,
+       0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0 });
+   verifyValues(sm, 'T', new float[] { 0, -1, 0, -1, -1, -1, -1, -2, -2,
+       -1,
+       -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0 });
+   verifyValues(sm, 'W', new float[] { -3, -3, -4, -4, -2, -2, -3, -2, -2,
+       -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2 });
+   verifyValues(sm, 'Y', new float[] { -2, -2, -2, -3, -2, -1, -2, -3, 2,
+       -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1 });
+   verifyValues(sm, 'V', new float[] { 0, -3, -3, -3, -1, -2, -2, -3, -3,
+       3,
+       1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1 });
+   verifyValues(sm, 'B', new float[] { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3,
+       -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1 });
+   verifyValues(sm, 'Z', new float[] { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3,
+       -3,
+       1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
+   verifyValues(sm, 'X', new float[] { 0, -1, -1, -1, -2, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 });
+ }
+
+ /**
+  * Helper method that checks the pairwise score of one residue against each
+  * of a sequence of other residues
+  *
+  * @param sm
+  *          the score matrix to query
+  * @param res
+  *          the residue whose scores are checked
+  * @param expected
+  *          the expected scores of 'res' when paired with the first character
+  *          of each entry of ResidueProperties.aa, taken in array order
+  */
+ private void verifyValues(ScoreMatrix sm, char res, float[] expected)
+ {
+   int position = 0;
+   for (float expectedScore : expected)
+   {
+     char other = ResidueProperties.aa[position].charAt(0);
+     String pair = String.format("%s->%s", res, other);
+     assertEquals(sm.getPairwiseScore(res, other), expectedScore, pair);
+     position++;
+   }
+ }
+
+ @Test(groups = "Functional")
+ public void testConstructor_gapDash()
+ {
+   /*
+    * a two symbol matrix whose second symbol is '-'
+    */
+   float[][] scores = { { 1f, 2f }, { 4f, 5f } };
+   ScoreMatrix matrix = new ScoreMatrix("Test", "A-".toCharArray(), scores);
+
+   assertEquals(matrix.getSize(), 2);
+   assertArrayEquals(scores, matrix.getMatrix());
+
+   /*
+    * scores are looked up for either case of 'A'
+    */
+   assertEquals(matrix.getPairwiseScore('A', 'a'), 1f);
+   assertEquals(matrix.getPairwiseScore('A', 'A'), 1f);
+   assertEquals(matrix.getPairwiseScore('a', '-'), 2f);
+   assertEquals(matrix.getPairwiseScore('-', 'A'), 4f);
+
+   /*
+    * only 'A' (either case) and '-' have a matrix index
+    */
+   assertEquals(matrix.getMatrixIndex('a'), 0);
+   assertEquals(matrix.getMatrixIndex('A'), 0);
+   assertEquals(matrix.getMatrixIndex('-'), 1);
+   assertEquals(matrix.getMatrixIndex(' '), -1);
+   assertEquals(matrix.getMatrixIndex('.'), -1);
+
+   // the gap index is the index of '-'
+   assertEquals(matrix.getGapIndex(), 1);
+ }
+
+ @Test(groups = "Functional")
+ public void testGetPairwiseScore()
+ {
+   float[][] scores = { { 1f, 2f }, { 4f, 5f } };
+   ScoreMatrix matrix = new ScoreMatrix("Test", "AB".toCharArray(), scores);
+
+   /*
+    * known symbols, in either case, give the score from the matrix
+    */
+   assertEquals(matrix.getPairwiseScore('A', 'A'), 1f);
+   assertEquals(matrix.getPairwiseScore('A', 'a'), 1f);
+   assertEquals(matrix.getPairwiseScore('A', 'B'), 2f);
+   assertEquals(matrix.getPairwiseScore('b', 'a'), 4f);
+   assertEquals(matrix.getPairwiseScore('B', 'b'), 5f);
+
+   /*
+    * a pair including a symbol not in the matrix currently scores zero
+    */
+   assertEquals(matrix.getPairwiseScore('A', '-'), 0f);
+   assertEquals(matrix.getPairwiseScore('-', '-'), 0f);
+   assertEquals(matrix.getPairwiseScore('Q', 'W'), 0f);
+
+   /*
+    * as does a pair including a character outside the basic ASCII set
+    */
+   char nonAscii = (char) 200;
+   assertEquals(matrix.getPairwiseScore('Q', nonAscii), 0f);
+   assertEquals(matrix.getPairwiseScore(nonAscii, 'Q'), 0f);
+ }
+
+ @Test(groups = "Functional")
+ public void testGetMinimumScore()
+ {
+   // the lowest score expected in the Blosum62 matrix is -4
+   assertEquals(ScoreModels.getInstance().getBlosum62().getMinimumScore(),
+           -4f);
+ }
+
+ @Test(groups = "Functional")
+ public void testGetMaximumScore()
+ {
+   // the highest score expected in the Blosum62 matrix is 11
+   assertEquals(ScoreModels.getInstance().getBlosum62().getMaximumScore(),
+           11f);
+ }
+
+ @Test(groups = "Functional")
+ public void testOutputMatrix_html()
+ {
+   /*
+    * the html output is expected to be a table with a heading row of
+    * symbols, then one row per symbol holding its scores
+    */
+   String expected = "<table border=\"1\"><tr><th></th><th> A </th><th> B </th></tr>\n"
+           + "<tr><td>A</td><td>1.0</td><td>2.0</td></tr>\n"
+           + "<tr><td>B</td><td>4.0</td><td>-5.3E-10</td></tr>\n"
+           + "</table>";
+
+   float[][] scores = { { 1f, 2f }, { 4f, -5.3E-10f } };
+   ScoreMatrix matrix = new ScoreMatrix("Test", "AB".toCharArray(), scores);
+
+   assertEquals(matrix.outputMatrix(true), expected);
+ }
}