From 35679161e01fac267b1a58bec4244baae94d7fd2 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 6 Feb 2017 14:50:11 +0000 Subject: [PATCH] JAL-2379 computePairwiseScores() moved to ScoreMatrix --- src/jalview/analysis/PCA.java | 47 +--------- src/jalview/schemes/ScoreMatrix.java | 96 +++++++++++++++----- test/jalview/analysis/PCATest.java | 61 ------------- test/jalview/schemes/ScoreMatrixTest.java | 137 +++++++++++++++++++++++++++-- 4 files changed, 204 insertions(+), 137 deletions(-) delete mode 100644 test/jalview/analysis/PCATest.java diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 1a4060c..d978451 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -20,7 +20,6 @@ */ package jalview.analysis; -import jalview.math.Matrix; import jalview.math.MatrixI; import jalview.schemes.ResidueProperties; import jalview.schemes.ScoreMatrix; @@ -263,9 +262,7 @@ public class PCA implements Runnable + (jvCalcMode ? "Jalview variant" : "Original SeqSpace") + "\n"); - // MatrixI mt = m.transpose(); - // eigenvector = mt.preMultiply(jvCalcMode ? m2 : m); - eigenvector = computePairwiseScores(); + eigenvector = scoreMatrix.computePairwiseScores(seqs); details.append(" --- OrigT * Orig ---- \n"); eigenvector.print(ps, "%8.2f"); @@ -307,48 +304,6 @@ public class PCA implements Runnable // + (System.currentTimeMillis() - now) + "ms")); } - /** - * Computes an NxN matrix where N is the number of sequences, and entry [i, j] - * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores - * computed using the current score matrix. For example - * - */ - MatrixI computePairwiseScores() - { - double[][] values = new double[seqs.length][]; - for (int row = 0; row < seqs.length; row++) - { - values[row] = new double[seqs.length]; - for (int col = 0; col < seqs.length; col++) - { - int total = 0; - int width = Math.min(seqs[row].length(), seqs[col].length()); - for (int i = 0; i < width; i++) - { - char c1 = seqs[row].charAt(i); - char c2 = seqs[col].charAt(i); - int score = scoreMatrix.getPairwiseScore(c1, c2); - total += score; - } - values[row][col] = total; - } - } - return new Matrix(values); - } - public void setJvCalcMode(boolean calcMode) { this.jvCalcMode = calcMode; diff --git a/src/jalview/schemes/ScoreMatrix.java b/src/jalview/schemes/ScoreMatrix.java index c47654a..b5e3ef1 100644 --- a/src/jalview/schemes/ScoreMatrix.java +++ b/src/jalview/schemes/ScoreMatrix.java @@ -21,6 +21,8 @@ package jalview.schemes; import jalview.analysis.scoremodels.PairwiseSeqScoreModel; +import jalview.math.Matrix; +import jalview.math.MatrixI; public class ScoreMatrix extends PairwiseSeqScoreModel { @@ -77,10 +79,11 @@ public class ScoreMatrix extends PairwiseSeqScoreModel } /** + * Answers the score for substituting first char in A1 with first char in A2 * * @param A1 * @param A2 - * @return score for substituting first char in A1 with first char in A2 + * @return */ public int getPairwiseScore(String A1, String A2) { @@ -100,32 +103,36 @@ public class ScoreMatrix extends PairwiseSeqScoreModel : ResidueProperties.nucleotideIndex[d]; /* + * FIXME: 2.10.1 PCA treats gap as [22] or 'X', but Tree + * calculation treats as [23]; which is correct? + */ + /* * hack to convert unassigned / unknown (including gap) * to index of unknown (X for amino acids, N for nucleotide) * TODO: statically assign gap characters to this index? */ - if (type == 0) - { - if (a == ResidueProperties.maxProteinIndex) - { - a = ResidueProperties.aaIndex['X']; - } - if (b == ResidueProperties.maxProteinIndex) - { - b = ResidueProperties.aaIndex['X']; - } - } - if (type != 0) - { - if (a == ResidueProperties.maxNucleotideIndex) - { - a = ResidueProperties.nucleotideIndex['N']; - } - if (b == ResidueProperties.maxNucleotideIndex) - { - b = ResidueProperties.nucleotideIndex['N']; - } - } +// if (type == 0) +// { +// if (a == ResidueProperties.maxProteinIndex) +// { +// a = ResidueProperties.aaIndex['X']; +// } +// if (b == ResidueProperties.maxProteinIndex) +// { +// b = ResidueProperties.aaIndex['X']; +// } +// } +// if (type != 0) +// { +// if (a == ResidueProperties.maxNucleotideIndex) +// { +// a = ResidueProperties.nucleotideIndex['N']; +// } +// if (b == ResidueProperties.maxNucleotideIndex) +// { +// b = ResidueProperties.nucleotideIndex['N']; +// } +// } pog = matrix[a][b]; } catch (Exception e) { @@ -197,4 +204,47 @@ public class ScoreMatrix extends PairwiseSeqScoreModel } return sb.toString(); } + + /** + * Computes an NxN matrix where N is the number of sequences, and entry [i, j] + * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores + * computed using the current score matrix. For example + * + */ + public MatrixI computePairwiseScores(String[] seqs) + { + double[][] values = new double[seqs.length][]; + for (int row = 0; row < seqs.length; row++) + { + values[row] = new double[seqs.length]; + for (int col = 0; col < seqs.length; col++) + { + int total = 0; + int width = Math.min(seqs[row].length(), seqs[col].length()); + for (int i = 0; i < width; i++) + { + char c1 = seqs[row].charAt(i); + char c2 = seqs[col].charAt(i); + int score = getPairwiseScore(c1, c2); + total += score; + } + values[row][col] = total; + } + } + return new Matrix(values); + } } diff --git a/test/jalview/analysis/PCATest.java b/test/jalview/analysis/PCATest.java deleted file mode 100644 index 815b71f..0000000 --- a/test/jalview/analysis/PCATest.java +++ /dev/null @@ -1,61 +0,0 @@ -package jalview.analysis; - -import static org.testng.Assert.assertEquals; - -import jalview.math.MatrixI; - -import org.testng.annotations.Test; - -public class PCATest -{ - @Test(groups = "Functional") - public void testComputePairwiseScores() - { - String[] seqs = new String[] { "FKL", "R-D", "QIA", "GWC" }; - PCA pca = new PCA(seqs, false, "BLOSUM62"); - - MatrixI pairwise = pca.computePairwiseScores(); - - /* - * should be NxN where N = number of sequences - */ - assertEquals(pairwise.height(), 4); - assertEquals(pairwise.width(), 4); - - /* - * should be symmetrical (because BLOSUM62 is) - */ - for (int i = 0; i < pairwise.height(); i++) - { - for (int j = 0; j < pairwise.width(); j++) - { - assertEquals(pairwise.getValue(i, j), pairwise.getValue(j, i), - "Not symmetric"); - } - } - /* - * verify expected BLOSUM dot product scores - * Note: gap is treated like 'X' [22] in the matrix - */ - // F.F + K.K + L.L = 6 + 5 + 4 = 15 - assertEquals(pairwise.getValue(0, 0), 15d); - // R.R + X.X + D.D = 5 + -1 + 6 = 10 - assertEquals(pairwise.getValue(1, 1), 10d); - // Q.Q + I.I + A.A = 5 + 4 + 4 = 13 - assertEquals(pairwise.getValue(2, 2), 13d); - // G.G + W.W + C.C = 6 + 11 + 9 = 26 - assertEquals(pairwise.getValue(3, 3), 26d); - // F.R + K.X + L.D = -3 + -1 + -3 = -8 - assertEquals(pairwise.getValue(0, 1), -8d); - // F.Q + K.I + L.A = -3 + -3 + -1 = -7 - assertEquals(pairwise.getValue(0, 2), -7d); - // F.G + K.W + L.C = -3 + -3 + -1 = -7 - assertEquals(pairwise.getValue(0, 3), -7d); - // R.Q + X.I + D.A = 1 + -1 + -2 = -2 - assertEquals(pairwise.getValue(1, 2), -2d); - // R.G + X.W + D.C = -2 + -2 + -3 = -7 - assertEquals(pairwise.getValue(1, 3), -7d); - // Q.G + I.W + A.C = -2 + -3 + 0 = -5 - assertEquals(pairwise.getValue(2, 3), -5d); - } -} diff --git a/test/jalview/schemes/ScoreMatrixTest.java b/test/jalview/schemes/ScoreMatrixTest.java index 0ae7375..99af601 100644 --- a/test/jalview/schemes/ScoreMatrixTest.java +++ b/test/jalview/schemes/ScoreMatrixTest.java @@ -2,6 +2,8 @@ package jalview.schemes; import static org.testng.Assert.assertEquals; +import jalview.math.MatrixI; + import org.testng.annotations.Test; public class ScoreMatrixTest @@ -23,13 +25,6 @@ public class ScoreMatrixTest assertEquals(m[row].length, rows); for (int col = 0; col < rows; col++) { - // if (m[row][col] != m[col][row]) - // { - // System.out.println(String.format("%s [%d, %d] [%s, %s]", - // sm.getName(), m[row][col], m[col][row], - // ResidueProperties.aa[row], - // ResidueProperties.aa[col])); - // } assertEquals(m[row][col], m[col][row], String.format("%s [%s, %s]", sm.getName(), ResidueProperties.aa[row], ResidueProperties.aa[col])); @@ -43,4 +38,132 @@ public class ScoreMatrixTest assertEquals(rows, (sm.isDNA() ? ResidueProperties.maxNucleotideIndex : ResidueProperties.maxProteinIndex) + 1); } + + /** + * A test that just asserts the expected values in the Blosum62 score matrix + */ + @Test(groups = "Functional") + public void testBlosum62_values() + { + ScoreMatrix sm = ResidueProperties.getScoreMatrix("BLOSUM62"); + + /* + * verify expected scores against ARNDCQEGHILKMFPSTWYVBZX + * scraped from https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt + */ + verifyValues(sm, 'A', new int[] { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, + -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0 }); + verifyValues(sm, 'R', new int[] { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, + -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1 }); + verifyValues(sm, 'N', new int[] { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, + 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1 }); + verifyValues(sm, 'D', new int[] { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, + -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1 }); + verifyValues(sm, 'C', new int[] { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, + -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2 }); + verifyValues(sm, 'Q', new int[] { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, + 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1 }); + verifyValues(sm, 'E', new int[] { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, + 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 }); + verifyValues(sm, 'G', new int[] { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, + -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1 }); + verifyValues(sm, 'H', new int[] { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, + -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1 }); + verifyValues(sm, 'I', new int[] { -1, -3, -3, -3, -1, -3, -3, -4, -3, + 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1 }); + verifyValues(sm, 'L', new int[] { -1, -2, -3, -4, -1, -2, -3, -4, -3, + 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1 }); + verifyValues(sm, 'K', new int[] { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, + -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1 }); + verifyValues(sm, 'M', new int[] { -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, + 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1 }); + verifyValues(sm, 'F', new int[] { -2, -3, -3, -3, -2, -3, -3, -3, -1, + 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1 }); + verifyValues(sm, 'P', new int[] { -1, -2, -2, -1, -3, -1, -1, -2, -2, + -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2 }); + verifyValues(sm, 'S', new int[] { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, + 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0 }); + verifyValues(sm, 'T', new int[] { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, + -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0 }); + verifyValues(sm, 'W', new int[] { -3, -3, -4, -4, -2, -2, -3, -2, -2, + -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2 }); + verifyValues(sm, 'Y', new int[] { -2, -2, -2, -3, -2, -1, -2, -3, 2, + -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1 }); + verifyValues(sm, 'V', new int[] { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, + 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1 }); + verifyValues(sm, 'B', new int[] { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, + -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1 }); + verifyValues(sm, 'Z', new int[] { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, + 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 }); + verifyValues(sm, 'X', new int[] { 0, -1, -1, -1, -2, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 }); + } + /** + * Helper method to check pairwise scores for one residue + * + * @param sm + * @param res + * @param expected + * score values against 'res', in ResidueProperties.aaIndex order + */ + private void verifyValues(ScoreMatrix sm, char res, int[] expected) + { + for (int j = 0; j < expected.length; j++) + { + char c2 = ResidueProperties.aa[j].charAt(0); + assertEquals(sm.getPairwiseScore(res, c2), expected[j], + String.format("%s->%s", res, c2)); + } + } + + @Test(groups = "Functional") + public void testComputePairwiseScores() + { + String[] seqs = new String[] { "FKL", "R-D", "QIA", "GWC" }; + ScoreMatrix sm = ResidueProperties.getScoreMatrix("BLOSUM62"); + + MatrixI pairwise = sm.computePairwiseScores(seqs); + + /* + * should be NxN where N = number of sequences + */ + assertEquals(pairwise.height(), 4); + assertEquals(pairwise.width(), 4); + + /* + * should be symmetrical (because BLOSUM62 is) + */ + for (int i = 0; i < pairwise.height(); i++) + { + for (int j = 0; j < pairwise.width(); j++) + { + assertEquals(pairwise.getValue(i, j), pairwise.getValue(j, i), + "Not symmetric"); + } + } + /* + * verify expected BLOSUM dot product scores + * Note: gap is treated like 'X' [22] in the matrix + */ + // F.F + K.K + L.L = 6 + 5 + 4 = 15 + assertEquals(pairwise.getValue(0, 0), 15d); + // R.R + X.X + D.D = 5 + -1 + 6 = 10 + assertEquals(pairwise.getValue(1, 1), 10d); + // Q.Q + I.I + A.A = 5 + 4 + 4 = 13 + assertEquals(pairwise.getValue(2, 2), 13d); + // G.G + W.W + C.C = 6 + 11 + 9 = 26 + assertEquals(pairwise.getValue(3, 3), 26d); + // F.R + K.X + L.D = -3 + -1 + -3 = -8 + assertEquals(pairwise.getValue(0, 1), -8d); + // F.Q + K.I + L.A = -3 + -3 + -1 = -7 + assertEquals(pairwise.getValue(0, 2), -7d); + // F.G + K.W + L.C = -3 + -3 + -1 = -7 + assertEquals(pairwise.getValue(0, 3), -7d); + // R.Q + X.I + D.A = 1 + -1 + -2 = -2 + assertEquals(pairwise.getValue(1, 2), -2d); + // R.G + X.W + D.C = -2 + -2 + -3 = -7 + assertEquals(pairwise.getValue(1, 3), -7d); + // Q.G + I.W + A.C = -2 + -3 + 0 = -5 + assertEquals(pairwise.getValue(2, 3), -5d); + } } -- 1.7.10.2