From e0f478e7dd73384609bd596f7edf6106dc7d9330 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 28 Mar 2017 12:55:33 +0100 Subject: [PATCH] JAL-2403 JAL-2416 pairwise score unexpected symbols as minimum matrix substitution score (or 1 for identity) (reproduces '*' score column) --- src/jalview/analysis/scoremodels/ScoreMatrix.java | 9 ++++---- test/jalview/analysis/AlignSeqTest.java | 2 +- .../analysis/scoremodels/ScoreMatrixTest.java | 24 +++++++++++++------- test/jalview/io/ScoreMatrixFileTest.java | 6 +++-- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/jalview/analysis/scoremodels/ScoreMatrix.java b/src/jalview/analysis/scoremodels/ScoreMatrix.java index 84e91ae..7e07b9f 100644 --- a/src/jalview/analysis/scoremodels/ScoreMatrix.java +++ b/src/jalview/analysis/scoremodels/ScoreMatrix.java @@ -289,8 +289,9 @@ public class ScoreMatrix implements SimilarityScoreModelI, } /** - * Returns the pairwise score for substituting c with d, or zero if c or d is - * an unscored or unexpected character + * Returns the pairwise score for substituting c with d. If either c or d is + * an unexpected character, returns 1 for identity (c == d), else the minimum + * score value in the matrix. */ @Override public float getPairwiseScore(char c, char d) @@ -315,10 +316,8 @@ public class ScoreMatrix implements SimilarityScoreModelI, /* * one or both symbols not found in the matrix - * note: a possible strategy here would be to return the minimum - * matrix value if c != d */ - return 0; + return c == d ? 1 : getMinimumScore(); } /** diff --git a/test/jalview/analysis/AlignSeqTest.java b/test/jalview/analysis/AlignSeqTest.java index 85f619b..b9e866d 100644 --- a/test/jalview/analysis/AlignSeqTest.java +++ b/test/jalview/analysis/AlignSeqTest.java @@ -70,7 +70,7 @@ public class AlignSeqTest { AlignSeq as = new AlignSeq(new Sequence("s1", "PFY"), new Sequence( "s2", "RQW"), AlignSeq.PEP); - int[] expected = new int[] { 0, 0, 1, 1, 2, 2, 21, 21, 22, 22, -1, 24, + int[] expected = new int[] { 0, 0, 1, 1, 2, 2, 21, 21, 22, 22, -1, -1, -1, 23, -1 }; String s = "aArRnNzZxX *.-?"; assertArrayEquals(expected, as.indexEncode(s)); diff --git a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java index 9c9e917..16d9504 100644 --- a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java +++ b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java @@ -30,7 +30,7 @@ public class ScoreMatrixTest // note score matrix does not have to be symmetric (though it should be!) float[][] scores = new float[3][]; scores[0] = new float[] { 1f, 2f, 3f }; - scores[1] = new float[] { 4f, 5f, 6f }; + scores[1] = new float[] { -4f, 5f, 6f }; scores[2] = new float[] { 7f, 8f, 9f }; ScoreMatrix sm = new ScoreMatrix("Test", "ABC".toCharArray(), scores); assertEquals(sm.getSize(), 3); @@ -38,10 +38,15 @@ public class ScoreMatrixTest assertEquals(sm.getPairwiseScore('A', 'a'), 1f); assertEquals(sm.getPairwiseScore('b', 'c'), 6f); assertEquals(sm.getPairwiseScore('c', 'b'), 8f); - assertEquals(sm.getPairwiseScore('A', 'D'), 0f); assertEquals(sm.getMatrixIndex('c'), 2); assertEquals(sm.getMatrixIndex(' '), -1); + // substitution to or from unknown symbol gets minimum score + assertEquals(sm.getPairwiseScore('A', 'D'), -4f); + assertEquals(sm.getPairwiseScore('D', 'A'), -4f); + // unknown-to-self gets a score of 1 + assertEquals(sm.getPairwiseScore('D', 'D'), 1f); + assertEquals(sm.getGapIndex(), -1); // no gap symbol } @@ -538,21 +543,24 @@ public class ScoreMatrixTest { float[][] scores = new float[2][]; scores[0] = new float[] { 1f, 2f }; - scores[1] = new float[] { 4f, 5f }; + scores[1] = new float[] { -4f, 5f }; ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', 'B' }, scores); assertEquals(sm.getPairwiseScore('A', 'A'), 1f); assertEquals(sm.getPairwiseScore('A', 'a'), 1f); assertEquals(sm.getPairwiseScore('A', 'B'), 2f); - assertEquals(sm.getPairwiseScore('b', 'a'), 4f); + assertEquals(sm.getPairwiseScore('b', 'a'), -4f); assertEquals(sm.getPairwiseScore('B', 'b'), 5f); /* - * unknown symbols currently score zero + * unknown symbols currently score minimum score + * or 1 for identity with self */ - assertEquals(sm.getPairwiseScore('A', '-'), 0f); - assertEquals(sm.getPairwiseScore('-', '-'), 0f); - assertEquals(sm.getPairwiseScore('Q', 'W'), 0f); + assertEquals(sm.getPairwiseScore('A', '-'), -4f); + assertEquals(sm.getPairwiseScore('-', 'A'), -4f); + assertEquals(sm.getPairwiseScore('-', '-'), 1f); + assertEquals(sm.getPairwiseScore('Q', 'W'), -4f); + assertEquals(sm.getPairwiseScore('Q', 'Q'), 1f); /* * symbols not in basic ASCII set score zero diff --git a/test/jalview/io/ScoreMatrixFileTest.java b/test/jalview/io/ScoreMatrixFileTest.java index 52ad735..a98b2d6 100644 --- a/test/jalview/io/ScoreMatrixFileTest.java +++ b/test/jalview/io/ScoreMatrixFileTest.java @@ -51,6 +51,7 @@ public class ScoreMatrixFileTest assertNull(sm.getDescription()); assertTrue(sm.isDNA()); assertFalse(sm.isProtein()); + assertEquals(sm.getMinimumScore(), 1.1f); assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f); assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f); assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent @@ -58,8 +59,9 @@ public class ScoreMatrixFileTest assertEquals(sm.getPairwiseScore('a', 't'), 1.4f); assertEquals(sm.getPairwiseScore('U', 'x'), 3.5f); assertEquals(sm.getPairwiseScore('u', 'x'), 3.5f); - assertEquals(sm.getPairwiseScore('U', 'X'), 0f); // X (upper) unmapped - assertEquals(sm.getPairwiseScore('A', '.'), 0f); // . unmapped + // X (upper) and '.' unmapped - get minimum score + assertEquals(sm.getPairwiseScore('U', 'X'), 1.1f); + assertEquals(sm.getPairwiseScore('A', '.'), 1.1f); assertEquals(sm.getPairwiseScore('-', '-'), 7.6f); assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range } -- 1.7.10.2