import jalview.datamodel.AlignmentView;
import jalview.math.Matrix;
import jalview.math.MatrixI;
+import jalview.util.Comparison;
import java.util.Arrays;
}
/**
+ * Computes pairwise similarities of a set of sequences using the given
+ * parameters
+ *
* @param seqs
+ * @param params
* @return
*/
- protected MatrixI findSimilarities(String[] seqs,
- SimilarityParamsI options)
+ protected MatrixI findSimilarities(String[] seqs, SimilarityParamsI params)
{
- // todo use options in calculation
double[][] values = new double[seqs.length][];
for (int row = 0; row < seqs.length; row++)
{
values[row] = new double[seqs.length];
for (int col = 0; col < seqs.length; col++)
{
- int total = 0;
- int width = Math.min(seqs[row].length(), seqs[col].length());
- for (int i = 0; i < width; i++)
- {
- char c1 = seqs[row].charAt(i);
- char c2 = seqs[col].charAt(i);
- float score = getPairwiseScore(c1, c2);
- total += score;
- }
+ double total = computeSimilarity(seqs[row], seqs[col], params);
values[row][col] = total;
}
}
}
/**
+ * Calculates the pairwise similarity of two strings using the given
+ * calculation parameters
+ *
+ * @param seq1
+ * @param seq2
+ * @param params
+ * @return
+ */
+ protected double computeSimilarity(String seq1, String seq2,
+ SimilarityParamsI params)
+ {
+ int len1 = seq1.length();
+ int len2 = seq2.length();
+ double total = 0;
+
+ int width = Math.max(len1, len2);
+ for (int i = 0; i < width; i++)
+ {
+ if (i >= len1 || i >= len2)
+ {
+ /*
+ * off the end of one sequence; stop if we are only matching
+ * on the shorter sequence length, else treat as trailing gap
+ */
+ if (params.denominateByShortestLength())
+ {
+ break;
+ }
+ }
+ // Change GAP_SPACE to GAP_DASH if we adopt - for gap in matrices
+ char c1 = i >= len1 ? Comparison.GAP_SPACE : seq1.charAt(i);
+ char c2 = i >= len2 ? Comparison.GAP_SPACE : seq2.charAt(i);
+ boolean gap1 = Comparison.isGap(c1);
+ boolean gap2 = Comparison.isGap(c2);
+
+ if (gap1 && gap2)
+ {
+ /*
+ * gap-gap: include if options say so, else ignore
+ */
+ if (!params.includeGappedColumns())
+ {
+ continue;
+ }
+ }
+ else if (gap1 || gap2)
+ {
+ /*
+ * gap-residue: score if options say so
+ */
+ if (!params.includesGaps())
+ {
+ continue;
+ }
+ }
+ float score = getPairwiseScore(c1, c2);
+ total += score;
+ }
+ return total;
+ }
+
+ /**
* Answers a hashcode computed from the symbol alphabet and the matrix score
* values
*/
import static org.testng.Assert.assertTrue;
import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+import jalview.api.analysis.SimilarityParamsI;
import jalview.io.DataSourceType;
import jalview.io.FileParse;
import jalview.io.ScoreMatrixFile;
assertTrue(sm.equals(sm2));
assertEquals(sm.hashCode(), sm2.hashCode());
}
+
+ /**
+ * Tests for ScoreMatrix.computeSimilarity variants where the longer of the
+ * two sequence lengths is used, i.e. the shorter sequence is scored as if it
+ * were padded with trailing gaps
+ */
+ @Test(groups = "Functional")
+ public void testcomputeSimilarity_matchLongestSequence()
+ {
+ // TODO params.matchGaps() is not used for ScoreMatrix
+ // - includeGaps is sufficient (there is no denominator)
+ // ==> bespoke parameters only 3 booleans?
+ /*
+ * for now, using space for gap to match callers of
+ * AlignmentView.getSequenceStrings()
+ * may change this to '-' (with corresponding change to matrices)
+ *
+ * aligned columns (gap shown as -); s2 is one column shorter than s1:
+ * s1 = F R - K - S
+ * s2 = F S - - L
+ * so s2 needs two consecutive gap characters before the L
+ */
+ String s1 = "FR K S";
+ String s2 = "FS  L";
+ ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
+
+ /*
+ * NOTE(review): the SimilarityParams constructor arguments are assumed to
+ * be (includeGappedColumns, matchGaps, includeGaps,
+ * denominateByShortestLength) - confirm against SimilarityParams
+ *
+ * score gap-gap and gap-char
+ * shorter sequence treated as if with trailing gaps
+ * score = F^F + R^S + -^- + K^- + -^L + S^-
+ * = 6 + -1 + 1 + -4 + -4 + -4 = -6
+ */
+ SimilarityParamsI params = new SimilarityParams(true, true, true, false);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), -6d);
+ // matchGap (arg2) is ignored:
+ params = new SimilarityParams(true, false, true, false);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), -6d);
+
+ /*
+ * score gap-char but not gap-gap
+ * score = F^F + R^S + 0 + K^- + -^L + S^-
+ * = 6 + -1 + 0 + -4 + -4 + -4 = -7
+ */
+ params = new SimilarityParams(false, true, true, false);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), -7d);
+ // matchGap (arg2) is ignored:
+ params = new SimilarityParams(false, false, true, false);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), -7d);
+
+ /*
+ * score gap-gap but not gap-char
+ * score = F^F + R^S + -^- + 0 + 0 + 0
+ * = 6 + -1 + 1 = 6
+ */
+ params = new SimilarityParams(true, false, false, false);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
+ // matchGap (arg2) is ignored:
+ params = new SimilarityParams(true, true, false, false);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
+
+ /*
+ * score neither gap-gap nor gap-char
+ * score = F^F + R^S + 0 + 0 + 0 + 0
+ * = 6 + -1 = 5
+ */
+ params = new SimilarityParams(false, false, false, false);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
+ // matchGap (arg2) is ignored:
+ params = new SimilarityParams(false, true, false, false);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
+ }
+
+ /**
+ * Tests for ScoreMatrix.computeSimilarity variants where only the shorter
+ * length of two sequences is used, i.e. columns beyond the end of the
+ * shorter sequence are not scored
+ */
+ @Test(groups = "Functional")
+ public void testcomputeSimilarity_matchShortestSequence()
+ {
+ // TODO params.matchGaps() is not used for ScoreMatrix
+ // - includeGaps is sufficient (there is no denominator)
+ // ==> bespoke parameters only 3 booleans?
+ /*
+ * for now, using space for gap to match callers of
+ * AlignmentView.getSequenceStrings()
+ * may change this to '-' (with corresponding change to matrices)
+ *
+ * aligned columns (gap shown as -); s2 is one column shorter than s1:
+ * s1 = F R - K - S
+ * s2 = F S - - L
+ * so s2 needs two consecutive gap characters before the L
+ */
+ String s1 = "FR K S";
+ String s2 = "FS  L";
+ ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
+
+ /*
+ * NOTE(review): the SimilarityParams constructor arguments are assumed to
+ * be (includeGappedColumns, matchGaps, includeGaps,
+ * denominateByShortestLength) - confirm against SimilarityParams
+ *
+ * score gap-gap and gap-char
+ * comparison stops at the end of the shorter sequence (5 columns),
+ * so the final S of s1 is not scored
+ * score = F^F + R^S + -^- + K^- + -^L
+ * = 6 + -1 + 1 + -4 + -4 = -2
+ */
+ SimilarityParamsI params = new SimilarityParams(true, true, true, true);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), -2d);
+ // matchGap (arg2) is ignored:
+ params = new SimilarityParams(true, false, true, true);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), -2d);
+
+ /*
+ * score gap-char but not gap-gap
+ * score = F^F + R^S + 0 + K^- + -^L
+ * = 6 + -1 + 0 + -4 + -4 = -3
+ */
+ params = new SimilarityParams(false, true, true, true);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), -3d);
+ // matchGap (arg2) is ignored:
+ params = new SimilarityParams(false, false, true, true);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), -3d);
+
+ /*
+ * score gap-gap but not gap-char
+ * score = F^F + R^S + -^- + 0 + 0
+ * = 6 + -1 + 1 = 6
+ */
+ params = new SimilarityParams(true, false, false, true);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
+ // matchGap (arg2) is ignored:
+ params = new SimilarityParams(true, true, false, true);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), 6d);
+
+ /*
+ * score neither gap-gap nor gap-char
+ * score = F^F + R^S + 0 + 0 + 0
+ * = 6 + -1 = 5
+ */
+ params = new SimilarityParams(false, false, false, true);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
+ // matchGap (arg2) is ignored:
+ params = new SimilarityParams(false, true, false, true);
+ assertEquals(blosum.computeSimilarity(s1, s2, params), 5d);
+ }
}