--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.PairwiseScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.api.analysis.SimilarityScoreModelI;
+import jalview.datamodel.AlignmentView;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
+import jalview.util.Comparison;
+
+/**
+ * A class to provide sequence pairwise similarity based on residue identity
+ */
+public class PIDModel implements SimilarityScoreModelI,
+ PairwiseScoreModelI
+{
+
+ @Override
+ public String getName()
+ {
+ return "% Identity (PID)";
+ }
+
+ @Override
+ public boolean isDNA()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean isProtein()
+ {
+ return true;
+ }
+
+ /**
+ * Answers 1 if c and d are the same residue (ignoring case), and not gap
+ * characters. Answers 0 for non-matching or gap characters.
+ */
+ @Override
+ public float getPairwiseScore(char c, char d)
+ {
+ c = toUpper(c);
+ d = toUpper(d);
+ if (c == d && !Comparison.isGap(c))
+ {
+ return 1f;
+ }
+ return 0f;
+ }
+
+ /**
+ * @param c
+ */
+ protected static char toUpper(char c)
+ {
+ if ('a' <= c && c <= 'z')
+ {
+ c += 'A' - 'a';
+ }
+ return c;
+ }
+
+ @Override
+ public MatrixI findSimilarities(AlignmentView seqData)
+ {
+ // TODO reuse code in ScoreMatrix instead somehow
+ String[] seqs = seqData.getSequenceStrings(' ');
+ return findSimilarities(seqs, SimilarityParams.Jalview);
+ }
+
+ /**
+ * Compute percentage identity scores, using the gap treatment and
+ * normalisation specified by the options parameter
+ *
+ * @param seqs
+ * @param options
+ * @return
+ */
+ protected MatrixI findSimilarities(String[] seqs,
+ SimilarityParamsI options)
+ {
+ double[][] values = new double[seqs.length][];
+ for (int row = 0; row < seqs.length; row++)
+ {
+ values[row] = new double[seqs.length];
+ for (int col = 0; col < seqs.length; col++)
+ {
+ double total = computePID(seqs[row], seqs[col], options);
+ values[row][col] = total;
+ }
+ }
+ return new Matrix(values);
+ }
+
+ /**
+ * Computes a percentage identity for two sequences, using the algorithm
+ * choices specified by the options parameter
+ *
+ * @param seq1
+ * @param seq2
+ * @param options
+ * @return
+ */
+ protected double computePID(String seq1, String seq2,
+ SimilarityParamsI options)
+ {
+ int len1 = seq1.length();
+ int len2 = seq2.length();
+ int width = Math.max(len1, len2);
+ int total = 0;
+ int divideBy = 0;
+
+ for (int i = 0; i < width; i++)
+ {
+ if (i >= len1 || i >= len2)
+ {
+ /*
+ * off the end of one sequence; stop if we are only matching
+ * on the shorter sequence length, else treat as trailing gap
+ */
+ if (options.denominateByShortestLength())
+ {
+ break;
+ }
+ if (options.denominatorIncludesGaps())
+ {
+ divideBy++;
+ }
+ if (options.matchGaps())
+ {
+ total++;
+ }
+ continue;
+ }
+ char c1 = seq1.charAt(i);
+ char c2 = seq2.charAt(i);
+ boolean gap1 = Comparison.isGap(c1);
+ boolean gap2 = Comparison.isGap(c2);
+
+ if (gap1 && gap2)
+ {
+ /*
+ * gap-gap: include if options say so, if so
+ * have to score as identity; else ignore
+ */
+ if (options.includeGappedColumns())
+ {
+ divideBy++;
+ total++;
+ }
+ continue;
+ }
+
+ if (gap1 || gap2)
+ {
+ /*
+ * gap-residue: include if options say so,
+ * count as match if options say so
+ */
+ if (options.denominatorIncludesGaps())
+ {
+ divideBy++;
+ }
+ if (options.matchGaps())
+ {
+ total++;
+ }
+ continue;
+ }
+
+ /*
+ * remaining case is gap-residue
+ */
+ if (toUpper(c1) == toUpper(c2))
+ {
+ total++;
+ }
+ divideBy++;
+ }
+
+ return divideBy == 0 ? 0D : 100D * total / divideBy;
+ }
+}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import static org.testng.Assert.assertEquals;
+
+import jalview.util.Comparison;
+
+import org.testng.annotations.Test;
+
+public class PIDModelTest
+{
+ private static final double DELTA = 0.00001D;
+
+ @Test(groups = "Functional")
+ public void testGetPairwiseScore()
+ {
+ PIDModel sm = new PIDModel();
+ assertEquals(sm.getPairwiseScore('A', 'A'), 1f);
+ assertEquals(sm.getPairwiseScore('A', 'a'), 1f);
+ assertEquals(sm.getPairwiseScore('a', 'A'), 1f);
+ assertEquals(sm.getPairwiseScore('A', 'B'), 0f);
+ assertEquals(sm.getPairwiseScore('A', ' '), 0f);
+ assertEquals(sm.getPairwiseScore(' ', ' '), 0f);
+ assertEquals(sm.getPairwiseScore('.', '.'), 0f);
+ assertEquals(sm.getPairwiseScore('-', '-'), 0f);
+ }
+
+ /**
+ * Regression test to verify that a (suitably configured) PIDModel computes
+ * the same percentage identities as the Comparison.PID method
+ */
+ @Test(groups = "Functional")
+ public void testComputePID_matchesComparisonPID()
+ {
+ /*
+ * same length, no gaps
+ */
+ String s1 = "ARFNQDWSGI";
+ String s2 = "ARKNQDQSGI";
+ double newScore = new PIDModel().computePID(s1, s2,
+ SimilarityParams.Jalview);
+ double oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+
+ /*
+ * same length, with gaps
+ */
+ s1 = "-RFNQDWSGI";
+ s2 = "ARKNQ-QSGI";
+ newScore = new PIDModel().computePID(s1, s2,
+ SimilarityParams.Jalview);
+ oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+
+ /*
+ * s2 longer than s1, with gaps
+ */
+ s1 = "ARK-";
+ s2 = "-RFNQ";
+ newScore = new PIDModel().computePID(s1, s2,
+ SimilarityParams.Jalview);
+ oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+
+ /*
+ * s1 longer than s2, with gaps
+ */
+ s1 = "-RFNQ";
+ s2 = "ARK-";
+ newScore = new PIDModel().computePID(s1, s2,
+ SimilarityParams.Jalview);
+ oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+
+ /*
+ * same but now also with gapped columns
+ */
+ s1 = "-R-F-NQ";
+ s2 = "AR-K--";
+ newScore = new PIDModel().computePID(s1, s2, SimilarityParams.Jalview);
+ oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+ }
+}