+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.PairwiseScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.api.analysis.SimilarityScoreModelI;
+import jalview.datamodel.AlignmentView;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
+import jalview.util.Comparison;
+
+/**
+ * A class to provide sequence pairwise similarity based on residue identity
+ */
+public class PIDModel implements SimilarityScoreModelI,
+ PairwiseScoreModelI
+{
+
+ @Override
+ public String getName()
+ {
+ return "% Identity (PID)";
+ }
+
+ @Override
+ public boolean isDNA()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean isProtein()
+ {
+ return true;
+ }
+
+ /**
+ * Answers 1 if c and d are the same residue (ignoring case), and not gap
+ * characters. Answers 0 for non-matching or gap characters.
+ */
+ @Override
+ public float getPairwiseScore(char c, char d)
+ {
+ c = toUpper(c);
+ d = toUpper(d);
+ if (c == d && !Comparison.isGap(c))
+ {
+ return 1f;
+ }
+ return 0f;
+ }
+
+ /**
+ * @param c
+ */
+ protected static char toUpper(char c)
+ {
+ if ('a' <= c && c <= 'z')
+ {
+ c += 'A' - 'a';
+ }
+ return c;
+ }
+
+ @Override
+ public MatrixI findSimilarities(AlignmentView seqData)
+ {
+ // TODO reuse code in ScoreMatrix instead somehow
+ String[] seqs = seqData.getSequenceStrings(' ');
+ return findSimilarities(seqs, SimilarityParams.Jalview);
+ }
+
+ /**
+ * Compute percentage identity scores, using the gap treatment and
+ * normalisation specified by the options parameter
+ *
+ * @param seqs
+ * @param options
+ * @return
+ */
+ protected MatrixI findSimilarities(String[] seqs,
+ SimilarityParamsI options)
+ {
+ double[][] values = new double[seqs.length][];
+ for (int row = 0; row < seqs.length; row++)
+ {
+ values[row] = new double[seqs.length];
+ for (int col = 0; col < seqs.length; col++)
+ {
+ double total = computePID(seqs[row], seqs[col], options);
+ values[row][col] = total;
+ }
+ }
+ return new Matrix(values);
+ }
+
+ /**
+ * Computes a percentage identity for two sequences, using the algorithm
+ * choices specified by the options parameter
+ *
+ * @param seq1
+ * @param seq2
+ * @param options
+ * @return
+ */
+ protected double computePID(String seq1, String seq2,
+ SimilarityParamsI options)
+ {
+ int len1 = seq1.length();
+ int len2 = seq2.length();
+ int width = Math.max(len1, len2);
+ int total = 0;
+ int divideBy = 0;
+
+ for (int i = 0; i < width; i++)
+ {
+ if (i >= len1 || i >= len2)
+ {
+ /*
+ * off the end of one sequence; stop if we are only matching
+ * on the shorter sequence length, else treat as trailing gap
+ */
+ if (options.denominateByShortestLength())
+ {
+ break;
+ }
+ if (options.denominatorIncludesGaps())
+ {
+ divideBy++;
+ }
+ if (options.matchGaps())
+ {
+ total++;
+ }
+ continue;
+ }
+ char c1 = seq1.charAt(i);
+ char c2 = seq2.charAt(i);
+ boolean gap1 = Comparison.isGap(c1);
+ boolean gap2 = Comparison.isGap(c2);
+
+ if (gap1 && gap2)
+ {
+ /*
+ * gap-gap: include if options say so, if so
+ * have to score as identity; else ignore
+ */
+ if (options.includeGappedColumns())
+ {
+ divideBy++;
+ total++;
+ }
+ continue;
+ }
+
+ if (gap1 || gap2)
+ {
+ /*
+ * gap-residue: include if options say so,
+ * count as match if options say so
+ */
+ if (options.denominatorIncludesGaps())
+ {
+ divideBy++;
+ }
+ if (options.matchGaps())
+ {
+ total++;
+ }
+ continue;
+ }
+
+ /*
+ * remaining case is gap-residue
+ */
+ if (toUpper(c1) == toUpper(c2))
+ {
+ total++;
+ }
+ divideBy++;
+ }
+
+ return divideBy == 0 ? 0D : 100D * total / divideBy;
+ }
+}