1 package jalview.analysis.scoremodels;
3 import jalview.api.analysis.PairwiseScoreModelI;
4 import jalview.api.analysis.SimilarityParamsI;
5 import jalview.api.analysis.SimilarityScoreModelI;
6 import jalview.datamodel.AlignmentView;
7 import jalview.math.Matrix;
8 import jalview.math.MatrixI;
9 import jalview.util.Comparison;
12 * A class to provide sequence pairwise similarity based on residue identity
14 public class PIDModel implements SimilarityScoreModelI,
19 public String getName()
21 return "% Identity (PID)";
/*
 * NOTE(review): only the signature of this method is visible in this view;
 * its body, and therefore the value it answers, is not shown. Presumably it
 * reports whether this score model may be applied to nucleotide sequences -
 * confirm against the complete source.
 */
25 public boolean isDNA()
/*
 * NOTE(review): only the signature of this method is visible in this view;
 * its body, and therefore the value it answers, is not shown. Presumably it
 * reports whether this score model may be applied to protein sequences -
 * confirm against the complete source.
 */
31 public boolean isProtein()
37 * Answers 1 if c and d are the same residue (ignoring case), and not gap
38 * characters. Answers 0 for non-matching or gap characters.
41 public float getPairwiseScore(char c, char d)
45 if (c == d && !Comparison.isGap(c))
55 protected static char toUpper(char c)
57 if ('a' <= c && c <= 'z')
65 public MatrixI findSimilarities(AlignmentView seqData)
67 // TODO reuse code in ScoreMatrix instead somehow
68 String[] seqs = seqData.getSequenceStrings(' ');
69 return findSimilarities(seqs, SimilarityParams.Jalview);
73 * Compute percentage identity scores, using the gap treatment and
74 * normalisation specified by the options parameter
80 protected MatrixI findSimilarities(String[] seqs,
81 SimilarityParamsI options)
83 double[][] values = new double[seqs.length][];
84 for (int row = 0; row < seqs.length; row++)
86 values[row] = new double[seqs.length];
87 for (int col = 0; col < seqs.length; col++)
89 double total = computePID(seqs[row], seqs[col], options);
90 values[row][col] = total;
93 return new Matrix(values);
97 * Computes a percentage identity for two sequences, using the algorithm
98 * choices specified by the options parameter
105 protected double computePID(String seq1, String seq2,
106 SimilarityParamsI options)
108 int len1 = seq1.length();
109 int len2 = seq2.length();
110 int width = Math.max(len1, len2);
114 for (int i = 0; i < width; i++)
116 if (i >= len1 || i >= len2)
119 * off the end of one sequence; stop if we are only matching
120 * on the shorter sequence length, else treat as trailing gap
122 if (options.denominateByShortestLength())
126 if (options.denominatorIncludesGaps())
130 if (options.matchGaps())
136 char c1 = seq1.charAt(i);
137 char c2 = seq2.charAt(i);
138 boolean gap1 = Comparison.isGap(c1);
139 boolean gap2 = Comparison.isGap(c2);
144 * gap-gap: include if options say so, if so
145 * have to score as identity; else ignore
147 if (options.includeGappedColumns())
158 * gap-residue: include if options say so,
159 * count as match if options say so
161 if (options.denominatorIncludesGaps())
165 if (options.matchGaps())
173 * remaining case is gap-residue
175 if (toUpper(c1) == toUpper(c2))
182 return divideBy == 0 ? 0D : 100D * total / divideBy;