1 package jalview.analysis.scoremodels;
3 import jalview.api.AlignmentViewPanel;
4 import jalview.api.analysis.PairwiseScoreModelI;
5 import jalview.api.analysis.ScoreModelI;
6 import jalview.api.analysis.SimilarityParamsI;
7 import jalview.datamodel.AlignmentView;
8 import jalview.math.Matrix;
9 import jalview.math.MatrixI;
10 import jalview.util.Comparison;
13 * A class to provide sequence pairwise similarity based on residue identity.
14 * Instances of this class are immutable and thread-safe, so the same object is
15 * returned from calls to getInstance().
17 public class PIDModel extends SimilarityScoreModel implements
20 private static final String NAME = "PID";
30 public String getName()
36 * Answers null for description. If a display name is needed, use getName() or
37 * an internationalized string built from the name.
40 public String getDescription()
46 public boolean isDNA()
52 public boolean isProtein()
58 * Answers 1 if c and d are the same residue (ignoring case), and not gap
59 * characters. Answers 0 for non-matching or gap characters.
62 public float getPairwiseScore(char c, char d)
66 if (c == d && !Comparison.isGap(c))
76 protected static char toUpper(char c)
78 if ('a' <= c && c <= 'z')
86 * Computes similarity scores based on pairwise percentage identity of
87 * sequences. For consistency with Jalview 2.10.1's SeqSpace mode PCA
88 * calculation, the percentage scores are rescaled to the width of the
89 * sequences (as if counts of identical residues). This method is thread-safe.
92 public MatrixI findSimilarities(AlignmentView seqData,
93 SimilarityParamsI options)
95 String[] seqs = seqData.getSequenceStrings(Comparison.GAP_DASH);
97 MatrixI result = findSimilarities(seqs, options);
99 result.multiply(seqData.getWidth() / 100d);
105 * A distance score is computed in the usual way (by reversing the range of
106 * the similarity score results), and then rescaled to percentage values
107 * (reversing the rescaling to count values done in findSimilarities). This
108 * method is thread-safe.
111 public MatrixI findDistances(AlignmentView seqData,
112 SimilarityParamsI options)
114 MatrixI result = super.findDistances(seqData, options);
116 if (seqData.getWidth() != 0)
118 result.multiply(100d / seqData.getWidth());
125 * Compute percentage identity scores, using the gap treatment and
126 * normalisation specified by the options parameter
132 protected MatrixI findSimilarities(String[] seqs,
133 SimilarityParamsI options)
135 // TODO reuse code in ScoreMatrix instead somehow
136 double[][] values = new double[seqs.length][];
137 for (int row = 0; row < seqs.length; row++)
139 values[row] = new double[seqs.length];
140 for (int col = 0; col < seqs.length; col++)
142 double total = computePID(seqs[row], seqs[col], options);
143 values[row][col] = total;
146 return new Matrix(values);
150 * Computes a percentage identity for two sequences, using the algorithm
151 * choices specified by the options parameter
158 public static double computePID(String seq1, String seq2,
159 SimilarityParamsI options)
161 int len1 = seq1.length();
162 int len2 = seq2.length();
163 int width = Math.max(len1, len2);
167 for (int i = 0; i < width; i++)
169 if (i >= len1 || i >= len2)
172 * off the end of one sequence; stop if we are only matching
173 * on the shorter sequence length, else treat as trailing gap
175 if (options.denominateByShortestLength())
179 if (options.includeGaps())
183 if (options.matchGaps())
189 char c1 = seq1.charAt(i);
190 char c2 = seq2.charAt(i);
191 boolean gap1 = Comparison.isGap(c1);
192 boolean gap2 = Comparison.isGap(c2);
197 * gap-gap: include if options say so, if so
198 * have to score as identity; else ignore
200 if (options.includeGappedColumns())
211 * gap-residue: include if options say so,
212 * count as match if options say so
214 if (options.includeGaps())
218 if (options.matchGaps())
226 * remaining case is residue-residue
228 if (toUpper(c1) == toUpper(c2))
235 return divideBy == 0 ? 0D : 100D * total / divideBy;
239 public ScoreModelI getInstance(AlignmentViewPanel avp)