2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis.scoremodels;
23 import jalview.api.AlignmentViewPanel;
24 import jalview.api.analysis.PairwiseScoreModelI;
25 import jalview.api.analysis.ScoreModelI;
26 import jalview.api.analysis.SimilarityParamsI;
27 import jalview.datamodel.AlignmentView;
28 import jalview.math.Matrix;
29 import jalview.math.MatrixI;
30 import jalview.util.Comparison;
33 * A class to provide sequence pairwise similarity based on residue identity.
34 * Instances of this class are immutable and thread-safe, so the same object is
35 * returned from calls to getInstance().
37 public class PIDModel extends SimilarityScoreModel
38 implements PairwiseScoreModelI
40 private static final String NAME = "PID";
50 public String getName()
56 * Answers null for description. If a display name is needed, use getName() or
57 * an internationalized string built from the name.
60 public String getDescription()
66 public boolean isDNA()
72 public boolean isProtein()
78 public boolean isSecondaryStructure()
84 * Answers 1 if c and d are the same residue (ignoring case), and not gap
85 * characters. Answers 0 for non-matching or gap characters.
88 public float getPairwiseScore(char c, char d)
92 if (c == d && !Comparison.isGap(c))
102 protected static char toUpper(char c)
104 if ('a' <= c && c <= 'z')
112 * Computes similarity scores based on pairwise percentage identity of
113 * sequences. For consistency with Jalview 2.10.1's SeqSpace mode PCA
114 * calculation, the percentage scores are rescaled to the width of the
115 * sequences (as if counts of identical residues). This method is thread-safe.
118 public MatrixI findSimilarities(AlignmentView seqData,
119 SimilarityParamsI options)
121 String[] seqs = seqData.getSequenceStrings(Comparison.GAP_DASH);
123 MatrixI result = findSimilarities(seqs, options);
125 result.multiply(seqData.getWidth() / 100d);
131 * A distance score is computed in the usual way (by reversing the range of
132 * the similarity score results), and then rescaled to percentage values
133 * (reversing the rescaling to count values done in findSimilarities). This
134 * method is thread-safe.
137 public MatrixI findDistances(AlignmentView seqData,
138 SimilarityParamsI options)
140 MatrixI result = super.findDistances(seqData, options);
142 if (seqData.getWidth() != 0)
144 result.multiply(100d / seqData.getWidth());
151 * Compute percentage identity scores, using the gap treatment and
152 * normalisation specified by the options parameter
158 protected MatrixI findSimilarities(String[] seqs,
159 SimilarityParamsI options)
162 * calculation is symmetric, so compute only the upper triangle (col >= row) and mirror each value
164 double[][] values = new double[seqs.length][seqs.length];
165 for (int row = 0; row < seqs.length; row++)
167 for (int col = row; col < seqs.length; col++)
169 double total = computePID(seqs[row], seqs[col], options);
170 values[row][col] = total;
171 values[col][row] = total;
174 return new Matrix(values);
178 * Computes a percentage identity for two sequences, using the algorithm
179 * choices specified by the options parameter
186 public static double computePID(String seq1, String seq2,
187 SimilarityParamsI options)
189 int len1 = seq1.length();
190 int len2 = seq2.length();
191 int width = Math.max(len1, len2);
195 for (int i = 0; i < width; i++)
197 if (i >= len1 || i >= len2)
200 * off the end of one sequence; stop if we are only matching
201 * on the shorter sequence length, else treat as trailing gap
203 if (options.denominateByShortestLength())
207 if (options.includeGaps())
211 if (options.matchGaps())
217 char c1 = seq1.charAt(i);
218 char c2 = seq2.charAt(i);
219 boolean gap1 = Comparison.isGap(c1);
220 boolean gap2 = Comparison.isGap(c2);
225 * gap-gap: include only if options say so, in which case it
226 * has to be scored as an identity; otherwise ignore the column
228 if (options.includeGappedColumns())
239 * gap-residue: include if options say so,
240 * count as match if options say so
242 if (options.includeGaps())
246 if (options.matchGaps())
254 * remaining case is residue-residue
256 if (toUpper(c1) == toUpper(c2))
263 return divideBy == 0 ? 0D : 100D * total / divideBy;
267 public ScoreModelI getInstance(AlignmentViewPanel avp)