2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.schemes;
23 import jalview.analysis.scoremodels.PairwiseSeqScoreModel;
24 import jalview.math.Matrix;
25 import jalview.math.MatrixI;
27 public class ScoreMatrix extends PairwiseSeqScoreModel
32 public String getName()
38 * reference to integer score matrix
43 * 0 for Protein Score matrix. 1 for dna score matrix
50 * Unique, human readable name for the matrix
52 * Pairwise scores indexed according to appropriate symbol alphabet
54 * 0 for Protein, 1 for NA
56 ScoreMatrix(String name, int[][] matrix, int type)
64 public boolean isDNA()
70 public boolean isProtein()
76 public int[][] getMatrix()
82 * Answers the score for substituting first char in A1 with first char in A2
88 public int getPairwiseScore(String A1, String A2)
90 return getPairwiseScore(A1.charAt(0), A2.charAt(0));
94 public int getPairwiseScore(char c, char d)
100 int a = (type == 0) ? ResidueProperties.aaIndex[c]
101 : ResidueProperties.nucleotideIndex[c];
102 int b = (type == 0) ? ResidueProperties.aaIndex[d]
103 : ResidueProperties.nucleotideIndex[d];
106 * FIXME: 2.10.1 PCA treats gap as [22] or 'X', but Tree
107 * calculation treats as [23]; which is correct?
110 * hack to convert unassigned / unknown (including gap)
111 * to index of unknown (X for amino acids, N for nucleotide)
112 * TODO: statically assign gap characters to this index?
116 // if (a == ResidueProperties.maxProteinIndex)
118 // a = ResidueProperties.aaIndex['X'];
120 // if (b == ResidueProperties.maxProteinIndex)
122 // b = ResidueProperties.aaIndex['X'];
127 // if (a == ResidueProperties.maxNucleotideIndex)
129 // a = ResidueProperties.nucleotideIndex['N'];
131 // if (b == ResidueProperties.maxNucleotideIndex)
133 // b = ResidueProperties.nucleotideIndex['N'];
137 } catch (Exception e)
139 // System.out.println("Unknown residue in " + A1 + " " + A2);
146 * pretty print the matrix
149 public String toString()
151 return outputMatrix(false);
154 public String outputMatrix(boolean html)
156 StringBuffer sb = new StringBuffer();
157 int[] symbols = (type == 0) ? ResidueProperties.aaIndex
158 : ResidueProperties.nucleotideIndex;
159 int symMax = (type == 0) ? ResidueProperties.maxProteinIndex
160 : ResidueProperties.maxNucleotideIndex;
161 boolean header = true;
164 sb.append("<table border=\"1\">");
166 for (char sym = 'A'; sym <= 'Z'; sym++)
168 if (symbols[sym] >= 0 && symbols[sym] < symMax)
172 sb.append(html ? "<tr><td></td>" : "");
173 for (char sym2 = 'A'; sym2 <= 'Z'; sym2++)
175 if (symbols[sym2] >= 0 && symbols[sym2] < symMax)
177 sb.append((html ? "<td> " : "\t") + sym2
178 + (html ? " </td>" : ""));
182 sb.append(html ? "</tr>\n" : "\n");
188 sb.append((html ? "<td>" : "") + sym + (html ? "</td>" : ""));
189 for (char sym2 = 'A'; sym2 <= 'Z'; sym2++)
191 if (symbols[sym2] >= 0 && symbols[sym2] < symMax)
193 sb.append((html ? "<td>" : "\t")
194 + matrix[symbols[sym]][symbols[sym2]]
195 + (html ? "</td>" : ""));
198 sb.append(html ? "</tr>\n" : "\n");
203 sb.append("</table>");
205 return sb.toString();
209 * Computes an NxN matrix where N is the number of sequences, and entry [i, j]
210 * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores
211 * computed using the current score matrix. For example
213 * <li>Sequences:</li>
218 * <li>Score matrix is BLOSUM62</li>
219 * <li>Gaps treated same as X (unknown)</li>
220 * <li>product [0, 0] = F.F + K.K + L.L = 6 + 5 + 4 = 15</li>
221 * <li>product [1, 1] = R.R + -.- + D.D = 5 + -1 + 6 = 10</li>
222 * <li>product [2, 2] = Q.Q + I.I + A.A = 5 + 4 + 4 = 13</li>
223 * <li>product [3, 3] = G.G + W.W + C.C = 6 + 11 + 9 = 26</li>
224 * <li>product[0, 1] = F.R + K.- + L.D = -3 + -1 + -3 = -8
228 public MatrixI computePairwiseScores(String[] seqs)
230 double[][] values = new double[seqs.length][];
231 for (int row = 0; row < seqs.length; row++)
233 values[row] = new double[seqs.length];
234 for (int col = 0; col < seqs.length; col++)
237 int width = Math.min(seqs[row].length(), seqs[col].length());
238 for (int i = 0; i < width; i++)
240 char c1 = seqs[row].charAt(i);
241 char c2 = seqs[col].charAt(i);
242 int score = getPairwiseScore(c1, c2);
245 values[row][col] = total;
248 return new Matrix(values);