2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis.scoremodels;
23 import java.util.Arrays;
25 public class ScoreMatrix implements PairwiseScoreModelI
27 public static final short UNMAPPED = (short) -1;
29 private static final String BAD_ASCII_ERROR = "Unexpected character %s in getPairwiseScore";
31 private static final int MAX_ASCII = 127;
34 * the name of the model as shown in menus
39 * the characters that the model provides scores for
41 private char[] symbols;
44 * the score matrix; both dimensions must equal the number of symbols
45 * matrix[i][j] is the substitution score for replacing symbols[i] with symbols[j]
47 private float[][] matrix;
50 * quick lookup to convert from an ascii character value to the index
51 * of the corresponding symbol in the score matrix
53 private short[] symbolIndex;
56 * true for Protein Score matrix, false for dna score matrix
58 private boolean peptide;
/**
 * Constructor given a name, symbol alphabet, and matrix of scores for pairs
 * of symbols. The matrix should be square and of the same size as the
 * alphabet, for example 20x20 for a 20 symbol alphabet.
 *
 * @param name
 *          Unique, human readable name for the matrix
 * @param alphabet
 *          the symbols to which scores apply
 * @param matrix
 *          Pairwise scores indexed according to the symbol alphabet
 * @throws IllegalArgumentException
 *           if the matrix does not have one row per symbol, or if any row
 *           is null or does not have one column per symbol
 */
public ScoreMatrix(String name, char[] alphabet, float[][] matrix)
{
  if (alphabet.length != matrix.length)
  {
    throw new IllegalArgumentException(
            "score matrix size must match alphabet size");
  }
  for (float[] row : matrix)
  {
    // a missing row is rejected in the same way as one of the wrong length
    if (row == null || row.length != alphabet.length)
    {
      throw new IllegalArgumentException(
              "score matrix size must be square");
    }
  }

  // the arrays are stored as supplied (not copied)
  this.name = name;
  this.matrix = matrix;
  this.symbols = alphabet;

  symbolIndex = buildSymbolIndex(alphabet);

  /*
   * crude heuristic for now...
   */
  peptide = alphabet.length >= 20;
}
101 * Returns an array A where A[i] is the position in the alphabet array of the
102 * character whose value is i. For example if the alphabet is { 'A', 'D', 'X'
103 * } then A['D'] = A[68] = 1.
105 * Unmapped characters (not in the alphabet) get an index of -1.
107 * Mappings are added automatically for lower case symbols (for non case
108 * sensitive scoring), unless they are explicitly present in the alphabet (are
109 * scored separately in the score matrix).
114 static short[] buildSymbolIndex(char[] alphabet)
116 short[] index = new short[MAX_ASCII + 1];
117 Arrays.fill(index, UNMAPPED);
119 for (char c : alphabet)
127 * also map lower-case character (unless separately mapped)
129 if (c >= 'A' && c <= 'Z')
131 short lowerCase = (short) (c + ('a' - 'A'));
132 if (index[lowerCase] == UNMAPPED)
134 index[lowerCase] = pos;
143 public String getName()
149 public boolean isDNA()
155 public boolean isProtein()
161 * Returns the score matrix as used in getPairwiseScore. If using this matrix
162 * directly, callers <em>must</em> also call <code>getMatrixIndex</code> in
163 * order to get the matrix index for each character (symbol).
166 * @see #getMatrixIndex(char)
168 public float[][] getMatrix()
174 * Answers the matrix index for a given character, or -1 if unmapped in the
175 * matrix. Use this method only if using <code>getMatrix</code> in order to
176 * compute scores directly (without symbol lookup) for efficiency.
182 public int getMatrixIndex(char c)
184 if (c < symbolIndex.length)
186 return symbolIndex[c];
195 * Returns the pairwise score for substituting c with d, or zero if c or d is
196 * an unscored or unexpected character
199 public float getPairwiseScore(char c, char d)
201 if (c >= symbolIndex.length)
203 System.err.println(String.format(BAD_ASCII_ERROR, c));
206 if (d >= symbolIndex.length)
208 System.err.println(String.format(BAD_ASCII_ERROR, d));
212 int cIndex = symbolIndex[c];
213 int dIndex = symbolIndex[d];
214 if (cIndex != UNMAPPED && dIndex != UNMAPPED)
216 return matrix[cIndex][dIndex];
222 * pretty print the matrix
225 public String toString()
227 return outputMatrix(false);
231 * Print the score matrix, optionally formatted as html, with the alphabet symbols as column headings and at the start of each row
235 public String outputMatrix(boolean html)
237 StringBuilder sb = new StringBuilder(512);
240 * heading row with alphabet
244 sb.append("<table border=\"1\">");
245 sb.append(html ? "<tr><th></th>" : "");
247 for (char sym : symbols)
251 sb.append("<th> ").append(sym).append(" </th>");
255 sb.append("\t").append(sym);
258 sb.append(html ? "</tr>\n" : "\n");
263 for (char c1 : symbols)
267 sb.append("<tr><td>");
269 sb.append(c1).append(html ? "</td>" : "");
270 for (char c2 : symbols)
272 sb.append(html ? "<td>" : "\t")
273 .append(matrix[symbolIndex[c1]][symbolIndex[c2]])
274 .append(html ? "</td>" : "");
276 sb.append(html ? "</tr>\n" : "\n");
280 sb.append("</table>");
282 return sb.toString();
286 * Answers the number of symbols coded for (also equal to the number of rows
287 * and columns of the score matrix)
293 return symbols.length;