*/
package jalview.analysis.scoremodels;
+import jalview.api.analysis.PairwiseScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.api.analysis.SimilarityScoreModelI;
+import jalview.datamodel.AlignmentView;
import jalview.math.Matrix;
import jalview.math.MatrixI;
+import jalview.util.Comparison;
import java.util.Arrays;
-public class ScoreMatrix implements PairwiseScoreModelI
+public class ScoreMatrix implements SimilarityScoreModelI,
+ PairwiseScoreModelI
{
+ /*
+ * Compatibility switch for how gaps are presented to pairwise scoring.
+ * Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide)
+ * for pairwise scoring; 2.10.2 uses gap score (last column) in
+ * score matrix (JAL-2397).
+ * Set this flag to true (via Groovy) for 2.10.1 behaviour.
+ * When true, findSimilarities(AlignmentView, SimilarityParamsI) passes
+ * 'N' (if the view's isNa() is true) or 'X' as the gap character to
+ * getSequenceStrings; when false (the default) it passes ' '.
+ */
+ private static boolean scoreGapAsAny = false;
+
public static final short UNMAPPED = (short) -1;
private static final String BAD_ASCII_ERROR = "Unexpected character %s in getPairwiseScore";
}
/**
- * Returns the score matrix as used in getPairwiseScore. If using this matrix
- * directly, callers <em>must</em> also call <code>getMatrixIndex</code> in
- * order to get the matrix index for each character (symbol).
+ * Returns a copy of the score matrix as used in getPairwiseScore. If using
+ * this matrix directly, callers <em>must</em> also call
+ * <code>getMatrixIndex</code> in order to get the matrix index for each
+ * character (symbol).
*
* @return
* @see #getMatrixIndex(char)
*/
public float[][] getMatrix()
{
- return matrix;
+   /*
+    * defensive copy, so callers cannot modify the scores held here;
+    * only the outer array is allocated up front because every row is
+    * replaced by its own Arrays.copyOf result in the loop below
+    */
+   float[][] copy = new float[matrix.length][];
+   for (int row = 0; row < matrix.length; row++)
+   {
+     copy[row] = Arrays.copyOf(matrix[row], matrix[row].length);
+   }
+   return copy;
}
/**
}
/**
- * Print the score matrix, optionally formatted as html, with the alphabet symbols as column headings and at the start of each row
+ * Print the score matrix, optionally formatted as html, with the alphabet
+ * symbols as column headings and at the start of each row.
+ * <p>
+ * The non-html format should give an output which can be parsed as a score
+ * matrix file
+ *
* @param html
* @return
*/
sb.append("<table border=\"1\">");
sb.append(html ? "<tr><th></th>" : "");
}
+ else
+ {
+ sb.append("ScoreMatrix ").append(getName()).append("\n");
+ sb.append(symbols).append("\n");
+ }
for (char sym : symbols)
{
if (html)
* <li>and so on</li>
* </ul>
*/
- public MatrixI computePairwiseScores(String[] seqs)
+ @Override
+ public MatrixI findSimilarities(AlignmentView seqstrings,
+         SimilarityParamsI options)
+ {
+   /*
+    * by default ' ' is passed as the gap character; if the
+    * scoreGapAsAny flag is set, 'N' or 'X' is passed instead,
+    * depending on seqstrings.isNa()
+    */
+   char gapSymbol = ' ';
+   if (scoreGapAsAny)
+   {
+     gapSymbol = seqstrings.isNa() ? 'N' : 'X';
+   }
+   String[] sequenceStrings = seqstrings.getSequenceStrings(gapSymbol);
+   return findSimilarities(sequenceStrings, options);
+ }
+
+ /**
+ * Computes pairwise similarities of a set of sequences using the given
+ * parameters. The result has one row and one column per entry in
+ * <code>seqs</code>; the value at [row][col] is
+ * <code>computeSimilarity(seqs[row], seqs[col], params)</code>. Every
+ * ordered pair is evaluated, including each sequence against itself.
+ *
+ * @param seqs
+ *          the sequence strings to compare
+ * @param params
+ *          calculation options, passed unchanged to computeSimilarity
+ * @return a new Matrix wrapping the seqs.length by seqs.length array of
+ *         similarity totals
+ */
+ protected MatrixI findSimilarities(String[] seqs, SimilarityParamsI params)
{
double[][] values = new double[seqs.length][];
for (int row = 0; row < seqs.length; row++)
values[row] = new double[seqs.length];
for (int col = 0; col < seqs.length; col++)
{
- int total = 0;
- int width = Math.min(seqs[row].length(), seqs[col].length());
- for (int i = 0; i < width; i++)
- {
- char c1 = seqs[row].charAt(i);
- char c2 = seqs[col].charAt(i);
- float score = getPairwiseScore(c1, c2);
- total += score;
- }
+ double total = computeSimilarity(seqs[row], seqs[col], params);
values[row][col] = total;
}
}
return new Matrix(values);
}
+
+ /**
+ * Calculates the pairwise similarity of two strings using the given
+ * calculation parameters
+ *
+ * @param seq1
+ * @param seq2
+ * @param params
+ * @return
+ */
+ protected double computeSimilarity(String seq1, String seq2,
+ SimilarityParamsI params)
+ {
+ int len1 = seq1.length();
+ int len2 = seq2.length();
+ double total = 0;
+
+ int width = Math.max(len1, len2);
+ for (int i = 0; i < width; i++)
+ {
+ if (i >= len1 || i >= len2)
+ {
+ /*
+ * off the end of one sequence; stop if we are only matching
+ * on the shorter sequence length, else treat as trailing gap
+ */
+ if (params.denominateByShortestLength())
+ {
+ break;
+ }
+ }
+ // Change GAP_SPACE to GAP_DASH if we adopt - for gap in matrices
+ char c1 = i >= len1 ? Comparison.GAP_SPACE : seq1.charAt(i);
+ char c2 = i >= len2 ? Comparison.GAP_SPACE : seq2.charAt(i);
+ boolean gap1 = Comparison.isGap(c1);
+ boolean gap2 = Comparison.isGap(c2);
+
+ if (gap1 && gap2)
+ {
+ /*
+ * gap-gap: include if options say so, else ignore
+ */
+ if (!params.includeGappedColumns())
+ {
+ continue;
+ }
+ }
+ else if (gap1 || gap2)
+ {
+ /*
+ * gap-residue: score if options say so
+ */
+ if (!params.includesGaps())
+ {
+ continue;
+ }
+ }
+ float score = getPairwiseScore(c1, c2);
+ total += score;
+ }
+ return total;
+ }
+
+ /**
+ * Derives a hash code from the symbol alphabet combined, row by row, with
+ * the score values of the matrix
+ */
+ @Override
+ public int hashCode()
+ {
+   int result = Arrays.hashCode(symbols);
+   for (int i = 0; i < matrix.length; i++)
+   {
+     result = 31 * result + Arrays.hashCode(matrix[i]);
+   }
+   return result;
+ }
+
+ /**
+ * Compares this matrix with another object. The result is true only if the
+ * argument is a ScoreMatrix whose symbol alphabet and score values both
+ * match those of this matrix.
+ */
+ @Override
+ public boolean equals(Object obj)
+ {
+   if (obj instanceof ScoreMatrix)
+   {
+     final ScoreMatrix other = (ScoreMatrix) obj;
+     // scores are only compared if the alphabets already match
+     return Arrays.equals(symbols, other.symbols)
+             && Arrays.deepEquals(matrix, other.matrix);
+   }
+   return false;
+ }
+
+ /**
+ * Answers the symbols scored by this matrix, as a new string built from
+ * the alphabet characters in their stored order
+ *
+ * @return
+ */
+ public String getSymbols()
+ {
+   return String.valueOf(symbols);
+ }
}