+
+ /**
+  * Reports how many symbols this matrix holds scores for. Per the original
+  * documentation, this is also the number of rows and of columns of the score
+  * matrix.
+  *
+  * @return the length of the symbol alphabet array
+  */
+ public int getSize()
+ {
+   final int symbolCount = symbols.length;
+   return symbolCount;
+ }
+
+ /**
+  * Builds an NxN matrix, N being the number of sequences, in which entry
+  * [i, j] is sequence[i] pairwise multiplied with sequence[j], i.e. the sum of
+  * the per-position scores taken from the current score matrix. For example
+  * <ul>
+  * <li>Sequences:</li>
+  * <li>FKL</li>
+  * <li>R-D</li>
+  * <li>QIA</li>
+  * <li>GWC</li>
+  * <li>Score matrix is BLOSUM62</li>
+  * <li>Gaps treated same as X (unknown)</li>
+  * <li>product [0, 0] = F.F + K.K + L.L = 6 + 5 + 4 = 15</li>
+  * <li>product [1, 1] = R.R + -.- + D.D = 5 + -1 + 6 = 10</li>
+  * <li>product [2, 2] = Q.Q + I.I + A.A = 5 + 4 + 4 = 13</li>
+  * <li>product [3, 3] = G.G + W.W + C.C = 6 + 11 + 9 = 26</li>
+  * <li>product [0, 1] = F.R + K.- + L.D = -3 + -1 + -3 = -8</li>
+  * <li>and so on</li>
+  * </ul>
+  * The character used to render gaps in the sequence strings is chosen here:
+  * when gaps are scored as "any" it is 'N' for nucleotide data and 'X'
+  * otherwise; when they are not, the configured gap character is used.
+  *
+  * @param seqstrings
+  *          the view supplying the sequence strings to compare
+  * @param options
+  *          the similarity calculation parameters, passed on unchanged
+  * @return the matrix of pairwise similarity scores
+  */
+ @Override
+ public MatrixI findSimilarities(AlignmentView seqstrings,
+         SimilarityParamsI options)
+ {
+   char gapChar;
+   if (!scoreGapAsAny)
+   {
+     gapChar = gapCharacter;
+   }
+   else if (seqstrings.isNa())
+   {
+     gapChar = 'N';
+   }
+   else
+   {
+     gapChar = 'X';
+   }
+
+   String[] sequences = seqstrings.getSequenceStrings(gapChar);
+   return findSimilarities(sequences, options);
+ }
+
+ /**
+ * Computes pairwise similarities of a set of sequences using the given
+ * parameters
+ *
+ * @param seqs
+ * @param params
+ * @return
+ */
+ protected MatrixI findSimilarities(String[] seqs, SimilarityParamsI params)
+ {
+ double[][] values = new double[seqs.length][];
+ for (int row = 0; row < seqs.length; row++)
+ {
+ values[row] = new double[seqs.length];
+ for (int col = 0; col < seqs.length; col++)
+ {
+ double total = computeSimilarity(seqs[row], seqs[col], params);
+ values[row][col] = total;
+ }
+ }
+ return new Matrix(values);
+ }
+
+ /**
+ * Calculates the pairwise similarity of two strings using the given
+ * calculation parameters
+ *
+ * @param seq1
+ * @param seq2
+ * @param params
+ * @return
+ */
+ protected double computeSimilarity(String seq1, String seq2,
+ SimilarityParamsI params)
+ {
+ int len1 = seq1.length();
+ int len2 = seq2.length();
+ double total = 0;
+
+ int width = Math.max(len1, len2);
+ for (int i = 0; i < width; i++)
+ {
+ if (i >= len1 || i >= len2)
+ {
+ /*
+ * off the end of one sequence; stop if we are only matching
+ * on the shorter sequence length, else treat as trailing gap
+ */
+ if (params.denominateByShortestLength())
+ {
+ break;
+ }
+ }
+
+ char c1 = i >= len1 ? gapCharacter : seq1.charAt(i);
+ char c2 = i >= len2 ? gapCharacter : seq2.charAt(i);
+ boolean gap1 = Comparison.isGap(c1);
+ boolean gap2 = Comparison.isGap(c2);
+
+ if (gap1 && gap2)
+ {
+ /*
+ * gap-gap: include if options say so, else ignore
+ */
+ if (!params.includeGappedColumns())
+ {
+ continue;
+ }
+ }
+ else if (gap1 || gap2)
+ {
+ /*
+ * gap-residue: score if options say so
+ */
+ if (!params.includeGaps())
+ {
+ continue;
+ }
+ }
+ float score = getPairwiseScore(c1, c2);
+ total += score;
+ }
+ return total;
+ }
+
+ /**
+  * Derives a hashcode from the symbol alphabet combined, row by row, with the
+  * score values of the matrix.
+  *
+  * @return the hash of the symbols folded together with the hash of each
+  *         matrix row
+  */
+ @Override
+ public int hashCode()
+ {
+   int result = Arrays.hashCode(symbols);
+   for (int i = 0; i < matrix.length; i++)
+   {
+     result = 31 * result + Arrays.hashCode(matrix[i]);
+   }
+   return result;
+ }
+
+ /**
+  * Tests whether the argument is a ScoreMatrix holding the same symbol
+  * alphabet and the same score values as this one.
+  *
+  * @param obj
+  *          the object to compare with
+  * @return true if obj is a ScoreMatrix with equal symbols and equal matrix
+  *         contents, otherwise false (including when obj is null)
+  */
+ @Override
+ public boolean equals(Object obj)
+ {
+   if (obj instanceof ScoreMatrix)
+   {
+     ScoreMatrix other = (ScoreMatrix) obj;
+     return Arrays.equals(symbols, other.symbols)
+             && Arrays.deepEquals(matrix, other.matrix);
+   }
+   return false;
+ }
+
+ /**
+  * Supplies the alphabet scored by this matrix, packaged as a String.
+  *
+  * @return a new String built from the symbol alphabet
+  */
+ public String getSymbols()
+ {
+   final String alphabet = new String(symbols);
+   return alphabet;
+ }
+
+ /**
+  * Replaces the stored description with the given text.
+  *
+  * @param desc
+  *          the new description
+  */
+ public void setDescription(String desc)
+ {
+   this.description = desc;
+ }