*/
private void calcSeqNum(int i, ScoreMatrix sm)
{
- int gapIndex = sm.getMatrixIndex(' ');
+ int gapIndex = sm.getGapIndex();
int sSize = sequences.length;
if ((i > -1) && (i < sSize))
calcSeqNum(i, sm);
}
- int gapIndex = sm.getMatrixIndex(' ');
+ int gapIndex = sm.getGapIndex();
if ((cons2 == null) || seqNumsChanged)
{
double max = -Double.MAX_VALUE;
ScoreMatrix blosum62 = ScoreModels.getInstance().getBlosum62();
float[][] blosumScores = blosum62.getMatrix();
- int gapIndex = blosum62.getMatrixIndex(' ');
// Loop over columns // JBPNote Profiling info
// long ts = System.currentTimeMillis();
// todo ? remove '*' (unused?) from score matrix and
// use getSize() here instead of getSize() - 1 ??
final int symbolCount = blosum62.getSize() - 1; // 24;
+ int gapIndex = blosum62.getGapIndex();
for (j = startRes; j <= endRes; j++)
{
import java.util.Arrays;
+/**
+ * A class that models a substitution score matrix for any given alphabet of
+ * symbols
+ */
public class ScoreMatrix implements SimilarityScoreModelI,
PairwiseScoreModelI
{
- private static final char GAP_CHARACTER = Comparison.GAP_DASH;
+ /*
+ * this field records which gap character is used in the alphabet; space,
+ * dash or dot are recognised as gap symbols. It defaults to the dash gap
+ * symbol (the value of the constant this field replaces) so that, when the
+ * alphabet contains no gap symbol, the padding substituted for positions
+ * beyond the end of the shorter sequence is still recognised as a gap. A
+ * default that is not mapped in the matrix still gives a gap index of -1.
+ */
+ private char gapCharacter = Comparison.GAP_DASH;
/*
* Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide)
* Mappings are added automatically for lower case symbols (for non case
* sensitive scoring), unless they are explicitly present in the alphabet (are
* scored separately in the score matrix).
+ * <p>
+ * the gap character (space, dash or dot) included in the alphabet (if any) is
+ * recorded in a field
*
* @param alphabet
* @return
*/
- static short[] buildSymbolIndex(char[] alphabet)
+ short[] buildSymbolIndex(char[] alphabet)
{
short[] index = new short[MAX_ASCII + 1];
Arrays.fill(index, UNMAPPED);
short pos = 0;
for (char c : alphabet)
{
+ if (Comparison.isGap(c))
+ {
+ gapCharacter = c;
+ }
+
if (c <= MAX_ASCII)
{
index[c] = pos;
}
/**
+ * Looks up the position in the score matrix of the gap character recorded
+ * for this matrix. Intended for callers that obtain the raw scores via
+ * <code>getMatrix</code> and index into them directly, rather than looking
+ * up each symbol.
+ *
+ * @return the matrix index of the recorded gap character, or -1 when that
+ * character has no mapping in the matrix
+ * @see #getMatrix()
+ */
+ public int getGapIndex()
+ {
+ final int indexOfGap = getMatrixIndex(gapCharacter);
+ return indexOfGap;
+ }
+
+ /**
* Returns the pairwise score for substituting c with d, or zero if c or d is
* an unscored or unexpected character
*/
}
}
- char c1 = i >= len1 ? GAP_CHARACTER : seq1.charAt(i);
- char c2 = i >= len2 ? GAP_CHARACTER : seq2.charAt(i);
+ char c1 = i >= len1 ? gapCharacter : seq1.charAt(i);
+ char c2 = i >= len2 ? gapCharacter : seq2.charAt(i);
boolean gap1 = Comparison.isGap(c1);
boolean gap2 = Comparison.isGap(c2);
assertEquals(sm.getPairwiseScore('A', 'D'), 0f);
assertEquals(sm.getMatrixIndex('c'), 2);
assertEquals(sm.getMatrixIndex(' '), -1);
+
+ assertEquals(sm.getGapIndex(), -1); // no gap symbol
}
@Test(
@Test(groups = "Functional")
public void testBuildSymbolIndex()
{
- short[] index = ScoreMatrix.buildSymbolIndex("AX-. yxYp".toCharArray());
+ float[][] scores = new float[2][];
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f, 4f };
+ ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' },
+ scores);
+ short[] index = sm.buildSymbolIndex("AX-yxYp".toCharArray());
assertEquals(index.length, 128); // ASCII character set size
assertEquals(index['a'], 0); // lower-case mapping added
assertEquals(index['X'], 1);
assertEquals(index['-'], 2);
- assertEquals(index['.'], 3);
- assertEquals(index[' '], 4);
- assertEquals(index['y'], 5); // lower-case override
- assertEquals(index['x'], 6); // lower-case override
- assertEquals(index['Y'], 7);
- assertEquals(index['p'], 8);
+ assertEquals(index['y'], 3); // lower-case override
+ assertEquals(index['x'], 4); // lower-case override
+ assertEquals(index['Y'], 5);
+ assertEquals(index['p'], 6);
assertEquals(index['P'], -1); // lower-case doesn't map upper-case
/*
@Test(groups = "Functional")
public void testBuildSymbolIndex_nonAscii()
{
+ float[][] scores = new float[2][];
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f, 4f };
+ ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' },
+ scores); // an instance is needed because buildSymbolIndex is no longer static
char[] weird = new char[] { 128, 245, 'P' }; // two symbols above the ASCII range, then 'P' at position 2
- short[] index = ScoreMatrix.buildSymbolIndex(weird);
+ short[] index = sm.buildSymbolIndex(weird); // NOTE(review): this call also runs each symbol through the gap check and may overwrite sm's recorded gap character - confirm that is acceptable in a test
assertEquals(index.length, 128); // index size is fixed; non-ASCII symbols do not extend it
assertEquals(index['P'], 2); // 'P' keeps its position in the supplied alphabet
assertEquals(index['p'], 2); // lower-case mapping added
}
@Test(groups = "Functional")
+ public void testGetGapIndex()
+ {
+ /*
+ * the built-in BLOSUM62 matrix is expected to report its gap symbol at
+ * matrix index 23
+ */
+ final int blosumGapIndex = ScoreModels.getInstance().getBlosum62()
+ .getGapIndex();
+ assertEquals(blosumGapIndex, 23);
+ }
+
+ @Test(groups = "Functional")
public void testGetSize()
{
ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
String.format("%s->%s", res, c2));
}
}
+
+ @Test(groups = "Functional")
+ public void testConstructor_gapDash()
+ {
+ /*
+ * two-symbol alphabet: 'A' at index 0 and the dash gap symbol at index 1
+ */
+ final char[] alphabet = { 'A', '-' };
+ final float[][] values = { { 1f, 2f }, { 4f, 5f } };
+ ScoreMatrix matrix = new ScoreMatrix("Test", alphabet, values);
+
+ assertEquals(matrix.getSize(), 2);
+ assertArrayEquals(values, matrix.getMatrix());
+
+ /*
+ * pairwise scores; lower-case 'a' is scored as 'A'
+ */
+ assertEquals(matrix.getPairwiseScore('A', 'a'), 1f);
+ assertEquals(matrix.getPairwiseScore('A', 'A'), 1f);
+ assertEquals(matrix.getPairwiseScore('a', '-'), 2f);
+ assertEquals(matrix.getPairwiseScore('-', 'A'), 4f);
+
+ /*
+ * symbol lookup; only the dash is mapped as a gap, space and dot are not
+ */
+ assertEquals(matrix.getMatrixIndex('a'), 0);
+ assertEquals(matrix.getMatrixIndex('A'), 0);
+ assertEquals(matrix.getMatrixIndex('-'), 1);
+ assertEquals(matrix.getMatrixIndex(' '), -1);
+ assertEquals(matrix.getMatrixIndex('.'), -1);
+
+ /*
+ * the gap index is the index of the dash
+ */
+ assertEquals(matrix.getGapIndex(), 1);
+ }
}