# The first line declares a ScoreMatrix with the name BLOSUM62 (shown in menus)
# The second line gives the symbols for which scores are held in the matrix
# These may include a space (but not as the first or last character)
-# Scores are not case sensitive, unless column(s) are provided for lower case characters
#
+# Scores are not case sensitive with respect to symbols, unless column(s) are provided for lower case characters
+# The 'guide symbol' at the start of each row of score values is optional
#
-# Comment line with symbols is provided as a guide
+# Comment header line with symbols is provided as a guide
# Values may be integer or floating point, delimited by tab, space, comma or combinations
#
# A R N D C Q E G H I L K M F P S T W Y V B Z X *
#
- 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -4
- -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 -4
- -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 -4
- -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 -4
- 0 3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 -4
- -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 -4
- -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -4
- 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 -4
- -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 -4
- -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 -4
- -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 -4
- -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 -4
- -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 -4
- -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 -4
- -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 -4
- 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 -4
- 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 -4
- -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 -4
- -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 -4
- 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 -4
- -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 -4
- -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -4
- 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 -4
- -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 1
+A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -4
+R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 -4
+N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 -4
+D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 -4
+C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 -4
+Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 -4
+E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -4
+G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 -4
+H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 -4
+I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 -4
+L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 -4
+K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 -4
+M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 -4
+F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 -4
+P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 -4
+S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 -4
+T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 -4
+W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 -4
+Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 -4
+V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 -4
+B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 -4
+Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -4
+X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 -4
-4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 1
+* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 1
#
# A R N D C Q E G H I L K M F P S T W Y V B Z X *
# Comment line with symbols is provided as a guide
# Values may be integer or floating point, delimited by tab, space, comma or combinations
#
-# A R N D C Q E G H I L K M F P S T W Y V B Z X *
+# A R N D C Q E G H I L K M F P S T W Y V B Z X *
#
2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8 -8
-2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8 -8
* Parses a score matrix from the given input stream and returns a ScoreMatrix
* object. If parsing fails, error messages are written to System.err and null
* is returned. It is the caller's responsibility to close the input stream.
+ * Expected format:
+ *
+ * <pre>
+ * ScoreMatrix displayName
+ * # comment lines begin with hash sign
+ * # symbol alphabet should be the next non-comment line
+ * ARNDCQEGHILKMFPSTWYVBZX *
+ * # scores matrix, with space, comma or tab delimited values
+ * # [i, j] = score for substituting symbol[i] with symbol[j]
+ * # first column in each row is optionally the 'substituted' symbol
+ * A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -4
+ * ..etc..
+ * </pre>
*
* @param is
* @return
/*
* subsequent lines should be the symbol scores
+ * optionally with the symbol as the first column for readability
*/
StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
+ if (scoreLine.countTokens() == size + 1)
+ {
+ /*
+ * check 'guide' symbol is the row'th letter of the alphabet
+ */
+ String symbol = scoreLine.nextToken();
+ if (symbol.length() > 1
+ || symbol.charAt(0) != alphabet.charAt(row))
+ {
+ System.err
+ .println(String
+ .format("Error parsing score matrix at line %d, expected %s but found %s",
+ lineNo, alphabet.charAt(row), symbol));
+ return null;
+ }
+ }
if (scoreLine.countTokens() != size)
{
System.err.println(String.format(
- "Expected %d tokens at line %d but found %d", size,
+ "Expected %d scores at line %d but found %d", size,
lineNo, scoreLine.countTokens()));
return null;
}
scores[row] = new float[size];
int col = 0;
String value = null;
- while (scoreLine.hasMoreTokens()) {
- try {
+ while (scoreLine.hasMoreTokens())
+ {
+ try
+ {
value = scoreLine.nextToken();
scores[row][col] = Float.valueOf(value);
col++;
/*
* some messy but valid input data, with comma, space
* or tab (or combinations) as score value delimiters
+ * this example includes 'guide' symbols on score rows
*/
String data = "ScoreMatrix MyTest\n" + "ATU tx-\n"
- + "1.1,1.2,1.3,1.4, 1.5, 1.6, 1.7\n"
- + "2.1 2.2 2.3 2.4 2.5 2.6 2.7\n"
- + "3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t3.7\n"
+ + "A,1.1,1.2,1.3,1.4, 1.5, 1.6, 1.7\n"
+ + "T,2.1 2.2 2.3 2.4 2.5 2.6 2.7\n"
+ + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t3.7\n"
+ " 4.1 ,4.2,\t,4.3 ,\t4.4\t, \4.5,4.6 4.7\n"
- + ", 5.1,5.3,5.3,5.4,5.5, 5.6, 5.7\n"
- + "\t6.1, 6.2 6.3 6.4 6.5 6.6 6.7\n"
- + ", \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6,7.7\n";
+ + "t, 5.1,5.3,5.3,5.4,5.5, 5.6, 5.7\n"
+ + "x\t6.1, 6.2 6.3 6.4 6.5 6.6 6.7\n"
+ + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6,7.7\n";
ScoreMatrix sm = ScoreMatrix.parse(new ByteArrayInputStream(data
.getBytes()));
assertNotNull(sm);
assertEquals(sm.getPairwiseScore('A', '.'), 0f); // . unmapped
assertEquals(sm.getPairwiseScore('-', '-'), 7.7f);
assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range
+
+ /*
+ * without guide symbols on score rows
+ */
+ data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n";
+ sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+ assertNotNull(sm);
+ assertEquals(sm.getPairwiseScore('X', 'X'), 1f);
+ assertEquals(sm.getPairwiseScore('X', 'y'), 2f);
+ assertEquals(sm.getPairwiseScore('y', 'x'), 3f);
+ assertEquals(sm.getPairwiseScore('y', 'Y'), 4f);
+ assertEquals(sm.getPairwiseScore('D', 'R'), 0f);
}
@Test(groups = "Functional")
sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
assertNull(sm);
+ /*
+ * Bad guide character on scores row
+ */
+ data = "ScoreMatrix MyTest\nXY\nX 1 2\ny 3 4\n";
+ sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+ assertNull(sm);
}
}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import jalview.api.analysis.ScoreModelI;
+
+import java.util.Iterator;
+
+import org.testng.annotations.Test;
+
+public class ScoreModelsTest
+{
+  /**
+   * Verify that the singleton constructor successfully loads Jalview's built-in
+   * score models, checking the order in which they are served, their names
+   * and (for the score matrices) one sample pairwise score each
+   */
+  @Test(groups = "Functional")
+  public void testConstructor()
+  {
+    Iterator<ScoreModelI> models = ScoreModels.getInstance().getModels()
+            .iterator();
+
+    /*
+     * models are served in alphabetical order of name
+     * it so happens the 3 ScoreMatrix models precede the two
+     * others
+     *
+     * hasNext() is asserted before each next() so that a missing model
+     * is reported as an assertion failure, not a NoSuchElementException
+     */
+    assertTrue(models.hasNext());
+    ScoreModelI sm = models.next();
+    assertTrue(sm instanceof ScoreMatrix);
+    assertEquals(sm.getName(), "BLOSUM62");
+    assertEquals(((ScoreMatrix) sm).getPairwiseScore('I', 'R'), -3f);
+
+    assertTrue(models.hasNext());
+    sm = models.next();
+    assertTrue(sm instanceof ScoreMatrix);
+    assertEquals(sm.getName(), "DNA");
+    assertEquals(((ScoreMatrix) sm).getPairwiseScore('c', 'x'), 1f);
+
+    assertTrue(models.hasNext());
+    sm = models.next();
+    assertTrue(sm instanceof ScoreMatrix);
+    assertEquals(sm.getName(), "PAM250");
+    assertEquals(((ScoreMatrix) sm).getPairwiseScore('R', 'C'), -4f);
+
+    /*
+     * the remaining two models are not score matrices
+     */
+    assertTrue(models.hasNext());
+    sm = models.next();
+    assertFalse(sm instanceof ScoreMatrix);
+    assertEquals(sm.getName(), "PID");
+
+    assertTrue(models.hasNext());
+    sm = models.next();
+    assertFalse(sm instanceof ScoreMatrix);
+    assertEquals(sm.getName(), "Sequence Feature Similarity");
+  }
+}