ScoreMatrix BLOSUM62
-ARNDCQEGHILKMFPSTWYVBZX *
+ARNDCQEGHILKMFPSTWYVBZX-*
#
# The BLOSUM62 substitution matrix, as at https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
# The first line declares a ScoreMatrix with the name BLOSUM62 (shown in menus)
-# The second line gives the symbols for which scores are held in the matrix
-# These may include a space (but not as the first or last character)
#
# Scores are not case sensitive, unless column(s) are provided for lower case characters
# The 'guide symbol' at the start of each row of score values is optional
-#
-# Header line with symbols may be provided as a guide
# Values may be integer or floating point, delimited by tab, space, comma or combinations
#
- A R N D C Q E G H I L K M F P S T W Y V B Z X *
+ A R N D C Q E G H I L K M F P S T W Y V B Z X - *
#
A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -4
R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 -4
B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 -4
Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -4
X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 -4
- -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 1
+- -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 1
* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 1
#
# A R N D C Q E G H I L K M F P S T W Y V B Z X - *
ScoreMatrix DNA
-ACGTUIXRYN -
+ACGTUIXRYN-
#
# A DNA substitution matrix.
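# This is an ad-hoc matrix which, in addition to penalising mutations between the common
# nucleotides (ACGT), scores T and U as equivalent, weakly matches R with (AG), Y with (CTU)
# and N with every symbol. I and X weakly match
# any of (ACGTU), and unfavourably match each other.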
#
# The first line declares a ScoreMatrix with the name DNA (shown in menus)
-# The second line gives the symbols for which scores are held in the matrix
-# These may include a space (but not as the first or last character)
# Scores are not case sensitive, unless column(s) are provided for lower case characters
#
-#
-# Header line with symbols is provided as a guide
# Values may be integer or floating point, delimited by tab, space, comma or combinations
#
- A C G T U I X R Y N -
- 10 -8 -8 -8 -8 1 1 1 -8 1 1 1
- -8 10 -8 -8 -8 1 1 -8 1 1 1 1
- -8 -8 10 -8 -8 1 1 1 -8 1 1 1
- -8 -8 -8 10 10 1 1 -8 1 1 1 1
- -8 -8 -8 10 10 1 1 -8 1 1 1 1
- 1 1 1 1 1 10 0 0 0 1 1 1
- 1 1 1 1 1 0 10 0 0 1 1 1
- 1 -8 1 -8 -8 0 0 10 -8 1 1 1
- -8 1 -8 1 1 0 0 -8 10 1 1 1
- 1 1 1 1 1 1 1 1 1 10 1 1
- 1 1 1 1 1 1 1 1 1 1 1 1
- 1 1 1 1 1 1 1 1 1 1 1 1
+ A C G T U I X R Y N -
+A 10 -8 -8 -8 -8 1 1 1 -8 1 1
+C -8 10 -8 -8 -8 1 1 -8 1 1 1
+G -8 -8 10 -8 -8 1 1 1 -8 1 1
+T -8 -8 -8 10 10 1 1 -8 1 1 1
+U -8 -8 -8 10 10 1 1 -8 1 1 1
+I 1 1 1 1 1 10 0 0 0 1 1
+X 1 1 1 1 1 0 10 0 0 1 1
+R 1 -8 1 -8 -8 0 0 10 -8 1 1
+Y -8 1 -8 1 1 0 0 -8 10 1 1
+N 1 1 1 1 1 1 1 1 1 10 1
+- 1 1 1 1 1 1 1 1 1 1 1
#
# A C G T U I X R Y N -
ScoreMatrix PAM250
-ARNDCQEGHILKMFPSTWYVBZX *
+ARNDCQEGHILKMFPSTWYVBZX-*
#
# The PAM250 substitution matrix
# The first line declares a ScoreMatrix with the name PAM250 (shown in menus)
-# The second line gives the symbols for which scores are held in the matrix
-# These may include a space (but not as the first or last character)
# Scores are not case sensitive, unless column(s) are provided for lower case characters
-#
-#
-# Header line with symbols is provided as a guide
# Values may be integer or floating point, delimited by tab, space, comma or combinations
#
-# A R N D C Q E G H I L K M F P S T W Y V B Z X *
- 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8 -8
- -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8 -8
- 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8 -8
- 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8 -8
- -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8 -8
- 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8 -8
- 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8 -8
- 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8 -8
- -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8 -8
- -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8 -8
- -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8 -8
- -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8 -8
- -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8 -8
- -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8 -8
- 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8 -8
- 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8 -8
- 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8 -8
- -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8 -8
- -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8 -8
- 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8 -8
- 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8 -8
- 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8 -8
- 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8 -8
- -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 1
- -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 1
+# A R N D C Q E G H I L K M F P S T W Y V B Z X - *
+A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8 -8
+R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8 -8
+N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8 -8
+D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8 -8
+C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8 -8
+Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8 -8
+E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8 -8
+G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8 -8
+H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8 -8
+I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8 -8
+L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8 -8
+K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8 -8
+M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8 -8
+F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8 -8
+P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8 -8
+S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8 -8
+T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8 -8
+W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8 -8
+Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8 -8
+V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8 -8
+B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8 -8
+Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8 -8
+X 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8 -8
+- -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 1
+* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 1
#
# A R N D C Q E G H I L K M F P S T W Y V B Z X - *
/**
 * Computes a similarity matrix for the sequences held in the given alignment
 * view. The view's sequences are extracted as strings and the work is
 * delegated to the {@code findSimilarities} overload that takes a string array.
 * 
 * @param seqData
 *          the alignment view supplying the sequences
 * @param options
 *          similarity parameters, passed through unchanged
 * @return the matrix returned by the string-array overload
 */
public MatrixI findSimilarities(AlignmentView seqData,
SimilarityParamsI options)
{
// NOTE(review): the character argument is presumably the symbol substituted
// for gaps in the returned strings - confirm against AlignmentView
- String[] seqs = seqData.getSequenceStrings(' ');
+ String[] seqs = seqData.getSequenceStrings(Comparison.GAP_DASH);
return findSimilarities(seqs, options);
}
public class ScoreMatrix implements SimilarityScoreModelI,
PairwiseScoreModelI
{
+ private static final char GAP_CHARACTER = Comparison.GAP_DASH;
+
/*
* Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide)
* for pairwise scoring; 2.10.2 uses gap score (last column) in
/**
 * Computes a similarity matrix for the sequences held in the given alignment
 * view. The view's sequences are extracted as strings, using a gap character
 * chosen as described below, and the work is delegated to the
 * {@code findSimilarities} overload that takes a string array.
 * 
 * @param seqstrings
 *          the alignment view supplying the sequences
 * @param options
 *          similarity parameters, passed through unchanged
 * @return the matrix returned by the string-array overload
 */
public MatrixI findSimilarities(AlignmentView seqstrings,
SimilarityParamsI options)
{
// when scoreGapAsAny is set, gaps are passed as 'N' (nucleotide view) or
// 'X' (otherwise); if not set, the matrix's own gap symbol is used
- char gapChar = scoreGapAsAny ? (seqstrings.isNa() ? 'N' : 'X') : ' ';
+ char gapChar = scoreGapAsAny ? (seqstrings.isNa() ? 'N' : 'X')
+ : Comparison.GAP_DASH;
String[] seqs = seqstrings.getSequenceStrings(gapChar);
return findSimilarities(seqs, options);
}
break;
}
}
- // Change GAP_SPACE to GAP_DASH if we adopt - for gap in matrices
- char c1 = i >= len1 ? Comparison.GAP_SPACE : seq1.charAt(i);
- char c2 = i >= len2 ? Comparison.GAP_SPACE : seq2.charAt(i);
+
+ char c1 = i >= len1 ? GAP_CHARACTER : seq1.charAt(i);
+ char c2 = i >= len2 ? GAP_CHARACTER : seq2.charAt(i);
boolean gap1 = Comparison.isGap(c1);
boolean gap2 = Comparison.isGap(c2);
assertEquals(sm.getMatrixIndex('D'), 3);
assertEquals(sm.getMatrixIndex('X'), 22);
assertEquals(sm.getMatrixIndex('x'), 22);
- assertEquals(sm.getMatrixIndex(' '), 23);
+ assertEquals(sm.getMatrixIndex('-'), 23);
assertEquals(sm.getMatrixIndex('*'), 24);
assertEquals(sm.getMatrixIndex('.'), -1);
- assertEquals(sm.getMatrixIndex('-'), -1);
+ assertEquals(sm.getMatrixIndex(' '), -1);
assertEquals(sm.getMatrixIndex('?'), -1);
assertEquals(sm.getMatrixIndex((char) 128), -1);
}
public void testComputePairwiseScores()
{
/*
- * NB score matrix assumes space for gap - Jalview converts
- * space to gap before computing PCA or Tree
+ * NB score matrix expects '-' for gap
*/
- String[] seqs = new String[] { "FKL", "R D", "QIA", "GWC" };
+ String[] seqs = new String[] { "FKL", "R-D", "QIA", "GWC" };
ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
MatrixI pairwise = sm.findSimilarities(seqs, SimilarityParams.Jalview);
@Test(groups = "Functional")
public void testcomputeSimilarity_matchLongestSequence()
{
- // TODO params.matchGaps() is not used for ScoreMatrix
- // - includeGaps is sufficient (there is no denominator)
- // ==> bespoke parameters only 3 booleans?
/*
- * for now, using space for gap to match callers of
- * AlignmentView.getSequenceStrings()
- * may change this to '-' (with corresponding change to matrices)
+ * ScoreMatrix expects '-' for gaps
*/
- String s1 = "FR K S";
- String s2 = "FS L";
+ String s1 = "FR-K-S";
+ String s2 = "FS--L";
ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
/*
@Test(groups = "Functional")
public void testcomputeSimilarity_matchShortestSequence()
{
- // TODO params.matchGaps() is not used for ScoreMatrix
- // - includeGaps is sufficient (there is no denominator)
- // ==> bespoke parameters only 3 booleans?
/*
- * for now, using space for gap to match callers of
- * AlignmentView.getSequenceStrings()
- * may change this to '-' (with corresponding change to matrices)
+ * ScoreMatrix expects '-' for gaps
*/
- String s1 = "FR K S";
- String s2 = "FS L";
+ String s1 = "FR-K-S";
+ String s2 = "FS--L";
ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62();
/*