label.principal_component_analysis = Principal Component Analysis
label.average_distance_identity = Average Distance Using % Identity
label.neighbour_joining_identity = Neighbour Joining Using % Identity
+label.choose_tree = Choose Tree Calculation
label.treecalc_title = {0} Using {1}
label.tree_calc_av = Average Distance
label.tree_calc_nj = Neighbour Joining
label.principal_component_analysis = Análisis del Componente Principal
label.average_distance_identity = Distancia Media Usando % de Identidad
label.neighbour_joining_identity = Unir vecinos utilizando % de Identidad
+label.choose_tree = Elegir el cálculo del árbol
label.treecalc_title = {0} utilizando {1}
label.tree_calc_av = Distancia media
label.tree_calc_nj = Unir vecinos
--- /dev/null
+ScoreMatrix BLOSUM62
+ARNDCQEGHILKMFPSTWYVBZX *
+#
+# The BLOSUM62 substitution matrix, as at https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
+# The first line declares a ScoreMatrix with the name BLOSUM62 (shown in menus)
+# The second line gives the symbols for which scores are held in the matrix
+# These may include a space (but not as the first or last character)
+# Scores are not case sensitive, unless column(s) are provided for lower case characters
+#
+#
+# Comment line with symbols is provided as a guide
+# Values may be integer or floating point, delimited by tab, space, comma or combinations
+#
+# A R N D C Q E G H I L K M F P S T W Y V B Z X *
+#
+ 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -4
+ -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 -4
+ -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 -4
+ -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 -4
+ 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 -4
+ -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 -4
+ -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -4
+ 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 -4
+ -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 -4
+ -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 -4
+ -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 -4
+ -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 -4
+ -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 -4
+ -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 -4
+ -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 -4
+ 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 -4
+ 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 -4
+ -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 -4
+ -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 -4
+ 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 -4
+ -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 -4
+ -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -4
+ 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 -4
+ -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 1
+ -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 1
+#
+# A R N D C Q E G H I L K M F P S T W Y V B Z X *
--- /dev/null
+ScoreMatrix DNA
+ACGTUIXRYN -
+#
+# A DNA substitution matrix.
+# This is an ad-hoc matrix which, in addition to penalising mutations between the common
+# nucleotides (ACGT), includes T/U equivalence in order to allow both DNA and/or RNA.
+# In addition, it encodes weak equivalence between R and Y with AG and CTU, respectively,
+# and N is allowed to match any other base weakly.
+# This matrix also includes I (Inosine) and X (Xanthine), but encodes them to weakly match
+# any of (ACGTU), and unfavourably match each other.
+#
+# The first line declares a ScoreMatrix with the name DNA (shown in menus)
+# The second line gives the symbols for which scores are held in the matrix
+# These may include a space (but not as the first or last character)
+# Scores are not case sensitive, unless column(s) are provided for lower case characters
+#
+#
+# Comment line with symbols is provided as a guide
+# Values may be integer or floating point, delimited by tab, space, comma or combinations
+#
+# A C G T U I X R Y N -
+#
+ 10 -8 -8 -8 -8 1 1 1 -8 1 1 1
+ -8 10 -8 -8 -8 1 1 -8 1 1 1 1
+ -8 -8 10 -8 -8 1 1 1 -8 1 1 1
+ -8 -8 -8 10 10 1 1 -8 1 1 1 1
+ -8 -8 -8 10 10 1 1 -8 1 1 1 1
+ 1 1 1 1 1 10 0 0 0 1 1 1
+ 1 1 1 1 1 0 10 0 0 1 1 1
+ 1 -8 1 -8 -8 0 0 10 -8 1 1 1
+ -8 1 -8 1 1 0 0 -8 10 1 1 1
+ 1 1 1 1 1 1 1 1 1 10 1 1
+ 1 1 1 1 1 1 1 1 1 1 1 1
+ 1 1 1 1 1 1 1 1 1 1 1 1
+#
+# A C G T U I X R Y N -
--- /dev/null
+ScoreMatrix PAM250
+ARNDCQEGHILKMFPSTWYVBZX *
+#
+# The PAM250 substitution matrix
+# The first line declares a ScoreMatrix with the name PAM250 (shown in menus)
+# The second line gives the symbols for which scores are held in the matrix
+# These may include a space (but not as the first or last character)
+# Scores are not case sensitive, unless column(s) are provided for lower case characters
+#
+#
+# Comment line with symbols is provided as a guide
+# Values may be integer or floating point, delimited by tab, space, comma or combinations
+#
+# A R N D C Q E G H I L K M F P S T W Y V B Z X *
+#
+ 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8 -8
+ -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8 -8
+ 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8 -8
+ 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8 -8
+ -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8 -8
+ 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8 -8
+ 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8 -8
+ 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8 -8
+ -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8 -8
+ -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8 -8
+ -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8 -8
+ -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8 -8
+ -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8 -8
+ -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8 -8
+ 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8 -8
+ 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8 -8
+ 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8 -8
+ -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8 -8
+ -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8 -8
+ 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8 -8
+ 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8 -8
+ 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8 -8
+ 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8 -8
+ -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 1
+ -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 1
+#
+# A R N D C Q E G H I L K M F P S T W Y V B Z X *
colourBySequence();
- int max = -10;
+ float max = -10;
int maxchain = -1;
int pdbstart = 0;
int pdbend = 0;
colourBySequence();
- int max = -10;
+ float max = -10;
int maxchain = -1;
int pdbstart = 0;
int pdbend = 0;
static String[] pep = { "A", "R", "N", "D", "C", "Q", "E", "G", "H", "I",
"L", "K", "M", "F", "P", "S", "T", "W", "Y", "V", "B", "Z", "X", "-" };
- int[][] score;
+ float[][] score;
- int[][] E;
+ float[][] E;
- int[][] F;
+ float[][] F;
int[][] traceback;
int count;
/** DOCUMENT ME!! */
- public int maxscore;
+ public float maxscore;
float pid;
int gapExtend = 20;
- int[][] lookup = ResidueProperties.getBLOSUM62();
+ float[][] lookup = ResidueProperties.getBLOSUM62();
String[] intToStr = pep;
*
* @return DOCUMENT ME!
*/
- public int getMaxScore()
+ public float getMaxScore()
{
return maxscore;
}
seq2 = new int[s2str.length()];
// System.out.println("seq2 " + rt.freeMemory() + " " + rt.totalMemory());
- score = new int[s1str.length()][s2str.length()];
+ score = new float[s1str.length()][s2str.length()];
// System.out.println("score " + rt.freeMemory() + " " + rt.totalMemory());
- E = new int[s1str.length()][s2str.length()];
+ E = new float[s1str.length()][s2str.length()];
// System.out.println("E " + rt.freeMemory() + " " + rt.totalMemory());
- F = new int[s1str.length()][s2str.length()];
+ F = new float[s1str.length()][s2str.length()];
traceback = new int[s1str.length()][s2str.length()];
// System.out.println("F " + rt.freeMemory() + " " + rt.totalMemory());
public void traceAlignment()
{
// Find the maximum score along the rhs or bottom row
- int max = -9999;
+ float max = -9999;
for (int i = 0; i < seq1.length; i++)
{
public int findTrace(int i, int j)
{
int t = 0;
- int max = score[i - 1][j - 1] + (lookup[seq1[i]][seq2[j]] * 10);
+ float max = score[i - 1][j - 1] + (lookup[seq1[i]][seq2[j]] * 10);
if (F[i][j] > max)
{
/**
* DOCUMENT ME!
*
- * @param i1
+ * @param f1
* DOCUMENT ME!
- * @param i2
+ * @param f2
* DOCUMENT ME!
- * @param i3
+ * @param f3
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
- public int max(int i1, int i2, int i3)
+ public float max(float f1, float f2, float f3)
{
- int max = i1;
+ float max = f1;
- if (i2 > i1)
+ if (f2 > f1)
{
- max = i2;
+ max = f2;
}
- if (i3 > max)
+ if (f3 > max)
{
- max = i3;
+ max = f3;
}
return max;
/**
* DOCUMENT ME!
*
- * @param i1
+ * @param f1
* DOCUMENT ME!
- * @param i2
+ * @param f2
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
- public int max(int i1, int i2)
+ public float max(float f1, float f2)
{
- int max = i1;
+ float max = f1;
- if (i2 > i1)
+ if (f2 > f1)
{
- max = i2;
+ max = f2;
}
return max;
{
SequenceI bestm = null;
AlignSeq bestaseq = null;
- int bestscore = 0;
+ float bestscore = 0;
for (SequenceI msq : al.getSequences())
{
AlignSeq aseq = doGlobalNWAlignment(msq, sq, dnaOrProtein);
quality = new Vector<Double>();
double max = -10000;
- int[][] BLOSUM62 = ResidueProperties.getBLOSUM62();
+ float[][] BLOSUM62 = ResidueProperties.getBLOSUM62();
// Loop over columns // JBPNote Profiling info
// long ts = System.currentTimeMillis();
*/
package jalview.analysis;
+import jalview.analysis.scoremodels.ScoreModels;
import jalview.api.analysis.ScoreModelI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.BinaryNode;
import jalview.datamodel.SequenceI;
import jalview.datamodel.SequenceNode;
import jalview.io.NewickFile;
-import jalview.schemes.ResidueProperties;
import java.util.Enumeration;
import java.util.List;
if (sm == null && !(pwtype.equals("PID")))
{
- if (ResidueProperties.getScoreMatrix(pwtype) == null)
+ if (ScoreModels.getInstance().forName(pwtype) == null)
{
pwtype = "BLOSUM62";
}
if (_pwmatrix == null)
{
// Resolve substitution model
- _pwmatrix = ResidueProperties.getScoreModel(pwtype);
+ _pwmatrix = ScoreModels.getInstance().forName(pwtype);
if (_pwmatrix == null)
{
- _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
+ _pwmatrix = ScoreModels.getInstance().forName("BLOSUM62");
}
}
dist = _pwmatrix.findDistances(seqData);
package jalview.analysis;
import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
import jalview.datamodel.BinarySequence;
import jalview.datamodel.BinarySequence.InvalidSequenceTypeException;
import jalview.math.Matrix;
-import jalview.schemes.ResidueProperties;
import java.io.PrintStream;
String sm = s_m;
if (sm != null)
{
- smtrx = ResidueProperties.getScoreMatrix(sm);
+ // forName() answers any registered ScoreModelI, and the registry also
+ // holds models that are not pairwise score matrices, so test the type
+ // rather than cast blindly: a non-matrix (or unknown) name leaves smtrx
+ // null and the default matrix below is used instead of a
+ // ClassCastException being thrown
+ Object named = ScoreModels.getInstance().forName(sm);
+ smtrx = (named instanceof ScoreMatrix) ? (ScoreMatrix) named : null;
}
if (smtrx == null)
{
// either we were given a non-existent score matrix or a scoremodel that
// isn't based on a pairwise symbol score matrix
- smtrx = ResidueProperties.getScoreMatrix(sm = (nucleotides ? "DNA"
- : "BLOSUM62"));
+ smtrx = (ScoreMatrix) ScoreModels.getInstance().forName(
+ sm = (nucleotides ? "DNA" : "BLOSUM62"));
}
details.append("PCA calculation using " + sm
+ " sequence similarity matrix\n========\n\n");
public abstract class PairwiseSeqScoreModel implements ScoreModelI
{
- abstract public int getPairwiseScore(char c, char d);
+ abstract public float getPairwiseScore(char c, char d);
+ @Override
public float[][] findDistances(AlignmentView seqData)
{
String[] sequenceString = seqData
int noseqs = sequenceString.length;
float[][] distance = new float[noseqs][noseqs];
- int maxscore = 0;
+ float maxscore = 0;
int end = sequenceString[0].length();
for (int i = 0; i < (noseqs - 1); i++)
{
for (int j = i; j < noseqs; j++)
{
- int score = 0;
+ float score = 0;
for (int k = 0; k < end; k++)
{
}
}
- distance[i][j] = (float) score;
+ distance[i][j] = score;
if (score > maxscore)
{
{
for (int j = i; j < noseqs; j++)
{
- distance[i][j] = (float) maxscore - distance[i][j];
+ distance[i][j] = maxscore - distance[i][j];
distance[j][i] = distance[i][j];
}
}
return distance;
}
- abstract public int[][] getMatrix();
+ abstract public float[][] getMatrix();
}
package jalview.analysis.scoremodels;
import jalview.api.analysis.ScoreModelI;
-import jalview.schemes.ResidueProperties;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.StringTokenizer;
public class ScoreMatrix extends PairwiseSeqScoreModel implements
ScoreModelI
{
- String name;
+ public static final short UNMAPPED = (short) -1;
- @Override
- public String getName()
- {
- return name;
- }
+ private static final String DELIMITERS = " ,\t";
- /**
- * reference to integer score matrix
+ private static final String COMMENT_CHAR = "#";
+
+ private static final String BAD_ASCII_ERROR = "Unexpected character %s in getPairwiseScore";
+
+ private static final int MAX_ASCII = 127;
+
+ /*
+ * the name of the model as shown in menus
*/
- int[][] matrix;
+ private String name;
- /**
- * 0 for Protein Score matrix. 1 for dna score matrix
+ /*
+ * the characters that the model provides scores for
+ */
+ private char[] symbols;
+
+ /*
+ * the score matrix; both dimensions must equal the number of symbols
+ * matrix[i][j] is the substitution score for replacing symbols[i] with symbols[j]
+ */
+ private float[][] matrix;
+
+ /*
+ * quick lookup to convert from an ascii character value to the index
+ * of the corresponding symbol in the score matrix
*/
- int type;
+ private short[] symbolIndex;
+
+ /*
+ * true for Protein Score matrix, false for dna score matrix
+ */
+ private boolean peptide;
/**
+ * Constructor
*
* @param name
* Unique, human readable name for the matrix
+ * @param alphabet
+ * the symbols to which scores apply
* @param matrix
- * Pairwise scores indexed according to appropriate symbol alphabet
- * @param type
- * 0 for Protein, 1 for NA
+ * Pairwise scores indexed according to the symbol alphabet
*/
- public ScoreMatrix(String name, int[][] matrix, int type)
+ public ScoreMatrix(String name, char[] alphabet, float[][] matrix)
{
this.matrix = matrix;
- this.type = type;
this.name = name;
+ this.symbols = alphabet;
+
+ symbolIndex = buildSymbolIndex(alphabet);
+
+ /*
+ * crude heuristic for now...
+ */
+ peptide = alphabet.length >= 20;
+ }
+
+ /**
+ * Builds a lookup table, indexed by ASCII character value, giving the
+ * position of each symbol within the alphabet array. For example, for the
+ * alphabet { 'A', 'D', 'X' } the table holds 1 at index 'D' (68).
+ * <p>
+ * Entries for characters that are not in the alphabet hold UNMAPPED (-1).
+ * Alphabet characters above MAX_ASCII get no entry but still occupy a
+ * position.
+ * <p>
+ * Each upper case symbol A-Z also claims the entry of its lower case
+ * equivalent (giving case insensitive scoring), but only while that entry is
+ * still unmapped; a lower case symbol that is itself in the alphabet always
+ * takes its own position.
+ *
+ * @param alphabet
+ * the scored symbols, in score matrix order
+ * @return an array of length MAX_ASCII + 1 of alphabet positions
+ */
+ static short[] buildSymbolIndex(char[] alphabet)
+ {
+ final int caseOffset = 'a' - 'A';
+ short[] lookup = new short[MAX_ASCII + 1];
+ Arrays.fill(lookup, UNMAPPED);
+
+ for (int i = 0; i < alphabet.length; i++)
+ {
+ char symbol = alphabet[i];
+ short position = (short) i;
+
+ if (symbol <= MAX_ASCII)
+ {
+ lookup[symbol] = position;
+ }
+
+ /*
+ * upper case letters also answer for their lower case form, unless
+ * that has been mapped already
+ */
+ boolean upperCaseLetter = symbol >= 'A' && symbol <= 'Z';
+ if (upperCaseLetter && lookup[symbol + caseOffset] == UNMAPPED)
+ {
+ lookup[symbol + caseOffset] = position;
+ }
+ }
+ return lookup;
+ }
+
+ @Override
+ public String getName()
+ {
+ return name;
}
@Override
public boolean isDNA()
{
- return type == 1;
+ return !peptide;
}
@Override
public boolean isProtein()
{
- return type == 0;
+ return peptide;
}
@Override
- public int[][] getMatrix()
+ public float[][] getMatrix()
{
return matrix;
}
/**
- *
- * @param A1
- * @param A2
- * @return score for substituting first char in A1 with first char in A2
+ * Returns the pairwise score for substituting c with d, or zero if c or d is
+ * an unscored or unexpected character
*/
- public int getPairwiseScore(String A1, String A2)
- {
- return getPairwiseScore(A1.charAt(0), A2.charAt(0));
- }
-
@Override
- public int getPairwiseScore(char c, char d)
+ public float getPairwiseScore(char c, char d)
{
- int pog = 0;
-
- try
+ if (c > MAX_ASCII)
{
- int a = (type == 0) ? ResidueProperties.aaIndex[c]
- : ResidueProperties.nucleotideIndex[c];
- int b = (type == 0) ? ResidueProperties.aaIndex[d]
- : ResidueProperties.nucleotideIndex[d];
-
- pog = matrix[a][b];
- } catch (Exception e)
+ System.err.println(String.format(BAD_ASCII_ERROR, c));
+ return 0;
+ }
+ if (d > MAX_ASCII)
{
- // System.out.println("Unknown residue in " + A1 + " " + A2);
+ System.err.println(String.format(BAD_ASCII_ERROR, d));
+ return 0;
}
- return pog;
+ int cIndex = symbolIndex[c];
+ int dIndex = symbolIndex[d];
+ if (cIndex != UNMAPPED && dIndex != UNMAPPED)
+ {
+ return matrix[cIndex][dIndex];
+ }
+ return 0;
}
/**
return outputMatrix(false);
}
+ /**
+ * Print the score matrix, optionally formatted as html, with the alphabet
+ * symbols as column headings and at the start of each row. Scores are read
+ * by alphabet position (matrix[i][j] is the score for symbols[i] and
+ * symbols[j]) rather than through the ascii lookup table, so that symbols
+ * outside the ascii range cannot cause an array index error.
+ *
+ * @param html
+ * if true, output an html table, else tab-delimited text
+ * @return the formatted matrix
+ */
public String outputMatrix(boolean html)
{
- StringBuffer sb = new StringBuffer();
- int[] symbols = (type == 0) ? ResidueProperties.aaIndex
- : ResidueProperties.nucleotideIndex;
- int symMax = (type == 0) ? ResidueProperties.maxProteinIndex
- : ResidueProperties.maxNucleotideIndex;
- boolean header = true;
+ StringBuilder sb = new StringBuilder(512);
+
+ /*
+ * heading row with alphabet
+ */
if (html)
{
sb.append("<table border=\"1\">");
+ sb.append("<tr><th></th>");
+ }
+ for (char sym : symbols)
+ {
+ if (html)
+ {
+ sb.append("<th> ").append(sym).append(" </th>");
+ }
+ else
+ {
+ sb.append("\t").append(sym);
+ }
+ }
+ sb.append(html ? "</tr>\n" : "\n");
+
+ /*
+ * table of scores, one row per symbol
+ */
+ for (int i = 0; i < symbols.length; i++)
+ {
+ if (html)
+ {
+ sb.append("<tr><td>");
+ }
+ sb.append(symbols[i]).append(html ? "</td>" : "");
+ for (int j = 0; j < symbols.length; j++)
+ {
+ sb.append(html ? "<td>" : "\t").append(matrix[i][j])
+ .append(html ? "</td>" : "");
+ }
+ sb.append(html ? "</tr>\n" : "\n");
}
- for (char sym = 'A'; sym <= 'Z'; sym++)
+ if (html)
+ {
+ sb.append("</table>");
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Parse a score matrix from the given input stream and returns a ScoreMatrix
+ * object. If parsing fails, error messages are written to syserr and null is
+ * returned. It is the caller's responsibility to close the input stream.
+ * <p>
+ * Expected content is a line 'ScoreMatrix <name>', then a line holding the
+ * scored symbols, then one row of scores for each symbol, each row holding
+ * one value per symbol. Blank lines, and lines starting with '#', are
+ * ignored wherever they occur.
+ *
+ * @param is
+ * the stream to read the score matrix from
+ * @return the parsed matrix, or null if the input is empty, cannot be read,
+ * or is not correctly formatted
+ */
+ public static ScoreMatrix parse(InputStream is)
+ {
+ ScoreMatrix sm = null;
+ BufferedReader br = new BufferedReader(new InputStreamReader(is));
+ int lineNo = 0;
+ String name = null;
+ String alphabet = null;
+ float[][] scores = null;
+ int size = 0;
+ int row = 0;
+
+ try
{
- if (symbols[sym] >= 0 && symbols[sym] < symMax)
+ String data;
+
+ while ((data = br.readLine()) != null)
{
- if (header)
+ lineNo++;
+ data = data.trim();
+ /*
+ * skip blank lines as well as comments, so that a stray or trailing
+ * empty line does not invalidate an otherwise good file (or get
+ * mistaken for an empty alphabet)
+ */
+ if (data.length() == 0 || data.startsWith(COMMENT_CHAR))
+ {
+ continue;
+ }
+ if (data.toLowerCase().startsWith("scorematrix"))
{
- sb.append(html ? "<tr><td></td>" : "");
- for (char sym2 = 'A'; sym2 <= 'Z'; sym2++)
+ /*
+ * Parse name from ScoreMatrix <name>
+ */
+ if (name != null)
+ {
+ System.err
+ .println("Warning: 'ScoreMatrix' repeated in file at line "
+ + lineNo);
+ }
+ StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS);
+ if (nameLine.countTokens() != 2)
{
- if (symbols[sym2] >= 0 && symbols[sym2] < symMax)
- {
- sb.append((html ? "<td> " : "\t") + sym2
- + (html ? " </td>" : ""));
- }
+ System.err
+ .println("Format error: expected 'ScoreMatrix <name>', found '"
+ + data + "' at line " + lineNo);
+ return null;
}
- header = false;
- sb.append(html ? "</tr>\n" : "\n");
+ nameLine.nextToken();
+ name = nameLine.nextToken();
+ continue;
}
- if (html)
+ else if (name == null)
{
- sb.append("<tr>");
+ System.err
+ .println("Format error: 'ScoreMatrix <name>' should be the first non-comment line");
+ return null;
}
- sb.append((html ? "<td>" : "") + sym + (html ? "</td>" : ""));
- for (char sym2 = 'A'; sym2 <= 'Z'; sym2++)
+
+ /*
+ * next line after ScoreMatrix should be the alphabet of scored symbols
+ */
+ if (alphabet == null)
+ {
+ alphabet = data;
+ size = alphabet.length();
+ scores = new float[size][];
+ continue;
+ }
+
+ /*
+ * too much information? (data is never empty here)
+ */
+ if (row >= size)
+ {
+ System.err
+ .println("Unexpected extra input line in score model file "
+ + data);
+ return null;
+ }
+
+ /*
+ * subsequent lines should be the symbol scores
+ */
+ StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
+ if (scoreLine.countTokens() != size)
{
- if (symbols[sym2] >= 0 && symbols[sym2] < symMax)
+ System.err.println(String.format(
+ "Expected %d tokens at line %d but found %d", size,
+ lineNo, scoreLine.countTokens()));
+ return null;
+ }
+ scores[row] = new float[size];
+ int col = 0;
+ String value = null;
+ while (scoreLine.hasMoreTokens())
+ {
+ try
+ {
+ value = scoreLine.nextToken();
+ scores[row][col] = Float.parseFloat(value);
+ col++;
+ } catch (NumberFormatException e)
{
- sb.append((html ? "<td>" : "\t")
- + matrix[symbols[sym]][symbols[sym2]]
- + (html ? "</td>" : ""));
+ System.err.println(String.format(
+ "Invalid score value %s at line %d column %d", value,
+ lineNo, col));
+ return null;
}
}
- sb.append(html ? "</tr>\n" : "\n");
+ row++;
}
+ } catch (IOException e)
+ {
+ System.err.println("Error reading score matrix file: "
+ + e.getMessage() + " at line " + lineNo);
+ /*
+ * a read failure is a parse failure: do not build a matrix from
+ * whatever had been read so far
+ */
+ return null;
}
- if (html)
+
+ /*
+ * empty (or comment only) input leaves the name or alphabet unset
+ */
+ if (name == null || alphabet == null)
+ {
+ System.err
+ .println("Format error: expected 'ScoreMatrix <name>' and a line of symbols, but reached end of input");
+ return null;
+ }
+
+ /*
+ * out of data - check we found enough
+ */
+ if (row < size)
{
- sb.append("</table>");
+ System.err
+ .println(String
+ .format("Expected %d rows of score data in score matrix but only found %d",
+ size, row));
+ return null;
}
- return sb.toString();
+
+ /*
+ * If we get here, then name, alphabet and scores have been parsed successfully
+ */
+ sm = new ScoreMatrix(name, alphabet.toCharArray(), scores);
+ return sm;
}
}
package jalview.analysis.scoremodels;
import jalview.api.analysis.ScoreModelI;
-import jalview.schemes.ResidueProperties;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
import java.util.Map;
import java.util.TreeMap;
* using TreeMap keeps models ordered alphabetically by name
*/
models = new TreeMap<String, ScoreModelI>(String.CASE_INSENSITIVE_ORDER);
- registerScoreModel(new ScoreMatrix("BLOSUM62",
- ResidueProperties.BLOSUM62, 0));
- registerScoreModel(new ScoreMatrix("PAM250", ResidueProperties.PAM250,
- 0));
- registerScoreModel(new ScoreMatrix("DNA", ResidueProperties.DNA, 1));
+ loadScoreMatrix("/scoreModel/blosum62.scm");
+ loadScoreMatrix("/scoreModel/pam250.scm");
+ loadScoreMatrix("/scoreModel/dna.scm");
registerScoreModel(new FeatureScoreModel());
registerScoreModel(new PIDScoreModel());
}
- public Iterable<String> getModelNames()
+ /**
+ * Try to load a score matrix from the given resource file, and if successful,
+ * register it. Answers true if successful, else false. Any errors are
+ * reported on syserr but not thrown.
+ *
+ * @param resourcePath
+ * path of the score matrix file, located using Class.getResource
+ * @return true if a score matrix was parsed and registered, else false
+ */
+ boolean loadScoreMatrix(String resourcePath)
+ {
+ URL url = this.getClass().getResource(resourcePath);
+ if (url == null)
+ {
+ System.err.println("Failed to locate " + resourcePath);
+ return false;
+ }
+ boolean success = false;
+ InputStream is = null;
+ try
+ {
+ is = url.openStream();
+ ScoreMatrix sm = ScoreMatrix.parse(is);
+ if (sm != null)
+ {
+ registerScoreModel(sm);
+ success = true;
+ }
+ else
+ {
+ /*
+ * parse() reports what was wrong but does not know which file it
+ * was reading, so name the resource here
+ */
+ System.err.println("Failed to parse score matrix from "
+ + resourcePath);
+ }
+ } catch (IOException e)
+ {
+ System.err.println("Failed to read " + resourcePath + ": "
+ + e.getMessage());
+ } finally
+ {
+ if (is != null)
+ {
+ try
+ {
+ is.close();
+ } catch (IOException e)
+ {
+ // nothing useful can be done if close fails; the outcome of the
+ // load has already been decided
+ }
+ }
+ }
+ return success;
+ }
+
+ /**
+ * Answers an iterable set of the registered score models. Currently these are
+ * ordered by name (not case sensitive).
+ *
+ * @return
+ */
+ public Iterable<ScoreModelI> getModels()
{
- return models.keySet();
+ return models.values();
}
public ScoreModelI forName(String s)
: ResidueProperties.aaIndex, matrix.getMatrix());
}
- private void matrixEncode(final int[] aaIndex, final int[][] matrix)
+ private void matrixEncode(final int[] aaIndex, final float[][] matrix)
{
// Set all matrix to 0
// dbinary = new double[getSequence().length * 21];
*/
package jalview.gui;
+import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.api.analysis.ScoreModelI;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SeqCigar;
import jalview.datamodel.SequenceI;
import jalview.jbgui.GPCAPanel;
-import jalview.schemes.ResidueProperties;
import jalview.util.MessageManager;
import jalview.viewmodel.AlignmentViewport;
import jalview.viewmodel.PCAModel;
protected void scoreMatrix_menuSelected()
{
scoreMatrixMenu.removeAll();
- for (final String sm : ResidueProperties.scoreMatrices.keySet())
+ for (ScoreModelI sm : ScoreModels.getInstance().getModels())
{
- if (ResidueProperties.getScoreMatrix(sm) != null)
+ if (sm instanceof ScoreMatrix)
{
+ final String name = sm.getName();
// create an entry for this score matrix for use in PCA
JCheckBoxMenuItem jm = new JCheckBoxMenuItem();
jm.setText(MessageManager.getStringOrReturn("label.score_model_",
- sm));
- jm.setSelected(pcaModel.getScore_matrix().equals(sm));
- if ((ResidueProperties.scoreMatrices.get(sm).isDNA() && ResidueProperties.scoreMatrices
- .get(sm).isProtein())
- || pcaModel.isNucleotide() == ResidueProperties.scoreMatrices
- .get(sm).isDNA())
+ name));
+ jm.setSelected(pcaModel.getScore_matrix().equals(name));
+ if ((!pcaModel.isNucleotide() && !sm.isDNA())
+ || (pcaModel.isNucleotide() && sm.isDNA()))
{
- final PCAPanel us = this;
jm.addActionListener(new ActionListener()
{
@Override
public void actionPerformed(ActionEvent e)
{
- if (!pcaModel.getScore_matrix().equals(sm))
+ if (!pcaModel.getScore_matrix().equals(name))
{
- pcaModel.setScore_matrix(sm);
- Thread worker = new Thread(us);
+ pcaModel.setScore_matrix(name);
+ Thread worker = new Thread(PCAPanel.this);
worker.start();
}
}
matrixNames = new JComboBox<String>();
ScoreModels scoreModels = ScoreModels.getInstance();
- for (String scoreType : scoreModels.getModelNames())
+ for (ScoreModelI sm : scoreModels.getModels())
{
- ScoreModelI sm = scoreModels.forName(scoreType);
- if (sm.isDNA() == af.getViewport().getAlignment().isNucleotide()
- || sm.isProtein() == !af.getViewport().getAlignment()
- .isNucleotide())
+ boolean nucleotide = af.getViewport().getAlignment().isNucleotide();
+ if (sm.isDNA() && nucleotide || sm.isProtein() && !nucleotide)
{
matrixNames.addItem(sm.getName());
}
import jalview.analysis.AlignmentSorter;
import jalview.analysis.NJTree;
+import jalview.analysis.scoremodels.ScoreModels;
import jalview.api.analysis.ScoreModelI;
import jalview.api.analysis.ViewBasedAnalysisI;
import jalview.bin.Cache;
import jalview.io.JalviewFileView;
import jalview.io.NewickFile;
import jalview.jbgui.GTreePanel;
-import jalview.schemes.ResidueProperties;
import jalview.util.ImageMaker;
import jalview.util.MessageManager;
import jalview.viewmodel.AlignmentViewport;
seqs = av.getSelectionGroup().getSequencesInOrder(
av.getAlignment());
}
- ScoreModelI sm = ResidueProperties.getScoreModel(pwtype);
+ ScoreModelI sm = ScoreModels.getInstance().forName(pwtype);
if (sm instanceof ViewBasedAnalysisI)
{
try
}
else
{
- int c = 0;
+ float score = 0;
for (char consensus : consensusResidue.toCharArray())
{
- c += ResidueProperties.getBLOSUM62(consensus, res);
+ score += ResidueProperties.getBLOSUM62(consensus, res);
}
- if (c > 0)
+ if (score > 0)
{
colour = LIGHT_BLUE;
}
// public static final double hydmax = 1.38;
// public static final double hydmin = -2.53;
- public static final int[][] BLOSUM62 = {
+ public static final float[][] BLOSUM62 = {
{ 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3,
-2, 0, -2, -1, 0, -4 },
{ -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3,
{ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
-4, -4, -4, -4, -4, -4, 1 }, };
- public static final int[][] PAM250 = {
+ public static final float[][] PAM250 = {
{ 2, -2, 0, 0, -2, 0, 0, 1, -1, -1, -2, -1, -1, -3, 1, 1, 1, -6, -3,
0, 0, 0, 0, -8 },
{ -2, 6, 0, -1, -4, 1, -1, -3, 2, -2, -3, 3, 0, -4, 0, 0, -1, 2, -4,
// treats T and U identically. R and Y weak equivalence with AG and CTU.
// N matches any other base weakly
//
- public static final int[][] DNA = {
+ public static final float[][] DNA = {
{ 10, -8, -8, -8, -8, 1, 1, 1, -8, 1, 1 }, // A
{ -8, 10, -8, -8, -8, 1, 1, -8, 1, 1, 1 }, // C
{ -8, -8, 10, -8, -8, 1, 1, 1, -8, 1, 1 }, // G
*/
static
{
- scoreMatrices.put("BLOSUM62", new ScoreMatrix("BLOSUM62", BLOSUM62, 0));
- scoreMatrices.put("PAM250", new ScoreMatrix("PAM250", PAM250, 0));
- scoreMatrices.put("DNA", new ScoreMatrix("DNA", DNA, 1));
+ // scoreMatrices.put("BLOSUM62", new ScoreMatrix("BLOSUM62", BLOSUM62));
+ // scoreMatrices.put("PAM250", new ScoreMatrix("PAM250", PAM250));
+ // scoreMatrices.put("DNA", new ScoreMatrix("DNA", DNA));
}
public static List<String> STOP = Arrays.asList("TGA", "TAA", "TAG");
return aa3Hash;
}
- public static int[][] getDNA()
+ public static float[][] getDNA()
{
return ResidueProperties.DNA;
}
- public static int[][] getBLOSUM62()
+ public static float[][] getBLOSUM62()
{
return ResidueProperties.BLOSUM62;
}
- public static int getPAM250(String A1, String A2)
+ public static float getPAM250(String A1, String A2)
{
return getPAM250(A1.charAt(0), A2.charAt(0));
}
- public static int getBLOSUM62(char c1, char c2)
+ public static float getBLOSUM62(char c1, char c2)
{
- int pog = 0;
+ float pog = 0;
try
{
return cdn;
}
- public static int[][] getDefaultPeptideMatrix()
+ public static float[][] getDefaultPeptideMatrix()
{
return ResidueProperties.getBLOSUM62();
}
- public static int[][] getDefaultDnaMatrix()
+ public static float[][] getDefaultDnaMatrix()
{
return ResidueProperties.getDNA();
}
return scoreMatrices.get(pwtype);
}
- public static int getPAM250(char c, char d)
+ public static float getPAM250(char c, char d)
{
int a = aaIndex[c];
int b = aaIndex[d];
- int pog = ResidueProperties.PAM250[a][b];
+ float pog = ResidueProperties.PAM250[a][b];
return pog;
}
* Attempt pairwise alignment of the sequence with each chain in the PDB,
* and remember the highest scoring chain
*/
- int max = -10;
+ float max = -10;
AlignSeq maxAlignseq = null;
String maxChainId = " ";
PDBChain maxChain = null;
seqstrings = seqstrings2;
seqs = seqs2;
nucleotide = nucleotide2;
- score_matrix = nucleotide2 ? "PID" : "BLOSUM62";
+ score_matrix = nucleotide2 ? "DNA" : "BLOSUM62";
}
private volatile PCA pca;
for (int i = 0; i < pca.getM().rows; i++)
{
- ((SequencePoint) points.elementAt(i)).coord = scores[i];
+ points.elementAt(i).coord = scores[i];
}
}
};
as.printAlignment(ps);
- String expected = "Score = 320\nLength of alignment = 10\nSequence Seq1 : 3 - 18 (Sequence length = 14)\nSequence Seq1 : 1 - 10 (Sequence length = 10)\n\n"
+ String expected = "Score = 320.0\nLength of alignment = 10\nSequence Seq1 : 3 - 18 (Sequence length = 14)\nSequence Seq1 : 1 - 10 (Sequence length = 10)\n\n"
+ "Seq1 SDFAQQQRRR\n"
+ " ||||||| \n"
+ "Seq1 SDFAQQQSSS\n\n" + "Percentage ID = 70.00\n";
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for ScoreMatrix: construction of the symbol-to-index lookup, and
+ * parsing of score matrix data (both well-formed and malformed).
+ */
+public class ScoreMatrixTest
+{
+  /**
+   * Checks that each symbol is mapped to its position in the symbol array;
+   * that an upper-case symbol also maps its lower-case form, unless that
+   * lower-case character is itself in the symbol array; and that a lower-case
+   * symbol does not map its upper-case form.
+   */
+  @Test(groups = "Functional")
+  public void testBuildSymbolIndex()
+  {
+    short[] index = ScoreMatrix.buildSymbolIndex("AX-. yxYp".toCharArray());
+
+    assertEquals(index.length, 128); // ASCII character set size
+
+    assertEquals(index['A'], 0);
+    assertEquals(index['a'], 0); // lower-case mapping added
+    assertEquals(index['X'], 1);
+    assertEquals(index['-'], 2);
+    assertEquals(index['.'], 3);
+    assertEquals(index[' '], 4);
+    assertEquals(index['y'], 5); // lower-case override
+    assertEquals(index['x'], 6); // lower-case override
+    assertEquals(index['Y'], 7);
+    assertEquals(index['p'], 8);
+    assertEquals(index['P'], -1); // lower-case doesn't map upper-case
+
+    /*
+     * check all unmapped symbols have index for unmapped
+     */
+    for (int c = 0; c < index.length; c++)
+    {
+      if (!"AaXx-. Yyp".contains(String.valueOf((char) c)))
+      {
+        assertEquals(index[c], -1);
+      }
+    }
+  }
+
+  /**
+   * check that characters not in the basic ASCII set are simply ignored (they
+   * get no entry in the index, although they still count towards the position
+   * of the symbols that follow them - so 'P' below has index 2)
+   */
+  @Test(groups = "Functional")
+  public void testBuildSymbolIndex_nonAscii()
+  {
+    char[] weird = new char[] { 128, 245, 'P' };
+    short[] index = ScoreMatrix.buildSymbolIndex(weird);
+    assertEquals(index.length, 128);
+    assertEquals(index['P'], 2);
+    assertEquals(index['p'], 2);
+    for (int c = 0; c < index.length; c++)
+    {
+      if (c != 'P' && c != 'p')
+      {
+        assertEquals(index[c], -1);
+      }
+    }
+  }
+
+  /**
+   * Test a successful parse of a (small) score matrix file
+   */
+  @Test(groups = "Functional")
+  public void testParse()
+  {
+    /*
+     * some messy but valid input data, with comma, space
+     * or tab (or combinations) as score value delimiters;
+     * row n holds the values n.1 to n.7
+     * (take care not to write a backslash before a digit in these
+     * literals: "\4" is an octal escape for the character U+0004)
+     */
+    String data = "ScoreMatrix MyTest\n" + "ATU tx-\n"
+            + "1.1,1.2,1.3,1.4, 1.5, 1.6, 1.7\n"
+            + "2.1 2.2 2.3 2.4 2.5 2.6 2.7\n"
+            + "3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t3.7\n"
+            + " 4.1 ,4.2,\t,4.3 ,\t4.4\t, 4.5,4.6 4.7\n"
+            + ", 5.1,5.2,5.3,5.4,5.5, 5.6, 5.7\n"
+            + "\t6.1, 6.2 6.3 6.4 6.5 6.6 6.7\n"
+            + ", \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6,7.7\n";
+    ScoreMatrix sm = ScoreMatrix.parse(new ByteArrayInputStream(data
+            .getBytes()));
+    assertNotNull(sm);
+    assertEquals(sm.getName(), "MyTest");
+    assertTrue(sm.isDNA());
+    assertFalse(sm.isProtein());
+    assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f);
+    assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f);
+    assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent
+    assertEquals(sm.getPairwiseScore('A', 't'), 1.5f); // T/t not equivalent
+    assertEquals(sm.getPairwiseScore('a', 't'), 1.5f);
+    assertEquals(sm.getPairwiseScore('T', ' '), 2.4f);
+    assertEquals(sm.getPairwiseScore('U', 'x'), 3.6f);
+    assertEquals(sm.getPairwiseScore('u', 'x'), 3.6f);
+    assertEquals(sm.getPairwiseScore(' ', 't'), 4.5f); // mixed delimiters
+    assertEquals(sm.getPairwiseScore('t', 'T'), 5.2f); // leading delimiter
+    assertEquals(sm.getPairwiseScore('U', 'X'), 0f); // X (upper) unmapped
+    assertEquals(sm.getPairwiseScore('A', '.'), 0f); // . unmapped
+    assertEquals(sm.getPairwiseScore('-', '-'), 7.7f);
+    assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range
+  }
+
+  /**
+   * Checks that parse returns null, rather than a ScoreMatrix, for each of
+   * several kinds of malformed input (after first confirming that the
+   * well-formed version of the same data does parse)
+   */
+  @Test(groups = "Functional")
+  public void testParse_invalidInput()
+  {
+    /*
+     * valid first
+     */
+    String data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n";
+    ScoreMatrix sm = ScoreMatrix.parse(new ByteArrayInputStream(data
+            .getBytes()));
+    assertNotNull(sm);
+
+    /*
+     * Name missing
+     */
+    data = "ScoreMatrix\nXY\n1 2\n3 4\n";
+    sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+    assertNull(sm);
+
+    /*
+     * ScoreMatrix header missing
+     */
+    data = "XY\n1 2\n3 4\n";
+    sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+    assertNull(sm);
+
+    /*
+     * Not enough rows
+     */
+    data = "ScoreMatrix MyTest\nXY\n1 2\n";
+    sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+    assertNull(sm);
+
+    /*
+     * Not enough columns
+     */
+    data = "ScoreMatrix MyTest\nXY\n1 2\n3\n";
+    sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+    assertNull(sm);
+
+    /*
+     * Too many columns
+     */
+    data = "ScoreMatrix MyTest\nXY\n1 2\n3 4 5\n";
+    sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+    assertNull(sm);
+
+    /*
+     * Too many rows
+     */
+    data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n6 7";
+    sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+    assertNull(sm);
+
+    /*
+     * unsupported delimiter |
+     */
+    data = "ScoreMatrix MyTest\nXY\n1|2\n3|4\n";
+    sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+    assertNull(sm);
+
+    /*
+     * Bad float value
+     */
+    data = "ScoreMatrix MyTest\nXY\n1 2\n3 four\n";
+    sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes()));
+    assertNull(sm);
+
+  }
+}