private String matrixName;
- boolean lowerDiagonalOnly;
+ /*
+ * aaindex format has scores for diagonal and below only
+ */
+ boolean isLowerDiagonalOnly;
+ /*
+ * ncbi format has symbols as first column on score rows
+ */
+ boolean hasGuideColumn;
* Constructor
int row = 0;
String err = null;
String data;
- lowerDiagonalOnly = false;
+ isLowerDiagonalOnly = false;
while ((data = nextLine()) != null)
if (name != null)
- System.err
- .println("Warning: 'ScoreMatrix' repeated in file at line "
+ throw new FileFormatException(
+ "Error: 'ScoreMatrix' repeated in file at line "
+ lineNo);
StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS);
parseValues(data, lineNo, scores, row, alphabet);
- if (row == size)
- {
- break;
- }
- if (data != null)
- {
- System.err.println("Warning: unexpected extra data in matrix file: "
- + data);
- }
ScoreMatrix sm = new ScoreMatrix(name, alphabet, scores);
StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
int tokenCount = scoreLine.countTokens();
- if (tokenCount == size + 1)
+ /*
+ * inspect first row to see if it includes the symbol in the first column,
+ * and to see if it is lower diagonal values only (i.e. just one score)
+ */
+ if (row == 0)
+ {
+ if (data.startsWith(String.valueOf(alphabet[0])))
+ {
+ hasGuideColumn = true;
+ }
+ if (tokenCount == (hasGuideColumn ? 2 : 1))
+ {
+ isLowerDiagonalOnly = true;
+ }
+ }
+ if (hasGuideColumn)
* check 'guide' symbol is the row'th letter of the alphabet
lineNo, alphabet[row], symbol);
throw new FileFormatException(err);
+ tokenCount = scoreLine.countTokens(); // excluding guide symbol
- tokenCount = scoreLine.countTokens();
- * AAIndex format only has the lower diagonal i.e.
- * 1 score in row 0, 2 in row 1, etc
- * check this in all but the last row (which is the same either way)
+ * check the right number of values (lower diagonal or full format)
- if (row < size - 1)
+ if (isLowerDiagonalOnly && tokenCount != row + 1)
- boolean lowerDiagonal = tokenCount == row + 1;
- if (lowerDiagonalOnly && !lowerDiagonal)
- {
- /*
- * had detected lower diagonal form but now it isn't - error
- */
- err = String.format("Unexpected number of tokens at line %d",
- lineNo);
+ err = String.format(
+ "Expected %d scores at line %d: '%s' but found %d", row + 1,
+ lineNo, data, tokenCount);
throw new FileFormatException(err);
- }
- lowerDiagonalOnly = lowerDiagonal;
- if (!lowerDiagonalOnly && tokenCount != size)
+ if (!isLowerDiagonalOnly && tokenCount != size)
- err = String.format("Expected %d scores at line %d but found %d",
- size, lineNo, scoreLine.countTokens());
+ err = String.format(
+ "Expected %d scores at line %d: '%s' but found %d", size,
+ lineNo, data, scoreLine.countTokens());
throw new FileFormatException(err);
+ /*
+ * parse and set the values, setting the symmetrical value
+ * as well if lower diagonal format data
+ */
scores[row] = new float[size];
int col = 0;
String value = null;
value = scoreLine.nextToken();
scores[row][col] = Float.valueOf(value);
- if (lowerDiagonalOnly)
+ if (isLowerDiagonalOnly)
scores[col][row] = scores[row][col];
return false;
- /**
- * Answers true if the data line consists of the alphabet characters,
- * delimited (as to provide a heading row). Otherwise returns false (e.g. if
- * the data is a row of score values).
- *
- * @param data
- * @param alphabet
- * @return
- */
- private boolean isHeaderLine(String data, String alphabet)
- {
- StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
- int i = 0;
- while (scoreLine.hasMoreElements())
- {
- /*
- * skip over characters in the alphabet that are
- * also a delimiter (e.g. space)
- */
- char symbol = alphabet.charAt(i++);
- if (!DELIMITERS.contains(String.valueOf(symbol)))
- {
- if (!String.valueOf(symbol).equals(scoreLine.nextToken()))
- {
- return false;
- }
- }
- }
- return true;
- }
public String getMatrixName()
return matrixName;
import static;
import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
* @throws MalformedURLException
@Test(groups = "Functional")
- public void testParse() throws MalformedURLException, IOException
+ public void testParseMatrix_ncbiMixedDelimiters()
+ throws MalformedURLException,
+ IOException
* some messy but valid input data, with comma, space
@Test(groups = "Functional")
- public void testParse_headerMissing()
+ public void testParseMatrix_headerMissing()
String data;
@Test(groups = "Functional")
- public void testParse_notEnoughRows()
+ public void testParseMatrix_ncbiNotEnoughRows()
- String data = "ScoreMatrix MyTest\nX Y\n1 2\n";
+ String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5 6\n";
new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
} catch (IOException e)
- "Expected 2 rows of score data in score matrix but only found 1");
+ "Expected 3 rows of score data in score matrix but only found 2");
@Test(groups = "Functional")
- public void testParse_notEnoughColumns()
+ public void testParseMatrix_ncbiNotEnoughColumns()
- String data = "ScoreMatrix MyTest\nX Y\n1 2\n3\n";
+ String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5\n7 8 9\n";
new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
} catch (IOException e)
- "Expected 2 scores at line 4 but found 1");
+ "Expected 3 scores at line 4: '4 5' but found 2");
@Test(groups = "Functional")
- public void testParse_tooManyColumns()
+ public void testParseMatrix_ncbiTooManyColumns()
- * with two too many columns:
+ * with one too many columns:
- String data = "ScoreMatrix MyTest\nX\tY\n1 2\n3 4 5 6\n";
+ String data = "ScoreMatrix MyTest\nX\tY\tZ\n1 2 3\n4 5 6 7\n8 9 10\n";
new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
} catch (IOException e)
- "Expected 2 scores at line 4 but found 4");
+ "Expected 3 scores at line 4: '4 5 6 7' but found 4");
} catch (IOException e)
- "Expected 2 scores at line 4 but found 4");
+ "Expected 2 scores at line 4: 'Y 3 4 5' but found 3");
- * with no guide character and one too many columns:
- * parser guesses the first column is the guide character
+ * with no guide character and one too many columns
data = "ScoreMatrix MyTest\nX Y\n1 2\n3 4 5\n";
} catch (IOException e)
- "Error parsing score matrix at line 4, expected 'Y' but found '3'");
+ "Expected 2 scores at line 4: '3 4 5' but found 3");
@Test(groups = "Functional")
- public void testParse_tooManyRows()
+ public void testParseMatrix_ncbiTooManyRows()
- String data = "ScoreMatrix MyTest\n\tX\tY\n1 2\n3 4\n6 7";
+ String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 5 6\n7 8 9\n10 11 12\n";
new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
} catch (IOException e)
- "Unexpected extra input line in score model file: '6 7'");
+ "Unexpected extra input line in score model file: '10 11 12'");
@Test(groups = "Functional")
- public void testParse_badDelimiter()
+ public void testParseMatrix_ncbiBadDelimiter()
String data = "ScoreMatrix MyTest\n X Y Z\n1|2|3\n4|5|6\n";
@Test(groups = "Functional")
- public void testParse_badFloat()
+ public void testParseMatrix_ncbiBadFloat()
- String data = "ScoreMatrix MyTest\n\tX\tY\n1 2\n3 four\n";
+ String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 five 6\n7 8 9\n";
new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
} catch (IOException e)
- "Invalid score value 'four' at line 4 column 1");
+ "Invalid score value 'five' at line 4 column 1");
@Test(groups = "Functional")
- public void testParse_badGuideCharacter()
+ public void testParseMatrix_ncbiBadGuideCharacter()
String data = "ScoreMatrix MyTest\n\tX Y\nX 1 2\ny 3 4\n";
"Error parsing score matrix at line 4, expected 'Y' but found 'y'");
+ data = "ScoreMatrix MyTest\n\tX Y\nXX 1 2\nY 3 4\n";
+ try
+ {
+ new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
+ .parseMatrix();
+ fail("expected exception");
+ } catch (IOException e)
+ {
+ assertEquals(e.getMessage(),
+ "Error parsing score matrix at line 3, expected 'X' but found 'XX'");
+ }
@Test(groups = "Functional")
- public void testParse_nameMissing()
+ public void testParseMatrix_ncbiNameMissing()
- * Name missing
+ * Name missing on ScoreMatrix header line
String data = "ScoreMatrix\nX Y\n1 2\n3 4\n";
* @throws MalformedURLException
@Test(groups = "Functional")
- public void testParse_ncbiFormat() throws MalformedURLException,
+ public void testParseMatrix_ncbiFormat() throws MalformedURLException,
- String data = "ScoreMatrix MyTest\n" + "\tA\tB\tC\n"
+ // input including comment and blank lines
+ String data = "ScoreMatrix MyTest\n#comment\n\n" + "\tA\tB\tC\n"
+ "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
+ "C\t7.0\t8.0\t9.0\n";
FileParse fp = new FileParse(data, DataSourceType.PASTE);
assertEquals(sm.getName(), "MyTest");
+ assertEquals(parser.getMatrixName(), "MyTest");
assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
assertEquals(sm.getSize(), 3);
* @throws MalformedURLException
@Test(groups = "Functional")
- public void testParse_aaIndexBlosum80() throws MalformedURLException,
+ public void testParseMatrix_aaIndexBlosum80()
+ throws MalformedURLException,
FileParse fp = new FileParse("resources/scoreModel/blosum80.scm",
assertEquals(sm.getPairwiseScore('A', 'R'), -3f);
assertEquals(sm.getPairwiseScore('r', 'a'), -3f); // A/a equivalent
+ /**
+ * Tests a successful parse of a small (3x3) score matrix supplied in
+ * aaindex format, where each score row holds values for the diagonal and
+ * below only. Verifies the matrix size, name and description, and that
+ * every off-diagonal score is returned for both orientations of the pair
+ * (e.g. A/B and B/A both give 4.0).
+ *
+ * @throws IOException
+ * @throws MalformedURLException
+ */
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindexFormat() throws MalformedURLException,
+ IOException
+ {
+ /*
+ * aaindex format has scores for diagonal and below only; the assertions
+ * below show the 'H' line supplying the name and the 'D' line the
+ * description (the 'M' line presumably declares the symbols - confirm)
+ */
+ String data = "H MyTest\n" + "D My description\n" + "R PMID:1438297\n"
+ + "A Authors, names\n" + "T Journal title\n"
+ + "J Journal reference\n" + "* matrix in 1/3 Bit Units\n"
+ + "M rows = ABC, cols = ABC\n" + "A\t1.0\n"
+ + "B\t4.0\t5.0\n"
+ + "C\t7.0\t8.0\t9.0\n";
+ FileParse fp = new FileParse(data, DataSourceType.PASTE);
+ ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+ ScoreMatrix sm = parser.parseMatrix();
+ assertNotNull(sm);
+ assertEquals(sm.getSize(), 3);
+ assertEquals(sm.getGapIndex(), -1);
+ assertEquals(sm.getName(), "MyTest");
+ assertEquals(sm.getDescription(), "My description");
+ assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
+ assertEquals(sm.getPairwiseScore('A', 'B'), 4.0f);
+ assertEquals(sm.getPairwiseScore('A', 'C'), 7.0f);
+ assertEquals(sm.getPairwiseScore('B', 'A'), 4.0f);
+ assertEquals(sm.getPairwiseScore('B', 'B'), 5.0f);
+ assertEquals(sm.getPairwiseScore('B', 'C'), 8.0f);
+ assertEquals(sm.getPairwiseScore('C', 'C'), 9.0f);
+ assertEquals(sm.getPairwiseScore('C', 'B'), 8.0f);
+ assertEquals(sm.getPairwiseScore('C', 'A'), 7.0f);
+ }
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_mMissing()
+ throws MalformedURLException,
+ IOException
+ {
+ /*
+ * aaindex format but the 'M rows = ..., cols = ...' line is missing;
+ * the parse is expected to fail, reporting that no alphabet was
+ * specified
+ */
+ String data = "H MyTest\n" + "A\t1.0\n"
+ + "B\t4.0\t5.0\n"
+ + "C\t7.0\t8.0\t9.0\n";
+ FileParse fp = new FileParse(data, DataSourceType.PASTE);
+ ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+ try
+ {
+ parser.parseMatrix();
+ fail("Expected exception");
+ } catch (FileFormatException e)
+ {
+ assertEquals(e.getMessage(), "No alphabet specified in matrix file");
+ }
+ }
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_rowColMismatch()
+ throws MalformedURLException,
+ IOException
+ { // the 'M' line declares rows=ABC but cols=ABD; the mismatch must be rejected
+ String data = "H MyTest\n" + "M rows=ABC, cols=ABD\n" + "A\t1.0\n"
+ + "B\t4.0\t5.0\n"
+ + "C\t7.0\t8.0\t9.0\n";
+ FileParse fp = new FileParse(data, DataSourceType.PASTE);
+ ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+ try
+ {
+ parser.parseMatrix();
+ fail("Expected exception");
+ } catch (FileFormatException e)
+ {
+ assertEquals(
+ e.getMessage(),
+ "Unexpected aaIndex score matrix data at line 2: M rows=ABC, cols=ABD rows != cols");
+ }
+ }
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiHeaderRepeated()
+ { // a second 'ScoreMatrix' header line (at line 2) must be reported as an error
+ String data = "ScoreMatrix BLOSUM\nScoreMatrix PAM250\nX Y\n1 2\n3 4\n";
+ try
+ {
+ new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
+ .parseMatrix();
+ fail("expected exception");
+ } catch (IOException e)
+ {
+ assertEquals(e.getMessage(),
+ "Error: 'ScoreMatrix' repeated in file at line 2");
+ }
+ }
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_tooManyRows()
+ throws MalformedURLException,
+ IOException
+ { // the 'C' score row is repeated after the three declared rows have been supplied
+ String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
+ + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "C\t7.0\t8.0\t9.0\n";
+ FileParse fp = new FileParse(data, DataSourceType.PASTE);
+ ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+ try
+ {
+ parser.parseMatrix();
+ fail("Expected exception");
+ } catch (FileFormatException e)
+ {
+ assertEquals(e.getMessage(), "Too many data rows in matrix file");
+ }
+ }
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_extraDataLines()
+ throws MalformedURLException,
+ IOException
+ { // a non-score line follows the three score rows; same error expected as for an extra row
+ String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
+ + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "something extra\n";
+ FileParse fp = new FileParse(data, DataSourceType.PASTE);
+ ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+ try
+ {
+ parser.parseMatrix();
+ fail("Expected exception");
+ } catch (FileFormatException e)
+ {
+ assertEquals(e.getMessage(), "Too many data rows in matrix file");
+ }
+ }
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_tooFewColumns()
+ throws MalformedURLException,
+ IOException
+ { // the third score row (C) supplies only 2 values where 3 are expected
+ String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
+ + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\n";
+ FileParse fp = new FileParse(data, DataSourceType.PASTE);
+ ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+ try
+ {
+ parser.parseMatrix();
+ fail("Expected exception");
+ } catch (FileFormatException e)
+ {
+ assertEquals(
+ e.getMessage(),
+ "Expected 3 scores at line 5: 'C\t7.0\t8.0' but found 2");
+ }
+ }
+ /**
+ * Tests that parse() on a 3x3 matrix with a heading row of symbols and a
+ * guide symbol at the start of each score row makes the matrix
+ * retrievable from ScoreModels under the name given on the 'ScoreMatrix'
+ * line. The name is asserted to be unknown to ScoreModels before parsing.
+ * <p>
+ * NOTE(review): nothing here removes "MyNewTest" again, so presumably a
+ * second run against the same ScoreModels instance would fail the initial
+ * assertNull - confirm.
+ *
+ * @throws IOException
+ * @throws MalformedURLException
+ */
+ @Test(groups = "Functional")
+ public void testParse_ncbiFormat() throws MalformedURLException,
+ IOException
+ {
+ assertNull(ScoreModels.getInstance().forName("MyNewTest"));
+ String data = "ScoreMatrix MyNewTest\n" + "\tA\tB\tC\n"
+ + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
+ + "C\t7.0\t8.0\t9.0\n";
+ FileParse fp = new FileParse(data, DataSourceType.PASTE);
+ ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+ parser.parse();
+ ScoreMatrix sm = (ScoreMatrix) ScoreModels.getInstance().forName(
+ "MyNewTest");
+ assertNotNull(sm);
+ assertEquals(sm.getName(), "MyNewTest");
+ assertEquals(parser.getMatrixName(), "MyNewTest");
+ assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
+ assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
+ assertEquals(sm.getSize(), 3);
+ }