X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fio%2FScoreMatrixFileTest.java;h=99ec9a6bcd08d7791c59a6e7fbb905c07f5df490;hb=57738a1f3c19b1c3a00bd3ac5108f8cd0af32f99;hp=77b7282531e2fcc290d15b9c8faee6b2a5c03982;hpb=45e015aabe8f35a4a13be26e7630641ef8c94fbb;p=jalview.git diff --git a/test/jalview/io/ScoreMatrixFileTest.java b/test/jalview/io/ScoreMatrixFileTest.java index 77b7282..99ec9a6 100644 --- a/test/jalview/io/ScoreMatrixFileTest.java +++ b/test/jalview/io/ScoreMatrixFileTest.java @@ -3,19 +3,28 @@ package jalview.io; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; import java.io.IOException; import java.net.MalformedURLException; +import org.testng.annotations.AfterMethod; import org.testng.annotations.Test; public class ScoreMatrixFileTest { + @AfterMethod(alwaysRun = true) + public void tearDownAfterTest() + { + ScoreModels.getInstance().reset(); + } + /** * Test a successful parse of a (small) score matrix file * @@ -23,63 +32,52 @@ public class ScoreMatrixFileTest * @throws MalformedURLException */ @Test(groups = "Functional") - public void testParse() throws MalformedURLException, IOException + public void testParseMatrix_ncbiMixedDelimiters() + throws MalformedURLException, IOException { /* * some messy but valid input data, with comma, space * or tab (or combinations) as score value delimiters * this example includes 'guide' symbols on score rows */ - String data = "ScoreMatrix MyTest\n" + "ATU tx-\n" - + "A,1.1,1.2,1.3,1.4, 1.5, 1.6, 1.7\n" - + "T,2.1 2.2 2.3 2.4 2.5 2.6 2.7\n" - + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t3.7\n" - + " 4.1 ,4.2,\t,4.3 ,\t4.4\t, \4.5,4.6 4.7\n" - + "t, 5.1,5.3,5.3,5.4,5.5, 5.6, 5.7\n" - + "x\t6.1, 6.2 6.3 6.4 6.5 6.6 6.7\n" - + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6,7.7\n"; + String data = "ScoreMatrix MyTest (example)\n" + "A\tT\tU\tt\tx\t-\n" + + "A,1.1,1.2,1.3,1.4, 1.5, 1.6\n" + + "T,2.1 2.2 2.3 2.4 2.5 2.6\n" + + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t\n" + + "t, 5.1,5.3,5.3,5.4,5.5, 5.6\n" + + "x\t6.1, 6.2 6.3 6.4 6.5 6.6\n" + + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6\n"; FileParse fp = new FileParse(data, DataSourceType.PASTE); ScoreMatrixFile parser = new ScoreMatrixFile(fp); ScoreMatrix sm = parser.parseMatrix(); assertNotNull(sm); - assertEquals(sm.getName(), "MyTest"); + assertEquals(sm.getName(), "MyTest (example)"); + assertEquals(sm.getSize(), 6); + assertNull(sm.getDescription()); assertTrue(sm.isDNA()); assertFalse(sm.isProtein()); + assertEquals(sm.getMinimumScore(), 1.1f); assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f); assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f); assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent - assertEquals(sm.getPairwiseScore('A', 't'), 1.5f); // T/t not equivalent - assertEquals(sm.getPairwiseScore('a', 't'), 1.5f); - assertEquals(sm.getPairwiseScore('T', ' '), 2.4f); - assertEquals(sm.getPairwiseScore('U', 'x'), 3.6f); - assertEquals(sm.getPairwiseScore('u', 'x'), 3.6f); - assertEquals(sm.getPairwiseScore('U', 'X'), 0f); // X (upper) unmapped - assertEquals(sm.getPairwiseScore('A', '.'), 0f); // . unmapped - assertEquals(sm.getPairwiseScore('-', '-'), 7.7f); + assertEquals(sm.getPairwiseScore('A', 't'), 1.4f); // T/t not equivalent + assertEquals(sm.getPairwiseScore('a', 't'), 1.4f); + assertEquals(sm.getPairwiseScore('U', 'x'), 3.5f); + assertEquals(sm.getPairwiseScore('u', 'x'), 3.5f); + // X (upper) and '.' unmapped - get minimum score + assertEquals(sm.getPairwiseScore('U', 'X'), 1.1f); + assertEquals(sm.getPairwiseScore('A', '.'), 1.1f); + assertEquals(sm.getPairwiseScore('-', '-'), 7.6f); assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range - - /* - * without guide symbols on score rows - */ - data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n"; - fp = new FileParse(data, DataSourceType.PASTE); - parser = new ScoreMatrixFile(fp); - sm = parser.parseMatrix(); - assertNotNull(sm); - assertEquals(sm.getPairwiseScore('X', 'X'), 1f); - assertEquals(sm.getPairwiseScore('X', 'y'), 2f); - assertEquals(sm.getPairwiseScore('y', 'x'), 3f); - assertEquals(sm.getPairwiseScore('y', 'Y'), 4f); - assertEquals(sm.getPairwiseScore('D', 'R'), 0f); } @Test(groups = "Functional") - public void testParse_headerMissing() + public void testParseMatrix_headerMissing() { String data; - data = "XY\n1 2\n3 4\n"; + data = "X Y\n1 2\n3 4\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -93,9 +91,9 @@ public class ScoreMatrixFileTest } @Test(groups = "Functional") - public void testParse_notEnoughRows() + public void testParseMatrix_ncbiNotEnoughRows() { - String data = "ScoreMatrix MyTest\nXY\n1 2\n"; + String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5 6\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -104,14 +102,14 @@ public class ScoreMatrixFileTest } catch (IOException e) { assertEquals(e.getMessage(), - "Expected 2 rows of score data in score matrix but only found 1"); + "Expected 3 rows of score data in score matrix but only found 2"); } } @Test(groups = "Functional") - public void testParse_notEnoughColumns() + public void testParseMatrix_ncbiNotEnoughColumns() { - String data = "ScoreMatrix MyTest\nXY\n1 2\n3\n"; + String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5\n7 8 9\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -120,17 +118,17 @@ public class ScoreMatrixFileTest } catch (IOException e) { assertEquals(e.getMessage(), - "Expected 2 scores at line 4 but found 1"); + "Expected 3 scores at line 4: '4 5' but found 2"); } } @Test(groups = "Functional") - public void testParse_tooManyColumns() + public void testParseMatrix_ncbiTooManyColumns() { /* * with two too many columns: */ - String data = "ScoreMatrix MyTest\nXY\n1 2\n3 4 5 6\n"; + String data = "ScoreMatrix MyTest\nX\tY\tZ\n1 2 3\n4 5 6 7\n8 9 10\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -139,13 +137,13 @@ public class ScoreMatrixFileTest } catch (IOException e) { assertEquals(e.getMessage(), - "Expected 2 scores at line 4 but found 4"); + "Expected 3 scores at line 4: '4 5 6 7' but found 4"); } /* * with guide character and one too many columns: */ - data = "ScoreMatrix MyTest\nXY\nX 1 2\nY 3 4 5\n"; + data = "ScoreMatrix MyTest\nX Y\nX 1 2\nY 3 4 5\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -154,14 +152,13 @@ public class ScoreMatrixFileTest } catch (IOException e) { assertEquals(e.getMessage(), - "Expected 2 scores at line 4 but found 4"); + "Expected 2 scores at line 4: 'Y 3 4 5' but found 3"); } /* - * with no guide character and one too many columns: - * parser guesses the first column is the guide character + * with no guide character and one too many columns */ - data = "ScoreMatrix MyTest\nXY\n1 2\n3 4 5\n"; + data = "ScoreMatrix MyTest\nX Y\n1 2\n3 4 5\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -170,14 +167,14 @@ public class ScoreMatrixFileTest } catch (IOException e) { assertEquals(e.getMessage(), - "Error parsing score matrix at line 4, expected 'Y' but found '3'"); + "Expected 2 scores at line 4: '3 4 5' but found 3"); } } @Test(groups = "Functional") - public void testParse_tooManyRows() + public void testParseMatrix_ncbiTooManyRows() { - String data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n6 7"; + String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 5 6\n7 8 9\n10 11 12\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -186,14 +183,14 @@ public class ScoreMatrixFileTest } catch (IOException e) { assertEquals(e.getMessage(), - "Unexpected extra input line in score model file: '6 7'"); + "Unexpected extra input line in score model file: '10 11 12'"); } } @Test(groups = "Functional") - public void testParse_badDelimiter() + public void testParseMatrix_ncbiBadDelimiter() { - String data = "ScoreMatrix MyTest\nXY\n1|2\n3|4\n"; + String data = "ScoreMatrix MyTest\n X Y Z\n1|2|3\n4|5|6\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -202,14 +199,14 @@ public class ScoreMatrixFileTest } catch (IOException e) { assertEquals(e.getMessage(), - "Expected 2 scores at line 3 but found 1"); + "Invalid score value '1|2|3' at line 3 column 0"); } } @Test(groups = "Functional") - public void testParse_badFloat() + public void testParseMatrix_ncbiBadFloat() { - String data = "ScoreMatrix MyTest\nXY\n1 2\n3 four\n"; + String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 five 6\n7 8 9\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -218,14 +215,14 @@ public class ScoreMatrixFileTest } catch (IOException e) { assertEquals(e.getMessage(), - "Invalid score value 'four' at line 4 column 1"); + "Invalid score value 'five' at line 4 column 1"); } } @Test(groups = "Functional") - public void testParse_badGuideCharacter() + public void testParseMatrix_ncbiBadGuideCharacter() { - String data = "ScoreMatrix MyTest\nXY\nX 1 2\ny 3 4\n"; + String data = "ScoreMatrix MyTest\n\tX Y\nX 1 2\ny 3 4\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -236,15 +233,27 @@ public class ScoreMatrixFileTest assertEquals(e.getMessage(), "Error parsing score matrix at line 4, expected 'Y' but found 'y'"); } + + data = "ScoreMatrix MyTest\n\tX Y\nXX 1 2\nY 3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Error parsing score matrix at line 3, expected 'X' but found 'XX'"); + } } @Test(groups = "Functional") - public void testParse_nameMissing() + public void testParseMatrix_ncbiNameMissing() { /* - * Name missing + * Name missing on ScoreMatrix header line */ - String data = "ScoreMatrix\nXY\n1 2\n3 4\n"; + String data = "ScoreMatrix\nX Y\n1 2\n3 4\n"; try { new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) @@ -252,9 +261,240 @@ public class ScoreMatrixFileTest fail("expected exception"); } catch (IOException e) { - assertEquals( - e.getMessage(), + assertEquals(e.getMessage(), "Format error: expected 'ScoreMatrix ', found 'ScoreMatrix' at line 1"); } } + + /** + * Test a successful parse of a (small) score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParseMatrix_ncbiFormat() + throws MalformedURLException, IOException + { + // input including comment and blank lines + String data = "ScoreMatrix MyTest\n#comment\n\n" + "\tA\tB\tC\n" + + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm = parser.parseMatrix(); + + assertNotNull(sm); + assertEquals(sm.getName(), "MyTest"); + assertEquals(parser.getMatrixName(), "MyTest"); + assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f); + assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f); + assertEquals(sm.getSize(), 3); + } + + /** + * Test a successful parse of a (small) score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParseMatrix_aaIndexBlosum80() + throws MalformedURLException, IOException + { + FileParse fp = new FileParse("resources/scoreModel/blosum80.scm", + DataSourceType.FILE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm = parser.parseMatrix(); + + assertNotNull(sm); + assertEquals(sm.getName(), "HENS920103"); + assertEquals(sm.getDescription(), + "BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)"); + assertFalse(sm.isDNA()); + assertTrue(sm.isProtein()); + assertEquals(20, sm.getSize()); + + assertEquals(sm.getPairwiseScore('A', 'A'), 7f); + assertEquals(sm.getPairwiseScore('A', 'R'), -3f); + assertEquals(sm.getPairwiseScore('r', 'a'), -3f); // A/a equivalent + } + + /** + * Test a successful parse of a (small) score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParseMatrix_aaindexFormat() + throws MalformedURLException, IOException + { + /* + * aaindex format has scores for diagonal and below only + */ + String data = "H MyTest\n" + "D My description\n" + "R PMID:1438297\n" + + "A Authors, names\n" + "T Journal title\n" + + "J Journal reference\n" + "* matrix in 1/3 Bit Units\n" + + "M rows = ABC, cols = ABC\n" + "A\t1.0\n" + "B\t4.0\t5.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm = parser.parseMatrix(); + + assertNotNull(sm); + assertEquals(sm.getSize(), 3); + assertEquals(sm.getName(), "MyTest"); + assertEquals(sm.getDescription(), "My description"); + assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f); + assertEquals(sm.getPairwiseScore('A', 'B'), 4.0f); + assertEquals(sm.getPairwiseScore('A', 'C'), 7.0f); + assertEquals(sm.getPairwiseScore('B', 'A'), 4.0f); + assertEquals(sm.getPairwiseScore('B', 'B'), 5.0f); + assertEquals(sm.getPairwiseScore('B', 'C'), 8.0f); + assertEquals(sm.getPairwiseScore('C', 'C'), 9.0f); + assertEquals(sm.getPairwiseScore('C', 'B'), 8.0f); + assertEquals(sm.getPairwiseScore('C', 'A'), 7.0f); + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_mMissing() + throws MalformedURLException, IOException + { + /* + * aaindex format but M cols=, rows= is missing + */ + String data = "H MyTest\n" + "A\t1.0\n" + "B\t4.0\t5.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals(e.getMessage(), "No alphabet specified in matrix file"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_rowColMismatch() + throws MalformedURLException, IOException + { + String data = "H MyTest\n" + "M rows=ABC, cols=ABD\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals(e.getMessage(), + "Unexpected aaIndex score matrix data at line 2: M rows=ABC, cols=ABD rows != cols"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiHeaderRepeated() + { + String data = "ScoreMatrix BLOSUM\nScoreMatrix PAM250\nX Y\n1 2\n3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Error: 'ScoreMatrix' repeated in file at line 2"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_tooManyRows() + throws MalformedURLException, IOException + { + String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals(e.getMessage(), "Too many data rows in matrix file"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_extraDataLines() + throws MalformedURLException, IOException + { + String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "something extra\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals(e.getMessage(), "Too many data rows in matrix file"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_tooFewColumns() + throws MalformedURLException, IOException + { + String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals(e.getMessage(), + "Expected 3 scores at line 5: 'C\t7.0\t8.0' but found 2"); + } + } + + /** + * Test a successful parse and register of a score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParse_ncbiFormat() + throws MalformedURLException, IOException + { + assertNull(ScoreModels.getInstance().getScoreModel("MyNewTest", null)); + + String data = "ScoreMatrix MyNewTest\n" + "\tA\tB\tC\n" + + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + + parser.parse(); + + ScoreMatrix sm = (ScoreMatrix) ScoreModels.getInstance() + .getScoreModel("MyNewTest", null); + assertNotNull(sm); + assertEquals(sm.getName(), "MyNewTest"); + assertEquals(parser.getMatrixName(), "MyNewTest"); + assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f); + assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f); + assertEquals(sm.getSize(), 3); + } }