3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertFalse;
5 import static org.testng.Assert.assertNotNull;
6 import static org.testng.Assert.assertNull;
7 import static org.testng.Assert.assertTrue;
8 import static org.testng.Assert.fail;
10 import jalview.analysis.scoremodels.ScoreMatrix;
11 import jalview.analysis.scoremodels.ScoreModels;
13 import java.io.IOException;
14 import java.net.MalformedURLException;
16 import org.testng.annotations.AfterMethod;
17 import org.testng.annotations.Test;
19 public class ScoreMatrixFileTest
// Runs after every test method (alwaysRun=true) and resets the shared
// ScoreModels instance.
// NOTE(review): presumably this removes any matrix a test registered (e.g.
// "MyNewTest" in testParse_ncbiFormat) - confirm against ScoreModels.reset().
22 @AfterMethod(alwaysRun=true)
23 public void tearDownAfterTest()
25 ScoreModels.getInstance().reset();
29 * Test a successful parse of a (small) score matrix file whose score rows mix comma, space and tab delimiters
32 * @throws MalformedURLException
// Parses a six-symbol matrix supplied as pasted text and checks its name,
// size, description, DNA/protein flags and a selection of pairwise scores.
// NOTE(review): the 't' row of the input holds 5.3 twice ("5.1,5.3,5.3");
// this may be a typo for 5.2, but no assertion in this test reads that row.
34 @Test(groups = "Functional")
35 public void testParseMatrix_ncbiMixedDelimiters()
36 throws MalformedURLException,
40 * some messy but valid input data, with comma, space
41 * or tab (or combinations) as score value delimiters
42 * this example includes 'guide' symbols on score rows
44 String data = "ScoreMatrix MyTest (example)\n" + "A\tT\tU\tt\tx\t-\n"
45 + "A,1.1,1.2,1.3,1.4, 1.5, 1.6\n"
46 + "T,2.1 2.2 2.3 2.4 2.5 2.6\n"
47 + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t\n"
48 + "t, 5.1,5.3,5.3,5.4,5.5, 5.6\n"
49 + "x\t6.1, 6.2 6.3 6.4 6.5 6.6\n"
50 + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6\n";
51 FileParse fp = new FileParse(data, DataSourceType.PASTE);
52 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
53 ScoreMatrix sm = parser.parseMatrix();
// the name is the text following the 'ScoreMatrix' keyword on the header
// line; the input supplies no description
56 assertEquals(sm.getName(), "MyTest (example)");
57 assertEquals(sm.getSize(), 6);
58 assertNull(sm.getDescription());
59 assertTrue(sm.isDNA());
60 assertFalse(sm.isProtein());
// 1.1 is the smallest value in the input data
61 assertEquals(sm.getMinimumScore(), 1.1f);
62 assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f);
63 assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f);
64 assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent
65 assertEquals(sm.getPairwiseScore('A', 't'), 1.4f); // T/t not equivalent
66 assertEquals(sm.getPairwiseScore('a', 't'), 1.4f);
67 assertEquals(sm.getPairwiseScore('U', 'x'), 3.5f);
68 assertEquals(sm.getPairwiseScore('u', 'x'), 3.5f);
69 // X (upper) and '.' unmapped - get minimum score
70 assertEquals(sm.getPairwiseScore('U', 'X'), 1.1f);
71 assertEquals(sm.getPairwiseScore('A', '.'), 1.1f);
72 assertEquals(sm.getPairwiseScore('-', '-'), 7.6f);
// a character code of 128 is expected to score 0 rather than the minimum
73 assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range
// Input with no 'ScoreMatrix <name>' line: the test expects an IOException
// carrying the message asserted below.
76 @Test(groups = "Functional")
77 public void testParseMatrix_headerMissing()
81 data = "X Y\n1 2\n3 4\n";
84 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
// reaching fail() means no exception was thrown
86 fail("expected exception");
87 } catch (IOException e)
89 assertEquals(e.getMessage(),
90 "Format error: 'ScoreMatrix <name>' should be the first non-comment line");
// Three column symbols (X Y Z) but only two rows of scores: the test expects
// an IOException reporting the expected and actual row counts.
94 @Test(groups = "Functional")
95 public void testParseMatrix_ncbiNotEnoughRows()
97 String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5 6\n";
100 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
102 fail("expected exception");
103 } catch (IOException e)
105 assertEquals(e.getMessage(),
106 "Expected 3 rows of score data in score matrix but only found 2");
// The second score row ('4 5') has two values where three are needed: the
// test expects an IOException naming the line number, the line text and the
// number of scores found.
110 @Test(groups = "Functional")
111 public void testParseMatrix_ncbiNotEnoughColumns()
113 String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5\n7 8 9\n";
116 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
118 fail("expected exception");
119 } catch (IOException e)
121 assertEquals(e.getMessage(),
122 "Expected 3 scores at line 4: '4 5' but found 2");
// Three variants of a score row holding more values than there are column
// symbols; each expects an IOException that quotes the offending line.
// NOTE(review): the first case is commented as "two too many columns", but
// its data row '4 5 6 7' has four values against three expected (one extra),
// which is also what the asserted message says.
126 @Test(groups = "Functional")
127 public void testParseMatrix_ncbiTooManyColumns()
130 * with two too many columns:
132 String data = "ScoreMatrix MyTest\nX\tY\tZ\n1 2 3\n4 5 6 7\n8 9 10\n";
135 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
137 fail("expected exception");
138 } catch (IOException e)
140 assertEquals(e.getMessage(),
141 "Expected 3 scores at line 4: '4 5 6 7' but found 4");
145 * with guide character and one too many columns:
147 data = "ScoreMatrix MyTest\nX Y\nX 1 2\nY 3 4 5\n";
150 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
152 fail("expected exception");
153 } catch (IOException e)
// the leading guide character 'Y' is not counted as a score (3 found, not 4)
155 assertEquals(e.getMessage(),
156 "Expected 2 scores at line 4: 'Y 3 4 5' but found 3");
160 * with no guide character and one too many columns
162 data = "ScoreMatrix MyTest\nX Y\n1 2\n3 4 5\n";
165 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
167 fail("expected exception");
168 } catch (IOException e)
170 assertEquals(e.getMessage(),
171 "Expected 2 scores at line 4: '3 4 5' but found 3");
// Four score rows for a three-symbol alphabet: the test expects an
// IOException that quotes the surplus line ('10 11 12').
175 @Test(groups = "Functional")
176 public void testParseMatrix_ncbiTooManyRows()
178 String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 5 6\n7 8 9\n10 11 12\n";
181 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
183 fail("expected exception");
184 } catch (IOException e)
186 assertEquals(e.getMessage(),
187 "Unexpected extra input line in score model file: '10 11 12'");
// Score rows delimited by '|', which is not among the comma/space/tab
// delimiters exercised in testParseMatrix_ncbiMixedDelimiters. The asserted
// message shows the whole row '1|2|3' reported as one invalid score value at
// column 0.
191 @Test(groups = "Functional")
192 public void testParseMatrix_ncbiBadDelimiter()
194 String data = "ScoreMatrix MyTest\n X Y Z\n1|2|3\n4|5|6\n";
197 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
199 fail("expected exception");
200 } catch (IOException e)
202 assertEquals(e.getMessage(),
203 "Invalid score value '1|2|3' at line 3 column 0");
// A non-numeric token ('five') in a score row: the test expects an
// IOException giving the token, its line number (4) and its column index (1,
// i.e. the second value on the row).
207 @Test(groups = "Functional")
208 public void testParseMatrix_ncbiBadFloat()
210 String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 five 6\n7 8 9\n";
213 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
215 fail("expected exception");
216 } catch (IOException e)
218 assertEquals(e.getMessage(),
219 "Invalid score value 'five' at line 4 column 1");
// Guide symbols at the start of score rows must match the column header.
// Two failing inputs are checked: a case mismatch ('y' where 'Y' is
// expected) and a two-character guide ('XX' where 'X' is expected).
223 @Test(groups = "Functional")
224 public void testParseMatrix_ncbiBadGuideCharacter()
226 String data = "ScoreMatrix MyTest\n\tX Y\nX 1 2\ny 3 4\n";
229 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
231 fail("expected exception");
232 } catch (IOException e)
234 assertEquals(e.getMessage(),
235 "Error parsing score matrix at line 4, expected 'Y' but found 'y'");
238 data = "ScoreMatrix MyTest\n\tX Y\nXX 1 2\nY 3 4\n";
241 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
243 fail("expected exception");
244 } catch (IOException e)
246 assertEquals(e.getMessage(),
247 "Error parsing score matrix at line 3, expected 'X' but found 'XX'");
// The header line consists of the 'ScoreMatrix' keyword alone: the test
// expects an IOException whose message quotes the header as found and its
// line number.
251 @Test(groups = "Functional")
252 public void testParseMatrix_ncbiNameMissing()
255 * Name missing on ScoreMatrix header line
257 String data = "ScoreMatrix\nX Y\n1 2\n3 4\n";
260 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
262 fail("expected exception");
263 } catch (IOException e)
267 "Format error: expected 'ScoreMatrix <name>', found 'ScoreMatrix' at line 1");
272 * Test a successful parse of a (small) score matrix whose input includes comment and blank lines
274 * @throws IOException
275 * @throws MalformedURLException
// Parses a 3x3 matrix whose input has a '#comment' line and a blank line
// between the header and the column symbols, then checks the name (from both
// the matrix and the parser), two scores and the size.
277 @Test(groups = "Functional")
278 public void testParseMatrix_ncbiFormat() throws MalformedURLException,
281 // input including comment and blank lines
282 String data = "ScoreMatrix MyTest\n#comment\n\n" + "\tA\tB\tC\n"
283 + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
284 + "C\t7.0\t8.0\t9.0\n";
285 FileParse fp = new FileParse(data, DataSourceType.PASTE);
286 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
287 ScoreMatrix sm = parser.parseMatrix();
290 assertEquals(sm.getName(), "MyTest");
291 assertEquals(parser.getMatrixName(), "MyTest");
292 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
// lower-case 'c' is expected to return the row B / column C value
293 assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
294 assertEquals(sm.getSize(), 3);
298 * Test a successful parse of the BLOSUM80 score matrix file (resources/scoreModel/blosum80.scm)
300 * @throws IOException
301 * @throws MalformedURLException
// Loads the BLOSUM80 matrix from a file resource (DataSourceType.FILE rather
// than pasted text) and checks its name, description, DNA/protein flags,
// size and a few pairwise scores.
303 @Test(groups = "Functional")
304 public void testParseMatrix_aaIndexBlosum80()
305 throws MalformedURLException,
308 FileParse fp = new FileParse("resources/scoreModel/blosum80.scm",
309 DataSourceType.FILE);
310 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
311 ScoreMatrix sm = parser.parseMatrix();
314 assertEquals(sm.getName(), "HENS920103");
315 assertEquals(sm.getDescription(),
316 "BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)");
317 assertFalse(sm.isDNA());
318 assertTrue(sm.isProtein());
// TestNG argument order is (actual, expected), as in the other assertions
319 assertEquals(sm.getSize(), 20);
321 assertEquals(sm.getPairwiseScore('A', 'A'), 7f);
322 assertEquals(sm.getPairwiseScore('A', 'R'), -3f);
323 assertEquals(sm.getPairwiseScore('r', 'a'), -3f); // A/a equivalent
327 * Test a successful parse of a (small) score matrix in aaindex format
329 * @throws IOException
330 * @throws MalformedURLException
// Parses a three-symbol matrix in aaindex layout (H/D/R/A/T/J/*/M header
// lines followed by score rows) and checks size, name, description and the
// pairwise scores.
332 @Test(groups = "Functional")
333 public void testParseMatrix_aaindexFormat() throws MalformedURLException,
337 * aaindex format has scores for diagonal and below only
339 String data = "H MyTest\n" + "D My description\n" + "R PMID:1438297\n"
340 + "A Authors, names\n" + "T Journal title\n"
341 + "J Journal reference\n" + "* matrix in 1/3 Bit Units\n"
342 + "M rows = ABC, cols = ABC\n" + "A\t1.0\n"
344 + "C\t7.0\t8.0\t9.0\n";
345 FileParse fp = new FileParse(data, DataSourceType.PASTE);
346 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
347 ScoreMatrix sm = parser.parseMatrix();
// name is taken from the 'H' line and description from the 'D' line
350 assertEquals(sm.getSize(), 3);
351 assertEquals(sm.getName(), "MyTest");
352 assertEquals(sm.getDescription(), "My description");
// each off-diagonal pair is checked in both orders and must give the same
// score, although the input supplies only the diagonal and below
353 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
354 assertEquals(sm.getPairwiseScore('A', 'B'), 4.0f);
355 assertEquals(sm.getPairwiseScore('A', 'C'), 7.0f);
356 assertEquals(sm.getPairwiseScore('B', 'A'), 4.0f);
357 assertEquals(sm.getPairwiseScore('B', 'B'), 5.0f);
358 assertEquals(sm.getPairwiseScore('B', 'C'), 8.0f);
359 assertEquals(sm.getPairwiseScore('C', 'C'), 9.0f);
360 assertEquals(sm.getPairwiseScore('C', 'B'), 8.0f);
361 assertEquals(sm.getPairwiseScore('C', 'A'), 7.0f);
// aaindex input with an 'H' line but no 'M rows=..., cols=...' line: the
// test expects a FileFormatException reporting that no alphabet was given.
364 @Test(groups = "Functional")
365 public void testParseMatrix_aaindex_mMissing()
366 throws MalformedURLException,
370 * aaindex format but M cols=, rows= is missing
372 String data = "H MyTest\n" + "A\t1.0\n"
374 + "C\t7.0\t8.0\t9.0\n";
375 FileParse fp = new FileParse(data, DataSourceType.PASTE);
376 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
379 parser.parseMatrix();
380 fail("Expected exception");
381 } catch (FileFormatException e)
383 assertEquals(e.getMessage(), "No alphabet specified in matrix file");
// aaindex 'M' line whose row alphabet (ABC) differs from its column alphabet
// (ABD): the test expects a FileFormatException whose message quotes the 'M'
// line and its line number.
387 @Test(groups = "Functional")
388 public void testParseMatrix_aaindex_rowColMismatch()
389 throws MalformedURLException,
392 String data = "H MyTest\n" + "M rows=ABC, cols=ABD\n" + "A\t1.0\n"
394 + "C\t7.0\t8.0\t9.0\n";
395 FileParse fp = new FileParse(data, DataSourceType.PASTE);
396 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
399 parser.parseMatrix();
400 fail("Expected exception");
401 } catch (FileFormatException e)
405 "Unexpected aaIndex score matrix data at line 2: M rows=ABC, cols=ABD rows != cols");
// Two consecutive 'ScoreMatrix <name>' header lines: the test expects an
// IOException that points at the line of the second header.
409 @Test(groups = "Functional")
410 public void testParseMatrix_ncbiHeaderRepeated()
412 String data = "ScoreMatrix BLOSUM\nScoreMatrix PAM250\nX Y\n1 2\n3 4\n";
415 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
417 fail("expected exception");
418 } catch (IOException e)
420 assertEquals(e.getMessage(),
421 "Error: 'ScoreMatrix' repeated in file at line 2");
// aaindex input declaring a three-symbol alphabet (ABC) but supplying four
// score rows (the 'C' row is repeated): the test expects a
// FileFormatException.
425 @Test(groups = "Functional")
426 public void testParseMatrix_aaindex_tooManyRows()
427 throws MalformedURLException,
430 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
431 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "C\t7.0\t8.0\t9.0\n";
432 FileParse fp = new FileParse(data, DataSourceType.PASTE);
433 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
436 parser.parseMatrix();
437 fail("Expected exception");
438 } catch (FileFormatException e)
440 assertEquals(e.getMessage(), "Too many data rows in matrix file");
// A complete aaindex matrix followed by a trailing non-score line
// ('something extra'). The expected message is the same one asserted in
// testParseMatrix_aaindex_tooManyRows for a surplus score row.
444 @Test(groups = "Functional")
445 public void testParseMatrix_aaindex_extraDataLines()
446 throws MalformedURLException,
449 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
450 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "something extra\n";
451 FileParse fp = new FileParse(data, DataSourceType.PASTE);
452 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
455 parser.parseMatrix();
456 fail("Expected exception");
457 } catch (FileFormatException e)
459 assertEquals(e.getMessage(), "Too many data rows in matrix file");
// The third aaindex score row ('C') carries two scores where three are
// needed: the test expects a FileFormatException quoting the raw line (tabs
// included), its line number and the number of scores found.
463 @Test(groups = "Functional")
464 public void testParseMatrix_aaindex_tooFewColumns()
465 throws MalformedURLException,
468 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
469 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\n";
470 FileParse fp = new FileParse(data, DataSourceType.PASTE);
471 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
474 parser.parseMatrix();
475 fail("Expected exception");
476 } catch (FileFormatException e)
480 "Expected 3 scores at line 5: 'C\t7.0\t8.0' but found 2");
485 * Test a successful parse and register of a score matrix file
487 * @throws IOException
488 * @throws MalformedURLException
// Checks that a matrix named "MyNewTest" is absent from ScoreModels before
// the input is processed and can be looked up from ScoreModels afterwards,
// with the expected name, scores and size.
490 @Test(groups = "Functional")
491 public void testParse_ncbiFormat() throws MalformedURLException,
// precondition: no score model called MyNewTest is registered yet
494 assertNull(ScoreModels.getInstance().getScoreModel("MyNewTest", null));
496 String data = "ScoreMatrix MyNewTest\n" + "\tA\tB\tC\n"
497 + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
498 + "C\t7.0\t8.0\t9.0\n";
499 FileParse fp = new FileParse(data, DataSourceType.PASTE);
500 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
// the matrix is fetched from the ScoreModels registry, not returned by the
// parser as in the testParseMatrix_* tests
504 ScoreMatrix sm = (ScoreMatrix) ScoreModels.getInstance().getScoreModel(
507 assertEquals(sm.getName(), "MyNewTest");
508 assertEquals(parser.getMatrixName(), "MyNewTest");
509 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
510 assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
511 assertEquals(sm.getSize(), 3);