3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertFalse;
5 import static org.testng.Assert.assertNotNull;
6 import static org.testng.Assert.assertNull;
7 import static org.testng.Assert.assertTrue;
8 import static org.testng.Assert.fail;
10 import jalview.analysis.scoremodels.ScoreMatrix;
11 import jalview.analysis.scoremodels.ScoreModels;
13 import java.io.IOException;
14 import java.net.MalformedURLException;
16 import org.testng.annotations.AfterMethod;
17 import org.testng.annotations.Test;
19 public class ScoreMatrixFileTest
22 @AfterMethod(alwaysRun = true)
23 public void tearDownAfterTest()
25 ScoreModels.getInstance().reset();
29 * Test a successful parse of a (small) score matrix file
32 * @throws MalformedURLException
34 @Test(groups = "Functional")
35 public void testParseMatrix_ncbiMixedDelimiters()
36 throws MalformedURLException, IOException
39 * some messy but valid input data, with comma, space
40 * or tab (or combinations) as score value delimiters
41 * this example includes 'guide' symbols on score rows
43 String data = "ScoreMatrix MyTest (example)\n" + "A\tT\tU\tt\tx\t-\n"
44 + "A,1.1,1.2,1.3,1.4, 1.5, 1.6\n"
45 + "T,2.1 2.2 2.3 2.4 2.5 2.6\n"
46 + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t\n"
47 + "t, 5.1,5.3,5.3,5.4,5.5, 5.6\n"
48 + "x\t6.1, 6.2 6.3 6.4 6.5 6.6\n"
49 + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6\n";
50 FileParse fp = new FileParse(data, DataSourceType.PASTE);
51 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
52 ScoreMatrix sm = parser.parseMatrix();
55 assertEquals(sm.getName(), "MyTest (example)");
56 assertEquals(sm.getSize(), 6);
57 assertNull(sm.getDescription());
58 assertTrue(sm.isDNA());
59 assertFalse(sm.isProtein());
60 assertEquals(sm.getMinimumScore(), 1.1f);
61 assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f);
62 assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f);
63 assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent
64 assertEquals(sm.getPairwiseScore('A', 't'), 1.4f); // T/t not equivalent
65 assertEquals(sm.getPairwiseScore('a', 't'), 1.4f);
66 assertEquals(sm.getPairwiseScore('U', 'x'), 3.5f);
67 assertEquals(sm.getPairwiseScore('u', 'x'), 3.5f);
68 // X (upper) and '.' unmapped - get minimum score
69 assertEquals(sm.getPairwiseScore('U', 'X'), 1.1f);
70 assertEquals(sm.getPairwiseScore('A', '.'), 1.1f);
71 assertEquals(sm.getPairwiseScore('-', '-'), 7.6f);
72 assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range
75 @Test(groups = "Functional")
76 public void testParseMatrix_headerMissing()
80 data = "X Y\n1 2\n3 4\n";
83 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
85 fail("expected exception");
86 } catch (IOException e)
88 assertEquals(e.getMessage(),
89 "Format error: 'ScoreMatrix <name>' should be the first non-comment line");
93 @Test(groups = "Functional")
94 public void testParseMatrix_ncbiNotEnoughRows()
96 String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5 6\n";
99 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
101 fail("expected exception");
102 } catch (IOException e)
104 assertEquals(e.getMessage(),
105 "Expected 3 rows of score data in score matrix but only found 2");
109 @Test(groups = "Functional")
110 public void testParseMatrix_ncbiNotEnoughColumns()
112 String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5\n7 8 9\n";
115 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
117 fail("expected exception");
118 } catch (IOException e)
120 assertEquals(e.getMessage(),
121 "Expected 3 scores at line 4: '4 5' but found 2");
125 @Test(groups = "Functional")
126 public void testParseMatrix_ncbiTooManyColumns()
129 * with two too many columns:
131 String data = "ScoreMatrix MyTest\nX\tY\tZ\n1 2 3\n4 5 6 7\n8 9 10\n";
134 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
136 fail("expected exception");
137 } catch (IOException e)
139 assertEquals(e.getMessage(),
140 "Expected 3 scores at line 4: '4 5 6 7' but found 4");
144 * with guide character and one too many columns:
146 data = "ScoreMatrix MyTest\nX Y\nX 1 2\nY 3 4 5\n";
149 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
151 fail("expected exception");
152 } catch (IOException e)
154 assertEquals(e.getMessage(),
155 "Expected 2 scores at line 4: 'Y 3 4 5' but found 3");
159 * with no guide character and one too many columns
161 data = "ScoreMatrix MyTest\nX Y\n1 2\n3 4 5\n";
164 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
166 fail("expected exception");
167 } catch (IOException e)
169 assertEquals(e.getMessage(),
170 "Expected 2 scores at line 4: '3 4 5' but found 3");
174 @Test(groups = "Functional")
175 public void testParseMatrix_ncbiTooManyRows()
177 String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 5 6\n7 8 9\n10 11 12\n";
180 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
182 fail("expected exception");
183 } catch (IOException e)
185 assertEquals(e.getMessage(),
186 "Unexpected extra input line in score model file: '10 11 12'");
190 @Test(groups = "Functional")
191 public void testParseMatrix_ncbiBadDelimiter()
193 String data = "ScoreMatrix MyTest\n X Y Z\n1|2|3\n4|5|6\n";
196 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
198 fail("expected exception");
199 } catch (IOException e)
201 assertEquals(e.getMessage(),
202 "Invalid score value '1|2|3' at line 3 column 0");
206 @Test(groups = "Functional")
207 public void testParseMatrix_ncbiBadFloat()
209 String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 five 6\n7 8 9\n";
212 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
214 fail("expected exception");
215 } catch (IOException e)
217 assertEquals(e.getMessage(),
218 "Invalid score value 'five' at line 4 column 1");
222 @Test(groups = "Functional")
223 public void testParseMatrix_ncbiBadGuideCharacter()
225 String data = "ScoreMatrix MyTest\n\tX Y\nX 1 2\ny 3 4\n";
228 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
230 fail("expected exception");
231 } catch (IOException e)
233 assertEquals(e.getMessage(),
234 "Error parsing score matrix at line 4, expected 'Y' but found 'y'");
237 data = "ScoreMatrix MyTest\n\tX Y\nXX 1 2\nY 3 4\n";
240 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
242 fail("expected exception");
243 } catch (IOException e)
245 assertEquals(e.getMessage(),
246 "Error parsing score matrix at line 3, expected 'X' but found 'XX'");
250 @Test(groups = "Functional")
251 public void testParseMatrix_ncbiNameMissing()
254 * Name missing on ScoreMatrix header line
256 String data = "ScoreMatrix\nX Y\n1 2\n3 4\n";
259 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
261 fail("expected exception");
262 } catch (IOException e)
264 assertEquals(e.getMessage(),
265 "Format error: expected 'ScoreMatrix <name>', found 'ScoreMatrix' at line 1");
270 * Test a successful parse of a (small) score matrix file
272 * @throws IOException
273 * @throws MalformedURLException
275 @Test(groups = "Functional")
276 public void testParseMatrix_ncbiFormat()
277 throws MalformedURLException, IOException
279 // input including comment and blank lines
280 String data = "ScoreMatrix MyTest\n#comment\n\n" + "\tA\tB\tC\n"
281 + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
282 + "C\t7.0\t8.0\t9.0\n";
283 FileParse fp = new FileParse(data, DataSourceType.PASTE);
284 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
285 ScoreMatrix sm = parser.parseMatrix();
288 assertEquals(sm.getName(), "MyTest");
289 assertEquals(parser.getMatrixName(), "MyTest");
290 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
291 assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
292 assertEquals(sm.getSize(), 3);
296 * Test a successful parse of a (small) score matrix file
298 * @throws IOException
299 * @throws MalformedURLException
301 @Test(groups = "Functional")
302 public void testParseMatrix_aaIndexBlosum80()
303 throws MalformedURLException, IOException
305 FileParse fp = new FileParse("resources/scoreModel/blosum80.scm",
306 DataSourceType.FILE);
307 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
308 ScoreMatrix sm = parser.parseMatrix();
311 assertEquals(sm.getName(), "HENS920103");
312 assertEquals(sm.getDescription(),
313 "BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)");
314 assertFalse(sm.isDNA());
315 assertTrue(sm.isProtein());
316 assertEquals(20, sm.getSize());
318 assertEquals(sm.getPairwiseScore('A', 'A'), 7f);
319 assertEquals(sm.getPairwiseScore('A', 'R'), -3f);
320 assertEquals(sm.getPairwiseScore('r', 'a'), -3f); // A/a equivalent
324 * Test a successful parse of a (small) score matrix file
326 * @throws IOException
327 * @throws MalformedURLException
329 @Test(groups = "Functional")
330 public void testParseMatrix_aaindexFormat()
331 throws MalformedURLException, IOException
334 * aaindex format has scores for diagonal and below only
336 String data = "H MyTest\n" + "D My description\n" + "R PMID:1438297\n"
337 + "A Authors, names\n" + "T Journal title\n"
338 + "J Journal reference\n" + "* matrix in 1/3 Bit Units\n"
339 + "M rows = ABC, cols = ABC\n" + "A\t1.0\n" + "B\t4.0\t5.0\n"
340 + "C\t7.0\t8.0\t9.0\n";
341 FileParse fp = new FileParse(data, DataSourceType.PASTE);
342 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
343 ScoreMatrix sm = parser.parseMatrix();
346 assertEquals(sm.getSize(), 3);
347 assertEquals(sm.getName(), "MyTest");
348 assertEquals(sm.getDescription(), "My description");
349 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
350 assertEquals(sm.getPairwiseScore('A', 'B'), 4.0f);
351 assertEquals(sm.getPairwiseScore('A', 'C'), 7.0f);
352 assertEquals(sm.getPairwiseScore('B', 'A'), 4.0f);
353 assertEquals(sm.getPairwiseScore('B', 'B'), 5.0f);
354 assertEquals(sm.getPairwiseScore('B', 'C'), 8.0f);
355 assertEquals(sm.getPairwiseScore('C', 'C'), 9.0f);
356 assertEquals(sm.getPairwiseScore('C', 'B'), 8.0f);
357 assertEquals(sm.getPairwiseScore('C', 'A'), 7.0f);
360 @Test(groups = "Functional")
361 public void testParseMatrix_aaindex_mMissing()
362 throws MalformedURLException, IOException
365 * aaindex format but M cols=, rows= is missing
367 String data = "H MyTest\n" + "A\t1.0\n" + "B\t4.0\t5.0\n"
368 + "C\t7.0\t8.0\t9.0\n";
369 FileParse fp = new FileParse(data, DataSourceType.PASTE);
370 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
373 parser.parseMatrix();
374 fail("Expected exception");
375 } catch (FileFormatException e)
377 assertEquals(e.getMessage(), "No alphabet specified in matrix file");
381 @Test(groups = "Functional")
382 public void testParseMatrix_aaindex_rowColMismatch()
383 throws MalformedURLException, IOException
385 String data = "H MyTest\n" + "M rows=ABC, cols=ABD\n" + "A\t1.0\n"
386 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n";
387 FileParse fp = new FileParse(data, DataSourceType.PASTE);
388 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
391 parser.parseMatrix();
392 fail("Expected exception");
393 } catch (FileFormatException e)
395 assertEquals(e.getMessage(),
396 "Unexpected aaIndex score matrix data at line 2: M rows=ABC, cols=ABD rows != cols");
400 @Test(groups = "Functional")
401 public void testParseMatrix_ncbiHeaderRepeated()
403 String data = "ScoreMatrix BLOSUM\nScoreMatrix PAM250\nX Y\n1 2\n3 4\n";
406 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
408 fail("expected exception");
409 } catch (IOException e)
411 assertEquals(e.getMessage(),
412 "Error: 'ScoreMatrix' repeated in file at line 2");
416 @Test(groups = "Functional")
417 public void testParseMatrix_aaindex_tooManyRows()
418 throws MalformedURLException, IOException
420 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
421 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "C\t7.0\t8.0\t9.0\n";
422 FileParse fp = new FileParse(data, DataSourceType.PASTE);
423 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
426 parser.parseMatrix();
427 fail("Expected exception");
428 } catch (FileFormatException e)
430 assertEquals(e.getMessage(), "Too many data rows in matrix file");
434 @Test(groups = "Functional")
435 public void testParseMatrix_aaindex_extraDataLines()
436 throws MalformedURLException, IOException
438 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
439 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "something extra\n";
440 FileParse fp = new FileParse(data, DataSourceType.PASTE);
441 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
444 parser.parseMatrix();
445 fail("Expected exception");
446 } catch (FileFormatException e)
448 assertEquals(e.getMessage(), "Too many data rows in matrix file");
452 @Test(groups = "Functional")
453 public void testParseMatrix_aaindex_tooFewColumns()
454 throws MalformedURLException, IOException
456 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
457 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\n";
458 FileParse fp = new FileParse(data, DataSourceType.PASTE);
459 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
462 parser.parseMatrix();
463 fail("Expected exception");
464 } catch (FileFormatException e)
466 assertEquals(e.getMessage(),
467 "Expected 3 scores at line 5: 'C\t7.0\t8.0' but found 2");
// Functional test of the parse-and-register path: first asserts that no score
// model named "MyNewTest" is known to ScoreModels, then builds a
// ScoreMatrixFile over pasted tab-delimited 'ScoreMatrix' text, looks the model
// up in ScoreModels by name and checks its name, two pairwise scores and size.
472 * Test a successful parse and register of a score matrix file
474 * @throws IOException
475 * @throws MalformedURLException
477 @Test(groups = "Functional")
478 public void testParse_ncbiFormat()
479 throws MalformedURLException, IOException
// precondition: the model must not already be registered
481 assertNull(ScoreModels.getInstance().getScoreModel("MyNewTest", null));
483 String data = "ScoreMatrix MyNewTest\n" + "\tA\tB\tC\n"
484 + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
485 + "C\t7.0\t8.0\t9.0\n";
486 FileParse fp = new FileParse(data, DataSourceType.PASTE);
487 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
// NOTE(review): nothing visible here invokes the parser before the lookup
// below; presumably a short call that parses the data and registers the matrix
// belongs at this point - confirm against the complete file.
// the cast assumes the registered model is a ScoreMatrix
491 ScoreMatrix sm = (ScoreMatrix) ScoreModels.getInstance()
492 .getScoreModel("MyNewTest", null);
// NOTE(review): sm is dereferenced below without a visible null check although
// assertNotNull is imported - confirm whether a guard belongs here.
494 assertEquals(sm.getName(), "MyNewTest");
495 assertEquals(parser.getMatrixName(), "MyNewTest");
496 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
// lower-case 'c' is expected to score as 'C'
497 assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
498 assertEquals(sm.getSize(), 3);