3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertFalse;
5 import static org.testng.Assert.assertNotNull;
6 import static org.testng.Assert.assertNull;
7 import static org.testng.Assert.assertTrue;
8 import static org.testng.Assert.fail;
10 import jalview.analysis.scoremodels.ScoreMatrix;
11 import jalview.analysis.scoremodels.ScoreModels;
13 import java.io.IOException;
14 import java.net.MalformedURLException;
16 import org.testng.annotations.Test;
18 public class ScoreMatrixFileTest
22 * Test a successful parse of a (small) score matrix file
25 * @throws MalformedURLException
27 @Test(groups = "Functional")
28 public void testParseMatrix_ncbiMixedDelimiters()
29 throws MalformedURLException,
33 * some messy but valid input data, with comma, space
34 * or tab (or combinations) as score value delimiters
35 * this example includes 'guide' symbols on score rows
37 String data = "ScoreMatrix MyTest (example)\n" + "A\tT\tU\tt\tx\t-\n"
38 + "A,1.1,1.2,1.3,1.4, 1.5, 1.6\n"
39 + "T,2.1 2.2 2.3 2.4 2.5 2.6\n"
40 + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t\n"
41 + "t, 5.1,5.3,5.3,5.4,5.5, 5.6\n"
42 + "x\t6.1, 6.2 6.3 6.4 6.5 6.6\n"
43 + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6\n";
44 FileParse fp = new FileParse(data, DataSourceType.PASTE);
45 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
46 ScoreMatrix sm = parser.parseMatrix();
49 assertEquals(sm.getName(), "MyTest (example)");
50 assertEquals(sm.getSize(), 6);
51 assertNull(sm.getDescription());
52 assertTrue(sm.isDNA());
53 assertFalse(sm.isProtein());
54 assertEquals(sm.getMinimumScore(), 1.1f);
55 assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f);
56 assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f);
57 assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent
58 assertEquals(sm.getPairwiseScore('A', 't'), 1.4f); // T/t not equivalent
59 assertEquals(sm.getPairwiseScore('a', 't'), 1.4f);
60 assertEquals(sm.getPairwiseScore('U', 'x'), 3.5f);
61 assertEquals(sm.getPairwiseScore('u', 'x'), 3.5f);
62 // X (upper) and '.' unmapped - get minimum score
63 assertEquals(sm.getPairwiseScore('U', 'X'), 1.1f);
64 assertEquals(sm.getPairwiseScore('A', '.'), 1.1f);
65 assertEquals(sm.getPairwiseScore('-', '-'), 7.6f);
66 assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range
69 @Test(groups = "Functional")
70 public void testParseMatrix_headerMissing()
74 data = "X Y\n1 2\n3 4\n";
77 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
79 fail("expected exception");
80 } catch (IOException e)
82 assertEquals(e.getMessage(),
83 "Format error: 'ScoreMatrix <name>' should be the first non-comment line");
87 @Test(groups = "Functional")
88 public void testParseMatrix_ncbiNotEnoughRows()
90 String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5 6\n";
93 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
95 fail("expected exception");
96 } catch (IOException e)
98 assertEquals(e.getMessage(),
99 "Expected 3 rows of score data in score matrix but only found 2");
103 @Test(groups = "Functional")
104 public void testParseMatrix_ncbiNotEnoughColumns()
106 String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5\n7 8 9\n";
109 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
111 fail("expected exception");
112 } catch (IOException e)
114 assertEquals(e.getMessage(),
115 "Expected 3 scores at line 4: '4 5' but found 2");
119 @Test(groups = "Functional")
120 public void testParseMatrix_ncbiTooManyColumns()
123 * with two too many columns:
125 String data = "ScoreMatrix MyTest\nX\tY\tZ\n1 2 3\n4 5 6 7\n8 9 10\n";
128 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
130 fail("expected exception");
131 } catch (IOException e)
133 assertEquals(e.getMessage(),
134 "Expected 3 scores at line 4: '4 5 6 7' but found 4");
138 * with guide character and one too many columns:
140 data = "ScoreMatrix MyTest\nX Y\nX 1 2\nY 3 4 5\n";
143 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
145 fail("expected exception");
146 } catch (IOException e)
148 assertEquals(e.getMessage(),
149 "Expected 2 scores at line 4: 'Y 3 4 5' but found 3");
153 * with no guide character and one too many columns
155 data = "ScoreMatrix MyTest\nX Y\n1 2\n3 4 5\n";
158 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
160 fail("expected exception");
161 } catch (IOException e)
163 assertEquals(e.getMessage(),
164 "Expected 2 scores at line 4: '3 4 5' but found 3");
168 @Test(groups = "Functional")
169 public void testParseMatrix_ncbiTooManyRows()
171 String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 5 6\n7 8 9\n10 11 12\n";
174 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
176 fail("expected exception");
177 } catch (IOException e)
179 assertEquals(e.getMessage(),
180 "Unexpected extra input line in score model file: '10 11 12'");
184 @Test(groups = "Functional")
185 public void testParseMatrix_ncbiBadDelimiter()
187 String data = "ScoreMatrix MyTest\n X Y Z\n1|2|3\n4|5|6\n";
190 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
192 fail("expected exception");
193 } catch (IOException e)
195 assertEquals(e.getMessage(),
196 "Invalid score value '1|2|3' at line 3 column 0");
200 @Test(groups = "Functional")
201 public void testParseMatrix_ncbiBadFloat()
203 String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 five 6\n7 8 9\n";
206 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
208 fail("expected exception");
209 } catch (IOException e)
211 assertEquals(e.getMessage(),
212 "Invalid score value 'five' at line 4 column 1");
216 @Test(groups = "Functional")
217 public void testParseMatrix_ncbiBadGuideCharacter()
219 String data = "ScoreMatrix MyTest\n\tX Y\nX 1 2\ny 3 4\n";
222 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
224 fail("expected exception");
225 } catch (IOException e)
227 assertEquals(e.getMessage(),
228 "Error parsing score matrix at line 4, expected 'Y' but found 'y'");
231 data = "ScoreMatrix MyTest\n\tX Y\nXX 1 2\nY 3 4\n";
234 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
236 fail("expected exception");
237 } catch (IOException e)
239 assertEquals(e.getMessage(),
240 "Error parsing score matrix at line 3, expected 'X' but found 'XX'");
244 @Test(groups = "Functional")
245 public void testParseMatrix_ncbiNameMissing()
248 * Name missing on ScoreMatrix header line
250 String data = "ScoreMatrix\nX Y\n1 2\n3 4\n";
253 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
255 fail("expected exception");
256 } catch (IOException e)
260 "Format error: expected 'ScoreMatrix <name>', found 'ScoreMatrix' at line 1");
265 * Test a successful parse of a (small) score matrix file
267 * @throws IOException
268 * @throws MalformedURLException
270 @Test(groups = "Functional")
271 public void testParseMatrix_ncbiFormat() throws MalformedURLException,
274 // input including comment and blank lines
275 String data = "ScoreMatrix MyTest\n#comment\n\n" + "\tA\tB\tC\n"
276 + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
277 + "C\t7.0\t8.0\t9.0\n";
278 FileParse fp = new FileParse(data, DataSourceType.PASTE);
279 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
280 ScoreMatrix sm = parser.parseMatrix();
283 assertEquals(sm.getName(), "MyTest");
284 assertEquals(parser.getMatrixName(), "MyTest");
285 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
286 assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
287 assertEquals(sm.getSize(), 3);
291 * Test a successful parse of a (small) score matrix file
293 * @throws IOException
294 * @throws MalformedURLException
296 @Test(groups = "Functional")
297 public void testParseMatrix_aaIndexBlosum80()
298 throws MalformedURLException,
301 FileParse fp = new FileParse("resources/scoreModel/blosum80.scm",
302 DataSourceType.FILE);
303 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
304 ScoreMatrix sm = parser.parseMatrix();
307 assertEquals(sm.getName(), "HENS920103");
308 assertEquals(sm.getDescription(),
309 "BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)");
310 assertFalse(sm.isDNA());
311 assertTrue(sm.isProtein());
312 assertEquals(20, sm.getSize());
313 assertEquals(sm.getGapIndex(), -1);
315 assertEquals(sm.getPairwiseScore('A', 'A'), 7f);
316 assertEquals(sm.getPairwiseScore('A', 'R'), -3f);
317 assertEquals(sm.getPairwiseScore('r', 'a'), -3f); // A/a equivalent
321 * Test a successful parse of a (small) score matrix file
323 * @throws IOException
324 * @throws MalformedURLException
326 @Test(groups = "Functional")
327 public void testParseMatrix_aaindexFormat() throws MalformedURLException,
331 * aaindex format has scores for diagonal and below only
333 String data = "H MyTest\n" + "D My description\n" + "R PMID:1438297\n"
334 + "A Authors, names\n" + "T Journal title\n"
335 + "J Journal reference\n" + "* matrix in 1/3 Bit Units\n"
336 + "M rows = ABC, cols = ABC\n" + "A\t1.0\n"
338 + "C\t7.0\t8.0\t9.0\n";
339 FileParse fp = new FileParse(data, DataSourceType.PASTE);
340 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
341 ScoreMatrix sm = parser.parseMatrix();
344 assertEquals(sm.getSize(), 3);
345 assertEquals(sm.getGapIndex(), -1);
346 assertEquals(sm.getName(), "MyTest");
347 assertEquals(sm.getDescription(), "My description");
348 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
349 assertEquals(sm.getPairwiseScore('A', 'B'), 4.0f);
350 assertEquals(sm.getPairwiseScore('A', 'C'), 7.0f);
351 assertEquals(sm.getPairwiseScore('B', 'A'), 4.0f);
352 assertEquals(sm.getPairwiseScore('B', 'B'), 5.0f);
353 assertEquals(sm.getPairwiseScore('B', 'C'), 8.0f);
354 assertEquals(sm.getPairwiseScore('C', 'C'), 9.0f);
355 assertEquals(sm.getPairwiseScore('C', 'B'), 8.0f);
356 assertEquals(sm.getPairwiseScore('C', 'A'), 7.0f);
359 @Test(groups = "Functional")
360 public void testParseMatrix_aaindex_mMissing()
361 throws MalformedURLException,
365 * aaindex format but M cols=, rows= is missing
367 String data = "H MyTest\n" + "A\t1.0\n"
369 + "C\t7.0\t8.0\t9.0\n";
370 FileParse fp = new FileParse(data, DataSourceType.PASTE);
371 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
374 parser.parseMatrix();
375 fail("Expected exception");
376 } catch (FileFormatException e)
378 assertEquals(e.getMessage(), "No alphabet specified in matrix file");
382 @Test(groups = "Functional")
383 public void testParseMatrix_aaindex_rowColMismatch()
384 throws MalformedURLException,
387 String data = "H MyTest\n" + "M rows=ABC, cols=ABD\n" + "A\t1.0\n"
389 + "C\t7.0\t8.0\t9.0\n";
390 FileParse fp = new FileParse(data, DataSourceType.PASTE);
391 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
394 parser.parseMatrix();
395 fail("Expected exception");
396 } catch (FileFormatException e)
400 "Unexpected aaIndex score matrix data at line 2: M rows=ABC, cols=ABD rows != cols");
404 @Test(groups = "Functional")
405 public void testParseMatrix_ncbiHeaderRepeated()
407 String data = "ScoreMatrix BLOSUM\nScoreMatrix PAM250\nX Y\n1 2\n3 4\n";
410 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
412 fail("expected exception");
413 } catch (IOException e)
415 assertEquals(e.getMessage(),
416 "Error: 'ScoreMatrix' repeated in file at line 2");
420 @Test(groups = "Functional")
421 public void testParseMatrix_aaindex_tooManyRows()
422 throws MalformedURLException,
425 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
426 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "C\t7.0\t8.0\t9.0\n";
427 FileParse fp = new FileParse(data, DataSourceType.PASTE);
428 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
431 parser.parseMatrix();
432 fail("Expected exception");
433 } catch (FileFormatException e)
435 assertEquals(e.getMessage(), "Too many data rows in matrix file");
439 @Test(groups = "Functional")
440 public void testParseMatrix_aaindex_extraDataLines()
441 throws MalformedURLException,
444 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
445 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "something extra\n";
446 FileParse fp = new FileParse(data, DataSourceType.PASTE);
447 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
450 parser.parseMatrix();
451 fail("Expected exception");
452 } catch (FileFormatException e)
454 assertEquals(e.getMessage(), "Too many data rows in matrix file");
458 @Test(groups = "Functional")
459 public void testParseMatrix_aaindex_tooFewColumns()
460 throws MalformedURLException,
463 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
464 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\n";
465 FileParse fp = new FileParse(data, DataSourceType.PASTE);
466 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
469 parser.parseMatrix();
470 fail("Expected exception");
471 } catch (FileFormatException e)
475 "Expected 3 scores at line 5: 'C\t7.0\t8.0' but found 2");
480 * Test a successful parse and register of a score matrix file
482 * @throws IOException
483 * @throws MalformedURLException
485 @Test(groups = "Functional")
486 public void testParse_ncbiFormat() throws MalformedURLException,
489 assertNull(ScoreModels.getInstance().forName("MyNewTest"));
491 String data = "ScoreMatrix MyNewTest\n" + "\tA\tB\tC\n"
492 + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
493 + "C\t7.0\t8.0\t9.0\n";
494 FileParse fp = new FileParse(data, DataSourceType.PASTE);
495 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
499 ScoreMatrix sm = (ScoreMatrix) ScoreModels.getInstance().forName(
502 assertEquals(sm.getName(), "MyNewTest");
503 assertEquals(parser.getMatrixName(), "MyNewTest");
504 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
505 assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
506 assertEquals(sm.getSize(), 3);