2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import static org.testng.Assert.assertEquals;
24 import static org.testng.Assert.assertFalse;
25 import static org.testng.Assert.assertNotNull;
26 import static org.testng.Assert.assertNull;
27 import static org.testng.Assert.assertTrue;
28 import static org.testng.Assert.fail;
30 import jalview.analysis.scoremodels.ScoreMatrix;
31 import jalview.analysis.scoremodels.ScoreModels;
33 import java.io.IOException;
34 import java.net.MalformedURLException;
36 import org.testng.annotations.AfterMethod;
37 import org.testng.annotations.Test;
39 public class ScoreMatrixFileTest
42 @AfterMethod(alwaysRun = true)
43 public void tearDownAfterTest()
45 ScoreModels.getInstance().reset();
49 * Test a successful parse of a (small) score matrix file
52 * @throws MalformedURLException
54 @Test(groups = "Functional")
55 public void testParseMatrix_ncbiMixedDelimiters()
56 throws MalformedURLException, IOException
59 * some messy but valid input data, with comma, space
60 * or tab (or combinations) as score value delimiters
61 * this example includes 'guide' symbols on score rows
63 String data = "ScoreMatrix MyTest (example)\n" + "A\tT\tU\tt\tx\t-\n"
64 + "A,1.1,1.2,1.3,1.4, 1.5, 1.6\n"
65 + "T,2.1 2.2 2.3 2.4 2.5 2.6\n"
66 + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t\n"
67 + "t, 5.1,5.3,5.3,5.4,5.5, 5.6\n"
68 + "x\t6.1, 6.2 6.3 6.4 6.5 6.6\n"
69 + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6\n";
70 FileParse fp = new FileParse(data, DataSourceType.PASTE);
71 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
72 ScoreMatrix sm = parser.parseMatrix();
75 assertEquals(sm.getName(), "MyTest (example)");
76 assertEquals(sm.getSize(), 6);
77 assertNull(sm.getDescription());
78 assertTrue(sm.isDNA());
79 assertFalse(sm.isProtein());
80 assertEquals(sm.getMinimumScore(), 1.1f);
81 assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f);
82 assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f);
83 assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent
84 assertEquals(sm.getPairwiseScore('A', 't'), 1.4f); // T/t not equivalent
85 assertEquals(sm.getPairwiseScore('a', 't'), 1.4f);
86 assertEquals(sm.getPairwiseScore('U', 'x'), 3.5f);
87 assertEquals(sm.getPairwiseScore('u', 'x'), 3.5f);
88 // X (upper) and '.' unmapped - get minimum score
89 assertEquals(sm.getPairwiseScore('U', 'X'), 1.1f);
90 assertEquals(sm.getPairwiseScore('A', '.'), 1.1f);
91 assertEquals(sm.getPairwiseScore('-', '-'), 7.6f);
92 assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range
95 @Test(groups = "Functional")
96 public void testParseMatrix_headerMissing()
100 data = "X Y\n1 2\n3 4\n";
103 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
105 fail("expected exception");
106 } catch (IOException e)
108 assertEquals(e.getMessage(),
109 "Format error: 'ScoreMatrix <name>' should be the first non-comment line");
113 @Test(groups = "Functional")
114 public void testParseMatrix_ncbiNotEnoughRows()
116 String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5 6\n";
119 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
121 fail("expected exception");
122 } catch (IOException e)
124 assertEquals(e.getMessage(),
125 "Expected 3 rows of score data in score matrix but only found 2");
129 @Test(groups = "Functional")
130 public void testParseMatrix_ncbiNotEnoughColumns()
132 String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5\n7 8 9\n";
135 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
137 fail("expected exception");
138 } catch (IOException e)
140 assertEquals(e.getMessage(),
141 "Expected 3 scores at line 4: '4 5' but found 2");
145 @Test(groups = "Functional")
146 public void testParseMatrix_ncbiTooManyColumns()
149 * with two too many columns:
151 String data = "ScoreMatrix MyTest\nX\tY\tZ\n1 2 3\n4 5 6 7\n8 9 10\n";
154 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
156 fail("expected exception");
157 } catch (IOException e)
159 assertEquals(e.getMessage(),
160 "Expected 3 scores at line 4: '4 5 6 7' but found 4");
164 * with guide character and one too many columns:
166 data = "ScoreMatrix MyTest\nX Y\nX 1 2\nY 3 4 5\n";
169 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
171 fail("expected exception");
172 } catch (IOException e)
174 assertEquals(e.getMessage(),
175 "Expected 2 scores at line 4: 'Y 3 4 5' but found 3");
179 * with no guide character and one too many columns
181 data = "ScoreMatrix MyTest\nX Y\n1 2\n3 4 5\n";
184 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
186 fail("expected exception");
187 } catch (IOException e)
189 assertEquals(e.getMessage(),
190 "Expected 2 scores at line 4: '3 4 5' but found 3");
194 @Test(groups = "Functional")
195 public void testParseMatrix_ncbiTooManyRows()
197 String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 5 6\n7 8 9\n10 11 12\n";
200 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
202 fail("expected exception");
203 } catch (IOException e)
205 assertEquals(e.getMessage(),
206 "Unexpected extra input line in score model file: '10 11 12'");
210 @Test(groups = "Functional")
211 public void testParseMatrix_ncbiBadDelimiter()
213 String data = "ScoreMatrix MyTest\n X Y Z\n1|2|3\n4|5|6\n";
216 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
218 fail("expected exception");
219 } catch (IOException e)
221 assertEquals(e.getMessage(),
222 "Invalid score value '1|2|3' at line 3 column 0");
226 @Test(groups = "Functional")
227 public void testParseMatrix_ncbiBadFloat()
229 String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 five 6\n7 8 9\n";
232 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
234 fail("expected exception");
235 } catch (IOException e)
237 assertEquals(e.getMessage(),
238 "Invalid score value 'five' at line 4 column 1");
242 @Test(groups = "Functional")
243 public void testParseMatrix_ncbiBadGuideCharacter()
245 String data = "ScoreMatrix MyTest\n\tX Y\nX 1 2\ny 3 4\n";
248 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
250 fail("expected exception");
251 } catch (IOException e)
253 assertEquals(e.getMessage(),
254 "Error parsing score matrix at line 4, expected 'Y' but found 'y'");
257 data = "ScoreMatrix MyTest\n\tX Y\nXX 1 2\nY 3 4\n";
260 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
262 fail("expected exception");
263 } catch (IOException e)
265 assertEquals(e.getMessage(),
266 "Error parsing score matrix at line 3, expected 'X' but found 'XX'");
270 @Test(groups = "Functional")
271 public void testParseMatrix_ncbiNameMissing()
274 * Name missing on ScoreMatrix header line
276 String data = "ScoreMatrix\nX Y\n1 2\n3 4\n";
279 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
281 fail("expected exception");
282 } catch (IOException e)
284 assertEquals(e.getMessage(),
285 "Format error: expected 'ScoreMatrix <name>', found 'ScoreMatrix' at line 1");
290 * Test a successful parse of a (small) score matrix file
292 * @throws IOException
293 * @throws MalformedURLException
295 @Test(groups = "Functional")
296 public void testParseMatrix_ncbiFormat()
297 throws MalformedURLException, IOException
299 // input including comment and blank lines
300 String data = "ScoreMatrix MyTest\n#comment\n\n" + "\tA\tB\tC\n"
301 + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
302 + "C\t7.0\t8.0\t9.0\n";
303 FileParse fp = new FileParse(data, DataSourceType.PASTE);
304 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
305 ScoreMatrix sm = parser.parseMatrix();
308 assertEquals(sm.getName(), "MyTest");
309 assertEquals(parser.getMatrixName(), "MyTest");
310 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
311 assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
312 assertEquals(sm.getSize(), 3);
316 * Test a successful parse of a (small) score matrix file
318 * @throws IOException
319 * @throws MalformedURLException
321 @Test(groups = "Functional")
322 public void testParseMatrix_aaIndexBlosum80()
323 throws MalformedURLException, IOException
325 FileParse fp = new FileParse("resources/scoreModel/blosum80.scm",
326 DataSourceType.FILE);
327 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
328 ScoreMatrix sm = parser.parseMatrix();
331 assertEquals(sm.getName(), "HENS920103");
332 assertEquals(sm.getDescription(),
333 "BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)");
334 assertFalse(sm.isDNA());
335 assertTrue(sm.isProtein());
336 assertEquals(20, sm.getSize());
338 assertEquals(sm.getPairwiseScore('A', 'A'), 7f);
339 assertEquals(sm.getPairwiseScore('A', 'R'), -3f);
340 assertEquals(sm.getPairwiseScore('r', 'a'), -3f); // A/a equivalent
344 * Test a successful parse of a (small) score matrix file
346 * @throws IOException
347 * @throws MalformedURLException
349 @Test(groups = "Functional")
350 public void testParseMatrix_aaindexFormat()
351 throws MalformedURLException, IOException
354 * aaindex format has scores for diagonal and below only
356 String data = "H MyTest\n" + "D My description\n" + "R PMID:1438297\n"
357 + "A Authors, names\n" + "T Journal title\n"
358 + "J Journal reference\n" + "* matrix in 1/3 Bit Units\n"
359 + "M rows = ABC, cols = ABC\n" + "A\t1.0\n" + "B\t4.0\t5.0\n"
360 + "C\t7.0\t8.0\t9.0\n";
361 FileParse fp = new FileParse(data, DataSourceType.PASTE);
362 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
363 ScoreMatrix sm = parser.parseMatrix();
366 assertEquals(sm.getSize(), 3);
367 assertEquals(sm.getName(), "MyTest");
368 assertEquals(sm.getDescription(), "My description");
369 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
370 assertEquals(sm.getPairwiseScore('A', 'B'), 4.0f);
371 assertEquals(sm.getPairwiseScore('A', 'C'), 7.0f);
372 assertEquals(sm.getPairwiseScore('B', 'A'), 4.0f);
373 assertEquals(sm.getPairwiseScore('B', 'B'), 5.0f);
374 assertEquals(sm.getPairwiseScore('B', 'C'), 8.0f);
375 assertEquals(sm.getPairwiseScore('C', 'C'), 9.0f);
376 assertEquals(sm.getPairwiseScore('C', 'B'), 8.0f);
377 assertEquals(sm.getPairwiseScore('C', 'A'), 7.0f);
380 @Test(groups = "Functional")
381 public void testParseMatrix_aaindex_mMissing()
382 throws MalformedURLException, IOException
385 * aaindex format but M cols=, rows= is missing
387 String data = "H MyTest\n" + "A\t1.0\n" + "B\t4.0\t5.0\n"
388 + "C\t7.0\t8.0\t9.0\n";
389 FileParse fp = new FileParse(data, DataSourceType.PASTE);
390 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
393 parser.parseMatrix();
394 fail("Expected exception");
395 } catch (FileFormatException e)
397 assertEquals(e.getMessage(), "No alphabet specified in matrix file");
401 @Test(groups = "Functional")
402 public void testParseMatrix_aaindex_rowColMismatch()
403 throws MalformedURLException, IOException
405 String data = "H MyTest\n" + "M rows=ABC, cols=ABD\n" + "A\t1.0\n"
406 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n";
407 FileParse fp = new FileParse(data, DataSourceType.PASTE);
408 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
411 parser.parseMatrix();
412 fail("Expected exception");
413 } catch (FileFormatException e)
415 assertEquals(e.getMessage(),
416 "Unexpected aaIndex score matrix data at line 2: M rows=ABC, cols=ABD rows != cols");
420 @Test(groups = "Functional")
421 public void testParseMatrix_ncbiHeaderRepeated()
423 String data = "ScoreMatrix BLOSUM\nScoreMatrix PAM250\nX Y\n1 2\n3 4\n";
426 new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE))
428 fail("expected exception");
429 } catch (IOException e)
431 assertEquals(e.getMessage(),
432 "Error: 'ScoreMatrix' repeated in file at line 2");
436 @Test(groups = "Functional")
437 public void testParseMatrix_aaindex_tooManyRows()
438 throws MalformedURLException, IOException
440 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
441 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "C\t7.0\t8.0\t9.0\n";
442 FileParse fp = new FileParse(data, DataSourceType.PASTE);
443 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
446 parser.parseMatrix();
447 fail("Expected exception");
448 } catch (FileFormatException e)
450 assertEquals(e.getMessage(), "Too many data rows in matrix file");
454 @Test(groups = "Functional")
455 public void testParseMatrix_aaindex_extraDataLines()
456 throws MalformedURLException, IOException
458 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
459 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "something extra\n";
460 FileParse fp = new FileParse(data, DataSourceType.PASTE);
461 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
464 parser.parseMatrix();
465 fail("Expected exception");
466 } catch (FileFormatException e)
468 assertEquals(e.getMessage(), "Too many data rows in matrix file");
472 @Test(groups = "Functional")
473 public void testParseMatrix_aaindex_tooFewColumns()
474 throws MalformedURLException, IOException
476 String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
477 + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\n";
478 FileParse fp = new FileParse(data, DataSourceType.PASTE);
479 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
482 parser.parseMatrix();
483 fail("Expected exception");
484 } catch (FileFormatException e)
486 assertEquals(e.getMessage(),
487 "Expected 3 scores at line 5: 'C\t7.0\t8.0' but found 2");
492 * Test a successful parse and register of a score matrix file
494 * @throws IOException
495 * @throws MalformedURLException
497 @Test(groups = "Functional")
498 public void testParse_ncbiFormat()
499 throws MalformedURLException, IOException
501 assertNull(ScoreModels.getInstance().getScoreModel("MyNewTest", null));
503 String data = "ScoreMatrix MyNewTest\n" + "\tA\tB\tC\n"
504 + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
505 + "C\t7.0\t8.0\t9.0\n";
506 FileParse fp = new FileParse(data, DataSourceType.PASTE);
507 ScoreMatrixFile parser = new ScoreMatrixFile(fp);
511 ScoreMatrix sm = (ScoreMatrix) ScoreModels.getInstance()
512 .getScoreModel("MyNewTest", null);
514 assertEquals(sm.getName(), "MyNewTest");
515 assertEquals(parser.getMatrixName(), "MyNewTest");
516 assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
517 assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
518 assertEquals(sm.getSize(), 3);