3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.AssertJUnit.fail;
9 import jalview.datamodel.AlignmentI;
10 import jalview.datamodel.Sequence;
11 import jalview.datamodel.SequenceI;
13 import java.io.IOException;
14 import java.util.Vector;
16 import org.testng.annotations.Test;
19 * Unit tests for MegaFile - read and write in MEGA format(s).
21 public class MegaFileTest
23 private static final String TWENTY_CHARS = "9876543210abcdefghij";
25 private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
// MEGA-format text fixtures that the tests in this class feed to MegaFile.
// NOTE(review): the embedded line numbering skips values inside several of
// these definitions, so parts of the literals are not visible in this
// extract - check the complete file before changing any fixture.

// interleaved data introduced by a "TITLE:" line
28 private static final String INTERLEAVED =
30 "TITLE: Interleaved sequence data\n\n" +
32 "#CPZANT MNOPQR\n\n" +
// fixture for testPrint_interleavedNoHeaders (presumably the same data
// without header lines - confirm against the full definition)
36 private static final String INTERLEAVED_NOHEADERS =
38 + "#CPZANT MNOPQR\n\n"
42 // interleaved sequences, with 50 residues
43 private static final String INTERLEAVED_50RESIDUES =
45 + "!TITLE Interleaved sequence data\n\n"
46 + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n"
47 + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
// data with a "!TITLE" header, used by the noninterleaved parse/print tests
49 private static final String NONINTERLEAVED =
51 + "!TITLE Noninterleaved sequence data\n\n"
59 // this one starts interleaved then switches to non-interleaved
60 private static final String MIXED =
62 + "!TITLE This is a mess\n\n"
63 + "#CPZANT KLMNOPWXYZCGATC\n\n"
67 // interleaved with a new sequence appearing in the second block :-O
68 private static final String INTERLEAVED_SEQUENCE_ERROR =
70 + "!TITLE Interleaved sequence data\n\n"
72 + "#CPZANT MNOPQR\n\n"
75 // interleaved with description, bases/gaps in triplet groups
76 private static final String INTERLEAVED_WITH_DESCRIPTION =
78 + "!Title Data with description;\n"
79 + "!Format DataType=DNA indel=- CodeTable=Standard Missing=? MatchChar=.;\n\n"
81 + " Line one of description\n"
82 + " Line two of description;\n\n"
84 + "#CPZANT ATC -G-\n\n"
86 + "#CPZANT CA- -GC\n";
91 * Test parse of interleaved mega format data.
95 @Test(groups = { "Functional" })
96 public void testParse_interleaved() throws IOException
98 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
99 assertEquals("Title not as expected", "Interleaved sequence data",
100 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
101 Vector<SequenceI> seqs = testee.getSeqs();
102 // should be 2 sequences
103 assertEquals("Expected two sequences", 2, seqs.size());
104 // check sequence names correct and order preserved
105 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
106 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
108 // check sequence data
109 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
110 .getSequenceAsString());
111 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
112 .getSequenceAsString());
113 assertTrue("File format is not flagged as interleaved",
114 testee.isInterleaved());
118 * Test parse of noninterleaved mega format data.
120 * @throws IOException
122 @Test(groups = { "Functional" })
123 public void testParse_nonInterleaved() throws IOException
125 MegaFile testee = new MegaFile(NONINTERLEAVED,
126 AppletFormatAdapter.PASTE);
127 assertEquals("Title not as expected", "Noninterleaved sequence data",
128 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
129 Vector<SequenceI> seqs = testee.getSeqs();
130 // should be 2 sequences
131 assertEquals("Expected two sequences", 2, seqs.size());
132 // check sequence names correct and order preserved
133 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
134 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
136 // check sequence data
137 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
138 .get(0).getSequenceAsString());
139 assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
140 seqs.get(1).getSequenceAsString());
141 assertFalse("File format is not flagged as noninterleaved",
142 testee.isInterleaved());
146 * Test parsing an interleaved file with an extra sequence appearing after the
147 * first block - should fail.
149 @Test(groups = { "Functional" })
150 public void testParse_interleavedExtraSequenceError()
154 new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
155 fail("Expected extra sequence IOException");
156 } catch (IOException e)
159 "Unexpected exception message",
160 "Parse error: misplaced new sequence starting at #U456 KLMNOP",
166 * Test a mixed up file.
168 @Test(groups = { "Functional" })
169 public void testParse_mixedInterleavedNonInterleaved()
173 new MegaFile(MIXED, AppletFormatAdapter.PASTE);
174 fail("Expected mixed content exception");
175 } catch (IOException e)
178 "Unexpected exception message",
179 "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
185 @Test(groups = { "Functional" })
186 public void testGetSequenceId()
188 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
189 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
190 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
191 assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
192 assertNull(MegaFile.getSequenceId("AB123 CTAG"));
193 assertNull(MegaFile.getSequenceId("AB123"));
194 assertNull(MegaFile.getSequenceId(""));
195 assertNull(MegaFile.getSequenceId(null));
198 @Test(groups = { "Functional" })
199 public void testGetMaxIdLength()
201 SequenceI[] seqs = new Sequence[2];
202 seqs[0] = new Sequence("Something", "GCATAC");
203 seqs[1] = new Sequence("SomethingElse", "GCATAC");
204 assertEquals(13, MegaFile.getMaxIdLength(seqs));
205 seqs[1] = new Sequence("DNA", "GCATAC");
206 assertEquals(9, MegaFile.getMaxIdLength(seqs));
209 @Test(groups = { "Functional" })
210 public void testGetMaxSequenceLength()
212 SequenceI[] seqs = new Sequence[2];
213 seqs[0] = new Sequence("Seq1", "GCATAC");
214 seqs[1] = new Sequence("Seq2", "GCATACTAG");
215 assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
216 seqs[1] = new Sequence("Seq2", "GCA");
217 assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
// Parses the INTERLEAVED fixture, prints it with the no-argument print() and
// compares the result with the expected text assembled below.
221 * Test (parse and) print of interleaved mega format data.
223 * @throws IOException
225 @Test(groups = { "Functional" })
226 public void testPrint_interleaved() throws IOException
228 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
229 String printed = testee.print();
230 System.out.println(printed);
231 // normally output should match input
232 // we cheated here with a number of short input lines
233 // nb don't get Title in output if not calling print(AlignmentI)
234 String expected = "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
235 + "#CPZANT MNOPQR [6]\n\n" + "#U455 KLMNOP [12]\n"
236 + "#CPZANT WXYZGC [12]"
// NOTE(review): the embedded numbering skips 237 here, so the end of the
// 'expected' expression is not visible in this extract.
238 assertEquals("Print format wrong", expected, printed);
242 * Test (parse and) print of interleaved data with no headers (acceptable).
244 * @throws IOException
246 @Test(groups = { "Functional" })
247 public void testPrint_interleavedNoHeaders() throws IOException
249 MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
250 AppletFormatAdapter.PASTE);
251 String printed = testee.print();
252 System.out.println(printed);
255 assertEquals("Print format wrong",
256 "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
257 + "#CPZANT MNOPQR [6]\n\n"
258 + "#U455 KLMNOP [12]\n"
259 + "#CPZANT WXYZGC [12]\n",
265 * Test (parse and) print of noninterleaved mega format data.
267 * @throws IOException
269 @Test(groups = { "Functional" })
270 public void testPrint_noninterleaved() throws IOException
272 MegaFile testee = new MegaFile(NONINTERLEAVED,
273 AppletFormatAdapter.PASTE);
274 assertEquals(10, testee.getPositionsPerLine());
275 String printed = testee.print();
276 System.out.println(printed);
277 // normally output should match input
278 // we cheated here with a number of short input lines
279 String expected = "#MEGA\n\n"
280 + "#U455\n" + "ABCFEDHIJM\nNOPQR\n\n"
281 + "#CPZANT\n" + "KLMNOPWXYZ\nCGATC\n";
282 assertEquals("Print format wrong", expected, printed);
// Parses the 50-residue interleaved fixture, switches the output width to 20
// positions per line and compares the printed text with the expected blocks
// (20, 20 and 10 residues per sequence).
286 * Test (parse and) print of interleaved mega format data extending to more
287 * than one line of output.
289 * @throws IOException
291 @Test(groups = { "Functional" })
292 public void testPrint_interleavedMultiLine() throws IOException
294 MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
295 AppletFormatAdapter.PASTE);
// after parsing, the positions-per-line value is expected to be 50
296 assertEquals(50, testee.getPositionsPerLine());
298 * now simulate choosing 20 residues per line on output
300 testee.setPositionsPerLine(20);
301 String printed = testee.print();
302 System.out.println(printed);
// the next comment shows the U455 data (THIRTY_CHARS followed by TWENTY_CHARS)
304 //0123456789klmnopqrstABCDEFGHIJ9876543210abcdefghij
// NOTE(review): the embedded numbering skips 303 and 305-306, so the start
// of the 'expected' declaration is not visible in this extract.
307 "#U455 0123456789 klmnopqrst [20]\n" + // first 20
308 "#CPZANT 9876543210 abcdefghij [20]\n\n" +
309 "#U455 ABCDEFGHIJ 9876543210 [40]\n" + // next 20
310 "#CPZANT 0123456789 klmnopqrst [40]\n\n" +
311 "#U455 abcdefghij [50]\n" + // last 10
312 "#CPZANT ABCDEFGHIJ [50]\n";
314 assertEquals("Print format wrong", expected, printed);
// Parses a single 60-character noninterleaved sequence (30 + 30 characters
// on two input lines), switches the output width to 25 positions per line
// and compares the printed text with the expected wrapped output.
318 * Test (parse and) print of noninterleaved mega format data extending to more
319 * than one line of output.
321 * @throws IOException
323 @Test(groups = { "Functional" })
324 public void testPrint_noninterleavedMultiLine() throws IOException
326 final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
327 + "\n" + TWENTY_CHARS + "9993332221\n";
328 MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
329 AppletFormatAdapter.PASTE);
// after parsing, the positions-per-line value is expected to be 30
330 assertEquals(30, testee.getPositionsPerLine());
331 testee.setPositionsPerLine(25);
332 String printed = testee.print();
333 // with 25 positions per line, the 60 character sequence wraps after every 25 characters
334 String expected = "#MEGA\n\n" + "#SIXTY\n"
335 + "0123456789klmnopqrstABCDE\n" + "FGHIJ9876543210abcdefghij\n"
// NOTE(review): the embedded numbering skips 336, so the last part of
// 'expected' (presumably the remaining 10 characters) is not visible here.
337 assertEquals("Print format wrong", expected, printed);
341 * Test parse of data including description
343 * @throws IOException
345 @Test(groups = { "Functional" })
346 public void testParse_withDescription() throws IOException
348 MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
349 AppletFormatAdapter.PASTE);
350 assertEquals("Title not as expected", "Data with description",
351 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
353 Vector<SequenceI> seqs = testee.getSeqs();
354 // should be 2 sequences
355 assertEquals("Expected two sequences", 2, seqs.size());
356 // check sequence names correct and order preserved
357 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
358 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
360 // check sequence data
361 assertEquals("First sequence data wrong", "C--GTACGA--T", seqs.get(0)
362 .getSequenceAsString());
363 assertEquals("Second sequence data wrong", "ATC-G-CA--GC", seqs.get(1)
364 .getSequenceAsString());
365 assertTrue("File format is not flagged as interleaved",
366 testee.isInterleaved());
369 "Description property not parsed",
370 " Line one of description\n" + " Line two of description",
371 testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
374 @Test(groups = { "Functional" })
375 public void testGetNonCommentContent() throws FileFormatException
377 assertEquals("abcde", MegaFile.getNonCommentContent("abcde", 0));
378 assertEquals("CGT ACG GAC ",
379 MegaFile.getNonCommentContent("CGT ACG GAC [9]", 0));
380 assertEquals("", MegaFile.getNonCommentContent("abcde", 1));
381 assertEquals(" abcde",
382 MegaFile.getNonCommentContent("and others ] abcde", 1));
383 assertEquals(" abcde", MegaFile.getNonCommentContent(
384 "and others [including refs] ] abcde", 1));
385 assertEquals(" x ] abcde",
386 MegaFile.getNonCommentContent("and others ] x ] abcde", 1));
389 @Test(groups = { "Functional" })
390 public void testCommentDepth() throws FileFormatException
392 assertEquals(0, MegaFile.commentDepth("abcde", 0));
393 assertEquals(1, MegaFile.commentDepth("abc[de", 0));
394 assertEquals(3, MegaFile.commentDepth("ab[c[de", 1));
395 assertEquals(1, MegaFile.commentDepth("ab]c[d]e[f", 1));
396 assertEquals(0, MegaFile.commentDepth("a]b[c]d]e", 1));
399 @Test(groups = { "Functional" })
400 public void testGetValue()
402 assertEquals("Mega", MegaFile.getValue("Name=Mega"));
403 assertEquals("Mega", MegaFile.getValue("Name =Mega"));
404 assertEquals("Mega", MegaFile.getValue(" Name = Mega "));
405 assertEquals("Mega", MegaFile.getValue("Name = Mega; "));
406 assertEquals("Mega", MegaFile.getValue(" Name = Mega ; "));
407 assertEquals("Mega", MegaFile.getValue("\t!Name \t= \tMega ; "));
408 assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
409 assertEquals("", MegaFile.getValue("Name"));
// Reads the INTERLEAVED_WITH_DESCRIPTION fixture into an alignment through
// AppletFormatAdapter, prints that alignment with a fresh MegaFile and
// compares the text with the expected MEGA output.
413 * Test reading a MEGA file to an alignment then writing it out in MEGA
414 * format. Verify the output is (functionally) the same as the input.
416 * @throws IOException
418 @Test(groups = "Functional")
419 public void testRoundTrip_Interleaved() throws IOException
421 AppletFormatAdapter fa = new AppletFormatAdapter();
422 AlignmentI al = fa.readFile(INTERLEAVED_WITH_DESCRIPTION,
423 AppletFormatAdapter.PASTE, "MEGA");
424 MegaFile output = new MegaFile();
425 String formatted = output.print(al);
// NOTE(review): the embedded numbering skips 426-427, 429, 432 and 442, so
// the 'expected' declaration, some of its literal lines and the last
// argument of the final assertEquals are not visible in this extract.
428 "#MEGA\n!Title Data with description;\n" +
430 " Line one of description\n" +
431 " Line two of description;\n" +
433 " DataType=DNA CodeTable=Standard\n" +
434 " NSeqs=2 NSites=12\n" + // NSites includes gaps
435 " Indel=- Identical=. Missing=?;\n\n" +
436 "#U455 C-- GTA [6]\n" +
437 "#CPZANT ATC -G- [6]\n\n" +
438 "#U455 CGA --T [12]\n" +
439 "#CPZANT CA- -GC [12]\n";
441 assertEquals("Roundtrip didn't match", expected,
// Round trip for input that contains a bracketed comment with a nested
// comment and a format section spread over several lines: the text is read
// into an alignment through AppletFormatAdapter, printed with a fresh
// MegaFile and compared with the expected MEGA output.
446 * Test reading a MEGA file to an alignment then writing it out in MEGA
447 * format. Verify the output is (functionally) the same as the input.
449 * @throws IOException
451 @Test(groups = "Functional")
452 public void testRoundTrip_multilineFormatWithComments()
455 AppletFormatAdapter fa = new AppletFormatAdapter();
// NOTE(review): the embedded numbering skips several values inside the
// input literal below (461-462, 465, 468, 470), so parts of the input are
// not visible in this extract.
457 AlignmentI al = fa.readFile("#MEGA\n"
458 + "!Title Data with description;\n"
459 + "[ this comment should be ignored\n"
460 + "including [this nested comment]\n"
463 + "DataType=DNA CodeTable=Standard\n"
464 + "indel=- Missing=? MatchChar=.;\n\n"
466 + " Line one of description\n"
467 + " Line two of description;\n\n"
469 + "#CPZANT ATC GGG\n\n"
471 + "#CPZANT CAA TGC\n",
472 AppletFormatAdapter.PASTE, "MEGA");
474 MegaFile output = new MegaFile();
475 String formatted = output.print(al);
// NOTE(review): the 'expected' declaration (476-477), some of its literal
// lines (479, 482) and the last argument of the final assertEquals (492)
// are likewise not visible here.
478 "#MEGA\n!Title Data with description;\n" +
480 " Line one of description\n" +
481 " Line two of description;\n" +
483 " DataType=DNA CodeTable=Standard\n" +
484 " NSeqs=2 NSites=12\n" +
485 " Indel=- Identical=. Missing=?;\n\n" +
486 "#U455 CGC GTA [6]\n" +
487 "#CPZANT ATC GGG [6]\n\n" +
488 "#U455 CGA TTT [12]\n" +
489 "#CPZANT CAA TGC [12]\n";
491 assertEquals("Roundtrip didn't match", expected,
// The !Format line names '.' as the identity symbol. In the expected CPZANT
// data below every '.' of the input has been replaced by the residue that
// the first sequence (U455) has at the same position.
498 * Test parse of interleaved mega format data where the identity character is
499 * used in sequences after the first
501 * @throws IOException
503 @Test(groups = { "Functional" })
504 public void testParse_interleavedWithIdentity() throws IOException
// NOTE(review): the embedded numbering skips 510 and 512, so the U455 input
// lines are not visible in this extract.
507 MegaFile testee = new MegaFile("#MEGA\n"+
508 "!TITLE Interleaved sequence data;\n" +
509 "!Format Identical=.;\n\n" +
511 "#CPZANT M..P.R\n\n" +
513 "#CPZANT ..YZ..", AppletFormatAdapter.PASTE);
515 assertEquals("Title not as expected", "Interleaved sequence data",
516 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
517 Vector<SequenceI> seqs = testee.getSeqs();
518 // should be 2 sequences
519 assertEquals("Expected two sequences", 2, seqs.size());
520 // check sequence names correct and order preserved
521 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
522 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
524 // check sequence data
525 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
526 .getSequenceAsString());
// "M..P.R" + "..YZ.." resolved against "ABCDEF" + "KLMNOP"
527 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
528 .getSequenceAsString());
529 assertTrue("File format is not flagged as interleaved",
530 testee.isInterleaved());
// Noninterleaved counterpart of the identity-symbol test: here the !Format
// line uses the keyword MatchChar to name '.' as the symbol.
534 * Test parse of noninterleaved format data including identity symbol
536 * @throws IOException
538 @Test(groups = { "Functional" })
539 public void testParse_nonInterleavedWithIdentity() throws IOException
// NOTE(review): the embedded numbering skips 545-550, so the sequence data
// lines of the input are not visible in this extract.
542 MegaFile testee = new MegaFile("#MEGA\n"
543 + "!TITLE Noninterleaved sequence data;\n"
544 + "!Format MatchChar=.;\n"
551 AppletFormatAdapter.PASTE);
553 assertEquals("Title not as expected", "Noninterleaved sequence data",
554 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
555 Vector<SequenceI> seqs = testee.getSeqs();
556 // should be 2 sequences
557 assertEquals("Expected two sequences", 2, seqs.size());
558 // check sequence names correct and order preserved
559 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
560 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
562 // check sequence data
563 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
564 .get(0).getSequenceAsString());
565 assertEquals("Second sequence data wrong", "KLCFODHXYZCGPQC",
566 seqs.get(1).getSequenceAsString());
567 assertFalse("File format is not flagged as noninterleaved",
568 testee.isInterleaved());
574 * Test parse of interleaved format data including position number comments.
576 * @throws IOException
578 @Test(groups = { "Functional" })
579 public void testParse_interleavedWithPositionNumber() throws IOException
582 MegaFile testee = new MegaFile("#MEGA\n"+
583 "TITLE: Interleaved sequence data\n\n" +
584 "#U455 ABCDEF [6]\n" +
585 "#CPZANT MNOPQR [6]\n\n" +
586 "#U455 KLMNOP [12]\n" +
587 "#CPZANT WXYZGC [12]\n", AppletFormatAdapter.PASTE);
589 assertEquals("Title not as expected", "Interleaved sequence data",
590 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
591 Vector<SequenceI> seqs = testee.getSeqs();
592 // should be 2 sequences
593 assertEquals("Expected two sequences", 2, seqs.size());
594 // check sequence names correct and order preserved
595 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
596 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
598 // check sequence data
599 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
600 .getSequenceAsString());
601 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
602 .getSequenceAsString());
603 assertTrue("File format is not flagged as interleaved",
604 testee.isInterleaved());