3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.AssertJUnit.fail;
9 import jalview.datamodel.AlignmentI;
10 import jalview.datamodel.Sequence;
11 import jalview.datamodel.SequenceI;
13 import java.io.IOException;
14 import java.util.Vector;
16 import org.testng.annotations.Test;
19 * Unit tests for MegaFile - read and write in MEGA format(s).
21 public class MegaFileTest
// Filler strings of exactly 20 and 30 characters, concatenated in the
// fixtures and tests to build sequence lines of a known length.
23 private static final String TWENTY_CHARS = "9876543210abcdefghij";
25 private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
// Fixture: interleaved data whose title is written as "TITLE: ...". Parsed by
// testParse_interleaved and testPrint_interleaved, which expect two sequences
// named U455 and CPZANT.
28 private static final String INTERLEAVED =
30 "TITLE: Interleaved sequence data\n\n" +
32 "#CPZANT MNOPQR\n\n" +
// Fixture: interleaved data with no header lines. testPrint_interleavedNoHeaders
// expects print() to reproduce it with only a "#MEGA\n\n" prefix added.
36 private static final String INTERLEAVED_NOHEADERS =
38 + "#CPZANT MNOPQR\n\n"
42 // interleaved sequences, with 50 residues
// Each data line is THIRTY_CHARS and TWENTY_CHARS joined (in opposite order
// for the two sequences), i.e. 50 residues on a single line per sequence.
43 private static final String INTERLEAVED_50RESIDUES =
45 + "!TITLE Interleaved sequence data\n\n"
46 + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n"
47 + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
// Fixture: noninterleaved data titled "Noninterleaved sequence data"; used by
// testParse_nonInterleaved and testPrint_noninterleaved.
49 private static final String NONINTERLEAVED =
51 + "!TITLE Noninterleaved sequence data\n\n"
59 // this one starts interleaved then switches to non-interleaved
// Used by testParse_mixedInterleavedNonInterleaved, which expects the parser
// to reject it with an IOException.
60 private static final String MIXED =
62 + "!TITLE This is a mess\n\n"
63 + "#CPZANT KLMNOPWXYZCGATC\n\n"
67 // interleaved with a new sequence appearing in the second block :-O
// Used by testParse_interleavedExtraSequenceError; the error message expected
// there names the misplaced sequence line "#U456 KLMNOP".
68 private static final String INTERLEAVED_SEQUENCE_ERROR =
70 + "!TITLE Interleaved sequence data\n\n"
72 + "#CPZANT MNOPQR\n\n"
75 // the 'fancy' format, different header format, bases in triplet groups
// Headers are "!Title ...;" and "!Format ...;" statements terminated by
// semicolons, followed by a two-line description; sequence data is written as
// space-separated triplets (e.g. "ATC GGG"). Used by testParse_withDescription
// and testRoundTrip_Interleaved.
76 private static final String INTERLEAVED_WITH_DESCRIPTION =
78 + "!Title Data with description;\n"
79 + "!Format DataType=DNA indel=- CodeTable=Standard Missing=? MatchChar=.;\n\n"
81 + " Line one of description\n"
82 + " Line two of description;\n\n"
84 + "#CPZANT ATC GGG\n\n"
86 + "#CPZANT CAA TGC\n";
91 * Test paste of interleaved mega format data.
// Parses the INTERLEAVED fixture as pasted text and checks, in order: the
// title property, the number of sequences, their names and order, the full
// residue string of each sequence, and that isInterleaved() returns true.
95 @Test(groups = { "Functional" })
96 public void testParse_interleaved() throws IOException
98 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
// the "TITLE: " prefix in the fixture is not expected to be part of the value
99 assertEquals("Title not as expected", "Interleaved sequence data",
100 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
101 Vector<SequenceI> seqs = testee.getSeqs();
102 // should be 2 sequences
103 assertEquals("Expected two sequences", 2, seqs.size());
104 // check sequence names correct and order preserved
105 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
106 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
108 // check sequence data
// each expected string is 12 residues, i.e. longer than the 6-residue data
// lines of the fixture, so data from successive blocks must have been joined
109 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
110 .getSequenceAsString());
111 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
112 .getSequenceAsString());
113 assertTrue("File format is not flagged as interleaved",
114 testee.isInterleaved());
118 * Test paste of noninterleaved mega format data.
120 * @throws IOException
// Parses the NONINTERLEAVED fixture as pasted text and checks the title
// property, the two sequence names in order, the full residue string of each
// sequence, and that isInterleaved() returns false.
122 @Test(groups = { "Functional" })
123 public void testParse_nonInterleaved() throws IOException
125 MegaFile testee = new MegaFile(NONINTERLEAVED,
126 AppletFormatAdapter.PASTE);
127 assertEquals("Title not as expected", "Noninterleaved sequence data",
128 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
129 Vector<SequenceI> seqs = testee.getSeqs();
130 // should be 2 sequences
131 assertEquals("Expected two sequences", 2, seqs.size());
132 // check sequence names correct and order preserved
133 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
134 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
136 // check sequence data
137 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
138 .get(0).getSequenceAsString());
139 assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
140 seqs.get(1).getSequenceAsString());
// the message is what is reported if the file is wrongly flagged as interleaved
141 assertFalse("File format is not flagged as noninterleaved",
142 testee.isInterleaved());
146 * Test parsing an interleaved file with an extra sequence appearing after the
147 * first block - should fail.
// Constructing a MegaFile from INTERLEAVED_SEQUENCE_ERROR is expected to throw
// an IOException; fail() is reached only when no exception is thrown.
149 @Test(groups = { "Functional" })
150 public void testParse_interleavedExtraSequenceError()
154 new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
155 fail("Expected extra sequence IOException");
156 } catch (IOException e)
// the expected text quotes the offending input line (#U456 is the new id)
// NOTE(review): presumably compared against e.getMessage() - confirm
159 "Unexpected exception message",
160 "Parse error: misplaced new sequence starting at #U456 KLMNOP",
166 * Test a mixed up file.
// Constructing a MegaFile from the MIXED fixture (interleaved start, then
// noninterleaved) is expected to throw an IOException; fail() is reached only
// when no exception is thrown.
168 @Test(groups = { "Functional" })
169 public void testParse_mixedInterleavedNonInterleaved()
173 new MegaFile(MIXED, AppletFormatAdapter.PASTE);
174 fail("Expected mixed content exception");
175 } catch (IOException e)
// the expected text ends with the data line at which the mix was detected
// NOTE(review): presumably compared against e.getMessage() - confirm
178 "Unexpected exception message",
179 "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
// Checks MegaFile.getSequenceId(): for a line starting with '#' the id is the
// text after '#' up to the first space (or to the end of the line when there
// is no sequence data); null is expected for a line without a leading '#',
// for the empty string and for null.
185 @Test(groups = { "Functional" })
186 public void testGetSequenceId()
188 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
// NOTE(review): this assertion is identical to the previous one as written;
// if it was meant to cover a different separator (several spaces, a tab),
// restore that input - otherwise it is redundant.
189 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
190 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
// id only, no sequence data on the line
191 assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
// no leading '#': not a sequence id line
192 assertNull(MegaFile.getSequenceId("AB123 CTAG"));
193 assertNull(MegaFile.getSequenceId("AB123"));
194 assertNull(MegaFile.getSequenceId(""));
195 assertNull(MegaFile.getSequenceId(null));
// Checks MegaFile.getMaxIdLength(): the expected value is the length of the
// longest sequence name in the array.
198 @Test(groups = { "Functional" })
199 public void testGetMaxIdLength()
201 SequenceI[] seqs = new Sequence[2];
202 seqs[0] = new Sequence("Something", "GCATAC");
203 seqs[1] = new Sequence("SomethingElse", "GCATAC");
// "SomethingElse" has 13 characters
204 assertEquals(13, MegaFile.getMaxIdLength(seqs));
// with the second name shortened, "Something" (9 characters) is the longest
205 seqs[1] = new Sequence("DNA", "GCATAC");
206 assertEquals(9, MegaFile.getMaxIdLength(seqs));
// Checks MegaFile.getMaxSequenceLength(): the expected value is the length of
// the longest residue string in the array.
209 @Test(groups = { "Functional" })
210 public void testGetMaxSequenceLength()
212 SequenceI[] seqs = new Sequence[2];
213 seqs[0] = new Sequence("Seq1", "GCATAC");
214 seqs[1] = new Sequence("Seq2", "GCATACTAG");
// "GCATACTAG" has 9 residues
215 assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
// with the second sequence shortened to 3, the first (6 residues) is longest
216 seqs[1] = new Sequence("Seq2", "GCA");
217 assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
221 * Test (parse and) print of interleaved mega format data.
223 * @throws IOException
// Parses the INTERLEAVED fixture, calls the no-argument print() and compares
// the result with a hand-built expected string.
225 @Test(groups = { "Functional" })
226 public void testPrint_interleaved() throws IOException
228 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
229 String printed = testee.print();
// echo the output to stdout so it can be inspected in the test log
230 System.out.println(printed);
231 // normally output should match input
232 // we cheated here with a number of short input lines
233 // nb don't get Title in output if not calling print(AlignmentI)
// hence the expected text starts with "#MEGA" and goes straight to the data
234 String expected = "#MEGA\n\n" + "#U455 ABCDEF\n"
235 + "#CPZANT MNOPQR\n\n" + "#U455 KLMNOP\n" + "#CPZANT WXYZGC"
237 assertEquals("Print format wrong", expected, printed);
241 * Test (parse and) print of interleaved data with no headers (acceptable).
243 * @throws IOException
// Parses the INTERLEAVED_NOHEADERS fixture and checks that print() reproduces
// the input text with a "#MEGA\n\n" prefix added.
245 @Test(groups = { "Functional" })
246 public void testPrint_interleavedNoHeaders() throws IOException
248 MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
249 AppletFormatAdapter.PASTE);
250 String printed = testee.print();
// echo the output to stdout so it can be inspected in the test log
251 System.out.println(printed);
253 assertEquals("Print format wrong", "#MEGA\n\n" + INTERLEAVED_NOHEADERS,
258 * Test (parse and) print of noninterleaved mega format data.
260 * @throws IOException
// Parses the NONINTERLEAVED fixture, checks the positions-per-line value
// reported after parsing, then compares print() with the expected text.
262 @Test(groups = { "Functional" })
263 public void testPrint_noninterleaved() throws IOException
265 MegaFile testee = new MegaFile(NONINTERLEAVED,
266 AppletFormatAdapter.PASTE);
// NOTE(review): 10 presumably reflects the data line length in the input -
// confirm against the fixture and MegaFile
267 assertEquals(10, testee.getPositionsPerLine());
268 String printed = testee.print();
// echo the output to stdout so it can be inspected in the test log
269 System.out.println(printed);
270 // normally output should match input
271 // we cheated here with a number of short input lines
// expected layout: each id on its own line, then its 15 residues wrapped as a
// line of 10 followed by a line of 5, with a blank line between sequences
272 String expected = "#MEGA\n\n"
273 + "#U455\n" + "ABCFEDHIJM\nNOPQR\n\n"
274 + "#CPZANT\n" + "KLMNOPWXYZ\nCGATC\n";
275 assertEquals("Print format wrong", expected, printed);
279 * Test (parse and) print of interleaved mega format data extending to more
280 * than one line of output.
282 * @throws IOException
// Parses two 50-residue sequences given on one line each, switches the output
// width to 20 positions per line and checks that print() emits three
// interleaved blocks of 20, 20 and 10 residues.
284 @Test(groups = { "Functional" })
285 public void testPrint_interleavedMultiLine() throws IOException
287 MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
288 AppletFormatAdapter.PASTE);
// the fixture has 50 residues on each input line
289 assertEquals(50, testee.getPositionsPerLine());
291 * now simulate choosing 20 residues per line on output
293 testee.setPositionsPerLine(20);
294 String printed = testee.print();
// echo the output to stdout so it can be inspected in the test log
295 System.out.println(printed);
297 // U455 residues as read: 0123456789klmnopqrstABCDEFGHIJ9876543210abcdefghij
// in the expected output the residues appear in space-separated groups of ten
300 "#U455 0123456789 klmnopqrst\n" + // first 20
301 "#CPZANT 9876543210 abcdefghij\n\n" +
302 "#U455 ABCDEFGHIJ 9876543210\n" + // next 20
303 "#CPZANT 0123456789 klmnopqrst\n\n" +
304 "#U455 abcdefghij\n" + // last 10
305 "#CPZANT ABCDEFGHIJ\n";
307 assertEquals("Print format wrong", expected, printed);
311 * Test (parse and) print of noninterleaved mega format data extending to more
312 * than one line of output.
314 * @throws IOException
// Parses a single 60-residue noninterleaved sequence given as input lines of
// 30 and 30 residues, switches the output width to 25 positions per line and
// checks the wrapped output of print().
316 @Test(groups = { "Functional" })
317 public void testPrint_noninterleavedMultiLine() throws IOException
// despite the name this is 60 residues: THIRTY_CHARS, then TWENTY_CHARS plus
// ten more characters on the second data line
319 final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
320 + "\n" + TWENTY_CHARS + "9993332221\n";
321 MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
322 AppletFormatAdapter.PASTE);
// NOTE(review): 30 matches the length of the input data lines - confirm that
// is how MegaFile derives it
323 assertEquals(30, testee.getPositionsPerLine());
324 testee.setPositionsPerLine(25);
325 String printed = testee.print();
326 // 60 character sequence should be output as two lines of 25 then the remaining 10
327 String expected = "#MEGA\n\n" + "#SIXTY\n"
328 + "0123456789klmnopqrstABCDE\n" + "FGHIJ9876543210abcdefghij\n"
330 assertEquals("Print format wrong", expected, printed);
334 * Test parse of data including description
336 * @throws IOException
// Parses the INTERLEAVED_WITH_DESCRIPTION fixture and checks the title, the
// two sequence names in order, the residue strings, the interleaved flag and
// the parsed description property.
338 @Test(groups = { "Functional" })
339 public void testParse_withDescription() throws IOException
341 MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
342 AppletFormatAdapter.PASTE);
// the "!Title" keyword and the terminating ';' are not part of the value
343 assertEquals("Title not as expected", "Data with description",
344 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
346 Vector<SequenceI> seqs = testee.getSeqs();
347 // should be 2 sequences
348 assertEquals("Expected two sequences", 2, seqs.size());
349 // check sequence names correct and order preserved
350 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
351 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
353 // check sequence data
// the spaces between triplets in the input are not expected in the sequence
354 assertEquals("First sequence data wrong", "CGCGTACGATTT", seqs.get(0)
355 .getSequenceAsString());
356 assertEquals("Second sequence data wrong", "ATCGGGCAATGC", seqs.get(1)
357 .getSequenceAsString());
358 assertTrue("File format is not flagged as interleaved",
359 testee.isInterleaved());
// both description lines are expected, joined by a newline, keeping their
// leading space and without the terminating ';'
362 "Description property not parsed",
363 " Line one of description\n" + " Line two of description",
364 testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
// Checks MegaFile.getNonCommentContent(): text inside [...] comments is
// removed from the returned string.
// NOTE(review): the second argument appears to be the comment nesting depth
// already open at the start of the line (inferred from the expectations
// below) - confirm against MegaFile.
367 @Test(groups = { "Functional" })
368 public void testGetNonCommentContent() throws FileFormatException
// no comment, depth 0: the line is returned unchanged
370 assertEquals("abcde", MegaFile.getNonCommentContent("abcde", 0));
// a trailing [9] comment is dropped; the space before it is kept
371 assertEquals("CGT ACG GAC ",
372 MegaFile.getNonCommentContent("CGT ACG GAC [9]", 0));
// depth 1 and no closing bracket: nothing is returned
373 assertEquals("", MegaFile.getNonCommentContent("abcde", 1));
// depth 1: only the text after the closing ']' is returned
374 assertEquals(" abcde",
375 MegaFile.getNonCommentContent("and others ] abcde", 1));
// a nested [..] inside the open comment has to be closed as well
376 assertEquals(" abcde", MegaFile.getNonCommentContent(
377 "and others [including refs] ] abcde", 1));
// once the comment has been closed, a further ']' is kept as ordinary content
378 assertEquals(" x ] abcde",
379 MegaFile.getNonCommentContent("and others ] x ] abcde", 1));
// Checks MegaFile.commentDepth(): judging by the expectations, the result is
// the starting depth (second argument) plus one for each '[' and minus one
// for each ']' met in the line.
382 @Test(groups = { "Functional" })
383 public void testCommentDepth() throws FileFormatException
385 assertEquals(0, MegaFile.commentDepth("abcde", 0));
386 assertEquals(1, MegaFile.commentDepth("abc[de", 0));
// starting depth 1 plus two opening brackets
387 assertEquals(3, MegaFile.commentDepth("ab[c[de", 1));
// 1 -> 0 -> 1 -> 0 -> 1
388 assertEquals(1, MegaFile.commentDepth("ab]c[d]e[f", 1));
// the final ']' arrives at depth 0 and the expected result stays 0, not -1
389 assertEquals(0, MegaFile.commentDepth("a]b[c]d]e", 1));
// Checks MegaFile.getValue(): for a "name = value" style statement the
// expected result is the value alone, without surrounding whitespace, a
// leading '!' keyword marker or a trailing ';'.
392 @Test(groups = { "Functional" })
393 public void testGetValue()
395 assertEquals("Mega", MegaFile.getValue("Name=Mega"));
396 assertEquals("Mega", MegaFile.getValue("Name =Mega"));
397 assertEquals("Mega", MegaFile.getValue(" Name = Mega "));
// a trailing semicolon (with or without spaces around it) is not part of the value
398 assertEquals("Mega", MegaFile.getValue("Name = Mega; "));
399 assertEquals("Mega", MegaFile.getValue(" Name = Mega ; "));
// tabs count as whitespace, and the name may carry a leading '!'
400 assertEquals("Mega", MegaFile.getValue("\t!Name \t= \tMega ; "));
// no '=' at all: whitespace alone separates the name from the value
401 assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
// a name with no value gives the empty string, not null
402 assertEquals("", MegaFile.getValue("Name"));
406 * Test reading a MEGA file to an alignment then writing it out in MEGA
407 * format. Verify the output is (functionally) the same as the input.
409 * @throws IOException
// Reads the INTERLEAVED_WITH_DESCRIPTION fixture into an alignment through
// AppletFormatAdapter.readFile() with format "MEGA", writes it back out with
// print(AlignmentI) on a new MegaFile, and compares the text produced.
411 @Test(groups = "Functional")
412 public void testRoundTrip_Interleaved() throws IOException
414 AppletFormatAdapter fa = new AppletFormatAdapter();
415 AlignmentI al = fa.readFile(INTERLEAVED_WITH_DESCRIPTION,
416 AppletFormatAdapter.PASTE, "MEGA");
417 MegaFile output = new MegaFile();
418 String formatted = output.print(al);
// The expected text is not a copy of the input: the format settings are
// spread over several lines, NSeqs/NSites are added, and the input's
// "indel=-" and "MatchChar=." appear as "Indel=-" and "Identical=.".
421 "#MEGA\n!Title Data with description;\n" +
423 " Line one of description\n" +
424 " Line two of description;\n" +
426 " DataType=DNA CodeTable=Standard\n" +
427 " NSeqs=2 NSites=12\n" +
428 " Indel=- Identical=. Missing=?;\n\n" +
430 "#CPZANT ATC GGG\n\n" +
434 assertEquals("Roundtrip didn't match", expected,
439 * Test reading a MEGA file to an alignment then writing it out in MEGA
440 * format. Verify the output is (functionally) the same as the input.
442 * @throws IOException
444 @Test(groups = "Functional")
445 public void testRoundTrip_multilineFormatWithComments()
448 AppletFormatAdapter fa = new AppletFormatAdapter();
450 AlignmentI al = fa.readFile("#MEGA\n"
451 + "!Title Data with description;\n"
452 + "[ this comment should be ignored\n"
453 + "including [this nested comment]\n"
456 + "DataType=DNA CodeTable=Standard\n"
457 + "indel=- Missing=? MatchChar=.;\n\n"
459 + " Line one of description\n"
460 + " Line two of description;\n\n"
462 + "#CPZANT ATC GGG\n\n"
464 + "#CPZANT CAA TGC\n",
465 AppletFormatAdapter.PASTE, "MEGA");
467 MegaFile output = new MegaFile();
468 String formatted = output.print(al);
471 "#MEGA\n!Title Data with description;\n" +
473 " Line one of description\n" +
474 " Line two of description;\n" +
476 " DataType=DNA CodeTable=Standard\n" +
477 " NSeqs=2 NSites=12\n" +
478 " Indel=- Identical=. Missing=?;\n\n" +
480 "#CPZANT ATC GGG\n\n" +
484 assertEquals("Roundtrip didn't match", expected,