3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.AssertJUnit.fail;
9 import jalview.datamodel.Sequence;
10 import jalview.datamodel.SequenceI;
12 import java.io.IOException;
13 import java.util.Vector;
15 import org.testng.annotations.Test;
18 * Unit tests for MegaFile - read and write in MEGA format(s).
20 public class MegaFileTest
22 private static final String THIRTY_CHARS = "012345678901234567890123456789";
25 private static final String INTERLEAVED =
27 "TITLE: Interleaved sequence data\n\n" +
29 "#CPZANT MNOPQR\n\n" + "#U455 KLMNOP\n" +
32 private static final String INTERLEAVED_NOHEADERS =
34 + "#CPZANT MNOPQR\n\n"
38 // interleaved sequences, one with 60 one with 120 characters (on overlong
40 private static final String INTERLEAVED_LONGERTHAN50 =
42 + "TITLE: Interleaved sequence data\n\n"
43 + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + "\n"
45 + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS;
47 private static final String NONINTERLEAVED =
49 + "TITLE: Noninterleaved sequence data\n\n"
57 // Sequence length 60 (split over two lines)
58 private static final String NONINTERLEAVED_LONGERTHAN50 =
59 "#SIXTY\n" + THIRTY_CHARS + "\n" + THIRTY_CHARS;
61 // this one starts noninterleaved then switches to interleaved
62 private static final String MIXED =
64 + "TITLE: This is a mess\n\n" + "#CPZANT KLMNOPWXYZCGATC\n\n"
68 // interleaved with a new sequence appearing in the second block :-O
69 private static final String INTERLEAVED_SEQUENCE_ERROR =
71 + "TITLE: Interleaved sequence data\n\n"
73 + "#CPZANT MNOPQR\n\n"
76 // the 'fancy' format, different header format, bases in triplet groups
77 private static final String FANCY_FORMAT =
79 + "!Title Fancy format data;\n"
80 + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n"
82 + " Line one of description\n"
83 + " Line two of description;\n\n"
84 + "!Gene=Adh Property=Coding CodonStart=1;\n"
86 + "#CPZANT MNO PQR\n\n"
90 // interleaved sequence data for two genes
91 private static final String TWO_GENES =
93 + "!Title Fancy format data;\n"
94 + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n"
96 + " Line one of description\n"
97 + " Line two of description;\n\n"
98 + "!Gene=Adh Property=Coding CodonStart=1;\n"
100 + "#CPZANT MNO PQR\n\n"
102 + "#CPZANT WXY Z\n"; //TODO complete
107 * Test paste of interleaved mega format data.
109 * @throws IOException
111 @Test(groups = { "Functional" })
112 public void testParse_interleaved() throws IOException
114 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
115 assertEquals("Title not as expected", "Interleaved sequence data",
116 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
117 Vector<SequenceI> seqs = testee.getSeqs();
118 // should be 2 sequences
119 assertEquals("Expected two sequences", 2, seqs.size());
120 // check sequence names correct and order preserved
121 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
122 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
124 // check sequence data
125 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
126 .getSequenceAsString());
127 assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
128 .getSequenceAsString());
129 assertTrue("File format is not flagged as interleaved",
130 testee.isInterleaved());
134 * Test paste of noninterleaved mega format data.
136 * @throws IOException
138 @Test(groups = { "Functional" })
139 public void testParse_nonInterleaved() throws IOException
141 MegaFile testee = new MegaFile(NONINTERLEAVED,
142 AppletFormatAdapter.PASTE);
143 assertEquals("Title not as expected", "Noninterleaved sequence data",
144 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
145 Vector<SequenceI> seqs = testee.getSeqs();
146 // should be 2 sequences
147 assertEquals("Expected two sequences", 2, seqs.size());
148 // check sequence names correct and order preserved
149 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
150 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
152 // check sequence data
153 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
154 .get(0).getSequenceAsString());
155 assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
156 seqs.get(1).getSequenceAsString());
157 assertFalse("File format is not flagged as noninterleaved",
158 testee.isInterleaved());
162 * Test parsing an interleaved file with an extra sequence appearing after the
163 * first block - should fail.
165 @Test(groups = { "Functional" })
166 public void testParse_interleavedExtraSequenceError()
170 new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
171 fail("Expected extra sequence IOException");
172 } catch (IOException e)
175 "Unexpected exception message",
176 "Parse error: misplaced new sequence starting at #U456 KLMNOP",
182 * Test a mixed up file.
184 @Test(groups = { "Functional" })
185 public void testParse_mixedInterleavedNonInterleaved()
189 new MegaFile(MIXED, AppletFormatAdapter.PASTE);
190 fail("Expected mixed content exception");
191 } catch (IOException e)
194 "Unexpected exception message",
195 "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
201 @Test(groups = { "Functional" })
202 public void testGetSequenceId()
204 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
205 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
206 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
207 assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
208 assertNull(MegaFile.getSequenceId("AB123 CTAG"));
209 assertNull(MegaFile.getSequenceId("AB123"));
210 assertNull(MegaFile.getSequenceId(""));
211 assertNull(MegaFile.getSequenceId(null));
214 @Test(groups = { "Functional" })
215 public void testGetMaxIdLength()
217 SequenceI[] seqs = new Sequence[2];
218 seqs[0] = new Sequence("Something", "GCATAC");
219 seqs[1] = new Sequence("SomethingElse", "GCATAC");
220 assertEquals(13, MegaFile.getMaxIdLength(seqs));
221 seqs[1] = new Sequence("DNA", "GCATAC");
222 assertEquals(9, MegaFile.getMaxIdLength(seqs));
225 @Test(groups = { "Functional" })
226 public void testGetMaxSequenceLength()
228 SequenceI[] seqs = new Sequence[2];
229 seqs[0] = new Sequence("Seq1", "GCATAC");
230 seqs[1] = new Sequence("Seq2", "GCATACTAG");
231 assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
232 seqs[1] = new Sequence("Seq2", "GCA");
233 assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
237 * Test (parse and) print of interleaved mega format data.
239 * @throws IOException
241 @Test(groups = { "Functional" })
242 public void testPrint_interleaved() throws IOException
244 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
245 String printed = testee.print();
246 System.out.println(printed);
247 // normally output should match input
248 // we cheated here with a number of short input lines
249 String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n"
250 + "#U455 ABCDEFKLMNOP\n" + "#CPZANT MNOPQRWXYZ"
252 assertEquals("Print format wrong", expected, printed);
256 * Test (parse and) print of interleaved data with no headers (acceptable).
258 * @throws IOException
260 @Test(groups = { "Functional" })
261 public void testPrint_interleavedNoHeaders() throws IOException
263 MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
264 AppletFormatAdapter.PASTE);
265 String printed = testee.print();
266 System.out.println(printed);
267 // normally output should match input
268 // we cheated here with a number of short input lines
269 String expected = "#MEGA\n\n" + "#U455 ABCDEFKLMNOP" + "\n"
270 + "#CPZANT MNOPQRWXYZ\n";
271 assertEquals("Print format wrong", expected, printed);
275 * Test (parse and) print of noninterleaved mega format data.
277 * @throws IOException
279 @Test(groups = { "Functional" })
280 public void testPrint_noninterleaved() throws IOException
282 MegaFile testee = new MegaFile(NONINTERLEAVED,
283 AppletFormatAdapter.PASTE);
284 String printed = testee.print();
285 System.out.println(printed);
286 // normally output should match input
287 // we cheated here with a number of short input lines
288 String expected = "#MEGA\n"
289 + "!TITLE Noninterleaved sequence data;\n\n"
290 + "#U455\n" + "ABCFEDHIJMNOPQR\n\n" + "#CPZANT\n"
291 + "KLMNOPWXYZCGATC\n";
292 assertEquals("Print format wrong", expected, printed);
296 * Test (parse and) print of interleaved mega format data extending to more
297 * than one line of output.
299 * @throws IOException
301 @Test(groups = { "Functional" })
302 public void testPrint_interleavedMultiLine() throws IOException
304 MegaFile testee = new MegaFile(INTERLEAVED_LONGERTHAN50,
305 AppletFormatAdapter.PASTE);
306 String printed = testee.print();
307 System.out.println(printed);
308 // first sequence is length 60, second length 120
309 // should be output as 50 + 10 + 0 and as 50 + 50 + 20 character lines
311 String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n"
312 + "#U455 " + THIRTY_CHARS + "01234567890123456789\n"
313 + "#CPZANT " + THIRTY_CHARS + "01234567890123456789\n" + "\n"
314 + "#U455 " + "0123456789\n" + "#CPZANT " + THIRTY_CHARS
315 + "01234567890123456789\n\n" + "#U455 \n" + "#CPZANT "
316 + "01234567890123456789"
318 assertEquals("Print format wrong", expected, printed);
322 * Test (parse and) print of noninterleaved mega format data extending to more
323 * than one line of output.
325 * @throws IOException
327 @Test(groups = { "Functional" })
328 public void testPrint_noninterleavedMultiLine() throws IOException
330 MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
331 AppletFormatAdapter.PASTE);
332 String printed = testee.print();
333 System.out.println(printed);
334 // 60 character sequence should be output as 50 on first line then 10 more
335 String expected = "#MEGA\n\n" + "#SIXTY\n" + THIRTY_CHARS
336 + "01234567890123456789\n" + "0123456789\n";
337 assertEquals("Print format wrong", expected, printed);
341 * Test paste / parse of 'fancy format' data.
343 * @throws IOException
345 @Test(groups = { "Functional" })
346 public void testParse_fancyFormat() throws IOException
348 MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE);
349 assertEquals("Title not as expected", "Fancy format data",
350 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
352 // assertEquals("Format property not parsed",
353 // "DataType=DNA indel=- CodeTable=Standard;",
354 // testee.getAlignmentProperty(MegaFile.PROP_FORMAT));
355 Vector<SequenceI> seqs = testee.getSeqs();
356 // should be 2 sequences
357 assertEquals("Expected two sequences", 2, seqs.size());
358 // check sequence names correct and order preserved
359 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
360 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
362 // check sequence data
363 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
364 .getSequenceAsString());
365 assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
366 .getSequenceAsString());
367 assertTrue("File format is not flagged as interleaved",
368 testee.isInterleaved());
370 assertEquals("Description property not parsed",
371 " Line one of description\n"
372 + " Line two of description\n",
373 testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
376 @Test(groups = { "Functional" })
377 public void testGetNonCommentContent() throws FileFormatException
379 assertEquals("abcde", MegaFile.getNonCommentContent("abcde", 0));
380 assertEquals("CGT ACG GAC ",
381 MegaFile.getNonCommentContent("CGT ACG GAC [9]", 0));
382 assertEquals("", MegaFile.getNonCommentContent("abcde", 1));
383 assertEquals(" abcde",
384 MegaFile.getNonCommentContent("and others ] abcde", 1));
385 assertEquals(" abcde", MegaFile.getNonCommentContent(
386 "and others [including refs] ] abcde", 1));
387 assertEquals(" x ] abcde",
388 MegaFile.getNonCommentContent("and others ] x ] abcde", 1));
391 @Test(groups = { "Functional" })
392 public void testCommentDepth() throws FileFormatException
394 assertEquals(0, MegaFile.commentDepth("abcde", 0));
395 assertEquals(1, MegaFile.commentDepth("abc[de", 0));
396 assertEquals(3, MegaFile.commentDepth("ab[c[de", 1));
397 assertEquals(1, MegaFile.commentDepth("ab]c[d]e[f", 1));
398 assertEquals(0, MegaFile.commentDepth("a]b[c]d]e", 1));
401 @Test(groups = { "Functional" })
402 public void testGetValue()
404 assertEquals("Mega", MegaFile.getValue("Name=Mega"));
405 assertEquals("Mega", MegaFile.getValue("Name =Mega"));
406 assertEquals("Mega", MegaFile.getValue(" Name = Mega "));
407 assertEquals("Mega", MegaFile.getValue("Name = Mega; "));
408 assertEquals("Mega", MegaFile.getValue(" Name = Mega ; "));
409 assertEquals("Mega", MegaFile.getValue("\t!Name \t= \tMega ; "));
410 assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
411 assertEquals("", MegaFile.getValue("Name"));