3 import static org.junit.Assert.assertEquals;
4 import static org.junit.Assert.assertFalse;
5 import static org.junit.Assert.assertNull;
6 import static org.junit.Assert.assertTrue;
7 import static org.junit.Assert.fail;
8 import jalview.datamodel.Sequence;
9 import jalview.datamodel.SequenceI;
10 import jalview.io.MegaFile.FileFormat;
12 import java.io.IOException;
13 import java.util.Vector;
15 import org.junit.Test;
18 * Unit tests for MegaFile - read and write in MEGA format(s).
20 public class MegaFileTest
22 private static final String THIRTY_CHARS = "012345678901234567890123456789";
24 private static final String NEWLINE = System
25 .getProperty("line.separator");
27 private static final String INTERLEAVED = "#MEGA" + NEWLINE
28 + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE
29 + "#U455 ABCDEF" + NEWLINE + "#CPZANT MNOPQR" + NEWLINE
30 + NEWLINE + "#U455 KLMNOP" + NEWLINE + "#CPZANT WXYZ";
32 private static final String INTERLEAVED_NOHEADERS = "#U455 ABCDEF"
33 + NEWLINE + "#CPZANT MNOPQR" + NEWLINE + NEWLINE
34 + "#U455 KLMNOP" + NEWLINE + "#CPZANT WXYZ";
36 // interleaved sequences, one with 60 one with 120 characters (on overlong
38 private static final String INTERLEAVED_LONGERTHAN50 = "#MEGA" + NEWLINE
39 + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE
40 + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + NEWLINE + "#CPZANT "
41 + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS;
43 private static final String NONINTERLEAVED = "#MEGA" + NEWLINE
44 + "TITLE: Noninterleaved sequence data" + NEWLINE + NEWLINE
45 + "#U455 " + NEWLINE + "ABCFEDHIJ" + NEWLINE + "MNOPQR"
47 + "#CPZANT " + NEWLINE + "KLMNOPWXYZ" + NEWLINE + "CGATC";
49 // Sequence length 60 (split over two lines)
50 private static final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY"
51 + NEWLINE + THIRTY_CHARS + NEWLINE + THIRTY_CHARS;
53 // this one starts noninterleaved then switches to interleaved
54 private static final String MIXED = "#MEGA" + NEWLINE
55 + "TITLE: This is a mess" + NEWLINE + NEWLINE
56 + "#CPZANT KLMNOPWXYZCGATC" + NEWLINE + NEWLINE + "#U455 "
57 + NEWLINE + "ABCFEDHIJ";
59 // interleaved with a new sequence appearing in the second block :-O
60 private static final String INTERLEAVED_SEQUENCE_ERROR = "#MEGA"
61 + NEWLINE + "TITLE: Interleaved sequence data" + NEWLINE
62 + NEWLINE + "#U455 ABCDEF" + NEWLINE + "#CPZANT MNOPQR"
63 + NEWLINE + NEWLINE + "#U456 KLMNOP" + NEWLINE;
65 // the 'fancy' format, different header format, bases in triplet groups
66 private static final String FANCY_FORMAT = "#MEGA" + NEWLINE
67 + "!Title Fancy format data" + NEWLINE
68 + "!Format DataType=DNA indel=- CodeTable=Standard;" + NEWLINE
70 + "!Description" + NEWLINE + " Line one of description" + NEWLINE + " Line two of description"
72 + "!Gene=Adh Property=Coding CodonStart=1;" + NEWLINE
73 + "#U455 ABC DEF" + NEWLINE + "#CPZANT MNO PQR" + NEWLINE
74 + NEWLINE + "#U455 KLM NOP" + NEWLINE + "#CPZANT WXY Z";
77 * Test paste of interleaved mega format data.
82 public void testParse_interleaved() throws IOException
84 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
85 assertEquals("Title not as expected", "Interleaved sequence data",
86 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
87 assertEquals("Not identified as simple format", FileFormat.SIMPLE,
88 testee.getFileFormat());
89 Vector<SequenceI> seqs = testee.getSeqs();
90 // should be 2 sequences
91 assertEquals("Expected two sequences", 2, seqs.size());
92 // check sequence names correct and order preserved
93 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
94 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
96 // check sequence data
97 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
98 .getSequenceAsString());
99 assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
100 .getSequenceAsString());
101 assertTrue("File format is not flagged as interleaved",
102 testee.isInterleaved());
106 * Test paste of noninterleaved mega format data.
108 * @throws IOException
111 public void testParse_nonInterleaved() throws IOException
113 MegaFile testee = new MegaFile(NONINTERLEAVED, AppletFormatAdapter.PASTE);
114 assertEquals("Title not as expected", "Noninterleaved sequence data",
115 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
116 assertEquals("Not identified as simple format", FileFormat.SIMPLE,
117 testee.getFileFormat());
118 Vector<SequenceI> seqs = testee.getSeqs();
119 // should be 2 sequences
120 assertEquals("Expected two sequences", 2, seqs.size());
121 // check sequence names correct and order preserved
122 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
123 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
125 // check sequence data
126 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
127 .get(0).getSequenceAsString());
128 assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC", seqs.get(1)
129 .getSequenceAsString());
130 assertFalse("File format is not flagged as noninterleaved",
131 testee.isInterleaved());
135 * Test parsing an interleaved file with an extra sequence appearing after the
136 * first block - should fail.
139 public void testParse_interleavedExtraSequenceError()
143 new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
144 fail("Expected extra sequence IOException");
145 } catch (IOException e)
148 "Unexpected exception message",
149 "Parse error: misplaced new sequence starting at #U456 KLMNOP",
155 * Test a mixed up file.
158 public void testParse_mixedInterleavedNonInterleaved()
162 new MegaFile(MIXED, AppletFormatAdapter.PASTE);
163 fail("Expected mixed content exception");
164 } catch (IOException e)
167 "Unexpected exception message",
168 "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
175 public void testGetSequenceId()
177 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
178 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
179 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
180 assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
181 assertNull(MegaFile.getSequenceId("AB123 CTAG"));
182 assertNull(MegaFile.getSequenceId("AB123"));
183 assertNull(MegaFile.getSequenceId(""));
184 assertNull(MegaFile.getSequenceId(null));
188 public void testGetMaxIdLength()
190 SequenceI[] seqs = new Sequence[2];
191 seqs[0] = new Sequence("Something", "GCATAC");
192 seqs[1] = new Sequence("SomethingElse", "GCATAC");
193 assertEquals(13, MegaFile.getMaxIdLength(seqs));
194 seqs[1] = new Sequence("DNA", "GCATAC");
195 assertEquals(9, MegaFile.getMaxIdLength(seqs));
199 public void testGetMaxSequenceLength()
201 SequenceI[] seqs = new Sequence[2];
202 seqs[0] = new Sequence("Seq1", "GCATAC");
203 seqs[1] = new Sequence("Seq2", "GCATACTAG");
204 assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
205 seqs[1] = new Sequence("Seq2", "GCA");
206 assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
210 * Test (parse and) print of interleaved mega format data.
212 * @throws IOException
215 public void testPrint_interleaved() throws IOException
217 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
218 String printed = testee.print();
219 System.out.println(printed);
220 // normally output should match input
221 // we cheated here with a number of short input lines
222 String expected = "#MEGA" + NEWLINE
223 + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE
224 + "#U455 ABCDEFKLMNOP" + NEWLINE + "#CPZANT MNOPQRWXYZ"
226 assertEquals("Print format wrong", expected, printed);
230 * Test (parse and) print of interleaved data with no headers (acceptable).
232 * @throws IOException
235 public void testPrint_interleavedNoHeaders() throws IOException
237 MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
238 AppletFormatAdapter.PASTE);
239 String printed = testee.print();
240 System.out.println(printed);
241 // normally output should match input
242 // we cheated here with a number of short input lines
243 String expected = "#MEGA" + NEWLINE + NEWLINE
244 + "#U455 ABCDEFKLMNOP" + NEWLINE + "#CPZANT MNOPQRWXYZ"
246 assertEquals("Print format wrong", expected, printed);
250 * Test (parse and) print of noninterleaved mega format data.
252 * @throws IOException
255 public void testPrint_noninterleaved() throws IOException
257 MegaFile testee = new MegaFile(NONINTERLEAVED,
258 AppletFormatAdapter.PASTE);
259 String printed = testee.print();
260 System.out.println(printed);
261 // normally output should match input
262 // we cheated here with a number of short input lines
263 String expected = "#MEGA" + NEWLINE
264 + "TITLE: Noninterleaved sequence data" + NEWLINE + NEWLINE
265 + "#U455" + NEWLINE + "ABCFEDHIJMNOPQR" + NEWLINE + NEWLINE
266 + "#CPZANT" + NEWLINE + "KLMNOPWXYZCGATC" + NEWLINE;
267 assertEquals("Print format wrong", expected, printed);
271 * Test (parse and) print of interleaved mega format data extending to more
272 * than one line of output.
274 * @throws IOException
277 public void testPrint_interleavedMultiLine() throws IOException
279 MegaFile testee = new MegaFile(INTERLEAVED_LONGERTHAN50,
280 AppletFormatAdapter.PASTE);
281 String printed = testee.print();
282 System.out.println(printed);
283 // first sequence is length 60, second length 120
284 // should be output as 50 + 10 + 0 and as 50 + 50 + 20 character lines
286 String expected = "#MEGA" + NEWLINE
287 + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE
288 + "#U455 " + THIRTY_CHARS + "01234567890123456789" + NEWLINE
289 + "#CPZANT " + THIRTY_CHARS + "01234567890123456789" + NEWLINE
290 + NEWLINE + "#U455 " + "0123456789" + NEWLINE
291 + "#CPZANT " + THIRTY_CHARS + "01234567890123456789" + NEWLINE
292 + NEWLINE + "#U455 " + NEWLINE + "#CPZANT "
293 + "01234567890123456789" + NEWLINE;
294 assertEquals("Print format wrong", expected, printed);
298 * Test (parse and) print of noninterleaved mega format data extending to more
299 * than one line of output.
301 * @throws IOException
304 public void testPrint_noninterleavedMultiLine() throws IOException
306 MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
307 AppletFormatAdapter.PASTE);
308 String printed = testee.print();
309 System.out.println(printed);
310 // 60 character sequence should be output as 50 on first line then 10 more
311 String expected = "#MEGA" + NEWLINE + NEWLINE
312 + "#SIXTY" + NEWLINE + THIRTY_CHARS + "01234567890123456789"
313 + NEWLINE + "0123456789" + NEWLINE;
314 assertEquals("Print format wrong", expected, printed);
318 * Test paste / parse of 'fancy format' data.
320 * @throws IOException
323 public void testParse_fancyFormat() throws IOException
325 MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE);
326 assertEquals("Title not as expected", "Fancy format data",
327 testee.getAlignmentProperty("Title"));
328 // TODO handle "Title" and "TITLE" uniformly !?!
329 assertEquals("Format property not parsed",
330 "DataType=DNA indel=- CodeTable=Standard;",
331 testee.getAlignmentProperty(MegaFile.PROP_FORMAT));
332 assertEquals("Gene property not parsed",
333 "Adh Property=Coding CodonStart=1;",
334 testee.getAlignmentProperty(MegaFile.PROP_GENE));
335 assertEquals("Not identified as simple format", FileFormat.FANCY,
336 testee.getFileFormat());
337 Vector<SequenceI> seqs = testee.getSeqs();
338 // should be 2 sequences
339 assertEquals("Expected two sequences", 2, seqs.size());
340 // check sequence names correct and order preserved
341 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
342 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
344 // check sequence data
345 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
346 .getSequenceAsString());
347 assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
348 .getSequenceAsString());
349 assertTrue("File format is not flagged as interleaved",
350 testee.isInterleaved());
352 assertEquals("Description property not parsed",
353 " Line one of description" + NEWLINE
354 + " Line two of description" + NEWLINE,
355 testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
359 public void testParsePropertyValue()
361 assertEquals("Description",
362 MegaFile.parsePropertyValue("Description=Melanogaster")[0]);
363 assertEquals("Melanogaster",
364 MegaFile.parsePropertyValue("Description=Melanogaster")[1]);
366 assertEquals("Description",
367 MegaFile.parsePropertyValue("!Description=Melanogaster")[0]);
368 assertEquals("Melanogaster",
369 MegaFile.parsePropertyValue("!Description=Melanogaster")[1]);
371 assertEquals("Description",
372 MegaFile.parsePropertyValue("Description: Melanogaster")[0]);
373 assertEquals("Melanogaster",
374 MegaFile.parsePropertyValue("Description: Melanogaster")[1]);
376 assertEquals("Description",
377 MegaFile.parsePropertyValue("!Description Melanogaster")[0]);
378 assertEquals("Melanogaster",
379 MegaFile.parsePropertyValue("!Description Melanogaster")[1]);
381 assertEquals("Description",
382 MegaFile.parsePropertyValue("Description")[0]);
383 assertEquals("", MegaFile.parsePropertyValue("Description")[1]);
385 assertEquals("Description",
386 MegaFile.parsePropertyValue("!Description")[0]);
387 assertEquals("", MegaFile.parsePropertyValue("!Description")[1]);