--- /dev/null
+package jalview.io;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.io.MegaFile.FileFormat;
+
+import java.io.IOException;
+import java.util.Vector;
+
+import org.junit.Test;
+
+/*
+ * Unit tests for MegaFile - read and write in MEGA format(s).
+ */
+public class MegaFileTest
+{
+  // line terminator of the platform the tests run on
+  private static final String NEWLINE = System
+          .getProperty("line.separator");
+
+  // thirty characters of digits, repeated below to build long sequences
+  private static final String THIRTY_CHARS = "012345678901234567890123456789";
+
+  // header lines followed by two interleaved blocks for U455 and CPZANT
+  private static final String INTERLEAVED =
+          "#MEGA" + NEWLINE
+          + "TITLE: Interleaved sequence data" + NEWLINE
+          + NEWLINE
+          + "#U455 ABCDEF" + NEWLINE
+          + "#CPZANT MNOPQR" + NEWLINE
+          + NEWLINE
+          + "#U455 KLMNOP" + NEWLINE
+          + "#CPZANT WXYZ";
+
+  // the same two interleaved blocks, without the #MEGA and TITLE lines
+  private static final String INTERLEAVED_NOHEADERS =
+          "#U455 ABCDEF" + NEWLINE
+          + "#CPZANT MNOPQR" + NEWLINE
+          + NEWLINE
+          + "#U455 KLMNOP" + NEWLINE
+          + "#CPZANT WXYZ";
+
+  // interleaved layout with one 60 character and one 120 character sequence,
+  // each supplied on a single (overlong) input line
+  private static final String INTERLEAVED_LONGERTHAN50 =
+          "#MEGA" + NEWLINE
+          + "TITLE: Interleaved sequence data" + NEWLINE
+          + NEWLINE
+          + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + NEWLINE
+          + "#CPZANT " + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS
+          + THIRTY_CHARS;
+
+  // each sequence id on its own line, followed by its data lines
+  private static final String NONINTERLEAVED =
+          "#MEGA" + NEWLINE
+          + "TITLE: Noninterleaved sequence data" + NEWLINE
+          + NEWLINE
+          + "#U455 " + NEWLINE
+          + "ABCFEDHIJ" + NEWLINE
+          + "MNOPQR" + NEWLINE
+          + NEWLINE
+          + "#CPZANT " + NEWLINE
+          + "KLMNOPWXYZ" + NEWLINE
+          + "CGATC";
+
+  // a single sequence of length 60, split over two 30 character lines
+  private static final String NONINTERLEAVED_LONGERTHAN50 =
+          "#SIXTY" + NEWLINE
+          + THIRTY_CHARS + NEWLINE
+          + THIRTY_CHARS;
+
+  // first sequence has id and data on one line, the second has its data on
+  // the line after the id
+  private static final String MIXED =
+          "#MEGA" + NEWLINE
+          + "TITLE: This is a mess" + NEWLINE
+          + NEWLINE
+          + "#CPZANT KLMNOPWXYZCGATC" + NEWLINE
+          + NEWLINE
+          + "#U455 " + NEWLINE
+          + "ABCFEDHIJ";
+
+  // interleaved, but the second block introduces an id (U456) that was not
+  // present in the first block
+  private static final String INTERLEAVED_SEQUENCE_ERROR =
+          "#MEGA" + NEWLINE
+          + "TITLE: Interleaved sequence data" + NEWLINE
+          + NEWLINE
+          + "#U455 ABCDEF" + NEWLINE
+          + "#CPZANT MNOPQR" + NEWLINE
+          + NEWLINE
+          + "#U456 KLMNOP" + NEWLINE;
+
+  // the 'fancy' layout: '!' prefixed header statements, a multi-line
+  // description, and sequence data written in groups of three
+  private static final String FANCY_FORMAT =
+          "#MEGA" + NEWLINE
+          + "!Title Fancy format data" + NEWLINE
+          + "!Format DataType=DNA indel=- CodeTable=Standard;" + NEWLINE
+          + NEWLINE
+          + "!Description" + NEWLINE
+          + " Line one of description" + NEWLINE
+          + " Line two of description" + NEWLINE
+          + NEWLINE
+          + "!Gene=Adh Property=Coding CodonStart=1;" + NEWLINE
+          + "#U455 ABC DEF" + NEWLINE
+          + "#CPZANT MNO PQR" + NEWLINE
+          + NEWLINE
+          + "#U455 KLM NOP" + NEWLINE
+          + "#CPZANT WXY Z";
+
+  /**
+   * Parses pasted interleaved MEGA data and verifies the title property, the
+   * detected file format, the sequence ids (in input order), the joined
+   * sequence data and the interleaved flag.
+   * 
+   * @throws IOException
+   *           if raised by MegaFile (not expected; fails the test)
+   */
+  @Test
+  public void testParse_interleaved() throws IOException
+  {
+    MegaFile megaFile = new MegaFile(INTERLEAVED,
+            AppletFormatAdapter.PASTE);
+
+    // header information
+    assertEquals("Title not as expected", "Interleaved sequence data",
+            megaFile.getAlignmentProperty(MegaFile.PROP_TITLE));
+    assertEquals("Not identified as simple format", FileFormat.SIMPLE,
+            megaFile.getFileFormat());
+
+    // both sequences present
+    Vector<SequenceI> sequences = megaFile.getSeqs();
+    assertEquals("Expected two sequences", 2, sequences.size());
+    SequenceI first = sequences.get(0);
+    SequenceI second = sequences.get(1);
+
+    // ids are as in the input, and in the same order
+    assertEquals("First sequence id wrong", "U455", first.getName());
+    assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+    // data from the two blocks is joined for each sequence
+    assertEquals("First sequence data wrong", "ABCDEFKLMNOP",
+            first.getSequenceAsString());
+    assertEquals("Second sequence data wrong", "MNOPQRWXYZ",
+            second.getSequenceAsString());
+
+    assertTrue("File format is not flagged as interleaved",
+            megaFile.isInterleaved());
+  }
+
+  /**
+   * Parses pasted noninterleaved MEGA data and verifies the title property,
+   * the detected file format, the sequence ids (in input order), the joined
+   * sequence data and that the interleaved flag is not set.
+   * 
+   * @throws IOException
+   *           if raised by MegaFile (not expected; fails the test)
+   */
+  @Test
+  public void testParse_nonInterleaved() throws IOException
+  {
+    MegaFile megaFile = new MegaFile(NONINTERLEAVED,
+            AppletFormatAdapter.PASTE);
+
+    // header information
+    assertEquals("Title not as expected", "Noninterleaved sequence data",
+            megaFile.getAlignmentProperty(MegaFile.PROP_TITLE));
+    assertEquals("Not identified as simple format", FileFormat.SIMPLE,
+            megaFile.getFileFormat());
+
+    // both sequences present
+    Vector<SequenceI> sequences = megaFile.getSeqs();
+    assertEquals("Expected two sequences", 2, sequences.size());
+    SequenceI first = sequences.get(0);
+    SequenceI second = sequences.get(1);
+
+    // ids are as in the input, and in the same order
+    assertEquals("First sequence id wrong", "U455", first.getName());
+    assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+    // the data lines under each id are joined into one sequence
+    assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR",
+            first.getSequenceAsString());
+    assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
+            second.getSequenceAsString());
+
+    assertFalse("File format is not flagged as noninterleaved",
+            megaFile.isInterleaved());
+  }
+
+  /**
+   * Parsing interleaved data where a sequence id first appears in a block
+   * after the first one must be rejected with an IOException carrying the
+   * expected message.
+   */
+  @Test
+  public void testParse_interleavedExtraSequenceError()
+  {
+    IOException caught = null;
+    try
+    {
+      new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
+    } catch (IOException e)
+    {
+      caught = e;
+    }
+
+    // parsing must not succeed
+    if (caught == null)
+    {
+      fail("Expected extra sequence IOException");
+    }
+    assertEquals("Unexpected exception message",
+            "Parse error: misplaced new sequence starting at #U456 KLMNOP",
+            caught.getMessage());
+  }
+
+  /**
+   * Parsing data that puts one sequence's data on its id line and another's
+   * on the following line must be rejected with an IOException carrying the
+   * expected message.
+   */
+  @Test
+  public void testParse_mixedInterleavedNonInterleaved()
+  {
+    IOException caught = null;
+    try
+    {
+      new MegaFile(MIXED, AppletFormatAdapter.PASTE);
+    } catch (IOException e)
+    {
+      caught = e;
+    }
+
+    // parsing must not succeed
+    if (caught == null)
+    {
+      fail("Expected mixed content exception");
+    }
+    assertEquals(
+            "Unexpected exception message",
+            "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
+            caught.getMessage());
+  }
+
+ @Test
+ public void testGetSequenceId()
+ {
+ assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
+ assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
+ assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
+ assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
+ assertNull(MegaFile.getSequenceId("AB123 CTAG"));
+ assertNull(MegaFile.getSequenceId("AB123"));
+ assertNull(MegaFile.getSequenceId(""));
+ assertNull(MegaFile.getSequenceId(null));
+ }
+
+  /**
+   * Checks that getMaxIdLength reports the length of the longest sequence
+   * name in the array.
+   */
+  @Test
+  public void testGetMaxIdLength()
+  {
+    SequenceI[] sequences = new Sequence[] {
+        new Sequence("Something", "GCATAC"),
+        new Sequence("SomethingElse", "GCATAC") };
+    // "SomethingElse" has 13 characters
+    assertEquals(13, MegaFile.getMaxIdLength(sequences));
+
+    // with a shorter second name, "Something" (9) is the longest
+    sequences[1] = new Sequence("DNA", "GCATAC");
+    assertEquals(9, MegaFile.getMaxIdLength(sequences));
+  }
+
+  /**
+   * Checks that getMaxSequenceLength reports the length of the longest
+   * sequence string in the array.
+   */
+  @Test
+  public void testGetMaxSequenceLength()
+  {
+    SequenceI[] sequences = new Sequence[] {
+        new Sequence("Seq1", "GCATAC"),
+        new Sequence("Seq2", "GCATACTAG") };
+    // second sequence is the longer one, at 9 characters
+    assertEquals(9, MegaFile.getMaxSequenceLength(sequences));
+
+    // with a 3 character second sequence, the first (6) is the longest
+    sequences[1] = new Sequence("Seq2", "GCA");
+    assertEquals(6, MegaFile.getMaxSequenceLength(sequences));
+  }
+
+  /**
+   * Parses interleaved MEGA data, prints it back, and compares the output
+   * with the expected text.
+   * 
+   * @throws IOException
+   *           if raised by MegaFile (not expected; fails the test)
+   */
+  @Test
+  public void testPrint_interleaved() throws IOException
+  {
+    MegaFile megaFile = new MegaFile(INTERLEAVED,
+            AppletFormatAdapter.PASTE);
+    String output = megaFile.print();
+    System.out.println(output);
+
+    // Output would normally match input; here the input used several short
+    // lines per sequence, and these are expected to be printed as one line
+    // per sequence.
+    StringBuilder expected = new StringBuilder();
+    expected.append("#MEGA").append(NEWLINE);
+    expected.append("TITLE: Interleaved sequence data").append(NEWLINE);
+    expected.append(NEWLINE);
+    expected.append("#U455 ABCDEFKLMNOP").append(NEWLINE);
+    expected.append("#CPZANT MNOPQRWXYZ").append(NEWLINE);
+
+    assertEquals("Print format wrong", expected.toString(), output);
+  }
+
+  /**
+   * Parses interleaved data that has no header lines (which is accepted),
+   * prints it back, and compares the output with the expected text.
+   * 
+   * @throws IOException
+   *           if raised by MegaFile (not expected; fails the test)
+   */
+  @Test
+  public void testPrint_interleavedNoHeaders() throws IOException
+  {
+    MegaFile megaFile = new MegaFile(INTERLEAVED_NOHEADERS,
+            AppletFormatAdapter.PASTE);
+    String output = megaFile.print();
+    System.out.println(output);
+
+    // Output would normally match input; here the input used several short
+    // lines per sequence. The expected output starts with a #MEGA line even
+    // though the input had none.
+    StringBuilder expected = new StringBuilder();
+    expected.append("#MEGA").append(NEWLINE);
+    expected.append(NEWLINE);
+    expected.append("#U455 ABCDEFKLMNOP").append(NEWLINE);
+    expected.append("#CPZANT MNOPQRWXYZ").append(NEWLINE);
+
+    assertEquals("Print format wrong", expected.toString(), output);
+  }
+
+  /**
+   * Parses noninterleaved MEGA data, prints it back, and compares the output
+   * with the expected text.
+   * 
+   * @throws IOException
+   *           if raised by MegaFile (not expected; fails the test)
+   */
+  @Test
+  public void testPrint_noninterleaved() throws IOException
+  {
+    MegaFile megaFile = new MegaFile(NONINTERLEAVED,
+            AppletFormatAdapter.PASTE);
+    String output = megaFile.print();
+    System.out.println(output);
+
+    // Output would normally match input; here the input used several short
+    // lines per sequence, and these are expected to be printed as a single
+    // data line under each id.
+    StringBuilder expected = new StringBuilder();
+    expected.append("#MEGA").append(NEWLINE);
+    expected.append("TITLE: Noninterleaved sequence data").append(NEWLINE);
+    expected.append(NEWLINE);
+    expected.append("#U455").append(NEWLINE);
+    expected.append("ABCFEDHIJMNOPQR").append(NEWLINE);
+    expected.append(NEWLINE);
+    expected.append("#CPZANT").append(NEWLINE);
+    expected.append("KLMNOPWXYZCGATC").append(NEWLINE);
+
+    assertEquals("Print format wrong", expected.toString(), output);
+  }
+
+  /**
+   * Parses interleaved MEGA data whose sequences need more than one output
+   * line each, prints it back, and compares the output with the expected
+   * text.
+   * 
+   * @throws IOException
+   *           if raised by MegaFile (not expected; fails the test)
+   */
+  @Test
+  public void testPrint_interleavedMultiLine() throws IOException
+  {
+    MegaFile megaFile = new MegaFile(INTERLEAVED_LONGERTHAN50,
+            AppletFormatAdapter.PASTE);
+    String output = megaFile.print();
+    System.out.println(output);
+
+    String ten = "0123456789";
+    String twenty = "01234567890123456789";
+    String fifty = THIRTY_CHARS + twenty;
+
+    // U455 has 60 characters and CPZANT has 120, so three blocks are
+    // expected: U455 as 50 + 10 + 0 and CPZANT as 50 + 50 + 20 characters
+    StringBuilder expected = new StringBuilder();
+    expected.append("#MEGA").append(NEWLINE);
+    expected.append("TITLE: Interleaved sequence data").append(NEWLINE);
+    expected.append(NEWLINE);
+    // first block
+    expected.append("#U455 ").append(fifty).append(NEWLINE);
+    expected.append("#CPZANT ").append(fifty).append(NEWLINE);
+    expected.append(NEWLINE);
+    // second block
+    expected.append("#U455 ").append(ten).append(NEWLINE);
+    expected.append("#CPZANT ").append(fifty).append(NEWLINE);
+    expected.append(NEWLINE);
+    // third block: no data left for U455
+    expected.append("#U455 ").append(NEWLINE);
+    expected.append("#CPZANT ").append(twenty).append(NEWLINE);
+
+    assertEquals("Print format wrong", expected.toString(), output);
+  }
+
+  /**
+   * Parses noninterleaved MEGA data whose sequence needs more than one
+   * output line, prints it back, and compares the output with the expected
+   * text.
+   * 
+   * @throws IOException
+   *           if raised by MegaFile (not expected; fails the test)
+   */
+  @Test
+  public void testPrint_noninterleavedMultiLine() throws IOException
+  {
+    MegaFile megaFile = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
+            AppletFormatAdapter.PASTE);
+    String output = megaFile.print();
+    System.out.println(output);
+
+    // the 60 character sequence is expected as a 50 character line followed
+    // by a 10 character line
+    StringBuilder expected = new StringBuilder();
+    expected.append("#MEGA").append(NEWLINE);
+    expected.append(NEWLINE);
+    expected.append("#SIXTY").append(NEWLINE);
+    expected.append(THIRTY_CHARS).append("01234567890123456789")
+            .append(NEWLINE);
+    expected.append("0123456789").append(NEWLINE);
+
+    assertEquals("Print format wrong", expected.toString(), output);
+  }
+
+  /**
+   * Test paste / parse of 'fancy format' data. Verifies the Title, Format,
+   * Gene and Description properties, the detected file format, the sequence
+   * ids (in input order), the sequence data (with the spaces between triplet
+   * groups removed) and the interleaved flag.
+   * 
+   * @throws IOException
+   *           if raised by MegaFile (not expected; fails the test)
+   */
+  @Test
+  public void testParse_fancyFormat() throws IOException
+  {
+    MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE);
+
+    // header properties; note the title is looked up as "Title" here
+    assertEquals("Title not as expected", "Fancy format data",
+            testee.getAlignmentProperty("Title"));
+    // TODO handle "Title" and "TITLE" uniformly !?!
+    assertEquals("Format property not parsed",
+            "DataType=DNA indel=- CodeTable=Standard;",
+            testee.getAlignmentProperty(MegaFile.PROP_FORMAT));
+    assertEquals("Gene property not parsed",
+            "Adh Property=Coding CodonStart=1;",
+            testee.getAlignmentProperty(MegaFile.PROP_GENE));
+
+    // failure message names the format that is actually expected (FANCY)
+    assertEquals("Not identified as fancy format", FileFormat.FANCY,
+            testee.getFileFormat());
+
+    Vector<SequenceI> seqs = testee.getSeqs();
+    // should be 2 sequences
+    assertEquals("Expected two sequences", 2, seqs.size());
+    // check sequence names correct and order preserved
+    assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
+    assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
+            .getName());
+    // check sequence data: triplet groups joined, blocks concatenated
+    assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
+            .getSequenceAsString());
+    assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+            .getSequenceAsString());
+    assertTrue("File format is not flagged as interleaved",
+            testee.isInterleaved());
+
+    // multi-line description: each line is kept, followed by a newline
+    assertEquals("Description property not parsed",
+            " Line one of description" + NEWLINE
+                    + " Line two of description" + NEWLINE,
+            testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
+  }
+
+ @Test
+ public void testParsePropertyValue()
+ {
+ assertEquals("Description",
+ MegaFile.parsePropertyValue("Description=Melanogaster")[0]);
+ assertEquals("Melanogaster",
+ MegaFile.parsePropertyValue("Description=Melanogaster")[1]);
+
+ assertEquals("Description",
+ MegaFile.parsePropertyValue("!Description=Melanogaster")[0]);
+ assertEquals("Melanogaster",
+ MegaFile.parsePropertyValue("!Description=Melanogaster")[1]);
+
+ assertEquals("Description",
+ MegaFile.parsePropertyValue("Description: Melanogaster")[0]);
+ assertEquals("Melanogaster",
+ MegaFile.parsePropertyValue("Description: Melanogaster")[1]);
+
+ assertEquals("Description",
+ MegaFile.parsePropertyValue("!Description Melanogaster")[0]);
+ assertEquals("Melanogaster",
+ MegaFile.parsePropertyValue("!Description Melanogaster")[1]);
+
+ assertEquals("Description",
+ MegaFile.parsePropertyValue("Description")[0]);
+ assertEquals("", MegaFile.parsePropertyValue("Description")[1]);
+
+ assertEquals("Description",
+ MegaFile.parsePropertyValue("!Description")[0]);
+ assertEquals("", MegaFile.parsePropertyValue("!Description")[1]);
+ }
+}