package jalview.io; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; import jalview.io.MegaFile.FileFormat; import java.io.IOException; import java.util.Vector; import org.junit.Test; /* * Unit tests for MegaFile - read and write in MEGA format(s). */ public class MegaFileTest { private static final String THIRTY_CHARS = "012345678901234567890123456789"; private static final String NEWLINE = System .getProperty("line.separator"); private static final String INTERLEAVED = "#MEGA" + NEWLINE + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE + "#U455 ABCDEF" + NEWLINE + "#CPZANT MNOPQR" + NEWLINE + NEWLINE + "#U455 KLMNOP" + NEWLINE + "#CPZANT WXYZ"; private static final String INTERLEAVED_NOHEADERS = "#U455 ABCDEF" + NEWLINE + "#CPZANT MNOPQR" + NEWLINE + NEWLINE + "#U455 KLMNOP" + NEWLINE + "#CPZANT WXYZ"; // interleaved sequences, one with 60 one with 120 characters (on overlong // input lines) private static final String INTERLEAVED_LONGERTHAN50 = "#MEGA" + NEWLINE + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + NEWLINE + "#CPZANT " + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS; private static final String NONINTERLEAVED = "#MEGA" + NEWLINE + "TITLE: Noninterleaved sequence data" + NEWLINE + NEWLINE + "#U455 " + NEWLINE + "ABCFEDHIJ" + NEWLINE + "MNOPQR" + NEWLINE + NEWLINE + "#CPZANT " + NEWLINE + "KLMNOPWXYZ" + NEWLINE + "CGATC"; // Sequence length 60 (split over two lines) private static final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY" + NEWLINE + THIRTY_CHARS + NEWLINE + THIRTY_CHARS; // this one starts noninterleaved then switches to interleaved private static final String MIXED = "#MEGA" + NEWLINE + "TITLE: This is a mess" + NEWLINE + NEWLINE + "#CPZANT KLMNOPWXYZCGATC" + NEWLINE + NEWLINE + "#U455 " + NEWLINE + "ABCFEDHIJ"; // interleaved with a new sequence appearing in the second block :-O private static final String INTERLEAVED_SEQUENCE_ERROR = "#MEGA" + NEWLINE + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE + "#U455 ABCDEF" + NEWLINE + "#CPZANT MNOPQR" + NEWLINE + NEWLINE + "#U456 KLMNOP" + NEWLINE; // the 'fancy' format, different header format, bases in triplet groups private static final String FANCY_FORMAT = "#MEGA" + NEWLINE + "!Title Fancy format data" + NEWLINE + "!Format DataType=DNA indel=- CodeTable=Standard;" + NEWLINE + NEWLINE + "!Description" + NEWLINE + " Line one of description" + NEWLINE + " Line two of description" + NEWLINE + NEWLINE + "!Gene=Adh Property=Coding CodonStart=1;" + NEWLINE + "#U455 ABC DEF" + NEWLINE + "#CPZANT MNO PQR" + NEWLINE + NEWLINE + "#U455 KLM NOP" + NEWLINE + "#CPZANT WXY Z"; /** * Test paste of interleaved mega format data. * * @throws IOException */ @Test public void testParse_interleaved() throws IOException { MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE); assertEquals("Title not as expected", "Interleaved sequence data", testee.getAlignmentProperty(MegaFile.PROP_TITLE)); assertEquals("Not identified as simple format", FileFormat.SIMPLE, testee.getFileFormat()); Vector seqs = testee.getSeqs(); // should be 2 sequences assertEquals("Expected two sequences", 2, seqs.size()); // check sequence names correct and order preserved assertEquals("First sequence id wrong", "U455", seqs.get(0).getName()); assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1) .getName()); // check sequence data assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0) .getSequenceAsString()); assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1) .getSequenceAsString()); assertTrue("File format is not flagged as interleaved", testee.isInterleaved()); } /** * Test paste of noninterleaved mega format data. * * @throws IOException */ @Test public void testParse_nonInterleaved() throws IOException { MegaFile testee = new MegaFile(NONINTERLEAVED, AppletFormatAdapter.PASTE); assertEquals("Title not as expected", "Noninterleaved sequence data", testee.getAlignmentProperty(MegaFile.PROP_TITLE)); assertEquals("Not identified as simple format", FileFormat.SIMPLE, testee.getFileFormat()); Vector seqs = testee.getSeqs(); // should be 2 sequences assertEquals("Expected two sequences", 2, seqs.size()); // check sequence names correct and order preserved assertEquals("First sequence id wrong", "U455", seqs.get(0).getName()); assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1) .getName()); // check sequence data assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs .get(0).getSequenceAsString()); assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC", seqs.get(1) .getSequenceAsString()); assertFalse("File format is not flagged as noninterleaved", testee.isInterleaved()); } /** * Test parsing an interleaved file with an extra sequence appearing after the * first block - should fail. */ @Test public void testParse_interleavedExtraSequenceError() { try { new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE); fail("Expected extra sequence IOException"); } catch (IOException e) { assertEquals( "Unexpected exception message", "Parse error: misplaced new sequence starting at #U456 KLMNOP", e.getMessage()); } } /** * Test a mixed up file. */ @Test public void testParse_mixedInterleavedNonInterleaved() { try { new MegaFile(MIXED, AppletFormatAdapter.PASTE); fail("Expected mixed content exception"); } catch (IOException e) { assertEquals( "Unexpected exception message", "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ", e.getMessage()); } } @Test public void testGetSequenceId() { assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC")); assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC")); assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC")); assertEquals("AB123", MegaFile.getSequenceId("#AB123")); assertNull(MegaFile.getSequenceId("AB123 CTAG")); assertNull(MegaFile.getSequenceId("AB123")); assertNull(MegaFile.getSequenceId("")); assertNull(MegaFile.getSequenceId(null)); } @Test public void testGetMaxIdLength() { SequenceI[] seqs = new Sequence[2]; seqs[0] = new Sequence("Something", "GCATAC"); seqs[1] = new Sequence("SomethingElse", "GCATAC"); assertEquals(13, MegaFile.getMaxIdLength(seqs)); seqs[1] = new Sequence("DNA", "GCATAC"); assertEquals(9, MegaFile.getMaxIdLength(seqs)); } @Test public void testGetMaxSequenceLength() { SequenceI[] seqs = new Sequence[2]; seqs[0] = new Sequence("Seq1", "GCATAC"); seqs[1] = new Sequence("Seq2", "GCATACTAG"); assertEquals(9, MegaFile.getMaxSequenceLength(seqs)); seqs[1] = new Sequence("Seq2", "GCA"); assertEquals(6, MegaFile.getMaxSequenceLength(seqs)); } /** * Test (parse and) print of interleaved mega format data. * * @throws IOException */ @Test public void testPrint_interleaved() throws IOException { MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE); String printed = testee.print(); System.out.println(printed); // normally output should match input // we cheated here with a number of short input lines String expected = "#MEGA" + NEWLINE + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE + "#U455 ABCDEFKLMNOP" + NEWLINE + "#CPZANT MNOPQRWXYZ" + NEWLINE; assertEquals("Print format wrong", expected, printed); } /** * Test (parse and) print of interleaved data with no headers (acceptable). * * @throws IOException */ @Test public void testPrint_interleavedNoHeaders() throws IOException { MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS, AppletFormatAdapter.PASTE); String printed = testee.print(); System.out.println(printed); // normally output should match input // we cheated here with a number of short input lines String expected = "#MEGA" + NEWLINE + NEWLINE + "#U455 ABCDEFKLMNOP" + NEWLINE + "#CPZANT MNOPQRWXYZ" + NEWLINE; assertEquals("Print format wrong", expected, printed); } /** * Test (parse and) print of noninterleaved mega format data. * * @throws IOException */ @Test public void testPrint_noninterleaved() throws IOException { MegaFile testee = new MegaFile(NONINTERLEAVED, AppletFormatAdapter.PASTE); String printed = testee.print(); System.out.println(printed); // normally output should match input // we cheated here with a number of short input lines String expected = "#MEGA" + NEWLINE + "TITLE: Noninterleaved sequence data" + NEWLINE + NEWLINE + "#U455" + NEWLINE + "ABCFEDHIJMNOPQR" + NEWLINE + NEWLINE + "#CPZANT" + NEWLINE + "KLMNOPWXYZCGATC" + NEWLINE; assertEquals("Print format wrong", expected, printed); } /** * Test (parse and) print of interleaved mega format data extending to more * than one line of output. * * @throws IOException */ @Test public void testPrint_interleavedMultiLine() throws IOException { MegaFile testee = new MegaFile(INTERLEAVED_LONGERTHAN50, AppletFormatAdapter.PASTE); String printed = testee.print(); System.out.println(printed); // first sequence is length 60, second length 120 // should be output as 50 + 10 + 0 and as 50 + 50 + 20 character lines // respectively String expected = "#MEGA" + NEWLINE + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE + "#U455 " + THIRTY_CHARS + "01234567890123456789" + NEWLINE + "#CPZANT " + THIRTY_CHARS + "01234567890123456789" + NEWLINE + NEWLINE + "#U455 " + "0123456789" + NEWLINE + "#CPZANT " + THIRTY_CHARS + "01234567890123456789" + NEWLINE + NEWLINE + "#U455 " + NEWLINE + "#CPZANT " + "01234567890123456789" + NEWLINE; assertEquals("Print format wrong", expected, printed); } /** * Test (parse and) print of noninterleaved mega format data extending to more * than one line of output. * * @throws IOException */ @Test public void testPrint_noninterleavedMultiLine() throws IOException { MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50, AppletFormatAdapter.PASTE); String printed = testee.print(); System.out.println(printed); // 60 character sequence should be output as 50 on first line then 10 more String expected = "#MEGA" + NEWLINE + NEWLINE + "#SIXTY" + NEWLINE + THIRTY_CHARS + "01234567890123456789" + NEWLINE + "0123456789" + NEWLINE; assertEquals("Print format wrong", expected, printed); } /** * Test paste / parse of 'fancy format' data. * * @throws IOException */ @Test public void testParse_fancyFormat() throws IOException { MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE); assertEquals("Title not as expected", "Fancy format data", testee.getAlignmentProperty("Title")); // TODO handle "Title" and "TITLE" uniformly !?! assertEquals("Format property not parsed", "DataType=DNA indel=- CodeTable=Standard;", testee.getAlignmentProperty(MegaFile.PROP_FORMAT)); assertEquals("Gene property not parsed", "Adh Property=Coding CodonStart=1;", testee.getAlignmentProperty(MegaFile.PROP_GENE)); assertEquals("Not identified as simple format", FileFormat.FANCY, testee.getFileFormat()); Vector seqs = testee.getSeqs(); // should be 2 sequences assertEquals("Expected two sequences", 2, seqs.size()); // check sequence names correct and order preserved assertEquals("First sequence id wrong", "U455", seqs.get(0).getName()); assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1) .getName()); // check sequence data assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0) .getSequenceAsString()); assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1) .getSequenceAsString()); assertTrue("File format is not flagged as interleaved", testee.isInterleaved()); assertEquals("Description property not parsed", " Line one of description" + NEWLINE + " Line two of description" + NEWLINE, testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION)); } @Test public void testParsePropertyValue() { assertEquals("Description", MegaFile.parsePropertyValue("Description=Melanogaster")[0]); assertEquals("Melanogaster", MegaFile.parsePropertyValue("Description=Melanogaster")[1]); assertEquals("Description", MegaFile.parsePropertyValue("!Description=Melanogaster")[0]); assertEquals("Melanogaster", MegaFile.parsePropertyValue("!Description=Melanogaster")[1]); assertEquals("Description", MegaFile.parsePropertyValue("Description: Melanogaster")[0]); assertEquals("Melanogaster", MegaFile.parsePropertyValue("Description: Melanogaster")[1]); assertEquals("Description", MegaFile.parsePropertyValue("!Description Melanogaster")[0]); assertEquals("Melanogaster", MegaFile.parsePropertyValue("!Description Melanogaster")[1]); assertEquals("Description", MegaFile.parsePropertyValue("Description")[0]); assertEquals("", MegaFile.parsePropertyValue("Description")[1]); assertEquals("Description", MegaFile.parsePropertyValue("!Description")[0]); assertEquals("", MegaFile.parsePropertyValue("!Description")[1]); } }