import static org.testng.AssertJUnit.assertTrue;
import static org.testng.AssertJUnit.fail;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
*/
public class MegaFileTest
{
- private static final String THIRTY_CHARS = "012345678901234567890123456789";
+ private static final String TWENTY_CHARS = "9876543210abcdefghij";
+
+ private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
//@formatter:off
private static final String INTERLEAVED =
"#MEGA\n"+
"TITLE: Interleaved sequence data\n\n" +
"#U455 ABCDEF\n" +
- "#CPZANT MNOPQR\n\n" + "#U455 KLMNOP\n" +
- "#CPZANT WXYZ";
+ "#CPZANT MNOPQR\n\n" +
+ "#U455 KLMNOP\n" +
+ "#CPZANT WXYZGC";
private static final String INTERLEAVED_NOHEADERS =
"#U455 ABCDEF\n"
- + "#CPZANT MNOPQR\n\n"
+ + "#CPZANT MNOPQR\n\n"
+ "#U455 KLMNOP\n"
- + "#CPZANT WXYZ\n";
+ + "#CPZANT WXYZGC\n";
- // interleaved sequences, one with 60 one with 120 characters (on overlong
- // input lines)
- private static final String INTERLEAVED_LONGERTHAN50 =
+ // interleaved sequences, with 50 residues
+ private static final String INTERLEAVED_50RESIDUES =
"#MEGA\n"
- + "TITLE: Interleaved sequence data\n\n"
- + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + "\n"
- + "#CPZANT "
- + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS;
+ + "!TITLE Interleaved sequence data\n\n"
+ + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n"
+ + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
private static final String NONINTERLEAVED =
"#MEGA\n"
- + "TITLE: Noninterleaved sequence data\n\n"
+ + "!TITLE Noninterleaved sequence data\n\n"
+ "#U455 \n"
+ "ABCFEDHIJ\n"
+ "MNOPQR\n\n"
+ "#CPZANT \n"
+ "KLMNOPWXYZ\n"
+ "CGATC\n";
-
- // Sequence length 60 (split over two lines)
- private static final String NONINTERLEAVED_LONGERTHAN50 =
- "#SIXTY\n" + THIRTY_CHARS + "\n" + THIRTY_CHARS;
-
- // this one starts noninterleaved then switches to interleaved
+
+ // this one starts interleaved then switches to non-interleaved
private static final String MIXED =
"#MEGA\n"
- + "TITLE: This is a mess\n\n" + "#CPZANT KLMNOPWXYZCGATC\n\n"
+ + "!TITLE This is a mess\n\n"
+ + "#CPZANT KLMNOPWXYZCGATC\n\n"
+ "#U455\n "
+ "ABCFEDHIJ\n";
// interleaved with a new sequence appearing in the second block :-O
private static final String INTERLEAVED_SEQUENCE_ERROR =
"#MEGA" + "\n"
- + "TITLE: Interleaved sequence data\n\n"
+ + "!TITLE Interleaved sequence data\n\n"
+ "#U455 ABCDEF\n"
+ "#CPZANT MNOPQR\n\n"
+ "#U456 KLMNOP\n";
// data with !Title, !Format and !Description statements, bases in triplet groups
- private static final String FANCY_FORMAT =
+ private static final String INTERLEAVED_WITH_DESCRIPTION =
"#MEGA\n"
- + "!Title Fancy format data;\n"
- + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n"
+ + "!Title Data with description;\n"
+ + "!Format DataType=DNA indel=- CodeTable=Standard Missing=? MatchChar=.;\n\n"
+ "!Description\n"
+ " Line one of description\n"
+ " Line two of description;\n\n"
- + "!Gene=Adh Property=Coding CodonStart=1;\n"
- + "#U455 ABC DEF\n"
- + "#CPZANT MNO PQR\n\n"
- + "#U455 KLM NOP\n"
- + "#CPZANT WXY Z\n";
-
- // interleaved sequence data for two genes
- private static final String TWO_GENES =
- "#MEGA\n"
- + "!Title Fancy format data;\n"
- + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n"
- + "!Description\n"
- + " Line one of description\n"
- + " Line two of description;\n\n"
- + "!Gene=Adh Property=Coding CodonStart=1;\n"
- + "#U455 ABC DEF\n"
- + "#CPZANT MNO PQR\n\n"
- + "#U455 KLM NOP\n"
- + "#CPZANT WXY Z\n"; //TODO complete
+ + "#U455 CGC GTA\n"
+ + "#CPZANT ATC GGG\n\n"
+ + "#U455 CGA TTT\n"
+ + "#CPZANT CAA TGC\n";
//@formatter:on
// check sequence data
assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
.getSequenceAsString());
- assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+ assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
.getSequenceAsString());
assertTrue("File format is not flagged as interleaved",
testee.isInterleaved());
System.out.println(printed);
// normally output should match input
// we cheated here with a number of short input lines
- String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n"
- + "#U455 ABCDEFKLMNOP\n" + "#CPZANT MNOPQRWXYZ"
+ // nb don't get Title in output if not calling print(AlignmentI)
+ String expected = "#MEGA\n\n" + "#U455 ABCDEF\n"
+ + "#CPZANT MNOPQR\n\n" + "#U455 KLMNOP\n" + "#CPZANT WXYZGC"
+ "\n";
assertEquals("Print format wrong", expected, printed);
}
AppletFormatAdapter.PASTE);
String printed = testee.print();
System.out.println(printed);
- // normally output should match input
- // we cheated here with a number of short input lines
- String expected = "#MEGA\n\n" + "#U455 ABCDEFKLMNOP" + "\n"
- + "#CPZANT MNOPQRWXYZ\n";
- assertEquals("Print format wrong", expected, printed);
+
+ assertEquals("Print format wrong", "#MEGA\n\n" + INTERLEAVED_NOHEADERS,
+ printed);
}
/**
{
MegaFile testee = new MegaFile(NONINTERLEAVED,
AppletFormatAdapter.PASTE);
+ assertEquals(10, testee.getPositionsPerLine());
String printed = testee.print();
System.out.println(printed);
// normally output should match input
// we cheated here with a number of short input lines
- String expected = "#MEGA\n"
- + "!TITLE Noninterleaved sequence data;\n\n"
- + "#U455\n" + "ABCFEDHIJMNOPQR\n\n" + "#CPZANT\n"
- + "KLMNOPWXYZCGATC\n";
+ String expected = "#MEGA\n\n"
+ + "#U455\n" + "ABCFEDHIJM\nNOPQR\n\n"
+ + "#CPZANT\n" + "KLMNOPWXYZ\nCGATC\n";
assertEquals("Print format wrong", expected, printed);
}
@Test(groups = { "Functional" })
public void testPrint_interleavedMultiLine() throws IOException
{
- MegaFile testee = new MegaFile(INTERLEAVED_LONGERTHAN50,
+ MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
AppletFormatAdapter.PASTE);
+ assertEquals(50, testee.getPositionsPerLine());
+ /*
+ * now simulate choosing 20 residues per line on output
+ */
+ testee.setPositionsPerLine(20);
String printed = testee.print();
System.out.println(printed);
- // first sequence is length 60, second length 120
- // should be output as 50 + 10 + 0 and as 50 + 50 + 20 character lines
- // respectively
- String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n"
- + "#U455 " + THIRTY_CHARS + "01234567890123456789\n"
- + "#CPZANT " + THIRTY_CHARS + "01234567890123456789\n" + "\n"
- + "#U455 " + "0123456789\n" + "#CPZANT " + THIRTY_CHARS
- + "01234567890123456789\n\n" + "#U455 \n" + "#CPZANT "
- + "01234567890123456789"
- + "\n";
+ //@formatter:off
+ // U455 input (50 residues): 0123456789klmnopqrstABCDEFGHIJ9876543210abcdefghij
+ String expected =
+ "#MEGA\n\n" +
+ "#U455 0123456789 klmnopqrst\n" + // first 20
+ "#CPZANT 9876543210 abcdefghij\n\n" +
+ "#U455 ABCDEFGHIJ 9876543210\n" + // next 20
+ "#CPZANT 0123456789 klmnopqrst\n\n" +
+ "#U455 abcdefghij\n" + // last 10
+ "#CPZANT ABCDEFGHIJ\n";
+ //@formatter:on
assertEquals("Print format wrong", expected, printed);
}
@Test(groups = { "Functional" })
public void testPrint_noninterleavedMultiLine() throws IOException
{
+ final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
+ + "\n" + TWENTY_CHARS + "9993332221\n";
MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
AppletFormatAdapter.PASTE);
+ assertEquals(30, testee.getPositionsPerLine());
+ testee.setPositionsPerLine(25);
String printed = testee.print();
- System.out.println(printed);
// 60 character sequence at 25 positions per line is output as 25 + 25 + 10
- String expected = "#MEGA\n\n" + "#SIXTY\n" + THIRTY_CHARS
- + "01234567890123456789\n" + "0123456789\n";
+ String expected = "#MEGA\n\n" + "#SIXTY\n"
+ + "0123456789klmnopqrstABCDE\n" + "FGHIJ9876543210abcdefghij\n"
+ + "9993332221\n";
assertEquals("Print format wrong", expected, printed);
}
/**
- * Test paste / parse of 'fancy format' data.
+ * Test parse of interleaved data that includes a multi-line !Description
*
* @throws IOException
*/
@Test(groups = { "Functional" })
- public void testParse_fancyFormat() throws IOException
+ public void testParse_withDescription() throws IOException
{
- MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE);
- assertEquals("Title not as expected", "Fancy format data",
+ MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
+ AppletFormatAdapter.PASTE);
+ assertEquals("Title not as expected", "Data with description",
testee.getAlignmentProperty(MegaFile.PROP_TITLE));
- // assertEquals("Format property not parsed",
- // "DataType=DNA indel=- CodeTable=Standard;",
- // testee.getAlignmentProperty(MegaFile.PROP_FORMAT));
Vector<SequenceI> seqs = testee.getSeqs();
// should be 2 sequences
assertEquals("Expected two sequences", 2, seqs.size());
assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
.getName());
// check sequence data: triplet groups joined, the two blocks concatenated
- assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
+ assertEquals("First sequence data wrong", "CGCGTACGATTT", seqs.get(0)
.getSequenceAsString());
- assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+ assertEquals("Second sequence data wrong", "ATCGGGCAATGC", seqs.get(1)
.getSequenceAsString());
assertTrue("File format is not flagged as interleaved",
testee.isInterleaved());
- assertEquals("Description property not parsed",
- " Line one of description\n"
- + " Line two of description\n",
+ assertEquals(
+ "Description property not parsed",
+ " Line one of description\n" + " Line two of description",
testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
}
assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
assertEquals("", MegaFile.getValue("Name"));
}
+
+ /**
+ * Test reading a MEGA file to an alignment then writing it out in MEGA
+ * format. Verify the output is (functionally) the same as the input: the
+ * sequence data lines match the input, while the header statements are
+ * regenerated (the expected !Format includes NSeqs/NSites and Identical=.
+ * where the input had MatchChar=.).
+ *
+ * @throws IOException
+ */
+ @Test(groups = { "Functional" })
+ public void testRoundTrip_Interleaved() throws IOException
+ {
+ AppletFormatAdapter fa = new AppletFormatAdapter();
+ AlignmentI al = fa.readFile(INTERLEAVED_WITH_DESCRIPTION,
+ AppletFormatAdapter.PASTE, "MEGA");
+ MegaFile output = new MegaFile();
+ String formatted = output.print(al);
+ //@formatter:off
+ String expected =
+ "#MEGA\n!Title Data with description;\n" +
+ "!Description Line one of description\n" +
+ " Line two of description;\n" +
+ "!Format\n" +
+ " DataType=DNA CodeTable=Standard\n" +
+ " NSeqs=2 NSites=12\n" +
+ " Indel=- Identical=. Missing=?;\n\n" +
+ "#U455 CGC GTA\n" +
+ "#CPZANT ATC GGG\n\n" +
+ "#U455 CGA TTT\n" +
+ "#CPZANT CAA TGC\n";
+ //@formatter:on
+ assertEquals("Roundtrip didn't match", expected,
+ formatted);
+ }
}