3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.AssertJUnit.fail;
9 import jalview.datamodel.AlignmentAnnotation;
10 import jalview.datamodel.AlignmentI;
11 import jalview.datamodel.Annotation;
12 import jalview.datamodel.Sequence;
13 import jalview.datamodel.SequenceFeature;
14 import jalview.datamodel.SequenceI;
16 import java.io.IOException;
17 import java.util.List;
18 import java.util.Vector;
20 import org.testng.annotations.Test;
23 * Unit tests for MegaFile - read and write in MEGA format(s).
25 public class MegaFileTest
// Shared MEGA-format fixture strings used as parser input by the tests in this class.
// NOTE(review): in this view every line carries a leading number and several
// concatenations begin or end with + without a neighbouring operand, so parts of
// these literals appear to be missing here - confirm against the original file.
27 private static final String TWENTY_CHARS = "9876543210abcdefghij";
29 private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
// Interleaved layout with a TITLE line; only the CPZANT row of one block is visible here.
32 private static final String INTERLEAVED =
34 "TITLE: Interleaved sequence data\n\n" +
36 "#CPZANT MNOPQR\n\n" +
// Same shape as INTERLEAVED but, going by its name, without header lines.
40 private static final String INTERLEAVED_NOHEADERS =
42 + "#CPZANT MNOPQR\n\n"
46 // interleaved sequences, with 50 residues
47 private static final String INTERLEAVED_50RESIDUES =
49 + "!TITLE Interleaved sequence data\n\n"
50 + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n"
51 + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
// Noninterleaved layout; only the title line is visible in this view.
53 private static final String NONINTERLEAVED =
55 + "!TITLE Noninterleaved sequence data\n\n"
63 // this one starts interleaved then switches to non-interleaved
64 private static final String MIXED =
66 + "!TITLE This is a mess\n\n"
67 + "#CPZANT KLMNOPWXYZCGATC\n\n"
71 // interleaved with a new sequence appearing in the second block :-O
72 private static final String INTERLEAVED_SEQUENCE_ERROR =
74 + "!TITLE Interleaved sequence data\n\n"
76 + "#CPZANT MNOPQR\n\n"
79 // interleaved with description, bases/gaps in triplet groups
80 private static final String INTERLEAVED_WITH_DESCRIPTION =
82 + "!Title Data with description;\n"
83 + "!Format DataType=DNA indel=-\tCodeTable=Standard Missing=? MatchChar=.;\n\n"
85 + " Line one of description\n"
86 + " Line two of description;\n\n"
88 + "#CPZANT ATC -G-\n\n"
90 + "#CPZANT CA- -GC\n";
// Parses the INTERLEAVED fixture supplied as pasted text, then checks the title
// property, the sequence count, names, residues and the interleaved flag.
95 * Test parse of interleaved mega format data.
99 @Test(groups = { "Functional" })
100 public void testParse_interleaved() throws IOException
102 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
103 assertEquals("Title not as expected", "Interleaved sequence data",
104 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
105 Vector<SequenceI> seqs = testee.getSeqs();
106 // should be 2 sequences
107 assertEquals("Expected two sequences", 2, seqs.size());
108 // check sequence names correct and order preserved
109 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
110 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
// NOTE(review): the tail of the statement above is not present in this view
// (numbering jumps from 110 to 112) - confirm against the original file.
112 // check sequence data
113 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
114 .getSequenceAsString());
115 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
116 .getSequenceAsString());
117 assertTrue("File format is not flagged as interleaved",
118 testee.isInterleaved());
// Parses the NONINTERLEAVED fixture supplied as pasted text, then checks the title
// property, the sequence count, names, residues and that isInterleaved() is false.
122 * Test parse of noninterleaved mega format data.
124 * @throws IOException
126 @Test(groups = { "Functional" })
127 public void testParse_nonInterleaved() throws IOException
129 MegaFile testee = new MegaFile(NONINTERLEAVED,
130 AppletFormatAdapter.PASTE);
131 assertEquals("Title not as expected", "Noninterleaved sequence data",
132 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
133 Vector<SequenceI> seqs = testee.getSeqs();
134 // should be 2 sequences
135 assertEquals("Expected two sequences", 2, seqs.size());
136 // check sequence names correct and order preserved
137 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
138 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
// NOTE(review): the tail of the statement above is not present in this view
// (numbering jumps from 138 to 140) - confirm against the original file.
140 // check sequence data
141 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
142 .get(0).getSequenceAsString());
143 assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
144 seqs.get(1).getSequenceAsString());
145 assertFalse("File format is not flagged as noninterleaved",
146 testee.isInterleaved());
// Negative test: constructing a MegaFile from INTERLEAVED_SEQUENCE_ERROR is expected
// to throw IOException; reaching fail() means no exception was raised.
// NOTE(review): the opening of the try block and the call that wraps the two message
// strings after the catch are not present in this view - confirm against the original.
150 * Test parsing an interleaved file with an extra sequence appearing after the
151 * first block - should fail.
153 @Test(groups = { "Functional" })
154 public void testParse_interleavedExtraSequenceError()
158 new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
159 fail("Expected extra sequence IOException");
160 } catch (IOException e)
163 "Unexpected exception message",
164 "Parse error: misplaced new sequence starting at #U456 KLMNOP",
// Negative test: constructing a MegaFile from the MIXED fixture is expected to throw
// IOException; reaching fail() means no exception was raised.
// NOTE(review): the opening of the try block and the call that wraps the two message
// strings after the catch are not present in this view - confirm against the original.
170 * Test a mixed up file.
172 @Test(groups = { "Functional" })
173 public void testParse_mixedInterleavedNonInterleaved()
177 new MegaFile(MIXED, AppletFormatAdapter.PASTE);
178 fail("Expected mixed content exception");
179 } catch (IOException e)
182 "Unexpected exception message",
183 "Parse error: interleaved was true but now seems to be false, at line: ABCFEDHIJ",
189 @Test(groups = { "Functional" })
190 public void testGetSequenceId()
192 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
193 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
194 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
195 assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
196 assertNull(MegaFile.getSequenceId("AB123 CTAG"));
197 assertNull(MegaFile.getSequenceId("AB123"));
198 assertNull(MegaFile.getSequenceId(""));
199 assertNull(MegaFile.getSequenceId(null));
202 @Test(groups = { "Functional" })
203 public void testGetMaxIdLength()
205 SequenceI[] seqs = new Sequence[2];
206 seqs[0] = new Sequence("Something", "GCATAC");
207 seqs[1] = new Sequence("SomethingElse", "GCATAC");
208 assertEquals(13, MegaFile.getMaxIdLength(seqs));
209 seqs[1] = new Sequence("DNA", "GCATAC");
210 assertEquals(9, MegaFile.getMaxIdLength(seqs));
213 @Test(groups = { "Functional" })
214 public void testGetMaxSequenceLength()
216 SequenceI[] seqs = new Sequence[2];
217 seqs[0] = new Sequence("Seq1", "GCATAC");
218 seqs[1] = new Sequence("Seq2", "GCATACTAG");
219 assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
220 seqs[1] = new Sequence("Seq2", "GCA");
221 assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
// Parses the INTERLEAVED fixture, prints it back with print() and compares the
// result with an expected string.
// NOTE(review): the declaration of the variable named expected (start of the
// concatenation below) is not present in this view - confirm against the original.
225 * Test (parse and) print of interleaved mega format data.
227 * @throws IOException
229 @Test(groups = { "Functional" })
230 public void testPrint_interleaved() throws IOException
232 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
233 String printed = testee.print();
234 System.out.println(printed);
235 // normally output should match input
236 // we cheated here with a number of short input lines
237 // nb don't get Title in output if not calling print(AlignmentI)
241 "#U455 ABCDEF [6]\n" +
242 "#CPZANT MNOPQR [6]\n\n" +
243 "#U455 KLMNOP [12]\n" +
244 "#CPZANT WXYZGC [12]\n";
246 assertEquals("Print format wrong", expected, printed);
// Parses the INTERLEAVED_NOHEADERS fixture, prints it back with print() and compares
// the result with the literal built below.
// NOTE(review): the final argument and closing of the assertEquals call are not
// present in this view - confirm against the original file.
250 * Test (parse and) print of interleaved data with no headers (acceptable).
252 * @throws IOException
254 @Test(groups = { "Functional" })
255 public void testPrint_interleavedNoHeaders() throws IOException
257 MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
258 AppletFormatAdapter.PASTE);
259 String printed = testee.print();
260 System.out.println(printed);
263 assertEquals("Print format wrong",
264 "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
265 + "#CPZANT MNOPQR [6]\n\n"
266 + "#U455 KLMNOP [12]\n"
267 + "#CPZANT WXYZGC [12]\n",
273 * Test (parse and) print of noninterleaved mega format data.
275 * @throws IOException
277 @Test(groups = { "Functional" })
278 public void testPrint_noninterleaved() throws IOException
280 MegaFile testee = new MegaFile(NONINTERLEAVED,
281 AppletFormatAdapter.PASTE);
282 assertEquals(10, testee.getPositionsPerLine());
283 String printed = testee.print();
284 System.out.println(printed);
285 // normally output should match input
286 // we cheated here with a number of short input lines
287 String expected = "#MEGA\n\n" + "#U455\n"
288 + "ABCFEDHIJM [10]\nNOPQR [15]\n\n" + "#CPZANT\n"
289 + "KLMNOPWXYZ [10]\nCGATC [15]\n";
290 assertEquals("Print format wrong", expected, printed);
// Parses the INTERLEAVED_50RESIDUES fixture, checks it reports 50 positions per line,
// switches to 20 positions per line and compares the printed output with an expected string.
// NOTE(review): the declaration of the variable named expected (start of the
// concatenation below) is not present in this view - confirm against the original.
294 * Test (parse and) print of interleaved mega format data extending to more
295 * than one line of output.
297 * @throws IOException
299 @Test(groups = { "Functional" })
300 public void testPrint_interleavedMultiLine() throws IOException
302 MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
303 AppletFormatAdapter.PASTE);
304 assertEquals(50, testee.getPositionsPerLine());
306 * now simulate choosing 20 residues per line on output
308 testee.setPositionsPerLine(20);
309 String printed = testee.print();
310 System.out.println(printed);
314 "#U455 0123456789 klmnopqrst [20]\n" + // first 20
315 "#CPZANT 9876543210 abcdefghij [20]\n\n" +
316 "#U455 ABCDEFGHIJ 9876543210 [40]\n" + // next 20
317 "#CPZANT 0123456789 klmnopqrst [40]\n\n" +
318 "#U455 abcdefghij [50]\n" + // last 10
319 "#CPZANT ABCDEFGHIJ [50]\n";
321 assertEquals("Print format wrong", expected, printed);
325 * Test (parse and) print of noninterleaved mega format data extending to more
326 * than one line of output.
328 * @throws IOException
330 @Test(groups = { "Functional" })
331 public void testPrint_noninterleavedMultiLine() throws IOException
333 final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
334 + "\n" + TWENTY_CHARS + "9993332221\n";
335 MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
336 AppletFormatAdapter.PASTE);
337 assertEquals(30, testee.getPositionsPerLine());
338 testee.setPositionsPerLine(25);
339 String printed = testee.print();
342 * 25 positions per line is rounded down to 20 (two blocks of 10)
344 String expected = "#MEGA\n\n" + "#SIXTY\n"
345 + "0123456789 klmnopqrst [20]\n"
346 + "ABCDEFGHIJ 9876543210 [40]\n"
347 + "abcdefghij 9993332221 [60]\n";
348 assertEquals("Print format wrong", expected, printed);
// Parses the INTERLEAVED_WITH_DESCRIPTION fixture and checks the title, sequence
// names and residues, the interleaved flag and the description property.
352 * Test parse of data including description
354 * @throws IOException
356 @Test(groups = { "Functional" })
357 public void testParse_withDescription() throws IOException
359 MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
360 AppletFormatAdapter.PASTE);
361 assertEquals("Title not as expected", "Data with description",
362 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
364 Vector<SequenceI> seqs = testee.getSeqs();
365 // should be 2 sequences
366 assertEquals("Expected two sequences", 2, seqs.size());
367 // check sequence names correct and order preserved
368 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
369 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
// NOTE(review): the tail of the statement above is not present in this view
// (numbering jumps from 369 to 371) - confirm against the original file.
371 // check sequence data
372 assertEquals("First sequence data wrong", "C--GTACGA--T", seqs.get(0)
373 .getSequenceAsString());
374 assertEquals("Second sequence data wrong", "ATC-G-CA--GC", seqs.get(1)
375 .getSequenceAsString());
376 assertTrue("File format is not flagged as interleaved",
377 testee.isInterleaved());
// NOTE(review): the call that takes the three arguments below is not present in
// this view; the expected description drops the trailing semicolon of the input.
380 "Description property not parsed",
381 " Line one of description\n" + " Line two of description",
382 testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
385 @Test(groups = { "Functional" })
386 public void testGetNonCommentContent() throws FileFormatException
388 assertEquals("abcde", MegaFile.getNonCommentContent("abcde", 0));
389 assertEquals("CGT ACG GAC ",
390 MegaFile.getNonCommentContent("CGT ACG GAC [9]", 0));
391 assertEquals("", MegaFile.getNonCommentContent("abcde", 1));
392 assertEquals(" abcde",
393 MegaFile.getNonCommentContent("and others ] abcde", 1));
394 assertEquals(" abcde", MegaFile.getNonCommentContent(
395 "and others [including refs] ] abcde", 1));
396 assertEquals(" x ] abcde",
397 MegaFile.getNonCommentContent("and others ] x ] abcde", 1));
400 @Test(groups = { "Functional" })
401 public void testCommentDepth() throws FileFormatException
403 assertEquals(0, MegaFile.commentDepth("abcde", 0));
404 assertEquals(1, MegaFile.commentDepth("abc[de", 0));
405 assertEquals(3, MegaFile.commentDepth("ab[c[de", 1));
406 assertEquals(1, MegaFile.commentDepth("ab]c[d]e[f", 1));
407 assertEquals(0, MegaFile.commentDepth("a]b[c]d]e", 1));
410 @Test(groups = { "Functional" })
411 public void testGetValue()
413 assertEquals("Mega", MegaFile.getValue("Name=Mega"));
414 assertEquals("Mega", MegaFile.getValue("Name =Mega"));
415 assertEquals("Mega", MegaFile.getValue(" Name = Mega "));
416 assertEquals("Mega", MegaFile.getValue("Name = Mega; "));
417 assertEquals("Mega", MegaFile.getValue(" Name = Mega ; "));
418 assertEquals("Mega", MegaFile.getValue("\t!Name \t= \tMega ; "));
419 assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
420 assertEquals("", MegaFile.getValue("Name"));
// Reads the INTERLEAVED_WITH_DESCRIPTION fixture into an alignment through
// AppletFormatAdapter, prints it with a fresh MegaFile and compares with an expected string.
// NOTE(review): the declaration of the variable named expected, some lines of its
// literal and the end of the final assertEquals are not present in this view.
424 * Test reading a MEGA file to an alignment then writing it out in MEGA
425 * format. Verify the output is (functionally) the same as the input.
427 * @throws IOException
429 @Test(groups = "Functional")
430 public void testRoundTrip_Interleaved() throws IOException
432 AppletFormatAdapter fa = new AppletFormatAdapter();
433 AlignmentI al = fa.readFile(INTERLEAVED_WITH_DESCRIPTION,
434 AppletFormatAdapter.PASTE, "MEGA");
435 MegaFile output = new MegaFile();
436 String formatted = output.print(al);
439 "#MEGA\n!Title Data with description;\n" +
441 " Line one of description\n" +
442 " Line two of description;\n" +
444 " DataType=DNA CodeTable=Standard\n" +
445 " NSeqs=2 NSites=12\n" + // NSites includes gaps
446 " Indel=- Identical=. Missing=?;\n\n" +
447 "#U455 C-- GTA [6]\n" +
448 "#CPZANT ATC -G- [6]\n\n" +
449 "#U455 CGA --T [12]\n" +
450 "#CPZANT CA- -GC [12]\n";
452 assertEquals("Roundtrip didn't match", expected,
// Reads inline MEGA text containing a nested square-bracket comment and a format
// statement spread over several lines, prints the alignment with a fresh MegaFile
// and compares with an expected string.
// NOTE(review): parts of the input literal, the declaration of the variable named
// expected and the end of the final assertEquals are not present in this view.
457 * Test reading a MEGA file to an alignment then writing it out in MEGA
458 * format. Verify the output is (functionally) the same as the input.
460 * @throws IOException
462 @Test(groups = "Functional")
463 public void testRoundTrip_multilineFormatWithComments()
466 AppletFormatAdapter fa = new AppletFormatAdapter();
468 AlignmentI al = fa.readFile("#MEGA\n"
469 + "!Title Data with description;\n"
470 + "[ this comment should be ignored\n"
471 + "including [this nested comment]\n"
474 + "DataType=DNA CodeTable=Standard\n"
475 + "indel=- Missing=? MatchChar=.;\n\n"
477 + " Line one of description\n"
478 + " Line two of description;\n\n"
480 + "#CPZANT ATC GGG\n\n"
482 + "#CPZANT CAA TGC\n",
483 AppletFormatAdapter.PASTE, "MEGA");
485 MegaFile output = new MegaFile();
486 String formatted = output.print(al);
489 "#MEGA\n!Title Data with description;\n" +
491 " Line one of description\n" +
492 " Line two of description;\n" +
494 " DataType=DNA CodeTable=Standard\n" +
495 " NSeqs=2 NSites=12\n" +
496 " Indel=- Identical=. Missing=?;\n\n" +
497 "#U455 CGC GTA [6]\n" +
498 "#CPZANT ATC GGG [6]\n\n" +
499 "#U455 CGA TTT [12]\n" +
500 "#CPZANT CAA TGC [12]\n";
502 assertEquals("Roundtrip didn't match", expected,
// Parses tab-separated interleaved data that declares . as the identity character;
// the expected second sequence has each . replaced by the residue at the same
// position of the first sequence.
// NOTE(review): the first U455 data row of the input literal and the tail of the
// second sequence id assertion are not present in this view.
509 * Test parse of interleaved mega format data where the identity character is
510 * used in sequences after the first
512 * @throws IOException
514 @Test(groups = { "Functional" })
515 public void testParse_interleavedWithIdentityAndTabs() throws IOException
518 // uses tab instead of space separators to check robustness
519 MegaFile testee = new MegaFile("#MEGA\n"+
520 "!TITLE\tInterleaved sequence data;\n" +
521 "!Format\tIdentical=.;\n\n" +
523 "#CPZANT\tM..P.R\n\n" +
524 "#U455\t\tKLMNOP\n" +
525 "#CPZANT\t..YZ..", AppletFormatAdapter.PASTE);
527 assertEquals("Title not as expected", "Interleaved sequence data",
528 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
529 Vector<SequenceI> seqs = testee.getSeqs();
530 // should be 2 sequences
531 assertEquals("Expected two sequences", 2, seqs.size());
532 // check sequence names correct and order preserved
533 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
534 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
536 // check sequence data
537 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
538 .getSequenceAsString());
539 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
540 .getSequenceAsString());
541 assertTrue("File format is not flagged as interleaved",
542 testee.isInterleaved());
// Parses noninterleaved data that declares MatchChar=. and checks the title, names,
// residues and that isInterleaved() is false.
// NOTE(review): the sequence rows of the input literal (numbering jumps from 556 to
// 563) and the tail of the second sequence id assertion are not present in this view.
546 * Test parse of noninterleaved format data including identity symbol
548 * @throws IOException
550 @Test(groups = { "Functional" })
551 public void testParse_nonInterleavedWithIdentity() throws IOException
554 MegaFile testee = new MegaFile("#MEGA\n"
555 + "!TITLE Noninterleaved sequence data;\n"
556 + "!Format MatchChar=.;\n"
563 AppletFormatAdapter.PASTE);
565 assertEquals("Title not as expected", "Noninterleaved sequence data",
566 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
567 Vector<SequenceI> seqs = testee.getSeqs();
568 // should be 2 sequences
569 assertEquals("Expected two sequences", 2, seqs.size());
570 // check sequence names correct and order preserved
571 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
572 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
574 // check sequence data
575 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
576 .get(0).getSequenceAsString());
577 assertEquals("Second sequence data wrong", "KLCFODHXYZCGPQC",
578 seqs.get(1).getSequenceAsString());
579 assertFalse("File format is not flagged as noninterleaved",
580 testee.isInterleaved());
// Parses interleaved data whose rows end with a bracketed position count and checks
// that the counts do not appear in the parsed residues.
586 * Test parse of interleaved format data including position number comments.
588 * @throws IOException
590 @Test(groups = { "Functional" })
591 public void testParse_interleavedWithPositionNumber() throws IOException
594 MegaFile testee = new MegaFile("#MEGA\n"+
595 "TITLE: Interleaved sequence data\n\n" +
596 "#U455 ABCDEF [6]\n" +
597 "#CPZANT MNOPQR [6]\n\n" +
598 "#U455 KLMNOP [12]\n" +
599 "#CPZANT WXYZGC [12]\n", AppletFormatAdapter.PASTE);
601 assertEquals("Title not as expected", "Interleaved sequence data",
602 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
603 Vector<SequenceI> seqs = testee.getSeqs();
604 // should be 2 sequences
605 assertEquals("Expected two sequences", 2, seqs.size());
606 // check sequence names correct and order preserved
607 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
608 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
// NOTE(review): the tail of the statement above is not present in this view
// (numbering jumps from 608 to 610) - confirm against the original file.
610 // check sequence data
611 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
612 .getSequenceAsString());
613 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
614 .getSequenceAsString());
615 assertTrue("File format is not flagged as interleaved",
616 testee.isInterleaved());
// Parses interleaved data carrying Domain / Gene / Property statements between blocks,
// then checks the parsed residues, nine sequence features per sequence and the two
// alignment annotations labelled MEGA Gene and MEGA Domain.
// NOTE(review): the U455 rows of the input literal and the tails of three
// verifySequenceFeature calls are not present in this view.
622 * Test parse of data with !Gene and !Domain statements.
624 * @throws IOException
626 @Test(groups = { "Functional" })
627 public void testParse_geneDomains() throws IOException
630 String data = "#MEGA\n"+
631 "TITLE: Interleaved sequence data\n\n" +
633 "#CPZANT TTTTTT\n\n" +
634 "!Domain=Exon1 Gene=Adh Property=Coding CodonStart=1;\n" +
636 "#CPZANT AAAAAA\n\n" +
637 "!domain=Intron1 Property=Intron Gene=Adh;\n" +
639 "#CPZANT cccccc\n\n" +
640 "!Domain=Exon2 Gene=Adh Property=Exon CodonStart=1;\n" +
642 "#CPZANT gggggg\n\n" +
643 // explicit end of Exon2, implicit end of Adh:
644 "!Domain=Exon2 Property=domainend;\n" +
645 "!Domain=Intron1 Gene=Opsin Property=Noncoding;\n" +
647 "#CPZANT AAAAAA\n\n" +
648 // end Opsin, start MEF2A
649 "!Domain=Exon1 Gene=MEF2A Property=Coding CodonStart=1;\n" +
651 "#CPZANT cccccc\n\n" +
653 "!Domain=BindingSite;\n" +
655 "#CPZANT TTTTTT\n\n";
657 MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
659 Vector<SequenceI> seqs = testee.getSeqs();
660 // should be 2 sequences
661 assertEquals("Expected two sequences", 2, seqs.size());
662 // check sequence data
663 assertEquals("First sequence data wrong",
664 "CCCCCCGGGGGGttttttaaaaaaGGGGGGttttttCCCCCC", seqs.get(0)
665 .getSequenceAsString());
666 assertEquals("Second sequence data wrong",
667 "TTTTTTAAAAAAccccccggggggAAAAAAccccccTTTTTT", seqs.get(1)
668 .getSequenceAsString());
// NOTE(review): the summary on the next two lines looks stale - the assertions
// below check nine features, including Opsin and MEF2A, and place BindingSite at
// 37-42 rather than 25-30.
671 * sequences should have features for Gene=Adh 7-24, Exon1 7-12, Intron1
672 * 13-18, Exon2 19-24, BindingSite 25-30
674 for (SequenceI seq : seqs) {
675 SequenceFeature[] sfs = seq.getSequenceFeatures();
676 // features are added in the order in which their end is found
677 // (Domain before Gene when they end together)
678 assertEquals(9, sfs.length);
679 // TODO settle which way round type/description go!
680 verifySequenceFeature(sfs[0], "Exon1 (Adh Coding)", "Domain", 7, 12);
681 verifySequenceFeature(sfs[1], "Intron1 (Adh Noncoding)", "Domain",
683 verifySequenceFeature(sfs[2], "Exon2 (Adh Coding)", "Domain", 19, 24);
684 verifySequenceFeature(sfs[3], "Adh", "Gene", 7, 24);
685 verifySequenceFeature(sfs[4], "Intron1 (Opsin Noncoding)", "Domain",
687 verifySequenceFeature(sfs[5], "Opsin", "Gene", 25, 30);
688 verifySequenceFeature(sfs[6], "Exon1 (MEF2A Coding)", "Domain", 31,
690 verifySequenceFeature(sfs[7], "MEF2A", "Gene", 31, 36);
691 verifySequenceFeature(sfs[8], "BindingSite", "Domain", 37, 42);
695 * verify gene and domain alignment annotations
697 assertEquals(2, testee.annotations.size());
698 AlignmentAnnotation ann = testee.annotations.get(0);
699 assertEquals("MEGA Gene", ann.label);
700 assertEquals(42, ann.annotations.length);
// NOTE(review): verifyAnnotation treats its end index as exclusive, so the ranges
// below leave column 36 of the gene annotation unchecked - confirm whether the last
// call was meant to start at 36.
701 verifyAnnotation(ann, 0, 6, null);
702 verifyAnnotation(ann, 6, 24, "Adh");
703 verifyAnnotation(ann, 24, 30, "Opsin");
704 verifyAnnotation(ann, 30, 36, "MEF2A");
705 verifyAnnotation(ann, 37, 42, null);
707 ann = testee.annotations.get(1);
708 assertEquals("MEGA Domain", ann.label);
709 assertEquals(42, ann.annotations.length);
// NOTE(review): with an exclusive end index the ranges below leave columns 18, 24,
// 30 and 36 of the domain annotation unchecked - confirm whether the starts 19, 25,
// 31 and 37 were meant to be 18, 24, 30 and 36.
710 verifyAnnotation(ann, 0, 6, null);
711 verifyAnnotation(ann, 6, 12, "Exon1 (Adh Coding)");
712 verifyAnnotation(ann, 12, 18, "Intron1 (Adh Noncoding)");
713 verifyAnnotation(ann, 19, 24, "Exon2 (Adh Coding)");
714 verifyAnnotation(ann, 25, 30, "Intron1 (Opsin Noncoding)");
715 verifyAnnotation(ann, 31, 36, "Exon1 (MEF2A Coding)");
716 verifyAnnotation(ann, 37, 42, "BindingSite");
721 * Helper method to verify a range of annotation positions all have the given
725 * array of annotations to check
727 * start index to check
729 * end index to check (exclusive)
733 protected void verifyAnnotation(AlignmentAnnotation ann, int from,
734 int to, String description)
736 for (int pos = from; pos < to; pos++)
738 if (description == null)
740 assertNull(ann.annotations[pos]);
744 assertEquals(description, ann.annotations[pos].description);
750 * Helper method to assert properties of a SequenceFeature
758 protected void verifySequenceFeature(SequenceFeature sf, String type,
759 String description, int begin, int end)
761 assertEquals(description, sf.type);
762 assertEquals(type, sf.description);
763 assertEquals(begin, sf.begin);
764 assertEquals(end, sf.end);
// Parses interleaved data with Label statements after some blocks and checks the
// residues plus a single alignment annotation labelled MEGA Label of length 18 that
// is not attached to a sequence.
// NOTE(review): the U455 rows of the input literal, the seqs accessor lines of the
// two sequence assertions and the tail of the last assertEquals are not present in this view.
768 * Test parse of data including !Label statements. An underscore means no
769 * label, other characters are treated as alignment annotation.
771 * @throws IOException
773 @Test(groups = { "Functional" })
774 public void testParse_withLabels() throws IOException
777 MegaFile testee = new MegaFile("#MEGA\n"+
778 "TITLE: Interleaved sequence data\n\n" +
780 "#CPZANT MNO PQR\n" +
781 "!Label +-_ 23_\n\n" +
782 // a row with no labels = null annotation
784 "#CPZANT mno pqr\n\n" +
786 "#CPZANT WXY ZGC\n" +
787 "!label __3 +X_\n", AppletFormatAdapter.PASTE);
789 Vector<SequenceI> seqs = testee.getSeqs();
790 assertEquals("Expected two sequences", 2, seqs.size());
791 assertEquals("First sequence data wrong", "ABCDEFabcdefKLMNOP", seqs
793 .getSequenceAsString());
794 assertEquals("Second sequence data wrong", "MNOPQRmnopqrWXYZGC", seqs
796 .getSequenceAsString());
798 // check AlignmentAnnotation added with expected values
799 assertEquals(1, testee.annotations.size());
800 AlignmentAnnotation aa = testee.annotations.get(0);
801 assertNull(aa.sequenceRef);
802 assertEquals("MEGA Label", aa.label);
803 assertEquals(18, aa.annotations.length);
804 assertEquals("+, -, , 2, 3, , , , , , , , , , 3, +, X, , ",
// Parses interleaved data in which a gene and domain are declared before the first
// block, and checks that each sequence ends up with three features: the Exon1 domain
// and gene1 on 1-6, and gene2 on 7-12.
// NOTE(review): the U455 rows and the statement introducing gene2 are not present in
// this view (numbering jumps 823-825 and 825-828).
811 * Test case where a domain is implicitly terminated by starting a new gene
813 * @throws IOException
815 @Test(groups = { "Functional" })
816 public void testParse_changeOfGeneEndsDomain() throws IOException
// NOTE(review): the literals visible below contain no tab escapes, so the next
// comment may be a leftover from testParse_interleavedWithIdentityAndTabs - confirm.
819 // uses tab instead of space separators to check robustness
820 MegaFile testee = new MegaFile("#MEGA\n"+
821 "!TITLE Interleaved sequence data;\n" +
822 "!Format Identical=.;\n\n" +
823 "!Gene=gene1 Domain=Exon1 Property=Coding;\n" +
825 "#CPZANT M..P.R\n\n" +
828 "#CPZANT ..YZ..", AppletFormatAdapter.PASTE);
830 Vector<SequenceI> seqs = testee.getSeqs();
831 assertEquals("Expected two sequences", 2, seqs.size());
832 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
833 .getSequenceAsString());
834 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
835 .getSequenceAsString());
836 assertTrue("File format is not flagged as interleaved",
837 testee.isInterleaved());
839 for (SequenceI seq : seqs)
841 SequenceFeature[] sfs = seq.getSequenceFeatures();
842 assertEquals(3, sfs.length);
843 verifySequenceFeature(sfs[0], "Exon1 (gene1 Coding)", "Domain", 1, 6);
844 verifySequenceFeature(sfs[1], "gene1", "Gene", 1, 6);
845 verifySequenceFeature(sfs[2], "gene2", "Gene", 7, 12);
// Reads data that declares Indel=% through AppletFormatAdapter and checks that the
// resulting sequences use - at the gap positions and that the alignment reports -
// as its gap character.
// NOTE(review): the U455 rows and the end of the input literal are not present in
// this view (numbering jumps 863-865 and 865-868).
852 * Test case where the declared gap character is one Jalview does not support;
853 * it should be converted to a '-'
855 * @throws IOException
857 @Test(groups = { "Functional" })
858 public void testParse_weirdGapCharacter() throws IOException
861 String data = "#MEGA\n"+
862 "!TITLE Interleaved sequence data;\n" +
863 "!Format Identical=. Indel=%;\n\n" +
865 "#CPZANT M..P.R\n\n" +
868 AppletFormatAdapter fa = new AppletFormatAdapter();
869 AlignmentI al = fa.readFile(data,
870 AppletFormatAdapter.PASTE, "MEGA");
872 List<SequenceI> seqs = al.getSequences();
873 assertEquals("First sequence data wrong", "-BC-EFKLMNOP", seqs.get(0)
874 .getSequenceAsString());
875 assertEquals("Second sequence data wrong", "MBCPERK--ZOP", seqs.get(1)
876 .getSequenceAsString());
877 assertEquals('-', al.getGapCharacter());
// Reads data with Label statements through AppletFormatAdapter, checks the first
// alignment annotation is labelled MEGA Label, then prints the alignment with a
// fresh MegaFile and compares with an expected string.
// NOTE(review): parts of the input literal, the tail of the annotation assertion,
// the declaration of the variable named expected and the end of the final
// assertEquals are not present in this view.
881 * Test reading a MEGA file to an alignment then writing it out in MEGA
882 * format. Includes !Label statements which should be converted to
883 * AlignmentAnnotation and back again.
885 * @throws IOException
887 @Test(groups = "Functional")
888 public void testRoundTrip_withLabels() throws IOException
890 AppletFormatAdapter fa = new AppletFormatAdapter();
893 String data = "#MEGA\n"
895 + "#CPZANT ATC -G-\n"
896 + "!Label F__E_H\n\n"
898 + "#CPZANT CA- -GC\n"
900 AlignmentI al = fa.readFile(data,
901 AppletFormatAdapter.PASTE, "MEGA");
902 AlignmentAnnotation aa = al.getAlignmentAnnotation()[0];
903 assertEquals("MEGA Label", aa.label);
904 assertEquals("F, , , E, , H, F, F, H, , , E, ",
907 MegaFile output = new MegaFile();
908 String formatted = output.print(al);
912 " DataType=Nucleotide CodeTable=Standard\n" +
913 " NSeqs=2 NSites=12\n" +
915 "#U455 C-- GTA [6]\n" +
916 "#CPZANT ATC -G- [6]\n" +
917 "!Label F__ E_H;\n\n" +
918 "#U455 CGA --T [12]\n" +
919 "#CPZANT CA- -GC [12]\n" +
922 assertEquals("Roundtrip didn't match", expected,
// Parses data containing Domain / Gene statements and asserts that print() returns
// text identical to the input string.
// NOTE(review): the Seq1 rows and the end of the input literal are not present in
// this view (numbering jumps 940-942, 946-948 and 949-953).
927 * Test (parse and) print of MEGA data with !Gene statements.
929 * @throws IOException
931 @Test(groups = { "Functional" })
932 public void testPrint_genes() throws IOException
935 * to keep the test concise, input data is in the exact format that Jalview
936 * would output it; the important thing is functional equivalence of input
940 String data = "#MEGA\n\n"+
942 "#Seq2 MNOP [4]\n\n" +
943 "!Domain=Exon1 Gene=Adh Property=Coding;\n" +
944 "#Seq1 EFGHI [9]\n" +
945 "#Seq2 QRSTU [9]\n\n" +
946 "!Domain=Intron1 Gene=Adh Property=Noncoding;\n" +
948 "#Seq2 VW [11]\n\n" +
949 "!Domain=Intron1 Property=domainend;\n" +
953 MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
954 String printed = testee.print();
955 assertEquals("Print format wrong", data, printed);
958 @Test(groups = { "Functional" })
959 public void testGetDomainFromAnnotation()
961 Annotation[] anns = new Annotation[5];
962 anns[1] = new Annotation("", "Intron1", '0', 0f);
963 anns[2] = new Annotation("", "Intron2 (Aspx)", '0', 0f);
964 anns[3] = new Annotation("", "Intron3 (Aspy Coding)", '0', 0f);
965 anns[4] = new Annotation("", "Intron4 (Coding)", '0', 0f);
966 AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
968 assertNull(MegaFile.getDomainFromAnnotation(0, null));
970 assertNull(MegaFile.getDomainFromAnnotation(0, aa));
971 // column out of range:
972 assertNull(MegaFile.getDomainFromAnnotation(5, aa));
973 // domain with no Gene or Property:
974 assertEquals("Intron1", MegaFile.getDomainFromAnnotation(1, aa));
975 // domain with Gene but no Property:
976 assertEquals("Intron2", MegaFile.getDomainFromAnnotation(2, aa));
977 // domain with Gene and Property:
978 assertEquals("Intron3", MegaFile.getDomainFromAnnotation(3, aa));
979 // domain with Property and no Gene:
980 assertEquals("Intron4", MegaFile.getDomainFromAnnotation(4, aa));
983 @Test(groups = { "Functional" })
984 public void testGetGeneFromAnnotation()
986 Annotation[] anns = new Annotation[3];
987 anns[1] = new Annotation("", "Aspx", '0', 0f);
988 AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
990 assertNull(MegaFile.getGeneFromAnnotation(0, null));
992 assertNull(MegaFile.getGeneFromAnnotation(0, aa));
993 // column out of range:
994 assertNull(MegaFile.getGeneFromAnnotation(3, aa));
996 assertEquals("Aspx", MegaFile.getGeneFromAnnotation(1, aa));
999 @Test(groups = { "Functional" })
1000 public void testGetPropertyFromAnnotation()
1002 Annotation[] anns = new Annotation[5];
1003 anns[1] = new Annotation("", "Intron1", '0', 0f);
1004 anns[2] = new Annotation("", "Intron2 (Aspx)", '0', 0f);
1005 anns[3] = new Annotation("", "Intron3 (Aspy Noncoding)", '0', 0f);
1006 anns[4] = new Annotation("", "Exon1 (Aspx Coding)", '0', 0f);
1007 AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
1009 assertNull(MegaFile.getPropertyFromAnnotation(0, null));
1011 assertNull(MegaFile.getPropertyFromAnnotation(0, aa));
1012 // column out of range:
1013 assertNull(MegaFile.getPropertyFromAnnotation(5, aa));
1014 // domain with no Gene or Property:
1015 assertNull(MegaFile.getPropertyFromAnnotation(1, aa));
1016 // domain with Gene but no Property:
1017 assertNull(MegaFile.getPropertyFromAnnotation(2, aa));
1018 // domain with Gene and Property:
1019 assertEquals("Noncoding", MegaFile.getPropertyFromAnnotation(3, aa));
1020 assertEquals("Coding", MegaFile.getPropertyFromAnnotation(4, aa));