3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.AssertJUnit.fail;
9 import jalview.datamodel.AlignmentAnnotation;
10 import jalview.datamodel.AlignmentI;
11 import jalview.datamodel.Annotation;
12 import jalview.datamodel.Sequence;
13 import jalview.datamodel.SequenceFeature;
14 import jalview.datamodel.SequenceI;
16 import java.io.IOException;
17 import java.util.List;
18 import java.util.Vector;
20 import org.testng.annotations.Test;
23 * Unit tests for MegaFile - read and write in MEGA format(s).
25 public class MegaFileTest
27 private static final String TWENTY_CHARS = "9876543210abcdefghij";
29 private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
32 private static final String INTERLEAVED =
34 "TITLE: Interleaved sequence data\n\n" +
36 "#CPZANT MNOPQR\n\n" +
40 private static final String INTERLEAVED_NOHEADERS =
42 + "#CPZANT MNOPQR\n\n"
46 // interleaved sequences, with 50 residues
47 private static final String INTERLEAVED_50RESIDUES =
49 + "!TITLE Interleaved sequence data\n\n"
50 + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n"
51 + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
53 private static final String NONINTERLEAVED =
55 + "!TITLE Noninterleaved sequence data\n\n"
63 // this one starts interleaved then switches to non-interleaved
64 private static final String MIXED =
66 + "!TITLE This is a mess\n\n"
67 + "#CPZANT KLMNOPWXYZCGATC\n\n"
71 // interleaved with a new sequence appearing in the second block :-O
72 private static final String INTERLEAVED_SEQUENCE_ERROR =
74 + "!TITLE Interleaved sequence data\n\n"
76 + "#CPZANT MNOPQR\n\n"
78 + "#U456 KLMNOP\n"; // wossis?
80 // interleaved with description, bases/gaps in triplet groups
81 private static final String INTERLEAVED_WITH_DESCRIPTION =
83 + "!Title Data with description;\n"
84 + "!Format DataType=DNA indel=-\tCodeTable=Standard Missing=? MatchChar=.;\n\n"
86 + " Line one of description\n"
87 + " Line two of description;\n\n"
89 + "#CPZANT ATC -G-\n\n"
91 + "#CPZANT CA- -GC\n";
96 * Test parse of interleaved mega format data.
100 @Test(groups = { "Functional" })
101 public void testParse_interleaved() throws IOException
103 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
104 assertEquals("Title not as expected", "Interleaved sequence data",
105 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
106 Vector<SequenceI> seqs = testee.getSeqs();
107 // should be 2 sequences
108 assertEquals("Expected two sequences", 2, seqs.size());
109 // check sequence names correct and order preserved
110 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
111 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
113 // check sequence data
114 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
115 .getSequenceAsString());
116 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
117 .getSequenceAsString());
118 assertTrue("File format is not flagged as interleaved",
119 testee.isInterleaved());
123 * Test parse of noninterleaved mega format data.
125 * @throws IOException
127 @Test(groups = { "Functional" })
128 public void testParse_nonInterleaved() throws IOException
130 MegaFile testee = new MegaFile(NONINTERLEAVED,
131 AppletFormatAdapter.PASTE);
132 assertEquals("Title not as expected", "Noninterleaved sequence data",
133 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
134 Vector<SequenceI> seqs = testee.getSeqs();
135 // should be 2 sequences
136 assertEquals("Expected two sequences", 2, seqs.size());
137 // check sequence names correct and order preserved
138 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
139 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
141 // check sequence data
142 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
143 .get(0).getSequenceAsString());
144 assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
145 seqs.get(1).getSequenceAsString());
146 assertFalse("File format is not flagged as noninterleaved",
147 testee.isInterleaved());
151 * Test parsing an interleaved file with an extra sequence appearing after the
152 * first block - should fail.
154 @Test(groups = { "Functional" })
155 public void testParse_interleavedExtraSequenceError()
159 new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
160 fail("Expected extra sequence IOException");
161 } catch (IOException e)
164 "Unexpected exception message",
165 "Parse error: misplaced new sequence starting at #U456 KLMNOP",
171 * Test a mixed up file.
173 @Test(groups = { "Functional" })
174 public void testParse_mixedInterleavedNonInterleaved()
178 new MegaFile(MIXED, AppletFormatAdapter.PASTE);
179 fail("Expected mixed content exception");
180 } catch (IOException e)
183 "Unexpected exception message",
184 "Parse error: interleaved was true but now seems to be false, at line: ABCFEDHIJ",
190 @Test(groups = { "Functional" })
191 public void testGetSequenceId()
193 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
194 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
195 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
196 assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
197 assertNull(MegaFile.getSequenceId("AB123 CTAG"));
198 assertNull(MegaFile.getSequenceId("AB123"));
199 assertNull(MegaFile.getSequenceId(""));
200 assertNull(MegaFile.getSequenceId(null));
203 @Test(groups = { "Functional" })
204 public void testGetMaxIdLength()
206 SequenceI[] seqs = new Sequence[2];
207 seqs[0] = new Sequence("Something", "GCATAC");
208 seqs[1] = new Sequence("SomethingElse", "GCATAC");
209 assertEquals(13, MegaFile.getMaxIdLength(seqs));
210 seqs[1] = new Sequence("DNA", "GCATAC");
211 assertEquals(9, MegaFile.getMaxIdLength(seqs));
214 @Test(groups = { "Functional" })
215 public void testGetMaxSequenceLength()
217 SequenceI[] seqs = new Sequence[2];
218 seqs[0] = new Sequence("Seq1", "GCATAC");
219 seqs[1] = new Sequence("Seq2", "GCATACTAG");
220 assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
221 seqs[1] = new Sequence("Seq2", "GCA");
222 assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
226 * Test (parse and) print of interleaved mega format data.
228 * @throws IOException
230 @Test(groups = { "Functional" })
231 public void testPrint_interleaved() throws IOException
233 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
234 String printed = testee.print();
235 System.out.println(printed);
236 // normally output should match input
237 // we cheated here with a number of short input lines
238 // nb don't get Title in output if not calling print(AlignmentI)
242 "#U455 ABCDEF [6]\n" +
243 "#CPZANT MNOPQR [6]\n\n" +
244 "#U455 KLMNOP [12]\n" +
245 "#CPZANT WXYZGC [12]\n";
247 assertEquals("Print format wrong", expected, printed);
251 * Test (parse and) print of interleaved data with no headers (acceptable).
253 * @throws IOException
255 @Test(groups = { "Functional" })
256 public void testPrint_interleavedNoHeaders() throws IOException
258 MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
259 AppletFormatAdapter.PASTE);
260 String printed = testee.print();
261 System.out.println(printed);
264 assertEquals("Print format wrong",
265 "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
266 + "#CPZANT MNOPQR [6]\n\n"
267 + "#U455 KLMNOP [12]\n"
268 + "#CPZANT WXYZGC [12]\n",
274 * Test (parse and) print of noninterleaved mega format data.
276 * @throws IOException
278 @Test(groups = { "Functional" })
279 public void testPrint_noninterleaved() throws IOException
281 MegaFile testee = new MegaFile(NONINTERLEAVED,
282 AppletFormatAdapter.PASTE);
283 assertEquals(10, testee.getPositionsPerLine());
284 String printed = testee.print();
285 System.out.println(printed);
286 // normally output should match input
287 // we cheated here with a number of short input lines
288 String expected = "#MEGA\n\n" + "#U455\n"
289 + "ABCFEDHIJM [10]\nNOPQR [15]\n\n" + "#CPZANT\n"
290 + "KLMNOPWXYZ [10]\nCGATC [15]\n";
291 assertEquals("Print format wrong", expected, printed);
295 * Test (parse and) print of interleaved mega format data extending to more
296 * than one line of output.
298 * @throws IOException
300 @Test(groups = { "Functional" })
301 public void testPrint_interleavedMultiLine() throws IOException
303 MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
304 AppletFormatAdapter.PASTE);
305 assertEquals(50, testee.getPositionsPerLine());
307 * now simulate choosing 20 residues per line on output
309 testee.setPositionsPerLine(20);
310 String printed = testee.print();
311 System.out.println(printed);
315 "#U455 0123456789 klmnopqrst [20]\n" + // first 20
316 "#CPZANT 9876543210 abcdefghij [20]\n\n" +
317 "#U455 ABCDEFGHIJ 9876543210 [40]\n" + // next 20
318 "#CPZANT 0123456789 klmnopqrst [40]\n\n" +
319 "#U455 abcdefghij [50]\n" + // last 10
320 "#CPZANT ABCDEFGHIJ [50]\n";
322 assertEquals("Print format wrong", expected, printed);
326 * Test (parse and) print of noninterleaved mega format data extending to more
327 * than one line of output.
329 * @throws IOException
331 @Test(groups = { "Functional" })
332 public void testPrint_noninterleavedMultiLine() throws IOException
334 final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
335 + "\n" + TWENTY_CHARS + "9993332221\n";
336 MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
337 AppletFormatAdapter.PASTE);
338 assertEquals(30, testee.getPositionsPerLine());
339 testee.setPositionsPerLine(25);
340 String printed = testee.print();
343 * 25 positions per line is rounded down to 20 (two blocks of 10)
345 String expected = "#MEGA\n\n" + "#SIXTY\n"
346 + "0123456789 klmnopqrst [20]\n"
347 + "ABCDEFGHIJ 9876543210 [40]\n"
348 + "abcdefghij 9993332221 [60]\n";
349 assertEquals("Print format wrong", expected, printed);
353 * Test parse of data including description
355 * @throws IOException
357 @Test(groups = { "Functional" })
358 public void testParse_withDescription() throws IOException
360 MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
361 AppletFormatAdapter.PASTE);
362 assertEquals("Title not as expected", "Data with description",
363 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
365 Vector<SequenceI> seqs = testee.getSeqs();
366 // should be 2 sequences
367 assertEquals("Expected two sequences", 2, seqs.size());
368 // check sequence names correct and order preserved
369 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
370 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
372 // check sequence data
373 assertEquals("First sequence data wrong", "C--GTACGA--T", seqs.get(0)
374 .getSequenceAsString());
375 assertEquals("Second sequence data wrong", "ATC-G-CA--GC", seqs.get(1)
376 .getSequenceAsString());
377 assertTrue("File format is not flagged as interleaved",
378 testee.isInterleaved());
381 "Description property not parsed",
382 " Line one of description\n" + " Line two of description",
383 testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
386 @Test(groups = { "Functional" })
387 public void testGetNonCommentContent() throws FileFormatException
389 assertEquals("abcde", MegaFile.getNonCommentContent("abcde", 0));
390 assertEquals("CGT ACG GAC ",
391 MegaFile.getNonCommentContent("CGT ACG GAC [9]", 0));
392 assertEquals("", MegaFile.getNonCommentContent("abcde", 1));
393 assertEquals(" abcde",
394 MegaFile.getNonCommentContent("and others ] abcde", 1));
395 assertEquals(" abcde", MegaFile.getNonCommentContent(
396 "and others [including refs] ] abcde", 1));
397 assertEquals(" x ] abcde",
398 MegaFile.getNonCommentContent("and others ] x ] abcde", 1));
401 @Test(groups = { "Functional" })
402 public void testCommentDepth() throws FileFormatException
404 assertEquals(0, MegaFile.commentDepth("abcde", 0));
405 assertEquals(1, MegaFile.commentDepth("abc[de", 0));
406 assertEquals(3, MegaFile.commentDepth("ab[c[de", 1));
407 assertEquals(1, MegaFile.commentDepth("ab]c[d]e[f", 1));
408 assertEquals(0, MegaFile.commentDepth("a]b[c]d]e", 1));
411 @Test(groups = { "Functional" })
412 public void testGetValue()
414 assertEquals("Mega", MegaFile.getValue("Name=Mega"));
415 assertEquals("Mega", MegaFile.getValue("Name =Mega"));
416 assertEquals("Mega", MegaFile.getValue(" Name = Mega "));
417 assertEquals("Mega", MegaFile.getValue("Name = Mega; "));
418 assertEquals("Mega", MegaFile.getValue(" Name = Mega ; "));
419 assertEquals("Mega", MegaFile.getValue("\t!Name \t= \tMega ; "));
420 assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
421 assertEquals("", MegaFile.getValue("Name"));
425 * Test reading a MEGA file to an alignment then writing it out in MEGA
426 * format. Verify the output is (functionally) the same as the input.
428 * @throws IOException
430 @Test(groups = "Functional")
431 public void testRoundTrip_Interleaved() throws IOException
433 AppletFormatAdapter fa = new AppletFormatAdapter();
434 AlignmentI al = fa.readFile(INTERLEAVED_WITH_DESCRIPTION,
435 AppletFormatAdapter.PASTE, "MEGA");
436 MegaFile output = new MegaFile();
437 String formatted = output.print(al);
440 "#MEGA\n!Title Data with description;\n" +
442 " Line one of description\n" +
443 " Line two of description;\n" +
445 " DataType=DNA CodeTable=Standard\n" +
446 " NSeqs=2 NSites=12\n" + // NSites includes gaps
447 " Indel=- Identical=. Missing=?;\n\n" +
448 "#U455 C-- GTA [6]\n" +
449 "#CPZANT ATC -G- [6]\n\n" +
450 "#U455 CGA --T [12]\n" +
451 "#CPZANT CA- -GC [12]\n";
453 assertEquals("Roundtrip didn't match", expected,
458 * Test reading a MEGA file to an alignment then writing it out in MEGA
459 * format. Verify the output is (functionally) the same as the input.
461 * @throws IOException
463 @Test(groups = "Functional")
464 public void testRoundTrip_multilineFormatWithComments()
467 AppletFormatAdapter fa = new AppletFormatAdapter();
469 AlignmentI al = fa.readFile("#MEGA\n"
470 + "!Title Data with description;\n"
471 + "[ this comment should be ignored\n"
472 + "including [this nested comment]\n"
475 + "DataType=DNA CodeTable=Standard\n"
476 + "indel=- Missing=? MatchChar=.;\n\n"
478 + " Line one of description\n"
479 + " Line two of description;\n\n"
481 + "#CPZANT ATC GGG\n\n"
483 + "#CPZANT CAA TGC\n",
484 AppletFormatAdapter.PASTE, "MEGA");
486 MegaFile output = new MegaFile();
487 String formatted = output.print(al);
490 "#MEGA\n!Title Data with description;\n" +
492 " Line one of description\n" +
493 " Line two of description;\n" +
495 " DataType=DNA CodeTable=Standard\n" +
496 " NSeqs=2 NSites=12\n" +
497 " Indel=- Identical=. Missing=?;\n\n" +
498 "#U455 CGC GTA [6]\n" +
499 "#CPZANT ATC GGG [6]\n\n" +
500 "#U455 CGA TTT [12]\n" +
501 "#CPZANT CAA TGC [12]\n";
503 assertEquals("Roundtrip didn't match", expected,
510 * Test parse of interleaved mega format data where the identity character is
511 * used in sequences after the first
513 * @throws IOException
515 @Test(groups = { "Functional" })
516 public void testParse_interleavedWithIdentityAndTabs() throws IOException
519 // uses tab instead of space separators to check robustness
520 MegaFile testee = new MegaFile("#MEGA\n"+
521 "!TITLE\tInterleaved sequence data;\n" +
522 "!Format\tIdentical=.;\n\n" +
524 "#CPZANT\tM..P.R\n\n" +
525 "#U455\t\tKLMNOP\n" +
526 "#CPZANT\t..YZ..", AppletFormatAdapter.PASTE);
528 assertEquals("Title not as expected", "Interleaved sequence data",
529 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
530 Vector<SequenceI> seqs = testee.getSeqs();
531 // should be 2 sequences
532 assertEquals("Expected two sequences", 2, seqs.size());
533 // check sequence names correct and order preserved
534 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
535 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
537 // check sequence data
538 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
539 .getSequenceAsString());
540 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
541 .getSequenceAsString());
542 assertTrue("File format is not flagged as interleaved",
543 testee.isInterleaved());
547 * Test parse of noninterleaved format data including identity symbol
549 * @throws IOException
551 @Test(groups = { "Functional" })
552 public void testParse_nonInterleavedWithIdentity() throws IOException
555 MegaFile testee = new MegaFile("#MEGA\n"
556 + "!TITLE Noninterleaved sequence data;\n"
557 + "!Format MatchChar=.;\n"
564 AppletFormatAdapter.PASTE);
566 assertEquals("Title not as expected", "Noninterleaved sequence data",
567 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
568 Vector<SequenceI> seqs = testee.getSeqs();
569 // should be 2 sequences
570 assertEquals("Expected two sequences", 2, seqs.size());
571 // check sequence names correct and order preserved
572 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
573 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
575 // check sequence data
576 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
577 .get(0).getSequenceAsString());
578 assertEquals("Second sequence data wrong", "KLCFODHXYZCGPQC",
579 seqs.get(1).getSequenceAsString());
580 assertFalse("File format is not flagged as noninterleaved",
581 testee.isInterleaved());
587 * Test parse of interleaved format data including position number comments.
589 * @throws IOException
591 @Test(groups = { "Functional" })
592 public void testParse_interleavedWithPositionNumber() throws IOException
595 MegaFile testee = new MegaFile("#MEGA\n"+
596 "TITLE: Interleaved sequence data\n\n" +
597 "#U455 ABCDEF [6]\n" +
598 "#CPZANT MNOPQR [6]\n\n" +
599 "#U455 KLMNOP [12]\n" +
600 "#CPZANT WXYZGC [12]\n", AppletFormatAdapter.PASTE);
602 assertEquals("Title not as expected", "Interleaved sequence data",
603 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
604 Vector<SequenceI> seqs = testee.getSeqs();
605 // should be 2 sequences
606 assertEquals("Expected two sequences", 2, seqs.size());
607 // check sequence names correct and order preserved
608 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
609 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
611 // check sequence data
612 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
613 .getSequenceAsString());
614 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
615 .getSequenceAsString());
616 assertTrue("File format is not flagged as interleaved",
617 testee.isInterleaved());
623 * Test parse of data with !Gene and !Domain statements.
625 * @throws IOException
627 @Test(groups = { "Functional" })
628 public void testParse_geneDomains() throws IOException
631 String data = "#MEGA\n"+
632 "TITLE: Interleaved sequence data\n\n" +
634 "#CPZANT TTTTTT\n\n" +
635 "!Domain=Exon1 Gene=Adh Property=Coding CodonStart=1;\n" +
637 "#CPZANT AAAAAA\n\n" +
638 "!domain=Intron1 Property=Intron Gene=Adh;\n" +
640 "#CPZANT cccccc\n\n" +
641 "!Domain=Exon2 Gene=Adh Property=Exon CodonStart=1;\n" +
643 "#CPZANT gggggg\n\n" +
644 // explicit end of Exon2, implicit end of Adh:
645 "!Domain=Exon2 Property=domainend;\n" +
646 "!Domain=Intron1 Gene=Opsin Property=Noncoding;\n" +
648 "#CPZANT AAAAAA\n\n" +
649 // end Opsin, start MEF2A
650 "!Domain=Exon1 Gene=MEF2A Property=Coding CodonStart=1;\n" +
652 "#CPZANT cccccc\n\n" +
654 "!Domain=BindingSite;\n" +
656 "#CPZANT TTTTTT\n\n";
658 MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
660 Vector<SequenceI> seqs = testee.getSeqs();
661 // should be 2 sequences
662 assertEquals("Expected two sequences", 2, seqs.size());
663 // check sequence data
664 assertEquals("First sequence data wrong",
665 "CCCCCCGGGGGGttttttaaaaaaGGGGGGttttttCCCCCC", seqs.get(0)
666 .getSequenceAsString());
667 assertEquals("Second sequence data wrong",
668 "TTTTTTAAAAAAccccccggggggAAAAAAccccccTTTTTT", seqs.get(1)
669 .getSequenceAsString());
672 * sequences should have features for Gene=Adh 7-24, Exon1 7-12, Intron1
673 * 13-18, Exon2 19-24, BindingSite 25-30
675 for (SequenceI seq : seqs) {
676 SequenceFeature[] sfs = seq.getSequenceFeatures();
677 // features are added in the order in which their end is found
678 // (Domain before Gene when they end together)
679 assertEquals(9, sfs.length);
680 // TODO settle which way round type/description go!
681 verifySequenceFeature(sfs[0], "Exon1 (Adh Coding)", "Domain", 7, 12);
682 verifySequenceFeature(sfs[1], "Intron1 (Adh Noncoding)", "Domain",
684 verifySequenceFeature(sfs[2], "Exon2 (Adh Coding)", "Domain", 19, 24);
685 verifySequenceFeature(sfs[3], "Adh", "Gene", 7, 24);
686 verifySequenceFeature(sfs[4], "Intron1 (Opsin Noncoding)", "Domain",
688 verifySequenceFeature(sfs[5], "Opsin", "Gene", 25, 30);
689 verifySequenceFeature(sfs[6], "Exon1 (MEF2A Coding)", "Domain", 31,
691 verifySequenceFeature(sfs[7], "MEF2A", "Gene", 31, 36);
692 verifySequenceFeature(sfs[8], "BindingSite", "Domain", 37, 42);
696 * verify gene and domain alignment annotations
698 assertEquals(2, testee.annotations.size());
699 AlignmentAnnotation ann = testee.annotations.get(0);
700 assertEquals("MEGA Gene", ann.label);
701 assertEquals(42, ann.annotations.length);
702 verifyAnnotation(ann, 0, 6, null);
703 verifyAnnotation(ann, 6, 24, "Adh");
704 verifyAnnotation(ann, 24, 30, "Opsin");
705 verifyAnnotation(ann, 30, 36, "MEF2A");
706 verifyAnnotation(ann, 37, 42, null);
708 ann = testee.annotations.get(1);
709 assertEquals("MEGA Domain", ann.label);
710 assertEquals(42, ann.annotations.length);
711 verifyAnnotation(ann, 0, 6, null);
712 verifyAnnotation(ann, 6, 12, "Exon1 (Adh Coding)");
713 verifyAnnotation(ann, 12, 18, "Intron1 (Adh Noncoding)");
714 verifyAnnotation(ann, 19, 24, "Exon2 (Adh Coding)");
715 verifyAnnotation(ann, 25, 30, "Intron1 (Opsin Noncoding)");
716 verifyAnnotation(ann, 31, 36, "Exon1 (MEF2A Coding)");
717 verifyAnnotation(ann, 37, 42, "BindingSite");
722 * Helper method to verify a range of annotation positions all have the given
726 * array of annotations to check
728 * start index to check
730 * end index to check (exclusive)
734 protected void verifyAnnotation(AlignmentAnnotation ann, int from,
735 int to, String description)
737 for (int pos = from; pos < to; pos++)
739 if (description == null)
741 assertNull(ann.annotations[pos]);
745 assertEquals(description, ann.annotations[pos].description);
751 * Helper method to assert properties of a SequenceFeature
759 protected void verifySequenceFeature(SequenceFeature sf, String type,
760 String description, int begin, int end)
762 assertEquals(description, sf.type);
763 assertEquals(type, sf.description);
764 assertEquals(begin, sf.begin);
765 assertEquals(end, sf.end);
769 * Test parse of data including !Label statements. An underscore means no
770 * label, other characters are treated as alignment annotation.
772 * @throws IOException
774 @Test(groups = { "Functional" })
775 public void testParse_withLabels() throws IOException
778 MegaFile testee = new MegaFile("#MEGA\n"+
779 "TITLE: Interleaved sequence data\n\n" +
781 "#CPZANT MNO PQR\n" +
782 "!Label +-_ 23_\n\n" +
783 // a row with no labels = null annotation
785 "#CPZANT mno pqr\n\n" +
787 "#CPZANT WXY ZGC\n" +
788 "!label __3 +X_\n", AppletFormatAdapter.PASTE);
790 Vector<SequenceI> seqs = testee.getSeqs();
791 assertEquals("Expected two sequences", 2, seqs.size());
792 assertEquals("First sequence data wrong", "ABCDEFabcdefKLMNOP", seqs
794 .getSequenceAsString());
795 assertEquals("Second sequence data wrong", "MNOPQRmnopqrWXYZGC", seqs
797 .getSequenceAsString());
799 // check AlignmentAnnotation added with expected values
800 assertEquals(1, testee.annotations.size());
801 AlignmentAnnotation aa = testee.annotations.get(0);
802 assertNull(aa.sequenceRef);
803 assertEquals("MEGA Label", aa.label);
804 assertEquals(18, aa.annotations.length);
805 assertEquals("+, -, , 2, 3, , , , , , , , , , 3, +, X, , ",
812 * Test case where a domain is implicitly terminated by starting a new gene
814 * @throws IOException
816 @Test(groups = { "Functional" })
817 public void testParse_changeOfGeneEndsDomain() throws IOException
820 // uses tab instead of space separators to check robustness
821 MegaFile testee = new MegaFile("#MEGA\n"+
822 "!TITLE Interleaved sequence data;\n" +
823 "!Format Identical=.;\n\n" +
824 "!Gene=gene1 Domain=Exon1 Property=Coding;\n" +
826 "#CPZANT M..P.R\n\n" +
829 "#CPZANT ..YZ..", AppletFormatAdapter.PASTE);
831 Vector<SequenceI> seqs = testee.getSeqs();
832 assertEquals("Expected two sequences", 2, seqs.size());
833 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
834 .getSequenceAsString());
835 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
836 .getSequenceAsString());
837 assertTrue("File format is not flagged as interleaved",
838 testee.isInterleaved());
840 for (SequenceI seq : seqs)
842 SequenceFeature[] sfs = seq.getSequenceFeatures();
843 assertEquals(3, sfs.length);
844 verifySequenceFeature(sfs[0], "Exon1 (gene1 Coding)", "Domain", 1, 6);
845 verifySequenceFeature(sfs[1], "gene1", "Gene", 1, 6);
846 verifySequenceFeature(sfs[2], "gene2", "Gene", 7, 12);
853 * Test case where the declared gap character is one Jalview does not support;
854 * it should be converted to a '-'
856 * @throws IOException
858 @Test(groups = { "Functional" })
859 public void testParse_weirdGapCharacter() throws IOException
862 String data = "#MEGA\n"+
863 "!TITLE Interleaved sequence data;\n" +
864 "!Format Identical=. Indel=%;\n\n" +
866 "#CPZANT M..P.R\n\n" +
869 AppletFormatAdapter fa = new AppletFormatAdapter();
870 AlignmentI al = fa.readFile(data,
871 AppletFormatAdapter.PASTE, "MEGA");
873 List<SequenceI> seqs = al.getSequences();
874 assertEquals("First sequence data wrong", "-BC-EFKLMNOP", seqs.get(0)
875 .getSequenceAsString());
876 assertEquals("Second sequence data wrong", "MBCPERK--ZOP", seqs.get(1)
877 .getSequenceAsString());
878 assertEquals('-', al.getGapCharacter());
882 * Test reading a MEGA file to an alignment then writing it out in MEGA
883 * format. Includes !Label statements which should be converted to
884 * AlignmentAnnotation and back again.
886 * @throws IOException
888 @Test(groups = "Functional")
889 public void testRoundTrip_withLabels() throws IOException
891 AppletFormatAdapter fa = new AppletFormatAdapter();
894 String data = "#MEGA\n"
896 + "#CPZANT ATC -G-\n"
897 + "!Label F__E_H\n\n"
899 + "#CPZANT CA- -GC\n"
901 AlignmentI al = fa.readFile(data,
902 AppletFormatAdapter.PASTE, "MEGA");
903 AlignmentAnnotation aa = al.getAlignmentAnnotation()[0];
904 assertEquals("MEGA Label", aa.label);
905 assertEquals("F, , , E, , H, F, F, H, , , E, ",
908 MegaFile output = new MegaFile();
909 String formatted = output.print(al);
913 " DataType=Nucleotide CodeTable=Standard\n" +
914 " NSeqs=2 NSites=12\n" +
916 "#U455 C-- GTA [6]\n" +
917 "#CPZANT ATC -G- [6]\n" +
918 "!Label F__ E_H;\n\n" +
919 "#U455 CGA --T [12]\n" +
920 "#CPZANT CA- -GC [12]\n" +
923 assertEquals("Roundtrip didn't match", expected,
928 * Test (parse and) print of MEGA data with !Gene statements.
930 * @throws IOException
932 @Test(groups = { "Functional" })
933 public void testPrint_genes() throws IOException
936 * to keep the test concise, input data is in the exact format that Jalview
937 * would output it; the important thing is functional equivalence of input
941 String data = "#MEGA\n\n"+
943 "#Seq2 MNOP [4]\n\n" +
944 "!Domain=Exon1 Gene=Adh Property=Coding;\n" +
945 "#Seq1 EFGHI [9]\n" +
946 "#Seq2 QRSTU [9]\n\n" +
947 "!Domain=Intron1 Gene=Adh Property=Noncoding;\n" +
949 "#Seq2 VW [11]\n\n" +
950 "!Domain=Intron1 Property=domainend;\n" +
954 MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
955 String printed = testee.print();
956 assertEquals("Print format wrong", data, printed);
959 @Test(groups = { "Functional" })
960 public void testGetDomainFromAnnotation()
962 Annotation[] anns = new Annotation[5];
963 anns[1] = new Annotation("", "Intron1", '0', 0f);
964 anns[2] = new Annotation("", "Intron2 (Aspx)", '0', 0f);
965 anns[3] = new Annotation("", "Intron3 (Aspy Coding)", '0', 0f);
966 anns[4] = new Annotation("", "Intron4 (Coding)", '0', 0f);
967 AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
969 assertNull(MegaFile.getDomainFromAnnotation(0, null));
971 assertNull(MegaFile.getDomainFromAnnotation(0, aa));
972 // column out of range:
973 assertNull(MegaFile.getDomainFromAnnotation(5, aa));
974 // domain with no Gene or Property:
975 assertEquals("Intron1", MegaFile.getDomainFromAnnotation(1, aa));
976 // domain with Gene but no Property:
977 assertEquals("Intron2", MegaFile.getDomainFromAnnotation(2, aa));
978 // domain with Gene and Property:
979 assertEquals("Intron3", MegaFile.getDomainFromAnnotation(3, aa));
980 // domain with Property and no Gene:
981 assertEquals("Intron4", MegaFile.getDomainFromAnnotation(4, aa));
984 @Test(groups = { "Functional" })
985 public void testGetGeneFromAnnotation()
987 Annotation[] anns = new Annotation[3];
988 anns[1] = new Annotation("", "Aspx", '0', 0f);
989 AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
991 assertNull(MegaFile.getGeneFromAnnotation(0, null));
993 assertNull(MegaFile.getGeneFromAnnotation(0, aa));
994 // column out of range:
995 assertNull(MegaFile.getGeneFromAnnotation(3, aa));
997 assertEquals("Aspx", MegaFile.getGeneFromAnnotation(1, aa));
1000 @Test(groups = { "Functional" })
1001 public void testGetPropertyFromAnnotation()
1003 Annotation[] anns = new Annotation[5];
1004 anns[1] = new Annotation("", "Intron1", '0', 0f);
1005 anns[2] = new Annotation("", "Intron2 (Aspx)", '0', 0f);
1006 anns[3] = new Annotation("", "Intron3 (Aspy Noncoding)", '0', 0f);
1007 anns[4] = new Annotation("", "Exon1 (Aspx Coding)", '0', 0f);
1008 AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
1010 assertNull(MegaFile.getPropertyFromAnnotation(0, null));
1012 assertNull(MegaFile.getPropertyFromAnnotation(0, aa));
1013 // column out of range:
1014 assertNull(MegaFile.getPropertyFromAnnotation(5, aa));
1015 // domain with no Gene or Property:
1016 assertNull(MegaFile.getPropertyFromAnnotation(1, aa));
1017 // domain with Gene but no Property:
1018 assertNull(MegaFile.getPropertyFromAnnotation(2, aa));
1019 // domain with Gene and Property:
1020 assertEquals("Noncoding", MegaFile.getPropertyFromAnnotation(3, aa));
1021 assertEquals("Coding", MegaFile.getPropertyFromAnnotation(4, aa));
1027 * Test parse of interleaved data with no blank lines to separate blocks of
1028 * sequence data; to confirm we can handle this correctly
1030 * @throws IOException
1032 @Test(groups = { "Functional" })
1033 public void testParse_interleaved_noBlankLines() throws IOException
1035 String data = INTERLEAVED.replace("\n\n", "\n");
1036 MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
1037 assertEquals("Title not as expected", "Interleaved sequence data",
1038 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
1039 Vector<SequenceI> seqs = testee.getSeqs();
1040 // should be 2 sequences
1041 assertEquals("Expected two sequences", 2, seqs.size());
1042 // check sequence names correct and order preserved
1043 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
1044 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
1046 // check sequence data
1047 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
1048 .getSequenceAsString());
1049 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
1050 .getSequenceAsString());
1051 assertTrue("File format is not flagged as interleaved",
1052 testee.isInterleaved());