3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.AssertJUnit.fail;
9 import jalview.datamodel.AlignmentAnnotation;
10 import jalview.datamodel.AlignmentI;
11 import jalview.datamodel.Sequence;
12 import jalview.datamodel.SequenceFeature;
13 import jalview.datamodel.SequenceI;
15 import java.io.IOException;
16 import java.util.List;
17 import java.util.Vector;
19 import org.testng.annotations.Test;
22 * Unit tests for MegaFile - read and write in MEGA format(s).
24 public class MegaFileTest
// Residue building blocks of 20 and 30 characters; the fixtures and tests
// below concatenate them to build longer (50- and 60-column) sequence rows.
26 private static final String TWENTY_CHARS = "9876543210abcdefghij";
28 private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
// MEGA-format input fixtures shared by the tests in this class.
// Two-sequence interleaved data using the "TITLE:" form of title line.
31 private static final String INTERLEAVED =
33 "TITLE: Interleaved sequence data\n\n" +
35 "#CPZANT MNOPQR\n\n" +
// Same style of interleaved data; judging by the name, without header lines.
39 private static final String INTERLEAVED_NOHEADERS =
41 + "#CPZANT MNOPQR\n\n"
45 // interleaved sequences, with 50 residues
46 private static final String INTERLEAVED_50RESIDUES =
48 + "!TITLE Interleaved sequence data\n\n"
49 + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n"
50 + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
// Non-interleaved layout: the tests expect 15 residues for each of U455 and
// CPZANT from this fixture.
52 private static final String NONINTERLEAVED =
54 + "!TITLE Noninterleaved sequence data\n\n"
62 // this one starts interleaved then switches to non-interleaved
63 private static final String MIXED =
65 + "!TITLE This is a mess\n\n"
66 + "#CPZANT KLMNOPWXYZCGATC\n\n"
70 // interleaved with a new sequence appearing in the second block :-O
71 private static final String INTERLEAVED_SEQUENCE_ERROR =
73 + "!TITLE Interleaved sequence data\n\n"
75 + "#CPZANT MNOPQR\n\n"
78 // interleaved with description, bases/gaps in triplet groups
// The !Format line below mixes space and tab ("\t") separators between its
// key=value pairs.
79 private static final String INTERLEAVED_WITH_DESCRIPTION =
81 + "!Title Data with description;\n"
82 + "!Format DataType=DNA indel=-\tCodeTable=Standard Missing=? MatchChar=.;\n\n"
84 + " Line one of description\n"
85 + " Line two of description;\n\n"
87 + "#CPZANT ATC -G-\n\n"
89 + "#CPZANT CA- -GC\n";
94 * Test parse of interleaved mega format data.
98 @Test(groups = { "Functional" })
99 public void testParse_interleaved() throws IOException
// Constructs a MegaFile from the INTERLEAVED fixture (source type
// AppletFormatAdapter.PASTE) and asserts: the title property, exactly two
// sequences named U455 and CPZANT in that order, each sequence's residues
// joined across both blocks (12 characters), and isInterleaved() == true.
101 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
102 assertEquals("Title not as expected", "Interleaved sequence data",
103 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
104 Vector<SequenceI> seqs = testee.getSeqs();
105 // should be 2 sequences
106 assertEquals("Expected two sequences", 2, seqs.size());
107 // check sequence names correct and order preserved
108 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
109 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
111 // check sequence data
112 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
113 .getSequenceAsString());
114 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
115 .getSequenceAsString());
116 assertTrue("File format is not flagged as interleaved",
117 testee.isInterleaved());
121 * Test parse of noninterleaved mega format data.
123 * @throws IOException
125 @Test(groups = { "Functional" })
126 public void testParse_nonInterleaved() throws IOException
// Constructs a MegaFile from the NONINTERLEAVED fixture and asserts the
// title, two sequences (U455 then CPZANT), 15 residues of data for each,
// and isInterleaved() == false.
128 MegaFile testee = new MegaFile(NONINTERLEAVED,
129 AppletFormatAdapter.PASTE);
130 assertEquals("Title not as expected", "Noninterleaved sequence data",
131 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
132 Vector<SequenceI> seqs = testee.getSeqs();
133 // should be 2 sequences
134 assertEquals("Expected two sequences", 2, seqs.size());
135 // check sequence names correct and order preserved
136 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
137 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
139 // check sequence data
140 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
141 .get(0).getSequenceAsString());
142 assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
143 seqs.get(1).getSequenceAsString());
144 assertFalse("File format is not flagged as noninterleaved",
145 testee.isInterleaved());
149 * Test parsing an interleaved file with an extra sequence appearing after the
150 * first block - should fail.
152 @Test(groups = { "Functional" })
153 public void testParse_interleavedExtraSequenceError()
// Negative test: constructing a MegaFile from INTERLEAVED_SEQUENCE_ERROR must
// throw IOException. Reaching fail() means no exception was raised; the catch
// branch then compares the expected parse-error message, which quotes the
// offending line "#U456 KLMNOP".
157 new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
158 fail("Expected extra sequence IOException");
159 } catch (IOException e)
162 "Unexpected exception message",
163 "Parse error: misplaced new sequence starting at #U456 KLMNOP",
169 * Test a mixed up file.
171 @Test(groups = { "Functional" })
172 public void testParse_mixedInterleavedNonInterleaved()
// Negative test: the MIXED fixture starts interleaved and then switches to
// non-interleaved layout, so construction must throw IOException. The
// expected message quotes the line at which the switch is detected.
176 new MegaFile(MIXED, AppletFormatAdapter.PASTE);
177 fail("Expected mixed content exception");
178 } catch (IOException e)
181 "Unexpected exception message",
182 "Parse error: interleaved was true but now seems to be false, at line: ABCFEDHIJ",
188 @Test(groups = { "Functional" })
189 public void testGetSequenceId()
// MegaFile.getSequenceId is expected to return the token following a leading
// '#' (up to the first separator, or the whole remainder if there is none),
// and null when the line does not start with '#', is empty, or is null.
191 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
// NOTE(review): this assertion is textually identical to the previous one in
// this copy. If it was meant to cover a different separator (several spaces
// or a tab), confirm the literal and restore it; otherwise it is redundant.
192 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
193 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
194 assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
195 assertNull(MegaFile.getSequenceId("AB123 CTAG"));
196 assertNull(MegaFile.getSequenceId("AB123"));
197 assertNull(MegaFile.getSequenceId(""));
198 assertNull(MegaFile.getSequenceId(null));
201 @Test(groups = { "Functional" })
202 public void testGetMaxIdLength()
// MegaFile.getMaxIdLength should return the length of the longest sequence
// name: 13 ("SomethingElse"), then 9 ("Something") once the second entry is
// replaced by the shorter name "DNA".
204 SequenceI[] seqs = new Sequence[2];
205 seqs[0] = new Sequence("Something", "GCATAC");
206 seqs[1] = new Sequence("SomethingElse", "GCATAC");
207 assertEquals(13, MegaFile.getMaxIdLength(seqs));
208 seqs[1] = new Sequence("DNA", "GCATAC");
209 assertEquals(9, MegaFile.getMaxIdLength(seqs));
212 @Test(groups = { "Functional" })
213 public void testGetMaxSequenceLength()
// MegaFile.getMaxSequenceLength should return the length of the longest
// sequence string: 9 ("GCATACTAG"), then 6 ("GCATAC") once the second entry
// is replaced by the 3-residue "GCA".
215 SequenceI[] seqs = new Sequence[2];
216 seqs[0] = new Sequence("Seq1", "GCATAC");
217 seqs[1] = new Sequence("Seq2", "GCATACTAG");
218 assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
219 seqs[1] = new Sequence("Seq2", "GCA");
220 assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
224 * Test (parse and) print of interleaved mega format data.
226 * @throws IOException
228 @Test(groups = { "Functional" })
229 public void testPrint_interleaved() throws IOException
// Parses INTERLEAVED and compares the no-argument print() output with an
// expected string: a "#MEGA" header, then each block of rows with a trailing
// "[n]" running position count (6 after the first block, 12 after the second).
231 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
232 String printed = testee.print();
// NOTE(review): writes the output to stdout on every run; looks like leftover
// debugging aid rather than part of the assertion.
233 System.out.println(printed);
234 // normally output should match input
235 // we cheated here with a number of short input lines
236 // nb don't get Title in output if not calling print(AlignmentI)
237 String expected = "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
238 + "#CPZANT MNOPQR [6]\n\n" + "#U455 KLMNOP [12]\n"
239 + "#CPZANT WXYZGC [12]"
241 assertEquals("Print format wrong", expected, printed);
245 * Test (parse and) print of interleaved data with no headers (acceptable).
247 * @throws IOException
249 @Test(groups = { "Functional" })
250 public void testPrint_interleavedNoHeaders() throws IOException
// Parses INTERLEAVED_NOHEADERS and checks print() output. The expected text
// still begins with "#MEGA", followed by the two blocks with "[6]" and "[12]"
// position counts.
252 MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
253 AppletFormatAdapter.PASTE);
254 String printed = testee.print();
// NOTE(review): stdout print appears to be a debugging aid only.
255 System.out.println(printed);
258 assertEquals("Print format wrong",
259 "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
260 + "#CPZANT MNOPQR [6]\n\n"
261 + "#U455 KLMNOP [12]\n"
262 + "#CPZANT WXYZGC [12]\n",
268 * Test (parse and) print of noninterleaved mega format data.
270 * @throws IOException
272 @Test(groups = { "Functional" })
273 public void testPrint_noninterleaved() throws IOException
// Parses NONINTERLEAVED, asserts that getPositionsPerLine() is 10 after the
// parse, then checks print() output: each sequence id on its own line,
// followed by its residues in rows of 10 with "[n]" running position counts.
275 MegaFile testee = new MegaFile(NONINTERLEAVED,
276 AppletFormatAdapter.PASTE);
277 assertEquals(10, testee.getPositionsPerLine());
278 String printed = testee.print();
// NOTE(review): stdout print appears to be a debugging aid only.
279 System.out.println(printed);
280 // normally output should match input
281 // we cheated here with a number of short input lines
282 String expected = "#MEGA\n\n" + "#U455\n"
283 + "ABCFEDHIJM [10]\nNOPQR [15]\n\n" + "#CPZANT\n"
284 + "KLMNOPWXYZ [10]\nCGATC [15]\n";
285 assertEquals("Print format wrong", expected, printed);
289 * Test (parse and) print of interleaved mega format data extending to more
290 * than one line of output.
292 * @throws IOException
294 @Test(groups = { "Functional" })
295 public void testPrint_interleavedMultiLine() throws IOException
// Parses the 50-residue interleaved fixture (positions per line is 50 after
// the parse), lowers it to 20 via setPositionsPerLine, and checks that print()
// wraps into blocks of 20, 20 and 10 residues, written in space-separated
// groups of 10 with "[20]", "[40]", "[50]" position counts.
297 MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
298 AppletFormatAdapter.PASTE);
299 assertEquals(50, testee.getPositionsPerLine());
301 * now simulate choosing 20 residues per line on output
303 testee.setPositionsPerLine(20);
304 String printed = testee.print();
// NOTE(review): stdout print appears to be a debugging aid only.
305 System.out.println(printed);
309 "#U455 0123456789 klmnopqrst [20]\n" + // first 20
310 "#CPZANT 9876543210 abcdefghij [20]\n\n" +
311 "#U455 ABCDEFGHIJ 9876543210 [40]\n" + // next 20
312 "#CPZANT 0123456789 klmnopqrst [40]\n\n" +
313 "#U455 abcdefghij [50]\n" + // last 10
314 "#CPZANT ABCDEFGHIJ [50]\n";
316 assertEquals("Print format wrong", expected, printed);
320 * Test (parse and) print of noninterleaved mega format data extending to more
321 * than one line of output.
323 * @throws IOException
325 @Test(groups = { "Functional" })
326 public void testPrint_noninterleavedMultiLine() throws IOException
// Builds a single 60-residue non-interleaved sequence "SIXTY" whose input
// rows are 30 residues long (so positions per line is 30 after the parse).
// Requests 25 per line and expects output rows of 20, written as two
// space-separated groups of 10.
328 final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
329 + "\n" + TWENTY_CHARS + "9993332221\n";
330 MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
331 AppletFormatAdapter.PASTE);
332 assertEquals(30, testee.getPositionsPerLine());
333 testee.setPositionsPerLine(25);
334 String printed = testee.print();
337 * 25 positions per line is rounded down to 20 (two blocks of 10)
339 String expected = "#MEGA\n\n" + "#SIXTY\n"
340 + "0123456789 klmnopqrst [20]\n"
341 + "ABCDEFGHIJ 9876543210 [40]\n"
342 + "abcdefghij 9993332221 [60]\n";
343 assertEquals("Print format wrong", expected, printed);
347 * Test parse of data including description
349 * @throws IOException
351 @Test(groups = { "Functional" })
352 public void testParse_withDescription() throws IOException
// Parses INTERLEAVED_WITH_DESCRIPTION and asserts the title (without its
// trailing ';'), the two sequences with the triplet-group spaces removed and
// gap characters kept, the interleaved flag, and the multi-line description
// property (newline between lines, terminating ';' dropped).
354 MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
355 AppletFormatAdapter.PASTE);
356 assertEquals("Title not as expected", "Data with description",
357 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
359 Vector<SequenceI> seqs = testee.getSeqs();
360 // should be 2 sequences
361 assertEquals("Expected two sequences", 2, seqs.size());
362 // check sequence names correct and order preserved
363 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
364 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
366 // check sequence data
367 assertEquals("First sequence data wrong", "C--GTACGA--T", seqs.get(0)
368 .getSequenceAsString());
369 assertEquals("Second sequence data wrong", "ATC-G-CA--GC", seqs.get(1)
370 .getSequenceAsString());
371 assertTrue("File format is not flagged as interleaved",
372 testee.isInterleaved());
375 "Description property not parsed",
376 " Line one of description\n" + " Line two of description",
377 testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
380 @Test(groups = { "Functional" })
381 public void testGetNonCommentContent() throws FileFormatException
// Exercises MegaFile.getNonCommentContent(line, n). Judging from the cases
// below, n is the number of '[' comments still open at the start of the line
// (TODO confirm against MegaFile): with n == 0 bracketed text is removed,
// and with n == 1 everything up to the matching ']' is dropped, including a
// nested "[...]" pair, while a later unmatched ']' is returned as content.
383 assertEquals("abcde", MegaFile.getNonCommentContent("abcde", 0));
384 assertEquals("CGT ACG GAC ",
385 MegaFile.getNonCommentContent("CGT ACG GAC [9]", 0));
386 assertEquals("", MegaFile.getNonCommentContent("abcde", 1));
387 assertEquals(" abcde",
388 MegaFile.getNonCommentContent("and others ] abcde", 1));
389 assertEquals(" abcde", MegaFile.getNonCommentContent(
390 "and others [including refs] ] abcde", 1));
391 assertEquals(" x ] abcde",
392 MegaFile.getNonCommentContent("and others ] x ] abcde", 1));
395 @Test(groups = { "Functional" })
396 public void testCommentDepth() throws FileFormatException
// Exercises MegaFile.commentDepth(line, n): the expected results equal n plus
// the number of '[' minus the number of ']' in the line. The last case
// (one '[' and three ']' starting from 1) expects 0 rather than -1, which
// suggests the depth is not allowed to go negative - TODO confirm.
398 assertEquals(0, MegaFile.commentDepth("abcde", 0));
399 assertEquals(1, MegaFile.commentDepth("abc[de", 0));
400 assertEquals(3, MegaFile.commentDepth("ab[c[de", 1));
401 assertEquals(1, MegaFile.commentDepth("ab]c[d]e[f", 1));
402 assertEquals(0, MegaFile.commentDepth("a]b[c]d]e", 1));
405 @Test(groups = { "Functional" })
406 public void testGetValue()
// MegaFile.getValue should return the value part of a "name=value" or
// "name value" statement: surrounding spaces and tabs trimmed, a trailing ';'
// removed, a leading '!' on the name tolerated, and "" when the statement has
// a name only.
408 assertEquals("Mega", MegaFile.getValue("Name=Mega"));
409 assertEquals("Mega", MegaFile.getValue("Name =Mega"));
410 assertEquals("Mega", MegaFile.getValue(" Name = Mega "));
411 assertEquals("Mega", MegaFile.getValue("Name = Mega; "));
412 assertEquals("Mega", MegaFile.getValue(" Name = Mega ; "));
413 assertEquals("Mega", MegaFile.getValue("\t!Name \t= \tMega ; "));
414 assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
415 assertEquals("", MegaFile.getValue("Name"));
419 * Test reading a MEGA file to an alignment then writing it out in MEGA
420 * format. Verify the output is (functionally) the same as the input.
422 * @throws IOException
424 @Test(groups = "Functional")
425 public void testRoundTrip_Interleaved() throws IOException
// Reads INTERLEAVED_WITH_DESCRIPTION into an AlignmentI through
// AppletFormatAdapter.readFile (format name "MEGA"), writes it back with a
// fresh MegaFile's print(AlignmentI), and compares against the expected text.
// The expected output carries the title, description and a multi-line format
// section including NSeqs/NSites, with input keys "indel"/"MatchChar" shown
// as "Indel"/"Identical".
427 AppletFormatAdapter fa = new AppletFormatAdapter();
428 AlignmentI al = fa.readFile(INTERLEAVED_WITH_DESCRIPTION,
429 AppletFormatAdapter.PASTE, "MEGA");
430 MegaFile output = new MegaFile();
431 String formatted = output.print(al);
434 "#MEGA\n!Title Data with description;\n" +
436 " Line one of description\n" +
437 " Line two of description;\n" +
439 " DataType=DNA CodeTable=Standard\n" +
440 " NSeqs=2 NSites=12\n" + // NSites includes gaps
441 " Indel=- Identical=. Missing=?;\n\n" +
442 "#U455 C-- GTA [6]\n" +
443 "#CPZANT ATC -G- [6]\n\n" +
444 "#U455 CGA --T [12]\n" +
445 "#CPZANT CA- -GC [12]\n";
447 assertEquals("Roundtrip didn't match", expected,
452 * Test reading a MEGA file to an alignment then writing it out in MEGA
453 * format. Verify the output is (functionally) the same as the input.
455 * @throws IOException
457 @Test(groups = "Functional")
458 public void testRoundTrip_multilineFormatWithComments()
// Round trip for input that contains a multi-line '[' comment with a nested
// comment, and a format statement spread over several lines. The expected
// output has no comment text and the same header layout as the other
// round-trip test in this class.
461 AppletFormatAdapter fa = new AppletFormatAdapter();
463 AlignmentI al = fa.readFile("#MEGA\n"
464 + "!Title Data with description;\n"
465 + "[ this comment should be ignored\n"
466 + "including [this nested comment]\n"
469 + "DataType=DNA CodeTable=Standard\n"
470 + "indel=- Missing=? MatchChar=.;\n\n"
472 + " Line one of description\n"
473 + " Line two of description;\n\n"
475 + "#CPZANT ATC GGG\n\n"
477 + "#CPZANT CAA TGC\n",
478 AppletFormatAdapter.PASTE, "MEGA");
480 MegaFile output = new MegaFile();
481 String formatted = output.print(al);
484 "#MEGA\n!Title Data with description;\n" +
486 " Line one of description\n" +
487 " Line two of description;\n" +
489 " DataType=DNA CodeTable=Standard\n" +
490 " NSeqs=2 NSites=12\n" +
491 " Indel=- Identical=. Missing=?;\n\n" +
492 "#U455 CGC GTA [6]\n" +
493 "#CPZANT ATC GGG [6]\n\n" +
494 "#U455 CGA TTT [12]\n" +
495 "#CPZANT CAA TGC [12]\n";
497 assertEquals("Roundtrip didn't match", expected,
504 * Test parse of interleaved mega format data where the identity character is
505 * used in sequences after the first
507 * @throws IOException
509 @Test(groups = { "Functional" })
510 public void testParse_interleavedWithIdentityAndTabs() throws IOException
// The !Format statement declares '.' as the identity symbol, and all
// separators are tabs. Each '.' in a CPZANT row is expected to be replaced by
// the first sequence's residue at the same column: "M..P.R" -> "MBCPER" and
// "..YZ.." -> "KLYZOP".
513 // uses tab instead of space separators to check robustness
514 MegaFile testee = new MegaFile("#MEGA\n"+
515 "!TITLE\tInterleaved sequence data;\n" +
516 "!Format\tIdentical=.;\n\n" +
518 "#CPZANT\tM..P.R\n\n" +
519 "#U455\t\tKLMNOP\n" +
520 "#CPZANT\t..YZ..", AppletFormatAdapter.PASTE);
522 assertEquals("Title not as expected", "Interleaved sequence data",
523 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
524 Vector<SequenceI> seqs = testee.getSeqs();
525 // should be 2 sequences
526 assertEquals("Expected two sequences", 2, seqs.size());
527 // check sequence names correct and order preserved
528 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
529 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
531 // check sequence data
532 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
533 .getSequenceAsString());
534 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
535 .getSequenceAsString());
536 assertTrue("File format is not flagged as interleaved",
537 testee.isInterleaved());
541 * Test parse of noninterleaved format data including identity symbol
543 * @throws IOException
545 @Test(groups = { "Functional" })
546 public void testParse_nonInterleavedWithIdentity() throws IOException
// Non-interleaved counterpart of the identity-symbol test; here the symbol is
// declared with the "MatchChar" keyword. The expected CPZANT data matches
// U455 at the columns where, presumably, the input used '.' (the sequence
// rows are not visible in this copy - TODO confirm).
549 MegaFile testee = new MegaFile("#MEGA\n"
550 + "!TITLE Noninterleaved sequence data;\n"
551 + "!Format MatchChar=.;\n"
558 AppletFormatAdapter.PASTE);
560 assertEquals("Title not as expected", "Noninterleaved sequence data",
561 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
562 Vector<SequenceI> seqs = testee.getSeqs();
563 // should be 2 sequences
564 assertEquals("Expected two sequences", 2, seqs.size());
565 // check sequence names correct and order preserved
566 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
567 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
569 // check sequence data
570 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
571 .get(0).getSequenceAsString());
572 assertEquals("Second sequence data wrong", "KLCFODHXYZCGPQC",
573 seqs.get(1).getSequenceAsString());
574 assertFalse("File format is not flagged as noninterleaved",
575 testee.isInterleaved());
581 * Test parse of interleaved format data including position number comments.
583 * @throws IOException
585 @Test(groups = { "Functional" })
586 public void testParse_interleavedWithPositionNumber() throws IOException
// Every sequence row ends with a bracketed position count ("[6]", "[12]").
// The expected sequence data contains only the residues, so the bracketed
// text and the space before it must be ignored by the parser.
589 MegaFile testee = new MegaFile("#MEGA\n"+
590 "TITLE: Interleaved sequence data\n\n" +
591 "#U455 ABCDEF [6]\n" +
592 "#CPZANT MNOPQR [6]\n\n" +
593 "#U455 KLMNOP [12]\n" +
594 "#CPZANT WXYZGC [12]\n", AppletFormatAdapter.PASTE);
596 assertEquals("Title not as expected", "Interleaved sequence data",
597 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
598 Vector<SequenceI> seqs = testee.getSeqs();
599 // should be 2 sequences
600 assertEquals("Expected two sequences", 2, seqs.size());
601 // check sequence names correct and order preserved
602 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
603 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
605 // check sequence data
606 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
607 .getSequenceAsString());
608 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
609 .getSequenceAsString());
610 assertTrue("File format is not flagged as interleaved",
611 testee.isInterleaved());
617 * Test parse of data with !Gene and !Domain statements.
619 * @throws IOException
621 @Test(groups = { "Functional" })
622 public void testParse_geneDomains() throws IOException
// Seven interleaved blocks of six residues (42 columns in total), with
// !Domain / Gene statements between blocks. The test checks the sequence
// data, the nine sequence features expected on each sequence, and the two
// alignment annotation rows ("MEGA Gene" and "MEGA Domain").
625 String data = "#MEGA\n"+
626 "TITLE: Interleaved sequence data\n\n" +
628 "#CPZANT TTTTTT\n\n" +
629 "!Domain=Exon1 Gene=Adh Property=Coding CodonStart=1;\n" +
631 "#CPZANT AAAAAA\n\n" +
632 "!domain=Intron1 Property=Intron Gene=Adh;\n" +
634 "#CPZANT cccccc\n\n" +
635 "!Domain=Exon2 Gene=Adh Property=Exon CodonStart=1;\n" +
637 "#CPZANT gggggg\n\n" +
638 // explicit end of Exon2, implicit end of Adh:
639 "!Domain=Exon2 Property=domainend;\n" +
640 "!Domain=Intron1 Gene=Opsin Property=Noncoding;\n" +
642 "#CPZANT AAAAAA\n\n" +
643 // end Opsin, start MEF2A
644 "!Domain=Exon1 Gene=MEF2A Property=Coding CodonStart=1;\n" +
646 "#CPZANT cccccc\n\n" +
648 "!Domain=BindingSite;\n" +
650 "#CPZANT TTTTTT\n\n";
652 MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
654 Vector<SequenceI> seqs = testee.getSeqs();
655 // should be 2 sequences
656 assertEquals("Expected two sequences", 2, seqs.size());
657 // check sequence data
658 assertEquals("First sequence data wrong",
659 "CCCCCCGGGGGGttttttaaaaaaGGGGGGttttttCCCCCC", seqs.get(0)
660 .getSequenceAsString());
661 assertEquals("Second sequence data wrong",
662 "TTTTTTAAAAAAccccccggggggAAAAAAccccccTTTTTT", seqs.get(1)
663 .getSequenceAsString());
// NOTE(review): the summary comment that follows lists five features and puts
// BindingSite at 25-30, but the assertions below expect nine features with
// BindingSite at 37-42 plus Opsin and MEF2A entries; the comment looks stale.
666 * sequences should have features for Gene=Adh 7-24, Exon1 7-12, Intron1
667 * 13-18, Exon2 19-24, BindingSite 25-30
669 for (SequenceI seq : seqs) {
670 SequenceFeature[] sfs = seq.getSequenceFeatures();
671 // features are added in the order in which their end is found
672 // (Domain before Gene when they end together)
673 assertEquals(9, sfs.length);
674 // TODO settle which way round type/description go!
// NOTE(review): the input declares Property=Intron and Property=Exon, while
// the expected descriptions say "Noncoding" and "Coding"; presumably the
// parser normalises these values - confirm against MegaFile.
675 verifySequenceFeature(sfs[0], "Exon1 (Adh Coding)", "Domain", 7, 12);
676 verifySequenceFeature(sfs[1], "Intron1 (Adh Noncoding)", "Domain",
678 verifySequenceFeature(sfs[2], "Exon2 (Adh Coding)", "Domain", 19, 24);
679 verifySequenceFeature(sfs[3], "Adh", "Gene", 7, 24);
680 verifySequenceFeature(sfs[4], "Intron1 (Opsin Noncoding)", "Domain",
682 verifySequenceFeature(sfs[5], "Opsin", "Gene", 25, 30);
683 verifySequenceFeature(sfs[6], "Exon1 (MEF2A Coding)", "Domain", 31,
685 verifySequenceFeature(sfs[7], "MEF2A", "Gene", 31, 36);
686 verifySequenceFeature(sfs[8], "BindingSite", "Domain", 37, 42);
690 * verify gene and domain alignment annotations
692 assertEquals(2, testee.annotations.size());
693 AlignmentAnnotation ann = testee.annotations.get(0);
694 assertEquals("MEGA Gene", ann.label);
695 assertEquals(42, ann.annotations.length);
// NOTE(review): verifyAnnotation takes a 0-based start and an exclusive end.
// The gene ranges below run 0-6, 6-24, 24-30, 30-36 and then jump to 37-42,
// so index 36 is never checked.
696 verifyAnnotation(ann, 0, 6, null);
697 verifyAnnotation(ann, 6, 24, "Adh");
698 verifyAnnotation(ann, 24, 30, "Opsin");
699 verifyAnnotation(ann, 30, 36, "MEF2A");
700 verifyAnnotation(ann, 37, 42, null);
702 ann = testee.annotations.get(1);
703 assertEquals("MEGA Domain", ann.label);
704 assertEquals(42, ann.annotations.length);
// NOTE(review): from the third domain onwards the start values look 1-based
// (19, 25, 31, 37) while the end stays exclusive, so indices 18, 24, 30 and
// 36 are never checked. Starts of 18, 24, 30 and 36 would close the gaps -
// confirm the expected value at those positions first.
705 verifyAnnotation(ann, 0, 6, null);
706 verifyAnnotation(ann, 6, 12, "Exon1 (Adh Coding)");
707 verifyAnnotation(ann, 12, 18, "Intron1 (Adh Noncoding)");
708 verifyAnnotation(ann, 19, 24, "Exon2 (Adh Coding)");
709 verifyAnnotation(ann, 25, 30, "Intron1 (Opsin Noncoding)");
710 verifyAnnotation(ann, 31, 36, "Exon1 (MEF2A Coding)");
711 verifyAnnotation(ann, 37, 42, "BindingSite");
716 * Helper method to verify a range of annotation positions all have the given
720 * array of annotations to check
722 * start index to check
724 * end index to check (exclusive)
728 protected void verifyAnnotation(AlignmentAnnotation ann, int from,
729 int to, String description)
// Visits ann.annotations[from] .. ann.annotations[to - 1]. When description
// is null every visited entry must itself be null; otherwise every visited
// entry's description field must equal the given description.
731 for (int pos = from; pos < to; pos++)
733 if (description == null)
735 assertNull(ann.annotations[pos]);
739 assertEquals(description, ann.annotations[pos].description);
745 * Helper method to assert properties of a SequenceFeature
753 protected void verifySequenceFeature(SequenceFeature sf, String type,
754 String description, int begin, int end)
// NOTE(review): the parameter names are crossed over relative to the fields
// they are checked against. The second argument (named 'type') is compared
// with sf.description and the third (named 'description') with sf.type.
// Callers in this class pass the descriptive text second and "Domain"/"Gene"
// third, so the assertions hold as written; only the names mislead. Renaming
// the two parameters would remove the confusion without affecting callers.
756 assertEquals(description, sf.type);
757 assertEquals(type, sf.description);
758 assertEquals(begin, sf.begin);
759 assertEquals(end, sf.end);
763 * Test parse of data including !Label statements. An underscore means no
764 * label, other characters are treated as alignment annotation.
766 * @throws IOException
768 @Test(groups = { "Functional" })
769 public void testParse_withLabels() throws IOException
// Three interleaved blocks of six residues; the first and third are followed
// by a label row ("!Label" and lower-case "!label"), the second has none.
// Expected result: a single alignment annotation labelled "MEGA Label", not
// tied to a sequence (sequenceRef is null), with 18 positions. The expected
// string has a blank entry for each '_' and for every column of the
// unlabelled block.
772 MegaFile testee = new MegaFile("#MEGA\n"+
773 "TITLE: Interleaved sequence data\n\n" +
775 "#CPZANT MNO PQR\n" +
776 "!Label +-_ 23_\n\n" +
777 // a row with no labels = null annotation
779 "#CPZANT mno pqr\n\n" +
781 "#CPZANT WXY ZGC\n" +
782 "!label __3 +X_\n", AppletFormatAdapter.PASTE);
784 Vector<SequenceI> seqs = testee.getSeqs();
785 assertEquals("Expected two sequences", 2, seqs.size());
786 assertEquals("First sequence data wrong", "ABCDEFabcdefKLMNOP", seqs
788 .getSequenceAsString());
789 assertEquals("Second sequence data wrong", "MNOPQRmnopqrWXYZGC", seqs
791 .getSequenceAsString());
793 // check AlignmentAnnotation added with expected values
794 assertEquals(1, testee.annotations.size());
795 AlignmentAnnotation aa = testee.annotations.get(0);
796 assertNull(aa.sequenceRef);
797 assertEquals("MEGA Label", aa.label);
798 assertEquals(18, aa.annotations.length);
799 assertEquals("+, -, , 2, 3, , , , , , , , , , 3, +, X, , ",
806 * Test case where a domain is implicitly terminated by starting a new gene
808 * @throws IOException
810 @Test(groups = { "Functional" })
811 public void testParse_changeOfGeneEndsDomain() throws IOException
// The first block is declared as gene1 / domain Exon1. The assertions expect
// both to end at position 6 and a feature for gene2 to cover 7-12, with no
// second domain feature (three features per sequence).
// NOTE(review): the statement that introduces gene2 is not visible in this
// copy of the data.
// NOTE(review): the "uses tab" remark below does not match the visible
// literals, which are space-separated; it may have been copied from the
// identity-and-tabs test - confirm and drop if so.
814 // uses tab instead of space separators to check robustness
815 MegaFile testee = new MegaFile("#MEGA\n"+
816 "!TITLE Interleaved sequence data;\n" +
817 "!Format Identical=.;\n\n" +
818 "!Gene=gene1 Domain=Exon1 Property=Coding;\n" +
820 "#CPZANT M..P.R\n\n" +
823 "#CPZANT ..YZ..", AppletFormatAdapter.PASTE);
825 Vector<SequenceI> seqs = testee.getSeqs();
826 assertEquals("Expected two sequences", 2, seqs.size());
827 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
828 .getSequenceAsString());
829 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
830 .getSequenceAsString());
831 assertTrue("File format is not flagged as interleaved",
832 testee.isInterleaved());
834 for (SequenceI seq : seqs)
836 SequenceFeature[] sfs = seq.getSequenceFeatures();
837 assertEquals(3, sfs.length);
838 verifySequenceFeature(sfs[0], "Exon1 (gene1 Coding)", "Domain", 1, 6);
839 verifySequenceFeature(sfs[1], "gene1", "Gene", 1, 6);
840 verifySequenceFeature(sfs[2], "gene2", "Gene", 7, 12);
847 * Test case where the declared gap character is one Jalview does not support;
848 * it should be converted to a '-'
850 * @throws IOException
852 @Test(groups = { "Functional" })
853 public void testParse_weirdGapCharacter() throws IOException
// The !Format statement declares '%' as the indel (gap) symbol and '.' as the
// identity symbol. After reading through AppletFormatAdapter.readFile the
// sequences are expected to contain '-' at the gap columns, and the
// alignment's gap character is expected to be '-'.
856 String data = "#MEGA\n"+
857 "!TITLE Interleaved sequence data;\n" +
858 "!Format Identical=. Indel=%;\n\n" +
860 "#CPZANT M..P.R\n\n" +
863 AppletFormatAdapter fa = new AppletFormatAdapter();
864 AlignmentI al = fa.readFile(data,
865 AppletFormatAdapter.PASTE, "MEGA");
867 List<SequenceI> seqs = al.getSequences();
868 assertEquals("First sequence data wrong", "-BC-EFKLMNOP", seqs.get(0)
869 .getSequenceAsString());
870 assertEquals("Second sequence data wrong", "MBCPERK--ZOP", seqs.get(1)
871 .getSequenceAsString());
872 assertEquals('-', al.getGapCharacter());
876 * Test reading a MEGA file to an alignment then writing it out in MEGA
877 * format. Includes !Label statements which should be converted to
878 * AlignmentAnnotation and back again.
880 * @throws IOException
882 @Test(groups = "Functional")
883 public void testRoundTrip_withLabels() throws IOException
885 AppletFormatAdapter fa = new AppletFormatAdapter();
888 String data = "#MEGA\n"
890 + "#CPZANT ATC -G-\n"
891 + "!Label F__E_H\n\n"
893 + "#CPZANT CA- -GC\n"
895 AlignmentI al = fa.readFile(data,
896 AppletFormatAdapter.PASTE, "MEGA");
897 AlignmentAnnotation aa = al.getAlignmentAnnotation()[0];
898 assertEquals("MEGA Label", aa.label);
899 assertEquals("F, , , E, , H, F, F, H, , , E, ",
902 MegaFile output = new MegaFile();
903 String formatted = output.print(al);
907 " DataType=Nucleotide CodeTable=Standard\n" +
908 " NSeqs=2 NSites=12\n" +
910 "#U455 C-- GTA [6]\n" +
911 "#CPZANT ATC -G- [6]\n" +
912 "!Label F__ E_H;\n\n" +
913 "#U455 CGA --T [12]\n" +
914 "#CPZANT CA- -GC [12]\n" +
917 assertEquals("Roundtrip didn't match", expected,