3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.AssertJUnit.fail;
9 import jalview.datamodel.AlignmentAnnotation;
10 import jalview.datamodel.AlignmentI;
11 import jalview.datamodel.Sequence;
12 import jalview.datamodel.SequenceFeature;
13 import jalview.datamodel.SequenceI;
15 import java.io.IOException;
16 import java.util.Vector;
18 import org.testng.annotations.Test;
21 * Unit tests for MegaFile - read and write in MEGA format(s).
23 public class MegaFileTest
// Fixed-length residue strings (20 and 30 characters) used to build the longer
// test sequences below.
25 private static final String TWENTY_CHARS = "9876543210abcdefghij";
27 private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
// interleaved input for sequences U455 and CPZANT, with a "TITLE:" header line
// NOTE(review): some lines of this and the following literals are not visible
// in this view - confirm against the complete file before editing them
30 private static final String INTERLEAVED =
32 "TITLE: Interleaved sequence data\n\n" +
34 "#CPZANT MNOPQR\n\n" +
// interleaved input for the no-headers case (see testPrint_interleavedNoHeaders)
38 private static final String INTERLEAVED_NOHEADERS =
40 + "#CPZANT MNOPQR\n\n"
44 // interleaved sequences, with 50 residues
45 private static final String INTERLEAVED_50RESIDUES =
47 + "!TITLE Interleaved sequence data\n\n"
48 + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n"
49 + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
// non-interleaved counterpart of INTERLEAVED (see testParse_nonInterleaved)
51 private static final String NONINTERLEAVED =
53 + "!TITLE Noninterleaved sequence data\n\n"
61 // this one starts interleaved then switches to non-interleaved
62 private static final String MIXED =
64 + "!TITLE This is a mess\n\n"
65 + "#CPZANT KLMNOPWXYZCGATC\n\n"
69 // interleaved with a new sequence appearing in the second block :-O
70 private static final String INTERLEAVED_SEQUENCE_ERROR =
72 + "!TITLE Interleaved sequence data\n\n"
74 + "#CPZANT MNOPQR\n\n"
77 // interleaved with description, bases/gaps in triplet groups
78 private static final String INTERLEAVED_WITH_DESCRIPTION =
80 + "!Title Data with description;\n"
81 + "!Format DataType=DNA indel=-\tCodeTable=Standard Missing=? MatchChar=.;\n\n"
83 + " Line one of description\n"
84 + " Line two of description;\n\n"
86 + "#CPZANT ATC -G-\n\n"
88 + "#CPZANT CA- -GC\n";
93 * Test parse of interleaved mega format data.
97 @Test(groups = { "Functional" })
98 public void testParse_interleaved() throws IOException
// Parses INTERLEAVED as pasted text, then checks the title property, the
// number, names and order of the sequences, the residues accumulated for each
// sequence across both blocks, and the interleaved flag.
100 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
101 assertEquals("Title not as expected", "Interleaved sequence data",
102 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
103 Vector<SequenceI> seqs = testee.getSeqs();
104 // should be 2 sequences
105 assertEquals("Expected two sequences", 2, seqs.size());
106 // check sequence names correct and order preserved
107 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
108 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
110 // check sequence data
111 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
112 .getSequenceAsString());
113 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
114 .getSequenceAsString());
// the parser should have recognised the layout as interleaved
115 assertTrue("File format is not flagged as interleaved",
116 testee.isInterleaved());
120 * Test parse of noninterleaved mega format data.
122 * @throws IOException
124 @Test(groups = { "Functional" })
125 public void testParse_nonInterleaved() throws IOException
// Parses NONINTERLEAVED as pasted text, then checks the title property, the
// number, names and order of the sequences, each sequence's full residue
// string, and that the data is NOT flagged as interleaved.
127 MegaFile testee = new MegaFile(NONINTERLEAVED,
128 AppletFormatAdapter.PASTE);
129 assertEquals("Title not as expected", "Noninterleaved sequence data",
130 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
131 Vector<SequenceI> seqs = testee.getSeqs();
132 // should be 2 sequences
133 assertEquals("Expected two sequences", 2, seqs.size());
134 // check sequence names correct and order preserved
135 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
136 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
138 // check sequence data
139 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
140 .get(0).getSequenceAsString());
141 assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
142 seqs.get(1).getSequenceAsString());
// isInterleaved() must be false for this layout
143 assertFalse("File format is not flagged as noninterleaved",
144 testee.isInterleaved());
148 * Test parsing an interleaved file with an extra sequence appearing after the
149 * first block - should fail.
151 @Test(groups = { "Functional" })
152 public void testParse_interleavedExtraSequenceError()
// Constructing a MegaFile from INTERLEAVED_SEQUENCE_ERROR is expected to throw
// an IOException; fail() is only reached if no exception was raised.
156 new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
157 fail("Expected extra sequence IOException");
158 } catch (IOException e)
// the expected message quotes the line that introduced the unexpected sequence
161 "Unexpected exception message",
162 "Parse error: misplaced new sequence starting at #U456 KLMNOP",
168 * Test a mixed up file.
170 @Test(groups = { "Functional" })
171 public void testParse_mixedInterleavedNonInterleaved()
// Constructing a MegaFile from MIXED (interleaved data that switches to
// non-interleaved) is expected to throw an IOException; fail() is only reached
// if no exception was raised.
175 new MegaFile(MIXED, AppletFormatAdapter.PASTE);
176 fail("Expected mixed content exception");
177 } catch (IOException e)
// the expected message quotes the line at which the layout change was detected
180 "Unexpected exception message",
181 "Parse error: interleaved was true but now seems to be false, at line: ABCFEDHIJ",
187 @Test(groups = { "Functional" })
188 public void testGetSequenceId()
// Pins MegaFile.getSequenceId: a line starting with '#' yields the id that
// follows it (with or without trailing sequence data); anything else yields null.
// NOTE(review): the first two assertions are identical as shown here;
// presumably they originally differed in whitespace - confirm.
190 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
191 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
192 assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
193 assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
// no leading '#', empty and null inputs all give null
194 assertNull(MegaFile.getSequenceId("AB123 CTAG"));
195 assertNull(MegaFile.getSequenceId("AB123"));
196 assertNull(MegaFile.getSequenceId(""));
197 assertNull(MegaFile.getSequenceId(null));
200 @Test(groups = { "Functional" })
201 public void testGetMaxIdLength()
// Pins MegaFile.getMaxIdLength: the result is the length of the longest
// sequence name in the array.
203 SequenceI[] seqs = new Sequence[2];
204 seqs[0] = new Sequence("Something", "GCATAC");
205 seqs[1] = new Sequence("SomethingElse", "GCATAC");
// "SomethingElse" (13 characters) is the longer name
206 assertEquals(13, MegaFile.getMaxIdLength(seqs));
207 seqs[1] = new Sequence("DNA", "GCATAC");
// now "Something" (9 characters) is the longer name
208 assertEquals(9, MegaFile.getMaxIdLength(seqs));
211 @Test(groups = { "Functional" })
212 public void testGetMaxSequenceLength()
// Pins MegaFile.getMaxSequenceLength: the result is the length of the longest
// residue string in the array.
214 SequenceI[] seqs = new Sequence[2];
215 seqs[0] = new Sequence("Seq1", "GCATAC");
216 seqs[1] = new Sequence("Seq2", "GCATACTAG");
// second sequence (9 residues) is the longer one
217 assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
218 seqs[1] = new Sequence("Seq2", "GCA");
// now the first sequence (6 residues) is the longer one
219 assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
223 * Test (parse and) print of interleaved mega format data.
225 * @throws IOException
227 @Test(groups = { "Functional" })
228 public void testPrint_interleaved() throws IOException
// Parses INTERLEAVED and prints it back with the no-argument print(); the
// expected text starts with "#MEGA" and has a bracketed running position count
// ([6], [12]) after each row.
230 MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
231 String printed = testee.print();
232 System.out.println(printed);
233 // normally output should match input
234 // we cheated here with a number of short input lines
235 // nb don't get Title in output if not calling print(AlignmentI)
236 String expected = "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
237 + "#CPZANT MNOPQR [6]\n\n" + "#U455 KLMNOP [12]\n"
238 + "#CPZANT WXYZGC [12]"
240 assertEquals("Print format wrong", expected, printed);
244 * Test (parse and) print of interleaved data with no headers (acceptable).
246 * @throws IOException
248 @Test(groups = { "Functional" })
249 public void testPrint_interleavedNoHeaders() throws IOException
// Parses INTERLEAVED_NOHEADERS and prints it back; the expected output still
// begins with "#MEGA" and lists the two blocks with [6] and [12] position
// counts.
251 MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
252 AppletFormatAdapter.PASTE);
253 String printed = testee.print();
254 System.out.println(printed);
257 assertEquals("Print format wrong",
258 "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
259 + "#CPZANT MNOPQR [6]\n\n"
260 + "#U455 KLMNOP [12]\n"
261 + "#CPZANT WXYZGC [12]\n",
267 * Test (parse and) print of noninterleaved mega format data.
269 * @throws IOException
271 @Test(groups = { "Functional" })
272 public void testPrint_noninterleaved() throws IOException
// Parses NONINTERLEAVED and prints it back. The parser is expected to report
// 10 positions per line, and the expected output wraps each sequence after 10
// residues, with the id on its own line.
274 MegaFile testee = new MegaFile(NONINTERLEAVED,
275 AppletFormatAdapter.PASTE);
276 assertEquals(10, testee.getPositionsPerLine());
277 String printed = testee.print();
278 System.out.println(printed);
279 // normally output should match input
280 // we cheated here with a number of short input lines
281 String expected = "#MEGA\n\n"
282 + "#U455\n" + "ABCFEDHIJM\nNOPQR\n\n"
283 + "#CPZANT\n" + "KLMNOPWXYZ\nCGATC\n";
284 assertEquals("Print format wrong", expected, printed);
288 * Test (parse and) print of interleaved mega format data extending to more
289 * than one line of output.
291 * @throws IOException
293 @Test(groups = { "Functional" })
294 public void testPrint_interleavedMultiLine() throws IOException
// Parses INTERLEAVED_50RESIDUES (expected to report 50 positions per line),
// switches the output width to 20 and checks that print() emits three blocks
// of 20, 20 and 10 residues.
296 MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
297 AppletFormatAdapter.PASTE);
298 assertEquals(50, testee.getPositionsPerLine());
300 * now simulate choosing 20 residues per line on output
302 testee.setPositionsPerLine(20);
303 String printed = testee.print();
304 System.out.println(printed);
// the U455 input row, for reference when reading the expected blocks below
306 //0123456789klmnopqrstABCDEFGHIJ9876543210abcdefghij
// expected rows show residues in space-separated groups of 10, followed by
// the running position count in brackets
309 "#U455 0123456789 klmnopqrst [20]\n" + // first 20
310 "#CPZANT 9876543210 abcdefghij [20]\n\n" +
311 "#U455 ABCDEFGHIJ 9876543210 [40]\n" + // next 20
312 "#CPZANT 0123456789 klmnopqrst [40]\n\n" +
313 "#U455 abcdefghij [50]\n" + // last 10
314 "#CPZANT ABCDEFGHIJ [50]\n";
316 assertEquals("Print format wrong", expected, printed);
320 * Test (parse and) print of noninterleaved mega format data extending to more
321 * than one line of output.
323 * @throws IOException
325 @Test(groups = { "Functional" })
326 public void testPrint_noninterleavedMultiLine() throws IOException
// Builds a single 60-residue sequence supplied as two input lines of 30
// residues each, then prints it with 25 positions per line.
328 final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
329 + "\n" + TWENTY_CHARS + "9993332221\n";
330 MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
331 AppletFormatAdapter.PASTE);
// the parser is expected to report the 30-residue input line width
332 assertEquals(30, testee.getPositionsPerLine());
333 testee.setPositionsPerLine(25);
334 String printed = testee.print();
335 // 60 character sequence should be output as 25 on each of the first two lines then 10 more
336 String expected = "#MEGA\n\n" + "#SIXTY\n"
337 + "0123456789klmnopqrstABCDE\n" + "FGHIJ9876543210abcdefghij\n"
339 assertEquals("Print format wrong", expected, printed);
343 * Test parse of data including description
345 * @throws IOException
347 @Test(groups = { "Functional" })
348 public void testParse_withDescription() throws IOException
// Parses INTERLEAVED_WITH_DESCRIPTION and checks the title (without its
// trailing ';'), the sequences (the spaces between triplet groups are not part
// of the sequence data), the interleaved flag and the description property.
350 MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
351 AppletFormatAdapter.PASTE);
352 assertEquals("Title not as expected", "Data with description",
353 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
355 Vector<SequenceI> seqs = testee.getSeqs();
356 // should be 2 sequences
357 assertEquals("Expected two sequences", 2, seqs.size());
358 // check sequence names correct and order preserved
359 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
360 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
362 // check sequence data
363 assertEquals("First sequence data wrong", "C--GTACGA--T", seqs.get(0)
364 .getSequenceAsString());
365 assertEquals("Second sequence data wrong", "ATC-G-CA--GC", seqs.get(1)
366 .getSequenceAsString());
367 assertTrue("File format is not flagged as interleaved",
368 testee.isInterleaved());
// both description lines are expected, joined by a newline and without the
// terminating ';'
371 "Description property not parsed",
372 " Line one of description\n" + " Line two of description",
373 testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
376 @Test(groups = { "Functional" })
377 public void testGetNonCommentContent() throws FileFormatException
// Pins MegaFile.getNonCommentContent, which strips [bracketed] comment text
// from a line. The second argument appears to be the comment nesting depth in
// force at the start of the line - TODO confirm against MegaFile.
// depth 0: plain text is returned unchanged, a trailing [comment] is dropped
379 assertEquals("abcde", MegaFile.getNonCommentContent("abcde", 0));
380 assertEquals("CGT ACG GAC ",
381 MegaFile.getNonCommentContent("CGT ACG GAC [9]", 0));
// depth 1 with no closing ']': the whole line is comment
382 assertEquals("", MegaFile.getNonCommentContent("abcde", 1));
// depth 1: content resumes after the ']' that closes the comment, including
// when a nested comment is opened and closed first
383 assertEquals(" abcde",
384 MegaFile.getNonCommentContent("and others ] abcde", 1));
385 assertEquals(" abcde", MegaFile.getNonCommentContent(
386 "and others [including refs] ] abcde", 1));
// a further ']' once outside any comment is kept as ordinary content
387 assertEquals(" x ] abcde",
388 MegaFile.getNonCommentContent("and others ] x ] abcde", 1));
391 @Test(groups = { "Functional" })
392 public void testCommentDepth() throws FileFormatException
// Pins MegaFile.commentDepth: starting from the depth given as the second
// argument, each '[' in the line adds one level and each ']' removes one.
394 assertEquals(0, MegaFile.commentDepth("abcde", 0));
395 assertEquals(1, MegaFile.commentDepth("abc[de", 0));
396 assertEquals(3, MegaFile.commentDepth("ab[c[de", 1));
397 assertEquals(1, MegaFile.commentDepth("ab]c[d]e[f", 1));
// three closes against one open from depth 1: the expected result of 0
// suggests the depth is not allowed to go negative
398 assertEquals(0, MegaFile.commentDepth("a]b[c]d]e", 1));
401 @Test(groups = { "Functional" })
402 public void testGetValue()
// Pins MegaFile.getValue: the value part of a "name=value" statement is
// returned without surrounding whitespace or a trailing ';'.
404 assertEquals("Mega", MegaFile.getValue("Name=Mega"));
405 assertEquals("Mega", MegaFile.getValue("Name =Mega"));
406 assertEquals("Mega", MegaFile.getValue(" Name = Mega "));
407 assertEquals("Mega", MegaFile.getValue("Name = Mega; "));
408 assertEquals("Mega", MegaFile.getValue(" Name = Mega ; "));
409 assertEquals("Mega", MegaFile.getValue("\t!Name \t= \tMega ; "));
// whitespace alone (no '=') also separates name from value
410 assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
// a name with no value gives an empty string
411 assertEquals("", MegaFile.getValue("Name"));
415 * Test reading a MEGA file to an alignment then writing it out in MEGA
416 * format. Verify the output is (functionally) the same as the input.
418 * @throws IOException
420 @Test(groups = "Functional")
421 public void testRoundTrip_Interleaved() throws IOException
// Reads INTERLEAVED_WITH_DESCRIPTION into an alignment via AppletFormatAdapter,
// then writes it with a fresh MegaFile's print(AlignmentI) and compares the
// text with the expected MEGA output.
423 AppletFormatAdapter fa = new AppletFormatAdapter();
424 AlignmentI al = fa.readFile(INTERLEAVED_WITH_DESCRIPTION,
425 AppletFormatAdapter.PASTE, "MEGA");
426 MegaFile output = new MegaFile();
427 String formatted = output.print(al);
// expected output: title, description, format statement, then the sequence
// data in triplet groups with running position counts
430 "#MEGA\n!Title Data with description;\n" +
432 " Line one of description\n" +
433 " Line two of description;\n" +
435 " DataType=DNA CodeTable=Standard\n" +
436 " NSeqs=2 NSites=12\n" + // NSites includes gaps
437 " Indel=- Identical=. Missing=?;\n\n" +
438 "#U455 C-- GTA [6]\n" +
439 "#CPZANT ATC -G- [6]\n\n" +
440 "#U455 CGA --T [12]\n" +
441 "#CPZANT CA- -GC [12]\n";
443 assertEquals("Roundtrip didn't match", expected,
448 * Test reading a MEGA file to an alignment then writing it out in MEGA
449 * format. Verify the output is (functionally) the same as the input.
451 * @throws IOException
453 @Test(groups = "Functional")
454 public void testRoundTrip_multilineFormatWithComments()
// Reads MEGA text whose header contains a (nested) bracketed comment and a
// format statement spread over several lines, then writes the alignment back
// out and compares it with the expected MEGA output.
457 AppletFormatAdapter fa = new AppletFormatAdapter();
459 AlignmentI al = fa.readFile("#MEGA\n"
460 + "!Title Data with description;\n"
461 + "[ this comment should be ignored\n"
462 + "including [this nested comment]\n"
465 + "DataType=DNA CodeTable=Standard\n"
466 + "indel=- Missing=? MatchChar=.;\n\n"
468 + " Line one of description\n"
469 + " Line two of description;\n\n"
471 + "#CPZANT ATC GGG\n\n"
473 + "#CPZANT CAA TGC\n",
474 AppletFormatAdapter.PASTE, "MEGA");
476 MegaFile output = new MegaFile();
477 String formatted = output.print(al);
// expected output carries no trace of the input comment; the input's
// "MatchChar=." setting is expected to be written as "Identical=."
480 "#MEGA\n!Title Data with description;\n" +
482 " Line one of description\n" +
483 " Line two of description;\n" +
485 " DataType=DNA CodeTable=Standard\n" +
486 " NSeqs=2 NSites=12\n" +
487 " Indel=- Identical=. Missing=?;\n\n" +
488 "#U455 CGC GTA [6]\n" +
489 "#CPZANT ATC GGG [6]\n\n" +
490 "#U455 CGA TTT [12]\n" +
491 "#CPZANT CAA TGC [12]\n";
493 assertEquals("Roundtrip didn't match", expected,
500 * Test parse of interleaved mega format data where the identity character is
501 * used in sequences after the first
503 * @throws IOException
505 @Test(groups = { "Functional" })
506 public void testParse_interleavedWithIdentityAndTabs() throws IOException
// Parses interleaved data that declares '.' as the identity character
// ("!Format Identical=.") and uses tabs as separators.
509 // uses tab instead of space separators to check robustness
510 MegaFile testee = new MegaFile("#MEGA\n"+
511 "!TITLE\tInterleaved sequence data;\n" +
512 "!Format\tIdentical=.;\n\n" +
514 "#CPZANT\tM..P.R\n\n" +
515 "#U455\t\tKLMNOP\n" +
516 "#CPZANT\t..YZ..", AppletFormatAdapter.PASTE);
518 assertEquals("Title not as expected", "Interleaved sequence data",
519 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
520 Vector<SequenceI> seqs = testee.getSeqs();
521 // should be 2 sequences
522 assertEquals("Expected two sequences", 2, seqs.size());
523 // check sequence names correct and order preserved
524 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
525 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
527 // check sequence data
528 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
529 .getSequenceAsString());
// each '.' in CPZANT is expected to be replaced by the residue at the same
// position in the first sequence (U455)
530 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
531 .getSequenceAsString());
532 assertTrue("File format is not flagged as interleaved",
533 testee.isInterleaved());
537 * Test parse of noninterleaved format data including identity symbol
539 * @throws IOException
541 @Test(groups = { "Functional" })
542 public void testParse_nonInterleavedWithIdentity() throws IOException
// Parses non-interleaved data that declares '.' as the match character
// ("!Format MatchChar=.") and checks the resolved sequence data.
545 MegaFile testee = new MegaFile("#MEGA\n"
546 + "!TITLE Noninterleaved sequence data;\n"
547 + "!Format MatchChar=.;\n"
554 AppletFormatAdapter.PASTE);
556 assertEquals("Title not as expected", "Noninterleaved sequence data",
557 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
558 Vector<SequenceI> seqs = testee.getSeqs();
559 // should be 2 sequences
560 assertEquals("Expected two sequences", 2, seqs.size());
561 // check sequence names correct and order preserved
562 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
563 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
565 // check sequence data
566 assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
567 .get(0).getSequenceAsString());
// the second sequence shares several residues with the first, presumably
// where the input used the match character (input lines not visible here)
568 assertEquals("Second sequence data wrong", "KLCFODHXYZCGPQC",
569 seqs.get(1).getSequenceAsString());
570 assertFalse("File format is not flagged as noninterleaved",
571 testee.isInterleaved());
577 * Test parse of interleaved format data including position number comments.
579 * @throws IOException
581 @Test(groups = { "Functional" })
582 public void testParse_interleavedWithPositionNumber() throws IOException
// Parses interleaved data in which every row ends with a bracketed position
// count ([6], [12]); the counts must not end up in the sequence data.
585 MegaFile testee = new MegaFile("#MEGA\n"+
586 "TITLE: Interleaved sequence data\n\n" +
587 "#U455 ABCDEF [6]\n" +
588 "#CPZANT MNOPQR [6]\n\n" +
589 "#U455 KLMNOP [12]\n" +
590 "#CPZANT WXYZGC [12]\n", AppletFormatAdapter.PASTE);
592 assertEquals("Title not as expected", "Interleaved sequence data",
593 testee.getAlignmentProperty(MegaFile.PROP_TITLE));
594 Vector<SequenceI> seqs = testee.getSeqs();
595 // should be 2 sequences
596 assertEquals("Expected two sequences", 2, seqs.size());
597 // check sequence names correct and order preserved
598 assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
599 assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
601 // check sequence data
602 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
603 .getSequenceAsString());
604 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
605 .getSequenceAsString());
606 assertTrue("File format is not flagged as interleaved",
607 testee.isInterleaved());
613 * Test parse of data with !Gene and !Domain statements.
615 * @throws IOException
617 @Test(groups = { "Functional" })
618 public void testParse_geneDomains() throws IOException
// Parses interleaved data broken up by !Domain / Gene statements and checks
// both the concatenated sequence data and the sequence features derived from
// those statements.
621 String data = "#MEGA\n"+
622 "TITLE: Interleaved sequence data\n\n" +
624 "#CPZANT TTTTTT\n\n" +
625 "!Domain=Exon1 Gene=Adh Property=Coding CodonStart=1;\n" +
627 "#CPZANT AAAAAA\n\n" +
628 "!domain=Intron1 Property=Intron Gene=Adh;\n" +
630 "#CPZANT cccccc\n\n" +
631 "!Domain=Exon2 Gene=Adh Property=Exon CodonStart=1;\n" +
633 "#CPZANT gggggg\n\n" +
634 // explicit end of Exon2, implicit end of Adh:
635 "!Domain=Exon2 Property=domainend;\n" +
636 "!Domain=Intron1 Gene=Opsin Property=Noncoding;\n" +
638 "#CPZANT AAAAAA\n\n" +
639 // end Opsin, start MEF2A
640 "!Domain=Exon1 Gene=MEF2A Property=Coding CodonStart=1;\n" +
642 "#CPZANT cccccc\n\n" +
644 "!Domain=BindingSite;\n" +
646 "#CPZANT TTTTTT\n\n";
648 MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
650 Vector<SequenceI> seqs = testee.getSeqs();
651 // should be 2 sequences
652 assertEquals("Expected two sequences", 2, seqs.size());
653 // check sequence data
654 assertEquals("First sequence data wrong",
655 "CCCCCCGGGGGGttttttaaaaaaGGGGGGttttttCCCCCC", seqs.get(0)
656 .getSequenceAsString());
657 assertEquals("Second sequence data wrong",
658 "TTTTTTAAAAAAccccccggggggAAAAAAccccccTTTTTT", seqs.get(1)
659 .getSequenceAsString());
662 * sequences should have features for Gene=Adh 7-24, Exon1 7-12, Intron1
663 * 13-18, Exon2 19-24, BindingSite 25-30
// NOTE(review): the summary above looks out of date - the assertions below
// expect nine features, including Opsin (25-30) and MEF2A (31-36), with
// BindingSite at 37-42.
665 for (SequenceI seq : seqs) {
666 SequenceFeature[] sfs = seq.getSequenceFeatures();
667 // features are added in the order in which their end is found
668 // (Domain before Gene when they end together)
669 assertEquals(9, sfs.length);
670 // TODO settle which way round type/description go!
671 verifySequenceFeature(sfs[0], "Exon1 (Adh Coding)", "Domain", 7, 12);
672 verifySequenceFeature(sfs[1], "Intron1 (Adh Noncoding)", "Domain",
674 verifySequenceFeature(sfs[2], "Exon2 (Adh Coding)", "Domain", 19, 24);
675 verifySequenceFeature(sfs[3], "Adh", "Gene", 7, 24);
676 verifySequenceFeature(sfs[4], "Intron1 (Opsin Noncoding)", "Domain",
678 verifySequenceFeature(sfs[5], "Opsin", "Gene", 25, 30);
679 verifySequenceFeature(sfs[6], "Exon1 (MEF2A Coding)", "Domain", 31,
681 verifySequenceFeature(sfs[7], "MEF2A", "Gene", 31, 36);
682 verifySequenceFeature(sfs[8], "BindingSite", "Domain", 37, 42);
687 * Helper method to assert properties of a SequenceFeature
695 protected void verifySequenceFeature(SequenceFeature sf,
696 String description, String type, int begin, int end)
// NOTE(review): the naming is crossed over - the 'description' argument is
// compared with sf.type and the 'type' argument with sf.description. Callers
// in this class pass their values to match (see the TODO in
// testParse_geneDomains), so settle the convention before renaming anything.
698 assertEquals(description, sf.type);
699 assertEquals(type, sf.description);
// begin and end are compared directly with the feature's fields
700 assertEquals(begin, sf.begin);
701 assertEquals(end, sf.end);
707 * Test parse of data including !Label statements. An underscore means no
708 * label, other characters are treated as alignment annotation.
710 * @throws IOException
712 @Test(groups = { "Functional" })
713 public void testParse_withLabels() throws IOException
// Parses interleaved data containing !label lines and checks that the labels
// end up in a single alignment annotation rather than in the sequence data.
716 MegaFile testee = new MegaFile("#MEGA\n"+
717 "TITLE: Interleaved sequence data\n\n" +
719 "#CPZANT MNO PQR\n" +
722 "#CPZANT WXY ZGC\n" +
723 "!label __3 +X_\n", AppletFormatAdapter.PASTE);
725 Vector<SequenceI> seqs = testee.getSeqs();
726 assertEquals("Expected two sequences", 2, seqs.size());
727 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
728 .getSequenceAsString());
729 assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
730 .getSequenceAsString());
732 // check AlignmentAnnotation added with expected values
733 assertEquals(1, testee.annotations.size());
734 AlignmentAnnotation aa = testee.annotations.get(0);
// the annotation is not expected to be tied to a particular sequence
735 assertNull(aa.sequenceRef);
// one annotation position per alignment column
736 assertEquals(12, aa.annotations.length);
// positions labelled '_' are expected to appear as empty entries
737 assertEquals("+, -, , 2, 3, , , , 3, +, X, , ", aa.toString());
743 * Test case where a domain is implicitly terminated by starting a new gene
745 * @throws IOException
747 @Test(groups = { "Functional" })
748 public void testParse_changeOfGeneEndsDomain() throws IOException
// Parses interleaved data in which a new gene starts while a domain of the
// previous gene is still open, and checks the features that result.
// NOTE(review): the "uses tab" remark below looks copied from
// testParse_interleavedWithIdentityAndTabs - the literals visible here use
// spaces; confirm.
751 // uses tab instead of space separators to check robustness
752 MegaFile testee = new MegaFile("#MEGA\n"+
753 "!TITLE Interleaved sequence data;\n" +
754 "!Format Identical=.;\n\n" +
755 "!Gene=gene1 Domain=Exon1 Property=Coding;\n" +
757 "#CPZANT M..P.R\n\n" +
760 "#CPZANT ..YZ..", AppletFormatAdapter.PASTE);
762 Vector<SequenceI> seqs = testee.getSeqs();
763 assertEquals("Expected two sequences", 2, seqs.size());
764 assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
765 .getSequenceAsString());
766 assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
767 .getSequenceAsString());
768 assertTrue("File format is not flagged as interleaved",
769 testee.isInterleaved());
// every sequence is expected to carry the same three features: the Exon1
// domain and gene1 both covering 1-6, then gene2 covering 7-12
771 for (SequenceI seq : seqs)
773 SequenceFeature[] sfs = seq.getSequenceFeatures();
774 assertEquals(3, sfs.length);
775 verifySequenceFeature(sfs[0], "Exon1 (gene1 Coding)", "Domain", 1, 6);
776 verifySequenceFeature(sfs[1], "gene1", "Gene", 1, 6);
777 verifySequenceFeature(sfs[2], "gene2", "Gene", 7, 12);