import static org.testng.AssertJUnit.assertTrue;
import static org.testng.AssertJUnit.fail;
+import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import java.io.IOException;
+import java.util.List;
import java.util.Vector;
import org.testng.annotations.Test;
+ "!TITLE Interleaved sequence data\n\n"
+ "#U455 ABCDEF\n"
+ "#CPZANT MNOPQR\n\n"
- + "#U456 KLMNOP\n";
+ + "#U455 GHIJKL\n"
+ + "#U456 KLMNOP\n"; // wossis?
- // the 'fancy' format, different header format, bases in triplet groups
+ // interleaved with description, bases/gaps in triplet groups
private static final String INTERLEAVED_WITH_DESCRIPTION =
"#MEGA\n"
+ "!Title Data with description;\n"
- + "!Format DataType=DNA indel=- CodeTable=Standard Missing=? MatchChar=.;\n\n"
+ + "!Format DataType=DNA indel=-\tCodeTable=Standard Missing=? MatchChar=.;\n\n"
+ "!Description\n"
+ " Line one of description\n"
+ " Line two of description;\n\n"
- + "#U455 CGC GTA\n"
- + "#CPZANT ATC GGG\n\n"
- + "#U455 CGA TTT\n"
- + "#CPZANT CAA TGC\n";
+ + "#U455 C-- GTA\n"
+ + "#CPZANT ATC -G-\n\n"
+ + "#U455 CGA --T\n"
+ + "#CPZANT CA- -GC\n";
//@formatter:on
/**
- * Test paste of interleaved mega format data.
+ * Test parse of interleaved mega format data.
*
* @throws IOException
*/
}
/**
- * Test paste of noninterleaved mega format data.
+ * Test parse of noninterleaved mega format data.
*
* @throws IOException
*/
{
assertEquals(
"Unexpected exception message",
- "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
+ "Parse error: interleaved was true but now seems to be false, at line: ABCFEDHIJ",
e.getMessage());
}
// normally output should match input
// we cheated here with a number of short input lines
// nb don't get Title in output if not calling print(AlignmentI)
- String expected = "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
- + "#CPZANT MNOPQR [6]\n\n" + "#U455 KLMNOP [12]\n"
- + "#CPZANT WXYZGC [12]"
- + "\n";
+ //@formatter:off
+ String expected =
+ "#MEGA\n\n" +
+ "#U455 ABCDEF [6]\n" +
+ "#CPZANT MNOPQR [6]\n\n" +
+ "#U455 KLMNOP [12]\n" +
+ "#CPZANT WXYZGC [12]\n";
+ //@formatter:on
assertEquals("Print format wrong", expected, printed);
}
System.out.println(printed);
// normally output should match input
// we cheated here with a number of short input lines
- String expected = "#MEGA\n\n"
- + "#U455\n" + "ABCFEDHIJM\nNOPQR\n\n"
- + "#CPZANT\n" + "KLMNOPWXYZ\nCGATC\n";
+ String expected = "#MEGA\n\n" + "#U455\n"
+ + "ABCFEDHIJM [10]\nNOPQR [15]\n\n" + "#CPZANT\n"
+ + "KLMNOPWXYZ [10]\nCGATC [15]\n";
assertEquals("Print format wrong", expected, printed);
}
String printed = testee.print();
System.out.println(printed);
//@formatter:off
- //0123456789klmnopqrstABCDEFGHIJ9876543210abcdefghij
String expected =
"#MEGA\n\n" +
"#U455 0123456789 klmnopqrst [20]\n" + // first 20
assertEquals(30, testee.getPositionsPerLine());
testee.setPositionsPerLine(25);
String printed = testee.print();
- // 60 character sequence should be output as 50 on first line then 10 more
+
+ /*
+ * 25 positions per line is rounded down to 20 (two blocks of 10)
+ */
String expected = "#MEGA\n\n" + "#SIXTY\n"
- + "0123456789klmnopqrstABCDE\n" + "FGHIJ9876543210abcdefghij\n"
- + "9993332221\n";
+ + "0123456789 klmnopqrst [20]\n"
+ + "ABCDEFGHIJ 9876543210 [40]\n"
+ + "abcdefghij 9993332221 [60]\n";
assertEquals("Print format wrong", expected, printed);
}
assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
.getName());
// check sequence data
- assertEquals("First sequence data wrong", "CGCGTACGATTT", seqs.get(0)
+ assertEquals("First sequence data wrong", "C--GTACGA--T", seqs.get(0)
.getSequenceAsString());
- assertEquals("Second sequence data wrong", "ATCGGGCAATGC", seqs.get(1)
+ assertEquals("Second sequence data wrong", "ATC-G-CA--GC", seqs.get(1)
.getSequenceAsString());
assertTrue("File format is not flagged as interleaved",
testee.isInterleaved());
" Line two of description;\n" +
"!Format\n" +
" DataType=DNA CodeTable=Standard\n" +
- " NSeqs=2 NSites=12\n" +
+ " NSeqs=2 NSites=12\n" + // NSites includes gaps
" Indel=- Identical=. Missing=?;\n\n" +
- "#U455 CGC GTA [6]\n" +
- "#CPZANT ATC GGG [6]\n\n" +
- "#U455 CGA TTT [12]\n" +
- "#CPZANT CAA TGC [12]\n";
+ "#U455 C-- GTA [6]\n" +
+ "#CPZANT ATC -G- [6]\n\n" +
+ "#U455 CGA --T [12]\n" +
+ "#CPZANT CA- -GC [12]\n";
//@formatter:on
assertEquals("Roundtrip didn't match", expected,
formatted);
assertEquals("Roundtrip didn't match", expected,
formatted);
}
+
+ //@formatter:on
+
+ /**
+  * Checks parsing of interleaved MEGA data in which fields are separated by
+  * tabs and sequences after the first use the declared identity character
+  * ('.') in place of residues that match the first sequence.
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_interleavedWithIdentityAndTabs() throws IOException
+ {
+   // tab separators (single and doubled) replace the usual spaces
+   //@formatter:off
+   String data = "#MEGA\n"
+           + "!TITLE\tInterleaved sequence data;\n"
+           + "!Format\tIdentical=.;\n\n"
+           + "#U455\tABCDEF\n"
+           + "#CPZANT\tM..P.R\n\n"
+           + "#U455\t\tKLMNOP\n"
+           + "#CPZANT\t..YZ..";
+   //@formatter:on
+   MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
+
+   assertEquals("Title not as expected", "Interleaved sequence data",
+           testee.getAlignmentProperty(MegaFile.PROP_TITLE));
+
+   // exactly two sequences, named and ordered as in the input
+   Vector<SequenceI> seqs = testee.getSeqs();
+   assertEquals("Expected two sequences", 2, seqs.size());
+   SequenceI first = seqs.get(0);
+   SequenceI second = seqs.get(1);
+   assertEquals("First sequence id wrong", "U455", first.getName());
+   assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+   // identity characters are expected to be resolved to the first sequence
+   assertEquals("First sequence data wrong", "ABCDEFKLMNOP",
+           first.getSequenceAsString());
+   assertEquals("Second sequence data wrong", "MBCPERKLYZOP",
+           second.getSequenceAsString());
+   assertTrue("File format is not flagged as interleaved",
+           testee.isInterleaved());
+ }
+
+ /**
+  * Checks parsing of noninterleaved MEGA data where the second sequence uses
+  * the identity symbol (declared here via MatchChar) for residues that match
+  * the first sequence.
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_nonInterleavedWithIdentity() throws IOException
+ {
+   //@formatter:off
+   String data = "#MEGA\n" +
+           "!TITLE Noninterleaved sequence data;\n" +
+           "!Format MatchChar=.;\n" +
+           "#U455 \n" +
+           "ABCFEDHIJ\n" +
+           "MNOPQR\n\n" +
+           "#CPZANT \n" +
+           "KL..O..XYZ\n" +
+           "CG..C\n";
+   //@formatter:on
+   MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
+
+   assertEquals("Title not as expected", "Noninterleaved sequence data",
+           testee.getAlignmentProperty(MegaFile.PROP_TITLE));
+
+   // exactly two sequences, named and ordered as in the input
+   Vector<SequenceI> seqs = testee.getSeqs();
+   assertEquals("Expected two sequences", 2, seqs.size());
+   SequenceI first = seqs.get(0);
+   SequenceI second = seqs.get(1);
+   assertEquals("First sequence id wrong", "U455", first.getName());
+   assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+   // each sequence's lines are joined; '.' takes the first sequence's residue
+   assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR",
+           first.getSequenceAsString());
+   assertEquals("Second sequence data wrong", "KLCFODHXYZCGPQC",
+           second.getSequenceAsString());
+   assertFalse("File format is not flagged as noninterleaved",
+           testee.isInterleaved());
+ }
+
+ //@formatter:on
+
+ /**
+  * Checks parsing of interleaved MEGA data whose sequence lines end with a
+  * position-number comment such as "[6]"; the comment must not end up in the
+  * parsed sequence data.
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_interleavedWithPositionNumber() throws IOException
+ {
+   //@formatter:off
+   String data = "#MEGA\n"
+           + "TITLE: Interleaved sequence data\n\n"
+           + "#U455 ABCDEF [6]\n"
+           + "#CPZANT MNOPQR [6]\n\n"
+           + "#U455 KLMNOP [12]\n"
+           + "#CPZANT WXYZGC [12]\n";
+   //@formatter:on
+   MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
+
+   assertEquals("Title not as expected", "Interleaved sequence data",
+           testee.getAlignmentProperty(MegaFile.PROP_TITLE));
+
+   // exactly two sequences, named and ordered as in the input
+   Vector<SequenceI> seqs = testee.getSeqs();
+   assertEquals("Expected two sequences", 2, seqs.size());
+   SequenceI first = seqs.get(0);
+   SequenceI second = seqs.get(1);
+   assertEquals("First sequence id wrong", "U455", first.getName());
+   assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+   // sequence data is the concatenation of the blocks, without the comments
+   assertEquals("First sequence data wrong", "ABCDEFKLMNOP",
+           first.getSequenceAsString());
+   assertEquals("Second sequence data wrong", "MNOPQRWXYZGC",
+           second.getSequenceAsString());
+   assertTrue("File format is not flagged as interleaved",
+           testee.isInterleaved());
+ }
+
+ //@formatter:on
+
+ /**
+  * Test parse of data with !Gene and !Domain statements. Verifies the parsed
+  * sequence data, the Gene/Domain sequence features added to each sequence,
+  * and the "MEGA Gene" / "MEGA Domain" alignment annotations.
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_geneDomains() throws IOException
+ {
+   //@formatter:off
+   String data = "#MEGA\n"+
+   "TITLE: Interleaved sequence data\n\n" +
+   "#U455 CCCCCC\n" +
+   "#CPZANT TTTTTT\n\n" +
+   "!Domain=Exon1 Gene=Adh Property=Coding CodonStart=1;\n" +
+   "#U455 GGGGGG\n" +
+   "#CPZANT AAAAAA\n\n" +
+   "!domain=Intron1 Property=Intron Gene=Adh;\n" +
+   "#U455 tttttt\n" +
+   "#CPZANT cccccc\n\n" +
+   "!Domain=Exon2 Gene=Adh Property=Exon CodonStart=1;\n" +
+   "#U455 aaaaaa\n" +
+   "#CPZANT gggggg\n\n" +
+   // explicit end of Exon2, implicit end of Adh:
+   "!Domain=Exon2 Property=domainend;\n" +
+   "!Domain=Intron1 Gene=Opsin Property=Noncoding;\n" +
+   "#U455 GGGGGG\n" +
+   "#CPZANT AAAAAA\n\n" +
+   // end Opsin, start MEF2A
+   "!Domain=Exon1 Gene=MEF2A Property=Coding CodonStart=1;\n" +
+   "#U455 tttttt\n" +
+   "#CPZANT cccccc\n\n" +
+   // end MEF2A
+   "!Domain=BindingSite;\n" +
+   "#U455 CCCCCC\n" +
+   "#CPZANT TTTTTT\n\n";
+   //@formatter:on
+   MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
+
+   Vector<SequenceI> seqs = testee.getSeqs();
+   // should be 2 sequences
+   assertEquals("Expected two sequences", 2, seqs.size());
+   // check sequence data
+   assertEquals("First sequence data wrong",
+           "CCCCCCGGGGGGttttttaaaaaaGGGGGGttttttCCCCCC", seqs.get(0)
+                   .getSequenceAsString());
+   assertEquals("Second sequence data wrong",
+           "TTTTTTAAAAAAccccccggggggAAAAAAccccccTTTTTT", seqs.get(1)
+                   .getSequenceAsString());
+
+   /*
+    * each sequence should have nine features (positions are base 1,
+    * inclusive):
+    * Gene Adh 7-24 with Domains Exon1 7-12, Intron1 13-18, Exon2 19-24;
+    * Gene Opsin 25-30 with Domain Intron1 25-30;
+    * Gene MEF2A 31-36 with Domain Exon1 31-36;
+    * Domain BindingSite 37-42 (no gene)
+    */
+   for (SequenceI seq : seqs)
+   {
+     SequenceFeature[] sfs = seq.getSequenceFeatures();
+     // features are added in the order in which their end is found
+     // (Domain before Gene when they end together)
+     assertEquals(9, sfs.length);
+     // TODO settle which way round type/description go!
+     verifySequenceFeature(sfs[0], "Exon1 (Adh Coding)", "Domain", 7, 12);
+     verifySequenceFeature(sfs[1], "Intron1 (Adh Noncoding)", "Domain",
+             13, 18);
+     verifySequenceFeature(sfs[2], "Exon2 (Adh Coding)", "Domain", 19, 24);
+     verifySequenceFeature(sfs[3], "Adh", "Gene", 7, 24);
+     verifySequenceFeature(sfs[4], "Intron1 (Opsin Noncoding)", "Domain",
+             25, 30);
+     verifySequenceFeature(sfs[5], "Opsin", "Gene", 25, 30);
+     verifySequenceFeature(sfs[6], "Exon1 (MEF2A Coding)", "Domain", 31,
+             36);
+     verifySequenceFeature(sfs[7], "MEF2A", "Gene", 31, 36);
+     verifySequenceFeature(sfs[8], "BindingSite", "Domain", 37, 42);
+   }
+
+   /*
+    * verify gene and domain alignment annotations; verifyAnnotation takes
+    * base 0 column ranges with an exclusive end, so consecutive ranges must
+    * share a boundary value for every one of the 42 columns to be checked
+    */
+   assertEquals(2, testee.annotations.size());
+   AlignmentAnnotation ann = testee.annotations.get(0);
+   assertEquals("MEGA Gene", ann.label);
+   assertEquals(42, ann.annotations.length);
+   verifyAnnotation(ann, 0, 6, null);
+   verifyAnnotation(ann, 6, 24, "Adh");
+   verifyAnnotation(ann, 24, 30, "Opsin");
+   verifyAnnotation(ann, 30, 36, "MEF2A");
+   verifyAnnotation(ann, 36, 42, null);
+
+   ann = testee.annotations.get(1);
+   assertEquals("MEGA Domain", ann.label);
+   assertEquals(42, ann.annotations.length);
+   verifyAnnotation(ann, 0, 6, null);
+   verifyAnnotation(ann, 6, 12, "Exon1 (Adh Coding)");
+   verifyAnnotation(ann, 12, 18, "Intron1 (Adh Noncoding)");
+   verifyAnnotation(ann, 18, 24, "Exon2 (Adh Coding)");
+   verifyAnnotation(ann, 24, 30, "Intron1 (Opsin Noncoding)");
+   verifyAnnotation(ann, 30, 36, "Exon1 (MEF2A Coding)");
+   verifyAnnotation(ann, 36, 42, "BindingSite");
+ }
+
+ /**
+  * Helper method asserting that every column in a range of an alignment
+  * annotation row carries the expected description, or holds no annotation
+  * at all when the expected description is null.
+  *
+  * @param ann
+  *          the alignment annotation whose per-column entries are checked
+  * @param from
+  *          first column index to check
+  * @param to
+  *          column index at which to stop (exclusive)
+  * @param description
+  *          expected description, or null to assert the entries are null
+  */
+ protected void verifyAnnotation(AlignmentAnnotation ann, int from,
+         int to, String description)
+ {
+   for (int col = from; col < to; col++)
+   {
+     Annotation found = ann.annotations[col];
+     if (description != null)
+     {
+       assertEquals(description, found.description);
+       continue;
+     }
+     assertNull(found);
+   }
+ }
+
+ /**
+  * Helper method to assert properties of a SequenceFeature.
+  * <p>
+  * Note the argument order: the expected description is passed first and
+  * the expected type second, e.g.
+  * {@code verifySequenceFeature(sf, "Exon1 (Adh Coding)", "Domain", 7, 12)}.
+  *
+  * @param sf
+  *          the feature to check
+  * @param description
+  *          expected value of the feature's description
+  * @param type
+  *          expected value of the feature's type
+  * @param begin
+  *          expected value of the feature's begin
+  * @param end
+  *          expected value of the feature's end
+  */
+ protected void verifySequenceFeature(SequenceFeature sf,
+         String description, String type, int begin, int end)
+ {
+   assertEquals(type, sf.type);
+   assertEquals(description, sf.description);
+   assertEquals(begin, sf.begin);
+   assertEquals(end, sf.end);
+ }
+
+ /**
+  * Checks parsing of data containing !Label statements. In a label line an
+  * underscore stands for "no label"; any other character is expected to
+  * appear in a single "MEGA Label" alignment annotation row. A block without
+  * a !Label line contributes empty annotation positions.
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_withLabels() throws IOException
+ {
+   //@formatter:off
+   String data = "#MEGA\n"
+           + "TITLE: Interleaved sequence data\n\n"
+           + "#U455 ABC DEF\n"
+           + "#CPZANT MNO PQR\n"
+           + "!Label +-_ 23_\n\n"
+           // a block with no label line
+           + "#U455 abc def\n"
+           + "#CPZANT mno pqr\n\n"
+           + "#U455 KLM NOP\n"
+           + "#CPZANT WXY ZGC\n"
+           + "!label __3 +X_\n";
+   //@formatter:on
+   MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
+
+   Vector<SequenceI> seqs = testee.getSeqs();
+   assertEquals("Expected two sequences", 2, seqs.size());
+   SequenceI first = seqs.get(0);
+   assertEquals("First sequence data wrong", "ABCDEFabcdefKLMNOP",
+           first.getSequenceAsString());
+   SequenceI second = seqs.get(1);
+   assertEquals("Second sequence data wrong", "MNOPQRmnopqrWXYZGC",
+           second.getSequenceAsString());
+
+   // one alignment-level (not sequence-specific) annotation row of labels
+   assertEquals(1, testee.annotations.size());
+   AlignmentAnnotation labels = testee.annotations.get(0);
+   assertNull(labels.sequenceRef);
+   assertEquals("MEGA Label", labels.label);
+   assertEquals(18, labels.annotations.length);
+   assertEquals("+, -, , 2, 3, , , , , , , , , , 3, +, X, , ",
+           labels.toString());
+ }
+
+ //@formatter:on
+
+ /**
+  * Test case where a domain is implicitly terminated by starting a new gene:
+  * the data declares Domain Exon1 within gene1, then a bare !Gene=gene2
+  * statement with no explicit end of the domain.
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_changeOfGeneEndsDomain() throws IOException
+ {
+   //@formatter:off
+   // gene2 starts without an explicit 'domainend' for Exon1
+   MegaFile testee = new MegaFile("#MEGA\n"+
+   "!TITLE Interleaved sequence data;\n" +
+   "!Format Identical=.;\n\n" +
+   "!Gene=gene1 Domain=Exon1 Property=Coding;\n" +
+   "#U455 ABCDEF\n" +
+   "#CPZANT M..P.R\n\n" +
+   "!Gene=gene2;\n" +
+   "#U455 KLMNOP\n" +
+   "#CPZANT ..YZ..", AppletFormatAdapter.PASTE);
+   //@formatter:on
+   Vector<SequenceI> seqs = testee.getSeqs();
+   assertEquals("Expected two sequences", 2, seqs.size());
+   assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
+           .getSequenceAsString());
+   assertEquals("Second sequence data wrong", "MBCPERKLYZOP", seqs.get(1)
+           .getSequenceAsString());
+   assertTrue("File format is not flagged as interleaved",
+           testee.isInterleaved());
+
+   // Exon1 and gene1 both cover 1-6 only; gene2 covers 7-12 with no domain
+   for (SequenceI seq : seqs)
+   {
+     SequenceFeature[] sfs = seq.getSequenceFeatures();
+     assertEquals(3, sfs.length);
+     verifySequenceFeature(sfs[0], "Exon1 (gene1 Coding)", "Domain", 1, 6);
+     verifySequenceFeature(sfs[1], "gene1", "Gene", 1, 6);
+     verifySequenceFeature(sfs[2], "gene2", "Gene", 7, 12);
+   }
+ }
+
+ //@formatter:on
+
+ /**
+  * Checks the case where the file declares a gap (Indel) character, here
+  * '%', that is expected to be converted to '-' in the parsed sequences and
+  * reported as the alignment's gap character.
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_weirdGapCharacter() throws IOException
+ {
+   //@formatter:off
+   String data = "#MEGA\n"
+           + "!TITLE Interleaved sequence data;\n"
+           + "!Format Identical=. Indel=%;\n\n"
+           + "#U455 %BC%EF\n"
+           + "#CPZANT M..P.R\n\n"
+           + "#U455 KLMNOP\n"
+           + "#CPZANT .%%Z..";
+   //@formatter:on
+   AppletFormatAdapter adapter = new AppletFormatAdapter();
+   AlignmentI alignment = adapter.readFile(data, AppletFormatAdapter.PASTE,
+           "MEGA");
+
+   List<SequenceI> seqs = alignment.getSequences();
+   SequenceI first = seqs.get(0);
+   assertEquals("First sequence data wrong", "-BC-EFKLMNOP",
+           first.getSequenceAsString());
+   SequenceI second = seqs.get(1);
+   assertEquals("Second sequence data wrong", "MBCPERK--ZOP",
+           second.getSequenceAsString());
+   assertEquals('-', alignment.getGapCharacter());
+ }
+
+ /**
+  * Round trip test: reads MEGA data containing !Label statements into an
+  * alignment, checks the resulting "MEGA Label" alignment annotation, then
+  * writes the alignment back out in MEGA format and compares the text with
+  * the expected output (labels grouped in triplets like the sequence data).
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testRoundTrip_withLabels() throws IOException
+ {
+   //@formatter:off
+   String data = "#MEGA\n" +
+           "#U455 C-- GTA\n" +
+           "#CPZANT ATC -G-\n" +
+           "!Label F__E_H\n\n" +
+           "#U455 CGA --T\n" +
+           "#CPZANT CA- -GC\n" +
+           "!Label FFH__E\n";
+   String expected = "#MEGA\n"
+           + "!Format\n"
+           + " DataType=Nucleotide CodeTable=Standard\n"
+           + " NSeqs=2 NSites=12\n"
+           + " Indel=-;\n\n"
+           + "#U455 C-- GTA [6]\n"
+           + "#CPZANT ATC -G- [6]\n"
+           + "!Label F__ E_H;\n\n"
+           + "#U455 CGA --T [12]\n"
+           + "#CPZANT CA- -GC [12]\n"
+           + "!Label FFH __E;\n";
+   //@formatter:on
+
+   // read: labels should become a single alignment annotation row
+   AppletFormatAdapter adapter = new AppletFormatAdapter();
+   AlignmentI alignment = adapter.readFile(data, AppletFormatAdapter.PASTE,
+           "MEGA");
+   AlignmentAnnotation labels = alignment.getAlignmentAnnotation()[0];
+   assertEquals("MEGA Label", labels.label);
+   assertEquals("F, , , E, , H, F, F, H, , , E, ", labels.toString());
+
+   // write: a fresh MegaFile formats the alignment
+   MegaFile writer = new MegaFile();
+   String formatted = writer.print(alignment);
+   assertEquals("Roundtrip didn't match", expected, formatted);
+ }
+
+ /**
+  * Parses MEGA data containing !Domain / Gene statements and prints it
+  * again, expecting the printed text to equal the input.
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testPrint_genes() throws IOException
+ {
+   /*
+    * the input is deliberately written exactly as Jalview is expected to
+    * output it, so that it can double as the expected result; what matters
+    * is that input and output are functionally equivalent
+    */
+   //@formatter:off
+   String data = "#MEGA\n\n"
+           + "#Seq1 ABCD [4]\n"
+           + "#Seq2 MNOP [4]\n\n"
+           + "!Domain=Exon1 Gene=Adh Property=Coding;\n"
+           + "#Seq1 EFGHI [9]\n"
+           + "#Seq2 QRSTU [9]\n\n"
+           + "!Domain=Intron1 Gene=Adh Property=Noncoding;\n"
+           + "#Seq1 JK [11]\n"
+           + "#Seq2 VW [11]\n\n"
+           + "!Domain=Intron1 Property=domainend;\n"
+           + "#Seq1 LMN [14]\n"
+           + "#Seq2 XYZ [14]\n";
+   //@formatter:on
+   MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
+   assertEquals("Print format wrong", data, testee.print());
+ }
+
+ /**
+  * Checks MegaFile.getDomainFromAnnotation for missing data (null row, null
+  * entry, column out of range) and for annotation descriptions of the forms
+  * "Domain", "Domain (Gene)", "Domain (Gene Property)" and
+  * "Domain (Property)".
+  */
+ @Test(groups = { "Functional" })
+ public void testGetDomainFromAnnotation()
+ {
+   // column 0 is deliberately left without an annotation
+   Annotation[] anns = {
+       null,
+       new Annotation("", "Intron1", '0', 0f),
+       new Annotation("", "Intron2 (Aspx)", '0', 0f),
+       new Annotation("", "Intron3 (Aspy Coding)", '0', 0f),
+       new Annotation("", "Intron4 (Coding)", '0', 0f) };
+   AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
+
+   // nothing to read: no annotation row, empty column, column past the end
+   assertNull(MegaFile.getDomainFromAnnotation(0, null));
+   assertNull(MegaFile.getDomainFromAnnotation(0, aa));
+   assertNull(MegaFile.getDomainFromAnnotation(5, aa));
+
+   // domain only
+   assertEquals("Intron1", MegaFile.getDomainFromAnnotation(1, aa));
+   // domain plus gene
+   assertEquals("Intron2", MegaFile.getDomainFromAnnotation(2, aa));
+   // domain plus gene and property
+   assertEquals("Intron3", MegaFile.getDomainFromAnnotation(3, aa));
+   // domain plus property, no gene
+   assertEquals("Intron4", MegaFile.getDomainFromAnnotation(4, aa));
+ }
+
+ /**
+  * Checks MegaFile.getGeneFromAnnotation for missing data (null row, null
+  * entry, column out of range) and for a column holding a gene annotation.
+  */
+ @Test(groups = { "Functional" })
+ public void testGetGeneFromAnnotation()
+ {
+   // only the middle column carries an annotation
+   Annotation[] anns = { null, new Annotation("", "Aspx", '0', 0f), null };
+   AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
+
+   // nothing to read: no annotation row, empty column, column past the end
+   assertNull(MegaFile.getGeneFromAnnotation(0, null));
+   assertNull(MegaFile.getGeneFromAnnotation(0, aa));
+   assertNull(MegaFile.getGeneFromAnnotation(3, aa));
+
+   // annotated column yields the gene name
+   assertEquals("Aspx", MegaFile.getGeneFromAnnotation(1, aa));
+ }
+
+ /**
+  * Checks MegaFile.getPropertyFromAnnotation for missing data (null row,
+  * null entry, column out of range), for descriptions that carry no
+  * property, and for descriptions of the form "Domain (Gene Property)".
+  */
+ @Test(groups = { "Functional" })
+ public void testGetPropertyFromAnnotation()
+ {
+   // column 0 is deliberately left without an annotation
+   Annotation[] anns = {
+       null,
+       new Annotation("", "Intron1", '0', 0f),
+       new Annotation("", "Intron2 (Aspx)", '0', 0f),
+       new Annotation("", "Intron3 (Aspy Noncoding)", '0', 0f),
+       new Annotation("", "Exon1 (Aspx Coding)", '0', 0f) };
+   AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
+
+   // nothing to read: no annotation row, empty column, column past the end
+   assertNull(MegaFile.getPropertyFromAnnotation(0, null));
+   assertNull(MegaFile.getPropertyFromAnnotation(0, aa));
+   assertNull(MegaFile.getPropertyFromAnnotation(5, aa));
+
+   // no property present: domain only, then domain plus gene
+   assertNull(MegaFile.getPropertyFromAnnotation(1, aa));
+   assertNull(MegaFile.getPropertyFromAnnotation(2, aa));
+
+   // domain plus gene and property
+   assertEquals("Noncoding", MegaFile.getPropertyFromAnnotation(3, aa));
+   assertEquals("Coding", MegaFile.getPropertyFromAnnotation(4, aa));
+ }
+
+ //@formatter:on
+
+ /**
+  * Checks that interleaved data is still parsed correctly when the blank
+  * lines between blocks are removed (every double newline in the
+  * INTERLEAVED test data is collapsed to a single newline).
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_interleaved_noBlankLines() throws IOException
+ {
+   MegaFile testee = new MegaFile(INTERLEAVED.replace("\n\n", "\n"),
+           AppletFormatAdapter.PASTE);
+
+   assertEquals("Title not as expected", "Interleaved sequence data",
+           testee.getAlignmentProperty(MegaFile.PROP_TITLE));
+
+   // exactly two sequences, named and ordered as in the input
+   Vector<SequenceI> seqs = testee.getSeqs();
+   assertEquals("Expected two sequences", 2, seqs.size());
+   SequenceI first = seqs.get(0);
+   SequenceI second = seqs.get(1);
+   assertEquals("First sequence id wrong", "U455", first.getName());
+   assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+   // sequence data is the concatenation of the blocks
+   assertEquals("First sequence data wrong", "ABCDEFKLMNOP",
+           first.getSequenceAsString());
+   assertEquals("Second sequence data wrong", "MNOPQRWXYZGC",
+           second.getSequenceAsString());
+   assertTrue("File format is not flagged as interleaved",
+           testee.isInterleaved());
+ }
}