*/
public class MegaFile extends AlignFile
{
+ private static final int DEFAULT_LINE_LENGTH = 60; // used by print(AlignmentI) when no line length property is stored
+ // prefix written before each continuation line of the !Format statement
+ private static final String INDENT = " ";
+ // keywords of the !Format statement; input matching is case-insensitive
+ private static final String N_SITES = "NSites";
+
+ private static final String N_SEQS = "NSeqs";
+
+ private static final String MISSING = "Missing";
+
+ private static final String IDENTICAL = "Identical";
+
+ private static final String INDEL = "Indel";
+
+ private static final String CODETABLE = "CodeTable";
+ // DataType values written when the alignment carries no stored MEGA data type
+ private static final String PROTEIN = "Protein";
+
+ private static final String NUCLEOTIDE = "Nucleotide";
+ // !Format keyword that names the data type
+ private static final String DATATYPE = "DataType";
+
private static final char COMMENT_START = '[';
private static final char COMMENT_END = ']';
private static final String MEGA_ID = HASHSIGN + "MEGA";
- private static final String TITLE = "TITLE";
+ private static final String TITLE = "Title"; // compared upper-cased when reading, written as-is
private static final String FORMAT = "Format";
private static final String DOMAIN = "Domain";
- private static final String INTERLEAVED = "Interleaved";
-
/*
* names of properties to save to the alignment (may affect eventual output
* format)
static final String PROP_MISSING = "MEGA_MISSING";
+ static final String PROP_DATATYPE = "MEGA_DATATYPE";
+
+ // number of bases per line of file (value is inferred)
+ static final String PROP_LINELENGTH = "MEGA_LINELENGTH";
+
// TODO: need a controlled name for Gene as a feature if we want to be able to
// output the MEGA file with !Gene headers
// Open question: what should happen if the sequences get realigned?
private static final String SPACE = " ";
- private static final int POSITIONS_PER_LINE = 50;
+ /*
+ * number of sequence positions output per line
+ */
+ private int positionsPerLine;
private String title;
dataLine = nextNonCommentLine();
}
+ // remember the (longest) line length read in, so we can output the same
+ setAlignmentProperty(PROP_LINELENGTH, String.valueOf(positionsPerLine));
+
setSequences(seqData);
}
* Add the current line of data to the sequence.
*/
sb.append(dataLine);
+
+ setPositionsPerLine(Math.max(positionsPerLine, dataLine.length()));
}
/**
/*
* Do nothing if this line is _only_ a sequence id with no data following.
*
- * Remove any internal spaces (present in the 'fancy' file format)
+ * Remove any internal spaces
*/
if (data != null && data.length() > 0)
{
data = data.replace(SPACE, "");
}
sb.append(data);
+ setPositionsPerLine(Math.max(positionsPerLine, data.length()));
assertInterleaved(true, dataLine);
}
}
if (isTitle(inputLine))
{
- setAlignmentProperty(PROP_TITLE, getValue(inputLine));
+ this.title = getValue(inputLine);
+ setAlignmentProperty(PROP_TITLE, title);
}
else if (inputLine.startsWith(BANG + DESCRIPTION))
{
{
inputLine = inputLine.substring(0, inputLine.length() - 1);
}
+ if (inputLine.length() == 0)
+ {
+ return;
+ }
String[] tokens = inputLine.trim().split("\\s"); // any whitespace
for (String token : tokens)
{
/*
* Jalview will work out whether nucleotide or not anyway
*/
- if (keyword.equalsIgnoreCase("DataType"))
+ if (keyword.equalsIgnoreCase(DATATYPE))
{
if (value.equalsIgnoreCase("DNA") || value.equalsIgnoreCase("RNA")
|| value.equalsIgnoreCase("Nucleotide"))
this.nucleotide = true;
// alignment computes whether or not it is nucleotide when created
}
- else if (value.equalsIgnoreCase("Protein"))
+ else if (value.equalsIgnoreCase(PROTEIN))
{
this.nucleotide = false;
}
{
throw new FileFormatException(msg);
}
+ setAlignmentProperty(PROP_DATATYPE, value);
}
/*
* accept non-Standard code table but save in case we want to disable
* 'translate as cDNA'
*/
- else if (keyword.equalsIgnoreCase("CodeTable"))
+ else if (keyword.equalsIgnoreCase(CODETABLE))
{
setAlignmentProperty(PROP_CODETABLE, value);
}
/*
* save gap char to set later on alignment once created
*/
- else if (keyword.equalsIgnoreCase("Indel"))
+ else if (keyword.equalsIgnoreCase(INDEL))
{
this.gapCharacter = value.charAt(0);
}
- else if (keyword.equalsIgnoreCase("Identical")
+ else if (keyword.equalsIgnoreCase(IDENTICAL)
|| keyword.equalsIgnoreCase("MatchChar"))
{
+ setAlignmentProperty(PROP_IDENTITY, value);
if (!".".equals(value))
{
- setAlignmentProperty(PROP_IDENTITY, value);
System.err.println("Warning: " + token
+ " not supported, Jalview uses '.' for identity");
}
}
- else if (keyword.equalsIgnoreCase("Missing"))
+ else if (keyword.equalsIgnoreCase(MISSING))
{
setAlignmentProperty(PROP_MISSING, value);
System.err.println("Warning: " + token + " not supported");
setAlignmentProperty(PROP_MISSING, value);
}
- else if (!keyword.equalsIgnoreCase("NSeqs")
- && !keyword.equalsIgnoreCase("NSites"))
+ else if (!keyword.equalsIgnoreCase(N_SEQS)
+ && !keyword.equalsIgnoreCase(N_SITES))
{
System.err.println("Warning: " + msg);
}
return false;
}
String upper = inputLine.toUpperCase();
- return (upper.startsWith(TITLE) || upper.startsWith(BANG + TITLE));
+ return (upper.startsWith(TITLE.toUpperCase()) || upper.startsWith(BANG
+ + TITLE.toUpperCase()));
}
/**
{
if (line.endsWith(SEMICOLON))
{
- desc.append(line.substring(0, line.length() - 1)).append(newline);
+ desc.append(line.substring(0, line.length() - 1));
break;
}
else if (line.length() > 0)
}
/**
- * Write out the alignment sequences in Mega format.
+ * Returns the alignment sequences in Mega format, preceded by the #MEGA
+ * identifier line. Only the identifier line is prepended here; property
+ * headers such as Title are not added by this method.
+ *
+ * @return the MEGA identifier line followed by the formatted sequence data
*/
@Override
public String print()
{
- return print(getSeqsAsArray());
+ return MEGA_ID + newline + print(getSeqsAsArray());
}
/**
* Write out the alignment sequences in Mega format - interleaved unless
* explicitly noninterleaved.
*/
- public String print(SequenceI[] s)
+ protected String print(SequenceI[] s)
{
- // TODO: is there a way to preserve the 'interleaved' property so it can
- // affect output?
-
- String result = null;
+ String result;
if (this.interleaved != null && !this.interleaved)
{
result = printNonInterleaved(s);
}
/**
- * Print the sequences in interleaved format, each row 15 space-separated
- * triplets.
- *
- * @param s
- * @return
- */
- protected String printInterleavedCodons(SequenceI[] s)
- {
- // TODO not coded yet - defaulting to the 'simple' format output
- return printInterleaved(s);
- }
-
- /**
- * Print to string in Interleaved format - blocks of next 50 characters of
- * each sequence in turn.
+ * Print to string in Interleaved format - blocks of next N characters of each
+ * sequence in turn.
*
* @param s
*/
{
int maxIdLength = getMaxIdLength(s);
int maxSequenceLength = getMaxSequenceLength(s);
- int numLines = maxSequenceLength / POSITIONS_PER_LINE + 3; // approx
+ int numLines = maxSequenceLength / positionsPerLine + 3; // approx
/*
* Size a buffer to hold the whole output
*/
StringBuilder sb = new StringBuilder(numLines
- * (maxIdLength + 2 + POSITIONS_PER_LINE));
- printHeaders(sb);
+ * (maxIdLength + 2 + positionsPerLine));
+
+ int numDataBlocks = (maxSequenceLength - 1) / positionsPerLine + 1;
+ int spaceEvery = this.nucleotide != null && this.nucleotide ? 3 : 10;
+ int chunksPerLine = (positionsPerLine + spaceEvery - 1) / spaceEvery;
- int numDataBlocks = (maxSequenceLength - 1) / POSITIONS_PER_LINE + 1;
+ /*
+ * Output as: #Seqid CGT AGC ACT ... or blocks of 10 for peptide
+ */
+ int from = 0;
for (int i = 0; i < numDataBlocks; i++)
{
sb.append(newline);
+ boolean first = true;
+ int advancedBy = 0;
for (SequenceI seq : s)
{
-
- String seqId = String.format("#%-" + maxIdLength + "s ",
+ int seqFrom = from;
+ String seqId = String.format("#%-" + maxIdLength + "s",
seq.getName());
- char[] subSequence = seq.getSequence(i * POSITIONS_PER_LINE,
- (i + 1) * POSITIONS_PER_LINE);
+
+ /*
+ * output next line for this sequence
+ */
sb.append(seqId);
- sb.append(subSequence);
+ int lastPos = seqFrom + positionsPerLine; // exclusive
+ for (int j = 0; j < chunksPerLine; j++)
+ {
+ char[] subSequence = seq.getSequence(seqFrom,
+ Math.min(lastPos, seqFrom + spaceEvery));
+ if (subSequence.length > 0)
+ {
+ sb.append(SPACE).append(subSequence);
+ }
+ seqFrom += subSequence.length;
+ if (first)
+ {
+ // all sequences should be the same length in MEGA
+ advancedBy += subSequence.length;
+ }
+ }
sb.append(newline);
+ first = false;
}
+ from += advancedBy;
}
return new String(sb);
}
/**
- * Append the MEGA header and any other known properties
+ * Outputs to string the MEGA header and any other known and relevant
+ * alignment properties: the #MEGA identifier line, the Title and Description
+ * statements (when those properties are set on the alignment), and a !Format
+ * statement whose keyword=value pairs are written on indented lines.
*
- * @param sb
+ * @param al
+ *          the alignment whose properties, height, gap character and first
+ *          sequence are read to build the headers
+ * @return the header text, ending with the semicolon-terminated !Format
+ *         statement and a newline
*/
- private void printHeaders(StringBuilder sb)
+ protected String printHeaders(AlignmentI al)
{
- sb.append(MEGA_ID);
- sb.append(newline);
+ StringBuilder sb = new StringBuilder(128);
+ sb.append(MEGA_ID).append(newline);
+ printProperty(al, sb, PROP_TITLE, TITLE);
+ printProperty(al, sb, PROP_DESCRIPTION, DESCRIPTION);
- String ttle = getAlignmentProperty(PROP_TITLE);
- if (ttle != null)
+ /*
+ * !Format DataType CodeTable
+ * DataType is the stored MEGA property if present, otherwise derived from
+ * whether the alignment is nucleotide; CodeTable defaults to "Standard"
+ */
+ sb.append(BANG).append(FORMAT).append(newline);
+ String dataType = (String) al.getProperty(PROP_DATATYPE);
+ if (dataType == null)
{
- sb.append(BANG).append(TITLE).append(SPACE).append(ttle)
- .append(SEMICOLON).append(newline);
+ dataType = al.isNucleotide() ? NUCLEOTIDE : PROTEIN;
}
+ sb.append(INDENT).append(DATATYPE).append(EQUALS).append(dataType);
+ String codeTable = (String) al.getProperty(PROP_CODETABLE);
+ sb.append(SPACE).append(CODETABLE).append(EQUALS)
+ .append(codeTable == null ? "Standard" : codeTable)
+ .append(newline);
+
+ /*
+ * !Format NSeqs NSites
+ * NSites the length of any sequence (they should all be the same), excluding
+ * gaps?!?
+ * It is computed from the first sequence only, as (end - start + 1).
+ * NOTE(review): the first sequence is dereferenced unchecked, so this
+ * assumes the alignment holds at least one sequence - confirm with callers
+ */
+ sb.append(INDENT).append(N_SEQS).append(EQUALS).append(al.getHeight());
+ SequenceI seq = al.getSequenceAt(0);
+ sb.append(SPACE).append(N_SITES).append(EQUALS)
+ .append(seq.getEnd() - seq.getStart() + 1);
+ sb.append(newline);
- String desc = getAlignmentProperty(PROP_DESCRIPTION);
- if (desc != null)
+ /*
+ * !Format Indel Identical Missing
+ * Indel is always written; Identical and Missing only when the
+ * corresponding property is set on the alignment
+ */
+ sb.append(INDENT);
+ sb.append(INDEL).append(EQUALS).append(al.getGapCharacter());
+ String identity = (String) al.getProperty(PROP_IDENTITY);
+ if (identity != null)
+ {
+ sb.append(SPACE).append(IDENTICAL).append(EQUALS).append(identity);
+ }
+ String missing = (String) al.getProperty(PROP_MISSING);
+ if (missing != null)
{
- sb.append(BANG).append(DESCRIPTION).append(SPACE).append(desc)
- .append(SEMICOLON).append(newline);
+ sb.append(SPACE).append(MISSING).append(EQUALS).append(missing);
}
+ sb.append(SEMICOLON).append(newline);
+
+ return sb.toString();
}
/**
{
int maxSequenceLength = getMaxSequenceLength(s);
// approx
- int numLines = maxSequenceLength / POSITIONS_PER_LINE + 2 + s.length;
+ int numLines = maxSequenceLength / positionsPerLine + 2 + s.length;
/*
* Roughly size a buffer to hold the whole output
*/
- StringBuilder sb = new StringBuilder(numLines * POSITIONS_PER_LINE);
- printHeaders(sb);
+ StringBuilder sb = new StringBuilder(numLines * positionsPerLine);
+ int spaceEvery = this.nucleotide != null && this.nucleotide ? 3 : 10;
+ int chunksPerLine = positionsPerLine / spaceEvery;
for (SequenceI seq : s)
{
sb.append(newline);
sb.append(HASHSIGN + seq.getName()).append(newline);
int startPos = 0;
- while (startPos <= seq.getLength())
+ while (startPos < seq.getLength())
{
- char[] subSequence = seq.getSequence(startPos, startPos
- + POSITIONS_PER_LINE);
- sb.append(subSequence);
+ boolean firstChunk = true;
+ /*
+ * print next line for this sequence
+ */
+ int lastPos = startPos + positionsPerLine; // exclusive
+ for (int j = 0; j < chunksPerLine; j++)
+ {
+ char[] subSequence = seq.getSequence(startPos,
+ Math.min(lastPos, startPos + positionsPerLine));
+ if (subSequence.length > 0)
+ {
+ if (!firstChunk)
+ {
+ sb.append(SPACE);
+ }
+ sb.append(subSequence);
+ firstChunk = false;
+ }
+ startPos += subSequence.length;
+ }
sb.append(newline);
- startPos += POSITIONS_PER_LINE;
}
}
* @throws IOException
*/
protected void assertInterleaved(boolean isIt, String dataLine)
- throws IOException
+ throws FileFormatException
{
+ // once a layout has been recorded, every later data line must agree with it
if (this.interleaved != null && isIt != this.interleaved.booleanValue())
{
- throw new IOException(
+ throw new FileFormatException(
"Parse error: mix of interleaved and noninterleaved detected, at line: "
+ dataLine);
}
+ // Boolean.valueOf returns the shared TRUE/FALSE instances; the Boolean
+ // constructor used previously is deprecated
this.interleaved = Boolean.valueOf(isIt);
+ setAlignmentProperty(PROP_INTERLEAVED, interleaved.toString());
}
public boolean isInterleaved()
+ (nucleotide ? " not" : ""));
}
}
+
+ /**
+  * Print the given alignment in MEGA format. If the alignment was created by
+  * parsing a MEGA file, it should have properties set (e.g. Title) which can
+  * influence the output.
+  * <p>
+  * The number of positions per output line is taken from the alignment's
+  * stored line length property. If that property is absent, is not a valid
+  * integer, or is not positive, the default line length is used instead, so
+  * that the block arithmetic in the print methods never divides by zero.
+  *
+  * @param al
+  *          the alignment to format
+  * @return the MEGA headers followed by the formatted sequence data
+  */
+ @Override
+ public String print(AlignmentI al)
+ {
+   this.nucleotide = al.isNucleotide();
+
+   // start from the default and only override with a usable stored value
+   this.positionsPerLine = DEFAULT_LINE_LENGTH;
+   String lineLength = (String) al.getProperty(PROP_LINELENGTH);
+   if (lineLength != null)
+   {
+     try
+     {
+       int stored = Integer.parseInt(lineLength.trim());
+       if (stored > 0)
+       {
+         this.positionsPerLine = stored;
+       }
+     } catch (NumberFormatException ignored)
+     {
+       // malformed property value: keep the default line length
+     }
+   }
+   return printHeaders(al) + print(al.getSequencesArray());
+ }
+
+ /**
+  * Writes one header statement of the form
+  * {@code !<keyword> <value>;} to the buffer, but only when the alignment
+  * has a value for the named property. Nothing is appended otherwise.
+  *
+  * @param al
+  *          the alignment to read the property from
+  * @param headers
+  *          the buffer the statement is appended to
+  * @param propertyName
+  *          the key under which the value is stored on the alignment
+  * @param propertyKeyword
+  *          the keyword to write after the leading '!'
+  */
+ protected void printProperty(AlignmentI al, StringBuilder headers,
+     String propertyName, String propertyKeyword)
+ {
+   final String value = (String) al.getProperty(propertyName);
+   if (value == null)
+   {
+     return;
+   }
+   headers.append(BANG);
+   headers.append(propertyKeyword);
+   headers.append(SPACE);
+   headers.append(value);
+   headers.append(SEMICOLON);
+   headers.append(newline);
+ }
+
+ /**
+  * Answers how many sequence positions are written on each output line.
+  *
+  * @return the current positions-per-line setting
+  */
+ public int getPositionsPerLine()
+ {
+   return this.positionsPerLine;
+ }
+
+ /**
+  * Chooses how many sequence positions are written on each output line. Note
+  * that the interleaved output groups these in blocks of 3 (nucleotide) or 10
+  * (peptide).
+  *
+  * @param p
+  *          the number of positions per line
+  */
+ public void setPositionsPerLine(int p)
+ {
+   positionsPerLine = p;
+ }
}
import static org.testng.AssertJUnit.assertTrue;
import static org.testng.AssertJUnit.fail;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
*/
public class MegaFileTest
{
- private static final String THIRTY_CHARS = "012345678901234567890123456789";
+ private static final String TWENTY_CHARS = "9876543210abcdefghij";
+
+ private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
//@formatter:off
private static final String INTERLEAVED =
"#MEGA\n"+
"TITLE: Interleaved sequence data\n\n" +
"#U455 ABCDEF\n" +
- "#CPZANT MNOPQR\n\n" + "#U455 KLMNOP\n" +
- "#CPZANT WXYZ";
+ "#CPZANT MNOPQR\n\n" +
+ "#U455 KLMNOP\n" +
+ "#CPZANT WXYZGC";
private static final String INTERLEAVED_NOHEADERS =
"#U455 ABCDEF\n"
- + "#CPZANT MNOPQR\n\n"
+ + "#CPZANT MNOPQR\n\n"
+ "#U455 KLMNOP\n"
- + "#CPZANT WXYZ\n";
+ + "#CPZANT WXYZGC\n";
- // interleaved sequences, one with 60 one with 120 characters (on overlong
- // input lines)
- private static final String INTERLEAVED_LONGERTHAN50 =
+ // interleaved sequences, with 50 residues
+ private static final String INTERLEAVED_50RESIDUES =
"#MEGA\n"
- + "TITLE: Interleaved sequence data\n\n"
- + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + "\n"
- + "#CPZANT "
- + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS;
+ + "!TITLE Interleaved sequence data\n\n"
+ + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n"
+ + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
private static final String NONINTERLEAVED =
"#MEGA\n"
- + "TITLE: Noninterleaved sequence data\n\n"
+ + "!TITLE Noninterleaved sequence data\n\n"
+ "#U455 \n"
+ "ABCFEDHIJ\n"
+ "MNOPQR\n\n"
+ "#CPZANT \n"
+ "KLMNOPWXYZ\n"
+ "CGATC\n";
-
- // Sequence length 60 (split over two lines)
- private static final String NONINTERLEAVED_LONGERTHAN50 =
- "#SIXTY\n" + THIRTY_CHARS + "\n" + THIRTY_CHARS;
-
- // this one starts noninterleaved then switches to interleaved
+
+ // this one starts interleaved then switches to non-interleaved
private static final String MIXED =
"#MEGA\n"
- + "TITLE: This is a mess\n\n" + "#CPZANT KLMNOPWXYZCGATC\n\n"
+ + "!TITLE This is a mess\n\n"
+ + "#CPZANT KLMNOPWXYZCGATC\n\n"
+ "#U455\n "
+ "ABCFEDHIJ\n";
// interleaved with a new sequence appearing in the second block :-O
private static final String INTERLEAVED_SEQUENCE_ERROR =
"#MEGA" + "\n"
- + "TITLE: Interleaved sequence data\n\n"
+ + "!TITLE Interleaved sequence data\n\n"
+ "#U455 ABCDEF\n"
+ "#CPZANT MNOPQR\n\n"
+ "#U456 KLMNOP\n";
// interleaved data with !Title, !Format and !Description headers, bases in triplet groups
- private static final String FANCY_FORMAT =
+ private static final String INTERLEAVED_WITH_DESCRIPTION =
"#MEGA\n"
- + "!Title Fancy format data;\n"
- + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n"
+ + "!Title Data with description;\n"
+ + "!Format DataType=DNA indel=- CodeTable=Standard Missing=? MatchChar=.;\n\n"
+ "!Description\n"
+ " Line one of description\n"
+ " Line two of description;\n\n"
- + "!Gene=Adh Property=Coding CodonStart=1;\n"
- + "#U455 ABC DEF\n"
- + "#CPZANT MNO PQR\n\n"
- + "#U455 KLM NOP\n"
- + "#CPZANT WXY Z\n";
-
- // interleaved sequence data for two genes
- private static final String TWO_GENES =
- "#MEGA\n"
- + "!Title Fancy format data;\n"
- + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n"
- + "!Description\n"
- + " Line one of description\n"
- + " Line two of description;\n\n"
- + "!Gene=Adh Property=Coding CodonStart=1;\n"
- + "#U455 ABC DEF\n"
- + "#CPZANT MNO PQR\n\n"
- + "#U455 KLM NOP\n"
- + "#CPZANT WXY Z\n"; //TODO complete
+ + "#U455 CGC GTA\n"
+ + "#CPZANT ATC GGG\n\n"
+ + "#U455 CGA TTT\n"
+ + "#CPZANT CAA TGC\n";
//@formatter:on
// check sequence data
assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
.getSequenceAsString());
- assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+ assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
.getSequenceAsString());
assertTrue("File format is not flagged as interleaved",
testee.isInterleaved());
System.out.println(printed);
// normally output should match input
// we cheated here with a number of short input lines
- String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n"
- + "#U455 ABCDEFKLMNOP\n" + "#CPZANT MNOPQRWXYZ"
+ // nb don't get Title in output if not calling print(AlignmentI)
+ String expected = "#MEGA\n\n" + "#U455 ABCDEF\n"
+ + "#CPZANT MNOPQR\n\n" + "#U455 KLMNOP\n" + "#CPZANT WXYZGC"
+ "\n";
assertEquals("Print format wrong", expected, printed);
}
AppletFormatAdapter.PASTE);
String printed = testee.print();
System.out.println(printed);
- // normally output should match input
- // we cheated here with a number of short input lines
- String expected = "#MEGA\n\n" + "#U455 ABCDEFKLMNOP" + "\n"
- + "#CPZANT MNOPQRWXYZ\n";
- assertEquals("Print format wrong", expected, printed);
+
+ assertEquals("Print format wrong", "#MEGA\n\n" + INTERLEAVED_NOHEADERS,
+ printed);
}
/**
{
MegaFile testee = new MegaFile(NONINTERLEAVED,
AppletFormatAdapter.PASTE);
+ assertEquals(10, testee.getPositionsPerLine());
String printed = testee.print();
System.out.println(printed);
// normally output should match input
// we cheated here with a number of short input lines
- String expected = "#MEGA\n"
- + "!TITLE Noninterleaved sequence data;\n\n"
- + "#U455\n" + "ABCFEDHIJMNOPQR\n\n" + "#CPZANT\n"
- + "KLMNOPWXYZCGATC\n";
+ String expected = "#MEGA\n\n"
+ + "#U455\n" + "ABCFEDHIJM\nNOPQR\n\n"
+ + "#CPZANT\n" + "KLMNOPWXYZ\nCGATC\n";
assertEquals("Print format wrong", expected, printed);
}
/**
 * Parses interleaved data with 50 residues per line, checks that this line
 * length was recorded, then prints with 20 positions per line and expects
 * three blocks (20 + 20 + 10), each line split into space-separated groups
 * of 10.
 *
 * @throws IOException
 */
@Test(groups = { "Functional" })
public void testPrint_interleavedMultiLine() throws IOException
{
- MegaFile testee = new MegaFile(INTERLEAVED_LONGERTHAN50,
+ MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
AppletFormatAdapter.PASTE);
+ assertEquals(50, testee.getPositionsPerLine());
+ /*
+ * now simulate choosing 20 residues per line on output
+ */
+ testee.setPositionsPerLine(20);
String printed = testee.print();
System.out.println(printed);
- // first sequence is length 60, second length 120
- // should be output as 50 + 10 + 0 and as 50 + 50 + 20 character lines
- // respectively
- String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n"
- + "#U455 " + THIRTY_CHARS + "01234567890123456789\n"
- + "#CPZANT " + THIRTY_CHARS + "01234567890123456789\n" + "\n"
- + "#U455 " + "0123456789\n" + "#CPZANT " + THIRTY_CHARS
- + "01234567890123456789\n\n" + "#U455 \n" + "#CPZANT "
- + "01234567890123456789"
- + "\n";
+ //@formatter:off
+ // U455 residues: 0123456789klmnopqrstABCDEFGHIJ9876543210abcdefghij
+ String expected =
+ "#MEGA\n\n" +
+ "#U455 0123456789 klmnopqrst\n" + // first 20
+ "#CPZANT 9876543210 abcdefghij\n\n" +
+ "#U455 ABCDEFGHIJ 9876543210\n" + // next 20
+ "#CPZANT 0123456789 klmnopqrst\n\n" +
+ "#U455 abcdefghij\n" + // last 10
+ "#CPZANT ABCDEFGHIJ\n";
+ //@formatter:on
assertEquals("Print format wrong", expected, printed);
}
/**
 * Parses a 60 residue non-interleaved sequence read from lines of 30, checks
 * that the longest input line length (30) was recorded, then prints with 25
 * positions per line.
 *
 * @throws IOException
 */
@Test(groups = { "Functional" })
public void testPrint_noninterleavedMultiLine() throws IOException
{
+ final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
+ + "\n" + TWENTY_CHARS + "9993332221\n";
MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
AppletFormatAdapter.PASTE);
+ assertEquals(30, testee.getPositionsPerLine());
+ testee.setPositionsPerLine(25);
String printed = testee.print();
- System.out.println(printed);
// 60 character sequence should be output as lines of 25, 25 and 10 residues
- String expected = "#MEGA\n\n" + "#SIXTY\n" + THIRTY_CHARS
- + "01234567890123456789\n" + "0123456789\n";
+ String expected = "#MEGA\n\n" + "#SIXTY\n"
+ + "0123456789klmnopqrstABCDE\n" + "FGHIJ9876543210abcdefghij\n"
+ + "9993332221\n";
assertEquals("Print format wrong", expected, printed);
}
/**
- * Test paste / parse of 'fancy format' data.
+ * Test parse of data including description
*
* @throws IOException
*/
@Test(groups = { "Functional" })
- public void testParse_fancyFormat() throws IOException
+ public void testParse_withDescription() throws IOException
{
- MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE);
- assertEquals("Title not as expected", "Fancy format data",
+ MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
+ AppletFormatAdapter.PASTE);
+ assertEquals("Title not as expected", "Data with description",
testee.getAlignmentProperty(MegaFile.PROP_TITLE));
- // assertEquals("Format property not parsed",
- // "DataType=DNA indel=- CodeTable=Standard;",
- // testee.getAlignmentProperty(MegaFile.PROP_FORMAT));
Vector<SequenceI> seqs = testee.getSeqs();
// should be 2 sequences
assertEquals("Expected two sequences", 2, seqs.size());
assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
.getName());
// check sequence data
- assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
+ assertEquals("First sequence data wrong", "CGCGTACGATTT", seqs.get(0)
.getSequenceAsString());
- assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+ assertEquals("Second sequence data wrong", "ATCGGGCAATGC", seqs.get(1)
.getSequenceAsString());
assertTrue("File format is not flagged as interleaved",
testee.isInterleaved());
- assertEquals("Description property not parsed",
- " Line one of description\n"
- + " Line two of description\n",
+ assertEquals(
+ "Description property not parsed",
+ " Line one of description\n" + " Line two of description",
testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
}
assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
assertEquals("", MegaFile.getValue("Name"));
}
+
+ /**
+  * Round-trip check: parse MEGA text into an alignment, write that alignment
+  * back out in MEGA format, and confirm the output is (functionally) the same
+  * as the input.
+  *
+  * @throws IOException
+  */
+ @Test(groups = "Functional")
+ public void testRoundTrip_Interleaved() throws IOException
+ {
+   //@formatter:off
+   final String expected =
+   "#MEGA\n!Title Data with description;\n" +
+   "!Description Line one of description\n" +
+   " Line two of description;\n" +
+   "!Format\n" +
+   " DataType=DNA CodeTable=Standard\n" +
+   " NSeqs=2 NSites=12\n" +
+   " Indel=- Identical=. Missing=?;\n\n" +
+   "#U455 CGC GTA\n" +
+   "#CPZANT ATC GGG\n\n" +
+   "#U455 CGA TTT\n" +
+   "#CPZANT CAA TGC\n";
+   //@formatter:on
+
+   // read the fixture into an alignment via the format adapter
+   AlignmentI parsed = new AppletFormatAdapter().readFile(
+       INTERLEAVED_WITH_DESCRIPTION, AppletFormatAdapter.PASTE, "MEGA");
+
+   // write it back out with a fresh MegaFile and compare
+   String actual = new MegaFile().print(parsed);
+   assertEquals("Roundtrip didn't match", expected, actual);
+ }
}