/*
* Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
* Copyright (C) 2014 The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
 * A parser for input or output of MEGA format files.
 * <p>
 * Tamura K, Stecher G, Peterson D, Filipski A, and Kumar S (2013) MEGA6:
 * Molecular Evolutionary Genetics Analysis Version 6.0. Molecular Biology and
 * Evolution 30: 2725-2729.
 * <p>
 * MEGA file format is supported as described in
 * <a href="http://www.megasoftware.net/manual.pdf">http://www.megasoftware.net/manual.pdf</a>
 * <p>
 * Limitations:
 * <ul>
 * <li>nested comments (marked by [ ]) are accepted but not preserved</li>
 * <li>to be completed</li>
 * </ul>
 *
 * @see <a href="http://www.megasoftware.net/">http://www.megasoftware.net/</a>
 */
public class MegaFile extends AlignFile
{
/*
 * number of sequence positions per output line used by print(AlignmentI)
 * when the alignment carries no MEGA_LINELENGTH property
 */
private static final int DEFAULT_LINE_LENGTH = 60;
// written in front of each keyword line of the !Format statement on output
private static final String INDENT = " ";
/*
 * keywords (and some values) of the !Format statement
 */
private static final String N_SITES = "NSites";
private static final String N_SEQS = "NSeqs";
private static final String MISSING = "Missing";
private static final String IDENTICAL = "Identical";
private static final String INDEL = "Indel";
private static final String CODETABLE = "CodeTable";
private static final String PROTEIN = "Protein";
private static final String NUCLEOTIDE = "Nucleotide";
private static final String DATATYPE = "DataType";
/*
 * comment delimiters; comments may be nested
 */
private static final char COMMENT_START = '[';
private static final char COMMENT_END = ']';
/*
 * punctuation: '#' starts the file id line and sequence ids, '!' starts a
 * command, ';' ends a command, '=' separates keyword from value
 */
private static final String HASHSIGN = "#";
private static final String SEMICOLON = ";";
private static final String BANG = "!";
private static final String EQUALS = "=";
// file identifier expected on the first line (matched case-insensitively)
private static final String MEGA_ID = HASHSIGN + "MEGA";
/*
 * command names (each is preceded by '!' in the file)
 */
private static final String TITLE = "Title";
private static final String FORMAT = "Format";
private static final String DESCRIPTION = "Description";
private static final String GENE = "Gene";
private static final String DOMAIN = "Domain";
/*
 * names of properties to save to the alignment (may affect eventual output
 * format)
 */
static final String PROP_TITLE = "MEGA_TITLE";
static final String PROP_INTERLEAVED = "MEGA_INTERLEAVED";
static final String PROP_DESCRIPTION = "MEGA_DESCRIPTION";
static final String PROP_CODETABLE = "MEGA_CODETABLE";
static final String PROP_IDENTITY = "MEGA_IDENTITY";
static final String PROP_MISSING = "MEGA_MISSING";
static final String PROP_DATATYPE = "MEGA_DATATYPE";
// number of bases per line of file (value is inferred from the longest data
// line read while parsing)
static final String PROP_LINELENGTH = "MEGA_LINELENGTH";
// TODO: need a controlled name for Gene as a feature if we want to be able to
// output the MEGA file with !Gene headers
// Open question: how should Gene/Domain information be handled if the
// sequences get realigned?
// initial capacity of the buffer that accumulates each sequence's data
private static final int SEQBUFFERSIZE = 256;
private static final String SPACE = " ";
/*
 * number of sequence positions output per line; while parsing this tracks
 * the longest data line read so far
 */
private int positionsPerLine;
// value of the Title header, if one was read (used in warning messages)
private String title;
// gap character as declared by Indel= in the !Format statement; null if the
// file does not declare one, in which case addProperties leaves the
// alignment's gap character untouched
private Character gapCharacter;
// this can be True, False or null (meaning not asserted in file)
private Boolean nucleotide;
// set once a blank line has been read after some sequence data, i.e. once we
// have seen one block of interleaved data
private boolean firstDataBlockRead = false;
// this can be True, False or null (meaning we don't know yet)
private Boolean interleaved;
/**
 * Creates a MegaFile with no input attached; all parse state is left at its
 * initial (unset) values.
 */
public MegaFile()
{
}
/**
 * Creates a MegaFile by delegating to the two-argument superclass
 * constructor.
 *
 * @param inFile
 *          passed unchanged to the superclass (presumably a file name or
 *          data string - confirm against AlignFile)
 * @param type
 *          passed unchanged to the superclass (presumably the input source
 *          type - confirm against AlignFile)
 * @throws IOException
 *           if the superclass constructor throws it
 */
public MegaFile(String inFile, String type) throws IOException
{
super(inFile, type);
}
/**
 * Creates a MegaFile by delegating to the superclass constructor that takes
 * an existing FileParse.
 *
 * @param source
 *          passed unchanged to the superclass
 * @throws IOException
 *           if the superclass constructor throws it
 */
public MegaFile(FileParse source) throws IOException
{
super(source);
}
/**
 * Parses the input stream: first the header lines (#MEGA, Title, Description,
 * Format), then every remaining non-comment line as either a !Gene command, a
 * !Domain command or sequence data. When the input is exhausted, the longest
 * data line length seen is saved as the {@code MEGA_LINELENGTH} alignment
 * property and the accumulated sequences are stored via
 * {@link #setSequences(Map)}.
 *
 * @throws IOException
 *           if reading fails or the data cannot be parsed
 */
@Override
public void parse() throws IOException
{
  /*
   * Read and process MEGA and Title/Format/Description headers if present.
   * Returns the first data line following the headers.
   */
  String dataLine = parseHeaderLines();

  /*
   * Temporary store of {sequenceId, positionData} while parsing; a
   * LinkedHashMap so that sequences keep the order in which they were first
   * read from the file
   */
  Map<String, StringBuilder> seqData = new LinkedHashMap<String, StringBuilder>();

  /*
   * The id of the sequence being read (for non-interleaved).
   * NOTE(review): this starts as "" rather than null, so the "no sequence id
   * context" check in parseNoninterleavedDataLine cannot fire for data that
   * precedes the first sequence id - confirm whether that leniency is wanted.
   */
  String currentId = "";

  while (dataLine != null)
  {
    dataLine = dataLine.trim();
    if (dataLine.length() == 0)
    {
      /*
       * Blank line after processing some data marks the end of the first
       * data block
       */
      if (!seqData.isEmpty())
      {
        this.firstDataBlockRead = true;
      }
    }
    else if (dataLine.startsWith(BANG + GENE))
    {
      parseGene(dataLine);
    }
    else if (dataLine.startsWith(BANG + DOMAIN))
    {
      parseDomain(dataLine);
    }
    else
    {
      currentId = parseDataLine(dataLine, seqData, currentId);
    }
    dataLine = nextNonCommentLine();
  }

  // remember the (longest) line length read in, so we can output the same
  setAlignmentProperty(PROP_LINELENGTH, String.valueOf(positionsPerLine));

  setSequences(seqData);
}
/**
 * Placeholder for parsing a !Gene command line. The body is empty, so the
 * command is currently recognised by parse() but otherwise ignored.
 *
 * @param dataLine
 *          the (trimmed) input line, starting with "!Gene"
 */
protected void parseGene(String dataLine)
{
}
/**
 * Placeholder for parsing a !Domain command line. The body is empty, so the
 * command is currently recognised by parse() but otherwise ignored.
 *
 * @param dataLine
 *          the (trimmed) input line, starting with "!Domain"
 */
private void parseDomain(String dataLine)
{
}
/**
 * Reads forward to the next line that is not (wholly) inside a comment,
 * starting from a state of not being within any comment. MEGA comments are
 * enclosed in [ ] brackets and may be nested.
 *
 * @return the line with comment text removed, or null at end of file
 * @throws IOException
 *           if reading fails or a comment follows data on the same line
 */
protected String nextNonCommentLine() throws IOException
{
  final int notInComment = 0;
  return nextNonCommentLine(notInComment);
}
/**
 * Returns the next line that is not a comment, or null at end of file.
 * Comments in MEGA are within [ ] brackets, and may be nested and span
 * several lines. Lines are consumed until one ends outside any comment; that
 * line is returned with its comment text removed (which may leave an empty
 * string).
 * <p>
 * Implemented as a loop rather than by recursion so that a comment spanning
 * very many lines cannot exhaust the call stack.
 *
 * @param depth
 *          current depth of nesting of comments while parsing
 * @return the non-comment content of the line, or null at end of file (a
 *         warning is written to stderr if a comment was still open)
 * @throws IOException
 *           if reading fails, or (as FileFormatException) if a comment
 *           starts after data on the same line while not already in a
 *           comment
 */
protected String nextNonCommentLine(final int depth) throws IOException
{
  int currentDepth = depth;
  while (true)
  {
    String data = nextLine();
    if (data == null)
    {
      if (currentDepth > 0)
      {
        System.err.println("Warning: unterminated comment in data file");
      }
      return null;
    }

    /*
     * reject unnested comment following data on the same line; leading
     * whitespace before the bracket is not data, so an indented comment is
     * accepted
     */
    int leftBracket = data.indexOf(COMMENT_START);
    if (currentDepth == 0 && leftBracket > 0
            && data.substring(0, leftBracket).trim().length() > 0)
    {
      throw new FileFormatException(
              "Can't parse comment following data at " + data);
    }

    int newDepth = commentDepth(data, currentDepth);
    if (newDepth <= 0)
    {
      /*
       * not in a comment by end of this line; return what is left after
       * removing comment text (judged from the depth at start of line)
       */
      return getNonCommentContent(data, currentDepth);
    }

    /*
     * still inside a (possibly nested) comment: keep reading
     */
    currentDepth = newDepth;
  }
}
/**
 * Strips comment text from a line. Characters are kept only while the
 * comment nesting level is zero; the level starts at the supplied depth (for
 * a comment carried over from earlier lines), rises at each '[' and falls at
 * each ']' while positive. A ']' met at level zero is not a delimiter and is
 * kept as ordinary content.
 *
 * @param data
 *          input data
 * @param depth
 *          nested depth of comments pending termination
 * @return the input with all comment text (and comment brackets) removed
 * @throws FileFormatException
 *           declared for callers; not thrown by the current implementation
 */
protected static String getNonCommentContent(String data, int depth)
        throws FileFormatException
{
  int nesting = depth;
  StringBuilder kept = new StringBuilder(data.length());
  for (char ch : data.toCharArray())
  {
    if (ch == COMMENT_START)
    {
      nesting++;
    }
    else if (ch == COMMENT_END && nesting > 0)
    {
      nesting--;
    }
    else if (nesting == 0)
    {
      // ordinary character (or unmatched ']') outside any comment
      kept.append(ch);
    }
  }
  return kept.toString();
}
/**
 * Works out the comment nesting depth in force after a line has been read:
 * the starting depth, plus one for each '[' and minus one for each ']' met
 * while the running depth is positive. A ']' met at depth zero is ignored
 * (not treated as a comment delimiter).
 *
 * @param data
 *          input line
 * @param depth
 *          current comment nested depth before parsing the line
 * @return new depth after parsing the line
 */
protected static int commentDepth(CharSequence data, int depth)
{
  int nesting = depth;
  final int end = data.length();
  int pos = 0;
  while (pos < end)
  {
    switch (data.charAt(pos++))
    {
    case COMMENT_START:
      nesting++;
      break;
    case COMMENT_END:
      if (nesting > 0)
      {
        nesting--;
      }
      break;
    default:
      break;
    }
  }
  return nesting;
}
/**
 * Convert the parsed sequence strings to objects and store them in the model.
 * One Sequence is created per map entry, in the map's iteration order, and
 * appended to {@code seqs}.
 *
 * @param seqData
 *          sequence id mapped to the character data accumulated for it
 */
protected void setSequences(Map<String, StringBuilder> seqData)
{
  Set<Entry<String, StringBuilder>> datasets = seqData.entrySet();
  for (Entry<String, StringBuilder> dataset : datasets)
  {
    String sequenceId = dataset.getKey();
    StringBuilder characters = dataset.getValue();
    SequenceI s = new Sequence(sequenceId, characters.toString());
    this.seqs.addElement(s);
  }
}
/**
 * Process one line of sequence data. Three forms are distinguished:
 * <ul>
 * <li>no leading sequence identifier: the line is data for the current id
 * (non-interleaved format)</li>
 * <li>a sequence identifier and nothing else: a header line for
 * non-interleaved data; no data is stored</li>
 * <li>a sequence identifier followed by data: one line of interleaved
 * data</li>
 * </ul>
 * Returns the sequence id (implicit or explicit) for this line.
 *
 * @param dataLine
 *          the input line
 * @param seqData
 *          sequence id mapped to the data accumulated for it so far
 * @param currentId
 *          the id in force before this line
 * @return the id in force after this line
 * @throws IOException
 *           if the line cannot be parsed in the current context
 */
protected String parseDataLine(String dataLine,
        Map<String, StringBuilder> seqData, String currentId)
        throws IOException
{
  String seqId = getSequenceId(dataLine);
  if (seqId == null)
  {
    /*
     * Just character data
     */
    parseNoninterleavedDataLine(dataLine, seqData, currentId);
    return currentId;
  }

  boolean idOnly = (HASHSIGN + seqId).trim().equals(dataLine.trim());
  if (!idOnly)
  {
    /*
     * Sequence id followed by data
     */
    parseInterleavedDataLine(dataLine, seqData, seqId);
  }
  /*
   * else sequence id only - header line for noninterleaved data
   */
  return seqId;
}
/**
 * Add a line of sequence data to the buffer for the given sequence id. Start
 * a new one if we haven't seen it before. Any spaces within the line are
 * removed before it is stored (as is done for interleaved data), so that
 * space-separated blocks of residues do not put spaces into the sequence.
 * The longest line length recorded for output counts positions only.
 *
 * @param dataLine
 *          a line holding sequence data only (no sequence id)
 * @param seqData
 *          sequence id mapped to the data accumulated for it so far
 * @param currentId
 *          id of the sequence this data belongs to
 * @throws IOException
 *           if currentId is null, or (as FileFormatException) if interleaved
 *           data has already been read from this file
 */
protected void parseNoninterleavedDataLine(String dataLine,
        Map<String, StringBuilder> seqData, String currentId)
        throws IOException
{
  if (currentId == null)
  {
    /*
     * Oops. Data but no sequence id context.
     */
    throw new IOException("No sequence id context at: " + dataLine);
  }

  assertInterleaved(false, dataLine);

  StringBuilder sb = getSequenceDataBuffer(seqData, currentId);

  /*
   * Add the current line of data to the sequence, without internal spaces
   */
  String data = dataLine.indexOf(SPACE) == -1 ? dataLine : dataLine
          .replace(SPACE, "");
  sb.append(data);

  setPositionsPerLine(Math.max(positionsPerLine, data.length()));
}
/**
 * Get the sequence data for this sequence id, starting a new one if
 * necessary. A newly created (empty) buffer is added to the map before being
 * returned.
 *
 * @param seqData
 *          sequence id mapped to the data accumulated for it so far
 * @param currentId
 *          the sequence id to look up
 * @return the buffer held in the map for the id (never null)
 */
protected StringBuilder getSequenceDataBuffer(
        Map<String, StringBuilder> seqData, String currentId)
{
  StringBuilder sb = seqData.get(currentId);
  if (sb == null)
  {
    // first data met for this sequence id, start a new buffer
    sb = new StringBuilder(SEQBUFFERSIZE);
    seqData.put(currentId, sb);
  }
  return sb;
}
/**
 * Parse one line of interleaved data e.g.
 *
 * <pre>
 * #TheSeqId CGATCGCATGCA
 * </pre>
 *
 * The data following the id is appended (with any spaces removed) to the
 * buffer for that id. A buffer is created for the id even if the line turns
 * out to carry no data.
 *
 * @param dataLine
 *          the whole input line, starting with '#' and the sequence id
 * @param seqData
 *          sequence id mapped to the data accumulated for it so far
 * @param seqId
 *          the sequence id already extracted from the line
 * @throws IOException
 *           if a new sequence id appears after the first data block, or (as
 *           FileFormatException) if non-interleaved data has already been
 *           read from this file
 */
protected void parseInterleavedDataLine(String dataLine,
        Map<String, StringBuilder> seqData, String seqId)
        throws IOException
{
  /*
   * New sequence found in second or later data block - error.
   */
  if (this.firstDataBlockRead && !seqData.containsKey(seqId))
  {
    throw new IOException(
            "Parse error: misplaced new sequence starting at " + dataLine);
  }

  StringBuilder sb = getSequenceDataBuffer(seqData, seqId);

  // skip the leading '#' and the id; what remains is the data
  String data = dataLine.substring(seqId.length() + 1).trim();

  /*
   * Do nothing more if this line is _only_ a sequence id with no data
   * following.
   */
  if (data.length() == 0)
  {
    return;
  }

  /*
   * Remove any internal spaces
   */
  if (data.indexOf(SPACE) != -1)
  {
    data = data.replace(SPACE, "");
  }
  sb.append(data);
  setPositionsPerLine(Math.max(positionsPerLine, data.length()));
  assertInterleaved(true, dataLine);
}
/**
 * Extracts a sequence identifier from the start of a line. For a line that
 * begins with '#', the identifier is the text after the '#' up to (not
 * including) the first space, or to the end of the line if there is no
 * space; e.g. "#abcde CGT" gives "abcde". Any other line (or null) gives
 * null.
 *
 * @param dataLine
 *          the line to inspect; may be null
 * @return the identifier, or null if the line does not start with '#'
 */
public static String getSequenceId(String dataLine)
{
  // TODO refactor to a StringUtils type class
  if (dataLine == null || !dataLine.startsWith(HASHSIGN))
  {
    return null;
  }
  int idEnd = dataLine.indexOf(" ");
  if (idEnd == -1)
  {
    idEnd = dataLine.length();
  }
  return dataLine.substring(1, idEnd);
}
/**
 * Reads and processes the header section: blank lines and the #MEGA line are
 * skipped; a Title line is saved (as field and alignment property);
 * !Description and !Format statements are handed to their own parsers. The
 * first line that is none of these ends the header section.
 *
 * @return the first (trimmed) line that is not part of the headers, or null
 *         if end of file was reached first
 * @throws IOException
 *           if reading fails or a header cannot be parsed
 */
protected String parseHeaderLines() throws IOException
{
  String line = nextNonCommentLine();
  while (line != null)
  {
    line = line.trim();
    boolean skippable = line.length() == 0
            || line.toUpperCase().startsWith(MEGA_ID);
    if (!skippable)
    {
      if (isTitle(line))
      {
        this.title = getValue(line);
        setAlignmentProperty(PROP_TITLE, title);
      }
      else if (line.startsWith(BANG + DESCRIPTION))
      {
        parseDescription(line);
      }
      else if (line.startsWith(BANG + FORMAT))
      {
        parseFormat(line);
      }
      else
      {
        /*
         * the first 'data line' i.e. one that is not blank, #MEGA or a
         * recognised header
         */
        return line;
      }
    }
    line = nextNonCommentLine();
  }
  return null;
}
/**
 * Parse a !Format statement. This may be multiline, and is ended by a
 * semicolon. Each line is trimmed before use, so that a terminating
 * semicolon followed by trailing whitespace is still recognised and
 * whitespace-only lines are treated as empty.
 *
 * @param inputLine
 *          the first line of the statement (starting with "!Format")
 * @throws IOException
 *           if reading fails, or (as FileFormatException) if a keyword or
 *           value is not recognised
 */
protected void parseFormat(String inputLine) throws IOException
{
  while (inputLine != null)
  {
    String line = inputLine.trim();
    parseFormatLine(line);
    if (line.endsWith(SEMICOLON))
    {
      break;
    }
    inputLine = nextNonCommentLine();
  }
}
/**
 * Parse one line of a !Format statement. This may contain one or more
 * keyword=value pairs, separated by any amount of whitespace. A leading
 * "!Format" and a trailing semicolon are removed first; a line with nothing
 * else on it (or a null line) is ignored.
 *
 * @param inputLine
 *          one line of the statement
 * @throws FileFormatException
 *           if a token is not a valid keyword=value pair
 */
protected void parseFormatLine(String inputLine)
        throws FileFormatException
{
  if (inputLine == null)
  {
    return;
  }
  String line = inputLine.trim();
  if (line.startsWith(BANG + FORMAT))
  {
    line = line.substring((BANG + FORMAT).length());
  }
  if (line.endsWith(SEMICOLON))
  {
    line = line.substring(0, line.length() - 1);
  }

  // trim before testing for empty, so that e.g. " ;" yields no tokens
  line = line.trim();
  if (line.length() == 0)
  {
    return;
  }

  // split on runs of whitespace, so repeated spaces give no empty tokens
  String[] tokens = line.split("\\s+");
  for (String token : tokens)
  {
    parseFormatKeyword(token);
  }
}
/**
 * Parse a Keyword=Value token. Possible keywords are
 * <ul>
 * <li>DataType= DNA, RNA, Nucleotide, Protein</li>
 * <li>DataFormat= Interleaved, ? (not yet handled: produces a warning)</li>
 * <li>NSeqs= number of sequences (synonym NTaxa); accepted and ignored</li>
 * <li>NSites= number of bases / residues; accepted and ignored</li>
 * <li>Property= Exon (or Coding), Intron (or Noncoding), End (of domain);
 * accepted and ignored</li>
 * <li>Indel= gap character</li>
 * <li>Identical= identity character (to first sequence) (synonym MatchChar)</li>
 * <li>Missing= missing data character</li>
 * <li>CodeTable= Standard, other (MEGA supports various)</li>
 * </ul>
 * Keywords are matched without regard to case. Any other keyword produces a
 * warning on stderr but is not an error.
 *
 * @param token
 *          a single keyword=value pair, with no whitespace
 * @throws FileFormatException
 *           if the token is not of the form keyword=value, or the DataType
 *           value is not recognised
 */
protected void parseFormatKeyword(String token)
        throws FileFormatException
{
  String msg = "Unrecognised Format command: " + token;
  String[] bits = token.split(EQUALS);
  if (bits.length != 2)
  {
    throw new FileFormatException(msg);
  }
  String keyword = bits[0];
  String value = bits[1];

  /*
   * Jalview will work out whether nucleotide or not anyway
   */
  if (keyword.equalsIgnoreCase(DATATYPE))
  {
    if (value.equalsIgnoreCase("DNA") || value.equalsIgnoreCase("RNA")
            || value.equalsIgnoreCase("Nucleotide"))
    {
      this.nucleotide = true;
      // alignment computes whether or not it is nucleotide when created
    }
    else if (value.equalsIgnoreCase(PROTEIN))
    {
      this.nucleotide = false;
    }
    else
    {
      throw new FileFormatException(msg);
    }
    setAlignmentProperty(PROP_DATATYPE, value);
  }
  /*
   * accept non-Standard code table but save in case we want to disable
   * 'translate as cDNA'
   */
  else if (keyword.equalsIgnoreCase(CODETABLE))
  {
    setAlignmentProperty(PROP_CODETABLE, value);
  }
  /*
   * save gap char to set later on alignment once created
   */
  else if (keyword.equalsIgnoreCase(INDEL))
  {
    this.gapCharacter = value.charAt(0);
  }
  else if (keyword.equalsIgnoreCase(IDENTICAL)
          || keyword.equalsIgnoreCase("MatchChar"))
  {
    setAlignmentProperty(PROP_IDENTITY, value);
    if (!".".equals(value))
    {
      System.err.println("Warning: " + token
              + " not supported, Jalview uses '.' for identity");
    }
  }
  else if (keyword.equalsIgnoreCase(MISSING))
  {
    setAlignmentProperty(PROP_MISSING, value);
    System.err.println("Warning: " + token + " not supported");
  }
  else if (keyword.equalsIgnoreCase("Property"))
  {
    /*
     * TODO: figure out what to do with this; can it appear more than once
     * in a file? For now it is accepted and discarded - in particular it
     * must not be saved under PROP_MISSING, which would overwrite the
     * Missing= character and corrupt the headers written on output
     */
  }
  else if (!keyword.equalsIgnoreCase(N_SEQS)
          && !keyword.equalsIgnoreCase("NTaxa")
          && !keyword.equalsIgnoreCase(N_SITES))
  {
    System.err.println("Warning: " + msg);
  }
}
/**
 * Extracts the value part of a "keyword value" or "keyword=value" line: the
 * text after the first '=' if there is one, otherwise the text after the
 * first space (tabs count as spaces). The result is trimmed and one trailing
 * semicolon, if present, is removed. So
 * <ul>
 * <li>Hello World</li>
 * <li>!Hello: \tWorld;</li>
 * <li>!Hello=World</li>
 * </ul>
 * should all return "World". A line with neither '=' nor a space gives an
 * empty string.
 *
 * @param inputLine
 *          the line to inspect; may be null
 * @return the value, or null if the input is null
 */
protected static String getValue(String inputLine)
{
  if (inputLine == null)
  {
    return null;
  }
  String text = inputLine.replace('\t', ' ').trim();

  /*
   * KEYWORD = VALUE takes precedence over KEYWORD VALUE
   */
  int separatorPos = text.indexOf("=");
  if (separatorPos < 0)
  {
    separatorPos = text.indexOf(' ');
  }
  if (separatorPos < 0)
  {
    return "";
  }

  String result = text.substring(separatorPos + 1).trim();
  if (result.endsWith(SEMICOLON))
  {
    result = result.substring(0, result.length() - 1).trim();
  }
  return result;
}
/**
 * Returns true if the input line starts with "TITLE" or "!TITLE" (not case
 * sensitive). The latter is the official format, some older data file
 * examples have it without the !.
 * <p>
 * The comparison is made character by character, ignoring case, and does not
 * depend on the default locale (upper-casing "Title" in e.g. a Turkish
 * locale does not give "TITLE").
 *
 * @param inputLine
 *          the line to test; may be null
 * @return true if the line starts with a Title keyword, else false
 */
protected static boolean isTitle(String inputLine)
{
  if (inputLine == null)
  {
    return false;
  }
  String bangTitle = BANG + TITLE;
  return inputLine.regionMatches(true, 0, TITLE, 0, TITLE.length())
          || inputLine.regionMatches(true, 0, bangTitle, 0,
                  bangTitle.length());
}
/**
 * Parses a !Description statement and saves its text as the
 * {@code MEGA_DESCRIPTION} alignment property. The statement ends at the
 * first line that ends with a semicolon (ignoring trailing whitespace); this
 * may be the first line itself, in which case no further lines are read.
 * Lines other than the last are each followed by a newline in the saved
 * text; empty lines are skipped.
 *
 * @param firstDescriptionLine
 *          the line starting with "!Description"; any text following the
 *          keyword on this line is the start of the description
 * @throws IOException
 *           if reading fails
 */
protected void parseDescription(String firstDescriptionLine)
        throws IOException
{
  StringBuilder desc = new StringBuilder(256);

  /*
   * getValue() removes a trailing semicolon, so test the raw line to find
   * out whether the whole statement is on the first line
   */
  boolean terminated = firstDescriptionLine == null
          || firstDescriptionLine.trim().endsWith(SEMICOLON);
  String firstValue = getValue(firstDescriptionLine);
  if (firstValue != null && firstValue.length() > 0)
  {
    desc.append(firstValue);
    if (!terminated)
    {
      desc.append(newline);
    }
  }

  String line = terminated ? null : nextNonCommentLine();
  while (line != null)
  {
    if (line.trim().endsWith(SEMICOLON))
    {
      // final line: keep everything before the terminating semicolon
      desc.append(line.substring(0, line.lastIndexOf(SEMICOLON)));
      break;
    }
    if (line.length() > 0)
    {
      desc.append(line).append(newline);
    }
    line = nextNonCommentLine();
  }
  setAlignmentProperty(PROP_DESCRIPTION, desc.toString());
}
/**
 * Formats this file's own sequences as MEGA: the #MEGA identifier line
 * followed by the sequence data. No Title, Description or Format headers are
 * written by this method.
 *
 * @return the formatted text
 */
@Override
public String print()
{
  StringBuilder out = new StringBuilder(MEGA_ID);
  out.append(newline);
  out.append(print(getSeqsAsArray()));
  return out.toString();
}
/**
 * Formats the given sequences as MEGA sequence data. Non-interleaved layout
 * is used only if this file has been positively identified as
 * non-interleaved; if it is interleaved, or not yet known, interleaved
 * layout is used.
 *
 * @param s
 *          the sequences to output
 * @return the formatted sequence data
 */
protected String print(SequenceI[] s)
{
  boolean explicitlyNoninterleaved = Boolean.FALSE.equals(this.interleaved);
  return explicitlyNoninterleaved ? printNonInterleaved(s)
          : printInterleaved(s);
}
/**
 * Print to string in Interleaved format - blocks of next N characters of each
 * sequence in turn, where N is the configured positions per line (or the
 * default line length if that has not been set to a positive value). Each
 * line holds the sequence id, padded to the longest id, then the data in
 * space-separated chunks of 3 (nucleotide) or 10 (otherwise).
 * <p>
 * Every block starts at the same offset for all sequences, so sequences of
 * unequal length are each output in full; a sequence that has run out of
 * data has only its id printed in later blocks. This relies on
 * SequenceI.getSequence(from, to) returning a shorter (possibly empty) array
 * when the range extends beyond the sequence - NOTE(review): confirm.
 *
 * @param s
 *          the sequences to output
 * @return the formatted sequence data
 */
protected String printInterleaved(SequenceI[] s)
{
  // guard against an unset line length, which would divide by zero below
  int lineLength = positionsPerLine > 0 ? positionsPerLine
          : DEFAULT_LINE_LENGTH;
  int maxIdLength = getMaxIdLength(s);
  int maxSequenceLength = getMaxSequenceLength(s);
  int numLines = maxSequenceLength / lineLength + 3; // approx

  /*
   * Size a buffer to hold the whole output
   */
  StringBuilder sb = new StringBuilder(numLines
          * (maxIdLength + 2 + lineLength));

  int numDataBlocks = (maxSequenceLength - 1) / lineLength + 1;
  int spaceEvery = this.nucleotide != null && this.nucleotide ? 3 : 10;
  int chunksPerLine = (lineLength + spaceEvery - 1) / spaceEvery;

  // a format width of zero is not valid, so pad ids to at least one column
  String idFormat = "#%-" + Math.max(1, maxIdLength) + "s";

  /*
   * Output as: #Seqid CGT AGC ACT ... or blocks of 10 for peptide
   */
  for (int block = 0; block < numDataBlocks; block++)
  {
    int blockStart = block * lineLength;
    int blockEnd = blockStart + lineLength; // exclusive
    sb.append(newline);
    for (SequenceI seq : s)
    {
      /*
       * output next line for this sequence
       */
      sb.append(String.format(idFormat, seq.getName()));
      int seqFrom = blockStart;
      for (int j = 0; j < chunksPerLine; j++)
      {
        char[] subSequence = seq.getSequence(seqFrom,
                Math.min(blockEnd, seqFrom + spaceEvery));
        if (subSequence.length > 0)
        {
          sb.append(SPACE).append(subSequence);
        }
        seqFrom += subSequence.length;
      }
      sb.append(newline);
    }
  }
  return sb.toString();
}
/**
 * Outputs to string the MEGA header and any other known and relevant
 * alignment properties: the #MEGA line, !Title and !Description (only if the
 * corresponding property is set), and a !Format statement giving DataType,
 * CodeTable, NSeqs, NSites, Indel and (if set) Identical and Missing.
 * <p>
 * NSites is computed from the start and end positions of the first sequence;
 * it is written as 0 if the alignment has no first sequence.
 *
 * @param al
 *          the alignment whose properties are to be written
 * @return the header text, ending with a newline
 */
protected String printHeaders(AlignmentI al)
{
  StringBuilder sb = new StringBuilder(128);
  sb.append(MEGA_ID).append(newline);

  String propertyValue = (String) al.getProperty(PROP_TITLE);
  if (propertyValue != null)
  {
    sb.append(BANG).append(TITLE).append(SPACE)
            .append(propertyValue)
            .append(SEMICOLON)
            .append(newline);
  }
  propertyValue = (String) al.getProperty(PROP_DESCRIPTION);
  if (propertyValue != null)
  {
    sb.append(BANG).append(DESCRIPTION).append(newline)
            .append(propertyValue).append(SEMICOLON)
            .append(newline);
  }

  /*
   * !Format DataType CodeTable
   */
  sb.append(BANG).append(FORMAT).append(newline);
  String dataType = (String) al.getProperty(PROP_DATATYPE);
  if (dataType == null)
  {
    dataType = al.isNucleotide() ? NUCLEOTIDE : PROTEIN;
  }
  sb.append(INDENT).append(DATATYPE).append(EQUALS).append(dataType);
  String codeTable = (String) al.getProperty(PROP_CODETABLE);
  sb.append(SPACE).append(CODETABLE).append(EQUALS)
          .append(codeTable == null ? "Standard" : codeTable)
          .append(newline);

  /*
   * !Format NSeqs NSites
   * NSites the length of any sequence (they should all be the same), excluding
   * gaps?!?
   */
  sb.append(INDENT).append(N_SEQS).append(EQUALS).append(al.getHeight());
  SequenceI seq = al.getSequenceAt(0);
  // no first sequence (empty alignment): report zero sites rather than fail
  int nSites = seq == null ? 0 : seq.getEnd() - seq.getStart() + 1;
  sb.append(SPACE).append(N_SITES).append(EQUALS).append(nSites);
  sb.append(newline);

  /*
   * !Format Indel Identical Missing
   */
  sb.append(INDENT);
  sb.append(INDEL).append(EQUALS).append(al.getGapCharacter());
  String identity = (String) al.getProperty(PROP_IDENTITY);
  if (identity != null)
  {
    sb.append(SPACE).append(IDENTICAL).append(EQUALS).append(identity);
  }
  String missing = (String) al.getProperty(PROP_MISSING);
  if (missing != null)
  {
    sb.append(SPACE).append(MISSING).append(EQUALS).append(missing);
  }
  sb.append(SEMICOLON).append(newline);

  return sb.toString();
}
/**
 * Finds the length of the longest sequence name, so that ids can be padded
 * to a common width on output.
 *
 * @param s
 *          the sequences to inspect
 * @return the longest name length, or 0 if the array is empty
 */
protected static int getMaxIdLength(SequenceI[] s)
{
  // TODO pull up for reuse
  int longest = 0;
  for (int i = 0; i < s.length; i++)
  {
    longest = Math.max(longest, s[i].getName().length());
  }
  return longest;
}
/**
 * Finds the length of the longest of the given sequences.
 *
 * @param s
 *          the sequences to inspect
 * @return the greatest sequence length, or 0 if the array is empty
 */
protected static int getMaxSequenceLength(SequenceI[] s)
{
  // TODO pull up for reuse
  int longest = 0;
  for (int i = 0; i < s.length; i++)
  {
    longest = Math.max(longest, s[i].getLength());
  }
  return longest;
}
/**
 * Print to string in noninterleaved format - all of each sequence in turn.
 * For each sequence a blank line and a "#name" line are written, followed by
 * the sequence data in lines of the configured number of positions per line
 * (or the default line length if that has not been set to a positive value).
 * <p>
 * Each data line is written as one unbroken run of characters, which is what
 * this method has always produced in practice. NOTE(review): splitting lines
 * into space-separated chunks of 3 or 10 (as is done for interleaved output)
 * should only be introduced once it is confirmed that non-interleaved input
 * is parsed with embedded spaces removed.
 *
 * @param s
 *          the sequences to output
 * @return the formatted sequence data
 */
protected String printNonInterleaved(SequenceI[] s)
{
  /*
   * guard against an unset line length: zero would divide by zero below and
   * could never advance through the sequence
   */
  int lineLength = positionsPerLine > 0 ? positionsPerLine
          : DEFAULT_LINE_LENGTH;
  int maxSequenceLength = getMaxSequenceLength(s);
  // approx
  int numLines = maxSequenceLength / lineLength + 2 + s.length;

  /*
   * Roughly size a buffer to hold the whole output
   */
  StringBuilder sb = new StringBuilder(numLines * lineLength);

  for (SequenceI seq : s)
  {
    sb.append(newline);
    sb.append(HASHSIGN + seq.getName()).append(newline);
    int startPos = 0;
    int seqLength = seq.getLength();
    while (startPos < seqLength)
    {
      /*
       * print next line for this sequence
       */
      char[] subSequence = seq.getSequence(startPos, startPos + lineLength);
      if (subSequence.length == 0)
      {
        // no progress is possible; stop rather than loop for ever
        break;
      }
      sb.append(subSequence).append(newline);
      startPos += subSequence.length;
    }
  }
  return sb.toString();
}
/**
 * Flag this file as interleaved or not, based on data format, and save the
 * flag as the {@code MEGA_INTERLEAVED} alignment property. Throws an
 * exception if it has previously been determined to be otherwise.
 *
 * @param isIt
 *          true if the data just read is interleaved, false if not
 * @param dataLine
 *          the line being parsed, quoted in the error message
 * @throws FileFormatException
 *           if the flag was already set to the opposite value
 */
protected void assertInterleaved(boolean isIt, String dataLine)
        throws FileFormatException
{
  if (this.interleaved != null && isIt != this.interleaved.booleanValue())
  {
    throw new FileFormatException(
            "Parse error: mix of interleaved and noninterleaved detected, at line: "
                    + dataLine);
  }
  // valueOf reuses the cached Boolean instances (the constructor is
  // deprecated)
  this.interleaved = Boolean.valueOf(isIt);
  setAlignmentProperty(PROP_INTERLEAVED, interleaved.toString());
}
/**
 * Answers whether this file has been identified as interleaved.
 *
 * @return true if known to be interleaved; false if known to be
 *         non-interleaved or if not (yet) determined
 */
public boolean isInterleaved()
{
  return this.interleaved != null && this.interleaved.booleanValue();
}
/**
 * Adds saved parsed values either as alignment properties, or (in some cases)
 * as specific member fields of the alignment. After the superclass has added
 * its properties, the gap character is set on the alignment if the file
 * declared one, and a warning is written to stderr if the file's declared
 * data type disagrees with what the alignment reports.
 *
 * @param al
 *          the alignment to update
 */
@Override
public void addProperties(AlignmentI al)
{
  super.addProperties(al);
  if (this.gapCharacter != null)
  {
    al.setGapCharacter(gapCharacter);
  }

  /*
   * warn if e.g. DataType=DNA but data is protein (or vice versa)
   */
  if (this.nucleotide != null && this.nucleotide != al.isNucleotide())
  {
    String mismatch = nucleotide ? "nucleotide but it is not"
            : "not nucleotide but it is";
    System.err.println("Warning: " + this.title + " declared " + mismatch);
  }
}
/**
 * Print the given alignment in MEGA format. If the alignment was created by
 * parsing a MEGA file, it should have properties set (e.g. Title) which can
 * influence the output.
 * <p>
 * The number of positions per line is taken from the alignment's
 * {@code MEGA_LINELENGTH} property; the default line length is used if the
 * property is absent, is not a number, or is not positive (as happens when
 * the property was saved from a file in which no data lines were read).
 *
 * @param al
 *          the alignment to output
 * @return the headers followed by the sequence data
 */
@Override
public String print(AlignmentI al)
{
  this.nucleotide = al.isNucleotide();

  this.positionsPerLine = DEFAULT_LINE_LENGTH;
  String lineLength = (String) al.getProperty(PROP_LINELENGTH);
  if (lineLength != null)
  {
    try
    {
      int parsed = Integer.parseInt(lineLength.trim());
      if (parsed > 0)
      {
        this.positionsPerLine = parsed;
      }
    } catch (NumberFormatException e)
    {
      // not fatal for output: keep the default line length
      System.err.println("Warning: ignoring invalid " + PROP_LINELENGTH
              + " value '" + lineLength + "'");
    }
  }

  return printHeaders(al) + print(al.getSequencesArray());
}
/**
 * Answers the number of sequence positions written on each line of output.
 * After parsing, this holds the longest data line length that was read.
 *
 * @return the current positions-per-line setting
 */
public int getPositionsPerLine()
{
  return this.positionsPerLine;
}
/**
 * Changes the number of sequence positions written on each line of output.
 * For interleaved output each line is further divided into space-separated
 * blocks of 3 (nucleotide) or 10 (peptide) positions.
 *
 * @param p
 *          the new number of positions per line
 */
public void setPositionsPerLine(int p)
{
  positionsPerLine = p;
}
}