import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
import java.util.Vector;
/**
/**
* Properties to be added to generated alignment object
*/
- protected Hashtable properties;
+ protected Hashtable<String, String> properties;
long start;
{
if (properties != null && properties.size() > 0)
{
- Enumeration keys = properties.keys();
- Enumeration vals = properties.elements();
+ Enumeration<String> keys = properties.keys();
+ Enumeration<String> vals = properties.elements();
while (keys.hasMoreElements())
{
al.setProperty(keys.nextElement(), vals.nextElement());
* @param value
* - non-null value
*/
- protected void setAlignmentProperty(Object key, Object value)
+ protected void setAlignmentProperty(String key, String value)
{
if (key == null)
{
}
if (properties == null)
{
- properties = new Hashtable();
+ properties = new Hashtable<String, String>();
}
properties.put(key, value);
}
- protected Object getAlignmentProperty(Object key)
+ /**
+ * Returns the alignment properties as a set of key/value entries, or null
+ * if the properties table has not been created.
+ *
+ * @return the entry set of the properties table, or null if it is null
+ */
+ protected Set<Map.Entry<String, String>> getAlignmentProperties()
+ {
+ return (this.properties == null ? null : this.properties.entrySet());
+ }
+
+ protected String getAlignmentProperty(String key)
{
if (properties != null && key != null)
{
* List of valid format strings used in the isValidFormat method
*/
public static final String[] READABLE_FORMATS = new String[] { "BLC",
- "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", "PDB",
+ "CLUSTAL", "FASTA", "MEGA", "MSF", "PileUp", "PIR", "PFAM", "STH",
+ "PDB",
"JnetFile", "RNAML", PhylipFile.FILE_DESC, JSONFile.FILE_DESC,
IdentifyFile.GFF3File, "HTML" };
* corresponding to READABLE_FNAMES
*/
public static final String[] READABLE_EXTENSIONS = new String[] {
- "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa",
+ "fa, fasta, mfa, fastq", "aln", "pfam", "meg", "msf", "pir", "blc",
+ "amsa",
"sto,stk", "xml,rnaml", PhylipFile.FILE_EXT, JSONFile.FILE_EXT,
".gff2,gff3", "jar,jvp", HtmlFile.FILE_EXT };
* READABLE_EXTENSIONS
*/
public static final String[] READABLE_FNAMES = new String[] { "Fasta",
- "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Stockholm", "RNAML",
+ "Clustal", "PFAM", "MEGA", "MSF", "PIR", "BLC", "AMSA", "Stockholm",
+ "RNAML",
PhylipFile.FILE_DESC, JSONFile.FILE_DESC, IdentifyFile.GFF3File,
"Jalview", HtmlFile.FILE_DESC };
* method
*/
public static final String[] WRITEABLE_FORMATS = new String[] { "BLC",
- "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA", "STH",
+ "CLUSTAL", "FASTA", "MEGA", "MSF", "PileUp", "PIR", "PFAM", "AMSA",
+ "STH",
PhylipFile.FILE_DESC, JSONFile.FILE_DESC };
/**
* that are writable by the application.
*/
public static final String[] WRITABLE_EXTENSIONS = new String[] {
- "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa",
+ "fa, fasta, mfa, fastq", "aln", "pfam", "meg", "msf", "pir", "blc",
+ "amsa",
"sto,stk", PhylipFile.FILE_EXT, JSONFile.FILE_EXT, "jvp" };
/**
* WRITABLE_EXTENSIONS list of formats.
*/
public static final String[] WRITABLE_FNAMES = new String[] { "Fasta",
- "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "STH",
+ "Clustal", "PFAM", "MEGA", "MSF", "PIR", "BLC", "AMSA", "STH",
PhylipFile.FILE_DESC, JSONFile.FILE_DESC, "Jalview" };
public static String INVALID_CHARACTERS = "Contains invalid characters";
{
alignFile = new RnamlFile(inFile, type);
}
+ else if (format.equals("MEGA"))
+ {
+ alignFile = new MegaFile(inFile, type);
+ }
else if (format.equals(IdentifyFile.GFF3File))
{
alignFile = new Gff3File(inFile, type);
{
alignFile = new RnamlFile(source);
}
+ else if (format.equals("MEGA"))
+ {
+ alignFile = new MegaFile(source);
+ }
else if (format.equals("SimpleBLAST"))
{
alignFile = new SimpleBlastFile(source);
{
afile = new RnamlFile();
}
-
+ else if (format.equalsIgnoreCase("MEGA"))
+ {
+ afile = new MegaFile();
+ }
else
{
throw new Exception(
--- /dev/null
+package jalview.io;
+
+import java.io.IOException;
+
+/**
+ * Exception signalling that file content does not conform to the format being
+ * parsed. It is an IOException, so handlers for IOException also catch it.
+ */
+@SuppressWarnings("serial")
+public class FileFormatException extends IOException
+{
+ /**
+ * @param msg
+ * description of the format problem, passed to the IOException
+ * constructor
+ */
+ public FileFormatException(String msg)
+ {
+ super(msg);
+ }
+}
}
data = data.toUpperCase();
+ if (data.startsWith("#MEGA"))
+ {
+ reply = "MEGA";
+ break;
+ }
if (data.startsWith("##GFF-VERSION"))
{
reply = GFF3File;
{
if (getUI() instanceof javax.swing.plaf.basic.BasicFileChooserUI)
{
- final javax.swing.plaf.basic.BasicFileChooserUI ui = (javax.swing.plaf.basic.BasicFileChooserUI) getUI();
- final String name = ui.getFileName().trim();
+ final javax.swing.plaf.basic.BasicFileChooserUI myui = (javax.swing.plaf.basic.BasicFileChooserUI) getUI();
+ final String name = myui.getFileName().trim();
if ((name == null) || (name.length() == 0))
{
@Override
public void run()
{
- String currentName = ui.getFileName();
+ String currentName = myui.getFileName();
if ((currentName == null) || (currentName.length() == 0))
{
- ui.setFileName(name);
+ myui.setFileName(name);
}
}
});
{
format = "PFAM";
}
+ else if (format.toUpperCase().startsWith("MEGA"))
+ {
+ format = "MEGA";
+ }
else if (format.toUpperCase().startsWith(PhylipFile.FILE_DESC))
{
format = PhylipFile.FILE_DESC;
class RecentlyOpened extends JPanel
{
- JList list;
+ JList<String> list;
public RecentlyOpened()
{
String historyItems = jalview.bin.Cache.getProperty("RECENT_FILE");
StringTokenizer st;
- Vector recent = new Vector();
+ Vector<String> recent = new Vector<String>();
if (historyItems != null)
{
while (st.hasMoreTokens())
{
- recent.addElement(st.nextElement());
+ recent.addElement(st.nextToken());
}
}
- list = new JList(recent);
+ list = new JList<String>(recent);
DefaultListCellRenderer dlcr = new DefaultListCellRenderer();
dlcr.setHorizontalAlignment(DefaultListCellRenderer.RIGHT);
layout.putConstraint(SpringLayout.NORTH, scroller, 5,
SpringLayout.NORTH, this);
- if (new Platform().isAMac())
+ if (Platform.isAMac())
{
scroller.setPreferredSize(new Dimension(500, 100));
}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
+ * Copyright (C) 2014 The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+/**
+ * A parser for input or output of MEGA format files. <br>
+ * <br>
+ * Tamura K, Stecher G, Peterson D, Filipski A, and Kumar S (2013) MEGA6:
+ * Molecular Evolutionary Genetics Analysis Version 6.0. Molecular Biology and
+ * Evolution 30: 2725-2729. <br>
+ * <br>
+ *
+ * MEGA file format is supported as described in
+ * http://www.megasoftware.net/manual.pdf <br>
+ * Limitations:
+ * <ul>
+ * <li>nested comments (marked by [ ]) are accepted but not preserved</li>
+ * <li>to be completed</li>
+ * </ul>
+ *
+ * @see <a href="http://www.megasoftware.net/">http://www.megasoftware.net/</a>
+ */
+public class MegaFile extends AlignFile
+{
+ private static final char COMMENT_START = '[';
+
+ private static final char COMMENT_END = ']';
+
+ private static final String HASHSIGN = "#";
+
+ private static final String SEMICOLON = ";";
+
+ private static final String BANG = "!";
+
+ private static final String EQUALS = "=";
+
+ private static final String MEGA_ID = HASHSIGN + "MEGA";
+
+ private static final String TITLE = "TITLE";
+
+ private static final String FORMAT = "Format";
+
+ private static final String DESCRIPTION = "Description";
+
+ private static final String GENE = "Gene";
+
+ private static final String DOMAIN = "Domain";
+
+ private static final String INTERLEAVED = "Interleaved";
+
+ /*
+ * names of properties to save to the alignment (may affect eventual output
+ * format)
+ */
+ static final String PROP_TITLE = "MEGA_TITLE";
+
+ static final String PROP_INTERLEAVED = "MEGA_INTERLEAVED";
+
+ static final String PROP_DESCRIPTION = "MEGA_DESCRIPTION";
+
+ static final String PROP_CODETABLE = "MEGA_CODETABLE";
+
+ static final String PROP_IDENTITY = "MEGA_IDENTITY";
+
+ static final String PROP_MISSING = "MEGA_MISSING";
+
+ // TODO: need a controlled name for Gene as a feature if we want to be able to
+ // output the MEGA file with !Gene headers
+ // TODO: decide what should happen to Gene data if the sequences get realigned
+
+ // initial size for sequence data buffer
+ private static final int SEQBUFFERSIZE = 256;
+
+ private static final String SPACE = " ";
+
+ private static final int POSITIONS_PER_LINE = 50;
+
+ private String title;
+
+ // gap character may be explicitly declared, if not we infer it
+ private Character gapCharacter;
+
+ // this can be True, False or null (meaning not asserted in file)
+ private Boolean nucleotide;
+
+ // set once we have seen one block of interleaved data
+ private boolean firstDataBlockRead = false;
+
+ // this can be True, False or null (meaning we don't know yet)
+ private Boolean interleaved;
+
+ /**
+ * Creates a MegaFile with no input source.
+ */
+ public MegaFile()
+ {
+ }
+
+ /**
+ * Creates a MegaFile by delegating to the AlignFile(String, String)
+ * constructor.
+ *
+ * @param inFile
+ * passed unchanged to the superclass constructor
+ * @param type
+ * passed unchanged to the superclass constructor
+ * @throws IOException
+ * if the superclass constructor throws it
+ */
+ public MegaFile(String inFile, String type) throws IOException
+ {
+ super(inFile, type);
+ }
+
+ /**
+ * Creates a MegaFile by delegating to the AlignFile(FileParse) constructor.
+ *
+ * @param source
+ * passed unchanged to the superclass constructor
+ * @throws IOException
+ * if the superclass constructor throws it
+ */
+ public MegaFile(FileParse source) throws IOException
+ {
+ super(source);
+ }
+
+ /**
+  * Parse the input stream: first the #MEGA / Title / Description / Format
+  * header lines, then the data lines, which may be !Gene or !Domain commands
+  * or sequence data (interleaved or noninterleaved). The parsed sequences are
+  * finally converted to Sequence objects and stored.
+  *
+  * @throws IOException
+  *           if the data cannot be read, or is not valid (for example
+  *           sequence data that is not preceded by any sequence id)
+  */
+ @Override
+ public void parse() throws IOException
+ {
+   /*
+    * Read and process MEGA and Title/Format/Description headers if present.
+    * Returns the first data line following the headers.
+    */
+   String dataLine = parseHeaderLines();
+
+   /*
+    * Temporary store of {sequenceId, positionData} while parsing; a linked
+    * map keeps the sequences in the order in which they are first read
+    */
+   Map<String, StringBuilder> seqData = new LinkedHashMap<String, StringBuilder>();
+
+   /*
+    * The id of the sequence being read (for noninterleaved data). This is
+    * null until a sequence id has been seen, so that data with no id context
+    * is rejected by parseNoninterleavedDataLine instead of being collected
+    * under an empty sequence name.
+    */
+   String currentId = null;
+
+   while (dataLine != null)
+   {
+     dataLine = dataLine.trim();
+     if (dataLine.length() > 0)
+     {
+       if (dataLine.startsWith(BANG + GENE))
+       {
+         parseGene(dataLine);
+       }
+       else if (dataLine.startsWith(BANG + DOMAIN))
+       {
+         parseDomain(dataLine);
+       }
+       else
+       {
+         currentId = parseDataLine(dataLine, seqData, currentId);
+       }
+     }
+     else if (!seqData.isEmpty())
+     {
+       /*
+        * Blank line after processing some data: the first block of data has
+        * been read, so no new sequence id may appear in interleaved data
+        */
+       this.firstDataBlockRead = true;
+     }
+     dataLine = nextNonCommentLine();
+   }
+
+   setSequences(seqData);
+ }
+
+ /**
+ * Parse a !Gene command line. Not yet implemented: the method body is empty,
+ * so the line is ignored.
+ *
+ * @param dataLine
+ * the (trimmed) line starting with !Gene
+ */
+ protected void parseGene(String dataLine)
+ {
+ }
+
+ /**
+ * Parse a !Domain command line. Not yet implemented: the method body is
+ * empty, so the line is ignored.
+ *
+ * @param dataLine
+ * the (trimmed) line starting with !Domain
+ */
+ private void parseDomain(String dataLine)
+ {
+ }
+
+ /**
+ * Returns the next line that is not a comment, or null at end of file.
+ * Comments in MEGA are within [ ] brackets, and may be nested. Reading starts
+ * outside any comment (nesting depth zero).
+ *
+ * @return the next line with comments removed, or null at end of file
+ * @throws IOException
+ * if thrown while reading or parsing the line
+ */
+ protected String nextNonCommentLine() throws IOException
+ {
+ return nextNonCommentLine(0);
+ }
+
+ /**
+  * Returns the next line that is not a comment, or null at end of file.
+  * Comments in MEGA are within [ ] brackets, and may be nested. Lines are
+  * read until one ends outside any comment; the non-comment content of that
+  * line is returned (this may be an empty string).
+  *
+  * @param depth
+  *          current depth of nesting of comments while parsing
+  * @return the non-comment content of the first line that ends outside any
+  *         comment, or null at end of file
+  * @throws IOException
+  *           if reading fails, or (as FileFormatException) if a comment
+  *           starts after data on the same line while not in a comment
+  */
+ protected String nextNonCommentLine(final int depth) throws IOException
+ {
+   /*
+    * Loop rather than recurse, so that a comment spanning many lines cannot
+    * exhaust the call stack
+    */
+   int currentDepth = depth;
+   while (true)
+   {
+     String data = nextLine();
+     if (data == null)
+     {
+       if (currentDepth > 0)
+       {
+         System.err.println("Warning: unterminated comment in data file");
+       }
+       return null;
+     }
+
+     /*
+      * reject unnested comment following data on the same line; a comment
+      * preceded only by whitespace (an indented comment) is accepted
+      */
+     int leftBracket = data.indexOf(COMMENT_START);
+     if (currentDepth == 0 && leftBracket > 0
+             && data.substring(0, leftBracket).trim().length() > 0)
+     {
+       throw new FileFormatException(
+               "Can't parse comment following data at " + data);
+     }
+
+     /*
+      * If we are still in a (possibly nested) comment after this line, keep
+      * reading; otherwise return what is left of this line
+      */
+     int newDepth = commentDepth(data, currentDepth);
+     if (newDepth == 0)
+     {
+       return getNonCommentContent(data, currentDepth);
+     }
+     currentDepth = newDepth;
+   }
+ }
+
+ /**
+ * Returns what is left of the input data after removing any comments, whether
+ * 'in progress' from preceding lines, or embedded in the current line
+ *
+ * @param data
+ * input data
+ * @param depth
+ * nested depth of comments pending termination
+ * @return
+ * @throws FileFormatException
+ */
+ protected static String getNonCommentContent(String data, int depth)
+ throws FileFormatException
+ {
+ int len = data.length();
+ StringBuilder result = new StringBuilder(len);
+ for (int i = 0; i < len; i++)
+ {
+ char c = data.charAt(i);
+ switch (c)
+ {
+ case COMMENT_START:
+ depth++;
+ break;
+
+ case COMMENT_END:
+ if (depth > 0)
+ {
+ depth--;
+ }
+ else
+ {
+ result.append(c);
+ }
+ break;
+
+ default:
+ if (depth == 0)
+ {
+ result.append(c);
+ }
+ }
+ }
+ return result.toString();
+ }
+
+ /**
+  * Computes the comment nesting depth at the end of an input line, given the
+  * depth at its start: each '[' adds one level and each ']' removes one. A
+  * ']' met at depth zero is ignored (not treated as a comment delimiter), so
+  * an increase from a non-negative depth is never cancelled below zero.
+  *
+  * @param data
+  *          input line
+  * @param depth
+  *          current comment nested depth before parsing the line
+  * @return new depth after parsing the line
+  */
+ protected static int commentDepth(CharSequence data, int depth)
+ {
+   int nesting = depth;
+   for (int pos = 0, end = data.length(); pos < end; pos++)
+   {
+     switch (data.charAt(pos))
+     {
+     case COMMENT_START:
+       nesting++;
+       break;
+     case COMMENT_END:
+       if (nesting > 0)
+       {
+         nesting--;
+       }
+       break;
+     default:
+       break;
+     }
+   }
+   return nesting;
+ }
+
+ /**
+  * Builds a Sequence from each {id, characters} entry, in the iteration order
+  * of the map, and adds it to this file's sequences.
+  *
+  * @param seqData
+  *          parsed sequence characters keyed by sequence id
+  */
+ protected void setSequences(Map<String, StringBuilder> seqData)
+ {
+   for (Entry<String, StringBuilder> parsed : seqData.entrySet())
+   {
+     SequenceI sequence = new Sequence(parsed.getKey(), parsed.getValue()
+             .toString());
+     this.seqs.addElement(sequence);
+   }
+ }
+
+ /**
+ * Process one line of sequence data. If it has no sequence identifier, append
+ * to the current id's sequence. Else parse out the sequence id and append the
+ * data (if any) to that id's sequence. Returns the sequence id (implicit or
+ * explicit) for this line.
+ *
+ * @param dataLine
+ * @param seqData
+ * @param currentid
+ * @return
+ * @throws IOException
+ */
+ protected String parseDataLine(String dataLine,
+ Map<String, StringBuilder> seqData, String currentId)
+ throws IOException
+ {
+ String seqId = getSequenceId(dataLine);
+ if (seqId == null)
+ {
+ /*
+ * Just character data
+ */
+ parseNoninterleavedDataLine(dataLine, seqData, currentId);
+ return currentId;
+ }
+ else if ((HASHSIGN + seqId).trim().equals(dataLine.trim()))
+ {
+ /*
+ * Sequence id only - header line for noninterleaved data
+ */
+ return seqId;
+ }
+ else
+ {
+ /*
+ * Sequence id followed by data
+ */
+ parseInterleavedDataLine(dataLine, seqData, seqId);
+ return seqId;
+ }
+ }
+
+ /**
+  * Adds a line of sequence data to the buffer for the given sequence id,
+  * starting a new buffer if the id has not been seen before. Any internal
+  * spaces in the data are removed, as is done for interleaved data.
+  *
+  * @param dataLine
+  *          a line holding only sequence characters
+  * @param seqData
+  *          the sequence data parsed so far, keyed by sequence id
+  * @param currentId
+  *          the id of the sequence the data belongs to
+  * @throws IOException
+  *           if there is no current sequence id, or if the file has already
+  *           been determined to be interleaved
+  */
+ protected void parseNoninterleavedDataLine(String dataLine,
+         Map<String, StringBuilder> seqData, String currentId)
+         throws IOException
+ {
+   if (currentId == null)
+   {
+     /*
+      * Oops. Data but no sequence id context.
+      */
+     throw new IOException("No sequence id context at: " + dataLine);
+   }
+
+   assertInterleaved(false, dataLine);
+
+   StringBuilder sb = getSequenceDataBuffer(seqData, currentId);
+
+   /*
+    * Add the current line of data to the sequence, without any spaces used
+    * to group the characters (consistent with parseInterleavedDataLine)
+    */
+   sb.append(dataLine.replace(SPACE, ""));
+ }
+
+ /**
+  * Returns the data buffer held for the given sequence id, first creating and
+  * storing an empty one if the id has no buffer yet.
+  *
+  * @param seqData
+  *          the sequence data parsed so far, keyed by sequence id
+  * @param currentId
+  *          the sequence id whose buffer is wanted
+  * @return the (possibly new) buffer stored for the id
+  */
+ protected StringBuilder getSequenceDataBuffer(
+         Map<String, StringBuilder> seqData, String currentId)
+ {
+   StringBuilder buffer = seqData.get(currentId);
+   if (buffer != null)
+   {
+     return buffer;
+   }
+
+   // first data met for this sequence id
+   buffer = new StringBuilder(SEQBUFFERSIZE);
+   seqData.put(currentId, buffer);
+   return buffer;
+ }
+
+ /**
+ * Parse one line of interleaved data e.g.
+ *
+ * <pre>
+ * #TheSeqId CGATCGCATGCA
+ * </pre>
+ *
+ * @param dataLine
+ * @param seqData
+ * @param seqId
+ * @throws IOException
+ */
+ protected void parseInterleavedDataLine(String dataLine,
+ Map<String, StringBuilder> seqData, String seqId)
+ throws IOException
+ {
+ /*
+ * New sequence found in second or later data block - error.
+ */
+ if (this.firstDataBlockRead && !seqData.containsKey(seqId))
+ {
+ throw new IOException(
+ "Parse error: misplaced new sequence starting at " + dataLine);
+ }
+
+ StringBuilder sb = getSequenceDataBuffer(seqData, seqId);
+ String data = dataLine.substring(seqId.length() + 1).trim();
+
+ /*
+ * Do nothing if this line is _only_ a sequence id with no data following.
+ *
+ * Remove any internal spaces (present in the 'fancy' file format)
+ */
+ if (data != null && data.length() > 0)
+ {
+ if (data.indexOf(SPACE) != -1)
+ {
+ data = data.replace(SPACE, "");
+ }
+ sb.append(data);
+ assertInterleaved(true, dataLine);
+ }
+ }
+
+ /**
+  * Extracts the sequence identifier from a data line. For a line starting
+  * with '#', the identifier is the text between the '#' and the first space
+  * (or the end of the line if there is no space), so "#abcde xyz" gives
+  * "abcde".
+  *
+  * @param dataLine
+  *          the line to inspect (may be null)
+  * @return the identifier, or null if the line is null or does not start with
+  *         '#'
+  */
+ public static String getSequenceId(String dataLine)
+ {
+   // TODO refactor to a StringUtils type class
+   if (dataLine == null || !dataLine.startsWith(HASHSIGN))
+   {
+     return null;
+   }
+   final int idEnd = dataLine.indexOf(' ');
+   return idEnd < 0 ? dataLine.substring(1) : dataLine.substring(1, idEnd);
+ }
+
+ /**
+  * Reads the header section of the file: blank lines and the #MEGA line are
+  * skipped, the title is saved as an alignment property, and !Description
+  * and !Format statements are parsed. Reading stops at the first line that
+  * is none of these.
+  *
+  * @return the (trimmed) first line following the header lines, or null if
+  *         the end of the file is reached first
+  * @throws IOException
+  *           if thrown while reading or parsing a header
+  */
+ protected String parseHeaderLines() throws IOException
+ {
+   String line;
+   while ((line = nextNonCommentLine()) != null)
+   {
+     line = line.trim();
+
+     // nothing to do for blank lines or the #MEGA identifier line
+     if (line.length() == 0 || line.toUpperCase().startsWith(MEGA_ID))
+     {
+       continue;
+     }
+
+     if (isTitle(line))
+     {
+       setAlignmentProperty(PROP_TITLE, getValue(line));
+     }
+     else if (line.startsWith(BANG + DESCRIPTION))
+     {
+       parseDescription(line);
+     }
+     else if (line.startsWith(BANG + FORMAT))
+     {
+       parseFormat(line);
+     }
+     else
+     {
+       // the first line that is not a header
+       return line;
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Parse a !Format statement. This may be multiline, and is ended by a
+  * semicolon. Whitespace around each line is ignored (so a semicolon
+  * followed by trailing spaces still ends the statement), and blank lines
+  * within the statement are skipped.
+  *
+  * @param inputLine
+  *          the first line of the statement
+  * @throws IOException
+  *           if reading fails, or (as FileFormatException) if the statement
+  *           content is not valid
+  */
+ protected void parseFormat(String inputLine) throws IOException
+ {
+   String line = inputLine;
+   while (line != null)
+   {
+     line = line.trim();
+     if (line.length() > 0)
+     {
+       parseFormatLine(line);
+     }
+     if (line.endsWith(SEMICOLON))
+     {
+       break;
+     }
+     line = nextNonCommentLine();
+   }
+ }
+
+ /**
+  * Parse one line of a !Format statement. After removal of any leading
+  * "!Format" and trailing semicolon, the line may hold any number (including
+  * zero) of keyword=value pairs, separated by whitespace.
+  *
+  * @param inputLine
+  *          one line of the statement
+  * @throws FileFormatException
+  *           if a token cannot be parsed as a keyword=value pair
+  */
+ protected void parseFormatLine(String inputLine)
+         throws FileFormatException
+ {
+   String content = inputLine.trim();
+   if (content.startsWith(BANG + FORMAT))
+   {
+     content = content.substring((BANG + FORMAT).length());
+   }
+   if (content.endsWith(SEMICOLON))
+   {
+     content = content.substring(0, content.length() - 1);
+   }
+   content = content.trim();
+
+   /*
+    * nothing to parse, e.g. "!Format" alone on the first line of a multiline
+    * statement, or ";" alone on its last line
+    */
+   if (content.length() == 0)
+   {
+     return;
+   }
+
+   // split on runs of whitespace, so repeated spaces give no empty tokens
+   for (String token : content.split("\\s+"))
+   {
+     parseFormatKeyword(token);
+   }
+ }
+
+ /**
+  * Parse a Keyword=Value token. Possible keywords are
+  * <ul>
+  * <li>DataType= DNA, RNA, Nucleotide, Protein</li>
+  * <li>DataFormat= Interleaved, ?</li>
+  * <li>NSeqs= number of sequences (synonym NTaxa)</li>
+  * <li>NSites= number of bases / residues</li>
+  * <li>Property= Exon (or Coding), Intron (or Noncoding), End (of domain)</li>
+  * <li>Indel= gap character</li>
+  * <li>Identical= identity character (to first sequence) (synonym MatchChar)</li>
+  * <li>Missing= missing data character</li>
+  * <li>CodeTable= Standard, other (MEGA supports various)</li>
+  * </ul>
+  * Keywords are matched without regard to case. A keyword that is not
+  * handled here (other than NSeqs and NSites, which are silently ignored)
+  * results in a warning on standard error only.
+  *
+  * @param token
+  *          a single keyword=value pair, with no embedded whitespace
+  * @throws FileFormatException
+  *           if the token is not of the form keyword=value, or if the
+  *           DataType value is not recognised
+  */
+ protected void parseFormatKeyword(String token)
+         throws FileFormatException
+ {
+   String msg = "Unrecognised Format command: " + token;
+   String[] bits = token.split(EQUALS);
+   if (bits.length != 2)
+   {
+     throw new FileFormatException(msg);
+   }
+   String keyword = bits[0];
+   String value = bits[1];
+
+   /*
+    * Jalview will work out whether nucleotide or not anyway
+    */
+   if (keyword.equalsIgnoreCase("DataType"))
+   {
+     if (value.equalsIgnoreCase("DNA") || value.equalsIgnoreCase("RNA")
+             || value.equalsIgnoreCase("Nucleotide"))
+     {
+       this.nucleotide = true;
+       // alignment computes whether or not it is nucleotide when created
+     }
+     else if (value.equalsIgnoreCase("Protein"))
+     {
+       this.nucleotide = false;
+     }
+     else
+     {
+       throw new FileFormatException(msg);
+     }
+   }
+
+   /*
+    * accept non-Standard code table but save in case we want to disable
+    * 'translate as cDNA'
+    */
+   else if (keyword.equalsIgnoreCase("CodeTable"))
+   {
+     setAlignmentProperty(PROP_CODETABLE, value);
+   }
+
+   /*
+    * save gap char to set later on alignment once created
+    */
+   else if (keyword.equalsIgnoreCase("Indel"))
+   {
+     // value cannot be empty here: split() drops a trailing empty string
+     this.gapCharacter = value.charAt(0);
+   }
+
+   else if (keyword.equalsIgnoreCase("Identical")
+           || keyword.equalsIgnoreCase("MatchChar"))
+   {
+     if (!".".equals(value))
+     {
+       setAlignmentProperty(PROP_IDENTITY, value);
+       System.err.println("Warning: " + token
+               + " not supported, Jalview uses '.' for identity");
+     }
+   }
+
+   else if (keyword.equalsIgnoreCase("Missing"))
+   {
+     setAlignmentProperty(PROP_MISSING, value);
+     System.err.println("Warning: " + token + " not supported");
+   }
+
+   else if (keyword.equalsIgnoreCase("Property"))
+   {
+     // TODO: figure out what to do with this
+     // can it appear more than once in a file?
+     /*
+      * saved under its own property name, so that it cannot overwrite the
+      * value saved for Missing=
+      */
+     setAlignmentProperty("MEGA_PROPERTY", value);
+   }
+
+   else if (!keyword.equalsIgnoreCase("NSeqs")
+           && !keyword.equalsIgnoreCase("NSites"))
+   {
+     System.err.println("Warning: " + msg);
+   }
+ }
+
+ /**
+ * Returns the trimmed data on the line following either whitespace or '=',
+ * with any trailing semi-colon removed<br>
+ * So
+ * <ul>
+ * <li>Hello World</li>
+ * <li>!Hello: \tWorld;</li>
+ * <li>!Hello=World</li>
+ * <ul>
+ * should all return "World"
+ *
+ * @param inputLine
+ * @return
+ */
+ protected static String getValue(String inputLine)
+ {
+ if (inputLine == null)
+ {
+ return null;
+ }
+ String value = null;
+ String s = inputLine.replaceAll("\t", " ").trim();
+
+ /*
+ * KEYWORD = VALUE should return VALUE
+ */
+ int equalsPos = s.indexOf("=");
+ if (equalsPos >= 0)
+ {
+ value = s.substring(equalsPos + 1);
+ }
+ else
+ {
+ int spacePos = s.indexOf(' ');
+ value = spacePos == -1 ? "" : s.substring(spacePos + 1);
+ }
+ value = value.trim();
+ if (value.endsWith(SEMICOLON))
+ {
+ value = value.substring(0, value.length() - 1).trim();
+ }
+ return value;
+ }
+
+ /**
+  * Answers whether the input line is a title line, i.e. starts with "TITLE"
+  * optionally preceded by a single "!" (not case sensitive). "!TITLE" is the
+  * official format; some older data file examples have it without the !.
+  *
+  * @param inputLine
+  *          the line to inspect (may be null)
+  * @return true if the line is a title line, false if not or if it is null
+  */
+ protected static boolean isTitle(String inputLine)
+ {
+   if (inputLine == null)
+   {
+     return false;
+   }
+   String keyword = inputLine.toUpperCase();
+   if (keyword.startsWith(BANG))
+   {
+     // the leading '!' is optional
+     keyword = keyword.substring(BANG.length());
+   }
+   return keyword.startsWith(TITLE);
+ }
+
+ /**
+  * Parses a !Description statement and saves its text as the Description
+  * alignment property. The statement starts on the given line and continues
+  * over following lines until one ends with a semicolon (ignoring trailing
+  * whitespace), or the end of the file. Each non-empty line of text is added
+  * to the property value followed by a newline.
+  *
+  * @param firstDescriptionLine
+  *          the line starting with !Description
+  * @throws IOException
+  *           if thrown while reading further lines
+  */
+ protected void parseDescription(String firstDescriptionLine)
+         throws IOException
+ {
+   StringBuilder desc = new StringBuilder(256);
+
+   /*
+    * getValue() removes a trailing semicolon, so the check for a statement
+    * that ends on its first line has to be made on the line itself
+    */
+   boolean terminated = firstDescriptionLine != null
+           && firstDescriptionLine.trim().endsWith(SEMICOLON);
+
+   String line = getValue(firstDescriptionLine);
+   if (line != null && line.length() > 0)
+   {
+     desc.append(line).append(newline);
+   }
+
+   while (line != null && !terminated)
+   {
+     line = nextNonCommentLine();
+     if (line == null)
+     {
+       // end of file before the terminating semicolon
+       break;
+     }
+     if (line.trim().endsWith(SEMICOLON))
+     {
+       // keep the text up to, but excluding, the terminating semicolon
+       desc.append(line.substring(0, line.lastIndexOf(SEMICOLON))).append(
+               newline);
+       terminated = true;
+     }
+     else if (line.length() > 0)
+     {
+       desc.append(line).append(newline);
+     }
+   }
+   setAlignmentProperty(PROP_DESCRIPTION, desc.toString());
+ }
+
+ /**
+ * Write out the alignment sequences in Mega format, by passing the result of
+ * getSeqsAsArray() to print(SequenceI[]).
+ *
+ * @return the formatted output
+ */
+ @Override
+ public String print()
+ {
+ return print(getSeqsAsArray());
+ }
+
+ /**
+ * Write out the alignment sequences in Mega format - interleaved unless
+ * explicitly noninterleaved.
+ */
+ public String print(SequenceI[] s)
+ {
+ // TODO: is there a way to preserve the 'interleaved' property so it can
+ // affect output?
+
+ String result = null;
+ if (this.interleaved != null && !this.interleaved)
+ {
+ result = printNonInterleaved(s);
+ }
+ else
+ {
+ result = printInterleaved(s);
+ }
+ return result;
+ }
+
+ /**
+ * Intended to print the sequences in interleaved format, each row 15
+ * space-separated triplets. Not coded yet: currently returns the result of
+ * printInterleaved(s).
+ *
+ * @param s
+ * the sequences to write
+ * @return the output of printInterleaved for the given sequences
+ */
+ protected String printInterleavedCodons(SequenceI[] s)
+ {
+ // TODO not coded yet - defaulting to the 'simple' format output
+ return printInterleaved(s);
+ }
+
+ /**
+  * Print to string in Interleaved format - the headers, followed by blocks
+  * each holding the next 50 characters of each sequence in turn. Every data
+  * line starts with '#' and the sequence name, padded so that the data is
+  * aligned in a column.
+  *
+  * @param s
+  *          the sequences to write
+  * @return the formatted output
+  */
+ protected String printInterleaved(SequenceI[] s)
+ {
+   /*
+    * The id column is at least one character wide: a zero width would give
+    * the format "%-0s", which String.format rejects
+    */
+   int idWidth = Math.max(1, getMaxIdLength(s));
+   String idFormat = "#%-" + idWidth + "s ";
+   int maxSequenceLength = getMaxSequenceLength(s);
+   int numLines = maxSequenceLength / POSITIONS_PER_LINE + 3; // approx
+
+   /*
+    * Size a buffer to hold the whole output
+    */
+   StringBuilder sb = new StringBuilder(numLines
+           * (idWidth + 2 + POSITIONS_PER_LINE));
+   printHeaders(sb);
+
+   int numDataBlocks = (maxSequenceLength - 1) / POSITIONS_PER_LINE + 1;
+   for (int i = 0; i < numDataBlocks; i++)
+   {
+     // each block is preceded by a blank line
+     sb.append(newline);
+     int blockStart = i * POSITIONS_PER_LINE;
+     int blockEnd = blockStart + POSITIONS_PER_LINE;
+     for (SequenceI seq : s)
+     {
+       sb.append(String.format(idFormat, seq.getName()));
+       sb.append(seq.getSequence(blockStart, blockEnd));
+       sb.append(newline);
+     }
+   }
+
+   return sb.toString();
+ }
+
+ /**
+ * Append the MEGA header and any other known properties
+ *
+ * @param sb
+ */
+ private void printHeaders(StringBuilder sb)
+ {
+ sb.append(MEGA_ID);
+ sb.append(newline);
+
+ String ttle = getAlignmentProperty(PROP_TITLE);
+ if (ttle != null)
+ {
+ sb.append(BANG).append(TITLE).append(SPACE).append(ttle)
+ .append(SEMICOLON).append(newline);
+ }
+
+ String desc = getAlignmentProperty(PROP_DESCRIPTION);
+ if (desc != null)
+ {
+ sb.append(BANG).append(DESCRIPTION).append(SPACE).append(desc)
+ .append(SEMICOLON).append(newline);
+ }
+ }
+
+ /**
+  * Finds the length of the longest sequence name (to allow aligned
+  * printout).
+  *
+  * @param s
+  *          the sequences to inspect
+  * @return the greatest name length, or zero if there are no sequences
+  */
+ protected static int getMaxIdLength(SequenceI[] s)
+ {
+   // TODO pull up for reuse
+   int longest = 0;
+   for (int i = 0; i < s.length; i++)
+   {
+     longest = Math.max(longest, s[i].getName().length());
+   }
+   return longest;
+ }
+
+ /**
+  * Finds the length of the longest sequence.
+  *
+  * @param s
+  *          the sequences to inspect
+  * @return the greatest sequence length, or zero if there are no sequences
+  */
+ protected static int getMaxSequenceLength(SequenceI[] s)
+ {
+   // TODO pull up for reuse
+   int longest = 0;
+   for (int i = 0; i < s.length; i++)
+   {
+     longest = Math.max(longest, s[i].getLength());
+   }
+   return longest;
+ }
+
+ /**
+  * Print to string in noninterleaved format - the headers, then for each
+  * sequence in turn a blank line, a line holding '#' and the sequence name,
+  * and all of the sequence in lines of 50 characters. No data line is
+  * written for an empty sequence.
+  *
+  * @param s
+  *          the sequences to write
+  * @return the formatted output
+  */
+ protected String printNonInterleaved(SequenceI[] s)
+ {
+   int maxSequenceLength = getMaxSequenceLength(s);
+   // approx
+   int numLines = maxSequenceLength / POSITIONS_PER_LINE + 2 + s.length;
+
+   /*
+    * Roughly size a buffer to hold the whole output
+    */
+   StringBuilder sb = new StringBuilder(numLines * POSITIONS_PER_LINE);
+   printHeaders(sb);
+
+   for (SequenceI seq : s)
+   {
+     sb.append(newline);
+     sb.append(HASHSIGN + seq.getName()).append(newline);
+
+     /*
+      * stop strictly before the sequence length, so that no empty line is
+      * written when the length is an exact multiple of the line length
+      */
+     int seqLength = seq.getLength();
+     for (int startPos = 0; startPos < seqLength; startPos += POSITIONS_PER_LINE)
+     {
+       char[] subSequence = seq.getSequence(startPos, startPos
+               + POSITIONS_PER_LINE);
+       sb.append(subSequence);
+       sb.append(newline);
+     }
+   }
+
+   return sb.toString();
+ }
+
+ /**
+  * Flag this file as interleaved or not, based on data format. Throws an
+  * exception if has previously been determined to be otherwise.
+  *
+  * @param isIt
+  *          true if the data line is in interleaved format, false if
+  *          noninterleaved
+  * @param dataLine
+  *          the data line being parsed (reported in any error message)
+  * @throws IOException
+  *           if the format of this line conflicts with that of earlier lines
+  */
+ protected void assertInterleaved(boolean isIt, String dataLine)
+         throws IOException
+ {
+   if (this.interleaved != null && isIt != this.interleaved.booleanValue())
+   {
+     throw new IOException(
+             "Parse error: mix of interleaved and noninterleaved detected, at line: "
+                     + dataLine);
+   }
+   // valueOf reuses the cached Boolean instances (the constructor is
+   // deprecated)
+   this.interleaved = Boolean.valueOf(isIt);
+ }
+
+ /**
+  * Answers true if the data has been determined to be interleaved, false if
+  * it is noninterleaved or if the layout is not (yet) known.
+  *
+  * @return true only if flagged as interleaved
+  */
+ public boolean isInterleaved()
+ {
+   return this.interleaved != null && this.interleaved.booleanValue();
+ }
+
+ /**
+  * Adds saved parsed values either as alignment properties, or (in some
+  * cases) as specific member fields of the alignment: the gap character is
+  * set on the alignment if one was declared in the file. A warning is
+  * written to standard error if the file's declared data type disagrees
+  * with what the alignment reports.
+  *
+  * @param al
+  *          the alignment to update
+  */
+ @Override
+ public void addProperties(AlignmentI al)
+ {
+   super.addProperties(al);
+   if (this.gapCharacter != null)
+   {
+     al.setGapCharacter(gapCharacter);
+   }
+
+   /*
+    * warn if e.g. DataType=DNA but data is protein (or vice versa)
+    */
+   if (this.nucleotide != null
+           && this.nucleotide.booleanValue() != al.isNucleotide())
+   {
+     /*
+      * the title is held as an alignment property (the 'title' field is
+      * never assigned); fall back to a generic name if no title was read
+      */
+     String name = getAlignmentProperty(PROP_TITLE);
+     if (name == null || name.length() == 0)
+     {
+       name = "alignment";
+     }
+     System.err.println("Warning: " + name + " declared "
+             + (nucleotide ? "" : "not ") + "nucleotide but it is"
+             + (nucleotide ? " not" : ""));
+   }
+ }
+}
{ "examples/testdata/test.aln", "CLUSTAL" },
{ "examples/testdata/test.pfam", "PFAM" },
{ "examples/testdata/test.msf", "MSF" },
+ { "examples/testdata/test.meg", "MEGA" },
{ "examples/testdata/test.pir", "PIR" },
{ "examples/testdata/test.html", "HTML" },
{ "examples/testdata/test.pileup", "PileUp" },
--- /dev/null
+package jalview.io;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.AssertJUnit.fail;
+
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.Vector;
+
+import org.testng.annotations.Test;
+
+/*
+ * Unit tests for MegaFile - read and write in MEGA format(s).
+ */
+public class MegaFileTest
+{
+ // thirty characters of sequence data, used to build over-long lines
+ private static final String THIRTY_CHARS = "012345678901234567890123456789";
+
+ //@formatter:off
+ // two sequences in two interleaved blocks, with a TITLE: header line
+ private static final String INTERLEAVED =
+         "#MEGA\n" +
+         "TITLE: Interleaved sequence data\n" +
+         "\n" +
+         "#U455 ABCDEF\n" +
+         "#CPZANT MNOPQR\n" +
+         "\n" +
+         "#U455 KLMNOP\n" +
+         "#CPZANT WXYZ";
+
+ // the same interleaved data without the #MEGA and title lines
+ private static final String INTERLEAVED_NOHEADERS =
+         "#U455 ABCDEF\n" +
+         "#CPZANT MNOPQR\n" +
+         "\n" +
+         "#U455 KLMNOP\n" +
+         "#CPZANT WXYZ\n";
+
+ // interleaved sequences, one of 60 and one of 120 characters, each given
+ // on a single over-long input line
+ private static final String INTERLEAVED_LONGERTHAN50 =
+         "#MEGA\n" +
+         "TITLE: Interleaved sequence data\n" +
+         "\n" +
+         "#U455 " + THIRTY_CHARS + THIRTY_CHARS + "\n" +
+         "#CPZANT " + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS;
+
+ // two sequences, each id on its own line followed by its data lines
+ private static final String NONINTERLEAVED =
+         "#MEGA\n" +
+         "TITLE: Noninterleaved sequence data\n" +
+         "\n" +
+         "#U455 \n" +
+         "ABCFEDHIJ\n" +
+         "MNOPQR\n" +
+         "\n" +
+         "#CPZANT \n" +
+         "KLMNOPWXYZ\n" +
+         "CGATC\n";
+
+ // a single noninterleaved sequence of length 60, split over two lines
+ private static final String NONINTERLEAVED_LONGERTHAN50 =
+         "#SIXTY\n" +
+         THIRTY_CHARS + "\n" +
+         THIRTY_CHARS;
+
+ // id and data on one line for the first sequence, then id and data on
+ // separate lines for the second
+ private static final String MIXED =
+         "#MEGA\n" +
+         "TITLE: This is a mess\n" +
+         "\n" +
+         "#CPZANT KLMNOPWXYZCGATC\n" +
+         "\n" +
+         "#U455\n" +
+         " ABCFEDHIJ\n";
+
+ // interleaved, but a new sequence id (#U456) appears in the second block
+ private static final String INTERLEAVED_SEQUENCE_ERROR =
+         "#MEGA\n" +
+         "TITLE: Interleaved sequence data\n" +
+         "\n" +
+         "#U455 ABCDEF\n" +
+         "#CPZANT MNOPQR\n" +
+         "\n" +
+         "#U456 KLMNOP\n";
+
+ // the 'fancy' format: '!' prefixed header statements, bases in triplets
+ private static final String FANCY_FORMAT =
+         "#MEGA\n" +
+         "!Title Fancy format data;\n" +
+         "!Format DataType=DNA indel=- CodeTable=Standard;\n" +
+         "\n" +
+         "!Description\n" +
+         " Line one of description\n" +
+         " Line two of description;\n" +
+         "\n" +
+         "!Gene=Adh Property=Coding CodonStart=1;\n" +
+         "#U455 ABC DEF\n" +
+         "#CPZANT MNO PQR\n" +
+         "\n" +
+         "#U455 KLM NOP\n" +
+         "#CPZANT WXY Z\n";
+
+ // intended to hold interleaved data for two genes; currently the same
+ // content as FANCY_FORMAT
+ private static final String TWO_GENES =
+         "#MEGA\n" +
+         "!Title Fancy format data;\n" +
+         "!Format DataType=DNA indel=- CodeTable=Standard;\n" +
+         "\n" +
+         "!Description\n" +
+         " Line one of description\n" +
+         " Line two of description;\n" +
+         "\n" +
+         "!Gene=Adh Property=Coding CodonStart=1;\n" +
+         "#U455 ABC DEF\n" +
+         "#CPZANT MNO PQR\n" +
+         "\n" +
+         "#U455 KLM NOP\n" +
+         "#CPZANT WXY Z\n"; //TODO complete
+
+ //@formatter:on
+
+ /**
+  * Parses pasted interleaved MEGA data and verifies the title property, the
+  * sequence ids and their order, the joined sequence data and the
+  * interleaved flag.
+  *
+  * @throws IOException
+  *           if raised while constructing the MegaFile
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_interleaved() throws IOException
+ {
+   MegaFile megaFile = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
+
+   String title = megaFile.getAlignmentProperty(MegaFile.PROP_TITLE);
+   assertEquals("Title not as expected", "Interleaved sequence data", title);
+
+   Vector<SequenceI> parsed = megaFile.getSeqs();
+   assertEquals("Expected two sequences", 2, parsed.size());
+
+   // ids must come back in input order
+   SequenceI first = parsed.get(0);
+   SequenceI second = parsed.get(1);
+   assertEquals("First sequence id wrong", "U455", first.getName());
+   assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+   // data from both blocks is joined per sequence
+   assertEquals("First sequence data wrong", "ABCDEFKLMNOP",
+           first.getSequenceAsString());
+   assertEquals("Second sequence data wrong", "MNOPQRWXYZ",
+           second.getSequenceAsString());
+
+   boolean interleaved = megaFile.isInterleaved();
+   assertTrue("File format is not flagged as interleaved", interleaved);
+ }
+
+ /**
+  * Parses pasted noninterleaved MEGA data and verifies the title property,
+  * the sequence ids and their order, the joined sequence data and that the
+  * file is not flagged as interleaved.
+  *
+  * @throws IOException
+  *           if raised while constructing the MegaFile
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_nonInterleaved() throws IOException
+ {
+   MegaFile megaFile = new MegaFile(NONINTERLEAVED,
+           AppletFormatAdapter.PASTE);
+
+   String title = megaFile.getAlignmentProperty(MegaFile.PROP_TITLE);
+   assertEquals("Title not as expected", "Noninterleaved sequence data",
+           title);
+
+   Vector<SequenceI> parsed = megaFile.getSeqs();
+   assertEquals("Expected two sequences", 2, parsed.size());
+
+   // ids must come back in input order
+   SequenceI first = parsed.get(0);
+   SequenceI second = parsed.get(1);
+   assertEquals("First sequence id wrong", "U455", first.getName());
+   assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+   // the data lines under each id are joined into one sequence
+   assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR",
+           first.getSequenceAsString());
+   assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC",
+           second.getSequenceAsString());
+
+   boolean interleaved = megaFile.isInterleaved();
+   assertFalse("File format is not flagged as noninterleaved", interleaved);
+ }
+
+ /**
+  * Parsing interleaved data in which a new sequence id first appears after
+  * the first block is expected to fail with an IOException carrying a
+  * specific message.
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_interleavedExtraSequenceError()
+ {
+   IOException thrown = null;
+   try
+   {
+     new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
+   } catch (IOException e)
+   {
+     thrown = e;
+   }
+
+   // reaching here without an exception means the bad input was accepted
+   if (thrown == null)
+   {
+     fail("Expected extra sequence IOException");
+   }
+   assertEquals("Unexpected exception message",
+           "Parse error: misplaced new sequence starting at #U456 KLMNOP",
+           thrown.getMessage());
+ }
+
+ /**
+  * Parsing data that mixes the interleaved and noninterleaved layouts is
+  * expected to fail with an IOException carrying a specific message.
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_mixedInterleavedNonInterleaved()
+ {
+   IOException thrown = null;
+   try
+   {
+     new MegaFile(MIXED, AppletFormatAdapter.PASTE);
+   } catch (IOException e)
+   {
+     thrown = e;
+   }
+
+   // reaching here without an exception means the bad input was accepted
+   if (thrown == null)
+   {
+     fail("Expected mixed content exception");
+   }
+   assertEquals(
+           "Unexpected exception message",
+           "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
+           thrown.getMessage());
+ }
+
+ /**
+  * Checks that getSequenceId returns the id for lines starting with '#', and
+  * null for lines without a leading '#', for the empty string and for null.
+  */
+ @Test(groups = { "Functional" })
+ public void testGetSequenceId()
+ {
+   // lines from which the id "AB123" should be extracted
+   String[] linesWithId = { "#AB123 CGATC", "#AB123 CGATC",
+       "#AB123 CGC TAC", "#AB123" };
+   for (String line : linesWithId)
+   {
+     assertEquals("AB123", MegaFile.getSequenceId(line));
+   }
+
+   // lines that should yield no id
+   String[] linesWithoutId = { "AB123 CTAG", "AB123", "", null };
+   for (String line : linesWithoutId)
+   {
+     assertNull(MegaFile.getSequenceId(line));
+   }
+ }
+
+ /**
+  * Checks that getMaxIdLength reports the length of the longest sequence
+  * name in the array.
+  */
+ @Test(groups = { "Functional" })
+ public void testGetMaxIdLength()
+ {
+   final String bases = "GCATAC";
+   SequenceI[] sequences = new Sequence[] {
+       new Sequence("Something", bases),
+       new Sequence("SomethingElse", bases) };
+   // "SomethingElse" is the longer name
+   assertEquals(13, MegaFile.getMaxIdLength(sequences));
+
+   // after the swap "Something" is the longer name
+   sequences[1] = new Sequence("DNA", bases);
+   assertEquals(9, MegaFile.getMaxIdLength(sequences));
+ }
+
+ /**
+  * Checks that getMaxSequenceLength reports the length of the longest
+  * sequence in the array.
+  */
+ @Test(groups = { "Functional" })
+ public void testGetMaxSequenceLength()
+ {
+   SequenceI[] sequences = new Sequence[] {
+       new Sequence("Seq1", "GCATAC"),
+       new Sequence("Seq2", "GCATACTAG") };
+   // the second sequence is the longer one
+   assertEquals(9, MegaFile.getMaxSequenceLength(sequences));
+
+   // after the swap the first sequence is the longer one
+   sequences[1] = new Sequence("Seq2", "GCA");
+   assertEquals(6, MegaFile.getMaxSequenceLength(sequences));
+ }
+
+ /**
+  * Parses interleaved MEGA data and checks the text produced by print().
+  *
+  * @throws IOException
+  *           if raised while constructing the MegaFile
+  */
+ @Test(groups = { "Functional" })
+ public void testPrint_interleaved() throws IOException
+ {
+   MegaFile megaFile = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
+   String output = megaFile.print();
+   System.out.println(output);
+
+   // the input used several short lines per sequence; the expected output
+   // holds each sequence on a single line
+   String header = "#MEGA\n" + "!TITLE Interleaved sequence data;\n" + "\n";
+   String data = "#U455 ABCDEFKLMNOP\n" + "#CPZANT MNOPQRWXYZ\n";
+   String expected = header + data;
+   assertEquals("Print format wrong", expected, output);
+ }
+
+ /**
+  * Parses interleaved data that has no header lines and checks the text
+  * produced by print().
+  *
+  * @throws IOException
+  *           if raised while constructing the MegaFile
+  */
+ @Test(groups = { "Functional" })
+ public void testPrint_interleavedNoHeaders() throws IOException
+ {
+   MegaFile megaFile = new MegaFile(INTERLEAVED_NOHEADERS,
+           AppletFormatAdapter.PASTE);
+   String output = megaFile.print();
+   System.out.println(output);
+
+   // expected output starts with #MEGA and a blank line, then holds each
+   // sequence on a single line
+   String header = "#MEGA\n" + "\n";
+   String data = "#U455 ABCDEFKLMNOP\n" + "#CPZANT MNOPQRWXYZ\n";
+   String expected = header + data;
+   assertEquals("Print format wrong", expected, output);
+ }
+
+ /**
+  * Parses noninterleaved MEGA data and checks the text produced by print().
+  *
+  * @throws IOException
+  *           if raised while constructing the MegaFile
+  */
+ @Test(groups = { "Functional" })
+ public void testPrint_noninterleaved() throws IOException
+ {
+   MegaFile megaFile = new MegaFile(NONINTERLEAVED,
+           AppletFormatAdapter.PASTE);
+   String output = megaFile.print();
+   System.out.println(output);
+
+   // the input split each sequence over two short lines; the expected
+   // output holds each sequence on one line beneath its id
+   String header = "#MEGA\n" + "!TITLE Noninterleaved sequence data;\n"
+           + "\n";
+   String firstSequence = "#U455\n" + "ABCFEDHIJMNOPQR\n" + "\n";
+   String secondSequence = "#CPZANT\n" + "KLMNOPWXYZCGATC\n";
+   String expected = header + firstSequence + secondSequence;
+   assertEquals("Print format wrong", expected, output);
+ }
+
+ /**
+  * Parses interleaved MEGA data whose sequences are longer than one output
+  * line and checks the multi-block text produced by print().
+  *
+  * @throws IOException
+  *           if raised while constructing the MegaFile
+  */
+ @Test(groups = { "Functional" })
+ public void testPrint_interleavedMultiLine() throws IOException
+ {
+   MegaFile megaFile = new MegaFile(INTERLEAVED_LONGERTHAN50,
+           AppletFormatAdapter.PASTE);
+   String output = megaFile.print();
+   System.out.println(output);
+
+   final String ten = "0123456789";
+   final String twenty = "01234567890123456789";
+   final String fifty = THIRTY_CHARS + twenty;
+
+   // first sequence is length 60, second is length 120; expected output is
+   // three blocks holding 50 + 10 + 0 and 50 + 50 + 20 characters
+   String header = "#MEGA\n" + "!TITLE Interleaved sequence data;\n" + "\n";
+   String blockOne = "#U455 " + fifty + "\n" + "#CPZANT " + fifty + "\n";
+   String blockTwo = "#U455 " + ten + "\n" + "#CPZANT " + fifty + "\n";
+   String blockThree = "#U455 \n" + "#CPZANT " + twenty + "\n";
+   String expected = header + blockOne + "\n" + blockTwo + "\n"
+           + blockThree;
+   assertEquals("Print format wrong", expected, output);
+ }
+
+ /**
+  * Parses noninterleaved data holding one 60 character sequence and checks
+  * that print() splits it over more than one line.
+  *
+  * @throws IOException
+  *           if raised while constructing the MegaFile
+  */
+ @Test(groups = { "Functional" })
+ public void testPrint_noninterleavedMultiLine() throws IOException
+ {
+   MegaFile megaFile = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
+           AppletFormatAdapter.PASTE);
+   String output = megaFile.print();
+   System.out.println(output);
+
+   // expected: 50 characters on the first data line, the remaining 10 on
+   // the next
+   String firstLine = THIRTY_CHARS + "01234567890123456789\n";
+   String secondLine = "0123456789\n";
+   String expected = "#MEGA\n" + "\n" + "#SIXTY\n" + firstLine + secondLine;
+   assertEquals("Print format wrong", expected, output);
+ }
+
+ /**
+  * Parses pasted 'fancy format' data and verifies the title, the sequence
+  * ids and data, the interleaved flag and the description property.
+  *
+  * @throws IOException
+  *           if raised while constructing the MegaFile
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_fancyFormat() throws IOException
+ {
+   MegaFile megaFile = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE);
+
+   String title = megaFile.getAlignmentProperty(MegaFile.PROP_TITLE);
+   assertEquals("Title not as expected", "Fancy format data", title);
+
+   // the assertion on the Format property is currently disabled:
+   // assertEquals("Format property not parsed",
+   // "DataType=DNA indel=- CodeTable=Standard;",
+   // testee.getAlignmentProperty(MegaFile.PROP_FORMAT));
+
+   Vector<SequenceI> parsed = megaFile.getSeqs();
+   assertEquals("Expected two sequences", 2, parsed.size());
+
+   // ids must come back in input order
+   SequenceI first = parsed.get(0);
+   SequenceI second = parsed.get(1);
+   assertEquals("First sequence id wrong", "U455", first.getName());
+   assertEquals("Second sequence id wrong", "CPZANT", second.getName());
+
+   // expected data has no spaces between the triplet groups
+   assertEquals("First sequence data wrong", "ABCDEFKLMNOP",
+           first.getSequenceAsString());
+   assertEquals("Second sequence data wrong", "MNOPQRWXYZ",
+           second.getSequenceAsString());
+
+   boolean interleaved = megaFile.isInterleaved();
+   assertTrue("File format is not flagged as interleaved", interleaved);
+
+   // expected description: one line per input line, without the ';'
+   String expectedDescription = " Line one of description\n"
+           + " Line two of description\n";
+   String description = megaFile
+           .getAlignmentProperty(MegaFile.PROP_DESCRIPTION);
+   assertEquals("Description property not parsed", expectedDescription,
+           description);
+ }
+
+ /**
+  * Checks getNonCommentContent for lines with and without bracketed
+  * comments, starting both outside (depth 0) and inside (depth 1) a comment.
+  *
+  * @throws FileFormatException
+  *           if thrown by the method under test
+  */
+ @Test(groups = { "Functional" })
+ public void testGetNonCommentContent() throws FileFormatException
+ {
+   // parallel arrays: input line, starting comment depth, expected content
+   String[] lines = { "abcde", "CGT ACG GAC [9]", "abcde",
+       "and others ] abcde", "and others [including refs] ] abcde",
+       "and others ] x ] abcde" };
+   int[] startDepths = { 0, 0, 1, 1, 1, 1 };
+   String[] expected = { "abcde", "CGT ACG GAC ", "", " abcde", " abcde",
+       " x ] abcde" };
+
+   for (int i = 0; i < lines.length; i++)
+   {
+     assertEquals(expected[i],
+             MegaFile.getNonCommentContent(lines[i], startDepths[i]));
+   }
+ }
+
+ /**
+  * Checks commentDepth for lines with various combinations of '[' and ']',
+  * given a starting depth of 0 or 1.
+  *
+  * @throws FileFormatException
+  *           if thrown by the method under test
+  */
+ @Test(groups = { "Functional" })
+ public void testCommentDepth() throws FileFormatException
+ {
+   // parallel arrays: input line, starting depth, expected resulting depth
+   String[] lines = { "abcde", "abc[de", "ab[c[de", "ab]c[d]e[f",
+       "a]b[c]d]e" };
+   int[] startDepths = { 0, 0, 1, 1, 1 };
+   int[] expected = { 0, 1, 3, 1, 0 };
+
+   for (int i = 0; i < lines.length; i++)
+   {
+     assertEquals(expected[i],
+             MegaFile.commentDepth(lines[i], startDepths[i]));
+   }
+ }
+
+ /**
+  * Checks that getValue extracts "Mega" from differently spaced and
+  * punctuated name/value strings, and returns an empty string when only a
+  * name is present.
+  */
+ @Test(groups = { "Functional" })
+ public void testGetValue()
+ {
+   // every one of these should yield the value "Mega"
+   String[] namedValues = { "Name=Mega", "Name =Mega", " Name = Mega ",
+       "Name = Mega; ", " Name = Mega ; ", "\t!Name \t= \tMega ; ",
+       "!Name \t\t Mega; " };
+   for (String input : namedValues)
+   {
+     assertEquals("Mega", MegaFile.getValue(input));
+   }
+
+   // a name with no value gives an empty string
+   assertEquals("", MegaFile.getValue("Name"));
+ }
+}