From 7c91e006a3fa55f0fe9110bf3f18a6176c6ce457 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 29 Sep 2015 16:47:49 +0100 Subject: [PATCH] JAL-1499 first pass update of parsing and tests, more to come --- src/jalview/io/AlignFile.java | 24 +- src/jalview/io/AppletFormatAdapter.java | 30 +- src/jalview/io/FileFormatException.java | 12 + src/jalview/io/IdentifyFile.java | 5 + src/jalview/io/JalviewFileChooser.java | 22 +- src/jalview/io/MegaFile.java | 1048 +++++++++++++++++++++++++++++++ test/jalview/io/IdentifyFileTest.java | 1 + test/jalview/io/MegaFileTest.java | 413 ++++++++++++ 8 files changed, 1533 insertions(+), 22 deletions(-) create mode 100644 src/jalview/io/FileFormatException.java create mode 100644 src/jalview/io/MegaFile.java create mode 100644 test/jalview/io/MegaFileTest.java diff --git a/src/jalview/io/AlignFile.java b/src/jalview/io/AlignFile.java index 7e0cabd..2c42de0 100755 --- a/src/jalview/io/AlignFile.java +++ b/src/jalview/io/AlignFile.java @@ -32,6 +32,8 @@ import java.util.ArrayList; import java.util.Enumeration; import java.util.Hashtable; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.Vector; /** @@ -64,7 +66,7 @@ public abstract class AlignFile extends FileParse /** * Properties to be added to generated alignment object */ - protected Hashtable properties; + protected Hashtable properties; long start; @@ -249,8 +251,8 @@ public abstract class AlignFile extends FileParse { if (properties != null && properties.size() > 0) { - Enumeration keys = properties.keys(); - Enumeration vals = properties.elements(); + Enumeration keys = properties.keys(); + Enumeration vals = properties.elements(); while (keys.hasMoreElements()) { al.setProperty(keys.nextElement(), vals.nextElement()); @@ -268,7 +270,7 @@ public abstract class AlignFile extends FileParse * @param value * - non-null value */ - protected void setAlignmentProperty(Object key, Object value) + protected void setAlignmentProperty(String key, String 
value) { if (key == null) { @@ -282,12 +284,22 @@ public abstract class AlignFile extends FileParse } if (properties == null) { - properties = new Hashtable(); + properties = new Hashtable(); } properties.put(key, value); } - protected Object getAlignmentProperty(Object key) + /** + * Return the alignment properties (or null if none set) + * + * @return + */ + protected Set> getAlignmentProperties() + { + return (this.properties == null ? null : this.properties.entrySet()); + } + + protected String getAlignmentProperty(String key) { if (properties != null && key != null) { diff --git a/src/jalview/io/AppletFormatAdapter.java b/src/jalview/io/AppletFormatAdapter.java index 239c531..d2d607b 100755 --- a/src/jalview/io/AppletFormatAdapter.java +++ b/src/jalview/io/AppletFormatAdapter.java @@ -84,7 +84,8 @@ public class AppletFormatAdapter * List of valid format strings used in the isValidFormat method */ public static final String[] READABLE_FORMATS = new String[] { "BLC", - "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", "PDB", + "CLUSTAL", "FASTA", "MEGA", "MSF", "PileUp", "PIR", "PFAM", "STH", + "PDB", "JnetFile", "RNAML", PhylipFile.FILE_DESC, JSONFile.FILE_DESC, IdentifyFile.GFF3File, "HTML" }; @@ -93,7 +94,8 @@ public class AppletFormatAdapter * corresponding to READABLE_FNAMES */ public static final String[] READABLE_EXTENSIONS = new String[] { - "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + "fa, fasta, mfa, fastq", "aln", "pfam", "meg", "msf", "pir", "blc", + "amsa", "sto,stk", "xml,rnaml", PhylipFile.FILE_EXT, JSONFile.FILE_EXT, ".gff2,gff3", "jar,jvp", HtmlFile.FILE_EXT }; @@ -102,7 +104,8 @@ public class AppletFormatAdapter * READABLE_EXTENSIONS */ public static final String[] READABLE_FNAMES = new String[] { "Fasta", - "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Stockholm", "RNAML", + "Clustal", "PFAM", "MEGA", "MSF", "PIR", "BLC", "AMSA", "Stockholm", + "RNAML", PhylipFile.FILE_DESC, JSONFile.FILE_DESC, 
IdentifyFile.GFF3File, "Jalview", HtmlFile.FILE_DESC }; @@ -111,7 +114,8 @@ public class AppletFormatAdapter * method */ public static final String[] WRITEABLE_FORMATS = new String[] { "BLC", - "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA", "STH", + "CLUSTAL", "FASTA", "MEGA", "MSF", "PileUp", "PIR", "PFAM", "AMSA", + "STH", PhylipFile.FILE_DESC, JSONFile.FILE_DESC }; /** @@ -119,7 +123,8 @@ public class AppletFormatAdapter * that are writable by the application. */ public static final String[] WRITABLE_EXTENSIONS = new String[] { - "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + "fa, fasta, mfa, fastq", "aln", "pfam", "meg", "msf", "pir", "blc", + "amsa", "sto,stk", PhylipFile.FILE_EXT, JSONFile.FILE_EXT, "jvp" }; /** @@ -127,7 +132,7 @@ public class AppletFormatAdapter * WRITABLE_EXTENSIONS list of formats. */ public static final String[] WRITABLE_FNAMES = new String[] { "Fasta", - "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "STH", + "Clustal", "PFAM", "MEGA", "MSF", "PIR", "BLC", "AMSA", "STH", PhylipFile.FILE_DESC, JSONFile.FILE_DESC, "Jalview" }; public static String INVALID_CHARACTERS = "Contains invalid characters"; @@ -306,6 +311,10 @@ public class AppletFormatAdapter { alignFile = new RnamlFile(inFile, type); } + else if (format.equals("MEGA")) + { + alignFile = new MegaFile(inFile, type); + } else if (format.equals(IdentifyFile.GFF3File)) { alignFile = new Gff3File(inFile, type); @@ -418,6 +427,10 @@ public class AppletFormatAdapter { alignFile = new RnamlFile(source); } + else if (format.equals("MEGA")) + { + alignFile = new MegaFile(source); + } else if (format.equals("SimpleBLAST")) { alignFile = new SimpleBlastFile(source); @@ -598,7 +611,10 @@ public class AppletFormatAdapter { afile = new RnamlFile(); } - + else if (format.equalsIgnoreCase("MEGA")) + { + afile = new MegaFile(); + } else { throw new Exception( diff --git a/src/jalview/io/FileFormatException.java b/src/jalview/io/FileFormatException.java new 
file mode 100644 index 0000000..761099b --- /dev/null +++ b/src/jalview/io/FileFormatException.java @@ -0,0 +1,12 @@ +package jalview.io; + +import java.io.IOException; + +@SuppressWarnings("serial") +public class FileFormatException extends IOException +{ + public FileFormatException(String msg) + { + super(msg); + } +} diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index aec0540..ef96123 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -132,6 +132,11 @@ public class IdentifyFile } data = data.toUpperCase(); + if (data.startsWith("#MEGA")) + { + reply = "MEGA"; + break; + } if (data.startsWith("##GFF-VERSION")) { reply = GFF3File; diff --git a/src/jalview/io/JalviewFileChooser.java b/src/jalview/io/JalviewFileChooser.java index 1ea6795..212c482 100755 --- a/src/jalview/io/JalviewFileChooser.java +++ b/src/jalview/io/JalviewFileChooser.java @@ -125,8 +125,8 @@ public class JalviewFileChooser extends JFileChooser { if (getUI() instanceof javax.swing.plaf.basic.BasicFileChooserUI) { - final javax.swing.plaf.basic.BasicFileChooserUI ui = (javax.swing.plaf.basic.BasicFileChooserUI) getUI(); - final String name = ui.getFileName().trim(); + final javax.swing.plaf.basic.BasicFileChooserUI myui = (javax.swing.plaf.basic.BasicFileChooserUI) getUI(); + final String name = myui.getFileName().trim(); if ((name == null) || (name.length() == 0)) { @@ -138,10 +138,10 @@ public class JalviewFileChooser extends JFileChooser @Override public void run() { - String currentName = ui.getFileName(); + String currentName = myui.getFileName(); if ((currentName == null) || (currentName.length() == 0)) { - ui.setFileName(name); + myui.setFileName(name); } } }); @@ -190,6 +190,10 @@ public class JalviewFileChooser extends JFileChooser { format = "PFAM"; } + else if (format.toUpperCase().startsWith("MEGA")) + { + format = "MEGA"; + } else if (format.toUpperCase().startsWith(PhylipFile.FILE_DESC)) { format = 
PhylipFile.FILE_DESC; @@ -259,14 +263,14 @@ public class JalviewFileChooser extends JFileChooser class RecentlyOpened extends JPanel { - JList list; + JList list; public RecentlyOpened() { String historyItems = jalview.bin.Cache.getProperty("RECENT_FILE"); StringTokenizer st; - Vector recent = new Vector(); + Vector recent = new Vector(); if (historyItems != null) { @@ -274,11 +278,11 @@ public class JalviewFileChooser extends JFileChooser while (st.hasMoreTokens()) { - recent.addElement(st.nextElement()); + recent.addElement(st.nextToken()); } } - list = new JList(recent); + list = new JList(recent); DefaultListCellRenderer dlcr = new DefaultListCellRenderer(); dlcr.setHorizontalAlignment(DefaultListCellRenderer.RIGHT); @@ -304,7 +308,7 @@ public class JalviewFileChooser extends JFileChooser layout.putConstraint(SpringLayout.NORTH, scroller, 5, SpringLayout.NORTH, this); - if (new Platform().isAMac()) + if (Platform.isAMac()) { scroller.setPreferredSize(new Dimension(500, 100)); } diff --git a/src/jalview/io/MegaFile.java b/src/jalview/io/MegaFile.java new file mode 100644 index 0000000..90693f1 --- /dev/null +++ b/src/jalview/io/MegaFile.java @@ -0,0 +1,1048 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1) + * Copyright (C) 2014 The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . 
+ * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.io; + +import jalview.datamodel.AlignmentI; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +/** + * A parser for input or output of MEGA format files.
+ *
+ * Tamura K, Stecher G, Peterson D, Filipski A, and Kumar S (2013) MEGA6: + * Molecular Evolutionary Genetics Analysis Version 6.0. Molecular Biology and + * Evolution 30: 2725-2729.
+ *
+ * + * MEGA file format is supported as described in + * http://www.megasoftware.net/manual.pdf
+ * Limitations:
+ * <ul>
+ * <li>nested comments (marked by [ ]) are accepted but not preserved</li>
+ * <li>to be completed</li>
+ * </ul>
+ * + * @see http://www.megasoftware.net/ + */ +public class MegaFile extends AlignFile +{ + private static final char COMMENT_START = '['; + + private static final char COMMENT_END = ']'; + + private static final String HASHSIGN = "#"; + + private static final String SEMICOLON = ";"; + + private static final String BANG = "!"; + + private static final String EQUALS = "="; + + private static final String MEGA_ID = HASHSIGN + "MEGA"; + + private static final String TITLE = "TITLE"; + + private static final String FORMAT = "Format"; + + private static final String DESCRIPTION = "Description"; + + private static final String GENE = "Gene"; + + private static final String DOMAIN = "Domain"; + + private static final String INTERLEAVED = "Interleaved"; + + /* + * names of properties to save to the alignment (may affect eventual output + * format) + */ + static final String PROP_TITLE = "MEGA_TITLE"; + + static final String PROP_INTERLEAVED = "MEGA_INTERLEAVED"; + + static final String PROP_DESCRIPTION = "MEGA_DESCRIPTION"; + + static final String PROP_CODETABLE = "MEGA_CODETABLE"; + + static final String PROP_IDENTITY = "MEGA_IDENTITY"; + + static final String PROP_MISSING = "MEGA_MISSING"; + + // TODO: need a controlled name for Gene as a feature if we want to be able to + // output the MEGA file with !Gene headers + // WTF do we do if the sequences get realigned? 
+ + // initial size for sequence data buffer + private static final int SEQBUFFERSIZE = 256; + + private static final String SPACE = " "; + + private static final int POSITIONS_PER_LINE = 50; + + private String title; + + // gap character may be explicitly declared, if not we infer it + private Character gapCharacter; + + // this can be True, False or null (meaning not asserted in file) + private Boolean nucleotide; + + // set once we have seen one block of interleaved data + private boolean firstDataBlockRead = false; + + // this can be True, False or null (meaning we don't know yet) + private Boolean interleaved; + + public MegaFile() + { + } + + public MegaFile(String inFile, String type) throws IOException + { + super(inFile, type); + } + + public MegaFile(FileParse source) throws IOException + { + super(source); + } + + /** + * Parse the input stream. + */ + @Override + public void parse() throws IOException + { + /* + * Read and process MEGA and Title/Format/Description headers if present. + * Returns the first data line following the headers. + */ + String dataLine = parseHeaderLines(); + + /* + * Temporary store of {sequenceId, positionData} while parsing interleaved + * sequences; sequences are maintained in the order in which they are added + * i.e. read in the file + */ + Map seqData = new LinkedHashMap(); + + /* + * The id of the sequence being read (for non-interleaved) + */ + String currentId = ""; + + while (dataLine != null) + { + dataLine = dataLine.trim(); + if (dataLine.length() > 0) + { + if (dataLine.startsWith(BANG + GENE)) + { + parseGene(dataLine); + } + else if (dataLine.startsWith(BANG + DOMAIN)) + { + parseDomain(dataLine); + } + else + { + currentId = parseDataLine(dataLine, seqData, currentId); + } + } + else if (!seqData.isEmpty()) + { + /* + * Blank line after processing some data... 
+ */ + this.firstDataBlockRead = true; + } + dataLine = nextNonCommentLine(); + } + + setSequences(seqData); + } + + /** + * Parse a !Gene command line + * + * @param dataLine + */ + protected void parseGene(String dataLine) + { + } + + /** + * Parse a !Domain command line + * + * @param dataLine + */ + private void parseDomain(String dataLine) + { + } + + /** + * Returns the next line that is not a comment, or null at end of file. + * Comments in MEGA are within [ ] brackets, and may be nested. + * + * @return + * @throws IOException + */ + protected String nextNonCommentLine() throws IOException + { + return nextNonCommentLine(0); + } + + /** + * Returns the next line that is not a comment, or null at end of file. + * Comments in MEGA are within [ ] brackets, and may be nested. + * + * @param depth + * current depth of nesting of comments while parsing + * @return + * @throws IOException + */ + protected String nextNonCommentLine(final int depth) throws IOException + { + String data = null; + data = nextLine(); + if (data == null) + { + if (depth > 0) + { + System.err.println("Warning: unterminated comment in data file"); + } + return data; + } + int leftBracket = data.indexOf(COMMENT_START); + + /* + * reject unnested comment following data on the same line + */ + if (depth == 0 && leftBracket > 0) + { + throw new FileFormatException( + "Can't parse comment following data at " + data); + } + + /* + * If we are in a (possibly nested) comment after parsing this line, keep + * reading recursively until the comment has unwound + */ + int newDepth = commentDepth(data, depth); + if (newDepth > 0) + { + return nextNonCommentLine(newDepth); + } + else + { + /* + * not in a comment by end of this line; return what is left (or the next + * line if that is empty) + */ + String nonCommentPart = getNonCommentContent(data, depth); + // if (nonCommentPart.length() > 0) + // { + return nonCommentPart; + // } + // return nextNonCommentLine(0); + } + } + + /** + * Returns what is 
left of the input data after removing any comments, whether + * 'in progress' from preceding lines, or embedded in the current line + * + * @param data + * input data + * @param depth + * nested depth of comments pending termination + * @return + * @throws FileFormatException + */ + protected static String getNonCommentContent(String data, int depth) + throws FileFormatException + { + int len = data.length(); + StringBuilder result = new StringBuilder(len); + for (int i = 0; i < len; i++) + { + char c = data.charAt(i); + switch (c) + { + case COMMENT_START: + depth++; + break; + + case COMMENT_END: + if (depth > 0) + { + depth--; + } + else + { + result.append(c); + } + break; + + default: + if (depth == 0) + { + result.append(c); + } + } + } + return result.toString(); + } + + /** + * Calculates new depth of comment after parsing an input line i.e. the excess + * of opening '[' over closing ']' characters. Any excess ']' are ignored (not + * treated as comment delimiters). + * + * @param data + * input line + * @param depth + * current comment nested depth before parsing the line + * @return new depth after parsing the line + */ + protected static int commentDepth(CharSequence data, int depth) + { + int newDepth = depth; + int len = data.length(); + for (int i = 0; i < len; i++) + { + char c = data.charAt(i); + if (c == COMMENT_START) + { + newDepth++; + } + else if (c == COMMENT_END && newDepth > 0) + { + newDepth--; + } + } + return newDepth; + } + + /** + * Convert the parsed sequence strings to objects and store them in the model. + * + * @param seqData + */ + protected void setSequences(Map seqData) + { + Set> datasets = seqData.entrySet(); + + for (Entry dataset : datasets) + { + String sequenceId = dataset.getKey(); + StringBuilder characters = dataset.getValue(); + SequenceI s = new Sequence(sequenceId, new String(characters)); + this.seqs.addElement(s); + } + } + + /** + * Process one line of sequence data. 
If it has no sequence identifier, append + * to the current id's sequence. Else parse out the sequence id and append the + * data (if any) to that id's sequence. Returns the sequence id (implicit or + * explicit) for this line. + * + * @param dataLine + * @param seqData + * @param currentid + * @return + * @throws IOException + */ + protected String parseDataLine(String dataLine, + Map seqData, String currentId) + throws IOException + { + String seqId = getSequenceId(dataLine); + if (seqId == null) + { + /* + * Just character data + */ + parseNoninterleavedDataLine(dataLine, seqData, currentId); + return currentId; + } + else if ((HASHSIGN + seqId).trim().equals(dataLine.trim())) + { + /* + * Sequence id only - header line for noninterleaved data + */ + return seqId; + } + else + { + /* + * Sequence id followed by data + */ + parseInterleavedDataLine(dataLine, seqData, seqId); + return seqId; + } + } + + /** + * Add a line of sequence data to the buffer for the given sequence id. Start + * a new one if we haven't seen it before. + * + * @param dataLine + * @param seqData + * @param currentId + * @throws IOException + */ + protected void parseNoninterleavedDataLine(String dataLine, + Map seqData, String currentId) + throws IOException + { + if (currentId == null) + { + /* + * Oops. Data but no sequence id context. + */ + throw new IOException("No sequence id context at: " + dataLine); + } + + assertInterleaved(false, dataLine); + + StringBuilder sb = getSequenceDataBuffer(seqData, currentId); + + /* + * Add the current line of data to the sequence. + */ + sb.append(dataLine); + } + + /** + * Get the sequence data for this sequence id, starting a new one if + * necessary. 
+ * + * @param seqData + * @param currentId + * @return + */ + protected StringBuilder getSequenceDataBuffer( + Map seqData, String currentId) + { + StringBuilder sb = seqData.get(currentId); + if (sb == null) + { + // first data met for this sequence id, start a new buffer + sb = new StringBuilder(SEQBUFFERSIZE); + seqData.put(currentId, sb); + } + return sb; + } + + /** + * Parse one line of interleaved data e.g. + * + *
<pre>
+   * #TheSeqId CGATCGCATGCA
+   * </pre>
+ * + * @param dataLine + * @param seqData + * @param seqId + * @throws IOException + */ + protected void parseInterleavedDataLine(String dataLine, + Map seqData, String seqId) + throws IOException + { + /* + * New sequence found in second or later data block - error. + */ + if (this.firstDataBlockRead && !seqData.containsKey(seqId)) + { + throw new IOException( + "Parse error: misplaced new sequence starting at " + dataLine); + } + + StringBuilder sb = getSequenceDataBuffer(seqData, seqId); + String data = dataLine.substring(seqId.length() + 1).trim(); + + /* + * Do nothing if this line is _only_ a sequence id with no data following. + * + * Remove any internal spaces (present in the 'fancy' file format) + */ + if (data != null && data.length() > 0) + { + if (data.indexOf(SPACE) != -1) + { + data = data.replace(SPACE, ""); + } + sb.append(data); + assertInterleaved(true, dataLine); + } + } + + /** + * If the line begins with (e.g.) "#abcde " then returns "abcde" as the + * identifier. Else returns null. + * + * @param dataLine + * @return + */ + public static String getSequenceId(String dataLine) + { + // TODO refactor to a StringUtils type class + if (dataLine != null) + { + if (dataLine.startsWith(HASHSIGN)) + { + int spacePos = dataLine.indexOf(" "); + return (spacePos == -1 ? dataLine.substring(1) : dataLine + .substring(1, spacePos)); + } + } + return null; + } + + /** + * Read the #MEGA and Title/Format/Description header lines (if present). + * + * Save as alignment properties in case useful. + * + * @return the next non-blank line following the header lines. 
+ * @throws IOException + */ + protected String parseHeaderLines() throws IOException + { + String inputLine = null; + while ((inputLine = nextNonCommentLine()) != null) + { + inputLine = inputLine.trim(); + + /* + * skip blank lines + */ + if (inputLine.length() == 0) + { + continue; + } + + if (inputLine.toUpperCase().startsWith(MEGA_ID)) + { + continue; + } + + if (isTitle(inputLine)) + { + setAlignmentProperty(PROP_TITLE, getValue(inputLine)); + } + else if (inputLine.startsWith(BANG + DESCRIPTION)) + { + parseDescription(inputLine); + } + + else if (inputLine.startsWith(BANG + FORMAT)) + { + parseFormat(inputLine); + } + else if (!inputLine.toUpperCase().startsWith(MEGA_ID)) + { + + /* + * Return the first 'data line' i.e. one that is not blank, #MEGA or + * TITLE: + */ + break; + } + } + return inputLine; + } + + /** + * Parse a !Format statement. This may be multiline, and is ended by a + * semicolon. + * + * @param inputLine + * @throws IOException + */ + protected void parseFormat(String inputLine) throws IOException + { + while (inputLine != null) + { + parseFormatLine(inputLine); + if (inputLine.endsWith(SEMICOLON)) + { + break; + } + inputLine = nextNonCommentLine(); + } + } + + /** + * Parse one line of a !Format statement. This may contain one or more + * keyword=value pairs. + * + * @param inputLine + * @throws FileFormatException + */ + protected void parseFormatLine(String inputLine) + throws FileFormatException + { + if (inputLine.startsWith(BANG + FORMAT)) + { + inputLine = inputLine.substring((BANG + FORMAT).length()); + } + if (inputLine.endsWith(SEMICOLON)) + { + inputLine = inputLine.substring(0, inputLine.length() - 1); + } + String[] tokens = inputLine.trim().split("\\s"); // any whitespace + for (String token : tokens) + { + parseFormatKeyword(token); + } + } + + /** + * Parse a Keyword=Value token. Possible keywords are + *
<ul>
+ * <li>DataType= DNA, RNA, Nucleotide, Protein</li>
+ * <li>DataFormat= Interleaved, ?</li>
+ * <li>NSeqs= number of sequences (synonym NTaxa)</li>
+ * <li>NSites= number of bases / residues</li>
+ * <li>Property= Exon (or Coding), Intron (or Noncoding), End (of domain)</li>
+ * <li>Indel= gap character</li>
+ * <li>Identical= identity character (to first sequence) (synonym MatchChar)</li>
+ * <li>Missing= missing data character</li>
+ * <li>CodeTable= Standard, other (MEGA supports various)</li>
+ * </ul>
+ * + * @param token + * @throws FileFormatException + * if an unrecognised keyword or value is encountered + */ + protected void parseFormatKeyword(String token) + throws FileFormatException + { + String msg = "Unrecognised Format command: " + token; + String[] bits = token.split(EQUALS); + if (bits.length != 2) + { + throw new FileFormatException(msg); + } + String keyword = bits[0]; + String value = bits[1]; + + /* + * Jalview will work out whether nucleotide or not anyway + */ + if (keyword.equalsIgnoreCase("DataType")) + { + if (value.equalsIgnoreCase("DNA") || value.equalsIgnoreCase("RNA") + || value.equalsIgnoreCase("Nucleotide")) + { + this.nucleotide = true; + // alignment computes whether or not it is nucleotide when created + } + else if (value.equalsIgnoreCase("Protein")) + { + this.nucleotide = false; + } + else + { + throw new FileFormatException(msg); + } + } + + /* + * accept non-Standard code table but save in case we want to disable + * 'translate as cDNA' + */ + else if (keyword.equalsIgnoreCase("CodeTable")) + { + setAlignmentProperty(PROP_CODETABLE, value); + } + + /* + * save gap char to set later on alignment once created + */ + else if (keyword.equalsIgnoreCase("Indel")) + { + this.gapCharacter = value.charAt(0); + } + + else if (keyword.equalsIgnoreCase("Identical") + || keyword.equalsIgnoreCase("MatchChar")) + { + if (!".".equals(value)) + { + setAlignmentProperty(PROP_IDENTITY, value); + System.err.println("Warning: " + token + + " not supported, Jalview uses '.' for identity"); + } + } + + else if (keyword.equalsIgnoreCase("Missing")) + { + setAlignmentProperty(PROP_MISSING, value); + System.err.println("Warning: " + token + " not supported"); + } + + else if (keyword.equalsIgnoreCase("Property")) + { + // TODO: figure out what to do with this + // can it appear more than once in a file? 
+ setAlignmentProperty(PROP_MISSING, value); + } + + else if (!keyword.equalsIgnoreCase("NSeqs") + && !keyword.equalsIgnoreCase("NSites")) + { + System.err.println("Warning: " + msg); + } + } + + /** + * Returns the trimmed data on the line following either whitespace or '=', + * with any trailing semi-colon removed
+ * So
+ * <ul>
+ * <li>Hello World</li>
+ * <li>!Hello: \tWorld;</li>
+ * <li>!Hello=World</li>
+ * </ul>
      + * should all return "World" + * + * @param inputLine + * @return + */ + protected static String getValue(String inputLine) + { + if (inputLine == null) + { + return null; + } + String value = null; + String s = inputLine.replaceAll("\t", " ").trim(); + + /* + * KEYWORD = VALUE should return VALUE + */ + int equalsPos = s.indexOf("="); + if (equalsPos >= 0) + { + value = s.substring(equalsPos + 1); + } + else + { + int spacePos = s.indexOf(' '); + value = spacePos == -1 ? "" : s.substring(spacePos + 1); + } + value = value.trim(); + if (value.endsWith(SEMICOLON)) + { + value = value.substring(0, value.length() - 1).trim(); + } + return value; + } + + /** + * Returns true if the input line starts with "TITLE" or "!TITLE" (not case + * sensitive). The latter is the official format, some older data file + * examples have it without the !. + * + * @param inputLine + * @return + */ + protected static boolean isTitle(String inputLine) + { + if (inputLine == null) + { + return false; + } + String upper = inputLine.toUpperCase(); + return (upper.startsWith(TITLE) || upper.startsWith(BANG + TITLE)); + } + + /** + * Reads lines until terminated by semicolon, appending each to the + * Description property value. + * + * @throws IOException + */ + protected void parseDescription(String firstDescriptionLine) + throws IOException + { + StringBuilder desc = new StringBuilder(256); + String line = getValue(firstDescriptionLine); + while (line != null) + { + if (line.endsWith(SEMICOLON)) + { + desc.append(line.substring(0, line.length() - 1)).append(newline); + break; + } + else if (line.length() > 0) + { + desc.append(line).append(newline); + } + line = nextNonCommentLine(); + } + setAlignmentProperty(PROP_DESCRIPTION, desc.toString()); + } + + /** + * Write out the alignment sequences in Mega format. 
+ */ + @Override + public String print() + { + return print(getSeqsAsArray()); + } + + /** + * Write out the alignment sequences in Mega format - interleaved unless + * explicitly noninterleaved. + */ + public String print(SequenceI[] s) + { + // TODO: is there a way to preserve the 'interleaved' property so it can + // affect output? + + String result = null; + if (this.interleaved != null && !this.interleaved) + { + result = printNonInterleaved(s); + } + else + { + result = printInterleaved(s); + } + return result; + } + + /** + * Print the sequences in interleaved format, each row 15 space-separated + * triplets. + * + * @param s + * @return + */ + protected String printInterleavedCodons(SequenceI[] s) + { + // TODO not coded yet - defaulting to the 'simple' format output + return printInterleaved(s); + } + + /** + * Print to string in Interleaved format - blocks of next 50 characters of + * each sequence in turn. + * + * @param s + */ + protected String printInterleaved(SequenceI[] s) + { + int maxIdLength = getMaxIdLength(s); + int maxSequenceLength = getMaxSequenceLength(s); + int numLines = maxSequenceLength / POSITIONS_PER_LINE + 3; // approx + + /* + * Size a buffer to hold the whole output + */ + StringBuilder sb = new StringBuilder(numLines + * (maxIdLength + 2 + POSITIONS_PER_LINE)); + printHeaders(sb); + + int numDataBlocks = (maxSequenceLength - 1) / POSITIONS_PER_LINE + 1; + for (int i = 0; i < numDataBlocks; i++) + { + sb.append(newline); + for (SequenceI seq : s) + { + + String seqId = String.format("#%-" + maxIdLength + "s ", + seq.getName()); + char[] subSequence = seq.getSequence(i * POSITIONS_PER_LINE, + (i + 1) * POSITIONS_PER_LINE); + sb.append(seqId); + sb.append(subSequence); + sb.append(newline); + } + } + + return new String(sb); + } + + /** + * Append the MEGA header and any other known properties + * + * @param sb + */ + private void printHeaders(StringBuilder sb) + { + sb.append(MEGA_ID); + sb.append(newline); + + String ttle = 
getAlignmentProperty(PROP_TITLE); + if (ttle != null) + { + sb.append(BANG).append(TITLE).append(SPACE).append(ttle) + .append(SEMICOLON).append(newline); + } + + String desc = getAlignmentProperty(PROP_DESCRIPTION); + if (desc != null) + { + sb.append(BANG).append(DESCRIPTION).append(SPACE).append(desc) + .append(SEMICOLON).append(newline); + } + } + + /** + * Get the longest sequence id (to allow aligned printout). + * + * @param s + * @return + */ + protected static int getMaxIdLength(SequenceI[] s) + { + // TODO pull up for reuse + int maxLength = 0; + for (SequenceI seq : s) + { + int len = seq.getName().length(); + if (len > maxLength) + { + maxLength = len; + } + } + return maxLength; + } + + /** + * Get the longest sequence length + * + * @param s + * @return + */ + protected static int getMaxSequenceLength(SequenceI[] s) + { + // TODO pull up for reuse + int maxLength = 0; + for (SequenceI seq : s) + { + int len = seq.getLength(); + if (len > maxLength) + { + maxLength = len; + } + } + return maxLength; + } + + /** + * Print to string in noninterleaved format - all of each sequence in turn, in + * blocks of 50 characters. + * + * @param s + * @return + */ + protected String printNonInterleaved(SequenceI[] s) + { + int maxSequenceLength = getMaxSequenceLength(s); + // approx + int numLines = maxSequenceLength / POSITIONS_PER_LINE + 2 + s.length; + + /* + * Roughly size a buffer to hold the whole output + */ + StringBuilder sb = new StringBuilder(numLines * POSITIONS_PER_LINE); + printHeaders(sb); + + for (SequenceI seq : s) + { + sb.append(newline); + sb.append(HASHSIGN + seq.getName()).append(newline); + int startPos = 0; + while (startPos <= seq.getLength()) + { + char[] subSequence = seq.getSequence(startPos, startPos + + POSITIONS_PER_LINE); + sb.append(subSequence); + sb.append(newline); + startPos += POSITIONS_PER_LINE; + } + } + + return new String(sb); + } + + /** + * Flag this file as interleaved or not, based on data format. 
Throws an + * exception if has previously been determined to be otherwise. + * + * @param isIt + * @param dataLine + * @throws IOException + */ + protected void assertInterleaved(boolean isIt, String dataLine) + throws IOException + { + if (this.interleaved != null && isIt != this.interleaved.booleanValue()) + { + throw new IOException( + "Parse error: mix of interleaved and noninterleaved detected, at line: " + + dataLine); + } + this.interleaved = new Boolean(isIt); + } + + public boolean isInterleaved() + { + return this.interleaved == null ? false : this.interleaved + .booleanValue(); + } + + /** + * Adds saved parsed values either as alignment properties, or (in some cases) + * as specific member fields of the alignment + */ + @Override + public void addProperties(AlignmentI al) + { + super.addProperties(al); + if (this.gapCharacter != null) + { + al.setGapCharacter(gapCharacter); + } + + /* + * warn if e.g. DataType=DNA but data is protein (or vice versa) + */ + if (this.nucleotide != null && this.nucleotide != al.isNucleotide()) { + System.err.println("Warning: " + this.title + " declared " + + (nucleotide ? "" : " not ") + "nucleotide but it is" + + (nucleotide ? 
" not" : "")); + } + } +} diff --git a/test/jalview/io/IdentifyFileTest.java b/test/jalview/io/IdentifyFileTest.java index c958ff0..5a0bb29 100644 --- a/test/jalview/io/IdentifyFileTest.java +++ b/test/jalview/io/IdentifyFileTest.java @@ -50,6 +50,7 @@ public class IdentifyFileTest { "examples/testdata/test.aln", "CLUSTAL" }, { "examples/testdata/test.pfam", "PFAM" }, { "examples/testdata/test.msf", "MSF" }, + { "examples/testdata/test.meg", "MEGA" }, { "examples/testdata/test.pir", "PIR" }, { "examples/testdata/test.html", "HTML" }, { "examples/testdata/test.pileup", "PileUp" }, diff --git a/test/jalview/io/MegaFileTest.java b/test/jalview/io/MegaFileTest.java new file mode 100644 index 0000000..92a3c3c --- /dev/null +++ b/test/jalview/io/MegaFileTest.java @@ -0,0 +1,413 @@ +package jalview.io; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNull; +import static org.testng.AssertJUnit.assertTrue; +import static org.testng.AssertJUnit.fail; + +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; + +import java.io.IOException; +import java.util.Vector; + +import org.testng.annotations.Test; + +/* + * Unit tests for MegaFile - read and write in MEGA format(s). 
+ */ +public class MegaFileTest +{ + private static final String THIRTY_CHARS = "012345678901234567890123456789"; + + //@formatter:off + private static final String INTERLEAVED = + "#MEGA\n"+ + "TITLE: Interleaved sequence data\n\n" + + "#U455 ABCDEF\n" + + "#CPZANT MNOPQR\n\n" + "#U455 KLMNOP\n" + + "#CPZANT WXYZ"; + + private static final String INTERLEAVED_NOHEADERS = + "#U455 ABCDEF\n" + + "#CPZANT MNOPQR\n\n" + + "#U455 KLMNOP\n" + + "#CPZANT WXYZ\n"; + + // interleaved sequences, one with 60 one with 120 characters (on overlong + // input lines) + private static final String INTERLEAVED_LONGERTHAN50 = + "#MEGA\n" + + "TITLE: Interleaved sequence data\n\n" + + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + "\n" + + "#CPZANT " + + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS; + + private static final String NONINTERLEAVED = + "#MEGA\n" + + "TITLE: Noninterleaved sequence data\n\n" + + "#U455 \n" + + "ABCFEDHIJ\n" + + "MNOPQR\n\n" + + "#CPZANT \n" + + "KLMNOPWXYZ\n" + + "CGATC\n"; + + // Sequence length 60 (split over two lines) + private static final String NONINTERLEAVED_LONGERTHAN50 = + "#SIXTY\n" + THIRTY_CHARS + "\n" + THIRTY_CHARS; + + // this one starts noninterleaved then switches to interleaved + private static final String MIXED = + "#MEGA\n" + + "TITLE: This is a mess\n\n" + "#CPZANT KLMNOPWXYZCGATC\n\n" + + "#U455\n " + + "ABCFEDHIJ\n"; + + // interleaved with a new sequence appearing in the second block :-O + private static final String INTERLEAVED_SEQUENCE_ERROR = + "#MEGA" + "\n" + + "TITLE: Interleaved sequence data\n\n" + + "#U455 ABCDEF\n" + + "#CPZANT MNOPQR\n\n" + + "#U456 KLMNOP\n"; + + // the 'fancy' format, different header format, bases in triplet groups + private static final String FANCY_FORMAT = + "#MEGA\n" + + "!Title Fancy format data;\n" + + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n" + + "!Description\n" + + " Line one of description\n" + + " Line two of description;\n\n" + + "!Gene=Adh Property=Coding 
CodonStart=1;\n" + + "#U455 ABC DEF\n" + + "#CPZANT MNO PQR\n\n" + + "#U455 KLM NOP\n" + + "#CPZANT WXY Z\n"; + + // interleaved sequence data for two genes + private static final String TWO_GENES = + "#MEGA\n" + + "!Title Fancy format data;\n" + + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n" + + "!Description\n" + + " Line one of description\n" + + " Line two of description;\n\n" + + "!Gene=Adh Property=Coding CodonStart=1;\n" + + "#U455 ABC DEF\n" + + "#CPZANT MNO PQR\n\n" + + "#U455 KLM NOP\n" + + "#CPZANT WXY Z\n"; //TODO complete + + //@formatter:on + + /** + * Test paste of interleaved mega format data. + * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testParse_interleaved() throws IOException + { + MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE); + assertEquals("Title not as expected", "Interleaved sequence data", + testee.getAlignmentProperty(MegaFile.PROP_TITLE)); + Vector seqs = testee.getSeqs(); + // should be 2 sequences + assertEquals("Expected two sequences", 2, seqs.size()); + // check sequence names correct and order preserved + assertEquals("First sequence id wrong", "U455", seqs.get(0).getName()); + assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1) + .getName()); + // check sequence data + assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0) + .getSequenceAsString()); + assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1) + .getSequenceAsString()); + assertTrue("File format is not flagged as interleaved", + testee.isInterleaved()); + } + + /** + * Test paste of noninterleaved mega format data. 
+ * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testParse_nonInterleaved() throws IOException + { + MegaFile testee = new MegaFile(NONINTERLEAVED, + AppletFormatAdapter.PASTE); + assertEquals("Title not as expected", "Noninterleaved sequence data", + testee.getAlignmentProperty(MegaFile.PROP_TITLE)); + Vector seqs = testee.getSeqs(); + // should be 2 sequences + assertEquals("Expected two sequences", 2, seqs.size()); + // check sequence names correct and order preserved + assertEquals("First sequence id wrong", "U455", seqs.get(0).getName()); + assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1) + .getName()); + // check sequence data + assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs + .get(0).getSequenceAsString()); + assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC", + seqs.get(1).getSequenceAsString()); + assertFalse("File format is not flagged as noninterleaved", + testee.isInterleaved()); + } + + /** + * Test parsing an interleaved file with an extra sequence appearing after the + * first block - should fail. + */ + @Test(groups = { "Functional" }) + public void testParse_interleavedExtraSequenceError() + { + try + { + new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE); + fail("Expected extra sequence IOException"); + } catch (IOException e) + { + assertEquals( + "Unexpected exception message", + "Parse error: misplaced new sequence starting at #U456 KLMNOP", + e.getMessage()); + } + } + + /** + * Test a mixed up file. 
+ */ + @Test(groups = { "Functional" }) + public void testParse_mixedInterleavedNonInterleaved() + { + try + { + new MegaFile(MIXED, AppletFormatAdapter.PASTE); + fail("Expected mixed content exception"); + } catch (IOException e) + { + assertEquals( + "Unexpected exception message", + "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ", + e.getMessage()); + } + + } + + @Test(groups = { "Functional" }) + public void testGetSequenceId() + { + assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC")); + assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC")); + assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC")); + assertEquals("AB123", MegaFile.getSequenceId("#AB123")); + assertNull(MegaFile.getSequenceId("AB123 CTAG")); + assertNull(MegaFile.getSequenceId("AB123")); + assertNull(MegaFile.getSequenceId("")); + assertNull(MegaFile.getSequenceId(null)); + } + + @Test(groups = { "Functional" }) + public void testGetMaxIdLength() + { + SequenceI[] seqs = new Sequence[2]; + seqs[0] = new Sequence("Something", "GCATAC"); + seqs[1] = new Sequence("SomethingElse", "GCATAC"); + assertEquals(13, MegaFile.getMaxIdLength(seqs)); + seqs[1] = new Sequence("DNA", "GCATAC"); + assertEquals(9, MegaFile.getMaxIdLength(seqs)); + } + + @Test(groups = { "Functional" }) + public void testGetMaxSequenceLength() + { + SequenceI[] seqs = new Sequence[2]; + seqs[0] = new Sequence("Seq1", "GCATAC"); + seqs[1] = new Sequence("Seq2", "GCATACTAG"); + assertEquals(9, MegaFile.getMaxSequenceLength(seqs)); + seqs[1] = new Sequence("Seq2", "GCA"); + assertEquals(6, MegaFile.getMaxSequenceLength(seqs)); + } + + /** + * Test (parse and) print of interleaved mega format data. 
+ * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testPrint_interleaved() throws IOException + { + MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE); + String printed = testee.print(); + System.out.println(printed); + // normally output should match input + // we cheated here with a number of short input lines + String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n" + + "#U455 ABCDEFKLMNOP\n" + "#CPZANT MNOPQRWXYZ" + + "\n"; + assertEquals("Print format wrong", expected, printed); + } + + /** + * Test (parse and) print of interleaved data with no headers (acceptable). + * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testPrint_interleavedNoHeaders() throws IOException + { + MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS, + AppletFormatAdapter.PASTE); + String printed = testee.print(); + System.out.println(printed); + // normally output should match input + // we cheated here with a number of short input lines + String expected = "#MEGA\n\n" + "#U455 ABCDEFKLMNOP" + "\n" + + "#CPZANT MNOPQRWXYZ\n"; + assertEquals("Print format wrong", expected, printed); + } + + /** + * Test (parse and) print of noninterleaved mega format data. + * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testPrint_noninterleaved() throws IOException + { + MegaFile testee = new MegaFile(NONINTERLEAVED, + AppletFormatAdapter.PASTE); + String printed = testee.print(); + System.out.println(printed); + // normally output should match input + // we cheated here with a number of short input lines + String expected = "#MEGA\n" + + "!TITLE Noninterleaved sequence data;\n\n" + + "#U455\n" + "ABCFEDHIJMNOPQR\n\n" + "#CPZANT\n" + + "KLMNOPWXYZCGATC\n"; + assertEquals("Print format wrong", expected, printed); + } + + /** + * Test (parse and) print of interleaved mega format data extending to more + * than one line of output. 
+ * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testPrint_interleavedMultiLine() throws IOException + { + MegaFile testee = new MegaFile(INTERLEAVED_LONGERTHAN50, + AppletFormatAdapter.PASTE); + String printed = testee.print(); + System.out.println(printed); + // first sequence is length 60, second length 120 + // should be output as 50 + 10 + 0 and as 50 + 50 + 20 character lines + // respectively + String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n" + + "#U455 " + THIRTY_CHARS + "01234567890123456789\n" + + "#CPZANT " + THIRTY_CHARS + "01234567890123456789\n" + "\n" + + "#U455 " + "0123456789\n" + "#CPZANT " + THIRTY_CHARS + + "01234567890123456789\n\n" + "#U455 \n" + "#CPZANT " + + "01234567890123456789" + + "\n"; + assertEquals("Print format wrong", expected, printed); + } + + /** + * Test (parse and) print of noninterleaved mega format data extending to more + * than one line of output. + * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testPrint_noninterleavedMultiLine() throws IOException + { + MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50, + AppletFormatAdapter.PASTE); + String printed = testee.print(); + System.out.println(printed); + // 60 character sequence should be output as 50 on first line then 10 more + String expected = "#MEGA\n\n" + "#SIXTY\n" + THIRTY_CHARS + + "01234567890123456789\n" + "0123456789\n"; + assertEquals("Print format wrong", expected, printed); + } + + /** + * Test paste / parse of 'fancy format' data. 
+ * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testParse_fancyFormat() throws IOException + { + MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE); + assertEquals("Title not as expected", "Fancy format data", + testee.getAlignmentProperty(MegaFile.PROP_TITLE)); + + // assertEquals("Format property not parsed", + // "DataType=DNA indel=- CodeTable=Standard;", + // testee.getAlignmentProperty(MegaFile.PROP_FORMAT)); + Vector seqs = testee.getSeqs(); + // should be 2 sequences + assertEquals("Expected two sequences", 2, seqs.size()); + // check sequence names correct and order preserved + assertEquals("First sequence id wrong", "U455", seqs.get(0).getName()); + assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1) + .getName()); + // check sequence data + assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0) + .getSequenceAsString()); + assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1) + .getSequenceAsString()); + assertTrue("File format is not flagged as interleaved", + testee.isInterleaved()); + + assertEquals("Description property not parsed", + " Line one of description\n" + + " Line two of description\n", + testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION)); + } + + @Test(groups = { "Functional" }) + public void testGetNonCommentContent() throws FileFormatException + { + assertEquals("abcde", MegaFile.getNonCommentContent("abcde", 0)); + assertEquals("CGT ACG GAC ", + MegaFile.getNonCommentContent("CGT ACG GAC [9]", 0)); + assertEquals("", MegaFile.getNonCommentContent("abcde", 1)); + assertEquals(" abcde", + MegaFile.getNonCommentContent("and others ] abcde", 1)); + assertEquals(" abcde", MegaFile.getNonCommentContent( + "and others [including refs] ] abcde", 1)); + assertEquals(" x ] abcde", + MegaFile.getNonCommentContent("and others ] x ] abcde", 1)); + } + + @Test(groups = { "Functional" }) + public void testCommentDepth() throws FileFormatException + { + 
assertEquals(0, MegaFile.commentDepth("abcde", 0)); + assertEquals(1, MegaFile.commentDepth("abc[de", 0)); + assertEquals(3, MegaFile.commentDepth("ab[c[de", 1)); + assertEquals(1, MegaFile.commentDepth("ab]c[d]e[f", 1)); + assertEquals(0, MegaFile.commentDepth("a]b[c]d]e", 1)); + } + + @Test(groups = { "Functional" }) + public void testGetValue() + { + assertEquals("Mega", MegaFile.getValue("Name=Mega")); + assertEquals("Mega", MegaFile.getValue("Name =Mega")); + assertEquals("Mega", MegaFile.getValue(" Name = Mega ")); + assertEquals("Mega", MegaFile.getValue("Name = Mega; ")); + assertEquals("Mega", MegaFile.getValue(" Name = Mega ; ")); + assertEquals("Mega", MegaFile.getValue("\t!Name \t= \tMega ; ")); + assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; ")); + assertEquals("", MegaFile.getValue("Name")); + } +} -- 1.7.10.2