From 6a495a81e764c82f9fdfb6b3f990b962a5b85286 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Wed, 14 May 2014 13:34:19 +0100 Subject: [PATCH] JAL-1499 patch from Mungo Carstairs --- resources/lang/Messages.properties | 3 +- src/jalview/gui/Preferences.java | 53 ++- src/jalview/io/AlignFile.java | 29 +- src/jalview/io/AppletFormatAdapter.java | 40 +- src/jalview/io/FormatAdapter.java | 14 +- src/jalview/io/IdentifyFile.java | 8 +- src/jalview/io/JalviewFileChooser.java | 31 +- src/jalview/io/MegaFile.java | 784 +++++++++++++++++++++++++++++++ src/jalview/jbgui/GPreferences.java | 57 ++- test/jalview/io/MegaFileTest.java | 389 +++++++++++++++ 10 files changed, 1356 insertions(+), 52 deletions(-) mode change 100755 => 100644 src/jalview/gui/Preferences.java mode change 100755 => 100644 src/jalview/io/AlignFile.java mode change 100755 => 100644 src/jalview/io/AppletFormatAdapter.java mode change 100755 => 100644 src/jalview/io/FormatAdapter.java mode change 100755 => 100644 src/jalview/io/IdentifyFile.java mode change 100755 => 100644 src/jalview/io/JalviewFileChooser.java create mode 100644 src/jalview/io/MegaFile.java mode change 100755 => 100644 src/jalview/jbgui/GPreferences.java create mode 100644 test/jalview/io/MegaFileTest.java diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 6736791..d69fd45 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -182,6 +182,7 @@ label.zappo = Zappo label.taylor = Taylor label.blc = BLC label.fasta = Fasta +label.meg = MEGA label.msf = MSF label.pfam = PFAM label.pileup = Pileup @@ -640,7 +641,7 @@ label.view_structure = View Structure label.clustalx_colours = Clustalx colours label.above_identity_percentage = Above % Identity label.create_sequence_details_report_annotation_for = Annotation for {0} -label.sequece_details_for = Sequece Details for {0} +label.sequece_details_for = Sequence Details for {0} label.sequence_name = Sequence Name 
label.sequence_description = Sequence Description label.edit_sequence_name_description = Edit Sequence Name/Description diff --git a/src/jalview/gui/Preferences.java b/src/jalview/gui/Preferences.java old mode 100755 new mode 100644 index 4f52f52..f7d189b --- a/src/jalview/gui/Preferences.java +++ b/src/jalview/gui/Preferences.java @@ -20,19 +20,29 @@ */ package jalview.gui; -import java.util.*; - -import java.awt.*; -import java.awt.event.*; - -import javax.swing.*; - -import jalview.bin.*; -import jalview.io.*; -import jalview.jbgui.*; -import jalview.schemes.*; +import jalview.bin.Cache; +import jalview.io.JalviewFileChooser; +import jalview.io.JalviewFileView; +import jalview.jbgui.GPreferences; +import jalview.jbgui.GSequenceLink; +import jalview.schemes.ColourSchemeProperty; import jalview.util.MessageManager; +import java.awt.BorderLayout; +import java.awt.Color; +import java.awt.Dimension; +import java.awt.Font; +import java.awt.event.ActionEvent; +import java.awt.event.MouseEvent; +import java.util.Collection; +import java.util.StringTokenizer; +import java.util.Vector; + +import javax.swing.JColorChooser; +import javax.swing.JFileChooser; +import javax.swing.JInternalFrame; +import javax.swing.JOptionPane; + /** * DOCUMENT ME! * @@ -111,7 +121,7 @@ public class Preferences extends GPreferences DasSourceBrowser dasSource; - private WsPreferences wsPrefs; + private final WsPreferences wsPrefs; /** * Creates a new Preferences object. @@ -294,6 +304,7 @@ public class Preferences extends GPreferences * @param e * DOCUMENT ME! 
*/ + @Override public void ok_actionPerformed(ActionEvent e) { @@ -461,6 +472,8 @@ public class Preferences extends GPreferences Boolean.toString(clustaljv.isSelected())); Cache.applicationProperties.setProperty("FASTA_JVSUFFIX", Boolean.toString(fastajv.isSelected())); + Cache.applicationProperties.setProperty("MEGA_JVSUFFIX", + Boolean.toString(megajv.isSelected())); Cache.applicationProperties.setProperty("MSF_JVSUFFIX", Boolean.toString(msfjv.isSelected())); Cache.applicationProperties.setProperty("PFAM_JVSUFFIX", @@ -500,14 +513,17 @@ public class Preferences extends GPreferences /** * DOCUMENT ME! */ + @Override public void startupFileTextfield_mouseClicked() { JalviewFileChooser chooser = new JalviewFileChooser( jalview.bin.Cache.getProperty("LAST_DIRECTORY"), new String[] - { "fa, fasta, fastq", "aln", "pfam", "msf", "pir", "blc", "jar" }, + { "fa, fasta, fastq", "aln", "pfam", "meg", "msf", "pir", + "blc", "jar" }, new String[] - { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "Jalview" }, + { "Fasta", "Clustal", "PFAM", "MEGA", "MSF", "PIR", "BLC", + "Jalview" }, jalview.bin.Cache.getProperty("DEFAULT_FILE_FORMAT")); chooser.setFileView(new JalviewFileView()); chooser.setDialogTitle("Select startup file"); @@ -529,6 +545,7 @@ public class Preferences extends GPreferences * @param e * DOCUMENT ME! */ + @Override public void cancel_actionPerformed(ActionEvent e) { try @@ -547,6 +564,7 @@ public class Preferences extends GPreferences * @param e * DOCUMENT ME! 
*/ + @Override public void annotations_actionPerformed(ActionEvent e) { conservation.setEnabled(annotations.isSelected()); @@ -560,6 +578,7 @@ public class Preferences extends GPreferences && (identity.isSelected() || showGroupConsensus.isSelected())); } + @Override public void newLink_actionPerformed(ActionEvent e) { @@ -586,6 +605,7 @@ public class Preferences extends GPreferences } } + @Override public void editLink_actionPerformed(ActionEvent e) { GSequenceLink link = new GSequenceLink(); @@ -627,6 +647,7 @@ public class Preferences extends GPreferences } } + @Override public void deleteLink_actionPerformed(ActionEvent e) { int index = linkNameList.getSelectedIndex(); @@ -649,6 +670,7 @@ public class Preferences extends GPreferences linkURLList.setListData(urlLinks); } + @Override public void defaultBrowser_mouseClicked(MouseEvent e) { JFileChooser chooser = new JFileChooser("."); @@ -670,6 +692,7 @@ public class Preferences extends GPreferences * jalview.jbgui.GPreferences#showunconserved_actionPerformed(java.awt.event * .ActionEvent) */ + @Override protected void showunconserved_actionPerformed(ActionEvent e) { // TODO Auto-generated method stub @@ -685,6 +708,7 @@ public class Preferences extends GPreferences return groupURLLinks; } + @Override public void minColour_actionPerformed() { Color col = JColorChooser.showDialog(this, @@ -696,6 +720,7 @@ public class Preferences extends GPreferences minColour.repaint(); } + @Override public void maxColour_actionPerformed() { Color col = JColorChooser.showDialog(this, diff --git a/src/jalview/io/AlignFile.java b/src/jalview/io/AlignFile.java old mode 100755 new mode 100644 index c3c86d6..977cb25 --- a/src/jalview/io/AlignFile.java +++ b/src/jalview/io/AlignFile.java @@ -28,6 +28,8 @@ import jalview.datamodel.SequenceI; import java.io.IOException; import java.util.Enumeration; import java.util.Hashtable; +import java.util.Map; +import java.util.Set; import java.util.Vector; /** @@ -55,7 +57,7 @@ public abstract class 
AlignFile extends FileParse /** * Properties to be added to generated alignment object */ - protected Hashtable properties; + protected Hashtable properties; long start; @@ -130,7 +132,7 @@ public abstract class AlignFile extends FileParse for (int i = 0; i < seqs.size(); i++) { - s[i] = (SequenceI) seqs.elementAt(i); + s[i] = seqs.elementAt(i); } return s; @@ -173,8 +175,8 @@ public abstract class AlignFile extends FileParse { if (properties != null && properties.size() > 0) { - Enumeration keys = properties.keys(); - Enumeration vals = properties.elements(); + Enumeration keys = properties.keys(); + Enumeration vals = properties.elements(); while (keys.hasMoreElements()) { al.setProperty(keys.nextElement(), vals.nextElement()); @@ -205,7 +207,7 @@ public abstract class AlignFile extends FileParse } if (properties == null) { - properties = new Hashtable(); + properties = new Hashtable(); } properties.put(key, value); } @@ -224,11 +226,20 @@ public abstract class AlignFile extends FileParse */ protected void initData() { - seqs = new Vector(); + seqs = new Vector(); annotations = new Vector(); } /** + * Return the alignment properties (or null if none set) + * + * @return + */ + protected Set> getAlignmentProperties() + { + return (this.properties == null ? null : this.properties.entrySet()); + } + /** * DOCUMENT ME! 
* * @param s @@ -236,7 +247,7 @@ public abstract class AlignFile extends FileParse */ protected void setSeqs(SequenceI[] s) { - seqs = new Vector(); + seqs = new Vector(); for (int i = 0; i < s.length; i++) { @@ -296,13 +307,13 @@ public abstract class AlignFile extends FileParse /** * vector of String[] treeName, newickString pairs */ - Vector newickStrings = null; + Vector newickStrings = null; protected void addNewickTree(String treeName, String newickString) { if (newickStrings == null) { - newickStrings = new Vector(); + newickStrings = new Vector(); } newickStrings.addElement(new String[] { treeName, newickString }); diff --git a/src/jalview/io/AppletFormatAdapter.java b/src/jalview/io/AppletFormatAdapter.java old mode 100755 new mode 100644 index d7da302..288d476 --- a/src/jalview/io/AppletFormatAdapter.java +++ b/src/jalview/io/AppletFormatAdapter.java @@ -20,11 +20,12 @@ */ package jalview.io; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; + import java.io.File; import java.io.InputStream; -import jalview.datamodel.*; - /** * A low level class for alignment and feature IO with alignment formatting * methods used by both applet and application for generating flat alignment @@ -40,22 +41,23 @@ public class AppletFormatAdapter * List of valid format strings used in the isValidFormat method */ public static final String[] READABLE_FORMATS = new String[] - { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", - "PDB", "JnetFile", "RNAML" }; // , "SimpleBLAST" }; - + { "BLC", "CLUSTAL", "FASTA", "MEGA", "MSF", "PileUp", "PIR", "PFAM", + "PDB", "JnetFile", "RNAML" }; /** * List of valid format strings for use by callers of the formatSequences * method */ public static final String[] WRITEABLE_FORMATS = new String[] - { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA" }; + { "BLC", "CLUSTAL", "FASTA", "MEGA", "MSF", "PileUp", "PIR", "PFAM", + "STH", + "AMSA" }; /** * List of extensions corresponding to 
file format types in WRITABLE_FNAMES * that are writable by the application. */ public static final String[] WRITABLE_EXTENSIONS = new String[] - { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + { "fa, fasta, mfa, fastq", "aln", "pfam", "meg", "msf", "pir", "blc", "amsa", "jvp", "sto,stk", "jar" }; /** @@ -63,15 +65,16 @@ public class AppletFormatAdapter * WRITABLE_EXTENSIONS list of formats. */ public static final String[] WRITABLE_FNAMES = new String[] - { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", - "STH", "Jalview" }; + { "Fasta", "Clustal", "PFAM", "MEGA", "MSF", "PIR", "BLC", "AMSA", + "Jalview", + "STH", "Jalview"}; /** * List of readable format file extensions by application in order * corresponding to READABLE_FNAMES */ public static final String[] READABLE_EXTENSIONS = new String[] - { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + { "fa, fasta, mfa, fastq", "aln", "pfam", "meg", "msf", "pir", "blc", "amsa", "jar,jvp", "sto,stk", "xml,rnaml" }; // ".blast" /** @@ -79,8 +82,8 @@ public class AppletFormatAdapter * READABLE_EXTENSIONS */ public static final String[] READABLE_FNAMES = new String[] - { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", - "Stockholm", "RNAML" };// , + { "Fasta", "Clustal", "PFAM", "MEGA", "MSF", "PIR", "BLC", "AMSA", "Jalview", + "Stockholm", "RNAML" }; // "SimpleBLAST" // }; @@ -245,6 +248,10 @@ public class AppletFormatAdapter { afile = new RnamlFile(inFile, type); } + else if (format.equals("MEGA")) + { + afile = new MegaFile(inFile, type); + } Alignment al = new Alignment(afile.getSeqsAsArray()); @@ -360,6 +367,10 @@ public class AppletFormatAdapter { afile = new SimpleBlastFile(source); } + else if (format.equals("MEGA")) + { + afile = new MegaFile(source); + } Alignment al = new Alignment(afile.getSeqsAsArray()); @@ -467,7 +478,10 @@ public class AppletFormatAdapter { afile = new RnamlFile(); } - + else if 
(format.equalsIgnoreCase("MEGA")) + { + afile = new MegaFile(); + } else { throw new Exception( diff --git a/src/jalview/io/FormatAdapter.java b/src/jalview/io/FormatAdapter.java old mode 100755 new mode 100644 index 9913428..c57ef8d --- a/src/jalview/io/FormatAdapter.java +++ b/src/jalview/io/FormatAdapter.java @@ -20,7 +20,13 @@ */ package jalview.io; -import jalview.datamodel.*; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.ColumnSelection; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceGroup; +import jalview.datamodel.SequenceI; /** * Additional formatting methods used by the application in a number of places. @@ -125,6 +131,12 @@ public class FormatAdapter extends AppletFormatAdapter afile.addJVSuffix(jalview.bin.Cache.getDefault("PFAM_JVSUFFIX", true)); } + else if (format.equalsIgnoreCase("MEGA")) + { + afile = new MegaFile(); + afile.addJVSuffix(jalview.bin.Cache.getDefault("MEGA_JVSUFFIX", + true)); + } /* * amsa is not supported by this function - it requires an alignment * rather than a sequence vector else if (format.equalsIgnoreCase("AMSA")) diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java old mode 100755 new mode 100644 index 08d4dca..555d376 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -20,7 +20,7 @@ */ package jalview.io; -import java.io.*; +import java.io.IOException; /** * DOCUMENT ME! 
@@ -138,7 +138,11 @@ public class IdentifyFile if ((data.indexOf("<") > -1)) { reply = "RNAML"; - + break; + } + if ((data.indexOf("#MEGA") > -1)) + { + reply = "MEGA"; break; } diff --git a/src/jalview/io/JalviewFileChooser.java b/src/jalview/io/JalviewFileChooser.java old mode 100755 new mode 100644 index 90e3229..068dca7 --- a/src/jalview/io/JalviewFileChooser.java +++ b/src/jalview/io/JalviewFileChooser.java @@ -23,12 +23,22 @@ package jalview.io; import jalview.util.MessageManager; -import java.io.*; -import java.util.*; - -import java.awt.*; -import java.awt.event.*; -import javax.swing.*; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.EventQueue; +import java.awt.HeadlessException; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.io.File; +import java.util.StringTokenizer; +import java.util.Vector; + +import javax.swing.DefaultListCellRenderer; +import javax.swing.JFileChooser; +import javax.swing.JList; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JScrollPane; /** * Enhanced file chooser dialog box. 
@@ -104,6 +114,7 @@ public class JalviewFileChooser extends JFileChooser setAccessory(new RecentlyOpened()); } + @Override public void setFileFilter(javax.swing.filechooser.FileFilter filter) { super.setFileFilter(filter); @@ -122,6 +133,7 @@ public class JalviewFileChooser extends JFileChooser EventQueue.invokeLater(new Thread() { + @Override public void run() { String currentName = ui.getFileName(); @@ -176,10 +188,15 @@ public class JalviewFileChooser extends JFileChooser { format = "PFAM"; } + else if (format.toUpperCase().startsWith("MEGA")) + { + format = "MEGA"; + } return format; } + @Override public int showSaveDialog(Component parent) throws HeadlessException { this.setAccessory(null); @@ -265,6 +282,7 @@ public class JalviewFileChooser extends JFileChooser list.addMouseListener(new MouseAdapter() { + @Override public void mousePressed(MouseEvent evt) { recentListSelectionChanged(list.getSelectedValue()); @@ -279,6 +297,7 @@ public class JalviewFileChooser extends JFileChooser javax.swing.SwingUtilities.invokeLater(new Runnable() { + @Override public void run() { scroller.getHorizontalScrollBar().setValue( diff --git a/src/jalview/io/MegaFile.java b/src/jalview/io/MegaFile.java new file mode 100644 index 0000000..f6ff645 --- /dev/null +++ b/src/jalview/io/MegaFile.java @@ -0,0 +1,784 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1) + * Copyright (C) 2014 The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+/**
+ * Reads and writes sequence alignments in MEGA format. Header lines (#MEGA,
+ * Title, Format, Description, Gene) are saved as alignment properties; the
+ * sequence data that follows may be interleaved or non-interleaved.
+ */
+public class MegaFile extends AlignFile
+{
+  /*
+   * Simple file format as at
+   * http://www.hiv.lanl.gov/content/sequence/HelpDocs/SEQsamples.html
+   * 
+   * Fancy file format as at
+   * http://primerdigital.com/fastpcr/images/Drosophila_Adh.txt
+   */
+  public enum FileFormat
+  {
+    SIMPLE, FANCY
+  }
+
+  // prefix of the #MEGA marker and of every sequence id line
+  private static final String HASHSIGN = "#"; // TODO: public constants file
+
+  // optional suffix on a header keyword, e.g. "Title:"
+  private static final String COLON = ":";
+
+  // prefix of header lines in the 'fancy' format, e.g. "!Title"
+  private static final String BANG = "!";
+
+  private static final String EQUALS = "=";
+
+  // first line of a MEGA file (matched case-insensitively when parsing)
+  private static final String MEGA_ID = HASHSIGN + "MEGA";
+
+  /*
+   * Header keywords; they are matched without regard to case when parsing
+   */
+  public static final String PROP_TITLE = "TITLE";
+
+  public static final String PROP_FORMAT = "Format";
+
+  public static final String PROP_DESCRIPTION = "Description";
+
+  public static final String PROP_GENE = "Gene";
+
+  public static final String PROP_INTERLEAVED = "Interleaved";
+
+  // initial size for sequence data buffer
+  private static final int SEQBUFFERSIZE = 256;
+
+  private static final String SPACE = " ";
+
+  // number of residues written per output line
+  private static final int POSITIONS_PER_LINE = 50;
+
+  // this can be True, False or null (meaning we don't know yet)
+  private Boolean interleaved;
+
+  // set once we have seen one block of interleaved data
+  private boolean firstDataBlockRead = false;
+
+  // null until a '!' header line is seen (FANCY) or parse() defaults it to
+  // SIMPLE
+  private FileFormat fileFormat;
+
+  /**
+   * Creates an empty instance, for use when writing an alignment.
+   */
+  public MegaFile()
+  {
+  }
+
+  /**
+   * Creates an instance for the given input; both arguments are passed
+   * unchanged to the superclass constructor.
+   * 
+   * @param inFile
+   * @param type
+   * @throws IOException
+   */
+  public MegaFile(String inFile, String type) throws IOException
+  {
+    super(inFile, type);
+  }
+
+  /**
+   * Creates an instance for an existing parse source, which is passed
+   * unchanged to the superclass constructor.
+   * 
+   * @param source
+   * @throws IOException
+   */
+  public MegaFile(FileParse source) throws IOException
+  {
+    super(source);
+  }
+
+  /**
+   * Parse the input stream.
+   * 
+   * Header lines are read first and saved as alignment properties. Every
+   * following non-blank line is handed to parseDataLine, and the sequences
+   * accumulated that way are finally stored by setSequences.
+   * 
+   * @throws IOException
+   *           if the input cannot be read, or mixes interleaved and
+   *           non-interleaved data
+   */
+  @Override
+  public void parse() throws IOException
+  {
+    /*
+     * Read MEGA and Title/Format/Description/Gene headers if present. These are
+     * saved as alignment properties. Returns the first sequence data line
+     */
+    String dataLine = parseHeaderLines();
+
+    /*
+     * If we didn't positively identify as 'fancy format', assume 'simple
+     * format'
+     */
+    if (this.fileFormat == null)
+    {
+      setFileFormat(FileFormat.SIMPLE);
+    }
+
+    /*
+     * Temporary store of {sequenceId, positionData}, appended to while
+     * parsing. A LinkedHashMap keeps the sequences in order of first
+     * appearance.
+     */
+    Map<String, StringBuilder> seqData = new LinkedHashMap<String, StringBuilder>();
+
+    /*
+     * The id of the sequence being read (for non-interleaved)
+     */
+    String currentId = "";
+
+    while (dataLine != null)
+    {
+      dataLine = dataLine.trim();
+      if (dataLine.length() > 0)
+      {
+        currentId = parseDataLine(dataLine, seqData, currentId);
+      }
+      else if (!seqData.isEmpty())
+      {
+        /*
+         * Blank line after processing some data: from here on an interleaved
+         * file may only continue sequences that have already been started
+         */
+        this.firstDataBlockRead = true;
+      }
+      dataLine = nextLine();
+    }
+
+    setSequences(seqData);
+  }
+
+  /**
+   * Convert the parsed sequence strings to objects and store them in the model.
+   * 
+   * @param seqData
+   *          sequence characters keyed by sequence id, in the order the ids
+   *          should be added
+   */
+  protected void setSequences(Map<String, StringBuilder> seqData)
+  {
+    for (Entry<String, StringBuilder> dataset : seqData.entrySet())
+    {
+      String sequenceId = dataset.getKey();
+      StringBuilder characters = dataset.getValue();
+      SequenceI s = new Sequence(sequenceId, characters.toString());
+      this.seqs.addElement(s);
+    }
+  }
+
+  /**
+   * Process one line of sequence data. If it has no sequence identifier, append
+   * to the current id's sequence. Else parse out the sequence id and append the
+   * data (if any) to that id's sequence. Returns the sequence id (implicit or
+   * explicit) for this line.
+   * 
+   * @param dataLine
+   *          a trimmed, non-empty line of input
+   * @param seqData
+   *          the sequence data read so far, keyed by sequence id
+   * @param currentId
+   *          the id that applies to a line which carries no id of its own
+   * @return the sequence id this line belongs to
+   * @throws IOException
+   *           if the line is inconsistent with the data already read
+   */
+  protected String parseDataLine(String dataLine,
+          Map<String, StringBuilder> seqData, String currentId)
+          throws IOException
+  {
+    String seqId = getSequenceId(dataLine);
+    if (seqId == null)
+    {
+      /*
+       * Just character data
+       */
+      parseNoninterleavedDataLine(dataLine, seqData, currentId);
+      return currentId;
+    }
+
+    if ((HASHSIGN + seqId).trim().equals(dataLine.trim()))
+    {
+      /*
+       * Sequence id only - header line for noninterleaved data
+       */
+      return seqId;
+    }
+
+    /*
+     * Sequence id followed by data
+     */
+    parseInterleavedDataLine(dataLine, seqData, seqId);
+    return seqId;
+  }
+
+  /**
+   * Add a line of sequence data to the buffer for the given sequence id. Start
+   * a new one if we haven't seen it before.
+   * 
+   * @param dataLine
+   *          a line holding sequence characters only
+   * @param seqData
+   *          the sequence data read so far, keyed by sequence id
+   * @param currentId
+   *          the id of the sequence the data belongs to
+   * @throws IOException
+   *           if currentId is null, or interleaved data has already been read
+   */
+  protected void parseNoninterleavedDataLine(String dataLine,
+          Map<String, StringBuilder> seqData, String currentId)
+          throws IOException
+  {
+    // NOTE(review): parse() starts with currentId = "" rather than null, so
+    // when called from there this guard cannot fire and data met before any
+    // '#id' line is stored under an empty id - confirm whether intended
+    if (currentId == null)
+    {
+      /*
+       * Oops. Data but no sequence id context.
+       */
+      throw new IOException("No sequence id context at: " + dataLine);
+    }
+
+    assertInterleaved(false, dataLine);
+
+    /*
+     * Add the current line of data to the sequence.
+     */
+    getSequenceDataBuffer(seqData, currentId).append(dataLine);
+  }
+
+  /**
+   * Get the sequence data for this sequence id, starting a new one if
+   * necessary.
+   * 
+   * @param seqData
+   *          the sequence data read so far, keyed by sequence id
+   * @param currentId
+   *          the sequence id to look up
+   * @return the buffer held in seqData for this id (never null)
+   */
+  protected StringBuilder getSequenceDataBuffer(
+          Map<String, StringBuilder> seqData, String currentId)
+  {
+    StringBuilder sb = seqData.get(currentId);
+    if (sb == null)
+    {
+      // first data met for this sequence id, start a new buffer
+      sb = new StringBuilder(SEQBUFFERSIZE);
+      seqData.put(currentId, sb);
+    }
+    return sb;
+  }
+
+  /**
+   * Parse one line of interleaved data e.g.
+   * 
+   * <pre>
+   * #TheSeqId CGATCGCATGCA
+   * </pre>
+   * 
+   * @param dataLine
+   *          the whole line, starting with '#' and the sequence id
+   * @param seqData
+   *          the sequence data read so far, keyed by sequence id
+   * @param seqId
+   *          the sequence id already extracted from dataLine
+   * @throws IOException
+   *           if a new sequence id appears after the first data block, or
+   *           non-interleaved data has already been read
+   */
+  protected void parseInterleavedDataLine(String dataLine,
+          Map<String, StringBuilder> seqData, String seqId)
+          throws IOException
+  {
+    /*
+     * New sequence found in second or later data block - error.
+     */
+    if (this.firstDataBlockRead && !seqData.containsKey(seqId))
+    {
+      throw new IOException(
+              "Parse error: misplaced new sequence starting at " + dataLine);
+    }
+
+    StringBuilder sb = getSequenceDataBuffer(seqData, seqId);
+
+    // skip the '#' and the id; what remains is the sequence data
+    String data = dataLine.substring(seqId.length() + 1).trim();
+
+    /*
+     * Do nothing if this line is _only_ a sequence id with no data following.
+     * 
+     * Remove any internal spaces (present in the 'fancy' file format)
+     */
+    if (data.length() > 0)
+    {
+      sb.append(data.replace(SPACE, ""));
+      assertInterleaved(true, dataLine);
+    }
+  }
+
+  /**
+   * If the line begins with (e.g.) "#abcde " then returns "abcde" as the
+   * identifier. Else returns null.
+   * 
+   * @param dataLine
+   *          the line to inspect (may be null)
+   * @return the text between the leading '#' and the first space (or the end
+   *         of the line), or null if the line does not start with '#'
+   */
+  public static String getSequenceId(String dataLine)
+  {
+    // TODO refactor to a StringUtils type class
+    if (dataLine == null || !dataLine.startsWith(HASHSIGN))
+    {
+      return null;
+    }
+    int spacePos = dataLine.indexOf(SPACE);
+    return spacePos == -1 ? dataLine.substring(1) : dataLine.substring(1,
+            spacePos);
+  }
+
+  /**
+   * Read the #MEGA and Title/Format/Description/Gene header lines (if present).
+   * 
+   * Save as alignment properties in case useful.
+   * 
+   * @return the next non-blank line following the header lines.
+   * @throws IOException
+   */
+  protected String parseHeaderLines() throws IOException
+  {
+    for (String raw = nextLine(); raw != null; raw = nextLine())
+    {
+      String header = raw.trim();
+
+      // blank lines are ignored
+      if (header.length() == 0)
+      {
+        continue;
+      }
+
+      // any line starting with '!' marks the file as 'fancy' format
+      if (header.startsWith(BANG))
+      {
+        setFileFormat(FileFormat.FANCY);
+      }
+
+      if (header.startsWith(BANG + PROP_DESCRIPTION))
+      {
+        parseDescriptionLines();
+        continue;
+      }
+
+      if (isPropertyLine(header))
+      {
+        // a recognised property: split into name and value and save it
+        String[] nameAndValue = parsePropertyValue(header);
+        setAlignmentProperty(nameAndValue[0], nameAndValue[1]);
+        continue;
+      }
+
+      if (header.toUpperCase().startsWith(MEGA_ID))
+      {
+        continue;
+      }
+
+      /*
+       * Anything else is the first 'data line' i.e. one that is not blank,
+       * #MEGA or a recognised property
+       */
+      return header;
+    }
+
+    // ran out of input before any data line was found
+    return null;
+  }
+
+  /**
+   * Collect the text of a multi-line description: each following line, up to
+   * (not including) the next blank line or the end of input, is appended to
+   * the Description property value together with a line separator.
+   * 
+   * The !Description line itself is assumed to carry no description text, and
+   * the description is assumed to end with a blank line (otherwise one line
+   * too many is consumed).
+   * 
+   * @throws IOException
+   */
+  protected void parseDescriptionLines() throws IOException
+  {
+    StringBuilder text = new StringBuilder(256);
+    String next = nextLine();
+    while (next != null && !"".equals(next.trim()))
+    {
+      text.append(next).append(newline);
+      next = nextLine();
+    }
+    setAlignmentProperty(PROP_DESCRIPTION, text.toString());
+  }
+
+  /**
+   * Test whether the line holds an expected property declaration.
+   * 
+   * @param inputLine
+   *          the line to test
+   * @return true if the line starts with the Title, Format, Description or
+   *         Gene keyword (see lineMatchesFlag for the accepted forms)
+   */
+  protected boolean isPropertyLine(String inputLine)
+  {
+    return lineMatchesFlag(inputLine, PROP_TITLE, BANG, COLON)
+            || lineMatchesFlag(inputLine, PROP_FORMAT, BANG, COLON)
+            || lineMatchesFlag(inputLine, PROP_DESCRIPTION, BANG, COLON)
+            || lineMatchesFlag(inputLine, PROP_GENE, BANG, COLON);
+  }
+
+  /**
+   * Helper method that extracts the name and value of a property, assuming the
+   * first space or equals sign is the separator.
+   * 
+   * Thus "Description: Melanogaster" or "!Description=Melanogaster" both return
+   * {"Description", "Melanogaster"}.
+   * 
+   * Returns an empty value string if no space or equals sign is present.
+   * 
+   * @param s
+   *          the property line (may be null)
+   * @return a two element array {name, value}; the name has any leading '!'
+   *         and trailing ':' removed, and is null if s is null
+   */
+  public static String[] parsePropertyValue(String s)
+  {
+    // TODO refactor to a string utils helper class (or find equivalent)
+    // TODO handle other cases e.g. "Description = Melanogaster"
+    if (s == null)
+    {
+      // nothing to parse; avoids dereferencing null below
+      return new String[]
+      { null, "" };
+    }
+
+    /*
+     * the separator is whichever of space and '=' comes first
+     */
+    int spacePos = s.indexOf(SPACE);
+    int eqPos = s.indexOf(EQUALS);
+    int separatorPos;
+    if (spacePos == -1)
+    {
+      separatorPos = eqPos;
+    }
+    else if (eqPos == -1)
+    {
+      separatorPos = spacePos;
+    }
+    else
+    {
+      separatorPos = Math.min(spacePos, eqPos);
+    }
+
+    String propertyName = s;
+    String value = "";
+    if (separatorPos > -1)
+    {
+      value = s.substring(separatorPos + 1);
+      propertyName = s.substring(0, separatorPos);
+    }
+
+    /*
+     * finally strip any leading / trailing chars from property name
+     */
+    if (propertyName.startsWith(BANG))
+    {
+      propertyName = propertyName.substring(1);
+    }
+    if (propertyName.endsWith(COLON))
+    {
+      propertyName = propertyName.substring(0, propertyName.length() - 1);
+    }
+
+    return new String[]
+    { propertyName, value };
+  }
+
+  /**
+   * Test whether a line starts with the specified flag field followed by a
+   * space (or nothing).
+   * 
+   * Here we accept an optional prefix and suffix on the flag, and the check is
+   * not case-sensitive. So these would match for "Title"
+   * 
+   * <pre>
+   * Title Melanogaster
+   * Title=Melanogaster
+   * TITLE Melanogaster
+   * TITLE=Melanogaster
+   * !Title Melanogaster
+   * !Title=Melanogaster
+   * !TITLE Melanogaster
+   * !TITLE=Melanogaster
+   * Title: Melanogaster
+   * Title:=Melanogaster
+   * TITLE: Melanogaster
+   * TITLE:=Melanogaster
+   * !Title: Melanogaster
+   * !Title:=Melanogaster
+   * !TITLE: Melanogaster
+   * !TITLE:=Melanogaster
+   * Title
+   * TITLE
+   * !Title
+   * !TITLE
+   * </pre>
+   * 
+   * Note that the flag immediately followed by the suffix matches whatever
+   * comes after the suffix.
+   * 
+   * @param line
+   *          the line to test; false is returned if null
+   * @param flag
+   *          the keyword to look for; false is returned if null
+   * @param prefix
+   *          optional text that may precede the flag (e.g. "!"); null or
+   *          empty means no prefix
+   * @param suffix
+   *          optional text that may follow the flag (e.g. ":")
+   * @return true if the line matches
+   */
+  public static boolean lineMatchesFlag(String line, String flag, String prefix, String suffix)
+  {
+    // TODO refactor to a string utils helper class
+    if (line == null || flag == null)
+    {
+      return false;
+    }
+    String lineUpper = line.toUpperCase().trim();
+    String flagUpper = flag.toUpperCase();
+
+    /*
+     * skip the whole prefix (e.g. !) before attempting the match; an absent
+     * prefix removes nothing
+     */
+    if (prefix != null && prefix.length() > 0)
+    {
+      String prefixUpper = prefix.toUpperCase();
+      if (lineUpper.startsWith(prefixUpper))
+      {
+        lineUpper = lineUpper.substring(prefixUpper.length());
+      }
+    }
+
+    // flag + SPACE or flag + EQUALS, with or without suffix
+    if (lineUpper.startsWith(flagUpper + SPACE)
+            || lineUpper.startsWith(flagUpper + EQUALS)
+            || lineUpper.startsWith(flagUpper + suffix + SPACE)
+            || lineUpper.startsWith(flagUpper + suffix + EQUALS))
+    {
+      return true;
+    }
+
+    // flag alone on the line, or flag directly followed by the suffix
+    return lineUpper.equals(flagUpper)
+            || lineUpper.startsWith(flagUpper + suffix);
+  }
+
+  /**
+   * Write out the alignment sequences in Mega format.
+   * 
+   * @return the formatted text
+   */
+  @Override
+  public String print()
+  {
+    return print(getSeqsAsArray());
+  }
+
+  /**
+   * Write out the alignment sequences in Mega format - interleaved unless
+   * explicitly noninterleaved.
+   * 
+   * @param s
+   *          the sequences to write
+   * @return the formatted text
+   */
+  public String print(SequenceI[] s)
+  {
+    // TODO: is there a way to preserve the 'interleaved' property so it can
+    // affect output?
+
+    if (this.fileFormat == FileFormat.FANCY)
+    {
+      return printInterleavedCodons(s);
+    }
+    if (this.interleaved != null && !this.interleaved.booleanValue())
+    {
+      // input was positively identified as non-interleaved
+      return printNonInterleaved(s);
+    }
+    return printInterleaved(s);
+  }
+
+  /**
+   * Print the sequences in interleaved format, each row 15 space-separated
+   * triplets.
+   * 
+   * @param s
+   * @return
+   */
+  protected String printInterleavedCodons(SequenceI[] s)
+  {
+    // TODO not coded yet - defaulting to the 'simple' format output
+    return printInterleaved(s);
+  }
+
+  /**
+   * Print to string in Interleaved format - blocks of next 50 characters of
+   * each sequence in turn.
+   * 
+   * @param s
+   *          the sequences to write
+   * @return the MEGA header and properties followed by the data blocks
+   */
+  protected String printInterleaved(SequenceI[] s)
+  {
+    int maxIdLength = getMaxIdLength(s);
+    int maxSequenceLength = getMaxSequenceLength(s);
+    int numLines = maxSequenceLength / POSITIONS_PER_LINE + 3; // approx
+
+    /*
+     * Size a buffer to hold the whole output
+     */
+    StringBuilder sb = new StringBuilder(numLines
+            * (maxIdLength + 2 + POSITIONS_PER_LINE));
+    printHeaders(sb, FileFormat.SIMPLE);
+
+    // '#' then the id left-justified to the width of the longest id, then a
+    // space, so that the data columns line up
+    String idFormat = "#%-" + maxIdLength + "s ";
+
+    int numDataBlocks = (maxSequenceLength - 1) / POSITIONS_PER_LINE + 1;
+    for (int i = 0; i < numDataBlocks; i++)
+    {
+      int blockStart = i * POSITIONS_PER_LINE;
+      int blockEnd = blockStart + POSITIONS_PER_LINE;
+
+      // each block is preceded by a blank line
+      sb.append(newline);
+      for (SequenceI seq : s)
+      {
+        char[] subSequence = seq.getSequence(blockStart, blockEnd);
+        sb.append(String.format(idFormat, seq.getName()));
+        sb.append(subSequence);
+        sb.append(newline);
+      }
+    }
+
+    return sb.toString();
+  }
+
+  /**
+   * Append the MEGA header and any other known properties
+   * 
+   * @param sb
+   *          the buffer to append to
+   * @param format
+   *          FANCY writes each property as "!key value", anything else as
+   *          "key: value"
+   */
+  private void printHeaders(StringBuilder sb, FileFormat format)
+  {
+    sb.append(MEGA_ID);
+    sb.append(newline);
+
+    Set<Entry<Object, Object>> props = getAlignmentProperties();
+    if (props == null)
+    {
+      return;
+    }
+    for (Entry<Object, Object> prop : props)
+    {
+      Object key = prop.getKey();
+      Object value = prop.getValue();
+
+      // only properties whose name and value are both text are written
+      if (key instanceof String && value instanceof String)
+      {
+        if (format == FileFormat.FANCY)
+        {
+          sb.append(BANG).append(key).append(SPACE).append(value);
+        }
+        else
+        {
+          sb.append(key).append(COLON).append(SPACE).append(value);
+        }
+        sb.append(newline);
+      }
+    }
+  }
+
+  /**
+   * Get the longest sequence id (to allow aligned printout).
+   * 
+   * @param s
+   *          the sequences to inspect
+   * @return the length of the longest sequence name, or 0 if there are no
+   *         sequences
+   */
+  protected static int getMaxIdLength(SequenceI[] s)
+  {
+    // TODO pull up for reuse
+    int maxLength = 0;
+    for (SequenceI seq : s)
+    {
+      maxLength = Math.max(maxLength, seq.getName().length());
+    }
+    return maxLength;
+  }
+
+  /**
+   * Get the longest sequence length
+   * 
+   * @param s
+   *          the sequences to inspect
+   * @return the greatest sequence length, or 0 if there are no sequences
+   */
+  protected static int getMaxSequenceLength(SequenceI[] s)
+  {
+    // TODO pull up for reuse
+    int maxLength = 0;
+    for (SequenceI seq : s)
+    {
+      maxLength = Math.max(maxLength, seq.getLength());
+    }
+    return maxLength;
+  }
+
+  /**
+   * Print to string in noninterleaved format - all of each sequence in turn, in
+   * blocks of 50 characters.
+   * 
+   * @param s
+   *          the sequences to write
+   * @return the MEGA header and properties followed by, for each sequence, a
+   *         blank line, its "#name" line and its data lines
+   */
+  protected String printNonInterleaved(SequenceI[] s)
+  {
+    int maxSequenceLength = getMaxSequenceLength(s);
+    // approx
+    int numLines = maxSequenceLength / POSITIONS_PER_LINE + 2 + s.length;
+
+    /*
+     * Roughly size a buffer to hold the whole output
+     */
+    StringBuilder sb = new StringBuilder(numLines * POSITIONS_PER_LINE);
+    printHeaders(sb, FileFormat.SIMPLE);
+
+    for (SequenceI seq : s)
+    {
+      sb.append(newline);
+      sb.append(HASHSIGN).append(seq.getName()).append(newline);
+
+      /*
+       * Stop strictly before the end of the sequence: a block starting at
+       * getLength() holds no residues, and requesting it used to add an extra
+       * empty line whenever the length was a multiple of the block size
+       */
+      int length = seq.getLength();
+      for (int startPos = 0; startPos < length; startPos += POSITIONS_PER_LINE)
+      {
+        char[] subSequence = seq.getSequence(startPos, startPos
+                + POSITIONS_PER_LINE);
+        sb.append(subSequence);
+        sb.append(newline);
+      }
+    }
+
+    return sb.toString();
+  }
+
+  /**
+   * Flag this file as interleaved or not, based on data format. Throws an
+   * exception if has previously been determined to be otherwise.
+   *
+   * @param isIt
+   *          the interleaved state implied by the current data line
+   * @param dataLine
+   *          the line being parsed (quoted in the error message)
+   * @throws IOException
+   *           if a different interleaved state was recorded earlier
+   */
+  protected void assertInterleaved(boolean isIt, String dataLine)
+          throws IOException
+  {
+    if (this.interleaved != null && isIt != this.interleaved.booleanValue())
+    {
+      throw new IOException(
+              "Parse error: mix of interleaved and noninterleaved detected, at line: "
+                      + dataLine);
+    }
+    // valueOf reuses the cached Boolean.TRUE / Boolean.FALSE instances
+    this.interleaved = Boolean.valueOf(isIt);
+  }
+
+  /**
+   * Answers true if the data has been flagged as interleaved, false if it has
+   * been flagged as noninterleaved or not yet flagged at all.
+   *
+   * @return the recorded interleaved state, defaulting to false
+   */
+  public boolean isInterleaved()
+  {
+    return this.interleaved != null && this.interleaved.booleanValue();
+  }
+
+  /**
+   * @return the file format currently recorded for this file
+   */
+  public FileFormat getFileFormat()
+  {
+    return this.fileFormat;
+  }
+
+  /**
+   * @param fileFormat
+   *          the file format to record for this file
+   */
+  public void setFileFormat(FileFormat fileFormat)
+  {
+    this.fileFormat = fileFormat;
+  }
+}
diff --git a/src/jalview/jbgui/GPreferences.java b/src/jalview/jbgui/GPreferences.java old mode 100755 new mode 100644 index 86fe535..932fb34 --- a/src/jalview/jbgui/GPreferences.java +++ b/src/jalview/jbgui/GPreferences.java @@ -23,13 +23,40 @@ package jalview.jbgui; import jalview.gui.JvSwingUtils; import jalview.util.MessageManager; -import java.awt.*; -import java.awt.event.*; - -import javax.swing.*; -import javax.swing.border.*; -import javax.swing.event.*; +import java.awt.BorderLayout; +import java.awt.Color; +import java.awt.Dimension; +import java.awt.FlowLayout; +import java.awt.Font; +import java.awt.GridBagConstraints; +import java.awt.GridBagLayout; +import java.awt.GridLayout; +import java.awt.Insets; import java.awt.Rectangle; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.KeyEvent; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; + +import javax.swing.BorderFactory; +import javax.swing.DefaultListCellRenderer; +import javax.swing.JButton; +import javax.swing.JCheckBox; +import javax.swing.JComboBox; +import javax.swing.JLabel; +import javax.swing.JList; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.JTabbedPane; +import javax.swing.JTextField; +import
javax.swing.ListSelectionModel; +import javax.swing.SwingConstants; +import javax.swing.border.Border; +import javax.swing.border.EmptyBorder; +import javax.swing.border.TitledBorder; +import javax.swing.event.ListSelectionEvent; +import javax.swing.event.ListSelectionListener; /** * DOCUMENT ME! @@ -185,6 +212,8 @@ public class GPreferences extends JPanel protected JCheckBox clustaljv = new JCheckBox(); + protected JCheckBox megajv = new JCheckBox(); + protected JCheckBox msfjv = new JCheckBox(); protected JCheckBox fastajv = new JCheckBox(); @@ -285,6 +314,7 @@ public class GPreferences extends JPanel ok.setText(MessageManager.getString("action.ok")); ok.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { ok_actionPerformed(e); @@ -293,6 +323,7 @@ public class GPreferences extends JPanel cancel.setText(MessageManager.getString("action.cancel")); cancel.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { cancel_actionPerformed(e); @@ -384,6 +415,7 @@ public class GPreferences extends JPanel annotations.setBounds(new Rectangle(169, 12, 200, 23)); annotations.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { annotations_actionPerformed(e); @@ -391,6 +423,7 @@ public class GPreferences extends JPanel }); identity.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { annotations_actionPerformed(e); @@ -398,6 +431,7 @@ public class GPreferences extends JPanel }); showGroupConsensus.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { annotations_actionPerformed(e); @@ -411,6 +445,7 @@ public class GPreferences extends JPanel .getString("action.show_unconserved")); showUnconserved.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { showunconserved_actionPerformed(e); @@ -459,6 +494,7 @@ public class 
GPreferences extends JPanel minColour.setPreferredSize(new Dimension(40, 20)); minColour.addMouseListener(new MouseAdapter() { + @Override public void mousePressed(MouseEvent e) { minColour_actionPerformed(); @@ -472,6 +508,7 @@ public class GPreferences extends JPanel maxColour.setPreferredSize(new Dimension(40, 20)); maxColour.addMouseListener(new MouseAdapter() { + @Override public void mousePressed(MouseEvent e) { maxColour_actionPerformed(); @@ -487,6 +524,7 @@ public class GPreferences extends JPanel startupFileTextfield.setBounds(new Rectangle(172, 273, 270, 20)); startupFileTextfield.addMouseListener(new MouseAdapter() { + @Override public void mouseClicked(MouseEvent e) { if (e.getClickCount() > 1) @@ -529,6 +567,7 @@ public class GPreferences extends JPanel newLink.setText(MessageManager.getString("action.new")); newLink.addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(ActionEvent e) { newLink_actionPerformed(e); @@ -537,6 +576,7 @@ public class GPreferences extends JPanel editLink.setText(MessageManager.getString("action.edit")); editLink.addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(ActionEvent e) { editLink_actionPerformed(e); @@ -545,6 +585,7 @@ public class GPreferences extends JPanel deleteLink.setText(MessageManager.getString("action.delete")); deleteLink.addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(ActionEvent e) { deleteLink_actionPerformed(e); @@ -553,6 +594,7 @@ public class GPreferences extends JPanel linkURLList.addListSelectionListener(new ListSelectionListener() { + @Override public void valueChanged(ListSelectionEvent e) { int index = linkURLList.getSelectedIndex(); @@ -562,6 +604,7 @@ public class GPreferences extends JPanel linkNameList.addListSelectionListener(new ListSelectionListener() { + @Override public void valueChanged(ListSelectionEvent e) { int index = 
linkNameList.getSelectedIndex(); @@ -583,6 +626,7 @@ public class GPreferences extends JPanel defaultBrowser.addMouseListener(new MouseAdapter() { + @Override public void mouseClicked(MouseEvent e) { if (e.getClickCount() > 1) @@ -597,6 +641,7 @@ public class GPreferences extends JPanel useProxy.setText(MessageManager.getString("label.use_proxy_server")); useProxy.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { useProxy_actionPerformed(); diff --git a/test/jalview/io/MegaFileTest.java b/test/jalview/io/MegaFileTest.java new file mode 100644 index 0000000..61487e3 --- /dev/null +++ b/test/jalview/io/MegaFileTest.java @@ -0,0 +1,389 @@
+package jalview.io;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.io.MegaFile.FileFormat;
+
+import java.io.IOException;
+import java.util.Vector;
+
+import org.junit.Test;
+
+/**
+ * Unit tests for MegaFile - read and write in MEGA format(s).
+ */
+public class MegaFileTest
+{
+  private static final String THIRTY_CHARS = "012345678901234567890123456789";
+
+  private static final String NEWLINE = System
+          .getProperty("line.separator");
+
+  private static final String INTERLEAVED = "#MEGA" + NEWLINE
+          + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE
+          + "#U455 ABCDEF" + NEWLINE + "#CPZANT MNOPQR" + NEWLINE
+          + NEWLINE + "#U455 KLMNOP" + NEWLINE + "#CPZANT WXYZ";
+
+  private static final String INTERLEAVED_NOHEADERS = "#U455 ABCDEF"
+          + NEWLINE + "#CPZANT MNOPQR" + NEWLINE + NEWLINE
+          + "#U455 KLMNOP" + NEWLINE + "#CPZANT WXYZ";
+
+  // interleaved sequences, one with 60 one with 120 characters (on overlong
+  // input lines)
+  private static final String INTERLEAVED_LONGERTHAN50 = "#MEGA" + NEWLINE
+          + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE
+          + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + NEWLINE + "#CPZANT "
+          + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS;
+
+  private static final String NONINTERLEAVED = "#MEGA" + NEWLINE
+          + "TITLE: Noninterleaved sequence data" + NEWLINE + NEWLINE
+          + "#U455 " + NEWLINE + "ABCFEDHIJ" + NEWLINE + "MNOPQR"
+          + NEWLINE + NEWLINE
+          + "#CPZANT " + NEWLINE + "KLMNOPWXYZ" + NEWLINE + "CGATC";
+
+  // Sequence length 60 (split over two lines)
+  private static final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY"
+          + NEWLINE + THIRTY_CHARS + NEWLINE + THIRTY_CHARS;
+
+  // this one starts noninterleaved then switches to interleaved
+  private static final String MIXED = "#MEGA" + NEWLINE
+          + "TITLE: This is a mess" + NEWLINE + NEWLINE
+          + "#CPZANT KLMNOPWXYZCGATC" + NEWLINE + NEWLINE + "#U455 "
+          + NEWLINE + "ABCFEDHIJ";
+
+  // interleaved with a new sequence appearing in the second block :-O
+  private static final String INTERLEAVED_SEQUENCE_ERROR = "#MEGA"
+          + NEWLINE + "TITLE: Interleaved sequence data" + NEWLINE
+          + NEWLINE + "#U455 ABCDEF" + NEWLINE + "#CPZANT MNOPQR"
+          + NEWLINE + NEWLINE + "#U456 KLMNOP" + NEWLINE;
+
+  // the 'fancy' format, different header format, bases in triplet groups
+  private static final String FANCY_FORMAT = "#MEGA" + NEWLINE
+          + "!Title Fancy format data" + NEWLINE
+          + "!Format DataType=DNA indel=- CodeTable=Standard;" + NEWLINE
+          + NEWLINE
+          + "!Description" + NEWLINE + " Line one of description" + NEWLINE + " Line two of description"
+          + NEWLINE + NEWLINE
+          + "!Gene=Adh Property=Coding CodonStart=1;" + NEWLINE
+          + "#U455 ABC DEF" + NEWLINE + "#CPZANT MNO PQR" + NEWLINE
+          + NEWLINE + "#U455 KLM NOP" + NEWLINE + "#CPZANT WXY Z";
+
+  /**
+   * Test paste of interleaved mega format data.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testParse_interleaved() throws IOException
+  {
+    MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
+    assertEquals("Title not as expected", "Interleaved sequence data",
+            testee.getAlignmentProperty(MegaFile.PROP_TITLE));
+    assertEquals("Not identified as simple format", FileFormat.SIMPLE,
+            testee.getFileFormat());
+    Vector<SequenceI> seqs = testee.getSeqs();
+    // should be 2 sequences
+    assertEquals("Expected two sequences", 2, seqs.size());
+    // check sequence names correct and order preserved
+    assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
+    assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
+            .getName());
+    // check sequence data
+    assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
+            .getSequenceAsString());
+    assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+            .getSequenceAsString());
+    assertTrue("File format is not flagged as interleaved",
+            testee.isInterleaved());
+  }
+
+  /**
+   * Test paste of noninterleaved mega format data.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testParse_nonInterleaved() throws IOException
+  {
+    MegaFile testee = new MegaFile(NONINTERLEAVED, AppletFormatAdapter.PASTE);
+    assertEquals("Title not as expected", "Noninterleaved sequence data",
+            testee.getAlignmentProperty(MegaFile.PROP_TITLE));
+    assertEquals("Not identified as simple format", FileFormat.SIMPLE,
+            testee.getFileFormat());
+    Vector<SequenceI> seqs = testee.getSeqs();
+    // should be 2 sequences
+    assertEquals("Expected two sequences", 2, seqs.size());
+    // check sequence names correct and order preserved
+    assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
+    assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
+            .getName());
+    // check sequence data
+    assertEquals("First sequence data wrong", "ABCFEDHIJMNOPQR", seqs
+            .get(0).getSequenceAsString());
+    assertEquals("Second sequence data wrong", "KLMNOPWXYZCGATC", seqs.get(1)
+            .getSequenceAsString());
+    assertFalse("File format is not flagged as noninterleaved",
+            testee.isInterleaved());
+  }
+
+  /**
+   * Test parsing an interleaved file with an extra sequence appearing after the
+   * first block - should fail.
+   */
+  @Test
+  public void testParse_interleavedExtraSequenceError()
+  {
+    try
+    {
+      new MegaFile(INTERLEAVED_SEQUENCE_ERROR, AppletFormatAdapter.PASTE);
+      fail("Expected extra sequence IOException");
+    } catch (IOException e)
+    {
+      assertEquals(
+              "Unexpected exception message",
+              "Parse error: misplaced new sequence starting at #U456 KLMNOP",
+              e.getMessage());
+    }
+  }
+
+  /**
+   * Test a mixed up file.
+   */
+  @Test
+  public void testParse_mixedInterleavedNonInterleaved()
+  {
+    try
+    {
+      new MegaFile(MIXED, AppletFormatAdapter.PASTE);
+      fail("Expected mixed content exception");
+    } catch (IOException e)
+    {
+      assertEquals(
+              "Unexpected exception message",
+              "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
+              e.getMessage());
+    }
+
+  }
+
+  /**
+   * Test extraction of the sequence id from a data line: the id is returned
+   * for lines starting with '#', and null for other, empty or null lines.
+   */
+  @Test
+  public void testGetSequenceId()
+  {
+    assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
+    assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGATC"));
+    assertEquals("AB123", MegaFile.getSequenceId("#AB123 CGC TAC"));
+    assertEquals("AB123", MegaFile.getSequenceId("#AB123"));
+    assertNull(MegaFile.getSequenceId("AB123 CTAG"));
+    assertNull(MegaFile.getSequenceId("AB123"));
+    assertNull(MegaFile.getSequenceId(""));
+    assertNull(MegaFile.getSequenceId(null));
+  }
+
+  /**
+   * Test that the longest sequence name length is found whichever position
+   * the longest name is in.
+   */
+  @Test
+  public void testGetMaxIdLength()
+  {
+    SequenceI[] seqs = new Sequence[2];
+    seqs[0] = new Sequence("Something", "GCATAC");
+    seqs[1] = new Sequence("SomethingElse", "GCATAC");
+    assertEquals(13, MegaFile.getMaxIdLength(seqs));
+    seqs[1] = new Sequence("DNA", "GCATAC");
+    assertEquals(9, MegaFile.getMaxIdLength(seqs));
+  }
+
+  /**
+   * Test that the longest sequence length is found whichever position the
+   * longest sequence is in.
+   */
+  @Test
+  public void testGetMaxSequenceLength()
+  {
+    SequenceI[] seqs = new Sequence[2];
+    seqs[0] = new Sequence("Seq1", "GCATAC");
+    seqs[1] = new Sequence("Seq2", "GCATACTAG");
+    assertEquals(9, MegaFile.getMaxSequenceLength(seqs));
+    seqs[1] = new Sequence("Seq2", "GCA");
+    assertEquals(6, MegaFile.getMaxSequenceLength(seqs));
+  }
+
+  /**
+   * Test (parse and) print of interleaved mega format data.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testPrint_interleaved() throws IOException
+  {
+    MegaFile testee = new MegaFile(INTERLEAVED, AppletFormatAdapter.PASTE);
+    String printed = testee.print();
+    System.out.println(printed);
+    // normally output should match input
+    // we cheated here with a number of short input lines
+    String expected = "#MEGA" + NEWLINE
+            + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE
+            + "#U455 ABCDEFKLMNOP" + NEWLINE + "#CPZANT MNOPQRWXYZ"
+            + NEWLINE;
+    assertEquals("Print format wrong", expected, printed);
+  }
+
+  /**
+   * Test (parse and) print of interleaved data with no headers (acceptable).
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testPrint_interleavedNoHeaders() throws IOException
+  {
+    MegaFile testee = new MegaFile(INTERLEAVED_NOHEADERS,
+            AppletFormatAdapter.PASTE);
+    String printed = testee.print();
+    System.out.println(printed);
+    // normally output should match input
+    // we cheated here with a number of short input lines
+    String expected = "#MEGA" + NEWLINE + NEWLINE
+            + "#U455 ABCDEFKLMNOP" + NEWLINE + "#CPZANT MNOPQRWXYZ"
+            + NEWLINE;
+    assertEquals("Print format wrong", expected, printed);
+  }
+
+  /**
+   * Test (parse and) print of noninterleaved mega format data.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testPrint_noninterleaved() throws IOException
+  {
+    MegaFile testee = new MegaFile(NONINTERLEAVED,
+            AppletFormatAdapter.PASTE);
+    String printed = testee.print();
+    System.out.println(printed);
+    // normally output should match input
+    // we cheated here with a number of short input lines
+    String expected = "#MEGA" + NEWLINE
+            + "TITLE: Noninterleaved sequence data" + NEWLINE + NEWLINE
+            + "#U455" + NEWLINE + "ABCFEDHIJMNOPQR" + NEWLINE + NEWLINE
+            + "#CPZANT" + NEWLINE + "KLMNOPWXYZCGATC" + NEWLINE;
+    assertEquals("Print format wrong", expected, printed);
+  }
+
+  /**
+   * Test (parse and) print of interleaved mega format data extending to more
+   * than one line of output.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testPrint_interleavedMultiLine() throws IOException
+  {
+    MegaFile testee = new MegaFile(INTERLEAVED_LONGERTHAN50,
+            AppletFormatAdapter.PASTE);
+    String printed = testee.print();
+    System.out.println(printed);
+    // first sequence is length 60, second length 120
+    // should be output as 50 + 10 + 0 and as 50 + 50 + 20 character lines
+    // respectively
+    String expected = "#MEGA" + NEWLINE
+            + "TITLE: Interleaved sequence data" + NEWLINE + NEWLINE
+            + "#U455 " + THIRTY_CHARS + "01234567890123456789" + NEWLINE
+            + "#CPZANT " + THIRTY_CHARS + "01234567890123456789" + NEWLINE
+            + NEWLINE + "#U455 " + "0123456789" + NEWLINE
+            + "#CPZANT " + THIRTY_CHARS + "01234567890123456789" + NEWLINE
+            + NEWLINE + "#U455 " + NEWLINE + "#CPZANT "
+            + "01234567890123456789" + NEWLINE;
+    assertEquals("Print format wrong", expected, printed);
+  }
+
+  /**
+   * Test (parse and) print of noninterleaved mega format data extending to more
+   * than one line of output.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testPrint_noninterleavedMultiLine() throws IOException
+  {
+    MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
+            AppletFormatAdapter.PASTE);
+    String printed = testee.print();
+    System.out.println(printed);
+    // 60 character sequence should be output as 50 on first line then 10 more
+    String expected = "#MEGA" + NEWLINE + NEWLINE
+            + "#SIXTY" + NEWLINE + THIRTY_CHARS + "01234567890123456789"
+            + NEWLINE + "0123456789" + NEWLINE;
+    assertEquals("Print format wrong", expected, printed);
+  }
+
+  /**
+   * Test paste / parse of 'fancy format' data.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testParse_fancyFormat() throws IOException
+  {
+    MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE);
+    assertEquals("Title not as expected", "Fancy format data",
+            testee.getAlignmentProperty("Title"));
+    // TODO handle "Title" and "TITLE" uniformly !?!
+    assertEquals("Format property not parsed",
+            "DataType=DNA indel=- CodeTable=Standard;",
+            testee.getAlignmentProperty(MegaFile.PROP_FORMAT));
+    assertEquals("Gene property not parsed",
+            "Adh Property=Coding CodonStart=1;",
+            testee.getAlignmentProperty(MegaFile.PROP_GENE));
+    assertEquals("Not identified as fancy format", FileFormat.FANCY,
+            testee.getFileFormat());
+    Vector<SequenceI> seqs = testee.getSeqs();
+    // should be 2 sequences
+    assertEquals("Expected two sequences", 2, seqs.size());
+    // check sequence names correct and order preserved
+    assertEquals("First sequence id wrong", "U455", seqs.get(0).getName());
+    assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
+            .getName());
+    // check sequence data
+    assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
+            .getSequenceAsString());
+    assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+            .getSequenceAsString());
+    assertTrue("File format is not flagged as interleaved",
+            testee.isInterleaved());
+
+    assertEquals("Description property not parsed",
+            " Line one of description" + NEWLINE
+                    + " Line two of description" + NEWLINE,
+            testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
+  }
+
+  /**
+   * Test splitting of a header line into property name [0] and value [1],
+   * with '=', ': ' or ' ' as separator, an optional leading '!', and an empty
+   * value when only the name is present.
+   */
+  @Test
+  public void testParsePropertyValue()
+  {
+    assertEquals("Description",
+            MegaFile.parsePropertyValue("Description=Melanogaster")[0]);
+    assertEquals("Melanogaster",
+            MegaFile.parsePropertyValue("Description=Melanogaster")[1]);
+
+    assertEquals("Description",
+            MegaFile.parsePropertyValue("!Description=Melanogaster")[0]);
+    assertEquals("Melanogaster",
+            MegaFile.parsePropertyValue("!Description=Melanogaster")[1]);
+
+    assertEquals("Description",
+            MegaFile.parsePropertyValue("Description: Melanogaster")[0]);
+    assertEquals("Melanogaster",
+            MegaFile.parsePropertyValue("Description: Melanogaster")[1]);
+
+    assertEquals("Description",
+            MegaFile.parsePropertyValue("!Description Melanogaster")[0]);
+    assertEquals("Melanogaster",
+            MegaFile.parsePropertyValue("!Description Melanogaster")[1]);
+
+    assertEquals("Description",
+            MegaFile.parsePropertyValue("Description")[0]);
+    assertEquals("", MegaFile.parsePropertyValue("Description")[1]);
+
+    assertEquals("Description",
+            MegaFile.parsePropertyValue("!Description")[0]);
+    assertEquals("", MegaFile.parsePropertyValue("!Description")[1]);
+  }
+}
--
1.7.10.2