- /*\r
- * Jalview - A Sequence Alignment Editor and Viewer\r
- * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
- *\r
- * This program is free software; you can redistribute it and/or\r
- * modify it under the terms of the GNU General Public License\r
- * as published by the Free Software Foundation; either version 2\r
- * of the License, or (at your option) any later version.\r
- *\r
- * This program is distributed in the hope that it will be useful,\r
- * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
- * GNU General Public License for more details.\r
- *\r
- * You should have received a copy of the GNU General Public License\r
- * along with this program; if not, write to the Free Software\r
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
- */\r
- package jalview.io;\r
-\r
- import jalview.datamodel.*;\r
-\r
- import java.util.Vector;\r
-\r
-\r
- /**\r
- * DOCUMENT ME!\r
- *\r
- * @author $author$\r
- * @version $Revision$\r
- */\r
- public class AppletFormatAdapter\r
- {\r
- /** DOCUMENT ME!! */\r
- public static final Vector formats = new Vector();\r
-\r
- public static String INVALID_CHARACTERS = "Contains invalid characters";\r
-\r
- public static String SUPPORTED_FORMATS = "Formats currently supported are\n" +\r
- "Fasta, MSF, Clustal, BLC, PIR, MSP, and PFAM";\r
-\r
- static\r
- {\r
- formats.addElement("BLC");\r
- formats.addElement("CLUSTAL");\r
- formats.addElement("FASTA");\r
- formats.addElement("MSF");\r
- formats.addElement("PileUp");\r
- formats.addElement("PIR");\r
- formats.addElement("PFAM");\r
- }\r
-\r
- AlignFile afile = null;\r
-\r
- /**\r
- * DOCUMENT ME!\r
- *\r
- * @param inFile DOCUMENT ME!\r
- * @param type DOCUMENT ME!\r
- * @param format DOCUMENT ME!\r
- *\r
- * @return DOCUMENT ME!\r
- */\r
- public SequenceI[] readFile(String inFile, String type, String format)\r
- throws java.io.IOException\r
- {\r
- try\r
- {\r
- if (format.equals("FASTA"))\r
- {\r
- afile = new FastaFile(inFile, type);\r
- }\r
- else if (format.equals("MSF"))\r
- {\r
- afile = new MSFfile(inFile, type);\r
- }\r
- else if (format.equals("PileUp"))\r
- {\r
- afile = new PileUpfile(inFile, type);\r
- }\r
- else if (format.equals("CLUSTAL"))\r
- {\r
- afile = new ClustalFile(inFile, type);\r
- }\r
- else if (format.equals("BLC"))\r
- {\r
- afile = new BLCFile(inFile, type);\r
- }\r
- else if (format.equals("PIR"))\r
- {\r
- afile = new PIRFile(inFile, type);\r
- }\r
- else if (format.equals("PFAM"))\r
- {\r
- afile = new PfamFile(inFile, type);\r
- }\r
-\r
- return afile.getSeqsAsArray();\r
- }\r
- catch (Exception e)\r
- {\r
- System.err.println("Failed to read alignment using the '" + format +\r
- "' reader.\n"+e);\r
-\r
- if(e.getMessage()!=null && e.getMessage().startsWith(INVALID_CHARACTERS))\r
- throw new java.io.IOException(e.getMessage());\r
-\r
- // Finally test if the user has pasted just the sequence, no id\r
- if(type.equalsIgnoreCase("Paste"))\r
- {\r
- try{\r
- // Possible sequence is just residues with no label\r
- afile = new FastaFile(">UNKNOWN\n" + inFile, "Paste");\r
- return afile.getSeqsAsArray();\r
- }\r
- catch(Exception ex)\r
- {\r
- if(ex.toString().startsWith(INVALID_CHARACTERS))\r
- throw new java.io.IOException(e.getMessage());\r
-\r
- ex.printStackTrace();\r
- }\r
- }\r
-\r
- // If we get to this stage, the format was not supported\r
- throw new java.io.IOException(SUPPORTED_FORMATS);\r
- }\r
- }\r
-\r
-\r
- /**\r
- * DOCUMENT ME!\r
- *\r
- * @param format DOCUMENT ME!\r
- * @param seqs DOCUMENT ME!\r
- *\r
- * @return DOCUMENT ME!\r
- */\r
- public String formatSequences(String format,\r
- Vector seqs,\r
- boolean jvsuffix)\r
- {\r
- SequenceI[] s = new SequenceI[seqs.size()];\r
-\r
- for (int i = 0; i < seqs.size(); i++)\r
- s[i] = (SequenceI) seqs.elementAt(i);\r
-\r
- try\r
- {\r
- AlignFile afile = null;\r
-\r
- if (format.equalsIgnoreCase("FASTA"))\r
- {\r
- afile = new FastaFile();\r
- }\r
- else if (format.equalsIgnoreCase("MSF"))\r
- {\r
- afile = new MSFfile();\r
- }\r
- else if (format.equalsIgnoreCase("PileUp"))\r
- {\r
- afile = new PileUpfile();\r
- }\r
- else if (format.equalsIgnoreCase("CLUSTAL"))\r
- {\r
- afile = new ClustalFile();\r
- }\r
- else if (format.equalsIgnoreCase("BLC"))\r
- {\r
- afile = new BLCFile();\r
- }\r
- else if (format.equalsIgnoreCase("PIR"))\r
- {\r
- afile = new PIRFile();\r
- }\r
- else if (format.equalsIgnoreCase("PFAM"))\r
- {\r
- afile = new PfamFile();\r
- }\r
-\r
- afile.addJVSuffix(jvsuffix);\r
-\r
- afile.setSeqs(s);\r
-\r
- return afile.print();\r
- }\r
- catch (Exception e)\r
- {\r
- System.err.println("Failed to write alignment as a '" + format +\r
- "' file\n");\r
- e.printStackTrace();\r
- }\r
-\r
- return null;\r
- }\r
- }\r
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer
+ * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+package jalview.io;
+
+import java.io.File;
+
+import jalview.datamodel.*;
+
+/**
+ * A low level class for alignment and feature IO
+ * with alignment formatting methods used by both applet
+ * and application for generating flat alignment files.
+ * It also holds the lists of magic format names
+ * that the applet and application will allow the user to read or write files with.
+ *
+ * @author $author$
+ * @version $Revision$
+ */
+public class AppletFormatAdapter
+{
+ /**
+  * Format names that can be read. The isValidFormat method compares its
+  * argument against these entries, ignoring case.
+  */
+ public static final String[] READABLE_FORMATS =
+ {
+   "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", "PDB",
+   "JnetFile"
+ };
+
+ /**
+  * Format names offered to callers of the formatSequences method.
+  * NOTE(review): formatSequences also accepts "STH", which is not listed
+  * here - confirm whether that omission is intended.
+  */
+ public static final String[] WRITEABLE_FORMATS =
+ {
+   "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA"
+ };
+
+ /**
+  * File extensions for the formats the application can write. Entry i
+  * belongs to entry i of WRITABLE_FNAMES; an entry may hold several
+  * comma separated extensions.
+  */
+ public static final String[] WRITABLE_EXTENSIONS =
+ {
+   "fa, fasta, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", "jar"
+ };
+
+ /**
+  * Display names of the formats the application can write. The order
+  * must stay in step with WRITABLE_EXTENSIONS.
+  */
+ public static final String[] WRITABLE_FNAMES =
+ {
+   "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview"
+ };
+
+ /**
+  * File extensions for the formats the application can read. Entry i
+  * belongs to entry i of READABLE_FNAMES.
+  */
+ public static final String[] READABLE_EXTENSIONS =
+ {
+   "fa, fasta, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", "jar"
+ };
+
+ /**
+  * Display names of the formats the application can read. The order
+  * must stay in step with READABLE_EXTENSIONS.
+  */
+ public static final String[] READABLE_FNAMES =
+ {
+   "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview"
+ };
+
+ /**
+  * Message prefix that readFile looks for in a caught exception; when the
+  * exception message starts with this text it is passed on to the caller
+  * as an IOException instead of being treated as an unsupported format.
+  */
+ public static String INVALID_CHARACTERS = "Contains invalid characters";
+ // TODO: make these messages dynamic
+ /**
+  * Message of the IOException that readFile throws when the data could not
+  * be read. It is built once, at class initialisation, from
+  * READABLE_FORMATS, so READABLE_FORMATS must be declared before this field.
+  */
+ public static String SUPPORTED_FORMATS = "Formats currently supported are\n" +
+ prettyPrint(READABLE_FORMATS);
+ /**
+ *
+ * @param els
+ * @return grammatically correct(ish) list consisting of els elements.
+ */
+ public static String prettyPrint(String[] els) {
+ StringBuffer list = new StringBuffer();
+ for (int i=0,iSize=els.length-1; i<iSize;i++)
+ {
+ list.append(els[i]);
+ list.append(",");
+ }
+ list.append(" and "+els[els.length-1]+".");
+ return list.toString();
+ }
+ /**
+  * Datasource type name; main passes it as the type argument of readFile
+  * and of IdentifyFile.Identify.
+  */
+ public static String FILE = "File";
+ /** Datasource type name - presumably for data fetched from a URL; confirm against callers. */
+ public static String URL = "URL";
+ /**
+  * Datasource type name for pasted text. readFile compares its type
+  * argument with the same text ("Paste", ignoring case) before retrying the
+  * data as a bare sequence.
+  */
+ public static String PASTE = "Paste";
+ /** Datasource type name - presumably for data loaded through the class loader; confirm against callers. */
+ public static String CLASSLOADER = "ClassLoader";
+
+ /** Parser created by the most recent readFile call; null until readFile has created one. */
+ AlignFile afile = null;
+ /** Data, or data location, handed to the most recent readFile call. */
+ String inFile;
+ /**
+  * Tests whether a format name is one of the READABLE_FORMATS entries.
+  * The comparison ignores case; a null format is never valid.
+  *
+  * @param format a format string to be compared with READABLE_FORMATS
+  * @return true if format is readable
+  */
+ public static final boolean isValidFormat(String format)
+ {
+   boolean found = false;
+   // Stop scanning as soon as one entry matches.
+   for (int idx = 0; !found && idx < READABLE_FORMATS.length; idx++)
+   {
+     found = READABLE_FORMATS[idx].equalsIgnoreCase(format);
+   }
+   return found;
+ }
+
+ /**
+  * Constructs the correct filetype parser for a characterised datasource,
+  * reads the data with it and returns the result as an alignment to which
+  * the parser has added its annotations.
+  * <p>
+  * The format name is matched ignoring case, in line with isValidFormat and
+  * formatSequences. If reading fails and the datasource type is "Paste",
+  * the data is retried as a FASTA entry with the id "UNKNOWN", to cope with
+  * pasted residues that carry no label.
+  *
+  * @param inFile data/data location
+  * @param type type of datasource
+  * @param format File format of data provided by datasource
+  *
+  * @return a new Alignment holding the parsed sequences
+  * @throws java.io.IOException with a message starting with
+  *         INVALID_CHARACTERS when a parser reported such a message,
+  *         otherwise with the SUPPORTED_FORMATS message when the data
+  *         could not be read (including an unknown or null format)
+  */
+ public Alignment readFile(String inFile, String type, String format)
+     throws java.io.IOException
+ {
+   this.inFile = inFile;
+   // Drop the parser of any earlier call so that an unrecognised format can
+   // never hand back the sequences of a previously read file.
+   afile = null;
+   try
+   {
+     if ("FASTA".equalsIgnoreCase(format))
+     {
+       afile = new FastaFile(inFile, type);
+     }
+     else if ("MSF".equalsIgnoreCase(format))
+     {
+       afile = new MSFfile(inFile, type);
+     }
+     else if ("PileUp".equalsIgnoreCase(format))
+     {
+       afile = new PileUpfile(inFile, type);
+     }
+     else if ("CLUSTAL".equalsIgnoreCase(format))
+     {
+       afile = new ClustalFile(inFile, type);
+     }
+     else if ("BLC".equalsIgnoreCase(format))
+     {
+       afile = new BLCFile(inFile, type);
+     }
+     else if ("PIR".equalsIgnoreCase(format))
+     {
+       afile = new PIRFile(inFile, type);
+     }
+     else if ("PFAM".equalsIgnoreCase(format))
+     {
+       afile = new PfamFile(inFile, type);
+     }
+     else if ("JnetFile".equalsIgnoreCase(format))
+     {
+       afile = new JPredFile(inFile, type);
+       ( (JPredFile) afile).removeNonSequences();
+     }
+     else if ("PDB".equalsIgnoreCase(format))
+     {
+       afile = new MCview.PDBfile(inFile, type);
+     }
+     else if ("STH".equalsIgnoreCase(format))
+     {
+       afile = new StockholmFile(inFile, type);
+     }
+     else
+     {
+       // Handled by the catch block below, so pasted data still gets the
+       // bare-sequence retry before the format is reported as unsupported.
+       throw new java.io.IOException("Unknown file format string '" + format
+           + "'");
+     }
+
+     Alignment al = new Alignment(afile.getSeqsAsArray());
+
+     afile.addAnnotations(al);
+
+     return al;
+   }
+   catch (Exception e)
+   {
+     e.printStackTrace();
+     System.err.println("Failed to read alignment using the '" + format +
+         "' reader.\n" + e);
+
+     if (e.getMessage() != null &&
+         e.getMessage().startsWith(INVALID_CHARACTERS))
+     {
+       throw new java.io.IOException(e.getMessage());
+     }
+
+     // Finally test if the user has pasted just the sequence, no id.
+     // Constant-first comparison keeps a null type from raising an NPE here.
+     if ("Paste".equalsIgnoreCase(type))
+     {
+       try
+       {
+         // Possible sequence is just residues with no label
+         afile = new FastaFile(">UNKNOWN\n" + inFile, "Paste");
+         Alignment al = new Alignment(afile.getSeqsAsArray());
+         afile.addAnnotations(al);
+         return al;
+       }
+       catch (Exception ex)
+       {
+         // Test the message rather than toString(), which starts with the
+         // exception class name, and report the retry's own message.
+         if (ex.getMessage() != null &&
+             ex.getMessage().startsWith(INVALID_CHARACTERS))
+         {
+           throw new java.io.IOException(ex.getMessage());
+         }
+
+         ex.printStackTrace();
+       }
+     }
+
+     // If we get to this stage, the format was not supported
+     throw new java.io.IOException(SUPPORTED_FORMATS);
+   }
+ }
+
+ /**
+  * Writes an alignment out as flat file text in the requested format.
+  * The format name is matched ignoring case. Any failure, including an
+  * unknown format name, is reported on System.err and results in null.
+  *
+  * @param format string name of alignment format
+  * @param alignment the alignment to be written out
+  * @param jvsuffix handed to the writer's addJVSuffix method; according to
+  *        the earlier documentation it controls whether /START-END is
+  *        added to sequence names
+  *
+  * @return alignment flat file contents, or null if they could not be
+  *         produced
+  */
+ public String formatSequences(String format,
+     AlignmentI alignment,
+     boolean jvsuffix)
+ {
+   String output = null;
+
+   try
+   {
+     // Every branch below either assigns the writer or throws.
+     AlignFile writer;
+
+     if (format.equalsIgnoreCase("FASTA"))
+     {
+       writer = new FastaFile();
+     }
+     else if (format.equalsIgnoreCase("MSF"))
+     {
+       writer = new MSFfile();
+     }
+     else if (format.equalsIgnoreCase("PileUp"))
+     {
+       writer = new PileUpfile();
+     }
+     else if (format.equalsIgnoreCase("CLUSTAL"))
+     {
+       writer = new ClustalFile();
+     }
+     else if (format.equalsIgnoreCase("BLC"))
+     {
+       writer = new BLCFile();
+     }
+     else if (format.equalsIgnoreCase("PIR"))
+     {
+       writer = new PIRFile();
+     }
+     else if (format.equalsIgnoreCase("PFAM"))
+     {
+       writer = new PfamFile();
+     }
+     else if (format.equalsIgnoreCase("STH"))
+     {
+       writer = new StockholmFile();
+     }
+     else if (format.equalsIgnoreCase("AMSA"))
+     {
+       writer = new AMSAFile(alignment);
+     }
+     else
+     {
+       throw new Exception("Implementation error: Unknown file format string");
+     }
+
+     writer.addJVSuffix(jvsuffix);
+     writer.setSeqs(alignment.getSequencesArray());
+     output = writer.print();
+   }
+   catch (Exception failure)
+   {
+     System.err.println("Failed to write alignment as a '" + format +
+         "' file\n");
+     failure.printStackTrace();
+   }
+
+   return output;
+ }
+ /**
+  * Command line check: treats every argument as a file name, reads each
+  * existing file with a fresh adapter using the format reported by
+  * IdentifyFile, and prints the elapsed time and the change in used
+  * memory. Arguments that do not name an existing file are reported on
+  * System.err and skipped.
+  *
+  * @param args file names to read
+  */
+ public static void main(String[] args)
+ {
+   for (int argIndex = 0; argIndex < args.length; argIndex++)
+   {
+     String path = args[argIndex];
+     File candidate = new File(path);
+     if (!candidate.exists())
+     {
+       System.err.println("Ignoring argument '"+path+"' ("+argIndex+"'th)- not a readable file.");
+       continue;
+     }
+
+     try
+     {
+       AppletFormatAdapter adapter = new AppletFormatAdapter();
+       Runtime runtime = Runtime.getRuntime();
+
+       System.gc();
+       long usedBefore = runtime.totalMemory() - runtime.freeMemory();
+       long started = System.currentTimeMillis();
+
+       // The result stays referenced by this local while memory is sampled.
+       Alignment al = adapter.readFile(path, FILE, new IdentifyFile().Identify(path, FILE));
+
+       long elapsed = System.currentTimeMillis() - started;
+       System.gc();
+       long usedAfter = runtime.totalMemory() - runtime.freeMemory();
+       long memoryDelta = usedAfter - usedBefore;
+
+       System.out.println("Read took "+(elapsed/1000.0)+" seconds.");
+       System.out.println("Difference between free memory now and before is "+(memoryDelta/(1024.0*1024.0))+" MB");
+     }
+     catch (Exception e)
+     {
+       System.err.println("Exception when dealing with "+argIndex+"'th argument: "+path+"\n"+e);
+     }
+   }
+ }
+}