/*
 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4)
 * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
30 public class IdentifyFile
33 * Identify a datasource's file content.
34 * @note Do not use this method
35 * for stream sources - create a FileParse object instead.
37 * @param file DOCUMENT ME!
38 * @param protocol DOCUMENT ME!
41 public String Identify(String file, String protocol)
43 String emessage = "UNIDENTIFIED FILE PARSING ERROR";
44 FileParse parser = null;
46 parser = new FileParse(file, protocol);
47 if (parser.isValid()) {
48 return Identify(parser);
50 } catch (Exception e) {
51 System.err.println("Error whilst identifying");
52 e.printStackTrace(System.err);
53 emessage = e.getMessage();
56 return parser.errormessage;
59 public String Identify(FileParse source) {
60 return Identify(source, true); // preserves original behaviour prior to version 2.3
/**
 * Identify contents of source, closing it or resetting source to start afterwards.
 *
 * @return filetype string
 */
// Scans the lines supplied by source for known file-format signatures in order to
// decide which format the data is in.
// NOTE(review): the numeric prefix on every line and the gaps in that numbering suggest
// this listing is an extraction with short lines missing (braces, try/catch, the
// declarations of 'data' and 'length', most 'reply = ...' assignments, break/continue and
// the final return). The notes below describe only what is visible - confirm against the
// complete file.
68 public String Identify(FileParse source, boolean closeSource) {
// Initial answer; presumably what is reported when no signature below matches - TODO confirm.
69 String reply = "PFAM";
// Becomes true once a blank or '#' line, or a line matching no signature, has been passed over.
72 boolean lineswereskipped=false;
73 boolean isBinary = false; // true if length is non-zero and non-printable characters are encountered
// Read until nextLine() reports the end of the source by returning null.
79 while ( (data = source.nextLine()) != null)
// Running total of characters read so far.
81 length+=data.length();
// Control-character scan is only applied while no line has been skipped yet.
82 if (!lineswereskipped)
// Stops at the first character that trips the filter.
84 for (int i=0;!isBinary && i<data.length(); i++)
86 char c = data.charAt(i);
87 isBinary = (c<32 && c!='\t' && c!='\n' && c!='\r' && c!=5 && c!=27); // nominal binary character filter excluding CR, LF, tab, ESC (27) and ^E (5) for certain blast ids
92 // jar files are special - since they contain all sorts of random characters.
// The file name can only be inspected when the source is backed by a file object.
93 if (source.inFile!=null)
95 String fileStr=source.inFile.getName();
96 // possibly a Jalview archive.
// lastIndexOf matches ".jar"/".zip" anywhere in the name, not only as its suffix.
97 if (fileStr.lastIndexOf(".jar")>-1 || fileStr.lastIndexOf(".zip")>-1)
// A line starting with "PK", seen before any line was skipped, is reported as a Jalview archive.
102 if (!lineswereskipped && data.startsWith("PK")) {
103 reply="Jalview"; // archive.
// All signature tests below are made against the upper-cased line, so they ignore case.
107 data = data.toUpperCase();
// Signature: "# STOCKHOLM" anywhere in the line.
109 if ( (data.indexOf("# STOCKHOLM") > -1))
// Empty lines and lines beginning with '#' carry no signature and are marked as skipped.
116 if ((data.length() < 1) || (data.indexOf("#") == 0))
118 lineswereskipped=true;
// Signature: "PILEUP" anywhere in the line.
122 if (data.indexOf("PILEUP") > -1)
// Signature: line starts with "//", or contains "!!" ahead of "_MULTIPLE_ALIGNMENT ".
129 if ( (data.indexOf("//") == 0) ||
130 ( (data.indexOf("!!") > -1) &&
131 (data.indexOf("!!") < data.indexOf(
132 "_MULTIPLE_ALIGNMENT "))))
// Signature: "CLUSTAL" anywhere in the line.
138 else if (data.indexOf("CLUSTAL") > -1)
// Signature: ">P1;" or ">DL;" anywhere in the line.
144 else if ( (data.indexOf(">P1;") > -1) ||
145 (data.indexOf(">DL;") > -1))
// Any other line containing '>' needs the following line(s) to be disambiguated.
151 else if (data.indexOf(">") > -1)
153 // could be BLC file, read next line to confirm
// NOTE(review): nextLine() can return null at end of input and no null check on 'data'
// is visible before the indexOf call below - confirm.
154 data = source.nextLine();
156 if (data.indexOf(">") > -1)
162 //Is this a single line BLC file?
164 String data2 = source.nextLine();
// Compares the column of '*' in the line after the header with that in data2.
// NOTE(review): the first operand of this condition is not visible here; presumably it
// guards data2 against null - confirm.
166 && data.indexOf("*") > -1
167 && data.indexOf("*") == data2.indexOf("*"))
174 // TODO : AMSA File is indicated if there is annotation in the FASTA file - but FASTA will automatically generate this at the mo.
// Signature: line starts with "HEADER" or "ATOM".
179 else if (data.indexOf("HEADER") == 0 ||
180 data.indexOf("ATOM") == 0)
// Only considered when nothing was skipped before this line and it does not start with '*' or ' '.
// NOTE(review): indexOf returns -1 for a missing character, so the ':' < ',' comparison is
// also true for a line that has a ',' but no ':' - confirm that this is intended.
185 else if (!lineswereskipped
186 && data.charAt(0)!='*'
187 && data.charAt(0)!=' '
188 && data.indexOf(":") < data.indexOf(",")) // && data.indexOf(",")<data.indexOf(",", data.indexOf(",")))
190 // file looks like a concise JNet file
195 lineswereskipped=true; // this means there was some junk before any key file signature
// Presumably the branch taken when closeSource is false - TODO confirm.
200 source.reset(); // so the file can be parsed from the beginning again.
// Failure path: 'ex' is presumably bound by a catch clause that is not visible here; the
// exception is reported on stderr and the source's own error message is returned.
205 System.err.println("File Identification failed!\n" + ex);
206 return source.errormessage;
// Reported when no data was read; presumably guarded by a check that length is zero - TODO confirm.
210 System.err.println("File Identification failed! - Empty file was read.");
211 return "EMPTY DATA FILE";
215 public static void main(String[] args) {
216 for (int i=0; args!=null && i<args.length; i++)
218 IdentifyFile ider = new IdentifyFile();
219 String type = ider.Identify(args[i], AppletFormatAdapter.FILE);
220 System.out.println("Type of "+args[i]+" is "+type);
222 if (args==null || args.length==0)
224 System.err.println("Usage: <Filename> [<Filename> ...]");