2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
30 public class IdentifyFile
33 * Identify a datasource's file content.
34 * @note Do not use this method
35 * for stream sources - create a FileParse object instead.
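37 * @param file name or location of the data source; passed to the FileParse constructor
38 * @param protocol access protocol for the source (e.g. AppletFormatAdapter.FILE); passed to the FileParse constructor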
// Convenience entry point: builds a FileParse from file and protocol and delegates to Identify(FileParse).
41 public String Identify(String file, String protocol)
43 FileParse parser = null;
45 parser = new FileParse(file, protocol);
// Identification is only attempted when the parser reports itself valid.
46 if (parser.isValid()) {
47 return Identify(parser);
// Any exception is reported on stderr; control then falls through to the error returns below.
49 } catch (Exception e) {
50 System.err.println("Error whilst identifying");
51 e.printStackTrace(System.err);
// NOTE(review): two consecutive returns are visible here, so a guard on the first one
// (presumably a null check on parser) is not shown in this extract - confirm against the full source.
54 return parser.errormessage;
// Fallback message used when the parser error message is not returned.
55 return "UNIDENTIFIED FILE PARSING ERROR";
// Single-argument overload: delegates to Identify(source, closeSource) with closeSource set to true.
57 public String Identify(FileParse source) {
58 return Identify(source, true); // preserves original behaviour prior to version 2.3
61 * Identify contents of source, closing it or resetting source to start afterwards.
64 * @return filetype string
// Reads the source line by line and inspects each line for known format signatures.
// NOTE(review): most branch bodies (the assignments to reply and the loop exits) are not
// visible in this extract, so the comments below describe only the visible tests.
66 public String Identify(FileParse source, boolean closeSource) {
// Starting value of reply; the only visible overwrite is the archive case below.
67 String reply = "PFAM";
// Set once a blank/comment line or an unrecognised line has been seen (see the assignments further down).
70 boolean lineswereskipped=false;
71 boolean isBinary = false; // true if length is non-zero and non-printable characters are encountered
// Loop ends when nextLine() returns null.
73 while ( (data = source.nextLine()) != null)
// Running total of characters read.
75 length+=data.length();
// The binary scan only runs while no line has been skipped yet.
76 if (!lineswereskipped)
// Stops at the first character that trips the filter.
78 for (int i=0;!isBinary && i<data.length(); i++)
80 char c = data.charAt(i);
81 isBinary = (c<32 && c!='\t' && c!='\n' && c!='\r' && c!=5 && c!=27); // nominal binary character filter: control characters below 32, excluding tab, LF, CR, ^E (5) and ESC (27; note DEL is 127 and is not below 32) - ^E is tolerated for certain blast ids
86 // jar files are special - since they contain all sorts of random characters.
// Only sources that expose an inFile are checked by name.
87 if (source.inFile!=null)
89 String fileStr=source.inFile.getName();
90 // possibly a Jalview archive.
// NOTE(review): lastIndexOf matches the text anywhere in the name, not only as a suffix - confirm intended.
91 if (fileStr.lastIndexOf(".jar")>-1 || fileStr.lastIndexOf(".zip")>-1)
// PK at the start of a line, with no lines skipped so far, is taken as the archive signature.
96 if (!lineswereskipped && data.startsWith("PK")) {
97 reply="Jalview"; // archive.
// Upper-cased so that the signature literals tested below match regardless of case.
101 data = data.toUpperCase();
103 if ( (data.indexOf("# STOCKHOLM") > -1))
// Empty lines and lines starting with # are recorded as skipped.
110 if ((data.length() < 1) || (data.indexOf("#") == 0))
112 lineswereskipped=true;
116 if (data.indexOf("PILEUP") > -1)
// Matches a line starting with two slashes, or a line where !! occurs before the _MULTIPLE_ALIGNMENT marker.
123 if ( (data.indexOf("//") == 0) ||
124 ( (data.indexOf("!!") > -1) &&
125 (data.indexOf("!!") < data.indexOf(
126 "_MULTIPLE_ALIGNMENT "))))
132 else if (data.indexOf("CLUSTAL") > -1)
138 else if ( (data.indexOf(">P1;") > -1) ||
139 (data.indexOf(">DL;") > -1))
145 else if (data.indexOf(">") > -1)
147 // could be BLC file, read next line to confirm
// NOTE(review): nextLine() returns null at end of input (see the loop condition), and no null
// check is visible before the indexOf call below - confirm one exists in the full source.
148 data = source.nextLine();
150 if (data.indexOf(">") > -1)
156 //Is this a single line BLC file?
// Reads one further line and compares the position of the first * in both lines.
// NOTE(review): the opening of this condition is not visible in this extract.
158 String data2 = source.nextLine();
160 && data.indexOf("*") > -1
161 && data.indexOf("*") == data2.indexOf("*"))
168 // TODO : AMSA File is indicated if there is annotation in the FASTA file - but FASTA will automatically generate this at the mo.
// Matches lines that begin with HEADER or ATOM.
173 else if (data.indexOf("HEADER") == 0 ||
174 data.indexOf("ATOM") == 0)
// NOTE(review): indexOf returns -1 when the colon is absent, so a line with a comma but no
// colon also satisfies the comparison below - confirm intended.
179 else if (!lineswereskipped
180 && data.charAt(0)!='*'
181 && data.charAt(0)!=' '
182 && data.indexOf(":") < data.indexOf(",")) // && data.indexOf(",")<data.indexOf(",", data.indexOf(",")))
184 // file looks like a concise JNet file
189 lineswereskipped=true; // this means there was some junk before any key file signature
194 source.reset(); // so the file can be parsed from the beginning again.
// Failure path: the exception is reported on stderr and the source error message is returned.
199 System.err.println("File Identification failed!\n" + ex);
200 return source.errormessage;
// Empty-input path: reported on stderr and signalled with a fixed message.
204 System.err.println("File Identification failed! - Empty file was read.");
205 return "EMPTY DATA FILE";
// Command-line driver: identifies each file named in args and prints the result to stdout.
209 public static void main(String[] args) {
210 for (int i=0; args!=null && i<args.length; i++)
// A new IdentifyFile is created for every argument; each is read with the AppletFormatAdapter.FILE protocol.
212 IdentifyFile ider = new IdentifyFile();
213 String type = ider.Identify(args[i], AppletFormatAdapter.FILE);
214 System.out.println("Type of "+args[i]+" is "+type);
// With no arguments, a usage line is written to stderr.
216 if (args==null || args.length==0)
218 System.err.println("Usage: <Filename> [<Filename> ...]");