X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FIdentifyFile.java;h=731304307931bb6184db693e870b3006ab215c27;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=7f3db40720fda78e4b677235514f8715ed01c61e;hpb=e8b5b1e551794bf54de00c610363ffd140c62e55;p=jalview.git diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 7f3db40..7313043 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -1,20 +1,19 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . */ package jalview.io; @@ -23,63 +22,120 @@ import java.net.*; /** * DOCUMENT ME! - * + * * @author $author$ * @version $Revision$ */ public class IdentifyFile { /** - * Identify a datasource's file content. - * @note Do not use this method - * for stream sources - create a FileParse object instead. - * - * @param file DOCUMENT ME! - * @param protocol DOCUMENT ME! + * Identify a datasource's file content. + * + * @note Do not use this method for stream sources - create a FileParse object + * instead. + * + * @param file + * DOCUMENT ME! + * @param protocol + * DOCUMENT ME! * @return ID String */ public String Identify(String file, String protocol) { + String emessage = "UNIDENTIFIED FILE PARSING ERROR"; FileParse parser = null; - try { + try + { parser = new FileParse(file, protocol); - if (parser.isValid()) { + if (parser.isValid()) + { return Identify(parser); } - } catch (Exception e) { + } catch (Exception e) + { System.err.println("Error whilst identifying"); e.printStackTrace(System.err); + emessage = e.getMessage(); } - if (parser!=null) + if (parser != null) return parser.errormessage; - return "UNIDENTIFIED FILE PARSING ERROR"; + return emessage; } - public String Identify(FileParse source) { - return Identify(source, true); // preserves original behaviour prior to version 2.3 + + public String Identify(FileParse source) + { + return Identify(source, true); // preserves original behaviour prior to + // version 2.3 } + /** - * Identify contents of source, closing it or resetting source to start afterwards. + * Identify contents of source, closing it or resetting source to start + * afterwards. + * * @param source * @param closeSource * @return filetype string */ - public String Identify(FileParse source, boolean closeSource) { + public String Identify(FileParse source, boolean closeSource) + { String reply = "PFAM"; String data; - try { - while ( (data = source.nextLine()) != null) + int length = 0; + boolean lineswereskipped = false; + boolean isBinary = false; // true if length is non-zero and non-printable + // characters are encountered + try + { + if (!closeSource) { + source.mark(); + } + while ((data = source.nextLine()) != null) + { + length += data.length(); + if (!lineswereskipped) + { + for (int i = 0; !isBinary && i < data.length(); i++) + { + char c = data.charAt(i); + isBinary = (c < 32 && c != '\t' && c != '\n' && c != '\r' + && c != 5 && c != 27); // nominal binary character filter + // excluding CR, LF, tab,DEL and ^E + // for certain blast ids + } + } + if (isBinary) + { + // jar files are special - since they contain all sorts of random + // characters. + if (source.inFile != null) + { + String fileStr = source.inFile.getName(); + // possibly a Jalview archive. + if (fileStr.lastIndexOf(".jar") > -1 + || fileStr.lastIndexOf(".zip") > -1) + { + reply = "Jalview"; + } + } + if (!lineswereskipped && data.startsWith("PK")) + { + reply = "Jalview"; // archive. + break; + } + } data = data.toUpperCase(); - if ( (data.indexOf("# STOCKHOLM") > -1)) + if ((data.indexOf("# STOCKHOLM") > -1)) { reply = "STH"; break; } - if ( (data.indexOf("#") == 0) || (data.length() < 1)) + if ((data.length() < 1) || (data.indexOf("#") == 0)) { + lineswereskipped = true; continue; } @@ -90,10 +146,9 @@ public class IdentifyFile break; } - if ( (data.indexOf("//") == 0) || - ( (data.indexOf("!!") > -1) && - (data.indexOf("!!") < data.indexOf( - "_MULTIPLE_ALIGNMENT ")))) + if ((data.indexOf("//") == 0) + || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data + .indexOf("_MULTIPLE_ALIGNMENT ")))) { reply = "MSF"; @@ -105,16 +160,17 @@ public class IdentifyFile break; } - else if ( (data.indexOf(">P1;") > -1) || - (data.indexOf(">DL;") > -1)) - { - reply = "PIR"; - - break; - } else if (data.indexOf(">") > -1) { - // could be BLC file, read next line to confirm + // FASTA, PIR file or BLC file + boolean checkPIR = false,starterm=false; + if ((data.indexOf(">P1;") > -1) || (data.indexOf(">DL;") > -1)) + { + // watch for PIR file attributes + checkPIR = true; + reply = "PIR"; + } + // could also be BLC file, read next line to confirm data = source.nextLine(); if (data.indexOf(">") > -1) @@ -123,60 +179,122 @@ public class IdentifyFile } else { - //Is this a single line BLC file? - source.nextLine(); + // Is this a single line BLC file? + String data1 = source.nextLine(); String data2 = source.nextLine(); - if (data2 != null - && data.indexOf("*") > -1 - && data.indexOf("*") == data2.indexOf("*")) + if (checkPIR) { - reply = "BLC"; + starterm = (data1!=null && data1.indexOf("*")>-1) || (data2!=null && data2.indexOf("*")>-1); + } + if (data2 != null && data.indexOf("*") > -1) + { + if (data.indexOf("*") == data2.indexOf("*")) + { + reply = "BLC"; + } + // otherwise can still possibly be a PIR file } else { reply = "FASTA"; + // TODO : AMSA File is indicated if there is annotation in the + // FASTA file - but FASTA will automatically generate this at the + // mo. + if (!checkPIR) { + break; + } } } + // final check for PIR content. require + // >P1;title\n\nterminated sequence to occur at least once. + // TODO the PIR/fasta ambiguity may be the use case that is needed to have + // a 'Parse as type XXX' parameter for the applet/application. + if (checkPIR) + { + String dta=null; + if (!starterm) + { + do { + try { + dta = source.nextLine(); + } catch(IOException ex) {}; + if (dta!=null && dta.indexOf("*")>-1) + { + starterm = true; + } + } while (dta!=null && !starterm); + } + if (starterm) + { + reply="PIR"; + break; + } else { + reply="FASTA"; // probably a bad choice! + } + } + // read as a FASTA (probably) break; } - else if (data.indexOf("HEADER") == 0 || - data.indexOf("ATOM") == 0) + else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) { reply = "PDB"; break; } - else if (data.indexOf(":") < data.indexOf(",")) // && data.indexOf(",")-1 || fileStr.lastIndexOf(".zip")>-1) - { - reply = "Jalview"; - } - } else if (data.startsWith("PK")) { - reply="Jalview"; // archive. - break; - } + + lineswereskipped = true; // this means there was some junk before any + // key file signature } - if (closeSource) { + if (closeSource) + { source.close(); - } else { + } + else + { source.reset(); // so the file can be parsed from the beginning again. } - } - catch (Exception ex) + } catch (Exception ex) { System.err.println("File Identification failed!\n" + ex); return source.errormessage; } - + if (length == 0) + { + System.err + .println("File Identification failed! - Empty file was read."); + return "EMPTY DATA FILE"; + } return reply; } + + public static void main(String[] args) + { + for (int i = 0; args != null && i < args.length; i++) + { + IdentifyFile ider = new IdentifyFile(); + String type = ider.Identify(args[i], AppletFormatAdapter.FILE); + System.out.println("Type of " + args[i] + " is " + type); + } + if (args == null || args.length == 0) + { + System.err.println("Usage: [ ...]"); + } + } }