X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fio%2FIdentifyFile.java;h=889359f01351afc32e5549a40ec8ca38bd769199;hb=9e96b685cef70e6ed165ba8f643fefd2996d688d;hp=2679b484dc27cea46a3261a743982c84814a2db5;hpb=67b813a6274ce28d9c56a0fceb4d755144d3e9cf;p=jalview.git diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 2679b48..889359f 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -1,47 +1,50 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1) - * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 + * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; -import java.io.*; -import java.net.*; +import java.io.IOException; /** * DOCUMENT ME! - * + * * @author $author$ * @version $Revision$ */ public class IdentifyFile { + public static final String FeaturesFile = "GFF or Jalview features"; + /** * Identify a datasource's file content. - * + * * @note Do not use this method for stream sources - create a FileParse object * instead. - * + * * @param file - * DOCUMENT ME! + * DOCUMENT ME! * @param protocol - * DOCUMENT ME! + * DOCUMENT ME! * @return ID String */ - public String Identify(String file, String protocol) + public String identify(String file, String protocol) { String emessage = "UNIDENTIFIED FILE PARSING ERROR"; FileParse parser = null; @@ -50,7 +53,7 @@ public class IdentifyFile parser = new FileParse(file, protocol); if (parser.isValid()) { - return Identify(parser); + return identify(parser); } } catch (Exception e) { @@ -59,32 +62,35 @@ public class IdentifyFile emessage = e.getMessage(); } if (parser != null) + { return parser.errormessage; + } return emessage; } - public String Identify(FileParse source) + public String identify(FileParse source) { - return Identify(source, true); // preserves original behaviour prior to - // version 2.3 + return identify(source, true); // preserves original behaviour prior to + // version 2.3 } /** * Identify contents of source, closing it or resetting source to start * afterwards. 
- * + * * @param source * @param closeSource * @return filetype string */ - public String Identify(FileParse source, boolean closeSource) + public String identify(FileParse source, boolean closeSource) { String reply = "PFAM"; String data; - int length = 0; + int bytesRead = 0; + int trimmedLength = 0; boolean lineswereskipped = false; boolean isBinary = false; // true if length is non-zero and non-printable - // characters are encountered + // characters are encountered try { if (!closeSource) @@ -93,7 +99,8 @@ public class IdentifyFile } while ((data = source.nextLine()) != null) { - length += data.length(); + bytesRead += data.length(); + trimmedLength += data.trim().length(); if (!lineswereskipped) { for (int i = 0; !isBinary && i < data.length(); i++) @@ -101,8 +108,8 @@ public class IdentifyFile char c = data.charAt(i); isBinary = (c < 32 && c != '\t' && c != '\n' && c != '\r' && c != 5 && c != 27); // nominal binary character filter - // excluding CR, LF, tab,DEL and ^E - // for certain blast ids + // excluding CR, LF, tab,DEL and ^E + // for certain blast ids } } if (isBinary) @@ -127,44 +134,34 @@ public class IdentifyFile } data = data.toUpperCase(); - if ((data.indexOf("# STOCKHOLM") > -1)) + if (data.startsWith("##GFF-VERSION")) { - reply = "STH"; - + // GFF - possibly embedded in a Jalview features file! 
+ reply = FeaturesFile; break; } - - if ((data.length() < 1) || (data.indexOf("#") == 0)) + if (looksLikeFeatureData(data)) { - lineswereskipped = true; - continue; - } - - if (data.indexOf("PILEUP") > -1) - { - reply = "PileUp"; - + reply = FeaturesFile; break; } - - if ((data.indexOf("//") == 0) - || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data - .indexOf("_MULTIPLE_ALIGNMENT ")))) + if (data.indexOf("# STOCKHOLM") > -1) { - reply = "MSF"; - + reply = "STH"; break; } - else if (data.indexOf("CLUSTAL") > -1) + if (data.indexOf("_ENTRY.ID") > -1 + || data.indexOf("_AUDIT_AUTHOR.NAME") > -1 + || data.indexOf("_ATOM_SITE.") > -1) { - reply = "CLUSTAL"; - + reply = "mmCIF"; break; } - else if (data.indexOf(">") > -1) + // if (data.indexOf(">") > -1) + if (data.startsWith(">")) { // FASTA, PIR file or BLC file - boolean checkPIR = false; + boolean checkPIR = false, starterm = false; if ((data.indexOf(">P1;") > -1) || (data.indexOf(">DL;") > -1)) { // watch for PIR file attributes @@ -181,13 +178,24 @@ public class IdentifyFile else { // Is this a single line BLC file? - source.nextLine(); + String data1 = source.nextLine(); String data2 = source.nextLine(); - if (data2 != null && data.indexOf("*") > -1) + int c1; + if (checkPIR) + { + starterm = (data1 != null && data1.indexOf("*") > -1) + || (data2 != null && data2.indexOf("*") > -1); + } + if (data2 != null && (c1 = data.indexOf("*")) > -1) { - if (data.indexOf("*") == data2.indexOf("*")) + if (c1 == 0 && c1 == data2.indexOf("*")) { - reply = "BLC"; + reply = "BLC"; + } + else + { + reply = "FASTA"; // possibly a bad choice - may be recognised as + // PIR } // otherwise can still possibly be a PIR file } @@ -197,41 +205,121 @@ public class IdentifyFile // TODO : AMSA File is indicated if there is annotation in the // FASTA file - but FASTA will automatically generate this at the // mo. + if (!checkPIR) + { + break; + } + } + } + // final check for PIR content. 
require + // >P1;title\n\nterminated sequence to occur at least once. + + // TODO the PIR/fasta ambiguity may be the use case that is needed to + // have + // a 'Parse as type XXX' parameter for the applet/application. + if (checkPIR) + { + String dta = null; + if (!starterm) + { + do + { + try + { + dta = source.nextLine(); + } catch (IOException ex) + { + } + if (dta != null && dta.indexOf("*") > -1) + { + starterm = true; + } + } while (dta != null && !starterm); + } + if (starterm) + { + reply = "PIR"; break; } + else + { + reply = "FASTA"; // probably a bad choice! + } } - // TODO final check for PIR content. require >P1;title\n\nterminated sequence to occur at least once. - // the PIR/fasta ambiguity may be the use case that is needed to have a 'Parse as type XXX' parameter for the applet/application. + // read as a FASTA (probably) break; } + int lessThan = data.indexOf("<"); + if ((lessThan > -1)) // possible Markup Language data i.e HTML, + // RNAML, XML + { + String upper = data.toUpperCase(); + if (upper.substring(lessThan).startsWith(" -1) + { + reply = JSONFile.FILE_DESC; + break; + } + if ((data.length() < 1) || (data.indexOf("#") == 0)) + { + lineswereskipped = true; + continue; + } + + if (data.indexOf("PILEUP") > -1) + { + reply = "PileUp"; + + break; + } + + if ((data.indexOf("//") == 0) + || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data + .indexOf("_MULTIPLE_ALIGNMENT ")))) + { + reply = "MSF"; + + break; + } + else if (data.indexOf("CLUSTAL") > -1) + { + reply = "CLUSTAL"; + + break; + } + else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) { reply = "PDB"; break; } - /*// TODO comment out SimpleBLAST identification for Jalview 2.4.1 - else if (!lineswereskipped && data.indexOf("BLAST")<4) + else if (data.matches("\\s*\\d+\\s+\\d+\\s*")) { - reply = "SimpleBLAST"; + reply = PhylipFile.FILE_DESC; break; - } - // end comments for Jalview 2.4.1 - * - */ - else if (!lineswereskipped && data.charAt(0) != '*' - && 
data.charAt(0) != ' ' - && data.indexOf(":") < data.indexOf(",")) // && - // data.indexOf(",") -1 + && commaPos > -1 && colonPos < commaPos; + // && data.indexOf(",")