X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FIdentifyFile.java;h=deae9ae7ece309e8f40515ff38306621b1acb42e;hb=37de9310bec3501cbc6381e0c3dcb282fcaad812;hp=365982ea9ef5b40a7ecc042dc88f749578adb0d7;hpb=68c26e8852772be21fdb5b091fa9083d8cdb6eec;p=jalview.git diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 365982e..deae9ae 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -30,6 +30,8 @@ import java.io.IOException; */ public class IdentifyFile { + public static final String FeaturesFile = "GFF or Jalview features"; + /** * Identify a datasource's file content. * @@ -42,7 +44,7 @@ public class IdentifyFile * DOCUMENT ME! * @return ID String */ - public String Identify(String file, String protocol) + public String identify(String file, String protocol) { String emessage = "UNIDENTIFIED FILE PARSING ERROR"; FileParse parser = null; @@ -51,7 +53,7 @@ public class IdentifyFile parser = new FileParse(file, protocol); if (parser.isValid()) { - return Identify(parser); + return identify(parser); } } catch (Exception e) { @@ -66,9 +68,9 @@ public class IdentifyFile return emessage; } - public String Identify(FileParse source) + public String identify(FileParse source) { - return Identify(source, true); // preserves original behaviour prior to + return identify(source, true); // preserves original behaviour prior to // version 2.3 } @@ -80,11 +82,12 @@ public class IdentifyFile * @param closeSource * @return filetype string */ - public String Identify(FileParse source, boolean closeSource) + public String identify(FileParse source, boolean closeSource) { String reply = "PFAM"; String data; - int length = 0; + int bytesRead = 0; + int trimmedLength = 0; boolean lineswereskipped = false; boolean isBinary = false; // true if length is non-zero and non-printable // characters are encountered @@ -96,7 +99,8 @@ public class IdentifyFile } while ((data = source.nextLine()) != null) { - length += data.length(); + bytesRead += data.length(); + trimmedLength += data.trim().length(); if (!lineswereskipped) { for (int i = 0; !isBinary && i < data.length(); i++) @@ -130,54 +134,31 @@ public class IdentifyFile } data = data.toUpperCase(); - if ((data.indexOf("# STOCKHOLM") > -1)) + if (data.startsWith("##GFF-VERSION")) { - reply = "STH"; - + // GFF - possibly embedded in a Jalview features file! + reply = FeaturesFile; break; } - if (data.matches("<(\"[^\"]*\"|'[^']*'|[^'\">])*>")) + if (looksLikeFeatureData(data)) { - reply = "HTML"; + reply = FeaturesFile; break; } - - if ((data.indexOf("<") > -1)) - { - reply = "RNAML"; - - break; - } - - if ((data.length() < 1) || (data.indexOf("#") == 0)) - { - lineswereskipped = true; - continue; - } - - if (data.indexOf("PILEUP") > -1) - { - reply = "PileUp"; - - break; - } - - if ((data.indexOf("//") == 0) - || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data - .indexOf("_MULTIPLE_ALIGNMENT ")))) + if (data.indexOf("# STOCKHOLM") > -1) { - reply = "MSF"; - + reply = "STH"; break; } - else if (data.indexOf("CLUSTAL") > -1) + if (data.indexOf("_ENTRY.ID") > -1 + || data.indexOf("_AUDIT_AUTHOR.NAME") > -1 + || data.indexOf("_ATOM_SITE.") > -1) { - reply = "CLUSTAL"; - + reply = "mmCIF"; break; } - - else if (data.indexOf(">") > -1) + // if (data.indexOf(">") > -1) + if (data.startsWith(">")) { // FASTA, PIR file or BLC file boolean checkPIR = false, starterm = false; @@ -249,7 +230,6 @@ public class IdentifyFile } catch (IOException ex) { } - ; if (dta != null && dta.indexOf("*") > -1) { starterm = true; @@ -269,6 +249,56 @@ public class IdentifyFile // read as a FASTA (probably) break; } + int lessThan = data.indexOf("<"); + if ((lessThan > -1)) // possible Markup Language data i.e HTML, + // RNAML, XML + { + String upper = data.toUpperCase(); + if (upper.substring(lessThan).startsWith(" -1) + { + reply = JSONFile.FILE_DESC; + break; + } + if ((data.length() < 1) || (data.indexOf("#") == 0)) + { + lineswereskipped = true; + continue; + } + + if (data.indexOf("PILEUP") > -1) + { + reply = "PileUp"; + + break; + } + + if ((data.indexOf("//") == 0) + || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data + .indexOf("_MULTIPLE_ALIGNMENT ")))) + { + reply = "MSF"; + + break; + } + else if (data.indexOf("CLUSTAL") > -1) + { + reply = "CLUSTAL"; + + break; + } + else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) { reply = "PDB"; @@ -279,24 +309,13 @@ public class IdentifyFile reply = PhylipFile.FILE_DESC; break; } - - - /* - * // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else - * if (!lineswereskipped && data.indexOf("BLAST")<4) { reply = - * "SimpleBLAST"; break; - * - * } // end comments for Jalview 2.4.1 - */ - else if (!lineswereskipped && data.charAt(0) != '*' - && data.charAt(0) != ' ' - && data.indexOf(":") < data.indexOf(",")) // && - // data.indexOf(",") -1 + && commaPos > -1 && colonPos < commaPos; + // && data.indexOf(",")