X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FIdentifyFile.java;h=f8d8415bf749474734b9a195015cfd728b9a8c38;hb=4815037f13748676b04b91dc761b73cf6f2d6ecd;hp=17ba32edf9a675e0c7b231c905f6c3e83c845865;hpb=72738726bce0733f652a5dbcc9db7c5a13bc2d80;p=jalview.git diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 17ba32e..f8d8415 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -30,8 +30,6 @@ import java.io.IOException; */ public class IdentifyFile { - public static final String GFF3File = "GFF v2 or v3"; - /** * Identify a datasource's file content. * @@ -39,21 +37,21 @@ public class IdentifyFile * instead. * * @param file - * DOCUMENT ME! - * @param protocol - * DOCUMENT ME! - * @return ID String + * @param sourceType + * @return + * @throws FileFormatException */ - public String Identify(String file, String protocol) + public FileFormatI identify(String file, DataSourceType sourceType) + throws FileFormatException { String emessage = "UNIDENTIFIED FILE PARSING ERROR"; FileParse parser = null; try { - parser = new FileParse(file, protocol); + parser = new FileParse(file, sourceType); if (parser.isValid()) { - return Identify(parser); + return identify(parser); } } catch (Exception e) { @@ -63,15 +61,22 @@ public class IdentifyFile } if (parser != null) { - return parser.errormessage; + throw new FileFormatException(parser.errormessage); } - return emessage; + throw new FileFormatException(emessage); + } + + public FileFormatI identify(FileParse source) throws FileFormatException + { + return identify(source, true); + // preserves original behaviour prior to version 2.3 } - public String Identify(FileParse source) + public FileFormatI identify(AlignmentFileI file, boolean closeSource) + throws IOException { - return Identify(source, true); // preserves original behaviour prior to - // version 2.3 + FileParse fp = new FileParse(file.getInFile(), file.getDataSourceType()); + return identify(fp, closeSource); } /** @@ -80,13 +85,16 @@ public class IdentifyFile * * @param source * @param closeSource - * @return filetype string + * @return (best guess at) file format + * @throws FileFormatException */ - public String Identify(FileParse source, boolean closeSource) + public FileFormatI identify(FileParse source, boolean closeSource) + throws FileFormatException { - String reply = null; + FileFormatI reply = FileFormat.Pfam; String data; - int length = 0; + int bytesRead = 0; + int trimmedLength = 0; boolean lineswereskipped = false; boolean isBinary = false; // true if length is non-zero and non-printable // characters are encountered @@ -98,7 +106,8 @@ public class IdentifyFile } while ((data = source.nextLine()) != null) { - length += data.trim().length(); + bytesRead += data.length(); + trimmedLength += data.trim().length(); if (!lineswereskipped) { for (int i = 0; !isBinary && i < data.length(); i++) @@ -121,12 +130,12 @@ public class IdentifyFile if (fileStr.lastIndexOf(".jar") > -1 || fileStr.lastIndexOf(".zip") > -1) { - reply = "Jalview"; + reply = FileFormat.Jalview; } } if (!lineswereskipped && data.startsWith("PK")) { - reply = "Jalview"; // archive. + reply = FileFormat.Jalview; // archive. break; } } @@ -134,77 +143,29 @@ public class IdentifyFile if (data.startsWith("##GFF-VERSION")) { - reply = GFF3File; + // GFF - possibly embedded in a Jalview features file! + reply = FileFormat.Features; break; } - if (data.indexOf("# STOCKHOLM") > -1) + if (looksLikeFeatureData(data)) { - reply = "STH"; + reply = FileFormat.Features; break; } - - if ((data.indexOf("<") > -1)) // possible Markup Language data i.e HTML, - // RNAML, XML - { - boolean identified = false; - do - { - if (data.matches("<(?i)html(\"[^\"]*\"|'[^']*'|[^'\">])*>")) - { - reply = HtmlFile.FILE_DESC; - identified = true; - break; - } - - if (data.matches("<(?i)rnaml (\"[^\"]*\"|'[^']*'|[^'\">])*>")) - { - reply = "RNAML"; - identified = true; - break; - } - } while ((data = source.nextLine()) != null); - - if (identified) - { - break; - } - } - - - if (data.indexOf("{\"") > -1) - { - reply = JSONFile.FILE_DESC; - break; - } - if ((data.length() < 1) || (data.indexOf("#") == 0)) - { - lineswereskipped = true; - continue; - } - - if (data.indexOf("PILEUP") > -1) - { - reply = "PileUp"; - - break; - } - - if ((data.indexOf("//") == 0) - || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data - .indexOf("_MULTIPLE_ALIGNMENT ")))) + if (data.indexOf("# STOCKHOLM") > -1) { - reply = "MSF"; - + reply = FileFormat.Stockholm; break; } - else if (data.indexOf("CLUSTAL") > -1) + if (data.indexOf("_ENTRY.ID") > -1 + || data.indexOf("_AUDIT_AUTHOR.NAME") > -1 + || data.indexOf("_ATOM_SITE.") > -1) { - reply = "CLUSTAL"; - + reply = FileFormat.MMCif; break; } - - else if (data.indexOf(">") > -1) + // if (data.indexOf(">") > -1) + if (data.startsWith(">")) { // FASTA, PIR file or BLC file boolean checkPIR = false, starterm = false; @@ -212,14 +173,14 @@ public class IdentifyFile { // watch for PIR file attributes checkPIR = true; - reply = "PIR"; + reply = FileFormat.PIR; } // could also be BLC file, read next line to confirm data = source.nextLine(); if (data.indexOf(">") > -1) { - reply = "BLC"; + reply = FileFormat.BLC; } else { @@ -236,18 +197,19 @@ public class IdentifyFile { if (c1 == 0 && c1 == data2.indexOf("*")) { - reply = "BLC"; + reply = FileFormat.BLC; } else { - reply = "FASTA"; // possibly a bad choice - may be recognised as + reply = FileFormat.Fasta; // possibly a bad choice - may be + // recognised as // PIR } // otherwise can still possibly be a PIR file } else { - reply = "FASTA"; + reply = FileFormat.Fasta; // TODO : AMSA File is indicated if there is annotation in the // FASTA file - but FASTA will automatically generate this at the // mo. @@ -276,7 +238,6 @@ public class IdentifyFile } catch (IOException ex) { } - ; if (dta != null && dta.indexOf("*") > -1) { starterm = true; @@ -285,46 +246,85 @@ public class IdentifyFile } if (starterm) { - reply = "PIR"; + reply = FileFormat.PIR; break; } else { - reply = "FASTA"; // probably a bad choice! + reply = FileFormat.Fasta; // probably a bad choice! } } // read as a FASTA (probably) break; } - else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) + int lessThan = data.indexOf("<"); + if ((lessThan > -1)) // possible Markup Language data i.e HTML, + // RNAML, XML { - reply = "PDB"; + String upper = data.toUpperCase(); + if (upper.substring(lessThan).startsWith(" -1) + { + reply = FileFormat.Json; break; } - else if (data.matches("\\s*\\d+\\s+\\d+\\s*")) + if ((data.length() < 1) || (data.indexOf("#") == 0)) + { + lineswereskipped = true; + continue; + } + + if (data.indexOf("PILEUP") > -1) + { + reply = FileFormat.Pileup; + + break; + } + + if ((data.indexOf("//") == 0) + || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data + .indexOf("_MULTIPLE_ALIGNMENT ")))) { - reply = PhylipFile.FILE_DESC; + reply = FileFormat.MSF; + break; } + else if (data.indexOf("CLUSTAL") > -1) + { + reply = FileFormat.Clustal; + break; + } - /* - * // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else - * if (!lineswereskipped && data.indexOf("BLAST")<4) { reply = - * "SimpleBLAST"; break; - * - * } // end comments for Jalview 2.4.1 - */ - else if (!lineswereskipped && data.charAt(0) != '*' - && data.charAt(0) != ' ' - && data.indexOf(":") < data.indexOf(",")) // && - // data.indexOf(",") -1 + && commaPos > -1 && colonPos < commaPos; + // && data.indexOf(",")