X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FIdentifyFile.java;h=c21127ee201140c2b12bded740ab3278afd648c1;hb=57738a1f3c19b1c3a00bd3ac5108f8cd0af32f99;hp=89fc3cd2a1f31fe6a539541cdf890f64aafd8773;hpb=ee198b3ca3687f18a2ee186f4e7c7330f4ea30f0;p=jalview.git diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 89fc3cd..c21127e 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -20,6 +20,9 @@ */ package jalview.io; +import java.util.Locale; + +import java.io.File; import java.io.IOException; /** @@ -30,7 +33,41 @@ import java.io.IOException; */ public class IdentifyFile { - public static final String FeaturesFile = "GFF or Jalview features"; + + public FileFormatI identify(Object file, DataSourceType protocol) + throws FileFormatException + { + // BH 2018 + return (file instanceof File ? identify((File) file, protocol) + : identify((String) file, protocol)); + + } + + public FileFormatI identify(File file, DataSourceType sourceType) + throws FileFormatException + { + // BH 2018 + String emessage = "UNIDENTIFIED FILE PARSING ERROR"; + FileParse parser = null; + try + { + parser = new FileParse(file, sourceType); + if (parser.isValid()) + { + return identify(parser); + } + } catch (Exception e) + { + System.err.println("Error whilst identifying " + file); + e.printStackTrace(System.err); + emessage = e.getMessage(); + } + if (parser != null) + { + throw new FileFormatException(parser.errormessage); + } + throw new FileFormatException(emessage); + } /** * Identify a datasource's file content. @@ -57,7 +94,7 @@ public class IdentifyFile } } catch (Exception e) { - System.err.println("Error whilst identifying"); + System.err.println("Error whilst identifying " + file); e.printStackTrace(System.err); emessage = e.getMessage(); } @@ -74,6 +111,14 @@ public class IdentifyFile // preserves original behaviour prior to version 2.3 } + public FileFormatI identify(AlignmentFileReaderI file, + boolean closeSource) throws IOException + { + FileParse fp = new FileParse(file.getInFile(), + file.getDataSourceType()); + return identify(fp, closeSource); + } + /** * Identify contents of source, closing it or resetting source to start * afterwards. @@ -93,12 +138,15 @@ public class IdentifyFile boolean lineswereskipped = false; boolean isBinary = false; // true if length is non-zero and non-printable // characters are encountered + try { if (!closeSource) { source.mark(); } + boolean aaIndexHeaderRead = false; + while ((data = source.nextLine()) != null) { bytesRead += data.length(); @@ -121,21 +169,48 @@ public class IdentifyFile if (source.inFile != null) { String fileStr = source.inFile.getName(); - // possibly a Jalview archive. - if (fileStr.lastIndexOf(".jar") > -1 - || fileStr.lastIndexOf(".zip") > -1) + if (fileStr.contains(".jar") || fileStr.contains(".zip") + || fileStr.contains(".jvp")) { + // possibly a Jalview archive (but check further) reply = FileFormat.Jalview; } } if (!lineswereskipped && data.startsWith("PK")) { - reply = FileFormat.Jalview; // archive. + reply = FileFormat.Jalview; // archive break; } } - data = data.toUpperCase(); + data = data.toUpperCase(Locale.ROOT); + if (data.startsWith(ScoreMatrixFile.SCOREMATRIX)) + { + reply = FileFormat.ScoreMatrix; + break; + } + if (data.startsWith("LOCUS")) + { + reply = FileFormat.GenBank; + break; + } + if (data.startsWith("ID ")) + { + if (data.substring(2).trim().split(";").length == 7) + { + reply = FileFormat.Embl; + break; + } + } + if (data.startsWith("H ") && !aaIndexHeaderRead) + { + aaIndexHeaderRead = true; + } + if (data.startsWith("D ") && aaIndexHeaderRead) + { + reply = FileFormat.ScoreMatrix; + break; + } if (data.startsWith("##GFF-VERSION")) { // GFF - possibly embedded in a Jalview features file! @@ -252,11 +327,16 @@ public class IdentifyFile // read as a FASTA (probably) break; } + if (data.indexOf("{\"") > -1) + { + reply = FileFormat.Json; + break; + } int lessThan = data.indexOf("<"); if ((lessThan > -1)) // possible Markup Language data i.e HTML, - // RNAML, XML + // RNAML, XML { - String upper = data.toUpperCase(); + String upper = data.toUpperCase(Locale.ROOT); if (upper.substring(lessThan).startsWith(" -1) - { - reply = FileFormat.Json; - break; - } if ((data.length() < 1) || (data.indexOf("#") == 0)) { lineswereskipped = true; @@ -287,9 +362,8 @@ public class IdentifyFile break; } - if ((data.indexOf("//") == 0) - || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data - .indexOf("_MULTIPLE_ALIGNMENT ")))) + if ((data.indexOf("//") == 0) || ((data.indexOf("!!") > -1) && (data + .indexOf("!!") < data.indexOf("_MULTIPLE_ALIGNMENT ")))) { reply = FileFormat.MSF; @@ -339,10 +413,11 @@ public class IdentifyFile } if (trimmedLength == 0) { - System.err - .println("File Identification failed! - Empty file was read."); + System.err.println( + "File Identification failed! - Empty file was read."); throw new FileFormatException("EMPTY DATA FILE"); } + System.out.println("File format identified as " + reply.toString()); return reply; } @@ -364,8 +439,9 @@ public class IdentifyFile } /** - * Returns true if the data has at least 6 tab-delimited fields _and_ - * fields 4 and 5 are integer (start/end) + * Returns true if the data has at least 6 tab-delimited fields _and_ fields 4 + * and 5 are integer (start/end) + * * @param data * @return */ @@ -376,20 +452,28 @@ public class IdentifyFile return false; } String[] columns = data.split("\t"); - if (columns.length < 6) { + if (columns.length < 6) + { return false; } for (int col = 3; col < 5; col++) { - try { + try + { Integer.parseInt(columns[col]); - } catch (NumberFormatException e) { + } catch (NumberFormatException e) + { return false; } } return true; } + /** + * + * @param args + * @j2sIgnore + */ public static void main(String[] args) { for (int i = 0; args != null && i < args.length; i++) @@ -401,9 +485,9 @@ public class IdentifyFile type = ider.identify(args[i], DataSourceType.FILE); } catch (FileFormatException e) { - System.err.println(String.format( - "Error '%s' identifying file type for %s", args[i], - e.getMessage())); + System.err.println( + String.format("Error '%s' identifying file type for %s", + args[i], e.getMessage())); } System.out.println("Type of " + args[i] + " is " + type); } @@ -412,4 +496,5 @@ public class IdentifyFile System.err.println("Usage: [ ...]"); } } + }