X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FIdentifyFile.java;h=621cfac89dfc097ec3ad469eb42dbd3b1c82bfd7;hb=990ed4ffbaa7a95b2ebb6bf6ab0440310f6e83ab;hp=40e93904f809578a7ef9c9fd2ec61cf370822206;hpb=26ba864a6c290121fe6cf616794d2d0bea65fb7d;p=jalview.git diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 40e9390..621cfac 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -20,6 +20,7 @@ */ package jalview.io; +import java.io.File; import java.io.IOException; /** @@ -30,7 +31,39 @@ import java.io.IOException; */ public class IdentifyFile { - public static final String FeaturesFile = "GFF or Jalview features"; + + public FileFormatI identify(Object file, DataSourceType protocol) throws FileFormatException + { + // BH 2018 + return (file instanceof File ? identify((File) file, protocol) : identify((String) file, protocol)); + + } + + public FileFormatI identify(File file, DataSourceType sourceType) + throws FileFormatException + { + // BH 2018 + String emessage = "UNIDENTIFIED FILE PARSING ERROR"; + FileParse parser = null; + try + { + parser = new FileParse(file, sourceType); + if (parser.isValid()) + { + return identify(parser); + } + } catch (Exception e) + { + System.err.println("Error whilst identifying " + file); + e.printStackTrace(System.err); + emessage = e.getMessage(); + } + if (parser != null) + { + throw new FileFormatException(parser.errormessage); + } + throw new FileFormatException(emessage); + } /** * Identify a datasource's file content. @@ -39,39 +72,47 @@ public class IdentifyFile * instead. * * @param file - * DOCUMENT ME! - * @param protocol - * DOCUMENT ME! - * @return ID String + * @param sourceType + * @return + * @throws FileFormatException */ - public String identify(String file, String protocol) + public FileFormatI identify(String file, DataSourceType sourceType) + throws FileFormatException { String emessage = "UNIDENTIFIED FILE PARSING ERROR"; FileParse parser = null; try { - parser = new FileParse(file, protocol); + parser = new FileParse(file, sourceType); if (parser.isValid()) { return identify(parser); } } catch (Exception e) { - System.err.println("Error whilst identifying"); + System.err.println("Error whilst identifying " + file); e.printStackTrace(System.err); emessage = e.getMessage(); } if (parser != null) { - return parser.errormessage; + throw new FileFormatException(parser.errormessage); } - return emessage; + throw new FileFormatException(emessage); } - public String identify(FileParse source) + public FileFormatI identify(FileParse source) throws FileFormatException { - return identify(source, true); // preserves original behaviour prior to - // version 2.3 + return identify(source, true); + // preserves original behaviour prior to version 2.3 + } + + public FileFormatI identify(AlignmentFileReaderI file, + boolean closeSource) throws IOException + { + FileParse fp = new FileParse(file.getInFile(), + file.getDataSourceType()); + return identify(fp, closeSource); } /** @@ -80,23 +121,28 @@ public class IdentifyFile * * @param source * @param closeSource - * @return filetype string + * @return (best guess at) file format + * @throws FileFormatException */ - public String identify(FileParse source, boolean closeSource) + public FileFormatI identify(FileParse source, boolean closeSource) + throws FileFormatException { - String reply = "PFAM"; + FileFormatI reply = FileFormat.Pfam; String data; int bytesRead = 0; int trimmedLength = 0; boolean lineswereskipped = false; boolean isBinary = false; // true if length is non-zero and non-printable // characters are encountered + try { if (!closeSource) { source.mark(); } + boolean aaIndexHeaderRead = false; + while ((data = source.nextLine()) != null) { bytesRead += data.length(); @@ -105,7 +151,7 @@ public class IdentifyFile { for (int i = 0; !isBinary && i < data.length(); i++) { - char c = data.charAt(i); + int c = data.charAt(i); isBinary = (c < 32 && c != '\t' && c != '\n' && c != '\r' && c != 5 && c != 27); // nominal binary character filter // excluding CR, LF, tab,DEL and ^E @@ -119,35 +165,57 @@ public class IdentifyFile if (source.inFile != null) { String fileStr = source.inFile.getName(); - // possibly a Jalview archive. - if (fileStr.lastIndexOf(".jar") > -1 - || fileStr.lastIndexOf(".zip") > -1) + if (fileStr.contains(".jar") || fileStr.contains(".zip") + || fileStr.contains(".jvp")) { - reply = "Jalview"; + // possibly a Jalview archive (but check further) + reply = FileFormat.Jalview; + break; } } if (!lineswereskipped && data.startsWith("PK")) { - reply = "Jalview"; // archive. + reply = FileFormat.Jalview; // archive break; } } data = data.toUpperCase(); + if (data.startsWith(ScoreMatrixFile.SCOREMATRIX)) + { + reply = FileFormat.ScoreMatrix; + break; + } + if (data.startsWith("H ") && !aaIndexHeaderRead) + { + aaIndexHeaderRead = true; + } + if (data.startsWith("D ") && aaIndexHeaderRead) + { + reply = FileFormat.ScoreMatrix; + break; + } if (data.startsWith("##GFF-VERSION")) { // GFF - possibly embedded in a Jalview features file! - reply = FeaturesFile; + reply = FileFormat.Features; break; } if (looksLikeFeatureData(data)) { - reply = FeaturesFile; + reply = FileFormat.Features; break; } if (data.indexOf("# STOCKHOLM") > -1) { - reply = "STH"; + reply = FileFormat.Stockholm; + break; + } + if (data.indexOf("_ENTRY.ID") > -1 + || data.indexOf("_AUDIT_AUTHOR.NAME") > -1 + || data.indexOf("_ATOM_SITE.") > -1) + { + reply = FileFormat.MMCif; break; } // if (data.indexOf(">") > -1) @@ -159,14 +227,14 @@ public class IdentifyFile { // watch for PIR file attributes checkPIR = true; - reply = "PIR"; + reply = FileFormat.PIR; } // could also be BLC file, read next line to confirm data = source.nextLine(); if (data.indexOf(">") > -1) { - reply = "BLC"; + reply = FileFormat.BLC; } else { @@ -183,18 +251,19 @@ public class IdentifyFile { if (c1 == 0 && c1 == data2.indexOf("*")) { - reply = "BLC"; + reply = FileFormat.BLC; } else { - reply = "FASTA"; // possibly a bad choice - may be recognised as + reply = FileFormat.Fasta; // possibly a bad choice - may be + // recognised as // PIR } // otherwise can still possibly be a PIR file } else { - reply = "FASTA"; + reply = FileFormat.Fasta; // TODO : AMSA File is indicated if there is annotation in the // FASTA file - but FASTA will automatically generate this at the // mo. @@ -223,7 +292,6 @@ public class IdentifyFile } catch (IOException ex) { } - ; if (dta != null && dta.indexOf("*") > -1) { starterm = true; @@ -232,54 +300,39 @@ public class IdentifyFile } if (starterm) { - reply = "PIR"; + reply = FileFormat.PIR; break; } else { - reply = "FASTA"; // probably a bad choice! + reply = FileFormat.Fasta; // probably a bad choice! } } // read as a FASTA (probably) break; } - if ((data.indexOf("<") > -1)) // possible Markup Language data i.e HTML, - // RNAML, XML + if (data.indexOf("{\"") > -1) { - // FIXME this is nuts - it consumes the rest of the file if no match - boolean identified = false; - do - { - if (data.matches("<(?i)html(\"[^\"]*\"|'[^']*'|[^'\">])*>")) - { - reply = HtmlFile.FILE_DESC; - identified = true; - break; - } - - if (data.matches("<(?i)rnaml (\"[^\"]*\"|'[^']*'|[^'\">])*>")) - { - reply = "RNAML"; - identified = true; - break; - } - } while ((data = source.nextLine()) != null); - - if (identified) + reply = FileFormat.Json; + break; + } + int lessThan = data.indexOf("<"); + if ((lessThan > -1)) // possible Markup Language data i.e HTML, + // RNAML, XML + { + String upper = data.toUpperCase(); + if (upper.substring(lessThan).startsWith(" -1) - { - reply = JSONFile.FILE_DESC; - break; - } if ((data.length() < 1) || (data.indexOf("#") == 0)) { lineswereskipped = true; @@ -288,41 +341,40 @@ public class IdentifyFile if (data.indexOf("PILEUP") > -1) { - reply = "PileUp"; + reply = FileFormat.Pileup; break; } - if ((data.indexOf("//") == 0) - || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data - .indexOf("_MULTIPLE_ALIGNMENT ")))) + if ((data.indexOf("//") == 0) || ((data.indexOf("!!") > -1) && (data + .indexOf("!!") < data.indexOf("_MULTIPLE_ALIGNMENT ")))) { - reply = "MSF"; + reply = FileFormat.MSF; break; } else if (data.indexOf("CLUSTAL") > -1) { - reply = "CLUSTAL"; + reply = FileFormat.Clustal; break; } else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) { - reply = "PDB"; + reply = FileFormat.PDB; break; } else if (data.matches("\\s*\\d+\\s+\\d+\\s*")) { - reply = PhylipFile.FILE_DESC; + reply = FileFormat.Phylip; break; } else { if (!lineswereskipped && looksLikeJnetData(data)) { - reply = "JnetFile"; + reply = FileFormat.Jnet; break; } } @@ -341,14 +393,15 @@ public class IdentifyFile } catch (Exception ex) { System.err.println("File Identification failed!\n" + ex); - return source.errormessage; + throw new FileFormatException(source.errormessage); } if (trimmedLength == 0) { - System.err - .println("File Identification failed! - Empty file was read."); - return "EMPTY DATA FILE"; + System.err.println( + "File Identification failed! - Empty file was read."); + throw new FileFormatException("EMPTY DATA FILE"); } + System.out.println("File format identified as " + reply.toString()); return reply; } @@ -370,8 +423,9 @@ public class IdentifyFile } /** - * Returns true if the data has at least 6 tab-delimited fields _and_ - * fields 4 and 5 are integer (start/end) + * Returns true if the data has at least 6 tab-delimited fields _and_ fields 4 + * and 5 are integer (start/end) + * * @param data * @return */ @@ -382,27 +436,43 @@ public class IdentifyFile return false; } String[] columns = data.split("\t"); - if (columns.length < 6) { + if (columns.length < 6) + { return false; } for (int col = 3; col < 5; col++) { - try { + try + { Integer.parseInt(columns[col]); - } catch (NumberFormatException e) { + } catch (NumberFormatException e) + { return false; } } return true; } + /** + * + * @param args + * @j2sIgnore + */ public static void main(String[] args) { - for (int i = 0; args != null && i < args.length; i++) { IdentifyFile ider = new IdentifyFile(); - String type = ider.identify(args[i], AppletFormatAdapter.FILE); + FileFormatI type = null; + try + { + type = ider.identify(args[i], DataSourceType.FILE); + } catch (FileFormatException e) + { + System.err.println( + String.format("Error '%s' identifying file type for %s", + args[i], e.getMessage())); + } System.out.println("Type of " + args[i] + " is " + type); } if (args == null || args.length == 0) @@ -410,4 +480,6 @@ public class IdentifyFile System.err.println("Usage: [ ...]"); } } + + }