X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FIdentifyFile.java;h=ff959b0d79b281f5bb078e500bc768e270931c93;hb=f4766a7bbcfae845fc95923b01fa14ff83d589ff;hp=e90e0ef613f6b75b24c1c449e2e3ca9a6b118bb9;hpb=d7c00f48b00e3ede57c46ae4daf6a9203b9b6059;p=jalview.git diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index e90e0ef..ff959b0 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -1,28 +1,30 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) - * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle - * + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * * This file is part of Jalview. - * + * * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; -import java.io.*; -import java.net.*; +import java.io.IOException; /** * DOCUMENT ME! - * + * * @author $author$ * @version $Revision$ */ @@ -30,26 +32,26 @@ public class IdentifyFile { /** * Identify a datasource's file content. - * + * * @note Do not use this method for stream sources - create a FileParse object * instead. - * + * * @param file - * DOCUMENT ME! - * @param protocol - * DOCUMENT ME! - * @return ID String + * @param sourceType + * @return + * @throws FileFormatException */ - public String Identify(String file, String protocol) + public FileFormatI identify(String file, DataSourceType sourceType) + throws FileFormatException { String emessage = "UNIDENTIFIED FILE PARSING ERROR"; FileParse parser = null; try { - parser = new FileParse(file, protocol); + parser = new FileParse(file, sourceType); if (parser.isValid()) { - return Identify(parser); + return identify(parser); } } catch (Exception e) { @@ -58,41 +60,58 @@ public class IdentifyFile emessage = e.getMessage(); } if (parser != null) - return parser.errormessage; - return emessage; + { + throw new FileFormatException(parser.errormessage); + } + throw new FileFormatException(emessage); + } + + public FileFormatI identify(FileParse source) throws FileFormatException + { + return identify(source, true); + // preserves original behaviour prior to version 2.3 } - public String Identify(FileParse source) + public FileFormatI identify(AlignmentFileReaderI file, + boolean closeSource) throws IOException { - return Identify(source, true); // preserves original behaviour prior to - // version 2.3 + FileParse fp = new FileParse(file.getInFile(), + file.getDataSourceType()); + return identify(fp, closeSource); } /** * Identify contents of source, closing it or resetting source to start * afterwards. - * + * * @param source * @param closeSource - * @return filetype string + * @return (best guess at) file format + * @throws FileFormatException */ - public String Identify(FileParse source, boolean closeSource) + public FileFormatI identify(FileParse source, boolean closeSource) + throws FileFormatException { - String reply = "PFAM"; + FileFormatI reply = FileFormat.Pfam; String data; - int length = 0; + int bytesRead = 0; + int trimmedLength = 0; boolean lineswereskipped = false; boolean isBinary = false; // true if length is non-zero and non-printable // characters are encountered + try { if (!closeSource) { source.mark(); } + boolean aaIndexHeaderRead = false; + while ((data = source.nextLine()) != null) { - length += data.length(); + bytesRead += data.length(); + trimmedLength += data.trim().length(); if (!lineswereskipped) { for (int i = 0; !isBinary && i < data.length(); i++) @@ -115,101 +134,105 @@ public class IdentifyFile if (fileStr.lastIndexOf(".jar") > -1 || fileStr.lastIndexOf(".zip") > -1) { - reply = "Jalview"; + reply = FileFormat.Jalview; } } if (!lineswereskipped && data.startsWith("PK")) { - reply = "Jalview"; // archive. + reply = FileFormat.Jalview; // archive. break; } } data = data.toUpperCase(); - if ((data.indexOf("# STOCKHOLM") > -1)) + if (data.startsWith(ScoreMatrixFile.SCOREMATRIX)) { - reply = "STH"; - + reply = FileFormat.ScoreMatrix; break; } - - if ((data.indexOf("<") > -1)) + if (data.startsWith("H ") && !aaIndexHeaderRead) + { + aaIndexHeaderRead = true; + } + if (data.startsWith("D ") && aaIndexHeaderRead) { - reply = "RNAML"; - + reply = FileFormat.ScoreMatrix; break; } - - if ((data.length() < 1) || (data.indexOf("#") == 0)) + if (data.startsWith("##GFF-VERSION")) { - lineswereskipped = true; - continue; + // GFF - possibly embedded in a Jalview features file! + reply = FileFormat.Features; + break; } - - if (data.indexOf("PILEUP") > -1) + if (looksLikeFeatureData(data)) { - reply = "PileUp"; - + reply = FileFormat.Features; break; } - - if ((data.indexOf("//") == 0) - || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data - .indexOf("_MULTIPLE_ALIGNMENT ")))) + if (data.indexOf("# STOCKHOLM") > -1) { - reply = "MSF"; - + reply = FileFormat.Stockholm; break; } - else if (data.indexOf("CLUSTAL") > -1) + if (data.indexOf("_ENTRY.ID") > -1 + || data.indexOf("_AUDIT_AUTHOR.NAME") > -1 + || data.indexOf("_ATOM_SITE.") > -1) { - reply = "CLUSTAL"; - + reply = FileFormat.MMCif; break; } - - - else if (data.indexOf(">") > -1) + // if (data.indexOf(">") > -1) + if (data.startsWith(">")) { // FASTA, PIR file or BLC file - boolean checkPIR = false,starterm=false; + boolean checkPIR = false, starterm = false; if ((data.indexOf(">P1;") > -1) || (data.indexOf(">DL;") > -1)) { // watch for PIR file attributes checkPIR = true; - reply = "PIR"; + reply = FileFormat.PIR; } // could also be BLC file, read next line to confirm data = source.nextLine(); if (data.indexOf(">") > -1) { - reply = "BLC"; + reply = FileFormat.BLC; } else { // Is this a single line BLC file? String data1 = source.nextLine(); String data2 = source.nextLine(); + int c1; if (checkPIR) { - starterm = (data1!=null && data1.indexOf("*")>-1) || (data2!=null && data2.indexOf("*")>-1); + starterm = (data1 != null && data1.indexOf("*") > -1) + || (data2 != null && data2.indexOf("*") > -1); } - if (data2 != null && data.indexOf("*") > -1) + if (data2 != null && (c1 = data.indexOf("*")) > -1) { - if (data.indexOf("*") == data2.indexOf("*")) + if (c1 == 0 && c1 == data2.indexOf("*")) { - reply = "BLC"; + reply = FileFormat.BLC; + } + else + { + reply = FileFormat.Fasta; // possibly a bad choice - may be + // recognised as + // PIR } // otherwise can still possibly be a PIR file } else { - reply = "FASTA"; + reply = FileFormat.Fasta; // TODO : AMSA File is indicated if there is annotation in the // FASTA file - but FASTA will automatically generate this at the // mo. - if (!checkPIR) { + if (!checkPIR) + { break; } } @@ -217,56 +240,108 @@ public class IdentifyFile // final check for PIR content. require // >P1;title\n\nterminated sequence to occur at least once. - // TODO the PIR/fasta ambiguity may be the use case that is needed to have + // TODO the PIR/fasta ambiguity may be the use case that is needed to + // have // a 'Parse as type XXX' parameter for the applet/application. if (checkPIR) { - String dta=null; + String dta = null; if (!starterm) { - do { - try { + do + { + try + { dta = source.nextLine(); - } catch(IOException ex) {}; - if (dta!=null && dta.indexOf("*")>-1) + } catch (IOException ex) + { + } + if (dta != null && dta.indexOf("*") > -1) { starterm = true; } - } while (dta!=null && !starterm); + } while (dta != null && !starterm); } if (starterm) { - reply="PIR"; + reply = FileFormat.PIR; break; - } else { - reply="FASTA"; // probably a bad choice! + } + else + { + reply = FileFormat.Fasta; // probably a bad choice! } } // read as a FASTA (probably) break; } + if (data.indexOf("{\"") > -1) + { + reply = FileFormat.Json; + break; + } + int lessThan = data.indexOf("<"); + if ((lessThan > -1)) // possible Markup Language data i.e HTML, + // RNAML, XML + { + String upper = data.toUpperCase(); + if (upper.substring(lessThan).startsWith(" -1) + { + reply = FileFormat.Pileup; + + break; + } + + if ((data.indexOf("//") == 0) || ((data.indexOf("!!") > -1) && (data + .indexOf("!!") < data.indexOf("_MULTIPLE_ALIGNMENT ")))) + { + reply = FileFormat.MSF; + + break; + } + else if (data.indexOf("CLUSTAL") > -1) + { + reply = FileFormat.Clustal; + + break; + } + else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) { - reply = "PDB"; + reply = FileFormat.PDB; break; } - /* - * // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else - * if (!lineswereskipped && data.indexOf("BLAST")<4) { reply = - * "SimpleBLAST"; break; - * - * } // end comments for Jalview 2.4.1 - */ - else if (!lineswereskipped && data.charAt(0) != '*' - && data.charAt(0) != ' ' - && data.indexOf(":") < data.indexOf(",")) // && - // data.indexOf(",") -1 + && commaPos > -1 && colonPos < commaPos; + // && data.indexOf(",")