X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FIdentifyFile.java;h=9e2a4e695aff2c31f6463ea94b47839e8294cac7;hb=c19d2a91ca05e052e3408bf5852d88eb5d0608f1;hp=7ed7cbde257cf9d37a468b93a276c3e2630c91db;hpb=b2f9a8d7bce642ff4011bc6d49e02bb0569fbb11;p=jalview.git diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 7ed7cbd..9e2a4e6 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -1,39 +1,43 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.1) - * Copyright (C) 2014 The Jalview Authors - * + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b2) + * Copyright (C) 2015 The Jalview Authors + * * This file is part of Jalview. - * + * * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; -import java.io.*; +import java.io.IOException; /** * DOCUMENT ME! - * + * * @author $author$ * @version $Revision$ */ public class IdentifyFile { + public static final String GFF3File = "GFF v2 or v3"; + /** * Identify a datasource's file content. - * + * * @note Do not use this method for stream sources - create a FileParse object * instead. - * + * * @param file * DOCUMENT ME! * @param protocol @@ -58,7 +62,9 @@ public class IdentifyFile emessage = e.getMessage(); } if (parser != null) + { return parser.errormessage; + } return emessage; } @@ -71,7 +77,7 @@ public class IdentifyFile /** * Identify contents of source, closing it or resetting source to start * afterwards. - * + * * @param source * @param closeSource * @return filetype string @@ -92,7 +98,7 @@ public class IdentifyFile } while ((data = source.nextLine()) != null) { - length += data.length(); + length += data.trim().length(); if (!lineswereskipped) { for (int i = 0; !isBinary && i < data.length(); i++) @@ -126,41 +132,18 @@ public class IdentifyFile } data = data.toUpperCase(); - if ((data.indexOf("# STOCKHOLM") > -1)) - { - reply = "STH"; - - break; - } - - if ((data.length() < 1) || (data.indexOf("#") == 0)) - { - lineswereskipped = true; - continue; - } - - if (data.indexOf("PILEUP") > -1) - { - reply = "PileUp"; - - break; - } - - if ((data.indexOf("//") == 0) - || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data - .indexOf("_MULTIPLE_ALIGNMENT ")))) + if (data.startsWith("##GFF-VERSION")) { - reply = "MSF"; - + reply = GFF3File; break; } - else if (data.indexOf("CLUSTAL") > -1) + if (data.indexOf("# STOCKHOLM") > -1) { - reply = "CLUSTAL"; - + reply = "STH"; break; } - else if (data.indexOf(">") > -1) + // if (data.indexOf(">") > -1) + if (data.startsWith(">")) { // FASTA, PIR file or BLC file boolean checkPIR = false, starterm = false; @@ -188,13 +171,16 @@ public class IdentifyFile starterm = (data1 != null && data1.indexOf("*") > -1) || (data2 != null && data2.indexOf("*") > -1); } - if (data2 != null && (c1=data.indexOf("*")) > -1) + if (data2 != null && (c1 = data.indexOf("*")) > -1) { - if (c1==0 && c1 == data2.indexOf("*")) + if (c1 == 0 && c1 == data2.indexOf("*")) { reply = "BLC"; - } else { - reply = "FASTA"; // possibly a bad choice - may be recognised as PIR + } + else + { + reply = "FASTA"; // possibly a bad choice - may be recognised as + // PIR } // otherwise can still possibly be a PIR file } @@ -249,11 +235,81 @@ public class IdentifyFile // read as a FASTA (probably) break; } + if ((data.indexOf("<") > -1)) // possible Markup Language data i.e HTML, + // RNAML, XML + { + boolean identified = false; + do + { + if (data.matches("<(?i)html(\"[^\"]*\"|'[^']*'|[^'\">])*>")) + { + reply = HtmlFile.FILE_DESC; + identified = true; + break; + } + + if (data.matches("<(?i)rnaml (\"[^\"]*\"|'[^']*'|[^'\">])*>")) + { + reply = "RNAML"; + identified = true; + break; + } + } while ((data = source.nextLine()) != null); + + if (identified) + { + break; + } + if (data == null) + { + break; + } + } + + if (data.indexOf("{\"") > -1) + { + reply = JSONFile.FILE_DESC; + break; + } + if ((data.length() < 1) || (data.indexOf("#") == 0)) + { + lineswereskipped = true; + continue; + } + + if (data.indexOf("PILEUP") > -1) + { + reply = "PileUp"; + + break; + } + + if ((data.indexOf("//") == 0) + || ((data.indexOf("!!") > -1) && (data.indexOf("!!") < data + .indexOf("_MULTIPLE_ALIGNMENT ")))) + { + reply = "MSF"; + + break; + } + else if (data.indexOf("CLUSTAL") > -1) + { + reply = "CLUSTAL"; + + break; + } + else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) { reply = "PDB"; break; } + else if (data.matches("\\s*\\d+\\s+\\d+\\s*")) + { + reply = PhylipFile.FILE_DESC; + break; + } + /* * // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else * if (!lineswereskipped && data.indexOf("BLAST")<4) { reply = @@ -299,6 +355,7 @@ public class IdentifyFile public static void main(String[] args) { + for (int i = 0; args != null && i < args.length; i++) { IdentifyFile ider = new IdentifyFile();