From: Jim Procter Date: Tue, 28 Jul 2020 12:36:00 +0000 (+0100) Subject: Merge branch 'feature/JAL-3615gzipXfam' into develop X-Git-Tag: Develop-2_11_2_0-d20201215~24^2~22 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=033962da90469745dd6639a1346ecbcbe2ed071f;hp=00816d26e65bf0c98538773087a8c7519aea672e;p=jalview.git Merge branch 'feature/JAL-3615gzipXfam' into develop --- diff --git a/src/jalview/io/FileParse.java b/src/jalview/io/FileParse.java index a5a4e36..39d8ad4 100755 --- a/src/jalview/io/FileParse.java +++ b/src/jalview/io/FileParse.java @@ -20,6 +20,13 @@ */ package jalview.io; +import jalview.api.AlignExportSettingsI; +import jalview.api.AlignViewportI; +import jalview.api.AlignmentViewPanel; +import jalview.api.FeatureSettingsModelI; +import jalview.util.MessageManager; + +import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; @@ -30,15 +37,12 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; +import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; +import java.net.URLConnection; import java.util.zip.GZIPInputStream; -import jalview.api.AlignExportSettingsI; -import jalview.api.AlignViewportI; -import jalview.api.AlignmentViewPanel; -import jalview.api.FeatureSettingsModelI; -import jalview.util.MessageManager; import jalview.util.Platform; /** @@ -64,6 +68,7 @@ public class FileParse { return bytes; } + /** * a viewport associated with the current file operation. May be null. May * move to different object. 
@@ -190,29 +195,73 @@ public class FileParse } if (!error) { - if (fileStr.toLowerCase().endsWith(".gz")) + try { - try - { - dataIn = tryAsGzipSource(new FileInputStream(fileStr)); - dataName = fileStr; - return error; - } catch (Exception x) - { - warningMessage = "Failed to resolve as a GZ stream (" - + x.getMessage() + ")"; - // x.printStackTrace(); - } - ; + dataIn = checkForGzipStream(new FileInputStream(fileStr)); + dataName = fileStr; + } catch (Exception x) + { + warningMessage = "Failed to resolve " + fileStr + + " as a data source. (" + x.getMessage() + ")"; + // x.printStackTrace(); + error = true; } - - dataIn = new BufferedReader(new FileReader(fileStr)); - dataName = fileStr; + ; } return error; } + + /** + * Recognise the 2-byte magic header for gzip streams + * + * https://recalll.co/ask/v/topic/java-How-to-check-if-InputStream-is-Gzipped/555aadd62bd27354438b90f6 + * + * @param bytes - at least two bytes + * @return + */ + private static boolean isGzipStream(byte[] bytes) { + int head = ((int) bytes[0] & 0xff) | ((bytes[1] << 8) & 0xff00); + return (GZIPInputStream.GZIP_MAGIC == head); + } - private BufferedReader tryAsGzipSource(InputStream inputStream) + /** + * Returns a Reader for the given input after wrapping it in a buffered input + * stream, and then checking if it needs to be wrapped by a GZipInputStream + * + * @param input + * @return + */ + private BufferedReader checkForGzipStream(InputStream input) throws Exception { + + // NB: stackoverflow https://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped + // could use a PushBackInputStream rather than a BufferedInputStream + + BufferedInputStream bufinput; + if (!input.markSupported()) { + bufinput= new BufferedInputStream(input,16); + input = bufinput; + } + input.mark(4); + byte[] bytes=input.readNBytes(2); + input.reset(); + if (bytes.length==2 && isGzipStream(bytes)) { + return getGzipReader(input); + } + // return a buffered reader for the stream. 
+ InputStreamReader isReader= new InputStreamReader(input); + BufferedReader toReadFrom=new BufferedReader(isReader); + return toReadFrom; + } + /** + * Returns a {@code BufferedReader} which wraps the input stream with a + * GZIPInputStream. Throws a {@code ZipException} if a GZIP format error + * occurs or the compression method used is unsupported. + * + * @param inputStream + * @return + * @throws Exception + */ + private BufferedReader getGzipReader(InputStream inputStream) throws Exception { BufferedReader inData = new BufferedReader( @@ -223,44 +272,74 @@ public class FileParse return inData; } - private boolean checkURLSource(String fileStr) + /** + * Tries to read from the given URL. If successful, saves a reader to the + * response in field {@code dataIn}, otherwise (on exception, or HTTP response + * status not 200), throws an exception. + *

+ * If the response status includes + * + * <pre>
+   * Content-Type : application/x-gzip
+   * </pre>
+ * + * then tries to read as gzipped content. + * + * @param urlStr + * @throws IOException + * @throws MalformedURLException + */ + private void checkURLSource(String urlStr) throws IOException, MalformedURLException { errormessage = "URL NOT FOUND"; - URL url = new URL(fileStr); - // - // GZIPInputStream code borrowed from Aquaria (soon to be open sourced) via - // Kenny Sabir - Exception e = null; - if (fileStr.toLowerCase().endsWith(".gz")) + URL url = new URL(urlStr); + URLConnection _conn = url.openConnection(); + if (_conn instanceof HttpURLConnection) { - try + HttpURLConnection conn = (HttpURLConnection) _conn; + int rc = conn.getResponseCode(); + if (rc != HttpURLConnection.HTTP_OK) { - InputStream inputStream = url.openStream(); - dataIn = tryAsGzipSource(inputStream); - dataName = fileStr; - return false; + throw new IOException( + "Response status from " + urlStr + " was " + rc); + } + } else { + try { + dataIn = checkForGzipStream(_conn.getInputStream()); + dataName=urlStr; + } catch (IOException ex) + { + throw new IOException("Failed to handle non-HTTP URI stream",ex); } catch (Exception ex) { - e = ex; + throw new IOException("Failed to determine type of input stream for given URI",ex); } + return; } - - try - { - dataIn = new BufferedReader(new InputStreamReader(url.openStream())); - } catch (IOException q) + String encoding = _conn.getContentEncoding(); + String contentType = _conn.getContentType(); + boolean isgzipped = "application/x-gzip".equalsIgnoreCase(contentType) + || "gzip".equals(encoding); + Exception e = null; + InputStream inputStream = _conn.getInputStream(); + if (isgzipped) { - if (e != null) + try + { + dataIn = getGzipReader(inputStream); + dataName = urlStr; + } catch (Exception e1) { throw new IOException(MessageManager .getString("exception.failed_to_resolve_gzip_stream"), e); } - throw q; + return; } - // record URL as name of datasource. 
- dataName = fileStr; - return false; + + dataIn = new BufferedReader(new InputStreamReader(inputStream)); + dataName = urlStr; + return; } /** @@ -345,7 +424,8 @@ public class FileParse { // this will be from JavaScript inFile = file; - dataIn = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(bytes))); + dataIn = new BufferedReader( + new InputStreamReader(new ByteArrayInputStream(bytes))); dataName = fileStr; } else if (checkFileSource(fileStr)) @@ -453,8 +533,7 @@ public class FileParse { // pass up the reason why we have no source to read from throw new IOException(MessageManager.formatMessage( - "exception.failed_to_read_data_from_source", - new String[] + "exception.failed_to_read_data_from_source", new String[] { errormessage })); } error = false; diff --git a/src/jalview/ws/dbsources/Pfam.java b/src/jalview/ws/dbsources/Pfam.java index 8877c34..47e66ac 100644 --- a/src/jalview/ws/dbsources/Pfam.java +++ b/src/jalview/ws/dbsources/Pfam.java @@ -35,6 +35,11 @@ import com.stevesoft.pat.Regex; */ abstract public class Pfam extends Xfam { + /* + * append to URLs to retrieve as a gzipped file + */ + protected static final String GZIPPED = "/gzipped"; + static final String PFAM_BASEURL_KEY = "PFAM_BASEURL"; private static final String DEFAULT_PFAM_BASEURL = "https://pfam.xfam.org"; diff --git a/src/jalview/ws/dbsources/PfamFull.java b/src/jalview/ws/dbsources/PfamFull.java index 0600427..d71892b 100644 --- a/src/jalview/ws/dbsources/PfamFull.java +++ b/src/jalview/ws/dbsources/PfamFull.java @@ -34,7 +34,7 @@ public class PfamFull extends Pfam @Override public String getURLSuffix() { - return "/alignment/full"; + return "/alignment/full" + GZIPPED; } /* diff --git a/src/jalview/ws/dbsources/PfamSeed.java b/src/jalview/ws/dbsources/PfamSeed.java index dff8a17..f64d07f 100644 --- a/src/jalview/ws/dbsources/PfamSeed.java +++ b/src/jalview/ws/dbsources/PfamSeed.java @@ -36,7 +36,7 @@ public class PfamSeed extends Pfam @Override public String 
getURLSuffix() { - return "/alignment/seed"; + return "/alignment/seed" + GZIPPED; } /* diff --git a/src/jalview/ws/dbsources/Rfam.java b/src/jalview/ws/dbsources/Rfam.java index 1d9d99a..c9ee7fc 100644 --- a/src/jalview/ws/dbsources/Rfam.java +++ b/src/jalview/ws/dbsources/Rfam.java @@ -36,6 +36,11 @@ abstract public class Rfam extends Xfam private static final String DEFAULT_RFAM_BASEURL = "https://rfam.xfam.org"; + /* + * append to URLs to retrieve as a gzipped file + */ + protected static final String GZIPPED = "?gzip=1&download=1"; + @Override protected String getURLPrefix() { diff --git a/src/jalview/ws/dbsources/RfamFull.java b/src/jalview/ws/dbsources/RfamFull.java index d815336..396511c 100644 --- a/src/jalview/ws/dbsources/RfamFull.java +++ b/src/jalview/ws/dbsources/RfamFull.java @@ -36,7 +36,7 @@ public class RfamFull extends Rfam @Override public String getURLSuffix() { - return "/alignment/full"; + return "/alignment/full" + GZIPPED; } /* diff --git a/src/jalview/ws/dbsources/RfamSeed.java b/src/jalview/ws/dbsources/RfamSeed.java index a74e829..eaa574b 100644 --- a/src/jalview/ws/dbsources/RfamSeed.java +++ b/src/jalview/ws/dbsources/RfamSeed.java @@ -36,8 +36,7 @@ public class RfamSeed extends Rfam @Override public String getURLSuffix() { - // to download gzipped file add '?gzip=1' - return "/alignment/stockholm"; + return "/alignment/stockholm" + GZIPPED; } /* diff --git a/src/jalview/ws/dbsources/Xfam.java b/src/jalview/ws/dbsources/Xfam.java index b83f558..f0cb14b 100644 --- a/src/jalview/ws/dbsources/Xfam.java +++ b/src/jalview/ws/dbsources/Xfam.java @@ -36,7 +36,6 @@ import jalview.ws.seqfetcher.DbSourceProxyImpl; */ public abstract class Xfam extends DbSourceProxyImpl { - public Xfam() { super(); diff --git a/test/jalview/ws/dbsources/PfamFullTest.java b/test/jalview/ws/dbsources/PfamFullTest.java index f5cc640..23cceb2 100644 --- a/test/jalview/ws/dbsources/PfamFullTest.java +++ b/test/jalview/ws/dbsources/PfamFullTest.java @@ -38,7 +38,7 @@ 
public class PfamFullTest @Test(groups = "Functional") public void testGetURL() { - String path = "pfam.xfam.org/family/ABC/alignment/full"; + String path = "pfam.xfam.org/family/ABC/alignment/full/gzipped"; // with default value for domain String url = new PfamFull().getURL(" abc "); diff --git a/test/jalview/ws/dbsources/PfamSeedTest.java b/test/jalview/ws/dbsources/PfamSeedTest.java index 355ef0c..451810b 100644 --- a/test/jalview/ws/dbsources/PfamSeedTest.java +++ b/test/jalview/ws/dbsources/PfamSeedTest.java @@ -38,7 +38,7 @@ public class PfamSeedTest @Test(groups = "Functional") public void testGetURL() { - String path = "pfam.xfam.org/family/ABC/alignment/seed"; + String path = "pfam.xfam.org/family/ABC/alignment/seed/gzipped"; // with default value for domain String url = new PfamSeed().getURL(" abc "); diff --git a/test/jalview/ws/dbsources/RfamFullTest.java b/test/jalview/ws/dbsources/RfamFullTest.java index 2d1497f..87b963f 100644 --- a/test/jalview/ws/dbsources/RfamFullTest.java +++ b/test/jalview/ws/dbsources/RfamFullTest.java @@ -38,7 +38,7 @@ public class RfamFullTest @Test(groups = "Functional") public void testGetURL() { - String path = "rfam.xfam.org/family/ABC/alignment/full"; + String path = "rfam.xfam.org/family/ABC/alignment/full?gzip=1&download=1"; // with default value for domain String url = new RfamFull().getURL(" abc "); diff --git a/test/jalview/ws/dbsources/RfamSeedTest.java b/test/jalview/ws/dbsources/RfamSeedTest.java index 745ba2e..1165d1f 100644 --- a/test/jalview/ws/dbsources/RfamSeedTest.java +++ b/test/jalview/ws/dbsources/RfamSeedTest.java @@ -38,7 +38,7 @@ public class RfamSeedTest @Test(groups = "Functional") public void testGetURL() { - String path = "rfam.xfam.org/family/ABC/alignment/stockholm"; + String path = "rfam.xfam.org/family/ABC/alignment/stockholm?gzip=1&download=1"; // with default value for domain String url = new RfamSeed().getURL(" abc ");