JAL-2656 recognise non-HTTP URI input streams as gzipped by detecting the gzip magic...
[jalview.git] / src / jalview / io / FileParse.java
index 3c3e951..d0b61e4 100755 (executable)
  */
 package jalview.io;
 
+import jalview.api.AlignExportSettingsI;
+import jalview.api.AlignmentViewPanel;
+import jalview.api.FeatureSettingsModelI;
+import jalview.util.MessageManager;
+
+import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
 import java.io.File;
@@ -33,13 +39,9 @@ import java.io.StringReader;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.net.URLConnection;
 import java.util.zip.GZIPInputStream;
 
-import jalview.api.AlignExportSettingsI;
-import jalview.api.AlignViewportI;
-import jalview.api.AlignmentViewPanel;
-import jalview.api.FeatureSettingsModelI;
-import jalview.util.MessageManager;
 import jalview.util.Platform;
 
 /**
@@ -192,29 +194,64 @@ public class FileParse
     }
     if (!error)
     {
-      if (fileStr.toLowerCase().endsWith(".gz"))
+      try
       {
-        try
-        {
-          dataIn = getGzipReader(new FileInputStream(fileStr));
-          dataName = fileStr;
-          return error;
-        } catch (Exception x)
-        {
-          warningMessage = "Failed  to resolve as a GZ stream ("
-                  + x.getMessage() + ")";
-          // x.printStackTrace();
-        }
-        ;
+        dataIn = checkForGzipStream(new FileInputStream(fileStr));
+        dataName = fileStr;
+      } catch (Exception x)
+      {
+        warningMessage = "Failed to resolve " + fileStr
+                + " as a data source. (" + x.getMessage() + ")";
+        // x.printStackTrace();
+        error = true;
       }
-
-      dataIn = new BufferedReader(new FileReader(fileStr));
-      dataName = fileStr;
+      ;
     }
     return error;
   }
+  
+  /**
+   * Tests whether the supplied bytes begin with the two-byte gzip magic
+   * number ({@code GZIPInputStream.GZIP_MAGIC}). The header is assembled with
+   * the first byte as the low-order byte and the second as the high-order
+   * byte.
+   *
+   * See https://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped
+   *
+   * @param bytes
+   *          the leading bytes of a stream; only the first two are examined
+   * @return true if at least two bytes are supplied and they match the gzip
+   *         magic number; false otherwise (including for a null or too-short
+   *         array)
+   */
+  private static boolean isGzipStream(byte[] bytes) {
+    // fewer than two bytes can never be a gzip header
+    if (bytes == null || bytes.length < 2) {
+      return false;
+    }
+    // mask each byte so that sign extension cannot leak into the other half
+    int head = (bytes[0] & 0xff) | ((bytes[1] << 8) & 0xff00);
+    return GZIPInputStream.GZIP_MAGIC == head;
+  }
 
   /**
+   * Returns a Reader for the given input after wrapping it in a buffered input
+   * stream, and then checking if it needs to be wrapped by a GZipInputStream
+   * 
+   * @param input
+   * @return
+   */
+  private BufferedReader checkForGzipStream(InputStream input) throws Exception {
+
+    // NB: stackoverflow https://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped
+    // could use a PushBackInputStream rather than a BufferedInputStream
+    
+    BufferedInputStream bufinput;
+    if (!input.markSupported()) {
+       bufinput= new BufferedInputStream(input,16);
+       input = bufinput;
+    }
+    input.mark(4);
+    byte[] bytes=input.readNBytes(2);
+    input.reset();
+    if (bytes.length==2 && isGzipStream(bytes)) {
+      return getGzipReader(input);
+    }
+    // return a buffered reader for the stream.
+    InputStreamReader isReader= new InputStreamReader(input);
+    BufferedReader toReadFrom=new BufferedReader(isReader);
+    return toReadFrom;
+  }
+  /**
    * Returns a {@code BufferedReader} which wraps the input stream with a
    * GZIPInputStream. Throws a {@code ZipException} if a GZIP format error
    * occurs or the compression method used is unsupported.
@@ -256,19 +293,35 @@ public class FileParse
   {
     errormessage = "URL NOT FOUND";
     URL url = new URL(urlStr);
-    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-    int rc = conn.getResponseCode();
-    if (rc != HttpURLConnection.HTTP_OK)
+    URLConnection _conn = url.openConnection();
+    if (_conn instanceof HttpURLConnection)
     {
-      throw new IOException(
-              "Response status from " + urlStr + " was " + rc);
+      HttpURLConnection conn = (HttpURLConnection) _conn;
+      int rc = conn.getResponseCode();
+      if (rc != HttpURLConnection.HTTP_OK)
+      {
+        throw new IOException(
+                "Response status from " + urlStr + " was " + rc);
+      }
+    } else {
+      try {
+      dataIn = checkForGzipStream(_conn.getInputStream());
+      dataName=urlStr;
+      } catch (IOException ex)
+      {
+        throw new IOException("Failed to handle non-HTTP URI stream",ex);
+      } catch (Exception ex)
+      {
+        throw new IOException("Failed to determine type of input stream for given URI",ex);
+      }
+      return;
     }
-    String encoding = conn.getContentEncoding();
-    String contentType = conn.getContentType();
+    String encoding = _conn.getContentEncoding();
+    String contentType = _conn.getContentType();
     boolean isgzipped = "application/x-gzip".equalsIgnoreCase(contentType)
             || "gzip".equals(encoding);
     Exception e = null;
-    InputStream inputStream = conn.getInputStream();
+    InputStream inputStream = _conn.getInputStream();
     if (isgzipped)
     {
       try