src/jalview/io/FileParse.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.io;
  22
import jalview.api.AlignExportSettingsI;
import jalview.api.AlignViewportI;
import jalview.api.AlignmentViewPanel;
import jalview.api.FeatureSettingsModelI;
import jalview.util.MessageManager;
import jalview.util.Platform;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;
  46
  47 /**
  48  * implements a random access wrapper around a particular datasource, for
  49  * passing to identifyFile and AlignFile objects.
  50  */
  51 public class FileParse
  52 {
  53   protected static final String SPACE = " ";
  54
  55   protected static final String TAB = "\t";
  56
  57   /**
  58    * text specifying source of data. usually filename or url.
  59    */
  60   private String dataName = "unknown source";
  61
  62   public File inFile = null;
  63
  64   private byte[] bytes; // from JavaScript
  65
  66   public byte[] getBytes()
  67   {
  68     return bytes;
  69   }
  70
  71   /**
  72    * a viewport associated with the current file operation. May be null. May
  73    * move to different object.
  74    */
  75   private AlignViewportI viewport;
  76
  77   /**
  78    * specific settings for exporting data from the current context
  79    */
  80   private AlignExportSettingsI exportSettings;
  81
  82   /**
  83    * sequence counter for FileParse object created from same data source
  84    */
  85   public int index = 1;
  86
  87   /**
  88    * separator for extracting specific 'frame' of a datasource for formats that
  89    * support multiple records (e.g. BLC, Stockholm, etc)
  90    */
  91   protected char suffixSeparator = '#';
  92
  93   /**
  94    * character used to write newlines
  95    */
  96   protected String newline = System.getProperty("line.separator");
  97
  98   public void setNewlineString(String nl)
  99   {
 100     newline = nl;
 101   }
 102
 103   public String getNewlineString()
 104   {
 105     return newline;
 106   }
 107
 108   /**
 109    * '#' separated string tagged on to end of filename or url that was clipped
 110    * off to resolve to valid filename
 111    */
 112   protected String suffix = null;
 113
 114   protected DataSourceType dataSourceType = null;
 115
 116   protected BufferedReader dataIn = null;
 117
 118   protected String errormessage = "UNINITIALISED SOURCE";
 119
 120   protected boolean error = true;
 121
 122   protected String warningMessage = null;
 123
 124   /**
 125    * size of readahead buffer used for when initial stream position is marked.
 126    */
 127   final int READAHEAD_LIMIT = 2048;
 128
 129   public FileParse()
 130   {
 131   }
 132
 133   /**
 134    * Create a new FileParse instance reading from the same datasource starting
 135    * at the current position. WARNING! Subsequent reads from either object will
 136    * affect the read position of the other, but not the error state.
 137    *
 138    * @param from
 139    */
 140   public FileParse(FileParse from) throws IOException
 141   {
 142     if (from == null)
 143     {
 144       throw new Error(MessageManager
 145               .getString("error.implementation_error_null_fileparse"));
 146     }
 147     if (from == this)
 148     {
 149       return;
 150     }
 151     index = ++from.index;
 152     inFile = from.inFile;
 153     suffixSeparator = from.suffixSeparator;
 154     suffix = from.suffix;
 155     errormessage = from.errormessage; // inherit potential error messages
 156     error = false; // reset any error condition.
 157     dataSourceType = from.dataSourceType;
 158     dataIn = from.dataIn;
 159     if (dataIn != null)
 160     {
 161       mark();
 162     }
 163     dataName = from.dataName;
 164   }
 165
 166   /**
 167    * Attempt to open a file as a datasource. Sets error and errormessage if
 168    * fileStr was invalid.
 169    *
 170    * @param fileStr
 171    * @return this.error (true if the source was invalid)
 172    */
 173   private boolean checkFileSource(String fileStr) throws IOException
 174   {
 175     error = false;
 176     this.inFile = new File(fileStr);
 177     // check to see if it's a Jar file in disguise.
 178     if (!inFile.exists())
 179     {
 180       errormessage = "FILE NOT FOUND";
 181       error = true;
 182     }
 183     if (!inFile.canRead())
 184     {
 185       errormessage = "FILE CANNOT BE OPENED FOR READING";
 186       error = true;
 187     }
 188     if (inFile.isDirectory())
 189     {
 190       // this is really a 'complex' filetype - but we don't handle directory
 191       // reads yet.
 192       errormessage = "FILE IS A DIRECTORY";
 193       error = true;
 194     }
 195     if (!error)
 196     {
 197       try
 198       {
 199         dataIn = checkForGzipStream(new FileInputStream(fileStr));
 200         dataName = fileStr;
 201       } catch (Exception x)
 202       {
 203         warningMessage = "Failed to resolve " + fileStr
 204                 + " as a data source. (" + x.getMessage() + ")";
 205         // x.printStackTrace();
 206         error = true;
 207       }
 208       ;
 209     }
 210     return error;
 211   }
 212
 213   /**
 214    * Recognise the 2-byte magic header for gzip streams
 215    *
 216    * https://recalll.co/ask/v/topic/java-How-to-check-if-InputStream-is-Gzipped/555aadd62bd27354438b90f6
 217    *
 218    * @param bytes - at least two bytes
 219    * @return
 220    */
 221   private static boolean isGzipStream(byte[] bytes) {
 222     int head = ((int) bytes[0] & 0xff) | ((bytes[1] << 8) & 0xff00);
 223     return (GZIPInputStream.GZIP_MAGIC == head);
 224   }
 225
 226   /**
 227    * Returns a Reader for the given input after wrapping it in a buffered input
 228    * stream, and then checking if it needs to be wrapped by a GZipInputStream
 229    *
 230    * @param input
 231    * @return
 232    */
 233   private BufferedReader checkForGzipStream(InputStream input) throws Exception {
 234
 235     // NB: stackoverflow https://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped
 236     // could use a PushBackInputStream rather than a BufferedInputStream
 237
 238     BufferedInputStream bufinput;
 239     if (!input.markSupported()) {
 240        bufinput= new BufferedInputStream(input,16);
 241        input = bufinput;
 242     }
 243     input.mark(4);
 244     byte[] bytes=input.readNBytes(2);
 245     input.reset();
 246     if (bytes.length==2 && isGzipStream(bytes)) {
 247       return getGzipReader(input);
 248     }
 249     // return a buffered reader for the stream.
 250     InputStreamReader isReader= new InputStreamReader(input);
 251     BufferedReader toReadFrom=new BufferedReader(isReader);
 252     return toReadFrom;
 253   }
 254   /**
 255    * Returns a {@code BufferedReader} which wraps the input stream with a
 256    * GZIPInputStream. Throws a {@code ZipException} if a GZIP format error
 257    * occurs or the compression method used is unsupported.
 258    *
 259    * @param inputStream
 260    * @return
 261    * @throws Exception
 262    */
 263   private BufferedReader getGzipReader(InputStream inputStream)
 264           throws Exception
 265   {
 266     BufferedReader inData = new BufferedReader(
 267             new InputStreamReader(new GZIPInputStream(inputStream)));
 268     inData.mark(2048);
 269     inData.read();
 270     inData.reset();
 271     return inData;
 272   }
 273
 274   /**
 275    * Tries to read from the given URL. If successful, saves a reader to the
 276    * response in field {@code dataIn}, otherwise (on exception, or HTTP response
 277    * status not 200), throws an exception.
 278    * <p>
 279    * If the response status includes
 280    *
 281    * <pre>
 282    * Content-Type : application/x-gzip
 283    * </pre>
 284    *
 285    * then tries to read as gzipped content.
 286    *
 287    * @param urlStr
 288    * @throws IOException
 289    * @throws MalformedURLException
 290    */
 291   private void checkURLSource(String urlStr)
 292           throws IOException, MalformedURLException
 293   {
 294     errormessage = "URL NOT FOUND";
 295     URL url = new URL(urlStr);
 296     HttpURLConnection conn = (HttpURLConnection) url.openConnection();
 297     int rc = conn.getResponseCode();
 298     if (rc != HttpURLConnection.HTTP_OK)
 299     {
 300       throw new IOException(
 301               "Response status from " + urlStr + " was " + rc);
 302     }
 303     String encoding = conn.getContentEncoding();
 304     String contentType = conn.getContentType();
 305     boolean isgzipped = "application/x-gzip".equalsIgnoreCase(contentType)
 306             || "gzip".equals(encoding);
 307     Exception e = null;
 308     InputStream inputStream = conn.getInputStream();
 309     if (isgzipped)
 310     {
 311       try
 312       {
 313         dataIn = getGzipReader(inputStream);
 314         dataName = urlStr;
 315       } catch (Exception e1)
 316       {
 317         throw new IOException(MessageManager
 318                 .getString("exception.failed_to_resolve_gzip_stream"), e);
 319       }
 320       return;
 321     }
 322
 323     dataIn = new BufferedReader(new InputStreamReader(inputStream));
 324     dataName = urlStr;
 325     return;
 326   }
 327
 328   /**
 329    * sets the suffix string (if any) and returns remainder (if suffix was
 330    * detected)
 331    *
 332    * @param fileStr
 333    * @return truncated fileStr or null
 334    */
 335   private String extractSuffix(String fileStr)
 336   {
 337     // first check that there wasn't a suffix string tagged on.
 338     int sfpos = fileStr.lastIndexOf(suffixSeparator);
 339     if (sfpos > -1 && sfpos < fileStr.length() - 1)
 340     {
 341       suffix = fileStr.substring(sfpos + 1);
 342       // System.err.println("DEBUG: Found Suffix:"+suffix);
 343       return fileStr.substring(0, sfpos);
 344     }
 345     return null;
 346   }
 347
 348   /**
 349    * not for general use, creates a fileParse object for an existing reader with
 350    * configurable values for the origin and the type of the source
 351    */
 352   public FileParse(BufferedReader source, String originString,
 353           DataSourceType sourceType)
 354   {
 355     dataSourceType = sourceType;
 356     error = false;
 357     inFile = null;
 358     dataName = originString;
 359     dataIn = source;
 360     try
 361     {
 362       if (dataIn.markSupported())
 363       {
 364         dataIn.mark(READAHEAD_LIMIT);
 365       }
 366     } catch (IOException q)
 367     {
 368
 369     }
 370   }
 371
 372   /**
 373    * Create a datasource for input to Jalview. See AppletFormatAdapter for the
 374    * types of sources that are handled.
 375    *
 376    * @param file
 377    *          - datasource locator/content as File or String
 378    * @param sourceType
 379    *          - protocol of source
 380    * @throws MalformedURLException
 381    * @throws IOException
 382    */
 383   public FileParse(Object file, DataSourceType sourceType)
 384           throws MalformedURLException, IOException
 385   {
 386     if (file instanceof File)
 387     {
 388       parse((File) file, ((File) file).getPath(), sourceType, true);
 389     }
 390     else
 391     {
 392       parse(null, file.toString(), sourceType, false);
 393     }
 394   }
 395
 396   private void parse(File file, String fileStr, DataSourceType sourceType,
 397           boolean isFileObject) throws IOException
 398   {
 399     bytes = Platform.getFileBytes(file);
 400     dataSourceType = sourceType;
 401     error = false;
 402
 403     if (sourceType == DataSourceType.FILE)
 404     {
 405
 406       if (bytes != null)
 407       {
 408         // this will be from JavaScript
 409         inFile = file;
 410         dataIn = new BufferedReader(
 411                 new InputStreamReader(new ByteArrayInputStream(bytes)));
 412         dataName = fileStr;
 413       }
 414       else if (checkFileSource(fileStr))
 415       {
 416         String suffixLess = extractSuffix(fileStr);
 417         if (suffixLess != null)
 418         {
 419           if (checkFileSource(suffixLess))
 420           {
 421             throw new IOException(MessageManager.formatMessage(
 422                     "exception.problem_opening_file_also_tried",
 423                     new String[]
 424                     { inFile.getName(), suffixLess, errormessage }));
 425           }
 426         }
 427         else
 428         {
 429           throw new IOException(MessageManager.formatMessage(
 430                   "exception.problem_opening_file", new String[]
 431                   { inFile.getName(), errormessage }));
 432         }
 433       }
 434     }
 435     else if (sourceType == DataSourceType.RELATIVE_URL)
 436     {
 437       // BH 2018 hack for no support for access-origin
 438       bytes = Platform.getFileAsBytes(fileStr);
 439       dataIn = new BufferedReader(
 440               new InputStreamReader(new ByteArrayInputStream(bytes)));
 441       dataName = fileStr;
 442
 443     }
 444     else if (sourceType == DataSourceType.URL)
 445     {
 446       try
 447       {
 448         try
 449         {
 450           checkURLSource(fileStr);
 451           if (suffixSeparator == '#')
 452           {
 453             extractSuffix(fileStr); // URL lref is stored for later reference.
 454           }
 455         } catch (IOException e)
 456         {
 457           String suffixLess = extractSuffix(fileStr);
 458           if (suffixLess == null)
 459           {
 460             throw (e);
 461           }
 462           else
 463           {
 464             try
 465             {
 466               checkURLSource(suffixLess);
 467             } catch (IOException e2)
 468             {
 469               errormessage = "BAD URL WITH OR WITHOUT SUFFIX";
 470               throw (e); // just pass back original - everything was wrong.
 471             }
 472           }
 473         }
 474       } catch (Exception e)
 475       {
 476         errormessage = "CANNOT ACCESS DATA AT URL '" + fileStr + "' ("
 477                 + e.getMessage() + ")";
 478         error = true;
 479       }
 480     }
 481     else if (sourceType == DataSourceType.PASTE)
 482     {
 483       errormessage = "PASTE INACCESSIBLE!";
 484       dataIn = new BufferedReader(new StringReader(fileStr));
 485       dataName = "Paste";
 486     }
 487     else if (sourceType == DataSourceType.CLASSLOADER)
 488     {
 489       errormessage = "RESOURCE CANNOT BE LOCATED";
 490       InputStream is = getClass().getResourceAsStream("/" + fileStr);
 491       if (is == null)
 492       {
 493         String suffixLess = extractSuffix(fileStr);
 494         if (suffixLess != null)
 495         {
 496           is = getClass().getResourceAsStream("/" + suffixLess);
 497         }
 498       }
 499       if (is != null)
 500       {
 501         dataIn = new BufferedReader(new InputStreamReader(is));
 502         dataName = fileStr;
 503       }
 504       else
 505       {
 506         error = true;
 507       }
 508     }
 509     else
 510     {
 511       errormessage = "PROBABLE IMPLEMENTATION ERROR : Datasource Type given as '"
 512               + (sourceType != null ? sourceType : "null") + "'";
 513       error = true;
 514     }
 515     if (dataIn == null || error)
 516     {
 517       // pass up the reason why we have no source to read from
 518       throw new IOException(MessageManager.formatMessage(
 519               "exception.failed_to_read_data_from_source", new String[]
 520               { errormessage }));
 521     }
 522     error = false;
 523     dataIn.mark(READAHEAD_LIMIT);
 524   }
 525
 526   /**
 527    * mark the current position in the source as start for the purposes of it
 528    * being analysed by IdentifyFile().identify
 529    *
 530    * @throws IOException
 531    */
 532   public void mark() throws IOException
 533   {
 534     if (dataIn != null)
 535     {
 536       dataIn.mark(READAHEAD_LIMIT);
 537     }
 538     else
 539     {
 540       throw new IOException(
 541               MessageManager.getString("exception.no_init_source_stream"));
 542     }
 543   }
 544
 545   public String nextLine() throws IOException
 546   {
 547     if (!error)
 548     {
 549       return dataIn.readLine();
 550     }
 551     throw new IOException(MessageManager
 552             .formatMessage("exception.invalid_source_stream", new String[]
 553             { errormessage }));
 554   }
 555
 556   /**
 557    *
 558    * @return true if this FileParse is configured for Export only
 559    */
 560   public boolean isExporting()
 561   {
 562     return !error && dataIn == null;
 563   }
 564
 565   /**
 566    *
 567    * @return true if the data source is valid
 568    */
 569   public boolean isValid()
 570   {
 571     return !error;
 572   }
 573
 574   /**
 575    * closes the datasource and tidies up. source will be left in an error state
 576    */
 577   public void close() throws IOException
 578   {
 579     errormessage = "EXCEPTION ON CLOSE";
 580     error = true;
 581     dataIn.close();
 582     dataIn = null;
 583     errormessage = "SOURCE IS CLOSED";
 584   }
 585
 586   /**
 587    * Rewinds the datasource to the marked point if possible
 588    *
 589    * @param bytesRead
 590    *
 591    */
 592   public void reset(int bytesRead) throws IOException
 593   {
 594     if (bytesRead >= READAHEAD_LIMIT)
 595     {
 596       System.err.println(String.format(
 597               "File reset error: read %d bytes but reset limit is %d",
 598               bytesRead, READAHEAD_LIMIT));
 599     }
 600     if (dataIn != null && !error)
 601     {
 602       dataIn.reset();
 603     }
 604     else
 605     {
 606       throw new IOException(MessageManager.getString(
 607               "error.implementation_error_reset_called_for_invalid_source"));
 608     }
 609   }
 610
 611   /**
 612    *
 613    * @return true if there is a warning for the user
 614    */
 615   public boolean hasWarningMessage()
 616   {
 617     return (warningMessage != null && warningMessage.length() > 0);
 618   }
 619
 620   /**
 621    *
 622    * @return empty string or warning message about file that was just parsed.
 623    */
 624   public String getWarningMessage()
 625   {
 626     return warningMessage;
 627   }
 628
 629   public String getInFile()
 630   {
 631     if (inFile != null)
 632     {
 633       return inFile.getAbsolutePath() + " (" + index + ")";
 634     }
 635     else
 636     {
 637       return "From Paste + (" + index + ")";
 638     }
 639   }
 640
 641   /**
 642    * @return the dataName
 643    */
 644   public String getDataName()
 645   {
 646     return dataName;
 647   }
 648
 649   /**
 650    * set the (human readable) name or URI for this datasource
 651    *
 652    * @param dataname
 653    */
 654   protected void setDataName(String dataname)
 655   {
 656     dataName = dataname;
 657   }
 658
 659   /**
 660    * get the underlying bufferedReader for this data source.
 661    *
 662    * @return null if no reader available
 663    * @throws IOException
 664    */
 665   public Reader getReader()
 666   {
 667     if (dataIn != null) // Probably don't need to test for readiness &&
 668                         // dataIn.ready())
 669     {
 670       return dataIn;
 671     }
 672     return null;
 673   }
 674
 675   public AlignViewportI getViewport()
 676   {
 677     return viewport;
 678   }
 679
 680   public void setViewport(AlignViewportI viewport)
 681   {
 682     this.viewport = viewport;
 683   }
 684
 685   /**
 686    * @return the currently configured exportSettings for writing data.
 687    */
 688   public AlignExportSettingsI getExportSettings()
 689   {
 690     return exportSettings;
 691   }
 692
 693   /**
 694    * Set configuration for export of data.
 695    *
 696    * @param exportSettings
 697    *          the exportSettings to set
 698    */
 699   public void setExportSettings(AlignExportSettingsI exportSettings)
 700   {
 701     this.exportSettings = exportSettings;
 702   }
 703
 704   /**
 705    * method overridden by complex file exporter/importers which support
 706    * exporting visualisation and layout settings for a view
 707    *
 708    * @param avpanel
 709    */
 710   public void configureForView(AlignmentViewPanel avpanel)
 711   {
 712     if (avpanel != null)
 713     {
 714       setViewport(avpanel.getAlignViewport());
 715     }
 716     // could also set export/import settings
 717   }
 718
 719   /**
 720    * Returns the preferred feature colour configuration if there is one, else
 721    * null
 722    *
 723    * @return
 724    */
 725   public FeatureSettingsModelI getFeatureColourScheme()
 726   {
 727     return null;
 728   }
 729
 730   public DataSourceType getDataSourceType()
 731   {
 732     return dataSourceType;
 733   }
 734
 735   /**
 736    * Returns a buffered reader for the input object. Returns null, or throws
 737    * IOException, on failure.
 738    *
 739    * @param file
 740    *          a File, or a String which is a name of a file
 741    * @param sourceType
 742    * @return
 743    * @throws IOException
 744    */
 745   public BufferedReader getBufferedReader(Object file,
 746           DataSourceType sourceType) throws IOException
 747   {
 748     BufferedReader in = null;
 749     byte[] bytes;
 750
 751     switch (sourceType)
 752     {
 753     case FILE:
 754       if (file instanceof String)
 755       {
 756         return new BufferedReader(new FileReader((String) file));
 757       }
 758       bytes = Platform.getFileBytes((File) file);
 759       if (bytes != null)
 760       {
 761         return new BufferedReader(
 762                 new InputStreamReader(new ByteArrayInputStream(bytes)));
 763       }
 764       return new BufferedReader(new FileReader((File) file));
 765     case URL:
 766       URL url = new URL(file.toString());
 767       in = new BufferedReader(new InputStreamReader(url.openStream()));
 768       break;
 769     case RELATIVE_URL: // JalviewJS only
 770       bytes = Platform.getFileAsBytes(file.toString());
 771       if (bytes != null)
 772       {
 773         in = new BufferedReader(
 774                 new InputStreamReader(new ByteArrayInputStream(bytes)));
 775       }
 776       break;
 777     case PASTE:
 778       in = new BufferedReader(new StringReader(file.toString()));
 779       break;
 780     case CLASSLOADER:
 781       InputStream is = getClass().getResourceAsStream("/" + file);
 782       if (is != null)
 783       {
 784         in = new BufferedReader(new InputStreamReader(is));
 785       }
 786       break;
 787     }
 788
 789     return in;
 790   }
 791 }