2b46c0f6088b765381c3904bc9007fd2d1849fbe
[jalview.git] / forester / java / src / org / forester / io / parsers / phyloxml / PhyloXmlParser.java
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.io.parsers.phyloxml;
27
28 import java.io.BufferedReader;
29 import java.io.File;
30 import java.io.FileInputStream;
31 import java.io.IOException;
32 import java.io.InputStream;
33 import java.io.InputStreamReader;
34 import java.io.Reader;
35 import java.io.StringReader;
36 import java.net.URL;
37 import java.util.Date;
38 import java.util.Enumeration;
39 import java.util.zip.ZipEntry;
40 import java.util.zip.ZipFile;
41 import java.util.zip.ZipInputStream;
42
43 import javax.xml.parsers.ParserConfigurationException;
44 import javax.xml.parsers.SAXParser;
45 import javax.xml.parsers.SAXParserFactory;
46
47 import org.forester.io.parsers.PhylogenyParser;
48 import org.forester.io.parsers.util.PhylogenyParserException;
49 import org.forester.phylogeny.Phylogeny;
50 import org.forester.util.ForesterConstants;
51 import org.forester.util.ForesterUtil;
52 import org.xml.sax.InputSource;
53 import org.xml.sax.SAXException;
54 import org.xml.sax.SAXNotRecognizedException;
55 import org.xml.sax.SAXNotSupportedException;
56 import org.xml.sax.SAXParseException;
57 import org.xml.sax.XMLReader;
58 import org.xml.sax.helpers.DefaultHandler;
59
60 public class PhyloXmlParser implements PhylogenyParser {
61
62     private static final String  UTF_8                                      = "UTF-8";
63     final public static String   JAXP_SCHEMA_LANGUAGE                       = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
64     final public static String   W3C_XML_SCHEMA                             = "http://www.w3.org/2001/XMLSchema";
65     final public static String   JAXP_SCHEMA_SOURCE                         = "http://java.sun.com/xml/jaxp/properties/schemaSource";
66     final public static String   SAX_FEATURES_VALIDATION                    = "http://xml.org/sax/features/validation";
67     final public static String   APACHE_FEATURES_VALIDATION_SCHEMA          = "http://apache.org/xml/features/validation/schema";
68     final public static String   APACHE_FEATURES_VALIDATION_SCHEMA_FULL     = "http://apache.org/xml/features/validation/schema-full-checking";
69     final public static String   APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION = "http://apache.org/xml/properties/schema/external-schemaLocation";
70     final static private boolean TIME                                       = false;
71     private Object               _source;
72     private boolean              _valid;
73     private boolean              _zipped_inputstream;
74     private int                  _error_count;
75     private int                  _warning_count;
76     private String               _schema_location;
77     private StringBuffer         _error_messages;
78     private StringBuffer         _warning_messages;
79
80     private PhyloXmlParser() {
81         init();
82         reset();
83     }
84
85     public int getErrorCount() {
86         return _error_count;
87     }
88
89     public StringBuffer getErrorMessages() {
90         return _error_messages;
91     }
92
93     private Reader getReaderFromZipFile() throws IOException {
94         Reader reader = null;
95         final ZipFile zip_file = new ZipFile( getSource().toString() );
96         final Enumeration<?> zip_file_entries = zip_file.entries();
97         while ( zip_file_entries.hasMoreElements() ) {
98             final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement();
99             if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) {
100                 final InputStream is = zip_file.getInputStream( zip_file_entry );
101                 reader = new InputStreamReader( is, UTF_8 );
102                 break;
103             }
104         }
105         try {
106             zip_file.close();
107         }
108         catch ( final Exception e ) {
109             // Ignore
110         }
111         return reader;
112     }
113
114     private String getSchemaLocation() {
115         return _schema_location;
116     }
117
118     private Object getSource() {
119         return _source;
120     }
121
122     public int getWarningCount() {
123         return _warning_count;
124     }
125
126     public StringBuffer getWarningMessages() {
127         return _warning_messages;
128     }
129
130     private void init() {
131         setZippedInputstream( false );
132     }
133
134     public boolean isValid() {
135         return _valid;
136     }
137
138     private boolean isZippedInputstream() {
139         return _zipped_inputstream;
140     }
141
142     @Override
143     public Phylogeny[] parse() throws IOException, PhylogenyParserException {
144         reset();
145         final PhyloXmlHandler handler = new PhyloXmlHandler();
146         final SAXParserFactory factory = SAXParserFactory.newInstance();
147         factory.setNamespaceAware( true );
148         try {
149             if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
150                 factory.setFeature( SAX_FEATURES_VALIDATION, true );
151                 factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA, true );
152                 factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true );
153             }
154         }
155         catch ( final SAXNotRecognizedException e ) {
156             e.printStackTrace();
157             throw new PhylogenyParserException( "sax not recognized exception: " + e.getLocalizedMessage() );
158         }
159         catch ( final SAXNotSupportedException e ) {
160             e.printStackTrace();
161             throw new PhylogenyParserException( "sax not supported exception: " + e.getLocalizedMessage() );
162         }
163         catch ( final ParserConfigurationException e ) {
164             e.printStackTrace();
165             throw new PhylogenyParserException( "parser configuration exception: " + e.getLocalizedMessage() );
166         }
167         catch ( final Exception e ) {
168             e.printStackTrace();
169             throw new PhylogenyParserException( "error while configuring sax parser: " + e.getLocalizedMessage() );
170         }
171         try {
172             final SAXParser parser = factory.newSAXParser();
173             if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
174                 parser.setProperty( JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA );
175                 parser.setProperty( JAXP_SCHEMA_SOURCE, getSchemaLocation() );
176                 parser.setProperty( APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation() );
177             }
178             final XMLReader xml_reader = parser.getXMLReader();
179             xml_reader.setContentHandler( handler );
180             xml_reader.setErrorHandler( new PhyloXmlParserErrorHandler() );
181             long start_time = 0;
182             if ( TIME ) {
183                 start_time = new Date().getTime();
184             }
185             if ( getSource() instanceof File ) {
186                 if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) {
187                     final InputStream is = new FileInputStream( (File) getSource() );
188                     final InputStreamReader isr = new InputStreamReader( is, UTF_8 );
189                     xml_reader.parse( new InputSource( new BufferedReader( isr ) ) );
190                 }
191                 else {
192                     final Reader reader = getReaderFromZipFile();
193                     if ( reader == null ) {
194                         throw new PhylogenyParserException( "zip file \"" + getSource()
195                                                             + "\" appears not to contain any entries" );
196                     }
197                     xml_reader.parse( new InputSource( new BufferedReader( reader ) ) );
198                 }
199             }
200             else if ( getSource() instanceof InputSource ) {
201                 final InputSource is = ( InputSource ) getSource();
202                 is.setEncoding( UTF_8 );
203                 xml_reader.parse( is );
204             }
205             else if ( getSource() instanceof InputStream ) {
206                 if ( !isZippedInputstream() ) {
207                     final InputStream is = ( InputStream ) getSource();
208                     xml_reader.parse( new InputSource( new BufferedReader( new InputStreamReader( is, UTF_8 ) ) ) );
209                 }
210                 else {
211                     final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() );
212                     zip_is.getNextEntry();
213                     xml_reader.parse( new InputSource( new BufferedReader( new InputStreamReader( zip_is, UTF_8 ) ) ) );
214                 }
215             }
216             else if ( getSource() instanceof StringBuffer ) {
217                 final StringReader string_reader = new StringReader( getSource().toString() );
218                 xml_reader.parse( new InputSource( string_reader ) );
219             }
220             else {
221                 throw new PhylogenyParserException( "phyloXML parser: attempt to parse object of unsupported type: \""
222                         + getSource().getClass() + "\"" );
223             }
224             if ( TIME ) {
225                 System.out.println( "[TIME] phyloXML parsing: " + ( new Date().getTime() - start_time ) + "ms." );
226             }
227         }
228         catch ( final SAXException sax_exception ) {
229             throw new PhylogenyParserException( "failed to parse [" + getSource() + "]: "
230                     + sax_exception.getLocalizedMessage() );
231         }
232         catch ( final ParserConfigurationException parser_config_exception ) {
233             throw new PhylogenyParserException( "failed to parse [" + getSource()
234                                                 + "]. Problem with XML parser configuration: " + parser_config_exception.getLocalizedMessage() );
235         }
236         catch ( final IOException e ) {
237             throw new PhylogenyParserException( "problem with input source: " + e.getLocalizedMessage() );
238         }
239         catch ( final Exception e ) {
240             throw new PhylogenyParserException( e.getLocalizedMessage() );
241         }
242         catch ( final Error err ) {
243             err.printStackTrace();
244             throw new PhylogenyParserException( "severe error: " + err.getLocalizedMessage() );
245         }
246         final Phylogeny[] ps = new Phylogeny[ handler.getPhylogenies().size() ];
247         int i = 0;
248         for( final Phylogeny phylogeny : handler.getPhylogenies() ) {
249             ps[ i++ ] = phylogeny;
250         }
251         return ps;
252     }
253
254     private void reset() {
255         _valid = true;
256         _error_count = 0;
257         _warning_count = 0;
258         _error_messages = new StringBuffer();
259         _warning_messages = new StringBuffer();
260     }
261
262     @Override
263     public void setSource( final Object source ) {
264         _source = source;
265     }
266
267     public void setValidateAgainstSchema( final String schema_location ) {
268         _schema_location = schema_location;
269     }
270
271     public void setZippedInputstream( final boolean zipped_inputstream ) {
272         _zipped_inputstream = zipped_inputstream;
273     }
274
275     public static PhyloXmlParser createPhyloXmlParserXsdValidating() {
276         final PhyloXmlParser xml_parser = new PhyloXmlParser();
277         final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
278         final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
279         if ( xsd_url != null ) {
280             xml_parser.setValidateAgainstSchema( xsd_url.toString() );
281         }
282         else {
283             throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
284                     + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
285         }
286         return xml_parser;
287     }
288
289     public static PhyloXmlParser createPhyloXmlParser() {
290         final PhyloXmlParser xml_parser = new PhyloXmlParser();
291         return xml_parser;
292     }
293
294     private class PhyloXmlParserErrorHandler extends DefaultHandler {
295
296         @Override
297         public void error( final SAXParseException e ) {
298             ++_error_count;
299             _valid = false;
300             throw new PhyloXmlException( "phyloXML error at line " + e.getLineNumber() + ": \n"
301                     + e.getLocalizedMessage() );
302         }
303
304         @Override
305         public void fatalError( final SAXParseException e ) {
306             ++_error_count;
307             _valid = false;
308             throw new PhyloXmlException( "fatal XML error at line " + e.getLineNumber() + ": \n"
309                     + e.getLocalizedMessage() );
310         }
311
312         @Override
313         public void warning( final SAXParseException e ) {
314             ++_warning_count;
315             if ( _error_messages.length() > 1 ) {
316                 _error_messages.append( ForesterUtil.LINE_SEPARATOR );
317             }
318             _warning_messages.append( "[line: " + e.getLineNumber() + "] " + e.getMessage() );
319         }
320     }
321
322     @Override
323     public String getName() {
324         return "phyloXML Parser";
325     }
326 }