in progress
[jalview.git] / forester / java / src / org / forester / io / parsers / phyloxml / PhyloXmlParser.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.io.parsers.phyloxml;
27
28 import java.io.BufferedReader;
29 import java.io.File;
30 import java.io.FileInputStream;
31 import java.io.IOException;
32 import java.io.InputStream;
33 import java.io.InputStreamReader;
34 import java.io.Reader;
35 import java.io.StringReader;
36 import java.net.URL;
37 import java.util.Date;
38 import java.util.Enumeration;
39 import java.util.zip.ZipEntry;
40 import java.util.zip.ZipFile;
41 import java.util.zip.ZipInputStream;
42
43 import javax.xml.parsers.ParserConfigurationException;
44 import javax.xml.parsers.SAXParser;
45 import javax.xml.parsers.SAXParserFactory;
46
47 import org.forester.io.parsers.PhylogenyParser;
48 import org.forester.io.parsers.util.PhylogenyParserException;
49 import org.forester.phylogeny.Phylogeny;
50 import org.forester.util.ForesterConstants;
51 import org.forester.util.ForesterUtil;
52 import org.xml.sax.InputSource;
53 import org.xml.sax.SAXException;
54 import org.xml.sax.SAXNotRecognizedException;
55 import org.xml.sax.SAXNotSupportedException;
56 import org.xml.sax.SAXParseException;
57 import org.xml.sax.XMLReader;
58 import org.xml.sax.helpers.DefaultHandler;
59
60 public class PhyloXmlParser implements PhylogenyParser {
61
62     final public static String   JAXP_SCHEMA_LANGUAGE                       = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
63     final public static String   W3C_XML_SCHEMA                             = "http://www.w3.org/2001/XMLSchema";
64     final public static String   JAXP_SCHEMA_SOURCE                         = "http://java.sun.com/xml/jaxp/properties/schemaSource";
65     final public static String   SAX_FEATURES_VALIDATION                    = "http://xml.org/sax/features/validation";
66     final public static String   APACHE_FEATURES_VALIDATION_SCHEMA          = "http://apache.org/xml/features/validation/schema";
67     final public static String   APACHE_FEATURES_VALIDATION_SCHEMA_FULL     = "http://apache.org/xml/features/validation/schema-full-checking";
68     final public static String   APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION = "http://apache.org/xml/properties/schema/external-schemaLocation";
69     final static private boolean TIME                                       = false;
70     private Object               _source;
71     private boolean              _valid;
72     private boolean              _zipped_inputstream;
73     private int                  _error_count;
74     private int                  _warning_count;
75     private String               _schema_location;
76     private StringBuffer         _error_messages;
77     private StringBuffer         _warning_messages;
78
79     private PhyloXmlParser() {
80         init();
81         reset();
82     }
83
84     public int getErrorCount() {
85         return _error_count;
86     }
87
88     public StringBuffer getErrorMessages() {
89         return _error_messages;
90     }
91
92     private Reader getReaderFromZipFile() throws IOException {
93         Reader reader = null;
94         final ZipFile zip_file = new ZipFile( getSource().toString() );
95         final Enumeration<?> zip_file_entries = zip_file.entries();
96         while ( zip_file_entries.hasMoreElements() ) {
97             final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement();
98             if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) {
99                 final InputStream is = zip_file.getInputStream( zip_file_entry );
100                 reader = new InputStreamReader( is, ForesterConstants.UTF_8 );
101                 break;
102             }
103         }
104         try {
105             zip_file.close();
106         }
107         catch ( final Exception e ) {
108             // Ignore
109         }
110         return reader;
111     }
112
113     private String getSchemaLocation() {
114         return _schema_location;
115     }
116
117     private Object getSource() {
118         return _source;
119     }
120
121     public int getWarningCount() {
122         return _warning_count;
123     }
124
125     public StringBuffer getWarningMessages() {
126         return _warning_messages;
127     }
128
129     private void init() {
130         setZippedInputstream( false );
131     }
132
133     public boolean isValid() {
134         return _valid;
135     }
136
137     private boolean isZippedInputstream() {
138         return _zipped_inputstream;
139     }
140
141     @Override
142     public Phylogeny[] parse() throws IOException, PhylogenyParserException {
143         reset();
144         final PhyloXmlHandler handler = new PhyloXmlHandler();
145         final SAXParserFactory factory = SAXParserFactory.newInstance();
146         factory.setNamespaceAware( true );
147         try {
148             if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
149                 factory.setFeature( SAX_FEATURES_VALIDATION, true );
150                 factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA, true );
151                 factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true );
152             }
153         }
154         catch ( final SAXNotRecognizedException e ) {
155             e.printStackTrace();
156             throw new PhylogenyParserException( "sax not recognized exception: " + e.getLocalizedMessage() );
157         }
158         catch ( final SAXNotSupportedException e ) {
159             e.printStackTrace();
160             throw new PhylogenyParserException( "sax not supported exception: " + e.getLocalizedMessage() );
161         }
162         catch ( final ParserConfigurationException e ) {
163             e.printStackTrace();
164             throw new PhylogenyParserException( "parser configuration exception: " + e.getLocalizedMessage() );
165         }
166         catch ( final Exception e ) {
167             e.printStackTrace();
168             throw new PhylogenyParserException( "error while configuring sax parser: " + e.getLocalizedMessage() );
169         }
170         try {
171             final SAXParser parser = factory.newSAXParser();
172             if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
173                 parser.setProperty( JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA );
174                 parser.setProperty( JAXP_SCHEMA_SOURCE, getSchemaLocation() );
175                 parser.setProperty( APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation() );
176             }
177             final XMLReader xml_reader = parser.getXMLReader();
178             xml_reader.setContentHandler( handler );
179             xml_reader.setErrorHandler( new PhyloXmlParserErrorHandler() );
180             long start_time = 0;
181             if ( TIME ) {
182                 start_time = new Date().getTime();
183             }
184             if ( getSource() instanceof File ) {
185                 if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) {
186                     final InputStream is = new FileInputStream( (File) getSource() );
187                     final InputStreamReader isr = new InputStreamReader( is, ForesterConstants.UTF_8 );
188                     xml_reader.parse( new InputSource( new BufferedReader( isr ) ) );
189                 }
190                 else {
191                     final Reader reader = getReaderFromZipFile();
192                     if ( reader == null ) {
193                         throw new PhylogenyParserException( "zip file \"" + getSource()
194                                                             + "\" appears not to contain any entries" );
195                     }
196                     xml_reader.parse( new InputSource( new BufferedReader( reader ) ) );
197                 }
198             }
199             else if ( getSource() instanceof InputSource ) {
200                 final InputSource is = ( InputSource ) getSource();
201                 is.setEncoding( ForesterConstants.UTF_8 );
202                 xml_reader.parse( is );
203             }
204             else if ( getSource() instanceof InputStream ) {
205                 if ( !isZippedInputstream() ) {
206                     final InputStream is = ( InputStream ) getSource();
207                     xml_reader.parse( new InputSource( new BufferedReader( new InputStreamReader( is, ForesterConstants.UTF_8 ) ) ) );
208                 }
209                 else {
210                     final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() );
211                     zip_is.getNextEntry();
212                     xml_reader.parse( new InputSource( new BufferedReader( new InputStreamReader( zip_is, ForesterConstants.UTF_8 ) ) ) );
213                 }
214             }
215             else if ( getSource() instanceof StringBuffer ) {
216                 final StringReader string_reader = new StringReader( getSource().toString() );
217                 xml_reader.parse( new InputSource( string_reader ) );
218             }
219             else {
220                 throw new PhylogenyParserException( "phyloXML parser: attempt to parse object of unsupported type: \""
221                         + getSource().getClass() + "\"" );
222             }
223             if ( TIME ) {
224                 System.out.println( "[TIME] phyloXML parsing: " + ( new Date().getTime() - start_time ) + "ms." );
225             }
226         }
227         catch ( final SAXException sax_exception ) {
228             throw new PhylogenyParserException( "failed to parse [" + getSource() + "]: "
229                     + sax_exception.getLocalizedMessage() );
230         }
231         catch ( final ParserConfigurationException parser_config_exception ) {
232             throw new PhylogenyParserException( "failed to parse [" + getSource()
233                                                 + "]. Problem with XML parser configuration: " + parser_config_exception.getLocalizedMessage() );
234         }
235         catch ( final IOException e ) {
236             throw new PhylogenyParserException( "problem with input source: " + e.getLocalizedMessage() );
237         }
238         catch ( final Exception e ) {
239             throw new PhylogenyParserException( e.getLocalizedMessage() );
240         }
241         catch ( final Error err ) {
242             err.printStackTrace();
243             throw new PhylogenyParserException( "severe error: " + err.getLocalizedMessage() );
244         }
245         final Phylogeny[] ps = new Phylogeny[ handler.getPhylogenies().size() ];
246         int i = 0;
247         for( final Phylogeny phylogeny : handler.getPhylogenies() ) {
248             ps[ i++ ] = phylogeny;
249         }
250         return ps;
251     }
252
253     private void reset() {
254         _valid = true;
255         _error_count = 0;
256         _warning_count = 0;
257         _error_messages = new StringBuffer();
258         _warning_messages = new StringBuffer();
259     }
260
261     @Override
262     public void setSource( final Object source ) {
263         _source = source;
264     }
265
266     public void setValidateAgainstSchema( final String schema_location ) {
267         _schema_location = schema_location;
268     }
269
270     public void setZippedInputstream( final boolean zipped_inputstream ) {
271         _zipped_inputstream = zipped_inputstream;
272     }
273
274     public static PhyloXmlParser createPhyloXmlParserXsdValidating() {
275         final PhyloXmlParser xml_parser = new PhyloXmlParser();
276         final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
277         final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
278         if ( xsd_url != null ) {
279             xml_parser.setValidateAgainstSchema( xsd_url.toString() );
280         }
281         else {
282             throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
283                     + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
284         }
285         return xml_parser;
286     }
287
288     public static PhyloXmlParser createPhyloXmlParser() {
289         final PhyloXmlParser xml_parser = new PhyloXmlParser();
290         return xml_parser;
291     }
292
293     private class PhyloXmlParserErrorHandler extends DefaultHandler {
294
295         @Override
296         public void error( final SAXParseException e ) {
297             ++_error_count;
298             _valid = false;
299             throw new PhyloXmlException( "phyloXML error at line " + e.getLineNumber() + ": \n"
300                     + e.getLocalizedMessage() );
301         }
302
303         @Override
304         public void fatalError( final SAXParseException e ) {
305             ++_error_count;
306             _valid = false;
307             throw new PhyloXmlException( "fatal XML error at line " + e.getLineNumber() + ": \n"
308                     + e.getLocalizedMessage() );
309         }
310
311         @Override
312         public void warning( final SAXParseException e ) {
313             ++_warning_count;
314             if ( _error_messages.length() > 1 ) {
315                 _error_messages.append( ForesterUtil.LINE_SEPARATOR );
316             }
317             _warning_messages.append( "[line: " + e.getLineNumber() + "] " + e.getMessage() );
318         }
319     }
320
321     @Override
322     public String getName() {
323         return "phyloXML Parser";
324     }
325 }