2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.io.parsers.phyloxml;
29 import java.io.FileReader;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.io.InputStreamReader;
33 import java.io.Reader;
34 import java.io.StringReader;
36 import java.util.Date;
37 import java.util.Enumeration;
38 import java.util.zip.ZipEntry;
39 import java.util.zip.ZipFile;
40 import java.util.zip.ZipInputStream;
42 import javax.xml.parsers.ParserConfigurationException;
43 import javax.xml.parsers.SAXParser;
44 import javax.xml.parsers.SAXParserFactory;
46 import org.forester.io.parsers.PhylogenyParser;
47 import org.forester.io.parsers.util.PhylogenyParserException;
48 import org.forester.phylogeny.Phylogeny;
49 import org.forester.util.ForesterConstants;
50 import org.forester.util.ForesterUtil;
51 import org.xml.sax.InputSource;
52 import org.xml.sax.SAXException;
53 import org.xml.sax.SAXNotRecognizedException;
54 import org.xml.sax.SAXNotSupportedException;
55 import org.xml.sax.SAXParseException;
56 import org.xml.sax.XMLReader;
57 import org.xml.sax.helpers.DefaultHandler;
59 public class PhyloXmlParser implements PhylogenyParser {
61 final public static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
62 final public static String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema";
63 final public static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
64 final public static String SAX_FEATURES_VALIDATION = "http://xml.org/sax/features/validation";
65 final public static String APACHE_FEATURES_VALIDATION_SCHEMA = "http://apache.org/xml/features/validation/schema";
66 final public static String APACHE_FEATURES_VALIDATION_SCHEMA_FULL = "http://apache.org/xml/features/validation/schema-full-checking";
67 final public static String APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION = "http://apache.org/xml/properties/schema/external-schemaLocation";
68 final static private boolean TIME = false;
69 private Object _source;
70 private boolean _valid;
71 private boolean _zipped_inputstream;
72 private int _error_count;
73 private int _warning_count;
74 private String _schema_location;
75 private StringBuffer _error_messages;
76 private StringBuffer _warning_messages;
/**
 * Creates a parser in its default state.
 * <p>
 * The constructor is private; instances are handed out by the static
 * {@code createPhyloXmlParser...} factory methods of this class.
 */
private PhyloXmlParser() {
    // Establish the default input mode first, then allocate fresh counters and
    // message buffers so the getters never expose null buffers.
    init();
    reset();
}
83 public int getErrorCount() {
87 public StringBuffer getErrorMessages() {
88 return _error_messages;
91 private Reader getReaderFromZipFile() throws IOException {
93 final ZipFile zip_file = new ZipFile( getSource().toString() );
94 final Enumeration<?> zip_file_entries = zip_file.entries();
95 while ( zip_file_entries.hasMoreElements() ) {
96 final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement();
97 if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) {
98 final InputStream is = zip_file.getInputStream( zip_file_entry );
99 reader = new InputStreamReader( is );
106 private String getSchemaLocation() {
107 return _schema_location;
110 private Object getSource() {
114 public int getWarningCount() {
115 return _warning_count;
118 public StringBuffer getWarningMessages() {
119 return _warning_messages;
122 private void init() {
123 setZippedInputstream( false );
126 public boolean isValid() {
130 private boolean isZippedInputstream() {
131 return _zipped_inputstream;
135 public Phylogeny[] parse() throws IOException, PhylogenyParserException {
137 final PhyloXmlHandler handler = new PhyloXmlHandler();
138 final SAXParserFactory factory = SAXParserFactory.newInstance();
139 factory.setNamespaceAware( true );
141 if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
142 factory.setFeature( SAX_FEATURES_VALIDATION, true );
143 factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA, true );
144 factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true );
147 catch ( final SAXNotRecognizedException e ) {
149 throw new PhylogenyParserException( "sax not recognized exception: " + e.getLocalizedMessage() );
151 catch ( final SAXNotSupportedException e ) {
153 throw new PhylogenyParserException( "sax not supported exception: " + e.getLocalizedMessage() );
155 catch ( final ParserConfigurationException e ) {
157 throw new PhylogenyParserException( "parser configuration exception: " + e.getLocalizedMessage() );
159 catch ( final Exception e ) {
161 throw new PhylogenyParserException( "error while configuring sax parser: " + e.getLocalizedMessage() );
164 final SAXParser parser = factory.newSAXParser();
165 if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
166 parser.setProperty( JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA );
167 parser.setProperty( JAXP_SCHEMA_SOURCE, getSchemaLocation() );
168 parser.setProperty( APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation() );
170 final XMLReader xml_reader = parser.getXMLReader();
171 xml_reader.setContentHandler( handler );
172 xml_reader.setErrorHandler( new PhyloXmlParserErrorHandler() );
175 start_time = new Date().getTime();
177 if ( getSource() instanceof File ) {
178 if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) {
179 xml_reader.parse( new InputSource( new FileReader( ( File ) getSource() ) ) );
182 final Reader reader = getReaderFromZipFile();
183 if ( reader == null ) {
184 throw new PhylogenyParserException( "zip file \"" + getSource()
185 + "\" appears not to contain any entries" );
187 xml_reader.parse( new InputSource( reader ) );
190 else if ( getSource() instanceof InputSource ) {
191 xml_reader.parse( ( InputSource ) getSource() );
193 else if ( getSource() instanceof InputStream ) {
194 if ( !isZippedInputstream() ) {
195 final InputStream is = ( InputStream ) getSource();
196 xml_reader.parse( new InputSource( new InputStreamReader( is ) ) );
199 final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() );
200 zip_is.getNextEntry();
201 xml_reader.parse( new InputSource( new InputStreamReader( zip_is ) ) );
204 else if ( getSource() instanceof String ) {
205 final File file = new File( getSource().toString() );
206 final Reader reader = new FileReader( file );
207 xml_reader.parse( new InputSource( reader ) );
209 else if ( getSource() instanceof StringBuffer ) {
210 final StringReader string_reader = new StringReader( getSource().toString() );
211 xml_reader.parse( new InputSource( string_reader ) );
214 throw new PhylogenyParserException( "phyloXML parser: attempt to parse object of unsupported type: \""
215 + getSource().getClass() + "\"" );
218 System.out.println( "[TIME] phyloXML parsing: " + ( new Date().getTime() - start_time ) + "ms." );
221 catch ( final SAXException sax_exception ) {
222 throw new PhylogenyParserException( "failed to parse [" + getSource() + "]: "
223 + sax_exception.getLocalizedMessage() );
225 catch ( final ParserConfigurationException parser_config_exception ) {
226 throw new PhylogenyParserException( "failed to parse [" + getSource()
227 + "]. Problem with XML parser configuration: " + parser_config_exception.getLocalizedMessage() );
229 catch ( final IOException e ) {
230 throw new PhylogenyParserException( "problem with input source: " + e.getLocalizedMessage() );
232 catch ( final Exception e ) {
233 throw new PhylogenyParserException( e.getLocalizedMessage() );
235 catch ( final Error err ) {
236 err.printStackTrace();
237 throw new PhylogenyParserException( "severe error: " + err.getLocalizedMessage() );
239 final Phylogeny[] ps = new Phylogeny[ handler.getPhylogenies().size() ];
241 for( final Phylogeny phylogeny : handler.getPhylogenies() ) {
242 ps[ i++ ] = phylogeny;
247 private void reset() {
251 _error_messages = new StringBuffer();
252 _warning_messages = new StringBuffer();
256 public void setSource( final Object source ) {
260 public void setValidateAgainstSchema( final String schema_location ) {
261 _schema_location = schema_location;
264 public void setZippedInputstream( final boolean zipped_inputstream ) {
265 _zipped_inputstream = zipped_inputstream;
268 public static PhyloXmlParser createPhyloXmlParserXsdValidating() {
269 final PhyloXmlParser xml_parser = new PhyloXmlParser();
270 final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
271 final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
272 if ( xsd_url != null ) {
273 xml_parser.setValidateAgainstSchema( xsd_url.toString() );
276 throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
277 + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
282 public static PhyloXmlParser createPhyloXmlParser() {
283 final PhyloXmlParser xml_parser = new PhyloXmlParser();
287 private class PhyloXmlParserErrorHandler extends DefaultHandler {
290 public void error( final SAXParseException e ) {
293 throw new PhyloXmlException( "phyloXML error at line " + e.getLineNumber() + ": \n"
294 + e.getLocalizedMessage() );
298 public void fatalError( final SAXParseException e ) {
301 throw new PhyloXmlException( "fatal XML error at line " + e.getLineNumber() + ": \n"
302 + e.getLocalizedMessage() );
306 public void warning( final SAXParseException e ) {
308 if ( _error_messages.length() > 1 ) {
309 _error_messages.append( ForesterUtil.LINE_SEPARATOR );
311 _warning_messages.append( "[line: " + e.getLineNumber() + "] " + e.getMessage() );
316 public String getName() {
317 return "phyloXML Parser";