From: cmzmasek Date: Thu, 23 Jun 2016 22:59:55 +0000 (-0700) Subject: fixed issue with UTF8 encoding. X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=b0fd64a5d558885627ba7a7295d9ef612fe76cd0;p=jalview.git fixed issue with UTF8 encoding. --- diff --git a/forester/aptx/aptx_configuration_files/_aptx_configuration_file b/forester/aptx/aptx_configuration_files/_aptx_configuration_file index f0d78eb..1ffa5d8 100644 --- a/forester/aptx/aptx_configuration_files/_aptx_configuration_file +++ b/forester/aptx/aptx_configuration_files/_aptx_configuration_file @@ -17,7 +17,7 @@ native_ui: ? # value for bootstrap support) # # Font family name: 'font_family': -# Example: 'font_family: Arial,Calibri,Helvetica' +# Example: 'font_family: Sans,Helvetica' # It is advisable to use more than one value for font_family (in # decreasing order of preference). Font family names have to be # comma separated (no spaces). Spaces in font names have to be @@ -162,7 +162,7 @@ native_ui: ? min_confidence_value: 0.0 -font_family: Arial,Helvetica,Verdana,Tahoma,Dialog,Lucida-Sans,SansSerif,Sans-serif,Sans +font_family: SansSerif,Sans-serif,Sans,Dialog,Tahoma,Helvetica,Arial,Verdana font_size: 10 font_size_min: 2 font_size_max: 20 diff --git a/forester/java/src/org/forester/archaeopteryx/AptxConstants.java b/forester/java/src/org/forester/archaeopteryx/AptxConstants.java index 7d660f4..3a608f4 100644 --- a/forester/java/src/org/forester/archaeopteryx/AptxConstants.java +++ b/forester/java/src/org/forester/archaeopteryx/AptxConstants.java @@ -44,8 +44,8 @@ public final class AptxConstants { final static String VERSION = "0.9909 experimental"; final static String PRG_DATE = "160622"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; - final static String[] DEFAULT_FONT_CHOICES = { "Arial", "Helvetica", - "Verdana", "Tahoma", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" }; + final static String[] DEFAULT_FONT_CHOICES = { + "SansSerif", "Sans-serif", "Sans", "Dialog", "Lucida Sans", "Tahoma", "Helvetica", "Arial", "Verdana" }; final static boolean VERBOSE_DEFAULT = false; final static int DOMAIN_STRUCTURE_DEFAULT_WIDTH = 100; final static String AUTHOR_EMAIL = "phyloxml@gmail.com"; diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index 8b4fbdc..8d37609 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -1858,7 +1858,7 @@ public final class MainFrameApplication extends MainFrame { _options_jmenu.add( _print_size_mi = new JMenuItem( "" ) ); _options_jmenu.add( _choose_pdf_width_mi = new JMenuItem( "" ) ); _options_jmenu.addSeparator(); - _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/NHX/Nexus Input:" ), getConfiguration() ) ); + _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/NHX/Nexus Read:" ), getConfiguration() ) ); _options_jmenu .add( _internal_number_are_confidence_for_nh_parsing_cbmi = new JCheckBoxMenuItem( "Internal Node Names are Confidence Values" ) ); _options_jmenu.add( _replace_underscores_cbmi = new JCheckBoxMenuItem( "Replace Underscores with Spaces" ) ); @@ -1882,7 +1882,7 @@ public final class MainFrameApplication extends MainFrame { _radio_group_2.add( _extract_taxonomy_pfam_strict_rbmi ); _radio_group_2.add( _extract_taxonomy_pfam_relaxed_rbmi ); _radio_group_2.add( _extract_taxonomy_agressive_rbmi ); - _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/Nexus Output:" ), getConfiguration() ) ); + _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/Nexus Save:" ), getConfiguration() ) ); _options_jmenu .add( _use_brackets_for_conf_in_nh_export_cbmi = new JCheckBoxMenuItem( USE_BRACKETS_FOR_CONF_IN_NH_LABEL ) ); _use_brackets_for_conf_in_nh_export_cbmi diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java index 721bcc6..e110ec1 100644 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java +++ b/forester/java/src/org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java @@ -38,6 +38,7 @@ import org.forester.io.parsers.util.PhylogenyParserException; public class NexusBinaryStatesMatrixParser { + private static final String UTF_8 = "UTF-8"; private Object _nexus_source; private CharacterStateMatrix _matrix; private int _nchar; @@ -61,7 +62,7 @@ public class NexusBinaryStatesMatrixParser { public void parse() throws IOException { reset(); - final BufferedReader reader = ParserUtils.createReader( getNexusSource() ); + final BufferedReader reader = ParserUtils.createReader( getNexusSource(), UTF_8 ); String line; boolean in_matrix = false; int identifier_index = 0; diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusCharactersParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusCharactersParser.java index 7962717..3407441 100644 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusCharactersParser.java +++ b/forester/java/src/org/forester/io/parsers/nexus/NexusCharactersParser.java @@ -37,6 +37,7 @@ import org.forester.util.ForesterUtil; public class NexusCharactersParser { + private static final String UTF_8 = "UTF-8"; final private static String charstatelabels = NexusConstants.CHARSTATELABELS.toLowerCase(); private Object _nexus_source; private String[] _char_state_labels; @@ -51,7 +52,7 @@ public class NexusCharactersParser { public void parse() throws IOException { reset(); - final BufferedReader reader = ParserUtils.createReader( getNexusSource() ); + final BufferedReader reader = ParserUtils.createReader( getNexusSource(), UTF_8 ); String line; boolean in_charstatelabels = false; final List labels_list = new ArrayList(); diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java index 0f51cd7..d4eabfd 100644 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java +++ b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java @@ -53,6 +53,8 @@ import org.forester.util.ForesterUtil; public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser { + private static final String UTF_8 = "UTF-8"; + final private static boolean DEBUG = false; final private static String begin_trees = NexusConstants.BEGIN_TREES.toLowerCase(); @@ -144,7 +146,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P _rooted_info_present = false; _is_rooted = false; _seqs = new HashMap(); - _br = ParserUtils.createReader( _nexus_source ); + _br = ParserUtils.createReader( _nexus_source, UTF_8 ); getNext(); } @@ -182,7 +184,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P if ( rooted_info_present ) { pars.setGuessRootedness( false ); } - pars.setSource( nhx ); + pars.setSource( nhx.toString() ); final Phylogeny p = pars.next(); if ( p == null ) { throw new PhylogenyParserException( "failed to create phylogeny" ); diff --git a/forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java b/forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java deleted file mode 100644 index a1ecfcd..0000000 --- a/forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java +++ /dev/null @@ -1,128 +0,0 @@ -// $Id: -// -// FORESTER -- software libraries and applications -// for evolutionary biology research and applications. -// -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research -// All rights reserved -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -// -// Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/ - -package org.forester.io.parsers.nexus; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; -import org.forester.evoinference.matrix.character.CharacterStateMatrix; -import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; -import org.forester.io.parsers.util.ParserUtils; -import org.forester.io.parsers.util.PhylogenyParserException; - -public class PaupLogParser { - - private static final String DATA_MATRIX_AND_RECONSTRUCTED_STATES_FOR_INTERNAL_NODES = "data matrix and reconstructed states for internal nodes"; - private Object _nexus_source; - - private Object getNexusSource() { - return _nexus_source; - } - - public CharacterStateMatrix parse() throws IOException { - final BufferedReader reader = ParserUtils.createReader( getNexusSource() ); - String line; - boolean saw_line = false; - int identifier_index = 0; - boolean first_block = true; - boolean saw_data_matrix_line = false; - final List identifiers = new ArrayList(); - final List> states = new ArrayList>(); - boolean done = false; - while ( ( ( line = reader.readLine() ) != null ) && !done ) { - line = line.trim(); - if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) { - if ( ( ( identifier_index > 0 ) && line.startsWith( "Tree " ) ) - || line.startsWith( "Character change list" ) ) { - done = true; - continue; - } - if ( line.toLowerCase().startsWith( DATA_MATRIX_AND_RECONSTRUCTED_STATES_FOR_INTERNAL_NODES ) ) { - saw_line = false; - saw_data_matrix_line = true; - identifier_index = 0; - if ( first_block && ( line.indexOf( "continued" ) > 0 ) ) { - first_block = false; - } - } - if ( saw_data_matrix_line && line.startsWith( "----------" ) ) { - saw_line = true; - } - else if ( saw_line && ( line.indexOf( ' ' ) > 0 ) ) { - final String[] s = line.split( "\\s+" ); - if ( s.length != 2 ) { - throw new NexusFormatException( "unexpected format at line: " + line ); - } - final String identifier = s[ 0 ]; - final String row = s[ 1 ]; - if ( first_block ) { - if ( identifiers.contains( identifier ) ) { - throw new NexusFormatException( "identifier [" + identifier + "] is not unique in line: " - + line ); - } - identifiers.add( identifier ); - states.add( new ArrayList() ); - } - else { - if ( !identifiers.contains( identifier ) ) { - throw new NexusFormatException( "new identifier [" + identifier + "] at line: " + line ); - } - } - for( int c = 0; c < row.length(); ++c ) { - final char ch = row.charAt( c ); - if ( ch == '0' ) { - states.get( identifier_index ).add( BinaryStates.ABSENT ); - } - else if ( ch == '1' ) { - states.get( identifier_index ).add( BinaryStates.PRESENT ); - } - else { - throw new NexusFormatException( "unknown character state [" + ch + "] at line: " + line ); - } - } - ++identifier_index; - } - } - } - final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( states ); - int i = 0; - for( final String identifier : identifiers ) { - matrix.setIdentifier( i++, identifier ); - } - return matrix; - } - - public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException { - if ( nexus_source == null ) { - throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); - } - _nexus_source = nexus_source; - } -} diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index 6989602..04f5f85 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -24,8 +24,9 @@ package org.forester.io.parsers.nhx; import java.io.BufferedReader; +import java.io.ByteArrayInputStream; import java.io.File; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -67,10 +68,11 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse final static private boolean GUESS_IF_SUPPORT_VALUES = true; final static private boolean GUESS_ROOTEDNESS_DEFAULT = true; final static private boolean IGNORE_QUOTES_DEFAULT = false; - final static private byte STRING = 0; - final static private byte STRING_BUFFER = 1; - final static private byte STRING_BUILDER = 4; + final static private char BELL = 7; + public final static String UTF_8 = "UTF-8"; + public final static String ISO_8859_1 = "ISO-8859-1"; + private final static String ENCODING_DEFAULT = UTF_8; private boolean _allow_errors_in_distance_to_parent; private int _clade_level; private StringBuilder _current_anotation; @@ -86,9 +88,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse private byte _input_type; private BufferedReader _my_source_br = null; private char[] _my_source_charary = null; - private StringBuffer _my_source_sbuff = null; - private StringBuilder _my_source_sbuil = null; - private String _my_source_str = null; private Phylogeny _next; private Object _nhx_source; private boolean _replace_underscores; @@ -98,8 +97,15 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse private Object _source; private int _source_length; private TAXONOMY_EXTRACTION _taxonomy_extraction; + private final String _encoding; public NHXParser() { + _encoding = ENCODING_DEFAULT; + init(); + } + + public NHXParser( final String encoding ) { + _encoding = encoding; init(); } @@ -127,10 +133,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse @Override public final Phylogeny[] parse() throws IOException { final List l = new ArrayList(); - //int c = 0; while ( hasNext() ) { l.add( next() ); - // c++; } final Phylogeny[] p = new Phylogeny[ l.size() ]; for( int i = 0; i < l.size(); ++i ) { @@ -154,24 +158,9 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse _current_anotation = new StringBuilder(); _current_phylogeny = null; _current_node = null; - _my_source_str = null; - _my_source_sbuff = null; - _my_source_sbuil = null; _my_source_charary = null; determineAndProcessSourceType( _source ); switch ( _input_type ) { - case STRING: - _my_source_br = null; - _my_source_str = ( String ) _nhx_source; - break; - case STRING_BUFFER: - _my_source_br = null; - _my_source_sbuff = ( StringBuffer ) _nhx_source; - break; - case STRING_BUILDER: - _my_source_br = null; - _my_source_sbuil = ( StringBuilder ) _nhx_source; - break; case CHAR_ARRAY: _my_source_br = null; _my_source_charary = ( char[] ) _nhx_source; @@ -216,24 +205,12 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); } else if ( nhx_source instanceof String ) { - _input_type = NHXParser.STRING; - _source_length = ( ( String ) nhx_source ).length(); - _nhx_source = nhx_source; - } - else if ( nhx_source instanceof StringBuilder ) { - _input_type = NHXParser.STRING_BUILDER; - _source_length = ( ( StringBuilder ) nhx_source ).length(); - _nhx_source = nhx_source; - } - else if ( nhx_source instanceof StringBuffer ) { - _input_type = NHXParser.STRING_BUFFER; - _source_length = ( ( StringBuffer ) nhx_source ).length(); - _nhx_source = nhx_source; - } - else if ( nhx_source instanceof StringBuilder ) { - _input_type = NHXParser.STRING_BUILDER; - _source_length = ( ( StringBuilder ) nhx_source ).length(); _nhx_source = nhx_source; + _input_type = NHXParser.BUFFERED_READER; + _source_length = 0; + InputStream is = new ByteArrayInputStream( (( String ) nhx_source ).getBytes(getEncoding())); + final InputStreamReader isr = new InputStreamReader( is, getEncoding() ); + _nhx_source = new BufferedReader( isr ); } else if ( nhx_source instanceof char[] ) { _input_type = NHXParser.CHAR_ARRAY; @@ -256,7 +233,9 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse if ( !ForesterUtil.isEmpty( error ) ) { throw new PhylogenyParserException( error ); } - _nhx_source = new BufferedReader( new FileReader( f ) ); + final InputStream is = new FileInputStream( f ); + final InputStreamReader isr = new InputStreamReader( is, getEncoding() ); + _nhx_source = new BufferedReader( isr ); } else if ( nhx_source instanceof URL ) { _input_type = NHXParser.BUFFERED_READER; @@ -269,7 +248,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse // catch ( final IOException e ) { // } } - final InputStreamReader isr = new InputStreamReader( ( ( URL ) nhx_source ).openStream() ); + final InputStream is = ( ( URL ) nhx_source ).openStream(); + final InputStreamReader isr = new InputStreamReader( is, getEncoding() ); _nhx_source = new BufferedReader( isr ); } else if ( nhx_source instanceof InputStream ) { @@ -283,12 +263,13 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse // catch ( final IOException e ) { // } } - final InputStreamReader isr = new InputStreamReader( ( InputStream ) nhx_source ); - _nhx_source = new BufferedReader( isr ); + final InputStream is = ( InputStream ) nhx_source; + final InputStreamReader isr = new InputStreamReader( is, getEncoding() ); + _nhx_source = new BufferedReader( isr ); } else { throw new IllegalArgumentException( getClass() + " can only parse objects of type String," - + " StringBuffer, StringBuilder, char[], File, InputStream, or URL " + + " char[], File, InputStream, or URL " + " [attempt to parse object of " + nhx_source.getClass() + "]." ); } } @@ -376,22 +357,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse if ( _i >= _source_length ) { break; } - else { - switch ( _input_type ) { - case STRING: - c = _my_source_str.charAt( _i ); - break; - case STRING_BUFFER: - c = _my_source_sbuff.charAt( _i ); - break; - case STRING_BUILDER: - c = _my_source_sbuil.charAt( _i ); - break; - case CHAR_ARRAY: - c = _my_source_charary[ _i ]; - break; - } - } + c = _my_source_charary[ _i ]; } if ( !_in_single_quote && !_in_double_quote ) { if ( c == ':' ) { @@ -407,7 +373,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } // \n\t is always ignored, // "=34 '=39 space=32 - if ( ( c < 32 ) || ( c > 126 ) || ( isIgnoreQuotes() && ( ( c == 32 ) || ( c == 34 ) || ( c == 39 ) ) ) + if ( ( c < 32 ) || ( c == 127 ) || ( isIgnoreQuotes() && ( ( c == 32 ) || ( c == 34 ) || ( c == 39 ) ) ) || ( ( c == 32 ) && ( !_in_single_quote && !_in_double_quote ) ) || ( ( _clade_level == 0 ) && ( c == ';' ) && ( !_in_single_quote && !_in_double_quote ) ) ) { //do nothing @@ -464,7 +430,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse final Phylogeny phy = processOpenParen(); if ( phy != null ) { ++_i; - // return phy; _next = phy; return; } @@ -801,6 +766,10 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } } + public String getEncoding() { + return _encoding; + } + public static enum TAXONOMY_EXTRACTION { AGGRESSIVE, NO, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT; } diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlParser.java index 4779189..2b46c0f 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlParser.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlParser.java @@ -25,8 +25,9 @@ package org.forester.io.parsers.phyloxml; +import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -58,6 +59,7 @@ import org.xml.sax.helpers.DefaultHandler; public class PhyloXmlParser implements PhylogenyParser { + private static final String UTF_8 = "UTF-8"; final public static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; final public static String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema"; final public static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource"; @@ -96,10 +98,16 @@ public class PhyloXmlParser implements PhylogenyParser { final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement(); if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) { final InputStream is = zip_file.getInputStream( zip_file_entry ); - reader = new InputStreamReader( is ); + reader = new InputStreamReader( is, UTF_8 ); break; } } + try { + zip_file.close(); + } + catch ( final Exception e ) { + // Ignore + } return reader; } @@ -176,7 +184,9 @@ public class PhyloXmlParser implements PhylogenyParser { } if ( getSource() instanceof File ) { if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) { - xml_reader.parse( new InputSource( new FileReader( ( File ) getSource() ) ) ); + final InputStream is = new FileInputStream( (File) getSource() ); + final InputStreamReader isr = new InputStreamReader( is, UTF_8 ); + xml_reader.parse( new InputSource( new BufferedReader( isr ) ) ); } else { final Reader reader = getReaderFromZipFile(); @@ -184,28 +194,25 @@ public class PhyloXmlParser implements PhylogenyParser { throw new PhylogenyParserException( "zip file \"" + getSource() + "\" appears not to contain any entries" ); } - xml_reader.parse( new InputSource( reader ) ); + xml_reader.parse( new InputSource( new BufferedReader( reader ) ) ); } } else if ( getSource() instanceof InputSource ) { - xml_reader.parse( ( InputSource ) getSource() ); + final InputSource is = ( InputSource ) getSource(); + is.setEncoding( UTF_8 ); + xml_reader.parse( is ); } else if ( getSource() instanceof InputStream ) { if ( !isZippedInputstream() ) { final InputStream is = ( InputStream ) getSource(); - xml_reader.parse( new InputSource( new InputStreamReader( is ) ) ); + xml_reader.parse( new InputSource( new BufferedReader( new InputStreamReader( is, UTF_8 ) ) ) ); } else { final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() ); zip_is.getNextEntry(); - xml_reader.parse( new InputSource( new InputStreamReader( zip_is ) ) ); + xml_reader.parse( new InputSource( new BufferedReader( new InputStreamReader( zip_is, UTF_8 ) ) ) ); } } - else if ( getSource() instanceof String ) { - final File file = new File( getSource().toString() ); - final Reader reader = new FileReader( file ); - xml_reader.parse( new InputSource( reader ) ); - } else if ( getSource() instanceof StringBuffer ) { final StringReader string_reader = new StringReader( getSource().toString() ); xml_reader.parse( new InputSource( string_reader ) ); diff --git a/forester/java/src/org/forester/io/parsers/tol/TolParser.java b/forester/java/src/org/forester/io/parsers/tol/TolParser.java index 6248f4a..7801c38 100644 --- a/forester/java/src/org/forester/io/parsers/tol/TolParser.java +++ b/forester/java/src/org/forester/io/parsers/tol/TolParser.java @@ -96,6 +96,12 @@ public class TolParser implements PhylogenyParser { break; } } + try { + zip_file.close(); + } + catch ( final Exception e ) { + // Ignore. + } return reader; } diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index e86ed49..0e16d86 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -28,8 +28,8 @@ package org.forester.io.parsers.util; import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -184,7 +184,7 @@ public final class ParserUtils { return parser; } - public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException { + public static BufferedReader createReader( final Object source, final String encoding ) throws IOException, FileNotFoundException { BufferedReader reader = null; if ( ( source instanceof File ) || ( source instanceof String ) ) { File f = null; @@ -203,10 +203,12 @@ public final class ParserUtils { else if ( !f.canRead() ) { throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" ); } - reader = new BufferedReader( new FileReader( f ) ); + final InputStream is = new FileInputStream( f ); + final InputStreamReader isr = new InputStreamReader( is, encoding ); + reader = new BufferedReader( isr ); } else if ( source instanceof InputStream ) { - reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) ); + reader = new BufferedReader( new InputStreamReader( ( InputStream ) source, encoding ) ); } else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) { reader = new BufferedReader( new StringReader( source.toString() ) ); diff --git a/forester/java/src/org/forester/io/writers/PhylogenyWriter.java b/forester/java/src/org/forester/io/writers/PhylogenyWriter.java index 8c40eae..6d3c836 100644 --- a/forester/java/src/org/forester/io/writers/PhylogenyWriter.java +++ b/forester/java/src/org/forester/io/writers/PhylogenyWriter.java @@ -27,7 +27,6 @@ package org.forester.io.writers; import java.io.BufferedWriter; import java.io.File; -import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; @@ -50,6 +49,7 @@ import org.forester.util.ForesterUtil; public final class PhylogenyWriter { + private static final String UTF_8 = "UTF-8"; public final static boolean INDENT_PHYLOXML_DEAFULT = true; public final static String PHYLO_XML_INTENDATION_BASE = " "; public final static String PHYLO_XML_VERSION_ENCODING_LINE = ""; @@ -399,7 +399,7 @@ public final class PhylogenyWriter { public void toNexus( final File out_file, final Phylogeny tree, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) throws IOException { - final Writer writer = new BufferedWriter( new PrintWriter( out_file ) ); + final Writer writer = new BufferedWriter( new PrintWriter( out_file, UTF_8 ) ); final List trees = new ArrayList( 1 ); trees.add( tree ); writeNexusStart( writer ); @@ -426,14 +426,14 @@ public final class PhylogenyWriter { final List trees, final int phyloxml_level, final String separator ) throws IOException { - final Writer writer = new BufferedWriter( new PrintWriter( out_file ) ); + final Writer writer = new BufferedWriter( new PrintWriter( out_file, UTF_8 ) ); toPhyloXML( writer, trees, phyloxml_level, separator ); writer.flush(); writer.close(); } public void toPhyloXML( final File out_file, final Phylogeny tree, final int phyloxml_level ) throws IOException { - final Writer writer = new BufferedWriter( new PrintWriter( out_file ) ); + final Writer writer = new BufferedWriter( new PrintWriter( out_file, UTF_8 ) ); writePhyloXmlStart( writer ); toPhyloXMLNoPhyloXmlSource( writer, tree, phyloxml_level ); writePhyloXmlEnd( writer ); @@ -633,18 +633,8 @@ public final class PhylogenyWriter { if ( out_file.exists() ) { throw new IOException( "attempt to overwrite existing file \"" + out_file.getAbsolutePath() + "\"" ); } - final PrintWriter out = new PrintWriter( new FileWriter( out_file ), true ); - if ( getOutputFormt() == FORMAT.PHYLO_XML ) { - out.print( PHYLO_XML_VERSION_ENCODING_LINE ); - out.print( ForesterUtil.LINE_SEPARATOR ); - out.print( PHYLO_XML_NAMESPACE_LINE ); - out.print( ForesterUtil.LINE_SEPARATOR ); - } + final PrintWriter out = new PrintWriter( out_file, UTF_8 ); out.print( sb ); - if ( getOutputFormt() == FORMAT.PHYLO_XML ) { - out.print( ForesterUtil.LINE_SEPARATOR ); - out.print( PHYLO_XML_END ); - } out.flush(); out.close(); } @@ -758,3 +748,7 @@ public final class PhylogenyWriter { NH, NHX, PHYLO_XML, NEXUS; } } + + + + diff --git a/forester/java/src/org/forester/surfacing/TestSurfacing.java b/forester/java/src/org/forester/surfacing/TestSurfacing.java index 6e1e6dc..72141d2 100644 --- a/forester/java/src/org/forester/surfacing/TestSurfacing.java +++ b/forester/java/src/org/forester/surfacing/TestSurfacing.java @@ -42,7 +42,6 @@ import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; import org.forester.io.parsers.HmmPfamOutputParser; -import org.forester.io.parsers.nexus.PaupLogParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; @@ -176,12 +175,7 @@ public class TestSurfacing { return false; } System.out.println( "OK." ); - System.out.print( " Paup log parser: " ); - if ( !TestSurfacing.testPaupLogParser( test_dir ) ) { - System.out.println( "failed." ); - return false; - } - System.out.println( "OK." ); + System.out.print( " Binary state matrix to gain loss matrix: " ); if ( !TestSurfacing.testBinaryStateMatrixToGainLossMatrix( test_dir ) ) { System.out.println( "failed." ); @@ -5432,73 +5426,5 @@ public class TestSurfacing { return true; } - private static boolean testPaupLogParser( final File test_dir ) { - try { - final PaupLogParser parser = new PaupLogParser(); - parser.setSource( new File( test_dir + ForesterUtil.getFileSeparator() + "paup_log_test_1" ) ); - final CharacterStateMatrix matrix = parser.parse(); - if ( matrix.getNumberOfIdentifiers() != 8 ) { - return false; - } - if ( !matrix.getIdentifier( 0 ).equals( "MOUSE" ) ) { - return false; - } - if ( !matrix.getIdentifier( 1 ).equals( "NEMVE" ) ) { - return false; - } - if ( !matrix.getIdentifier( 2 ).equals( "MONBE" ) ) { - return false; - } - if ( !matrix.getIdentifier( 3 ).equals( "DICDI" ) ) { - return false; - } - if ( !matrix.getIdentifier( 4 ).equals( "ARATH" ) ) { - return false; - } - if ( !matrix.getIdentifier( 5 ).equals( "6" ) ) { - return false; - } - if ( !matrix.getIdentifier( 6 ).equals( "7" ) ) { - return false; - } - if ( !matrix.getIdentifier( 7 ).equals( "8" ) ) { - return false; - } - if ( matrix.getNumberOfCharacters() != ( 66 + 66 + 28 ) ) { - return false; - } - if ( matrix.getState( 0, 4 ) != BinaryStates.ABSENT ) { - return false; - } - if ( matrix.getState( 0, 5 ) != BinaryStates.PRESENT ) { - return false; - } - if ( matrix.getState( 1, 5 ) != BinaryStates.PRESENT ) { - return false; - } - if ( matrix.getState( 7, 154 ) != BinaryStates.ABSENT ) { - return false; - } - if ( matrix.getState( 7, 155 ) != BinaryStates.PRESENT ) { - return false; - } - if ( matrix.getState( 7, 156 ) != BinaryStates.PRESENT ) { - return false; - } - if ( matrix.getState( 7, 157 ) != BinaryStates.ABSENT ) { - return false; - } - if ( matrix.getState( 7, 158 ) != BinaryStates.PRESENT ) { - return false; - } - if ( matrix.getState( 7, 159 ) != BinaryStates.ABSENT ) { - return false; - } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } + } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 3efa354..8ca0bd6 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -362,6 +362,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "NH parsing - special chars: " ); + if ( Test.testNHParsingSpecialChars() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Conversion to NHX (node level): " ); if ( Test.testNHXconversion() ) { System.out.println( "OK." ); @@ -488,6 +497,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "UTF-8 parsing from file: " ); + if ( Test.testUTF8ParsingFromFile() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Copying of node data: " ); if ( Test.testCopyOfNodeData() ) { System.out.println( "OK." ); @@ -1797,12 +1815,258 @@ public final class Test { } return true; } + + private static boolean testUTF8ParsingFromFile() { + try { + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); + final Phylogeny[] phylogenies_xml = ParserBasedPhylogenyFactory.getInstance().create( new File( Test.PATH_TO_TEST_DATA + "chars.xml" ), + xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( xml_parser.getErrorMessages().toString() ); + return false; + } + if ( phylogenies_xml.length != 1 ) { + return false; + } + + final Phylogeny[] phylogenies_xml2 = ParserBasedPhylogenyFactory.getInstance().create( new StringBuffer( phylogenies_xml[0].toPhyloXML( 0 )), + xml_parser ); + + final Phylogeny[] phylogenies_nh = ParserBasedPhylogenyFactory.getInstance().create( new File( Test.PATH_TO_TEST_DATA + "chars.nh" ), new NHXParser() ); + if ( phylogenies_nh.length != 1 ) { + return false; + } + + final Phylogeny[] phylogenies_nex = ParserBasedPhylogenyFactory.getInstance().create( new File( Test.PATH_TO_TEST_DATA + "chars.nex" ), new NexusPhylogeniesParser() ); + if ( phylogenies_nex.length != 1 ) { + return false; + } + + final String[] xml_n = phylogenies_xml[0].getAllExternalNodeNames(); + final String[] xml_n2 = phylogenies_xml2[0].getAllExternalNodeNames(); + final String[] nh_n = phylogenies_nh[0].getAllExternalNodeNames(); + final String[] nex_n = phylogenies_nex[0].getAllExternalNodeNames(); + final String n0 = "AQ~!@#$%^&*()_+-=\\{}|;:\"<>?,./"; + final String n1 = "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ"; + final String n2 = "漢字ひらがなカタカナ"; + final String n3 = "อักษรไทย"; + final String n4 = "繁體字"; + final String n5 = "한글"; + final String n6 = "देवनागरी"; + + final String n7 = "chữ Quốc ngữ"; + final String n8 = "ру́сский язы́к"; + final String n9 = "អក្សរខ្មែរ"; + + if ( !xml_n[0].equals( n0 ) ) { + System.out.println( xml_n[0] ); + System.out.println( n0 ); + return false; + } + if ( !xml_n2[0].equals( n0 ) ) { + System.out.println( xml_n2[0] ); + System.out.println( n0 ); + return false; + } + if ( !nh_n[0].equals( n0 ) ) { + System.out.println( nh_n[0] ); + System.out.println( n0 ); + return false; + } + if ( !nex_n[0].equals( n0 ) ) { + System.out.println( nex_n[0] ); + System.out.println( n0 ); + return false; + } + + if ( !xml_n[1].equals( n1 ) ) { + System.out.println( xml_n[1] ); + System.out.println( n1 ); + return false; + } + if ( !xml_n2[1].equals( n1 ) ) { + System.out.println( xml_n2[1] ); + System.out.println( n1 ); + return false; + } + if ( !nh_n[1].equals( n1 ) ) { + System.out.println( nh_n[1] ); + System.out.println( n1 ); + return false; + } + if ( !nex_n[1].equals( n1 ) ) { + System.out.println( nex_n[1] ); + System.out.println( n1 ); + return false; + } + + if ( !xml_n[2].equals( n2 ) ) { + System.out.println( xml_n[2] ); + System.out.println( n2 ); + return false; + } + if ( !xml_n2[2].equals( n2 ) ) { + System.out.println( xml_n2[2] ); + System.out.println( n2 ); + return false; + } + if ( !nh_n[2].equals( n2 ) ) { + System.out.println( nh_n[2] ); + System.out.println( n2 ); + return false; + } + if ( !nex_n[2].equals( n2 ) ) { + System.out.println( nex_n[2] ); + System.out.println( n2 ); + return false; + } + // + if ( !xml_n[3].equals( n3 ) ) { + System.out.println( xml_n[3] ); + System.out.println( n3 ); + return false; + } + if ( !xml_n2[3].equals( n3 ) ) { + System.out.println( xml_n2[3] ); + System.out.println( n3 ); + return false; + } + if ( !nh_n[3].equals( n3 ) ) { + System.out.println( nh_n[3] ); + System.out.println( n3 ); + return false; + } + if ( !nex_n[3].equals( n3 ) ) { + System.out.println( nex_n[3] ); + System.out.println( n3 ); + return false; + } + // + if ( !xml_n[4].equals( n4 ) ) { + System.out.println( xml_n[4] ); + System.out.println( n4 ); + return false; + } + if ( !nh_n[4].equals( n4 ) ) { + System.out.println( nh_n[4] ); + System.out.println( n4 ); + return false; + } + if ( !nex_n[4].equals( n4 ) ) { + System.out.println( nex_n[4] ); + System.out.println( n4 ); + return false; + } + // + if ( !xml_n[5].equals( n5 ) ) { + System.out.println( xml_n[5] ); + System.out.println( n5 ); + return false; + } + if ( !nh_n[5].equals( n5 ) ) { + System.out.println( nh_n[5] ); + System.out.println( n5 ); + return false; + } + if ( !nex_n[5].equals( n5 ) ) { + System.out.println( nex_n[5] ); + System.out.println( n5 ); + return false; + } + // + if ( !xml_n[6].equals( n6 ) ) { + System.out.println( xml_n[6] ); + System.out.println( n6 ); + return false; + } + if ( !nh_n[6].equals( n6 ) ) { + System.out.println( nh_n[6] ); + System.out.println( n6 ); + return false; + } + if ( !nex_n[6].equals( n6 ) ) { + System.out.println( nex_n[6] ); + System.out.println( n6 ); + return false; + } + // + if ( !xml_n[7].equals( n7 ) ) { + System.out.println( xml_n[7] ); + System.out.println( n7 ); + return false; + } + if ( !nh_n[7].equals( n7 ) ) { + System.out.println( nh_n[7] ); + System.out.println( n7 ); + return false; + } + if ( !nex_n[7].equals( n7 ) ) { + System.out.println( nex_n[7] ); + System.out.println( n7 ); + return false; + } + if ( !xml_n[8].equals( n8 ) ) { + System.out.println( xml_n[8] ); + System.out.println( n8 ); + return false; + } + if ( !nh_n[8].equals( n8 ) ) { + System.out.println( nh_n[8] ); + System.out.println( n8 ); + return false; + } + if ( !nex_n[8].equals( n8 ) ) { + System.out.println( nex_n[8] ); + System.out.println( n8 ); + return false; + } + if ( !xml_n[9].equals( n9 ) ) { + System.out.println( xml_n[9] ); + System.out.println( n9 ); + return false; + } + if ( !xml_n2[9].equals( n9 ) ) { + System.out.println( xml_n2[9] ); + System.out.println( n9 ); + return false; + } + if ( !nh_n[9].equals( n9 ) ) { + System.out.println( nh_n[9] ); + System.out.println( n9 ); + return false; + } + if ( !nex_n[9].equals( n9 ) ) { + System.out.println( nex_n[9] ); + System.out.println( n9 ); + return false; + } + if (!phylogenies_xml[0].toNewHampshire().equals( + phylogenies_nh[0].toNewHampshire() ) ) { + System.out.println( phylogenies_xml[0].toNewHampshire() ); + System.out.println( phylogenies_nh[0].toNewHampshire() ); + return false; + } + if (!phylogenies_xml[0].toNewHampshire().equals( + phylogenies_nex[0].toNewHampshire() ) ) { + System.out.println( phylogenies_xml[0].toNewHampshire() ); + System.out.println( phylogenies_nex[0].toNewHampshire() ); + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testBasicPhyloXMLparsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", + final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); @@ -2016,7 +2280,7 @@ public final class Test { else { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", + final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); @@ -2182,6 +2446,7 @@ public final class Test { } if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription() .equals( "Aguinaldo, A. M. A.; J. M. Turbeville, L. S. Linford, M. C. Rivera, J. R. Garey, R. A. Raff, & J. A. Lake (1997). \"Evidence for a clade of nematodes, arthropods and other moulting animals\". Nature 387 (6632): 489–493." ) ) ) { + System.out.println( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription() ); return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getTaxonomyCode().equals( "ECDYS" ) ) { @@ -2368,7 +2633,7 @@ public final class Test { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } } - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", + final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); @@ -2403,7 +2668,7 @@ public final class Test { return false; } final String x2 = Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml"; - final Phylogeny[] phylogenies_1 = factory.create( x2, xml_parser ); + final Phylogeny[] phylogenies_1 = factory.create( new File( x2 ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( "errors:" ); System.out.println( xml_parser.getErrorMessages().toString() ); @@ -2412,7 +2677,7 @@ public final class Test { if ( phylogenies_1.length != 4 ) { return false; } - final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml", + final Phylogeny[] phylogenies_2 = factory.create( new File(Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( "errors:" ); @@ -2425,7 +2690,7 @@ public final class Test { if ( phylogenies_2[ 0 ].getNumberOfExternalNodes() != 2 ) { return false; } - final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml", + final Phylogeny[] phylogenies_3 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); @@ -2447,7 +2712,7 @@ public final class Test { if ( !a.getNode( "node b1" ).getNodeData().getTaxonomy().getCommonName().equals( "b1 species" ) ) { return false; } - final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "special_characters.xml", + final Phylogeny[] phylogenies_4 = factory.create( new File( Test.PATH_TO_TEST_DATA + "special_characters.xml") , xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); @@ -3071,7 +3336,7 @@ public final class Test { return false; } final StringBuffer sb5 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ); - final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ]; + final Phylogeny t5 = factory.create( sb5.toString(), new NHXParser() )[ 0 ]; if ( t5.getNumberOfExternalNodes() != 8 ) { return false; } @@ -3079,17 +3344,17 @@ public final class Test { return false; } final StringBuffer sb6 = new StringBuffer( "(X,Y,Z,(((A111)A11:2)A1:2,(X,Y,Z,A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ); - final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ]; + final Phylogeny t6 = factory.create( sb6.toString(), new NHXParser() )[ 0 ]; if ( t6.getHeight() != 15 ) { return false; } final StringBuffer sb7 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:15,D:8)" ); - final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ]; + final Phylogeny t7 = factory.create( sb7.toString(), new NHXParser() )[ 0 ]; if ( t7.getHeight() != 15 ) { return false; } final StringBuffer sb8 = new StringBuffer( "(((A11:11)A1:2,(A21:2,A22:2,A23,A24,AA:)A2:11,A3:2)A:2,B:15,C:15,D:15)" ); - final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ]; + final Phylogeny t8 = factory.create( sb8.toString(), new NHXParser() )[ 0 ]; if ( t8.getNumberOfExternalNodes() != 10 ) { return false; } @@ -6577,7 +6842,7 @@ public final class Test { PhylogenyNode n; List ext = new ArrayList(); final StringBuffer sb0 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t0 = factory.create( sb0, new NHXParser() )[ 0 ]; + final Phylogeny t0 = factory.create( sb0.toString(), new NHXParser() )[ 0 ]; t0.getNode( "cd" ).setCollapse( true ); t0.getNode( "cde" ).setCollapse( true ); n = t0.getFirstExternalNode(); @@ -6605,7 +6870,7 @@ public final class Test { } ext.clear(); final StringBuffer sb1 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t1 = factory.create( sb1, new NHXParser() )[ 0 ]; + final Phylogeny t1 = factory.create( sb1.toString(), new NHXParser() )[ 0 ]; t1.getNode( "ab" ).setCollapse( true ); t1.getNode( "cd" ).setCollapse( true ); t1.getNode( "cde" ).setCollapse( true ); @@ -6632,7 +6897,7 @@ public final class Test { } ext.clear(); final StringBuffer sb2 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t2 = factory.create( sb2, new NHXParser() )[ 0 ]; + final Phylogeny t2 = factory.create( sb2.toString(), new NHXParser() )[ 0 ]; t2.getNode( "ab" ).setCollapse( true ); t2.getNode( "cd" ).setCollapse( true ); t2.getNode( "cde" ).setCollapse( true ); @@ -6660,7 +6925,7 @@ public final class Test { } ext.clear(); final StringBuffer sb3 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t3 = factory.create( sb3, new NHXParser() )[ 0 ]; + final Phylogeny t3 = factory.create( sb3.toString(), new NHXParser() )[ 0 ]; t3.getNode( "ab" ).setCollapse( true ); t3.getNode( "cd" ).setCollapse( true ); t3.getNode( "cde" ).setCollapse( true ); @@ -6686,7 +6951,7 @@ public final class Test { } ext.clear(); final StringBuffer sb4 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t4 = factory.create( sb4, new NHXParser() )[ 0 ]; + final Phylogeny t4 = factory.create( sb4.toString(), new NHXParser() )[ 0 ]; t4.getNode( "ab" ).setCollapse( true ); t4.getNode( "cd" ).setCollapse( true ); t4.getNode( "cde" ).setCollapse( true ); @@ -6701,7 +6966,7 @@ public final class Test { return false; } final StringBuffer sb5 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ]; + final Phylogeny t5 = factory.create( sb5.toString(), new NHXParser() )[ 0 ]; ext.clear(); n = t5.getFirstExternalNode(); while ( n != null ) { @@ -6736,7 +7001,7 @@ public final class Test { return false; } final StringBuffer sb6 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ]; + final Phylogeny t6 = factory.create( sb6.toString(), new NHXParser() )[ 0 ]; ext.clear(); t6.getNode( "ab" ).setCollapse( true ); n = t6.getNode( "ab" ); @@ -6769,7 +7034,7 @@ public final class Test { return false; } final StringBuffer sb7 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ]; + final Phylogeny t7 = factory.create( sb7.toString(), new NHXParser() )[ 0 ]; ext.clear(); t7.getNode( "cd" ).setCollapse( true ); n = t7.getNode( "a" ); @@ -6802,7 +7067,7 @@ public final class Test { return false; } final StringBuffer sb8 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ]; + final Phylogeny t8 = factory.create( sb8.toString(), new NHXParser() )[ 0 ]; ext.clear(); t8.getNode( "cd" ).setCollapse( true ); t8.getNode( "c" ).setCollapse( true ); @@ -6838,7 +7103,7 @@ public final class Test { return false; } final StringBuffer sb9 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t9 = factory.create( sb9, new NHXParser() )[ 0 ]; + final Phylogeny t9 = factory.create( sb9.toString(), new NHXParser() )[ 0 ]; ext.clear(); t9.getNode( "gh" ).setCollapse( true ); n = t9.getNode( "a" ); @@ -6871,7 +7136,7 @@ public final class Test { return false; } final StringBuffer sb10 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t10 = factory.create( sb10, new NHXParser() )[ 0 ]; + final Phylogeny t10 = factory.create( sb10.toString(), new NHXParser() )[ 0 ]; ext.clear(); t10.getNode( "gh" ).setCollapse( true ); t10.getNode( "g" ).setCollapse( true ); @@ -6906,7 +7171,7 @@ public final class Test { return false; } final StringBuffer sb11 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t11 = factory.create( sb11, new NHXParser() )[ 0 ]; + final Phylogeny t11 = factory.create( sb11.toString(), new NHXParser() )[ 0 ]; ext.clear(); t11.getNode( "gh" ).setCollapse( true ); t11.getNode( "fgh" ).setCollapse( true ); @@ -6937,7 +7202,7 @@ public final class Test { return false; } final StringBuffer sb12 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t12 = factory.create( sb12, new NHXParser() )[ 0 ]; + final Phylogeny t12 = factory.create( sb12.toString(), new NHXParser() )[ 0 ]; ext.clear(); t12.getNode( "gh" ).setCollapse( true ); t12.getNode( "fgh" ).setCollapse( true ); @@ -6971,7 +7236,7 @@ public final class Test { return false; } final StringBuffer sb13 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t13 = factory.create( sb13, new NHXParser() )[ 0 ]; + final Phylogeny t13 = factory.create( sb13.toString(), new NHXParser() )[ 0 ]; ext.clear(); t13.getNode( "ab" ).setCollapse( true ); t13.getNode( "b" ).setCollapse( true ); @@ -7001,7 +7266,7 @@ public final class Test { return false; } final StringBuffer sb14 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); - final Phylogeny t14 = factory.create( sb14, new NHXParser() )[ 0 ]; + final Phylogeny t14 = factory.create( sb14.toString(), new NHXParser() )[ 0 ]; ext.clear(); t14.getNode( "ab" ).setCollapse( true ); t14.getNode( "a" ).setCollapse( true ); @@ -7031,7 +7296,7 @@ public final class Test { return false; } final StringBuffer sb15 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); - final Phylogeny t15 = factory.create( sb15, new NHXParser() )[ 0 ]; + final Phylogeny t15 = factory.create( sb15.toString(), new NHXParser() )[ 0 ]; ext.clear(); t15.getNode( "ab" ).setCollapse( true ); t15.getNode( "a" ).setCollapse( true ); @@ -7066,7 +7331,7 @@ public final class Test { // // final StringBuffer sb16 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); - final Phylogeny t16 = factory.create( sb16, new NHXParser() )[ 0 ]; + final Phylogeny t16 = factory.create( sb16.toString(), new NHXParser() )[ 0 ]; ext.clear(); t16.getNode( "ab" ).setCollapse( true ); t16.getNode( "a" ).setCollapse( true ); @@ -8286,10 +8551,10 @@ public final class Test { if ( !p1b.toNewHampshire().equals( "(';A;',';B;1;');" ) ) { return false; } - final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ), new NHXParser() )[ 0 ]; + final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ).toString(), new NHXParser() )[ 0 ]; final Phylogeny p3 = factory.create( new char[] { '(', 'A', ',', 'B', '3', ')' }, new NHXParser() )[ 0 ]; final Phylogeny p4 = factory.create( "(A,B4);", new NHXParser() )[ 0 ]; - final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ), new NHXParser() )[ 0 ]; + final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ).toString(), new NHXParser() )[ 0 ]; final Phylogeny[] p7 = factory.create( "(A,B7);(C,D7)", new NHXParser() ); final Phylogeny[] p8 = factory.create( "(A,B8) (C,D8)", new NHXParser() ); final Phylogeny[] p9 = factory.create( "(A,B9)\n(C,D9)", new NHXParser() ); @@ -8547,21 +8812,21 @@ public final class Test { if ( p46.length != 0 ) { return false; } - final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ).toString(), new NHXParser() )[ 0 ]; if ( !isEqual( 0.44, p47.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } - final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ]; if ( !isEqual( 88, p48.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } final Phylogeny p49 = factory - .create( new StringBuffer( "((A,B)a[comment:a,b;(a)]b:2[0.44][comment(a,b,b);],C)" ), + .create( new StringBuffer( "((A,B)a[comment:a,b;(a)]b:2[0.44][comment(a,b,b);],C)" ).toString(), new NHXParser() )[ 0 ]; if ( !isEqual( 0.44, p49.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } - final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ]; if ( p50.getNode( "A" ) == null ) { return false; } @@ -8576,21 +8841,21 @@ public final class Test { .equals( "((A,B)88:2.0,C);" ) ) { return false; } - final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ]; if ( p51.getNode( "A(A" ) == null ) { return false; } - final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ]; if ( p52.getNode( "A(A" ) == null ) { return false; } final Phylogeny p53 = factory - .create( new StringBuffer( "(('A(A',\"B (x (a' ,b) f(x);\"[com])[ment]ab:2[88],C)" ), + .create( new StringBuffer( "(('A(A',\"B (x (a' ,b) f(x);\"[com])[ment]ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ]; if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) { return false; } - final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ).toString(), new NHXParser() )[ 0 ]; if ( p54.getNode( "A" ) == null ) { return false; } @@ -8598,7 +8863,7 @@ public final class Test { return false; } final Phylogeny p55 = factory - .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ), + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ).toString(), new NHXParser() )[ 0 ]; if ( !p55 .toNewHampshire() @@ -8607,7 +8872,7 @@ public final class Test { return false; } final Phylogeny p56 = factory - .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ), + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ).toString(), new NHXParser() )[ 0 ]; if ( !p56 .toNewHampshire() @@ -8616,7 +8881,7 @@ public final class Test { return false; } final Phylogeny p57 = factory - .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ), + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ).toString(), new NHXParser() )[ 0 ]; if ( !p57 .toNewHampshire() @@ -8625,25 +8890,25 @@ public final class Test { return false; } final String s58 = "('Homo \"man\" sapiens:1',\"Homo 'man' sapiens;\")';root \"1_ )';"; - final Phylogeny p58 = factory.create( new StringBuffer( s58 ), new NHXParser() )[ 0 ]; + final Phylogeny p58 = factory.create( s58, new NHXParser() )[ 0 ]; if ( !p58.toNewHampshire().equals( s58 ) ) { System.out.println( p58.toNewHampshire() ); return false; } final String s59 = "('Homo \"man sapiens:1',\"Homo 'man sapiens\")\"root; '1_ )\";"; - final Phylogeny p59 = factory.create( new StringBuffer( s59 ), new NHXParser() )[ 0 ]; + final Phylogeny p59 = factory.create( s59 , new NHXParser() )[ 0 ]; if ( !p59.toNewHampshire().equals( s59 ) ) { System.out.println( p59.toNewHampshire() ); return false; } final String s60 = "('\" ;,:\":\"',\"'abc def' g's_\",'=:0.45+,.:%~`!@#$%^&*()_-+={} | ;,');"; - final Phylogeny p60 = factory.create( new StringBuffer( s60 ), new NHXParser() )[ 0 ]; + final Phylogeny p60 = factory.create( s60, new NHXParser() )[ 0 ]; if ( !p60.toNewHampshire().equals( s60 ) ) { System.out.println( p60.toNewHampshire() ); return false; } final String s61 = "('H[omo] \"man\" sapiens:1',\"H[omo] 'man' sapiens;\",H[omo] sapiens)';root \"1_ )';"; - final Phylogeny p61 = factory.create( new StringBuffer( s61 ), new NHXParser() )[ 0 ]; + final Phylogeny p61 = factory.create( s61, new NHXParser() )[ 0 ]; if ( !p61.toNewHampshire() .equals( "('H{omo} \"man\" sapiens:1',\"H{omo} 'man' sapiens;\",Hsapiens)';root \"1_ )';" ) ) { System.out.println( p61.toNewHampshire() ); @@ -8656,7 +8921,36 @@ public final class Test { } return true; } - + + private static boolean testNHParsingSpecialChars() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final String i0 = "(A!+=~QWERTY!@#$%^&*-,€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ)"; + final Phylogeny p0 = factory.create( i0, new NHXParser() )[ 0 ]; + if ( !p0.toNewHampshireX().equals( i0 ) ) { + System.out.println(); + System.out.println( p0.toNewHampshireX() ); + System.out.println( i0 ); + return false; + } + final String i1 = "(हिंदी,한글,ไทย,'Tiếng Việt',ひらがなカタカナ漢字,繁體字,русский)"; + final Phylogeny p1 = factory.create( i1, new NHXParser() )[ 0 ]; + if ( !p1.toNewHampshireX().equals( i1 ) ) { + System.out.println(); + System.out.println( p1.toNewHampshireX() ); + System.out.println( i1 ); + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + + private static boolean testNHParsingIter() { try { final String p0_str = "(A,B);"; @@ -9840,7 +10134,7 @@ public final class Test { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } } - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_distribution.xml", + final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_distribution.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); diff --git a/forester/test_data/chars.nex b/forester/test_data/chars.nex new file mode 100644 index 0000000..5d1db95 --- /dev/null +++ b/forester/test_data/chars.nex @@ -0,0 +1,8 @@ +#NEXUS +Begin Taxa; + Dimensions NTax=10; + TaxLabels 'AQ~!@#$%^&*()_+-=\{}|;:"<>?,./' €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ 漢字ひらがなカタカナ อักษรไทย 繁體字 한글 देवनागरी 'chữ Quốc ngữ' 'ру́сский язы́к' អក្សរខ្មែរ; +End; +Begin Trees; + Tree 'tree1'=[&R](('AQ~!@#$%^&*()_+-=\{}|;:"<>?,./',€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ),(漢字ひらがなカタカナ,อักษรไทย),(繁體字,한글),(देवनागरी,'chữ Quốc ngữ','ру́сский язы́к',អក្សរខ្មែរ)); +End; diff --git a/forester/test_data/chars.nh b/forester/test_data/chars.nh new file mode 100644 index 0000000..1d02611 --- /dev/null +++ b/forester/test_data/chars.nh @@ -0,0 +1 @@ +(('AQ~!@#$%^&*()_+-=\{}|;:"<>?,./',€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ),(漢字ひらがなカタカナ,อักษรไทย),(繁體字,한글),(देवनागरी,'chữ Quốc ngữ','ру́сский язы́к',អក្សរខ្មែរ)); \ No newline at end of file diff --git a/forester/test_data/chars.xml b/forester/test_data/chars.xml new file mode 100644 index 0000000..5bcd476 --- /dev/null +++ b/forester/test_data/chars.xml @@ -0,0 +1,45 @@ + + + + + + + AQ~!@#$%^&*()_+-=\{}|;:"<>?,./ + + + €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ + + + + + 漢字ひらがなカタカナ + + + อักษรไทย + + + + + 繁體字 + + + 한글 + + + + + देवनागरी + + + chữ Quốc ngữ + + + ру́сский язы́к + + + អក្សរខ្មែរ + + + + + \ No newline at end of file