Fixed issue with UTF-8 encoding.
author cmzmasek <czmasek@czamseklt-w7.jcvi.ORG>
Thu, 23 Jun 2016 22:59:55 +0000 (15:59 -0700)
committer cmzmasek <czmasek@czamseklt-w7.jcvi.ORG>
Thu, 23 Jun 2016 22:59:55 +0000 (15:59 -0700)
17 files changed:
forester/aptx/aptx_configuration_files/_aptx_configuration_file
forester/java/src/org/forester/archaeopteryx/AptxConstants.java
forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java
forester/java/src/org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java
forester/java/src/org/forester/io/parsers/nexus/NexusCharactersParser.java
forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java
forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java [deleted file]
forester/java/src/org/forester/io/parsers/nhx/NHXParser.java
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlParser.java
forester/java/src/org/forester/io/parsers/tol/TolParser.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/io/writers/PhylogenyWriter.java
forester/java/src/org/forester/surfacing/TestSurfacing.java
forester/java/src/org/forester/test/Test.java
forester/test_data/chars.nex [new file with mode: 0644]
forester/test_data/chars.nh [new file with mode: 0644]
forester/test_data/chars.xml [new file with mode: 0644]

index f0d78eb..1ffa5d8 100644 (file)
@@ -17,7 +17,7 @@ native_ui: ?
 #     value for bootstrap support)
 #
 #  Font family name: 'font_family':
-#     Example: 'font_family: Arial,Calibri,Helvetica'
+#     Example: 'font_family: Sans,Helvetica'
 #     It is advisable to use more than one value for font_family (in
 #     decreasing order of preference). Font family names have to be
 #     comma separated (no spaces). Spaces in font names have to be
@@ -162,7 +162,7 @@ native_ui: ?
 
 
 min_confidence_value:                      0.0
-font_family:                               Arial,Helvetica,Verdana,Tahoma,Dialog,Lucida-Sans,SansSerif,Sans-serif,Sans
+font_family:                               SansSerif,Sans-serif,Sans,Dialog,Tahoma,Helvetica,Arial,Verdana
 font_size:                                 10
 font_size_min:                             2
 font_size_max:                             20
index 7d660f4..3a608f4 100644 (file)
@@ -44,8 +44,8 @@ public final class AptxConstants {
     final static String         VERSION                                                       = "0.9909 experimental";
     final static String         PRG_DATE                                                      = "160622";
     final static String         DEFAULT_CONFIGURATION_FILE_NAME                               = "_aptx_configuration_file";
-    final static String[]       DEFAULT_FONT_CHOICES                                          = { "Arial", "Helvetica",
-            "Verdana", "Tahoma", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans"  };
+    final static String[]       DEFAULT_FONT_CHOICES                                          = { 
+            "SansSerif", "Sans-serif", "Sans", "Dialog", "Lucida Sans", "Tahoma", "Helvetica", "Arial", "Verdana" };
     final static boolean        VERBOSE_DEFAULT                                               = false;
     final static int            DOMAIN_STRUCTURE_DEFAULT_WIDTH                                = 100;
     final static String         AUTHOR_EMAIL                                                  = "phyloxml@gmail.com";
index 8b4fbdc..8d37609 100644 (file)
@@ -1858,7 +1858,7 @@ public final class MainFrameApplication extends MainFrame {
         _options_jmenu.add( _print_size_mi = new JMenuItem( "" ) );\r
         _options_jmenu.add( _choose_pdf_width_mi = new JMenuItem( "" ) );\r
         _options_jmenu.addSeparator();\r
-        _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/NHX/Nexus Input:" ), getConfiguration() ) );\r
+        _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/NHX/Nexus Read:" ), getConfiguration() ) );\r
         _options_jmenu\r
         .add( _internal_number_are_confidence_for_nh_parsing_cbmi = new JCheckBoxMenuItem( "Internal Node Names are Confidence Values" ) );\r
         _options_jmenu.add( _replace_underscores_cbmi = new JCheckBoxMenuItem( "Replace Underscores with Spaces" ) );\r
@@ -1882,7 +1882,7 @@ public final class MainFrameApplication extends MainFrame {
         _radio_group_2.add( _extract_taxonomy_pfam_strict_rbmi );\r
         _radio_group_2.add( _extract_taxonomy_pfam_relaxed_rbmi );\r
         _radio_group_2.add( _extract_taxonomy_agressive_rbmi );\r
-        _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/Nexus Output:" ), getConfiguration() ) );\r
+        _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/Nexus Save:" ), getConfiguration() ) );\r
         _options_jmenu\r
         .add( _use_brackets_for_conf_in_nh_export_cbmi = new JCheckBoxMenuItem( USE_BRACKETS_FOR_CONF_IN_NH_LABEL ) );\r
         _use_brackets_for_conf_in_nh_export_cbmi\r
index 721bcc6..e110ec1 100644 (file)
@@ -38,6 +38,7 @@ import org.forester.io.parsers.util.PhylogenyParserException;
 
 public class NexusBinaryStatesMatrixParser {
 
+    private static final String UTF_8 = "UTF-8";
     private Object                             _nexus_source;
     private CharacterStateMatrix<BinaryStates> _matrix;
     private int                                _nchar;
@@ -61,7 +62,7 @@ public class NexusBinaryStatesMatrixParser {
 
     public void parse() throws IOException {
         reset();
-        final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
+        final BufferedReader reader = ParserUtils.createReader( getNexusSource(), UTF_8 );
         String line;
         boolean in_matrix = false;
         int identifier_index = 0;
index 7962717..3407441 100644 (file)
@@ -37,6 +37,7 @@ import org.forester.util.ForesterUtil;
 
 public class NexusCharactersParser {
 
+    private static final String UTF_8 = "UTF-8";
     final private static String charstatelabels = NexusConstants.CHARSTATELABELS.toLowerCase();
     private Object              _nexus_source;
     private String[]            _char_state_labels;
@@ -51,7 +52,7 @@ public class NexusCharactersParser {
 
     public void parse() throws IOException {
         reset();
-        final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
+        final BufferedReader reader = ParserUtils.createReader( getNexusSource(), UTF_8 );
         String line;
         boolean in_charstatelabels = false;
         final List<String> labels_list = new ArrayList<String>();
index 0f51cd7..d4eabfd 100644 (file)
@@ -53,6 +53,8 @@ import org.forester.util.ForesterUtil;
 
 public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser {
 
+    private static final String UTF_8 = "UTF-8";
+
     final private static boolean DEBUG                               = false;
     
     final private static String            begin_trees               = NexusConstants.BEGIN_TREES.toLowerCase();
@@ -144,7 +146,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
         _rooted_info_present = false;
         _is_rooted = false;
         _seqs = new HashMap<String, MolecularSequence>();
-        _br = ParserUtils.createReader( _nexus_source );
+        _br = ParserUtils.createReader( _nexus_source, UTF_8 );
         getNext();
     }
 
@@ -182,7 +184,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
         if ( rooted_info_present ) {
             pars.setGuessRootedness( false );
         }
-        pars.setSource( nhx );
+        pars.setSource( nhx.toString() );
         final Phylogeny p = pars.next();
         if ( p == null ) {
             throw new PhylogenyParserException( "failed to create phylogeny" );
diff --git a/forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java b/forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java
deleted file mode 100644 (file)
index a1ecfcd..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-// $Id:
-//
-// FORESTER -- software libraries and applications
-// for evolutionary biology research and applications.
-//
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
-// All rights reserved
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-//
-// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/
-
-package org.forester.io.parsers.nexus;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
-import org.forester.evoinference.matrix.character.CharacterStateMatrix;
-import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
-import org.forester.io.parsers.util.ParserUtils;
-import org.forester.io.parsers.util.PhylogenyParserException;
-
-public class PaupLogParser {
-
-    private static final String DATA_MATRIX_AND_RECONSTRUCTED_STATES_FOR_INTERNAL_NODES = "data matrix and reconstructed states for internal nodes";
-    private Object              _nexus_source;
-
-    private Object getNexusSource() {
-        return _nexus_source;
-    }
-
-    public CharacterStateMatrix<BinaryStates> parse() throws IOException {
-        final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
-        String line;
-        boolean saw_line = false;
-        int identifier_index = 0;
-        boolean first_block = true;
-        boolean saw_data_matrix_line = false;
-        final List<String> identifiers = new ArrayList<String>();
-        final List<List<BinaryStates>> states = new ArrayList<List<BinaryStates>>();
-        boolean done = false;
-        while ( ( ( line = reader.readLine() ) != null ) && !done ) {
-            line = line.trim();
-            if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
-                if ( ( ( identifier_index > 0 ) && line.startsWith( "Tree " ) )
-                        || line.startsWith( "Character change list" ) ) {
-                    done = true;
-                    continue;
-                }
-                if ( line.toLowerCase().startsWith( DATA_MATRIX_AND_RECONSTRUCTED_STATES_FOR_INTERNAL_NODES ) ) {
-                    saw_line = false;
-                    saw_data_matrix_line = true;
-                    identifier_index = 0;
-                    if ( first_block && ( line.indexOf( "continued" ) > 0 ) ) {
-                        first_block = false;
-                    }
-                }
-                if ( saw_data_matrix_line && line.startsWith( "----------" ) ) {
-                    saw_line = true;
-                }
-                else if ( saw_line && ( line.indexOf( ' ' ) > 0 ) ) {
-                    final String[] s = line.split( "\\s+" );
-                    if ( s.length != 2 ) {
-                        throw new NexusFormatException( "unexpected format at line: " + line );
-                    }
-                    final String identifier = s[ 0 ];
-                    final String row = s[ 1 ];
-                    if ( first_block ) {
-                        if ( identifiers.contains( identifier ) ) {
-                            throw new NexusFormatException( "identifier [" + identifier + "] is not unique in line: "
-                                    + line );
-                        }
-                        identifiers.add( identifier );
-                        states.add( new ArrayList<BinaryStates>() );
-                    }
-                    else {
-                        if ( !identifiers.contains( identifier ) ) {
-                            throw new NexusFormatException( "new identifier [" + identifier + "] at line: " + line );
-                        }
-                    }
-                    for( int c = 0; c < row.length(); ++c ) {
-                        final char ch = row.charAt( c );
-                        if ( ch == '0' ) {
-                            states.get( identifier_index ).add( BinaryStates.ABSENT );
-                        }
-                        else if ( ch == '1' ) {
-                            states.get( identifier_index ).add( BinaryStates.PRESENT );
-                        }
-                        else {
-                            throw new NexusFormatException( "unknown character state [" + ch + "] at line: " + line );
-                        }
-                    }
-                    ++identifier_index;
-                }
-            }
-        }
-        final CharacterStateMatrix<BinaryStates> matrix = new BasicCharacterStateMatrix<BinaryStates>( states );
-        int i = 0;
-        for( final String identifier : identifiers ) {
-            matrix.setIdentifier( i++, identifier );
-        }
-        return matrix;
-    }
-
-    public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
-        if ( nexus_source == null ) {
-            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
-        }
-        _nexus_source = nexus_source;
-    }
-}
index 6989602..04f5f85 100644 (file)
@@ -24,8 +24,9 @@
 package org.forester.io.parsers.nhx;
 
 import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -67,10 +68,11 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
     final static private boolean GUESS_IF_SUPPORT_VALUES                    = true;
     final static private boolean GUESS_ROOTEDNESS_DEFAULT                   = true;
     final static private boolean IGNORE_QUOTES_DEFAULT                      = false;
-    final static private byte    STRING                                     = 0;
-    final static private byte    STRING_BUFFER                              = 1;
-    final static private byte    STRING_BUILDER                             = 4;
+   
     final static private char    BELL                                       = 7;
+    public final static String   UTF_8 = "UTF-8";
+    public final static String   ISO_8859_1 = "ISO-8859-1";
+    private final static String  ENCODING_DEFAULT = UTF_8;
     private boolean              _allow_errors_in_distance_to_parent;
     private int                  _clade_level;
     private StringBuilder        _current_anotation;
@@ -86,9 +88,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
     private byte                 _input_type;
     private BufferedReader       _my_source_br                              = null;
     private char[]               _my_source_charary                         = null;
-    private StringBuffer         _my_source_sbuff                           = null;
-    private StringBuilder        _my_source_sbuil                           = null;
-    private String               _my_source_str                             = null;
     private Phylogeny            _next;
     private Object               _nhx_source;
     private boolean              _replace_underscores;
@@ -98,8 +97,15 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
     private Object               _source;
     private int                  _source_length;
     private TAXONOMY_EXTRACTION  _taxonomy_extraction;
+    private final String         _encoding;
 
     public NHXParser() {
+        _encoding = ENCODING_DEFAULT;
+        init();
+    }
+    
+    public NHXParser( final String encoding ) {
+        _encoding = encoding;
         init();
     }
 
@@ -127,10 +133,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
     @Override
     public final Phylogeny[] parse() throws IOException {
         final List<Phylogeny> l = new ArrayList<Phylogeny>();
-        //int c = 0;
         while ( hasNext() ) {
             l.add( next() );
-            // c++;
         }
         final Phylogeny[] p = new Phylogeny[ l.size() ];
         for( int i = 0; i < l.size(); ++i ) {
@@ -154,24 +158,9 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
         _current_anotation = new StringBuilder();
         _current_phylogeny = null;
         _current_node = null;
-        _my_source_str = null;
-        _my_source_sbuff = null;
-        _my_source_sbuil = null;
         _my_source_charary = null;
         determineAndProcessSourceType( _source );
         switch ( _input_type ) {
-            case STRING:
-                _my_source_br = null;
-                _my_source_str = ( String ) _nhx_source;
-                break;
-            case STRING_BUFFER:
-                _my_source_br = null;
-                _my_source_sbuff = ( StringBuffer ) _nhx_source;
-                break;
-            case STRING_BUILDER:
-                _my_source_br = null;
-                _my_source_sbuil = ( StringBuilder ) _nhx_source;
-                break;
             case CHAR_ARRAY:
                 _my_source_br = null;
                 _my_source_charary = ( char[] ) _nhx_source;
@@ -216,24 +205,12 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
             throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
         }
         else if ( nhx_source instanceof String ) {
-            _input_type = NHXParser.STRING;
-            _source_length = ( ( String ) nhx_source ).length();
-            _nhx_source = nhx_source;
-        }
-        else if ( nhx_source instanceof StringBuilder ) {
-            _input_type = NHXParser.STRING_BUILDER;
-            _source_length = ( ( StringBuilder ) nhx_source ).length();
-            _nhx_source = nhx_source;
-        }
-        else if ( nhx_source instanceof StringBuffer ) {
-            _input_type = NHXParser.STRING_BUFFER;
-            _source_length = ( ( StringBuffer ) nhx_source ).length();
-            _nhx_source = nhx_source;
-        }
-        else if ( nhx_source instanceof StringBuilder ) {
-            _input_type = NHXParser.STRING_BUILDER;
-            _source_length = ( ( StringBuilder ) nhx_source ).length();
             _nhx_source = nhx_source;
+            _input_type = NHXParser.BUFFERED_READER;
+            _source_length = 0;
+            InputStream is = new ByteArrayInputStream( (( String ) nhx_source ).getBytes(getEncoding()));
+            final InputStreamReader isr = new InputStreamReader( is, getEncoding() );
+            _nhx_source = new BufferedReader( isr );
         }
         else if ( nhx_source instanceof char[] ) {
             _input_type = NHXParser.CHAR_ARRAY;
@@ -256,7 +233,9 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
             if ( !ForesterUtil.isEmpty( error ) ) {
                 throw new PhylogenyParserException( error );
             }
-            _nhx_source = new BufferedReader( new FileReader( f ) );
+            final InputStream is = new FileInputStream( f );
+            final InputStreamReader isr = new InputStreamReader( is, getEncoding() );
+            _nhx_source = new BufferedReader( isr );
         }
         else if ( nhx_source instanceof URL ) {
             _input_type = NHXParser.BUFFERED_READER;
@@ -269,7 +248,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                 //                catch ( final IOException e ) {
                 //                }
             }
-            final InputStreamReader isr = new InputStreamReader( ( ( URL ) nhx_source ).openStream() );
+            final InputStream is = ( ( URL ) nhx_source ).openStream();
+            final InputStreamReader isr = new InputStreamReader( is, getEncoding() );
             _nhx_source = new BufferedReader( isr );
         }
         else if ( nhx_source instanceof InputStream ) {
@@ -283,12 +263,13 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                 //                catch ( final IOException e ) {
                 //                }
             }
-            final InputStreamReader isr = new InputStreamReader( ( InputStream ) nhx_source );
-            _nhx_source = new BufferedReader( isr );
+            final InputStream is = ( InputStream ) nhx_source;
+            final InputStreamReader isr = new InputStreamReader( is, getEncoding() );
+             _nhx_source = new BufferedReader( isr );
         }
         else {
             throw new IllegalArgumentException( getClass() + " can only parse objects of type String,"
-                    + " StringBuffer, StringBuilder, char[], File, InputStream, or URL "
+                    + " char[], File, InputStream, or URL "
                     + " [attempt to parse object of " + nhx_source.getClass() + "]." );
         }
     }
@@ -376,22 +357,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                 if ( _i >= _source_length ) {
                     break;
                 }
-                else {
-                    switch ( _input_type ) {
-                        case STRING:
-                            c = _my_source_str.charAt( _i );
-                            break;
-                        case STRING_BUFFER:
-                            c = _my_source_sbuff.charAt( _i );
-                            break;
-                        case STRING_BUILDER:
-                            c = _my_source_sbuil.charAt( _i );
-                            break;
-                        case CHAR_ARRAY:
-                            c = _my_source_charary[ _i ];
-                            break;
-                    }
-                }
+                c = _my_source_charary[ _i ];
             }
             if ( !_in_single_quote && !_in_double_quote ) {
                 if ( c == ':' ) {
@@ -407,7 +373,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
             }
             // \n\t is always ignored,
             // "=34  '=39 space=32
-            if ( ( c < 32 ) || ( c > 126 ) || ( isIgnoreQuotes() && ( ( c == 32 ) || ( c == 34 ) || ( c == 39 ) ) )
+            if ( ( c < 32 ) || ( c == 127 ) || ( isIgnoreQuotes() && ( ( c == 32 ) || ( c == 34 ) || ( c == 39 ) ) )
                     || ( ( c == 32 ) && ( !_in_single_quote && !_in_double_quote ) )
                     || ( ( _clade_level == 0 ) && ( c == ';' ) && ( !_in_single_quote && !_in_double_quote ) ) ) {
                 //do nothing
@@ -464,7 +430,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                 final Phylogeny phy = processOpenParen();
                 if ( phy != null ) {
                     ++_i;
-                    //  return phy;
                     _next = phy;
                     return;
                 }
@@ -801,6 +766,10 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
         }
     }
 
+    public String getEncoding() {
+        return _encoding;
+    }
+
     public static enum TAXONOMY_EXTRACTION {
         AGGRESSIVE, NO, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT;
     }
index 4779189..2b46c0f 100644 (file)
@@ -25,8 +25,9 @@
 
 package org.forester.io.parsers.phyloxml;
 
+import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -58,6 +59,7 @@ import org.xml.sax.helpers.DefaultHandler;
 
 public class PhyloXmlParser implements PhylogenyParser {
 
+    private static final String  UTF_8                                      = "UTF-8";
     final public static String   JAXP_SCHEMA_LANGUAGE                       = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
     final public static String   W3C_XML_SCHEMA                             = "http://www.w3.org/2001/XMLSchema";
     final public static String   JAXP_SCHEMA_SOURCE                         = "http://java.sun.com/xml/jaxp/properties/schemaSource";
@@ -96,10 +98,16 @@ public class PhyloXmlParser implements PhylogenyParser {
             final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement();
             if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) {
                 final InputStream is = zip_file.getInputStream( zip_file_entry );
-                reader = new InputStreamReader( is );
+                reader = new InputStreamReader( is, UTF_8 );
                 break;
             }
         }
+        try {
+            zip_file.close();
+        }
+        catch ( final Exception e ) {
+            // Ignore
+        }
         return reader;
     }
 
@@ -176,7 +184,9 @@ public class PhyloXmlParser implements PhylogenyParser {
             }
             if ( getSource() instanceof File ) {
                 if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) {
-                    xml_reader.parse( new InputSource( new FileReader( ( File ) getSource() ) ) );
+                    final InputStream is = new FileInputStream( (File) getSource() );
+                    final InputStreamReader isr = new InputStreamReader( is, UTF_8 );
+                    xml_reader.parse( new InputSource( new BufferedReader( isr ) ) );
                 }
                 else {
                     final Reader reader = getReaderFromZipFile();
@@ -184,28 +194,25 @@ public class PhyloXmlParser implements PhylogenyParser {
                         throw new PhylogenyParserException( "zip file \"" + getSource()
                                                             + "\" appears not to contain any entries" );
                     }
-                    xml_reader.parse( new InputSource( reader ) );
+                    xml_reader.parse( new InputSource( new BufferedReader( reader ) ) );
                 }
             }
             else if ( getSource() instanceof InputSource ) {
-                xml_reader.parse( ( InputSource ) getSource() );
+                final InputSource is = ( InputSource ) getSource();
+                is.setEncoding( UTF_8 );
+                xml_reader.parse( is );
             }
             else if ( getSource() instanceof InputStream ) {
                 if ( !isZippedInputstream() ) {
                     final InputStream is = ( InputStream ) getSource();
-                    xml_reader.parse( new InputSource( new InputStreamReader( is ) ) );
+                    xml_reader.parse( new InputSource( new BufferedReader( new InputStreamReader( is, UTF_8 ) ) ) );
                 }
                 else {
                     final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() );
                     zip_is.getNextEntry();
-                    xml_reader.parse( new InputSource( new InputStreamReader( zip_is ) ) );
+                    xml_reader.parse( new InputSource( new BufferedReader( new InputStreamReader( zip_is, UTF_8 ) ) ) );
                 }
             }
-            else if ( getSource() instanceof String ) {
-                final File file = new File( getSource().toString() );
-                final Reader reader = new FileReader( file );
-                xml_reader.parse( new InputSource( reader ) );
-            }
             else if ( getSource() instanceof StringBuffer ) {
                 final StringReader string_reader = new StringReader( getSource().toString() );
                 xml_reader.parse( new InputSource( string_reader ) );
index 6248f4a..7801c38 100644 (file)
@@ -96,6 +96,12 @@ public class TolParser implements PhylogenyParser {
                 break;
             }
         }
+        try {
+            zip_file.close();
+        }
+        catch ( final Exception e ) {
+            // Ignore.
+        }
         return reader;
     }
 
index e86ed49..0e16d86 100644 (file)
@@ -28,8 +28,8 @@ package org.forester.io.parsers.util;
 
 import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.FileNotFoundException;
-import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -184,7 +184,7 @@ public final class ParserUtils {
         return parser;
     }
 
-    public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
+    public static BufferedReader createReader( final Object source, final String encoding ) throws IOException, FileNotFoundException {
         BufferedReader reader = null;
         if ( ( source instanceof File ) || ( source instanceof String ) ) {
             File f = null;
@@ -203,10 +203,12 @@ public final class ParserUtils {
             else if ( !f.canRead() ) {
                 throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" );
             }
-            reader = new BufferedReader( new FileReader( f ) );
+            final InputStream is = new FileInputStream( f );
+            final InputStreamReader isr = new InputStreamReader( is, encoding );
+            reader = new BufferedReader( isr );
         }
         else if ( source instanceof InputStream ) {
-            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
+            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source, encoding ) );
         }
         else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
             reader = new BufferedReader( new StringReader( source.toString() ) );
index 8c40eae..6d3c836 100644 (file)
@@ -27,7 +27,6 @@ package org.forester.io.writers;
 
 import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
@@ -50,6 +49,7 @@ import org.forester.util.ForesterUtil;
 
 public final class PhylogenyWriter {
 
+    private static final String UTF_8 = "UTF-8";
     public final static boolean         INDENT_PHYLOXML_DEAFULT         = true;
     public final static String          PHYLO_XML_INTENDATION_BASE      = "  ";
     public final static String          PHYLO_XML_VERSION_ENCODING_LINE = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
@@ -399,7 +399,7 @@ public final class PhylogenyWriter {
 
     public void toNexus( final File out_file, final Phylogeny tree, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs )
             throws IOException {
-        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file, UTF_8 ) );
         final List<Phylogeny> trees = new ArrayList<Phylogeny>( 1 );
         trees.add( tree );
         writeNexusStart( writer );
@@ -426,14 +426,14 @@ public final class PhylogenyWriter {
                             final List<Phylogeny> trees,
                             final int phyloxml_level,
                             final String separator ) throws IOException {
-        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file, UTF_8 ) );
         toPhyloXML( writer, trees, phyloxml_level, separator );
         writer.flush();
         writer.close();
     }
 
     public void toPhyloXML( final File out_file, final Phylogeny tree, final int phyloxml_level ) throws IOException {
-        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file, UTF_8 ) );
         writePhyloXmlStart( writer );
         toPhyloXMLNoPhyloXmlSource( writer, tree, phyloxml_level );
         writePhyloXmlEnd( writer );
@@ -633,18 +633,8 @@ public final class PhylogenyWriter {
         if ( out_file.exists() ) {
             throw new IOException( "attempt to overwrite existing file \"" + out_file.getAbsolutePath() + "\"" );
         }
-        final PrintWriter out = new PrintWriter( new FileWriter( out_file ), true );
-        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
-            out.print( PHYLO_XML_VERSION_ENCODING_LINE );
-            out.print( ForesterUtil.LINE_SEPARATOR );
-            out.print( PHYLO_XML_NAMESPACE_LINE );
-            out.print( ForesterUtil.LINE_SEPARATOR );
-        }
+        final PrintWriter out = new PrintWriter( out_file, UTF_8 );
         out.print( sb );
-        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
-            out.print( ForesterUtil.LINE_SEPARATOR );
-            out.print( PHYLO_XML_END );
-        }
         out.flush();
         out.close();
     }
@@ -758,3 +748,7 @@ public final class PhylogenyWriter {
         NH, NHX, PHYLO_XML, NEXUS;
     }
 }
+
+
+
+
index 6e1e6dc..72141d2 100644 (file)
@@ -42,7 +42,6 @@ import org.forester.evoinference.matrix.character.CharacterStateMatrix;
 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
 import org.forester.io.parsers.HmmPfamOutputParser;
-import org.forester.io.parsers.nexus.PaupLogParser;
 import org.forester.io.parsers.nhx.NHXParser;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
@@ -176,12 +175,7 @@ public class TestSurfacing {
             return false;
         }
         System.out.println( "OK." );
-        System.out.print( "  Paup log parser: " );
-        if ( !TestSurfacing.testPaupLogParser( test_dir ) ) {
-            System.out.println( "failed." );
-            return false;
-        }
-        System.out.println( "OK." );
+       
         System.out.print( "  Binary state matrix to gain loss matrix: " );
         if ( !TestSurfacing.testBinaryStateMatrixToGainLossMatrix( test_dir ) ) {
             System.out.println( "failed." );
@@ -5432,73 +5426,5 @@ public class TestSurfacing {
         return true;
     }
 
-    private static boolean testPaupLogParser( final File test_dir ) {
-        try {
-            final PaupLogParser parser = new PaupLogParser();
-            parser.setSource( new File( test_dir + ForesterUtil.getFileSeparator() + "paup_log_test_1" ) );
-            final CharacterStateMatrix<BinaryStates> matrix = parser.parse();
-            if ( matrix.getNumberOfIdentifiers() != 8 ) {
-                return false;
-            }
-            if ( !matrix.getIdentifier( 0 ).equals( "MOUSE" ) ) {
-                return false;
-            }
-            if ( !matrix.getIdentifier( 1 ).equals( "NEMVE" ) ) {
-                return false;
-            }
-            if ( !matrix.getIdentifier( 2 ).equals( "MONBE" ) ) {
-                return false;
-            }
-            if ( !matrix.getIdentifier( 3 ).equals( "DICDI" ) ) {
-                return false;
-            }
-            if ( !matrix.getIdentifier( 4 ).equals( "ARATH" ) ) {
-                return false;
-            }
-            if ( !matrix.getIdentifier( 5 ).equals( "6" ) ) {
-                return false;
-            }
-            if ( !matrix.getIdentifier( 6 ).equals( "7" ) ) {
-                return false;
-            }
-            if ( !matrix.getIdentifier( 7 ).equals( "8" ) ) {
-                return false;
-            }
-            if ( matrix.getNumberOfCharacters() != ( 66 + 66 + 28 ) ) {
-                return false;
-            }
-            if ( matrix.getState( 0, 4 ) != BinaryStates.ABSENT ) {
-                return false;
-            }
-            if ( matrix.getState( 0, 5 ) != BinaryStates.PRESENT ) {
-                return false;
-            }
-            if ( matrix.getState( 1, 5 ) != BinaryStates.PRESENT ) {
-                return false;
-            }
-            if ( matrix.getState( 7, 154 ) != BinaryStates.ABSENT ) {
-                return false;
-            }
-            if ( matrix.getState( 7, 155 ) != BinaryStates.PRESENT ) {
-                return false;
-            }
-            if ( matrix.getState( 7, 156 ) != BinaryStates.PRESENT ) {
-                return false;
-            }
-            if ( matrix.getState( 7, 157 ) != BinaryStates.ABSENT ) {
-                return false;
-            }
-            if ( matrix.getState( 7, 158 ) != BinaryStates.PRESENT ) {
-                return false;
-            }
-            if ( matrix.getState( 7, 159 ) != BinaryStates.ABSENT ) {
-                return false;
-            }
-        }
-        catch ( final Exception e ) {
-            e.printStackTrace( System.out );
-            return false;
-        }
-        return true;
-    }
+   
 }
index 3efa354..8ca0bd6 100644 (file)
@@ -362,6 +362,15 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
+        System.out.print( "NH parsing - special chars: " );
+        if ( Test.testNHParsingSpecialChars() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
         System.out.print( "Conversion to NHX (node level): " );
         if ( Test.testNHXconversion() ) {
             System.out.println( "OK." );
@@ -488,6 +497,15 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
+        System.out.print( "UTF-8 parsing from file: " );
+        if ( Test.testUTF8ParsingFromFile() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
         System.out.print( "Copying of node data: " );
         if ( Test.testCopyOfNodeData() ) {
             System.out.println( "OK." );
@@ -1797,12 +1815,258 @@ public final class Test {
         }
         return true;
     }
+    // Parses chars.xml, chars.nh and chars.nex and checks that all three yield the same expected external node names.
+    private static boolean testUTF8ParsingFromFile() {
+        try {
+            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser();
+            final Phylogeny[] phylogenies_xml = ParserBasedPhylogenyFactory.getInstance().create( new File( Test.PATH_TO_TEST_DATA + "chars.xml" ),
+                                                              xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_xml.length != 1 ) {
+                return false;
+            }
+         // Round trip: write the parsed tree out via toPhyloXML( 0 ) and parse that text again with the same parser.
+            final Phylogeny[] phylogenies_xml2 = ParserBasedPhylogenyFactory.getInstance().create( new StringBuffer( phylogenies_xml[0].toPhyloXML( 0 )),
+                                                                                                   xml_parser );
+            // NOTE(review): phylogenies_xml2.length is never checked, unlike the other three results.
+            final Phylogeny[] phylogenies_nh = ParserBasedPhylogenyFactory.getInstance().create( new File( Test.PATH_TO_TEST_DATA + "chars.nh" ), new NHXParser() );
+            if ( phylogenies_nh.length != 1 ) {
+                return false;
+            }
+           
+            final Phylogeny[] phylogenies_nex = ParserBasedPhylogenyFactory.getInstance().create( new File( Test.PATH_TO_TEST_DATA + "chars.nex" ), new NexusPhylogeniesParser() );
+            if ( phylogenies_nex.length != 1 ) {
+                return false;
+            }
+          // Names are compared by array index below, so every source has to list its external nodes in the same order.
+            final String[] xml_n = phylogenies_xml[0].getAllExternalNodeNames();
+            final String[] xml_n2 = phylogenies_xml2[0].getAllExternalNodeNames();
+            final String[] nh_n = phylogenies_nh[0].getAllExternalNodeNames();
+            final String[] nex_n = phylogenies_nex[0].getAllExternalNodeNames();
+            final String n0 = "AQ~!@#$%^&*()_+-=\\{}|;:\"<>?,./";
+            final String n1 = "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ";
+            final String n2 = "漢字ひらがなカタカナ";
+            final String n3 = "อักษรไทย";
+            final String n4 = "繁體字";
+            final String n5 = "한글";
+            final String n6 = "देवनागरी";
+            
+            final String n7 = "chữ Quốc ngữ";
+            final String n8 = "ру́сский язы́к";
+            final String n9 = "អក្សរខ្មែរ";
+            // On a mismatch, print the actual name followed by the expected one, then fail the test.
+            if ( !xml_n[0].equals( n0 ) ) {
+                System.out.println( xml_n[0] );
+                System.out.println( n0 );
+                return false;
+            }
+            if ( !xml_n2[0].equals( n0 ) ) {
+                System.out.println( xml_n2[0] );
+                System.out.println( n0 );
+                return false;
+            }
+            if ( !nh_n[0].equals( n0 ) ) {
+                System.out.println( nh_n[0] );
+                System.out.println( n0 );
+                return false;
+            }
+            if ( !nex_n[0].equals( n0 ) ) {
+                System.out.println( nex_n[0] );
+                System.out.println( n0 );
+                return false;
+            }
+            
+            if ( !xml_n[1].equals( n1 ) ) {
+                System.out.println( xml_n[1] );
+                System.out.println( n1 );
+                return false;
+            }
+            if ( !xml_n2[1].equals( n1 ) ) {
+                System.out.println( xml_n2[1] );
+                System.out.println( n1 );
+                return false;
+            }
+            if ( !nh_n[1].equals( n1 ) ) {
+                System.out.println( nh_n[1] );
+                System.out.println( n1 );
+                return false;
+            }
+            if ( !nex_n[1].equals( n1 ) ) {
+                System.out.println( nex_n[1] );
+                System.out.println( n1 );
+                return false;
+            }
+            
+            if ( !xml_n[2].equals( n2 ) ) {
+                System.out.println( xml_n[2] );
+                System.out.println( n2 );
+                return false;
+            }
+            if ( !xml_n2[2].equals( n2 ) ) {
+                System.out.println( xml_n2[2] );
+                System.out.println( n2 );
+                return false;
+            }
+            if ( !nh_n[2].equals( n2 ) ) {
+                System.out.println( nh_n[2] );
+                System.out.println( n2 );
+                return false;
+            }
+            if ( !nex_n[2].equals( n2 ) ) {
+                System.out.println( nex_n[2] );
+                System.out.println( n2 );
+                return false;
+            }
+            // n3: compared for all four name arrays (xml_n, xml_n2, nh_n, nex_n).
+            if ( !xml_n[3].equals( n3 ) ) {
+                System.out.println( xml_n[3] );
+                System.out.println( n3 );
+                return false;
+            }
+            if ( !xml_n2[3].equals( n3 ) ) {
+                System.out.println( xml_n2[3] );
+                System.out.println( n3 );
+                return false;
+            }
+            if ( !nh_n[3].equals( n3 ) ) {
+                System.out.println( nh_n[3] );
+                System.out.println( n3 );
+                return false;
+            }
+            if ( !nex_n[3].equals( n3 ) ) {
+                System.out.println( nex_n[3] );
+                System.out.println( n3 );
+                return false;
+            }
+            // n4: NOTE(review) - the round-tripped names (xml_n2) are not compared for n4 through n8.
+            if ( !xml_n[4].equals( n4 ) ) {
+                System.out.println( xml_n[4] );
+                System.out.println( n4 );
+                return false;
+            }
+            if ( !nh_n[4].equals( n4 ) ) {
+                System.out.println( nh_n[4] );
+                System.out.println( n4 );
+                return false;
+            }
+            if ( !nex_n[4].equals( n4 ) ) {
+                System.out.println( nex_n[4] );
+                System.out.println( n4 );
+                return false;
+            }
+            // n5: xml_n, nh_n and nex_n only (xml_n2 is not compared).
+            if ( !xml_n[5].equals( n5 ) ) {
+                System.out.println( xml_n[5] );
+                System.out.println( n5 );
+                return false;
+            }
+            if ( !nh_n[5].equals( n5 ) ) {
+                System.out.println( nh_n[5] );
+                System.out.println( n5 );
+                return false;
+            }
+            if ( !nex_n[5].equals( n5 ) ) {
+                System.out.println( nex_n[5] );
+                System.out.println( n5 );
+                return false;
+            }
+            // n6: xml_n, nh_n and nex_n only (xml_n2 is not compared).
+            if ( !xml_n[6].equals( n6 ) ) {
+                System.out.println( xml_n[6] );
+                System.out.println( n6 );
+                return false;
+            }
+            if ( !nh_n[6].equals( n6 ) ) {
+                System.out.println( nh_n[6] );
+                System.out.println( n6 );
+                return false;
+            }
+            if ( !nex_n[6].equals( n6 ) ) {
+                System.out.println( nex_n[6] );
+                System.out.println( n6 );
+                return false;
+            }
+            // n7 and n8: xml_n, nh_n and nex_n only; n9 further down is again compared for all four arrays.
+            if ( !xml_n[7].equals( n7 ) ) {
+                System.out.println( xml_n[7] );
+                System.out.println( n7 );
+                return false;
+            }
+            if ( !nh_n[7].equals( n7 ) ) {
+                System.out.println( nh_n[7] );
+                System.out.println( n7 );
+                return false;
+            }
+            if ( !nex_n[7].equals( n7 ) ) {
+                System.out.println( nex_n[7] );
+                System.out.println( n7 );
+                return false;
+            }
+            if ( !xml_n[8].equals( n8 ) ) {
+                System.out.println( xml_n[8] );
+                System.out.println( n8 );
+                return false;
+            }
+            if ( !nh_n[8].equals( n8 ) ) {
+                System.out.println( nh_n[8] );
+                System.out.println( n8 );
+                return false;
+            }
+            if ( !nex_n[8].equals( n8 ) ) {
+                System.out.println( nex_n[8] );
+                System.out.println( n8 );
+                return false;
+            }
+            if ( !xml_n[9].equals( n9 ) ) {
+                System.out.println( xml_n[9] );
+                System.out.println( n9 );
+                return false;
+            }
+            if ( !xml_n2[9].equals( n9 ) ) {
+                System.out.println( xml_n2[9] );
+                System.out.println( n9 );
+                return false;
+            }
+            if ( !nh_n[9].equals( n9 ) ) {
+                System.out.println( nh_n[9] );
+                System.out.println( n9 );
+                return false;
+            }
+            if ( !nex_n[9].equals( n9 ) ) {
+                System.out.println( nex_n[9] );
+                System.out.println( n9 );
+                return false;
+            }
+            if (!phylogenies_xml[0].toNewHampshire().equals( 
+                                                            phylogenies_nh[0].toNewHampshire() ) ) {
+                System.out.println( phylogenies_xml[0].toNewHampshire() );
+                System.out.println( phylogenies_nh[0].toNewHampshire() );
+                return false;
+            }
+            if (!phylogenies_xml[0].toNewHampshire().equals( 
+                                                            phylogenies_nex[0].toNewHampshire() ) ) {
+                System.out.println( phylogenies_xml[0].toNewHampshire() );
+                System.out.println( phylogenies_nex[0].toNewHampshire() );
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+    
+    
 
     private static boolean testBasicPhyloXMLparsing() {
         try {
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
             final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser();
-            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml",
+            final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ),
                                                               xml_parser );
             if ( xml_parser.getErrorCount() > 0 ) {
                 System.out.println( xml_parser.getErrorMessages().toString() );
@@ -2016,7 +2280,7 @@ public final class Test {
             else {
                 xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD );
             }
-            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml",
+            final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ),
                                                               xml_parser );
             if ( xml_parser.getErrorCount() > 0 ) {
                 System.out.println( xml_parser.getErrorMessages().toString() );
@@ -2182,6 +2446,7 @@ public final class Test {
             }
             if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription()
                     .equals( "Aguinaldo, A. M. A.; J. M. Turbeville, L. S. Linford, M. C. Rivera, J. R. Garey, R. A. Raff, & J. A. Lake (1997). \"Evidence for a clade of nematodes, arthropods and other moulting animals\". Nature 387 (6632): 489–493." ) ) ) {
+                System.out.println( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription() );
                 return false;
             }
             if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getTaxonomyCode().equals( "ECDYS" ) ) {
@@ -2368,7 +2633,7 @@ public final class Test {
                     xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD );
                 }
             }
-            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml",
+            final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ),
                                                               xml_parser );
             if ( xml_parser.getErrorCount() > 0 ) {
                 System.out.println( xml_parser.getErrorMessages().toString() );
@@ -2403,7 +2668,7 @@ public final class Test {
                 return false;
             }
             final String x2 = Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml";
-            final Phylogeny[] phylogenies_1 = factory.create( x2, xml_parser );
+            final Phylogeny[] phylogenies_1 = factory.create( new File( x2 ), xml_parser );
             if ( xml_parser.getErrorCount() > 0 ) {
                 System.out.println( "errors:" );
                 System.out.println( xml_parser.getErrorMessages().toString() );
@@ -2412,7 +2677,7 @@ public final class Test {
             if ( phylogenies_1.length != 4 ) {
                 return false;
             }
-            final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml",
+            final Phylogeny[] phylogenies_2 = factory.create( new File(Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml" ),
                                                               xml_parser );
             if ( xml_parser.getErrorCount() > 0 ) {
                 System.out.println( "errors:" );
@@ -2425,7 +2690,7 @@ public final class Test {
             if ( phylogenies_2[ 0 ].getNumberOfExternalNodes() != 2 ) {
                 return false;
             }
-            final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml",
+            final Phylogeny[] phylogenies_3 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml" ),
                                                               xml_parser );
             if ( xml_parser.getErrorCount() > 0 ) {
                 System.out.println( xml_parser.getErrorMessages().toString() );
@@ -2447,7 +2712,7 @@ public final class Test {
             if ( !a.getNode( "node b1" ).getNodeData().getTaxonomy().getCommonName().equals( "b1 species" ) ) {
                 return false;
             }
-            final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "special_characters.xml",
+            final Phylogeny[] phylogenies_4 = factory.create( new File( Test.PATH_TO_TEST_DATA + "special_characters.xml") ,
                                                               xml_parser );
             if ( xml_parser.getErrorCount() > 0 ) {
                 System.out.println( xml_parser.getErrorMessages().toString() );
@@ -3071,7 +3336,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb5 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" );
-            final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ];
+            final Phylogeny t5 = factory.create( sb5.toString(), new NHXParser() )[ 0 ];
             if ( t5.getNumberOfExternalNodes() != 8 ) {
                 return false;
             }
@@ -3079,17 +3344,17 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb6 = new StringBuffer( "(X,Y,Z,(((A111)A11:2)A1:2,(X,Y,Z,A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" );
-            final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ];
+            final Phylogeny t6 = factory.create( sb6.toString(), new NHXParser() )[ 0 ];
             if ( t6.getHeight() != 15 ) {
                 return false;
             }
             final StringBuffer sb7 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:15,D:8)" );
-            final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ];
+            final Phylogeny t7 = factory.create( sb7.toString(), new NHXParser() )[ 0 ];
             if ( t7.getHeight() != 15 ) {
                 return false;
             }
             final StringBuffer sb8 = new StringBuffer( "(((A11:11)A1:2,(A21:2,A22:2,A23,A24,AA:)A2:11,A3:2)A:2,B:15,C:15,D:15)" );
-            final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ];
+            final Phylogeny t8 = factory.create( sb8.toString(), new NHXParser() )[ 0 ];
             if ( t8.getNumberOfExternalNodes() != 10 ) {
                 return false;
             }
@@ -6577,7 +6842,7 @@ public final class Test {
             PhylogenyNode n;
             List<PhylogenyNode> ext = new ArrayList<PhylogenyNode>();
             final StringBuffer sb0 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
-            final Phylogeny t0 = factory.create( sb0, new NHXParser() )[ 0 ];
+            final Phylogeny t0 = factory.create( sb0.toString(), new NHXParser() )[ 0 ];
             t0.getNode( "cd" ).setCollapse( true );
             t0.getNode( "cde" ).setCollapse( true );
             n = t0.getFirstExternalNode();
@@ -6605,7 +6870,7 @@ public final class Test {
             }
             ext.clear();
             final StringBuffer sb1 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
-            final Phylogeny t1 = factory.create( sb1, new NHXParser() )[ 0 ];
+            final Phylogeny t1 = factory.create( sb1.toString(), new NHXParser() )[ 0 ];
             t1.getNode( "ab" ).setCollapse( true );
             t1.getNode( "cd" ).setCollapse( true );
             t1.getNode( "cde" ).setCollapse( true );
@@ -6632,7 +6897,7 @@ public final class Test {
             }
             ext.clear();
             final StringBuffer sb2 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t2 = factory.create( sb2, new NHXParser() )[ 0 ];
+            final Phylogeny t2 = factory.create( sb2.toString(), new NHXParser() )[ 0 ];
             t2.getNode( "ab" ).setCollapse( true );
             t2.getNode( "cd" ).setCollapse( true );
             t2.getNode( "cde" ).setCollapse( true );
@@ -6660,7 +6925,7 @@ public final class Test {
             }
             ext.clear();
             final StringBuffer sb3 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t3 = factory.create( sb3, new NHXParser() )[ 0 ];
+            final Phylogeny t3 = factory.create( sb3.toString(), new NHXParser() )[ 0 ];
             t3.getNode( "ab" ).setCollapse( true );
             t3.getNode( "cd" ).setCollapse( true );
             t3.getNode( "cde" ).setCollapse( true );
@@ -6686,7 +6951,7 @@ public final class Test {
             }
             ext.clear();
             final StringBuffer sb4 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t4 = factory.create( sb4, new NHXParser() )[ 0 ];
+            final Phylogeny t4 = factory.create( sb4.toString(), new NHXParser() )[ 0 ];
             t4.getNode( "ab" ).setCollapse( true );
             t4.getNode( "cd" ).setCollapse( true );
             t4.getNode( "cde" ).setCollapse( true );
@@ -6701,7 +6966,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb5 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
-            final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ];
+            final Phylogeny t5 = factory.create( sb5.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             n = t5.getFirstExternalNode();
             while ( n != null ) {
@@ -6736,7 +7001,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb6 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
-            final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ];
+            final Phylogeny t6 = factory.create( sb6.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t6.getNode( "ab" ).setCollapse( true );
             n = t6.getNode( "ab" );
@@ -6769,7 +7034,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb7 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
-            final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ];
+            final Phylogeny t7 = factory.create( sb7.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t7.getNode( "cd" ).setCollapse( true );
             n = t7.getNode( "a" );
@@ -6802,7 +7067,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb8 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
-            final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ];
+            final Phylogeny t8 = factory.create( sb8.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t8.getNode( "cd" ).setCollapse( true );
             t8.getNode( "c" ).setCollapse( true );
@@ -6838,7 +7103,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb9 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t9 = factory.create( sb9, new NHXParser() )[ 0 ];
+            final Phylogeny t9 = factory.create( sb9.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t9.getNode( "gh" ).setCollapse( true );
             n = t9.getNode( "a" );
@@ -6871,7 +7136,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb10 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t10 = factory.create( sb10, new NHXParser() )[ 0 ];
+            final Phylogeny t10 = factory.create( sb10.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t10.getNode( "gh" ).setCollapse( true );
             t10.getNode( "g" ).setCollapse( true );
@@ -6906,7 +7171,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb11 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t11 = factory.create( sb11, new NHXParser() )[ 0 ];
+            final Phylogeny t11 = factory.create( sb11.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t11.getNode( "gh" ).setCollapse( true );
             t11.getNode( "fgh" ).setCollapse( true );
@@ -6937,7 +7202,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb12 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t12 = factory.create( sb12, new NHXParser() )[ 0 ];
+            final Phylogeny t12 = factory.create( sb12.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t12.getNode( "gh" ).setCollapse( true );
             t12.getNode( "fgh" ).setCollapse( true );
@@ -6971,7 +7236,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb13 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t13 = factory.create( sb13, new NHXParser() )[ 0 ];
+            final Phylogeny t13 = factory.create( sb13.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t13.getNode( "ab" ).setCollapse( true );
             t13.getNode( "b" ).setCollapse( true );
@@ -7001,7 +7266,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb14 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t14 = factory.create( sb14, new NHXParser() )[ 0 ];
+            final Phylogeny t14 = factory.create( sb14.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t14.getNode( "ab" ).setCollapse( true );
             t14.getNode( "a" ).setCollapse( true );
@@ -7031,7 +7296,7 @@ public final class Test {
                 return false;
             }
             final StringBuffer sb15 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t15 = factory.create( sb15, new NHXParser() )[ 0 ];
+            final Phylogeny t15 = factory.create( sb15.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t15.getNode( "ab" ).setCollapse( true );
             t15.getNode( "a" ).setCollapse( true );
@@ -7066,7 +7331,7 @@ public final class Test {
             //
             //
             final StringBuffer sb16 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" );
-            final Phylogeny t16 = factory.create( sb16, new NHXParser() )[ 0 ];
+            final Phylogeny t16 = factory.create( sb16.toString(), new NHXParser() )[ 0 ];
             ext.clear();
             t16.getNode( "ab" ).setCollapse( true );
             t16.getNode( "a" ).setCollapse( true );
@@ -8286,10 +8551,10 @@ public final class Test {
             if ( !p1b.toNewHampshire().equals( "(';A;',';B;1;');" ) ) {
                 return false;
             }
-            final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ), new NHXParser() )[ 0 ];
+            final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ).toString(), new NHXParser() )[ 0 ];
             final Phylogeny p3 = factory.create( new char[] { '(', 'A', ',', 'B', '3', ')' }, new NHXParser() )[ 0 ];
             final Phylogeny p4 = factory.create( "(A,B4);", new NHXParser() )[ 0 ];
-            final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ), new NHXParser() )[ 0 ];
+            final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ).toString(), new NHXParser() )[ 0 ];
             final Phylogeny[] p7 = factory.create( "(A,B7);(C,D7)", new NHXParser() );
             final Phylogeny[] p8 = factory.create( "(A,B8) (C,D8)", new NHXParser() );
             final Phylogeny[] p9 = factory.create( "(A,B9)\n(C,D9)", new NHXParser() );
@@ -8547,21 +8812,21 @@ public final class Test {
             if ( p46.length != 0 ) {
                 return false;
             }
-            final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ), new NHXParser() )[ 0 ];
+            final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ).toString(), new NHXParser() )[ 0 ];
             if ( !isEqual( 0.44, p47.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) {
                 return false;
             }
-            final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ), new NHXParser() )[ 0 ];
+            final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ];
             if ( !isEqual( 88, p48.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) {
                 return false;
             }
             final Phylogeny p49 = factory
-                    .create( new StringBuffer( "((A,B)a[comment:a,b;(a)]b:2[0.44][comment(a,b,b);],C)" ),
+                    .create( new StringBuffer( "((A,B)a[comment:a,b;(a)]b:2[0.44][comment(a,b,b);],C)" ).toString(),
                              new NHXParser() )[ 0 ];
             if ( !isEqual( 0.44, p49.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) {
                 return false;
             }
-            final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ];
+            final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ];
             if ( p50.getNode( "A" ) == null ) {
                 return false;
             }
@@ -8576,21 +8841,21 @@ public final class Test {
                     .equals( "((A,B)88:2.0,C);" ) ) {
                 return false;
             }
-            final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ];
+            final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ];
             if ( p51.getNode( "A(A" ) == null ) {
                 return false;
             }
-            final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ), new NHXParser() )[ 0 ];
+            final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ];
             if ( p52.getNode( "A(A" ) == null ) {
                 return false;
             }
             final Phylogeny p53 = factory
-                    .create( new StringBuffer( "(('A(A',\"B (x (a' ,b) f(x);\"[com])[ment]ab:2[88],C)" ),
+                    .create( new StringBuffer( "(('A(A',\"B (x (a' ,b) f(x);\"[com])[ment]ab:2[88],C)" ).toString(),
                              new NHXParser() )[ 0 ];
             if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) {
                 return false;
             }
-            final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ];
+            final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ).toString(), new NHXParser() )[ 0 ];
             if ( p54.getNode( "A" ) == null ) {
                 return false;
             }
@@ -8598,7 +8863,7 @@ public final class Test {
                 return false;
             }
             final Phylogeny p55 = factory
-                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1  s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ),
+                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1  s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ).toString(),
                              new NHXParser() )[ 0 ];
             if ( !p55
                     .toNewHampshire()
@@ -8607,7 +8872,7 @@ public final class Test {
                 return false;
             }
             final Phylogeny p56 = factory
-                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1      s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ),
+                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1      s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ).toString(),
                              new NHXParser() )[ 0 ];
             if ( !p56
                     .toNewHampshire()
@@ -8616,7 +8881,7 @@ public final class Test {
                 return false;
             }
             final Phylogeny p57 = factory
-                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1      s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ),
+                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1      s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ).toString(),
                              new NHXParser() )[ 0 ];
             if ( !p57
                     .toNewHampshire()
@@ -8625,25 +8890,25 @@ public final class Test {
                 return false;
             }
             final String s58 = "('Homo \"man\" sapiens:1',\"Homo 'man' sapiens;\")';root \"1_ )';";
-            final Phylogeny p58 = factory.create( new StringBuffer( s58 ), new NHXParser() )[ 0 ];
+            final Phylogeny p58 = factory.create( s58, new NHXParser() )[ 0 ];
             if ( !p58.toNewHampshire().equals( s58 ) ) {
                 System.out.println( p58.toNewHampshire() );
                 return false;
             }
             final String s59 = "('Homo \"man sapiens:1',\"Homo 'man sapiens\")\"root; '1_ )\";";
-            final Phylogeny p59 = factory.create( new StringBuffer( s59 ), new NHXParser() )[ 0 ];
+            final Phylogeny p59 = factory.create( s59 , new NHXParser() )[ 0 ];
             if ( !p59.toNewHampshire().equals( s59 ) ) {
                 System.out.println( p59.toNewHampshire() );
                 return false;
             }
             final String s60 = "('\" ;,:\":\"',\"'abc def' g's_\",'=:0.45+,.:%~`!@#$%^&*()_-+={} | ;,');";
-            final Phylogeny p60 = factory.create( new StringBuffer( s60 ), new NHXParser() )[ 0 ];
+            final Phylogeny p60 = factory.create( s60, new NHXParser() )[ 0 ];
             if ( !p60.toNewHampshire().equals( s60 ) ) {
                 System.out.println( p60.toNewHampshire() );
                 return false;
             }
             final String s61 = "('H[omo] \"man\" sapiens:1',\"H[omo] 'man' sapiens;\",H[omo] sapiens)';root \"1_ )';";
-            final Phylogeny p61 = factory.create( new StringBuffer( s61 ), new NHXParser() )[ 0 ];
+            final Phylogeny p61 = factory.create( s61, new NHXParser() )[ 0 ];
             if ( !p61.toNewHampshire()
                     .equals( "('H{omo} \"man\" sapiens:1',\"H{omo} 'man' sapiens;\",Hsapiens)';root \"1_ )';" ) ) {
                 System.out.println( p61.toNewHampshire() );
@@ -8656,7 +8921,36 @@ public final class Test {
         }
         return true;
     }
-
+    
+    private static boolean testNHParsingSpecialChars() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();   
+            final String i0 = "(A!+=~QWERTY!@#$%^&*-,€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ)";
+            final Phylogeny p0 = factory.create( i0, new NHXParser() )[ 0 ];
+            if ( !p0.toNewHampshireX().equals( i0 ) ) {
+                System.out.println();
+                System.out.println( p0.toNewHampshireX() );
+                System.out.println( i0 );
+                return false;
+            }
+            final String i1 = "(हिंदी,한글,ไทย,'Tiếng Việt',ひらがなカタカナ漢字,繁體字,русский)";
+            final Phylogeny p1 = factory.create( i1, new NHXParser() )[ 0 ];
+            if ( !p1.toNewHampshireX().equals( i1 ) ) {
+                System.out.println();
+                System.out.println( p1.toNewHampshireX() );
+                System.out.println( i1 );
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+    
+    
+    
     private static boolean testNHParsingIter() {
         try {
             final String p0_str = "(A,B);";
@@ -9840,7 +10134,7 @@ public final class Test {
                     xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD );
                 }
             }
-            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_distribution.xml",
+            final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_distribution.xml" ),
                                                               xml_parser );
             if ( xml_parser.getErrorCount() > 0 ) {
                 System.out.println( xml_parser.getErrorMessages().toString() );
diff --git a/forester/test_data/chars.nex b/forester/test_data/chars.nex
new file mode 100644 (file)
index 0000000..5d1db95
--- /dev/null
@@ -0,0 +1,8 @@
+#NEXUS\r
+Begin Taxa;\r
+ Dimensions NTax=10;\r
+ TaxLabels 'AQ~!@#$%^&*()_+-=\{}|;:"<>?,./' €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ 漢字ひらがなカタカナ อักษรไทย 繁體字 한글 देवनागरी 'chữ Quốc ngữ' 'ру́сский язы́к' អក្សរខ្មែរ;\r
+End;\r
+Begin Trees;\r
+ Tree 'tree1'=[&R](('AQ~!@#$%^&*()_+-=\{}|;:"<>?,./',€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ),(漢字ひらがなカタカナ,อักษรไทย),(繁體字,한글),(देवनागरी,'chữ Quốc ngữ','ру́сский язы́к',អក្សរខ្មែរ));\r
+End;\r
diff --git a/forester/test_data/chars.nh b/forester/test_data/chars.nh
new file mode 100644 (file)
index 0000000..1d02611
--- /dev/null
@@ -0,0 +1 @@
+(('AQ~!@#$%^&*()_+-=\{}|;:"<>?,./',€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ),(漢字ひらがなカタカナ,อักษรไทย),(繁體字,한글),(देवनागरी,'chữ Quốc ngữ','ру́сский язы́к',អក្សរខ្មែរ));
\ No newline at end of file
diff --git a/forester/test_data/chars.xml b/forester/test_data/chars.xml
new file mode 100644 (file)
index 0000000..5bcd476
--- /dev/null
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>\r
+<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd" xmlns="http://www.phyloxml.org">\r
+<phylogeny rooted="true" rerootable="true">\r
+  <clade>\r
+    <clade>\r
+      <clade>\r
+        <name>AQ~!@#$%^&amp;*()_+-=\{}|;:&quot;&lt;&gt;?,./</name>\r
+      </clade>\r
+      <clade>\r
+        <name>€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ</name>\r
+      </clade>\r
+    </clade>\r
+    <clade>\r
+      <clade>\r
+        <name>漢字ひらがなカタカナ</name>\r
+      </clade>\r
+      <clade>\r
+        <name>อักษรไทย</name>\r
+      </clade>\r
+    </clade>\r
+    <clade>\r
+      <clade>\r
+        <name>繁體字</name>\r
+      </clade>\r
+      <clade>\r
+        <name>한글</name>\r
+      </clade>\r
+    </clade>\r
+    <clade>\r
+      <clade>\r
+        <name>देवनागरी</name>\r
+      </clade>\r
+      <clade>\r
+        <name>chữ Quốc ngữ</name>\r
+      </clade>\r
+      <clade>\r
+        <name>ру́сский язы́к</name>\r
+      </clade>\r
+      <clade>\r
+        <name>អក្សរខ្មែរ</name>\r
+      </clade>\r
+    </clade>\r
+  </clade>\r
+</phylogeny>\r
+</phyloxml>
\ No newline at end of file