inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 15 Apr 2014 01:54:34 +0000 (01:54 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 15 Apr 2014 01:54:34 +0000 (01:54 +0000)
forester/java/src/org/forester/application/count_support.java
forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java
forester/java/src/org/forester/archaeopteryx/MainFrame.java
forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java
forester/java/src/org/forester/io/parsers/nhx/NHXParser.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/io/writers/PhylogenyWriter.java
forester/java/src/org/forester/phylogeny/Phylogeny.java
forester/java/src/org/forester/phylogeny/PhylogenyNode.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/util/ForesterUtil.java

index b57f3c5..9be5f89 100644 (file)
@@ -186,7 +186,6 @@ public class count_support {
                     }
                     else {
                         w.toNewHampshire( evaluator_phylogenies_above_threshold,
-                                          true,
                                           branch_lengths_in_ev_out,
                                           evaluators_outfile,
                                           ";" + ForesterUtil.getLineSeparator() );
@@ -200,7 +199,7 @@ public class count_support {
                                            ";" + ForesterUtil.getLineSeparator() );
                     }
                     else {
-                        w.toNewHampshire( Arrays.asList( ev ), true, branch_lengths_in_ev_out, evaluators_outfile, ";"
+                        w.toNewHampshire( Arrays.asList( ev ), branch_lengths_in_ev_out, evaluators_outfile, ";"
                                 + ForesterUtil.getLineSeparator() );
                     }
                 }
index 9e48b82..3954da0 100644 (file)
@@ -1411,8 +1411,8 @@ public class ArchaeopteryxE extends JApplet implements ActionListener {
             if ( !ForesterUtil.isEmpty( getMainPanel().getCurrentPhylogeny().getName() ) ) {
                 title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title;
             }
-            showTextFrame( getMainPanel().getCurrentPhylogeny()
-                                   .toNewHampshire( false, getOptions().getNhConversionSupportValueStyle() ),
+            showTextFrame( getMainPanel().getCurrentPhylogeny().toNewHampshire( getOptions()
+                                   .getNhConversionSupportValueStyle() ),
                            title );
         }
     }
index b7dc624..4bac494 100644 (file)
@@ -1407,8 +1407,8 @@ public abstract class MainFrame extends JFrame implements ActionListener {
             if ( !ForesterUtil.isEmpty( _mainpanel.getCurrentPhylogeny().getName() ) ) {
                 title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title;
             }
-            showTextFrame( _mainpanel.getCurrentPhylogeny()
-                                   .toNewHampshire( false, getOptions().getNhConversionSupportValueStyle() ),
+            showTextFrame( _mainpanel.getCurrentPhylogeny().toNewHampshire( getOptions()
+                                   .getNhConversionSupportValueStyle() ),
                            title );
         }
     }
index d0484a4..6e13820 100644 (file)
@@ -2326,7 +2326,7 @@ public final class MainFrameApplication extends MainFrame {
     private boolean writeAsNewHampshire( final Phylogeny t, boolean exception, final File file ) {
         try {
             final PhylogenyWriter writer = new PhylogenyWriter();
-            writer.toNewHampshire( t, false, true, getOptions().getNhConversionSupportValueStyle(), file );
+            writer.toNewHampshire( t, true, getOptions().getNhConversionSupportValueStyle(), file );
         }
         catch ( final Exception e ) {
             exception = true;
index f54602f..2921230 100644 (file)
@@ -70,6 +70,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
     final static private byte    STRING                                     = 0;
     final static private byte    STRING_BUFFER                              = 1;
     final static private byte    STRING_BUILDER                             = 4;
+    final static private char    BELL                                       = 7;
     private boolean              _allow_errors_in_distance_to_parent;
     private int                  _clade_level;
     private StringBuilder        _current_anotation;
@@ -296,7 +297,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                       _current_phylogeny.getRoot(),
                       getTaxonomyExtraction(),
                       isReplaceUnderscores(),
-                      isAllowErrorsInDistanceToParent() );
+                      isAllowErrorsInDistanceToParent(),
+                      true );
             if ( GUESS_IF_SUPPORT_VALUES ) {
                 if ( isBranchLengthsLikeBootstrapValues( _current_phylogeny ) ) {
                     moveBranchLengthsToConfidenceValues( _current_phylogeny );
@@ -321,7 +323,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                   new_node,
                   getTaxonomyExtraction(),
                   isReplaceUnderscores(),
-                  isAllowErrorsInDistanceToParent() );
+                  isAllowErrorsInDistanceToParent(),
+                  true );
         _current_phylogeny = new Phylogeny();
         _current_phylogeny.setRoot( new_node );
         return _current_phylogeny;
@@ -418,7 +421,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                     _in_double_quote = false;
                 }
                 else {
-                    _current_anotation.append( c );
+                    _current_anotation.append( c != ':' ? c : BELL );
                 }
             }
             else if ( c == '"' ) {
@@ -429,7 +432,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                     _in_single_quote = false;
                 }
                 else {
-                    _current_anotation.append( c );
+                    _current_anotation.append( c != ':' ? c : BELL );
                 }
             }
             else if ( c == 39 ) {
@@ -505,7 +508,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                       new_node,
                       getTaxonomyExtraction(),
                       isReplaceUnderscores(),
-                      isAllowErrorsInDistanceToParent() );
+                      isAllowErrorsInDistanceToParent(),
+                      true );
             _current_anotation = new StringBuilder();
             _current_node.addAsChild( new_node );
         }
@@ -514,7 +518,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                       _current_node.getLastChildNode(),
                       getTaxonomyExtraction(),
                       isReplaceUnderscores(),
-                      isAllowErrorsInDistanceToParent() );
+                      isAllowErrorsInDistanceToParent(),
+                      true );
             _current_anotation = new StringBuilder();
         }
         if ( !_current_node.isRoot() ) {
@@ -530,7 +535,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                       new_node,
                       getTaxonomyExtraction(),
                       isReplaceUnderscores(),
-                      isAllowErrorsInDistanceToParent() );
+                      isAllowErrorsInDistanceToParent(),
+                      true );
             if ( _current_node == null ) {
                 throw new NHXFormatException( "format might not be NH or NHX" );
             }
@@ -541,7 +547,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                       _current_node.getLastChildNode(),
                       getTaxonomyExtraction(),
                       isReplaceUnderscores(),
-                      isAllowErrorsInDistanceToParent() );
+                      isAllowErrorsInDistanceToParent(),
+                      true );
         }
         _current_anotation = new StringBuilder();
         _saw_closing_paren = false;
@@ -583,7 +590,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                                        final PhylogenyNode node_to_annotate,
                                        final TAXONOMY_EXTRACTION taxonomy_extraction,
                                        final boolean replace_underscores,
-                                       final boolean allow_errors_in_distance_to_parent ) throws NHXFormatException,
+                                       final boolean allow_errors_in_distance_to_parent,
+                                       final boolean replace_bell ) throws NHXFormatException,
             PhyloXmlDataFormatException {
         if ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) && replace_underscores ) {
             throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" );
@@ -592,6 +600,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
             if ( replace_underscores ) {
                 s = s.replaceAll( "_+", " " );
             }
+            s = s.replaceAll( "\\s+", " " ).trim();
             boolean is_nhx = false;
             final int ob = s.indexOf( "[" );
             if ( ob > -1 ) {
@@ -623,14 +632,30 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
             final StringTokenizer t = new StringTokenizer( s, ":" );
             if ( t.countTokens() > 0 ) {
                 if ( !s.startsWith( ":" ) ) {
-                    node_to_annotate.setName( t.nextToken() );
+                    if ( ( s.indexOf( BELL ) <= -1 ) || !replace_bell ) {
+                        node_to_annotate.setName( t.nextToken() );
+                    }
+                    else {
+                        node_to_annotate.setName( t.nextToken().replace( BELL, ':' ) );
+                    }
                     if ( !replace_underscores && ( !is_nhx && ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) {
                         ParserUtils.extractTaxonomyDataFromNodeName( node_to_annotate, taxonomy_extraction );
                     }
                 }
                 while ( t.hasMoreTokens() ) {
                     s = t.nextToken();
-                    if ( s.startsWith( NHXtags.SPECIES_NAME ) ) {
+                    if ( ( s.indexOf( BELL ) > -1 ) && replace_bell ) {
+                        s = s.replace( BELL, ':' );
+                    }
+                    if ( s.indexOf( '=' ) < 0 ) {
+                        if ( ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT )
+                                && !allow_errors_in_distance_to_parent ) {
+                            throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:"
+                                    + "\"" + s + "\"" );
+                        }
+                        node_to_annotate.setDistanceToParent( doubleValue( s, allow_errors_in_distance_to_parent ) );
+                    }
+                    else if ( s.startsWith( NHXtags.SPECIES_NAME ) ) {
                         if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) {
                             node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() );
                         }
@@ -672,14 +697,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                         }
                         node_to_annotate.getNodeData().getSequence().setName( s.substring( 3 ) );
                     }
-                    else if ( s.indexOf( '=' ) < 0 ) {
-                        if ( ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT )
-                                && !allow_errors_in_distance_to_parent ) {
-                            throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:"
-                                    + "\"" + s + "\"" );
-                        }
-                        node_to_annotate.setDistanceToParent( doubleValue( s, allow_errors_in_distance_to_parent ) );
-                    }
                 } // while ( t.hasMoreTokens() ) 
             }
         }
index b37dbeb..d904a81 100644 (file)
@@ -55,29 +55,43 @@ import org.forester.util.ForesterUtil;
 
 public final class ParserUtils {
 
-    final public static String   TAX_CODE                        = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA";
-    final public static Pattern  TAXOMONY_CODE_PATTERN_A         = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")\\b" );
-    final public static Pattern  TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
-    final public static Pattern  TAXOMONY_CODE_PATTERN_PFR       = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
-                                                                         + TAX_CODE + ")\\b" );
-    final public static Pattern  TAXOMONY_SN_PATTERN             = Pattern
-                                                                         .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]{2,30}_[a-z]{3,30}(?:_[a-z][a-z0-9_]+)?)\\b" );
-    final public static Pattern  TAXOMONY_SN_PATTERN_SN          = Pattern
-                                                                         .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}(?:[_ ][a-z]{3,30})?)(?:\\b|_)?" );
-    final public static Pattern  TAXOMONY_SN_PATTERN_STRAIN_1    = Pattern
-                                                                         .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
-    final public static Pattern  TAXOMONY_SN_PATTERN_STRAIN_2    = Pattern
-                                                                         .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))(?:\\b|_)?" );
-    final public static Pattern  TAXOMONY_SN_PATTERN_SP    = Pattern
-            .compile( "\\b([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" );
-
-    final public static Pattern  TAXOMONY_SN_PATTERN_GENUS       = Pattern.compile( "([A-Z][a-z]{2,30})" );
-    final private static Pattern TAXOMONY_CODE_PATTERN_PFS       = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
-                                                                         + TAX_CODE + ")/\\d+-\\d+\\b" );
-    final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern
-                                                                         .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" );
-    final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern
-                                                                         .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" );
+    final public static String   TAX_CODE                             = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA";
+    final private static String  SN_BN                                = "[A-Z][a-z]{2,30}[_ ][a-z]{3,30}";
+    final public static Pattern  TAXOMONY_CODE_PATTERN_A              = Pattern.compile( "(?:\\b|_)(" + TAX_CODE
+                                                                              + ")\\b" );
+    final public static Pattern  TAXOMONY_CODE_PATTERN_BRACKETED      = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
+    final public static Pattern  TAXOMONY_CODE_PATTERN_PFR            = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
+                                                                              + TAX_CODE + ")\\b" );
+    // final public static Pattern  TAXOMONY_SN_PATTERN                  = Pattern
+    //                                                                            .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]{2,30}_[a-z]{3,30}(?:_[a-z][a-z0-9_]+)?)\\b" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_SN               = Pattern.compile( "(?:\\b|_)(" + SN_BN
+                                                                              + ")(?:(\\s*$)|([_ ][a-z]*[A-Z0-9]))" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_SNS              = Pattern.compile( "(?:\\b|_)(" + SN_BN
+                                                                              + "[_ ][a-z]{3,30}"
+                                                                              + ")[_ ][a-z]*[A-Z0-9]" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_SNS2             = Pattern.compile( "[A-Z0-9][a-z]*[_ ](" + SN_BN
+                                                                              + "[_ ][a-z]{3,30}" + ")\\s*$" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_STRAIN_1         = Pattern
+                                                                              .compile( "(?:\\b|_)("
+                                                                                      + SN_BN
+                                                                                      + "[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_STRAIN_2         = Pattern
+                                                                              .compile( "(?:\\b|_)("
+                                                                                      + SN_BN
+                                                                                      + "[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN = Pattern
+                                                                              .compile( "(?:\\b|_)("
+                                                                                      + SN_BN
+                                                                                      + "[_ ]str[a-z]{0,3}\\.?[_ ]\\S{1,60}[_ ]substr[a-z]{0,3}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_SP               = Pattern
+                                                                              .compile( "(?:\\b|_)([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_GENUS            = Pattern.compile( "([A-Z][a-z]{2,30})" );
+    final private static Pattern TAXOMONY_CODE_PATTERN_PFS            = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
+                                                                              + TAX_CODE + ")/\\d+-\\d+\\b" );
+    final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR      = Pattern
+                                                                              .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" );
+    final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS      = Pattern
+                                                                              .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" );
 
     final public static PhylogenyParser createParserDependingFileContents( final File file,
                                                                            final boolean phyloxml_validate_against_xsd )
@@ -204,26 +218,62 @@ public final class ParserUtils {
     }
 
     public final static String extractScientificNameFromNodeName( final String name ) {
-        final Matcher m = TAXOMONY_SN_PATTERN.matcher( name );
-        if ( m.find() ) {
-            return m.group( 1 ).replace( '_', ' ' );
+        //  final Matcher m = TAXOMONY_SN_PATTERN.matcher( name );
+        //  if ( m.find() ) {
+        //      return m.group( 1 ).replace( '_', ' ' );
+        //  }
+        final Matcher m_ss = TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN.matcher( name );
+        if ( m_ss.find() ) {
+            String s = m_ss.group( 1 ).replace( '_', ' ' );
+            if ( s.indexOf( " str " ) > 4 ) {
+                s = s.replaceFirst( " str ", " str. " );
+            }
+            if ( s.indexOf( " substr " ) > 4 ) {
+                s = s.replaceFirst( " substr ", " substr. " );
+            }
+            return s;
         }
         final Matcher m_str1 = TAXOMONY_SN_PATTERN_STRAIN_1.matcher( name );
         if ( m_str1.find() ) {
-            return m_str1.group( 1 ).replace( '_', ' ' );
+            String s = m_str1.group( 1 ).replace( '_', ' ' );
+            if ( s.indexOf( " str " ) > 4 ) {
+                s = s.replaceFirst( " str ", " str. " );
+            }
+            else if ( s.indexOf( " subsp " ) > 4 ) {
+                s = s.replaceFirst( " subsp ", " subsp. " );
+            }
+            else if ( s.indexOf( " var " ) > 4 ) {
+                s = s.replaceFirst( " var ", " var. " );
+            }
+            return s;
         }
         final Matcher m_str2 = TAXOMONY_SN_PATTERN_STRAIN_2.matcher( name );
         if ( m_str2.find() ) {
-            return m_str2.group( 1 ).replace( '_', ' ' );
+            String s = m_str2.group( 1 ).replace( '_', ' ' );
+            if ( s.indexOf( " (str " ) > 4 ) {
+                s = s.replaceFirst( " \\(str ", " (str. " );
+            }
+            else if ( s.indexOf( " (subsp " ) > 4 ) {
+                s = s.replaceFirst( " \\(subsp ", " (subsp. " );
+            }
+            else if ( s.indexOf( " (var " ) > 4 ) {
+                s = s.replaceFirst( " \\(var ", " (var. " );
+            }
+            return s;
+        }
+        final Matcher m_sns = TAXOMONY_SN_PATTERN_SNS.matcher( name );
+        if ( m_sns.find() ) {
+            return m_sns.group( 1 ).replace( '_', ' ' );
+        }
+        final Matcher m_sns2 = TAXOMONY_SN_PATTERN_SNS2.matcher( name );
+        if ( m_sns2.find() ) {
+            return m_sns2.group( 1 ).replace( '_', ' ' );
         }
         final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name );
-       
         if ( m_sn.find() ) {
             return m_sn.group( 1 ).replace( '_', ' ' );
         }
-        
         final Matcher m_sp = TAXOMONY_SN_PATTERN_SP.matcher( name );
-        
         if ( m_sp.find() ) {
             return m_sp.group( 1 ).replace( '_', ' ' );
         }
index e4b7dc7..6d490c5 100644 (file)
@@ -72,7 +72,6 @@ public final class PhylogenyWriter {
     private PhylogenyNode               _root;
     private boolean                     _has_next;
     private Stack<PostOrderStackObject> _stack;
-    private boolean                     _simple_nh;
     private boolean                     _nh_write_distance_to_parent;
     NH_CONVERSION_SUPPORT_VALUE_STYLE   _nh_conversion_support_style;
     private boolean                     _indent_phyloxml;
@@ -202,10 +201,6 @@ public final class PhylogenyWriter {
         return _saw_comma;
     }
 
-    private boolean isSimpleNH() {
-        return _simple_nh;
-    }
-
     private boolean isWriteDistanceToParentInNH() {
         return _nh_write_distance_to_parent;
     }
@@ -307,10 +302,6 @@ public final class PhylogenyWriter {
         _saw_comma = saw_comma;
     }
 
-    private void setSimpleNH( final boolean simple_nh ) {
-        _simple_nh = simple_nh;
-    }
-
     private void setStack( final Stack<PostOrderStackObject> stack ) {
         _stack = stack;
     }
@@ -324,62 +315,53 @@ public final class PhylogenyWriter {
     }
 
     public void toNewHampshire( final List<Phylogeny> trees,
-                                final boolean simple_nh,
                                 final boolean write_distance_to_parent,
                                 final File out_file,
                                 final String separator ) throws IOException {
         final Iterator<Phylogeny> it = trees.iterator();
         final StringBuffer sb = new StringBuffer();
         while ( it.hasNext() ) {
-            sb.append( toNewHampshire( it.next(), simple_nh, write_distance_to_parent ) );
+            sb.append( toNewHampshire( it.next(), write_distance_to_parent ) );
             sb.append( separator );
         }
         writeToFile( sb, out_file );
     }
 
     public StringBuffer toNewHampshire( final Phylogeny tree,
-                                        final boolean simple_nh,
                                         final boolean nh_write_distance_to_parent,
                                         final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) throws IOException {
         setOutputFormt( FORMAT.NH );
         setNhConversionSupportStyle( svs );
-        setSimpleNH( simple_nh );
         setWriteDistanceToParentInNH( nh_write_distance_to_parent );
         return getOutput( tree );
     }
 
-    public StringBuffer toNewHampshire( final Phylogeny tree,
-                                        final boolean simple_nh,
-                                        final boolean nh_write_distance_to_parent ) throws IOException {
+    public StringBuffer toNewHampshire( final Phylogeny tree, final boolean nh_write_distance_to_parent )
+            throws IOException {
         setOutputFormt( FORMAT.NH );
-        setSimpleNH( simple_nh );
         setWriteDistanceToParentInNH( nh_write_distance_to_parent );
         return getOutput( tree );
     }
 
-    public void toNewHampshire( final Phylogeny tree,
-                                final boolean simple_nh,
-                                final boolean write_distance_to_parent,
-                                final File out_file ) throws IOException {
-        writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent ), out_file );
+    public void toNewHampshire( final Phylogeny tree, final boolean write_distance_to_parent, final File out_file )
+            throws IOException {
+        writeToFile( toNewHampshire( tree, write_distance_to_parent ), out_file );
     }
 
     public void toNewHampshire( final Phylogeny tree,
-                                final boolean simple_nh,
                                 final boolean write_distance_to_parent,
                                 final NH_CONVERSION_SUPPORT_VALUE_STYLE svs,
                                 final File out_file ) throws IOException {
-        writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent, svs ), out_file );
+        writeToFile( toNewHampshire( tree, write_distance_to_parent, svs ), out_file );
     }
 
     public void toNewHampshire( final Phylogeny[] trees,
-                                final boolean simple_nh,
                                 final boolean write_distance_to_parent,
                                 final File out_file,
                                 final String separator ) throws IOException {
         final StringBuffer sb = new StringBuffer();
         for( final Phylogeny element : trees ) {
-            sb.append( toNewHampshire( element, simple_nh, write_distance_to_parent ) );
+            sb.append( toNewHampshire( element, write_distance_to_parent ) );
             sb.append( separator );
         }
         writeToFile( sb, out_file );
@@ -575,9 +557,7 @@ public final class PhylogenyWriter {
             getBuffer().append( node.toNewHampshireX() );
         }
         else if ( getOutputFormt() == FORMAT.NH ) {
-            getBuffer().append( node.toNewHampshire( isSimpleNH(),
-                                                     isWriteDistanceToParentInNH(),
-                                                     getNhConversionSupportStyle() ) );
+            getBuffer().append( node.toNewHampshire( isWriteDistanceToParentInNH(), getNhConversionSupportStyle() ) );
         }
     }
 
@@ -754,7 +734,7 @@ public final class PhylogenyWriter {
             else {
                 writer.write( "[&U]" );
             }
-            writer.write( phylogeny.toNewHampshire( false, svs ) );
+            writer.write( phylogeny.toNewHampshire( svs ) );
             writer.write( ForesterUtil.LINE_SEPARATOR );
             i++;
         }
index 3eac2bd..115e885 100644 (file)
@@ -1098,14 +1098,12 @@ public class Phylogeny {
     }
 
     public String toNewHampshire() {
-        return toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
+        return toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
     }
 
-    public String toNewHampshire( final boolean simple_nh,
-                                  final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_style ) {
+    public String toNewHampshire( final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_style ) {
         try {
-            return new PhylogenyWriter().toNewHampshire( this, simple_nh, true, nh_conversion_support_style )
-                    .toString();
+            return new PhylogenyWriter().toNewHampshire( this, true, nh_conversion_support_style ).toString();
         }
         catch ( final IOException e ) {
             throw new Error( "this should not have happend: " + e.getMessage() );
index d17fd41..6a4876b 100644 (file)
@@ -86,7 +86,7 @@ public final class PhylogenyNode implements Comparable<PhylogenyNode> {
     private PhylogenyNode( final String nhx,
                            final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction,
                            final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException {
-        NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores, false );
+        NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores, false, false );
         setId( PhylogenyNode.getNodeCount() );
         PhylogenyNode.increaseNodeCount();
         setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!).
@@ -887,8 +887,7 @@ public final class PhylogenyNode implements Comparable<PhylogenyNode> {
     // ---------------------------------------------------------
     // Writing of Nodes to Strings
     // ---------------------------------------------------------
-    final public String toNewHampshire( final boolean simple_nh,
-                                        final boolean write_distance_to_parent,
+    final public String toNewHampshire( final boolean write_distance_to_parent,
                                         final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) {
         final StringBuilder sb = new StringBuilder();
         String data = "";
@@ -922,11 +921,9 @@ public final class PhylogenyNode implements Comparable<PhylogenyNode> {
                 data = getNodeData().getSequence().getName();
             }
         }
+        data = data.trim();
         if ( data.length() > 0 ) {
-            data = ForesterUtil.replaceIllegalNhCharacters( data );
-            if ( simple_nh && ( data.length() > 10 ) ) {
-                data = data.substring( 0, 11 );
-            }
+            data = data.replaceAll( "'", "_" );
             if ( ForesterUtil.isContainsParanthesesableNhCharacter( data ) ) {
                 sb.append( '\'' );
                 sb.append( data );
@@ -960,7 +957,8 @@ public final class PhylogenyNode implements Comparable<PhylogenyNode> {
         final StringBuffer sb = new StringBuffer();
         final StringBuffer s_nhx = new StringBuffer();
         if ( !ForesterUtil.isEmpty( getName() ) ) {
-            final String name = ForesterUtil.replaceIllegalNhCharacters( getName() );
+            //final String name = ForesterUtil.replaceIllegalNhCharacters( getName() );
+            final String name = getName().trim();
             if ( ForesterUtil.isContainsParanthesesableNhCharacter( name ) ) {
                 sb.append( '\'' );
                 sb.append( name );
index 78fe31b..ec44a86 100644 (file)
@@ -312,7 +312,6 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
-        System.exit( -1 );
         System.out.print( "Uri for Aptx web sequence accession: " );
         if ( Test.testCreateUriForSeqWeb() ) {
             System.out.println( "OK." );
@@ -385,6 +384,7 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
+        System.exit( 0 );
         System.out.print( "Nexus characters parsing: " );
         if ( Test.testNexusCharactersParsing() ) {
             System.out.println( "OK." );
@@ -3468,7 +3468,7 @@ public final class Test {
             if ( t4.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            String s = w.toNewHampshire( t4, false, true ).toString();
+            String s = w.toNewHampshire( t4, true ).toString();
             if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) {
                 return false;
             }
@@ -3489,7 +3489,7 @@ public final class Test {
             if ( !n.getName().equals( "D" ) ) {
                 return false;
             }
-            s = w.toNewHampshire( t4, false, true ).toString();
+            s = w.toNewHampshire( t4, true ).toString();
             if ( !s.equals( "((A,B12),D);" ) ) {
                 return false;
             }
@@ -3498,7 +3498,7 @@ public final class Test {
             if ( t5.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            s = w.toNewHampshire( t5, false, true ).toString();
+            s = w.toNewHampshire( t5, true ).toString();
             if ( !s.equals( "(((B11,B12),B2),(C,D));" ) ) {
                 return false;
             }
@@ -3507,7 +3507,7 @@ public final class Test {
             if ( t6.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            s = w.toNewHampshire( t6, false, false ).toString();
+            s = w.toNewHampshire( t6, false ).toString();
             if ( !s.equals( "((A,(B12,B2)),(C,D));" ) ) {
                 return false;
             }
@@ -3516,7 +3516,7 @@ public final class Test {
             if ( t7.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            s = w.toNewHampshire( t7, false, true ).toString();
+            s = w.toNewHampshire( t7, true ).toString();
             if ( !s.equals( "((A,(B11,B2)),(C,D));" ) ) {
                 return false;
             }
@@ -3525,7 +3525,7 @@ public final class Test {
             if ( t8.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            s = w.toNewHampshire( t8, false, false ).toString();
+            s = w.toNewHampshire( t8, false ).toString();
             if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) {
                 return false;
             }
@@ -3534,7 +3534,7 @@ public final class Test {
             if ( t9.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            s = w.toNewHampshire( t9, false, true ).toString();
+            s = w.toNewHampshire( t9, true ).toString();
             if ( !s.equals( "((A,((B11,B12),B2)),D);" ) ) {
                 return false;
             }
@@ -3543,7 +3543,7 @@ public final class Test {
             if ( t10.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            s = w.toNewHampshire( t10, false, true ).toString();
+            s = w.toNewHampshire( t10, true ).toString();
             if ( !s.equals( "((A,((B11,B12),B2)),C);" ) ) {
                 return false;
             }
@@ -3552,7 +3552,7 @@ public final class Test {
             if ( t11.getNumberOfExternalNodes() != 2 ) {
                 return false;
             }
-            s = w.toNewHampshire( t11, false, true ).toString();
+            s = w.toNewHampshire( t11, true ).toString();
             if ( !s.equals( "(B,C);" ) ) {
                 return false;
             }
@@ -3560,7 +3560,7 @@ public final class Test {
             if ( t11.getNumberOfExternalNodes() != 1 ) {
                 return false;
             }
-            s = w.toNewHampshire( t11, false, false ).toString();
+            s = w.toNewHampshire( t11, false ).toString();
             if ( !s.equals( "B;" ) ) {
                 return false;
             }
@@ -3569,7 +3569,7 @@ public final class Test {
             if ( t12.getNumberOfExternalNodes() != 8 ) {
                 return false;
             }
-            s = w.toNewHampshire( t12, false, true ).toString();
+            s = w.toNewHampshire( t12, true ).toString();
             if ( !s.equals( "((A1,A2,A3),(B1,B3),(C1,C2,C3));" ) ) {
                 return false;
             }
@@ -3577,7 +3577,7 @@ public final class Test {
             if ( t12.getNumberOfExternalNodes() != 7 ) {
                 return false;
             }
-            s = w.toNewHampshire( t12, false, true ).toString();
+            s = w.toNewHampshire( t12, true ).toString();
             if ( !s.equals( "((A1,A2,A3),B1,(C1,C2,C3));" ) ) {
                 return false;
             }
@@ -3585,7 +3585,7 @@ public final class Test {
             if ( t12.getNumberOfExternalNodes() != 6 ) {
                 return false;
             }
-            s = w.toNewHampshire( t12, false, true ).toString();
+            s = w.toNewHampshire( t12, true ).toString();
             if ( !s.equals( "((A1,A2,A3),B1,(C1,C2));" ) ) {
                 return false;
             }
@@ -3593,7 +3593,7 @@ public final class Test {
             if ( t12.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            s = w.toNewHampshire( t12, false, true ).toString();
+            s = w.toNewHampshire( t12, true ).toString();
             if ( !s.equals( "((A2,A3),B1,(C1,C2));" ) ) {
                 return false;
             }
@@ -3601,7 +3601,7 @@ public final class Test {
             if ( t12.getNumberOfExternalNodes() != 4 ) {
                 return false;
             }
-            s = w.toNewHampshire( t12, false, true ).toString();
+            s = w.toNewHampshire( t12, true ).toString();
             if ( !s.equals( "((A2,A3),(C1,C2));" ) ) {
                 return false;
             }
@@ -3609,7 +3609,7 @@ public final class Test {
             if ( t12.getNumberOfExternalNodes() != 3 ) {
                 return false;
             }
-            s = w.toNewHampshire( t12, false, true ).toString();
+            s = w.toNewHampshire( t12, true ).toString();
             if ( !s.equals( "(A2,(C1,C2));" ) ) {
                 return false;
             }
@@ -3617,7 +3617,7 @@ public final class Test {
             if ( t12.getNumberOfExternalNodes() != 2 ) {
                 return false;
             }
-            s = w.toNewHampshire( t12, false, true ).toString();
+            s = w.toNewHampshire( t12, true ).toString();
             if ( !s.equals( "(C1,C2);" ) ) {
                 return false;
             }
@@ -3626,7 +3626,7 @@ public final class Test {
             if ( t13.getNumberOfExternalNodes() != 4 ) {
                 return false;
             }
-            s = w.toNewHampshire( t13, false, true ).toString();
+            s = w.toNewHampshire( t13, true ).toString();
             if ( !s.equals( "(A,B,C,E:5.0);" ) ) {
                 return false;
             }
@@ -3635,7 +3635,7 @@ public final class Test {
             if ( t14.getNumberOfExternalNodes() != 5 ) {
                 return false;
             }
-            s = w.toNewHampshire( t14, false, true ).toString();
+            s = w.toNewHampshire( t14, true ).toString();
             if ( !s.equals( "((A,B,C,D:1.1),F);" ) ) {
                 return false;
             }
@@ -4177,25 +4177,95 @@ public final class Test {
             if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus" ).equals( "Mus musculus" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus" )
+            if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCDO2" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus musculus BCDO2" )
+                    .equals( "Mus musculus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_BCDO2" )
+                    .equals( "Mus musculus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus musculus" )
+                    .equals( "Mus musculus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Bcl Mus musculus musculus" )
+                    .equals( "Mus musculus musculus" ) ) {
+                return false;
+            }
+            if ( ParserUtils.extractScientificNameFromNodeName( "vcl Mus musculus musculus" ) != null ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_BCDO2" )
                     .equals( "Mus musculus musculus" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus-12" )
+            if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_Musculus" )
                     .equals( "Mus musculus musculus" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus-12" ).equals( "Mus musculus" ) ) {
+            if ( ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_musculus" ) != null ) {
+                return false;
+            }
+            if ( ParserUtils.extractScientificNameFromNodeName( "musculus" ) != null ) {
+                return false;
+            }
+            if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus" ) != null ) {
+                return false;
+            }
+            if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus_musculus" ) != null ) {
                 return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus-12 affrre e" )
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_1" )
+                    .equals( "Mus musculus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_1" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_bcl" ) != null ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCL" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( ParserUtils.extractScientificNameFromNodeName( "Mus musculus bcl" ) != null ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus BCL" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus xBCL" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus x1" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus_12" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12 affrre e" )
+                    .equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12_affrre_e" )
                     .equals( "Mus musculus" ) ) {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus" )
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" )
+                    .equals( "Mus musculus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" )
                     .equals( "Mus musculus musculus" ) ) {
                 return false;
             }
@@ -4206,7 +4276,8 @@ public final class Test {
             if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" ).equals( "Pilostyles mexicana" ) ) {
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" )
+                    .equals( "Pilostyles mexicana" ) ) {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_strain_K12/DH10B" )
@@ -4214,7 +4285,7 @@ public final class Test {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K12/DH10B" )
-                    .equals( "Escherichia coli str K12/DH10B" ) ) {
+                    .equals( "Escherichia coli str. K12/DH10B" ) ) {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K12/DH10B" )
@@ -4222,7 +4293,7 @@ public final class Test {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis_lyrata_subsp_lyrata" )
-                    .equals( "Arabidopsis lyrata subsp lyrata" ) ) {
+                    .equals( "Arabidopsis lyrata subsp. lyrata" ) ) {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata" )
@@ -4238,7 +4309,7 @@ public final class Test {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp lyrata bcl2" )
-                    .equals( "Arabidopsis lyrata subsp lyrata" ) ) {
+                    .equals( "Arabidopsis lyrata subsp. lyrata" ) ) {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subspecies lyrata bcl2" )
@@ -4261,35 +4332,63 @@ public final class Test {
                     .equals( "Escherichia coli (str. K12)" ) ) {
                 return false;
             }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str K12)" )
+                    .equals( "Escherichia coli (str. K12)" ) ) {
+                return false;
+            }
             if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. K12) bcl2" )
                     .equals( "Escherichia coli (str. K12)" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." )
-                    .equals( "Macrocera sp." ) ) {
-                
-                 return false;
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (var K12) bcl2" )
+                    .equals( "Escherichia coli (var. K12)" ) ) {
+                return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" )
-                    .equals( "Macrocera sp." ) ) {
-                
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K-12 substr. MG1655star" )
+                    .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" )
-                    .equals( "Macrocera sp." ) ) {
-                
-                
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star" )
+                    .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+                return false;
+            }
+            if ( !ParserUtils
+                    .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star" )
+                    .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star gene1" )
+                    .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+                return false;
+            }
+            if ( !ParserUtils
+                    .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star GENE1" )
+                    .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" )
+                    .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" )
+                    .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+                return false;
+            }
+            //
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." ).equals( "Macrocera sp." ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" ).equals( "Macrocera sp." ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" ).equals( "Macrocera sp." ) ) {
                 return false;
             }
             if ( !ParserUtils.extractScientificNameFromNodeName( "something Macrocera sp. K12" )
                     .equals( "Macrocera sp." ) ) {
-                
-                
                 return false;
-            } if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" )
-                    .equals( "Macrocera sp" ) ) {
-                
-                
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp" ) ) {
                 return false;
             }
         }
@@ -7695,10 +7794,10 @@ public final class Test {
             nhxp.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
             nhxp.setReplaceUnderscores( true );
             final Phylogeny uc0 = factory.create( "(A__A_,_B_B)", nhxp )[ 0 ];
-            if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A " ) ) {
+            if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A" ) ) {
                 return false;
             }
-            if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( " B B" ) ) {
+            if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( "B B" ) ) {
                 return false;
             }
             final Phylogeny p1b = factory
@@ -7989,14 +8088,14 @@ public final class Test {
             if ( p50.getNode( "A" ) == null ) {
                 return false;
             }
-            if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
+            if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
                     .equals( "((A,B)ab:2.0[88],C);" ) ) {
                 return false;
             }
-            if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) {
+            if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) {
                 return false;
             }
-            if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES )
+            if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES )
                     .equals( "((A,B)88:2.0,C);" ) ) {
                 return false;
             }
@@ -8014,13 +8113,39 @@ public final class Test {
             if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) {
                 return false;
             }
-            // 
             final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ];
             if ( p54.getNode( "A" ) == null ) {
                 return false;
             }
-            if ( !p54.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
-                    .equals( "((A,B)[88],C);" ) ) {
+            if ( !p54.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ).equals( "((A,B)[88],C);" ) ) {
+                return false;
+            }
+            // 
+            final Phylogeny p55 = factory
+                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1  s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ),
+                             new NHXParser() )[ 0 ];
+            if ( !p55
+                    .toNewHampshire()
+                    .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,lcl|HPV66_L1.1x:0.0798012);" ) ) {
+                System.out.println( p55.toNewHampshire() );
+                return false;
+            }
+            final Phylogeny p56 = factory
+                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1      s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ),
+                             new NHXParser() )[ 0 ];
+            if ( !p56
+                    .toNewHampshire()
+                    .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) {
+                System.out.println( p56.toNewHampshire() );
+                return false;
+            }
+            final Phylogeny p57 = factory
+                    .create( new StringBuffer( "((\"lcl|HPV32_L1.:1      s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ),
+                             new NHXParser() )[ 0 ];
+            if ( !p57
+                    .toNewHampshire()
+                    .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) {
+                System.out.println( p56.toNewHampshire() );
                 return false;
             }
         }
index 7492f1e..300de97 100644 (file)
@@ -80,7 +80,7 @@ public final class ForesterUtil {
     public final static String       OS_ARCH                          = System.getProperty( "os.arch" );
     public final static String       OS_NAME                          = System.getProperty( "os.name" );
     public final static String       OS_VERSION                       = System.getProperty( "os.version" );
-    public final static Pattern      PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s]" );
+    public final static Pattern      PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]'\"]" );
     public final static double       ZERO_DIFF                        = 1.0E-9;
     public static final BigDecimal   NULL_BD                          = new BigDecimal( 0 );
     public static final NumberFormat FORMATTER_9;
@@ -958,18 +958,11 @@ public final class ForesterUtil {
         return s;
     }
 
-    final public static String replaceIllegalNhCharacters( final String nh ) {
-        if ( nh == null ) {
-            return "";
-        }
-        return nh.trim().replaceAll( "[\\[\\]:]+", "_" );
-    }
-
     final public static String replaceIllegalNhxCharacters( final String nhx ) {
         if ( nhx == null ) {
             return "";
         }
-        return nhx.trim().replaceAll( "[\\[\\](),:;\\s]+", "_" );
+        return nhx.trim().replaceAll( "[\\[\\]']+", "_" );
     }
 
     final public static double round( final double value, final int decimal_place ) {