From c956545c704f53df5c8711ede20e786641bfc7be Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 15 Apr 2014 01:54:34 +0000 Subject: [PATCH] inprogress --- .../org/forester/application/count_support.java | 3 +- .../org/forester/archaeopteryx/ArchaeopteryxE.java | 4 +- .../src/org/forester/archaeopteryx/MainFrame.java | 4 +- .../archaeopteryx/MainFrameApplication.java | 2 +- .../src/org/forester/io/parsers/nhx/NHXParser.java | 55 +++-- .../org/forester/io/parsers/util/ParserUtils.java | 112 +++++++--- .../org/forester/io/writers/PhylogenyWriter.java | 40 +--- .../java/src/org/forester/phylogeny/Phylogeny.java | 8 +- .../src/org/forester/phylogeny/PhylogenyNode.java | 14 +- forester/java/src/org/forester/test/Test.java | 233 +++++++++++++++----- .../java/src/org/forester/util/ForesterUtil.java | 11 +- 11 files changed, 323 insertions(+), 163 deletions(-) diff --git a/forester/java/src/org/forester/application/count_support.java b/forester/java/src/org/forester/application/count_support.java index b57f3c5..9be5f89 100644 --- a/forester/java/src/org/forester/application/count_support.java +++ b/forester/java/src/org/forester/application/count_support.java @@ -186,7 +186,6 @@ public class count_support { } else { w.toNewHampshire( evaluator_phylogenies_above_threshold, - true, branch_lengths_in_ev_out, evaluators_outfile, ";" + ForesterUtil.getLineSeparator() ); @@ -200,7 +199,7 @@ public class count_support { ";" + ForesterUtil.getLineSeparator() ); } else { - w.toNewHampshire( Arrays.asList( ev ), true, branch_lengths_in_ev_out, evaluators_outfile, ";" + w.toNewHampshire( Arrays.asList( ev ), branch_lengths_in_ev_out, evaluators_outfile, ";" + ForesterUtil.getLineSeparator() ); } } diff --git a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java index 9e48b82..3954da0 100644 --- a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java +++ 
b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java @@ -1411,8 +1411,8 @@ public class ArchaeopteryxE extends JApplet implements ActionListener { if ( !ForesterUtil.isEmpty( getMainPanel().getCurrentPhylogeny().getName() ) ) { title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title; } - showTextFrame( getMainPanel().getCurrentPhylogeny() - .toNewHampshire( false, getOptions().getNhConversionSupportValueStyle() ), + showTextFrame( getMainPanel().getCurrentPhylogeny().toNewHampshire( getOptions() + .getNhConversionSupportValueStyle() ), title ); } } diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index b7dc624..4bac494 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -1407,8 +1407,8 @@ public abstract class MainFrame extends JFrame implements ActionListener { if ( !ForesterUtil.isEmpty( _mainpanel.getCurrentPhylogeny().getName() ) ) { title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title; } - showTextFrame( _mainpanel.getCurrentPhylogeny() - .toNewHampshire( false, getOptions().getNhConversionSupportValueStyle() ), + showTextFrame( _mainpanel.getCurrentPhylogeny().toNewHampshire( getOptions() + .getNhConversionSupportValueStyle() ), title ); } } diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index d0484a4..6e13820 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -2326,7 +2326,7 @@ public final class MainFrameApplication extends MainFrame { private boolean writeAsNewHampshire( final Phylogeny t, boolean exception, final File file ) { try { final PhylogenyWriter writer = new PhylogenyWriter(); - 
writer.toNewHampshire( t, false, true, getOptions().getNhConversionSupportValueStyle(), file ); + writer.toNewHampshire( t, true, getOptions().getNhConversionSupportValueStyle(), file ); } catch ( final Exception e ) { exception = true; diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index f54602f..2921230 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -70,6 +70,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse final static private byte STRING = 0; final static private byte STRING_BUFFER = 1; final static private byte STRING_BUILDER = 4; + final static private char BELL = 7; private boolean _allow_errors_in_distance_to_parent; private int _clade_level; private StringBuilder _current_anotation; @@ -296,7 +297,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse _current_phylogeny.getRoot(), getTaxonomyExtraction(), isReplaceUnderscores(), - isAllowErrorsInDistanceToParent() ); + isAllowErrorsInDistanceToParent(), + true ); if ( GUESS_IF_SUPPORT_VALUES ) { if ( isBranchLengthsLikeBootstrapValues( _current_phylogeny ) ) { moveBranchLengthsToConfidenceValues( _current_phylogeny ); @@ -321,7 +323,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse new_node, getTaxonomyExtraction(), isReplaceUnderscores(), - isAllowErrorsInDistanceToParent() ); + isAllowErrorsInDistanceToParent(), + true ); _current_phylogeny = new Phylogeny(); _current_phylogeny.setRoot( new_node ); return _current_phylogeny; @@ -418,7 +421,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse _in_double_quote = false; } else { - _current_anotation.append( c ); + _current_anotation.append( c != ':' ? 
c : BELL ); } } else if ( c == '"' ) { @@ -429,7 +432,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse _in_single_quote = false; } else { - _current_anotation.append( c ); + _current_anotation.append( c != ':' ? c : BELL ); } } else if ( c == 39 ) { @@ -505,7 +508,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse new_node, getTaxonomyExtraction(), isReplaceUnderscores(), - isAllowErrorsInDistanceToParent() ); + isAllowErrorsInDistanceToParent(), + true ); _current_anotation = new StringBuilder(); _current_node.addAsChild( new_node ); } @@ -514,7 +518,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse _current_node.getLastChildNode(), getTaxonomyExtraction(), isReplaceUnderscores(), - isAllowErrorsInDistanceToParent() ); + isAllowErrorsInDistanceToParent(), + true ); _current_anotation = new StringBuilder(); } if ( !_current_node.isRoot() ) { @@ -530,7 +535,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse new_node, getTaxonomyExtraction(), isReplaceUnderscores(), - isAllowErrorsInDistanceToParent() ); + isAllowErrorsInDistanceToParent(), + true ); if ( _current_node == null ) { throw new NHXFormatException( "format might not be NH or NHX" ); } @@ -541,7 +547,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse _current_node.getLastChildNode(), getTaxonomyExtraction(), isReplaceUnderscores(), - isAllowErrorsInDistanceToParent() ); + isAllowErrorsInDistanceToParent(), + true ); } _current_anotation = new StringBuilder(); _saw_closing_paren = false; @@ -583,7 +590,8 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse final PhylogenyNode node_to_annotate, final TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores, - final boolean allow_errors_in_distance_to_parent ) throws NHXFormatException, + final boolean 
allow_errors_in_distance_to_parent, + final boolean replace_bell ) throws NHXFormatException, PhyloXmlDataFormatException { if ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) && replace_underscores ) { throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" ); @@ -592,6 +600,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse if ( replace_underscores ) { s = s.replaceAll( "_+", " " ); } + s = s.replaceAll( "\\s+", " " ).trim(); boolean is_nhx = false; final int ob = s.indexOf( "[" ); if ( ob > -1 ) { @@ -623,14 +632,30 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse final StringTokenizer t = new StringTokenizer( s, ":" ); if ( t.countTokens() > 0 ) { if ( !s.startsWith( ":" ) ) { - node_to_annotate.setName( t.nextToken() ); + if ( ( s.indexOf( BELL ) <= -1 ) || !replace_bell ) { + node_to_annotate.setName( t.nextToken() ); + } + else { + node_to_annotate.setName( t.nextToken().replace( BELL, ':' ) ); + } if ( !replace_underscores && ( !is_nhx && ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) { ParserUtils.extractTaxonomyDataFromNodeName( node_to_annotate, taxonomy_extraction ); } } while ( t.hasMoreTokens() ) { s = t.nextToken(); - if ( s.startsWith( NHXtags.SPECIES_NAME ) ) { + if ( ( s.indexOf( BELL ) > -1 ) && replace_bell ) { + s = s.replace( BELL, ':' ); + } + if ( s.indexOf( '=' ) < 0 ) { + if ( ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) + && !allow_errors_in_distance_to_parent ) { + throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:" + + "\"" + s + "\"" ); + } + node_to_annotate.setDistanceToParent( doubleValue( s, allow_errors_in_distance_to_parent ) ); + } + else if ( s.startsWith( NHXtags.SPECIES_NAME ) ) { if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) { node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() ); } @@ -672,14 
+697,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } node_to_annotate.getNodeData().getSequence().setName( s.substring( 3 ) ); } - else if ( s.indexOf( '=' ) < 0 ) { - if ( ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) - && !allow_errors_in_distance_to_parent ) { - throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:" - + "\"" + s + "\"" ); - } - node_to_annotate.setDistanceToParent( doubleValue( s, allow_errors_in_distance_to_parent ) ); - } } // while ( t.hasMoreTokens() ) } } diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index b37dbeb..d904a81 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -55,29 +55,43 @@ import org.forester.util.ForesterUtil; public final class ParserUtils { - final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA"; - final public static Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")\\b" ); - final public static Pattern TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" ); - final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(" - + TAX_CODE + ")\\b" ); - final public static Pattern TAXOMONY_SN_PATTERN = Pattern - .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]{2,30}_[a-z]{3,30}(?:_[a-z][a-z0-9_]+)?)\\b" ); - final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern - .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}(?:[_ ][a-z]{3,30})?)(?:\\b|_)?" 
); - final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_1 = Pattern - .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" ); - final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_2 = Pattern - .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))(?:\\b|_)?" ); - final public static Pattern TAXOMONY_SN_PATTERN_SP = Pattern - .compile( "\\b([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" ); - - final public static Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile( "([A-Z][a-z]{2,30})" ); - final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_(" - + TAX_CODE + ")/\\d+-\\d+\\b" ); - final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern - .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" ); - final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern - .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" ); + final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA"; + final private static String SN_BN = "[A-Z][a-z]{2,30}[_ ][a-z]{3,30}"; + final public static Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + + ")\\b" ); + final public static Pattern TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" ); + final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(" + + TAX_CODE + ")\\b" ); + // final public static Pattern TAXOMONY_SN_PATTERN = Pattern + // .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]{2,30}_[a-z]{3,30}(?:_[a-z][a-z0-9_]+)?)\\b" ); + final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern.compile( "(?:\\b|_)(" + SN_BN + + ")(?:(\\s*$)|([_ ][a-z]*[A-Z0-9]))" ); + final public static Pattern TAXOMONY_SN_PATTERN_SNS = Pattern.compile( "(?:\\b|_)(" + SN_BN + + "[_ ][a-z]{3,30}" + + ")[_ ][a-z]*[A-Z0-9]" ); + final public static Pattern TAXOMONY_SN_PATTERN_SNS2 
= Pattern.compile( "[A-Z0-9][a-z]*[_ ](" + SN_BN + + "[_ ][a-z]{3,30}" + ")\\s*$" ); + final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_1 = Pattern + .compile( "(?:\\b|_)(" + + SN_BN + + "[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" ); + final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_2 = Pattern + .compile( "(?:\\b|_)(" + + SN_BN + + "[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))" ); + final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN = Pattern + .compile( "(?:\\b|_)(" + + SN_BN + + "[_ ]str[a-z]{0,3}\\.?[_ ]\\S{1,60}[_ ]substr[a-z]{0,3}\\.?[_ ]\\S{1,60})(?:\\b|_)" ); + final public static Pattern TAXOMONY_SN_PATTERN_SP = Pattern + .compile( "(?:\\b|_)([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" ); + final public static Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile( "([A-Z][a-z]{2,30})" ); + final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_(" + + TAX_CODE + ")/\\d+-\\d+\\b" ); + final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern + .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" ); + final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern + .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" ); final public static PhylogenyParser createParserDependingFileContents( final File file, final boolean phyloxml_validate_against_xsd ) @@ -204,26 +218,62 @@ public final class ParserUtils { } public final static String extractScientificNameFromNodeName( final String name ) { - final Matcher m = TAXOMONY_SN_PATTERN.matcher( name ); - if ( m.find() ) { - return m.group( 1 ).replace( '_', ' ' ); + // final Matcher m = TAXOMONY_SN_PATTERN.matcher( name ); + // if ( m.find() ) { + // return m.group( 1 ).replace( '_', ' ' ); + // } + final Matcher m_ss = TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN.matcher( name ); + if ( m_ss.find() ) { + String s = m_ss.group( 1 ).replace( '_', ' ' ); + if ( s.indexOf( " str " ) > 4 ) { + s = s.replaceFirst( " str 
", " str. " ); + } + if ( s.indexOf( " substr " ) > 4 ) { + s = s.replaceFirst( " substr ", " substr. " ); + } + return s; } final Matcher m_str1 = TAXOMONY_SN_PATTERN_STRAIN_1.matcher( name ); if ( m_str1.find() ) { - return m_str1.group( 1 ).replace( '_', ' ' ); + String s = m_str1.group( 1 ).replace( '_', ' ' ); + if ( s.indexOf( " str " ) > 4 ) { + s = s.replaceFirst( " str ", " str. " ); + } + else if ( s.indexOf( " subsp " ) > 4 ) { + s = s.replaceFirst( " subsp ", " subsp. " ); + } + else if ( s.indexOf( " var " ) > 4 ) { + s = s.replaceFirst( " var ", " var. " ); + } + return s; } final Matcher m_str2 = TAXOMONY_SN_PATTERN_STRAIN_2.matcher( name ); if ( m_str2.find() ) { - return m_str2.group( 1 ).replace( '_', ' ' ); + String s = m_str2.group( 1 ).replace( '_', ' ' ); + if ( s.indexOf( " (str " ) > 4 ) { + s = s.replaceFirst( " \\(str ", " (str. " ); + } + else if ( s.indexOf( " (subsp " ) > 4 ) { + s = s.replaceFirst( " \\(subsp ", " (subsp. " ); + } + else if ( s.indexOf( " (var " ) > 4 ) { + s = s.replaceFirst( " \\(var ", " (var. 
" ); + } + return s; + } + final Matcher m_sns = TAXOMONY_SN_PATTERN_SNS.matcher( name ); + if ( m_sns.find() ) { + return m_sns.group( 1 ).replace( '_', ' ' ); + } + final Matcher m_sns2 = TAXOMONY_SN_PATTERN_SNS2.matcher( name ); + if ( m_sns2.find() ) { + return m_sns2.group( 1 ).replace( '_', ' ' ); } final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name ); - if ( m_sn.find() ) { return m_sn.group( 1 ).replace( '_', ' ' ); } - final Matcher m_sp = TAXOMONY_SN_PATTERN_SP.matcher( name ); - if ( m_sp.find() ) { return m_sp.group( 1 ).replace( '_', ' ' ); } diff --git a/forester/java/src/org/forester/io/writers/PhylogenyWriter.java b/forester/java/src/org/forester/io/writers/PhylogenyWriter.java index e4b7dc7..6d490c5 100644 --- a/forester/java/src/org/forester/io/writers/PhylogenyWriter.java +++ b/forester/java/src/org/forester/io/writers/PhylogenyWriter.java @@ -72,7 +72,6 @@ public final class PhylogenyWriter { private PhylogenyNode _root; private boolean _has_next; private Stack _stack; - private boolean _simple_nh; private boolean _nh_write_distance_to_parent; NH_CONVERSION_SUPPORT_VALUE_STYLE _nh_conversion_support_style; private boolean _indent_phyloxml; @@ -202,10 +201,6 @@ public final class PhylogenyWriter { return _saw_comma; } - private boolean isSimpleNH() { - return _simple_nh; - } - private boolean isWriteDistanceToParentInNH() { return _nh_write_distance_to_parent; } @@ -307,10 +302,6 @@ public final class PhylogenyWriter { _saw_comma = saw_comma; } - private void setSimpleNH( final boolean simple_nh ) { - _simple_nh = simple_nh; - } - private void setStack( final Stack stack ) { _stack = stack; } @@ -324,62 +315,53 @@ public final class PhylogenyWriter { } public void toNewHampshire( final List trees, - final boolean simple_nh, final boolean write_distance_to_parent, final File out_file, final String separator ) throws IOException { final Iterator it = trees.iterator(); final StringBuffer sb = new StringBuffer(); while ( it.hasNext() ) { - 
sb.append( toNewHampshire( it.next(), simple_nh, write_distance_to_parent ) ); + sb.append( toNewHampshire( it.next(), write_distance_to_parent ) ); sb.append( separator ); } writeToFile( sb, out_file ); } public StringBuffer toNewHampshire( final Phylogeny tree, - final boolean simple_nh, final boolean nh_write_distance_to_parent, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) throws IOException { setOutputFormt( FORMAT.NH ); setNhConversionSupportStyle( svs ); - setSimpleNH( simple_nh ); setWriteDistanceToParentInNH( nh_write_distance_to_parent ); return getOutput( tree ); } - public StringBuffer toNewHampshire( final Phylogeny tree, - final boolean simple_nh, - final boolean nh_write_distance_to_parent ) throws IOException { + public StringBuffer toNewHampshire( final Phylogeny tree, final boolean nh_write_distance_to_parent ) + throws IOException { setOutputFormt( FORMAT.NH ); - setSimpleNH( simple_nh ); setWriteDistanceToParentInNH( nh_write_distance_to_parent ); return getOutput( tree ); } - public void toNewHampshire( final Phylogeny tree, - final boolean simple_nh, - final boolean write_distance_to_parent, - final File out_file ) throws IOException { - writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent ), out_file ); + public void toNewHampshire( final Phylogeny tree, final boolean write_distance_to_parent, final File out_file ) + throws IOException { + writeToFile( toNewHampshire( tree, write_distance_to_parent ), out_file ); } public void toNewHampshire( final Phylogeny tree, - final boolean simple_nh, final boolean write_distance_to_parent, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs, final File out_file ) throws IOException { - writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent, svs ), out_file ); + writeToFile( toNewHampshire( tree, write_distance_to_parent, svs ), out_file ); } public void toNewHampshire( final Phylogeny[] trees, - final boolean simple_nh, final boolean write_distance_to_parent, final File 
out_file, final String separator ) throws IOException { final StringBuffer sb = new StringBuffer(); for( final Phylogeny element : trees ) { - sb.append( toNewHampshire( element, simple_nh, write_distance_to_parent ) ); + sb.append( toNewHampshire( element, write_distance_to_parent ) ); sb.append( separator ); } writeToFile( sb, out_file ); @@ -575,9 +557,7 @@ public final class PhylogenyWriter { getBuffer().append( node.toNewHampshireX() ); } else if ( getOutputFormt() == FORMAT.NH ) { - getBuffer().append( node.toNewHampshire( isSimpleNH(), - isWriteDistanceToParentInNH(), - getNhConversionSupportStyle() ) ); + getBuffer().append( node.toNewHampshire( isWriteDistanceToParentInNH(), getNhConversionSupportStyle() ) ); } } @@ -754,7 +734,7 @@ public final class PhylogenyWriter { else { writer.write( "[&U]" ); } - writer.write( phylogeny.toNewHampshire( false, svs ) ); + writer.write( phylogeny.toNewHampshire( svs ) ); writer.write( ForesterUtil.LINE_SEPARATOR ); i++; } diff --git a/forester/java/src/org/forester/phylogeny/Phylogeny.java b/forester/java/src/org/forester/phylogeny/Phylogeny.java index 3eac2bd..115e885 100644 --- a/forester/java/src/org/forester/phylogeny/Phylogeny.java +++ b/forester/java/src/org/forester/phylogeny/Phylogeny.java @@ -1098,14 +1098,12 @@ public class Phylogeny { } public String toNewHampshire() { - return toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ); + return toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ); } - public String toNewHampshire( final boolean simple_nh, - final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_style ) { + public String toNewHampshire( final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_style ) { try { - return new PhylogenyWriter().toNewHampshire( this, simple_nh, true, nh_conversion_support_style ) - .toString(); + return new PhylogenyWriter().toNewHampshire( this, true, nh_conversion_support_style ).toString(); } catch ( final IOException e ) { throw new Error( 
"this should not have happend: " + e.getMessage() ); diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java index d17fd41..6a4876b 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java @@ -86,7 +86,7 @@ public final class PhylogenyNode implements Comparable { private PhylogenyNode( final String nhx, final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException { - NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores, false ); + NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores, false, false ); setId( PhylogenyNode.getNodeCount() ); PhylogenyNode.increaseNodeCount(); setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!). @@ -887,8 +887,7 @@ public final class PhylogenyNode implements Comparable { // --------------------------------------------------------- // Writing of Nodes to Strings // --------------------------------------------------------- - final public String toNewHampshire( final boolean simple_nh, - final boolean write_distance_to_parent, + final public String toNewHampshire( final boolean write_distance_to_parent, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) { final StringBuilder sb = new StringBuilder(); String data = ""; @@ -922,11 +921,9 @@ public final class PhylogenyNode implements Comparable { data = getNodeData().getSequence().getName(); } } + data = data.trim(); if ( data.length() > 0 ) { - data = ForesterUtil.replaceIllegalNhCharacters( data ); - if ( simple_nh && ( data.length() > 10 ) ) { - data = data.substring( 0, 11 ); - } + data = data.replaceAll( "'", "_" ); if ( ForesterUtil.isContainsParanthesesableNhCharacter( data ) ) { sb.append( '\'' ); sb.append( data ); @@ -960,7 +957,8 @@ public final class PhylogenyNode implements Comparable { 
final StringBuffer sb = new StringBuffer(); final StringBuffer s_nhx = new StringBuffer(); if ( !ForesterUtil.isEmpty( getName() ) ) { - final String name = ForesterUtil.replaceIllegalNhCharacters( getName() ); + //final String name = ForesterUtil.replaceIllegalNhCharacters( getName() ); + final String name = getName().trim(); if ( ForesterUtil.isContainsParanthesesableNhCharacter( name ) ) { sb.append( '\'' ); sb.append( name ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 78fe31b..ec44a86 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -312,7 +312,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.exit( -1 ); System.out.print( "Uri for Aptx web sequence accession: " ); if ( Test.testCreateUriForSeqWeb() ) { System.out.println( "OK." ); @@ -385,6 +384,7 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.exit( 0 ); System.out.print( "Nexus characters parsing: " ); if ( Test.testNexusCharactersParsing() ) { System.out.println( "OK." 
); @@ -3468,7 +3468,7 @@ public final class Test { if ( t4.getNumberOfExternalNodes() != 5 ) { return false; } - String s = w.toNewHampshire( t4, false, true ).toString(); + String s = w.toNewHampshire( t4, true ).toString(); if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { return false; } @@ -3489,7 +3489,7 @@ public final class Test { if ( !n.getName().equals( "D" ) ) { return false; } - s = w.toNewHampshire( t4, false, true ).toString(); + s = w.toNewHampshire( t4, true ).toString(); if ( !s.equals( "((A,B12),D);" ) ) { return false; } @@ -3498,7 +3498,7 @@ public final class Test { if ( t5.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t5, false, true ).toString(); + s = w.toNewHampshire( t5, true ).toString(); if ( !s.equals( "(((B11,B12),B2),(C,D));" ) ) { return false; } @@ -3507,7 +3507,7 @@ public final class Test { if ( t6.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t6, false, false ).toString(); + s = w.toNewHampshire( t6, false ).toString(); if ( !s.equals( "((A,(B12,B2)),(C,D));" ) ) { return false; } @@ -3516,7 +3516,7 @@ public final class Test { if ( t7.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t7, false, true ).toString(); + s = w.toNewHampshire( t7, true ).toString(); if ( !s.equals( "((A,(B11,B2)),(C,D));" ) ) { return false; } @@ -3525,7 +3525,7 @@ public final class Test { if ( t8.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t8, false, false ).toString(); + s = w.toNewHampshire( t8, false ).toString(); if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { return false; } @@ -3534,7 +3534,7 @@ public final class Test { if ( t9.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t9, false, true ).toString(); + s = w.toNewHampshire( t9, true ).toString(); if ( !s.equals( "((A,((B11,B12),B2)),D);" ) ) { return false; } @@ -3543,7 +3543,7 @@ public final class Test { if ( t10.getNumberOfExternalNodes() != 5 ) { 
return false; } - s = w.toNewHampshire( t10, false, true ).toString(); + s = w.toNewHampshire( t10, true ).toString(); if ( !s.equals( "((A,((B11,B12),B2)),C);" ) ) { return false; } @@ -3552,7 +3552,7 @@ public final class Test { if ( t11.getNumberOfExternalNodes() != 2 ) { return false; } - s = w.toNewHampshire( t11, false, true ).toString(); + s = w.toNewHampshire( t11, true ).toString(); if ( !s.equals( "(B,C);" ) ) { return false; } @@ -3560,7 +3560,7 @@ public final class Test { if ( t11.getNumberOfExternalNodes() != 1 ) { return false; } - s = w.toNewHampshire( t11, false, false ).toString(); + s = w.toNewHampshire( t11, false ).toString(); if ( !s.equals( "B;" ) ) { return false; } @@ -3569,7 +3569,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 8 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),(B1,B3),(C1,C2,C3));" ) ) { return false; } @@ -3577,7 +3577,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 7 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),B1,(C1,C2,C3));" ) ) { return false; } @@ -3585,7 +3585,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 6 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),B1,(C1,C2));" ) ) { return false; } @@ -3593,7 +3593,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A2,A3),B1,(C1,C2));" ) ) { return false; } @@ -3601,7 +3601,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 4 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = 
w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A2,A3),(C1,C2));" ) ) { return false; } @@ -3609,7 +3609,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 3 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "(A2,(C1,C2));" ) ) { return false; } @@ -3617,7 +3617,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 2 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "(C1,C2);" ) ) { return false; } @@ -3626,7 +3626,7 @@ public final class Test { if ( t13.getNumberOfExternalNodes() != 4 ) { return false; } - s = w.toNewHampshire( t13, false, true ).toString(); + s = w.toNewHampshire( t13, true ).toString(); if ( !s.equals( "(A,B,C,E:5.0);" ) ) { return false; } @@ -3635,7 +3635,7 @@ public final class Test { if ( t14.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t14, false, true ).toString(); + s = w.toNewHampshire( t14, true ).toString(); if ( !s.equals( "((A,B,C,D:1.1),F);" ) ) { return false; } @@ -4177,25 +4177,95 @@ public final class Test { if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus" ).equals( "Mus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus" ) + if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCDO2" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus musculus BCDO2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_BCDO2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( 
!ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus musculus" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Bcl Mus musculus musculus" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "vcl Mus musculus musculus" ) != null ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_BCDO2" ) .equals( "Mus musculus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus-12" ) + if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_Musculus" ) .equals( "Mus musculus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus-12" ).equals( "Mus musculus" ) ) { + if ( ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_musculus" ) != null ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "musculus" ) != null ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus" ) != null ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus_musculus" ) != null ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus-12 affrre e" ) + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_1" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_1" ).equals( "Mus musculus" ) ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_bcl" ) != null ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCL" ).equals( "Mus musculus" ) ) { + return false; + } + if ( 
ParserUtils.extractScientificNameFromNodeName( "Mus musculus bcl" ) != null ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus BCL" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus xBCL" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus x1" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus_12" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12 affrre e" ) + .equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12_affrre_e" ) .equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus" ) + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" ) .equals( "Mus musculus musculus" ) ) { return false; } @@ -4206,7 +4276,8 @@ public final class Test { if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" ).equals( "Pilostyles mexicana" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" ) + .equals( "Pilostyles mexicana" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_strain_K12/DH10B" ) @@ -4214,7 +4285,7 @@ public final class Test { return false; } if ( 
!ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K12/DH10B" ) - .equals( "Escherichia coli str K12/DH10B" ) ) { + .equals( "Escherichia coli str. K12/DH10B" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K12/DH10B" ) @@ -4222,7 +4293,7 @@ public final class Test { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis_lyrata_subsp_lyrata" ) - .equals( "Arabidopsis lyrata subsp lyrata" ) ) { + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata" ) @@ -4238,7 +4309,7 @@ public final class Test { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp lyrata bcl2" ) - .equals( "Arabidopsis lyrata subsp lyrata" ) ) { + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subspecies lyrata bcl2" ) @@ -4261,35 +4332,63 @@ public final class Test { .equals( "Escherichia coli (str. K12)" ) ) { return false; } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str K12)" ) + .equals( "Escherichia coli (str. K12)" ) ) { + return false; + } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. K12) bcl2" ) .equals( "Escherichia coli (str. K12)" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." ) - .equals( "Macrocera sp." ) ) { - - return false; + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (var K12) bcl2" ) + .equals( "Escherichia coli (var. K12)" ) ) { + return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" ) - .equals( "Macrocera sp." ) ) { - + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K-12 substr. MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. 
MG1655star" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" ) - .equals( "Macrocera sp." ) ) { - - + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils + .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star gene1" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils + .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star GENE1" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + // + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." ).equals( "Macrocera sp." ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" ).equals( "Macrocera sp." ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" ).equals( "Macrocera sp." ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "something Macrocera sp. K12" ) .equals( "Macrocera sp." 
) ) { - - return false; - } if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ) - .equals( "Macrocera sp" ) ) { - - + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp" ) ) { return false; } } @@ -7695,10 +7794,10 @@ public final class Test { nhxp.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); nhxp.setReplaceUnderscores( true ); final Phylogeny uc0 = factory.create( "(A__A_,_B_B)", nhxp )[ 0 ]; - if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A " ) ) { + if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A" ) ) { return false; } - if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( " B B" ) ) { + if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( "B B" ) ) { return false; } final Phylogeny p1b = factory @@ -7989,14 +8088,14 @@ public final class Test { if ( p50.getNode( "A" ) == null ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) .equals( "((A,B)ab:2.0[88],C);" ) ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) { + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) .equals( "((A,B)88:2.0,C);" ) ) { return false; } @@ -8014,13 +8113,39 @@ public final class Test { if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) { return false; } - // final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ]; if ( p54.getNode( "A" ) == null ) { return false; } - if ( !p54.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) - .equals( "((A,B)[88],C);" 
) ) { + if ( !p54.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ).equals( "((A,B)[88],C);" ) ) { + return false; + } + // + final Phylogeny p55 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ), + new NHXParser() )[ 0 ]; + if ( !p55 + .toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,lcl|HPV66_L1.1x:0.0798012);" ) ) { + System.out.println( p55.toNewHampshire() ); + return false; + } + final Phylogeny p56 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ), + new NHXParser() )[ 0 ]; + if ( !p56 + .toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) { + System.out.println( p56.toNewHampshire() ); + return false; + } + final Phylogeny p57 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ), + new NHXParser() )[ 0 ]; + if ( !p57 + .toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) { + System.out.println( p57.toNewHampshire() ); return false; } } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 7492f1e..300de97 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -80,7 +80,7 @@ public final class ForesterUtil { public final static String OS_ARCH = System.getProperty( "os.arch" ); public final
static String OS_NAME = System.getProperty( "os.name" ); public final static String OS_VERSION = System.getProperty( "os.version" ); - public final static Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s]" ); + public final static Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]'\"]" ); public final static double ZERO_DIFF = 1.0E-9; public static final BigDecimal NULL_BD = new BigDecimal( 0 ); public static final NumberFormat FORMATTER_9; @@ -958,18 +958,11 @@ public final class ForesterUtil { return s; } - final public static String replaceIllegalNhCharacters( final String nh ) { - if ( nh == null ) { - return ""; - } - return nh.trim().replaceAll( "[\\[\\]:]+", "_" ); - } - final public static String replaceIllegalNhxCharacters( final String nhx ) { if ( nhx == null ) { return ""; } - return nhx.trim().replaceAll( "[\\[\\](),:;\\s]+", "_" ); + return nhx.trim().replaceAll( "[\\[\\]']+", "_" ); } final public static double round( final double value, final int decimal_place ) { -- 1.7.10.2