}
else {
w.toNewHampshire( evaluator_phylogenies_above_threshold,
- true,
branch_lengths_in_ev_out,
evaluators_outfile,
";" + ForesterUtil.getLineSeparator() );
";" + ForesterUtil.getLineSeparator() );
}
else {
- w.toNewHampshire( Arrays.asList( ev ), true, branch_lengths_in_ev_out, evaluators_outfile, ";"
+ w.toNewHampshire( Arrays.asList( ev ), branch_lengths_in_ev_out, evaluators_outfile, ";"
+ ForesterUtil.getLineSeparator() );
}
}
if ( !ForesterUtil.isEmpty( getMainPanel().getCurrentPhylogeny().getName() ) ) {
title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title;
}
- showTextFrame( getMainPanel().getCurrentPhylogeny()
- .toNewHampshire( false, getOptions().getNhConversionSupportValueStyle() ),
+ showTextFrame( getMainPanel().getCurrentPhylogeny().toNewHampshire( getOptions()
+ .getNhConversionSupportValueStyle() ),
title );
}
}
if ( !ForesterUtil.isEmpty( _mainpanel.getCurrentPhylogeny().getName() ) ) {
title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title;
}
- showTextFrame( _mainpanel.getCurrentPhylogeny()
- .toNewHampshire( false, getOptions().getNhConversionSupportValueStyle() ),
+ showTextFrame( _mainpanel.getCurrentPhylogeny().toNewHampshire( getOptions()
+ .getNhConversionSupportValueStyle() ),
title );
}
}
private boolean writeAsNewHampshire( final Phylogeny t, boolean exception, final File file ) {
try {
final PhylogenyWriter writer = new PhylogenyWriter();
- writer.toNewHampshire( t, false, true, getOptions().getNhConversionSupportValueStyle(), file );
+ writer.toNewHampshire( t, true, getOptions().getNhConversionSupportValueStyle(), file );
}
catch ( final Exception e ) {
exception = true;
final static private byte STRING = 0;
final static private byte STRING_BUFFER = 1;
final static private byte STRING_BUILDER = 4;
+ final static private char BELL = 7;
private boolean _allow_errors_in_distance_to_parent;
private int _clade_level;
private StringBuilder _current_anotation;
_current_phylogeny.getRoot(),
getTaxonomyExtraction(),
isReplaceUnderscores(),
- isAllowErrorsInDistanceToParent() );
+ isAllowErrorsInDistanceToParent(),
+ true );
if ( GUESS_IF_SUPPORT_VALUES ) {
if ( isBranchLengthsLikeBootstrapValues( _current_phylogeny ) ) {
moveBranchLengthsToConfidenceValues( _current_phylogeny );
new_node,
getTaxonomyExtraction(),
isReplaceUnderscores(),
- isAllowErrorsInDistanceToParent() );
+ isAllowErrorsInDistanceToParent(),
+ true );
_current_phylogeny = new Phylogeny();
_current_phylogeny.setRoot( new_node );
return _current_phylogeny;
_in_double_quote = false;
}
else {
- _current_anotation.append( c );
+ _current_anotation.append( c != ':' ? c : BELL );
}
}
else if ( c == '"' ) {
_in_single_quote = false;
}
else {
- _current_anotation.append( c );
+ _current_anotation.append( c != ':' ? c : BELL );
}
}
else if ( c == 39 ) {
new_node,
getTaxonomyExtraction(),
isReplaceUnderscores(),
- isAllowErrorsInDistanceToParent() );
+ isAllowErrorsInDistanceToParent(),
+ true );
_current_anotation = new StringBuilder();
_current_node.addAsChild( new_node );
}
_current_node.getLastChildNode(),
getTaxonomyExtraction(),
isReplaceUnderscores(),
- isAllowErrorsInDistanceToParent() );
+ isAllowErrorsInDistanceToParent(),
+ true );
_current_anotation = new StringBuilder();
}
if ( !_current_node.isRoot() ) {
new_node,
getTaxonomyExtraction(),
isReplaceUnderscores(),
- isAllowErrorsInDistanceToParent() );
+ isAllowErrorsInDistanceToParent(),
+ true );
if ( _current_node == null ) {
throw new NHXFormatException( "format might not be NH or NHX" );
}
_current_node.getLastChildNode(),
getTaxonomyExtraction(),
isReplaceUnderscores(),
- isAllowErrorsInDistanceToParent() );
+ isAllowErrorsInDistanceToParent(),
+ true );
}
_current_anotation = new StringBuilder();
_saw_closing_paren = false;
final PhylogenyNode node_to_annotate,
final TAXONOMY_EXTRACTION taxonomy_extraction,
final boolean replace_underscores,
- final boolean allow_errors_in_distance_to_parent ) throws NHXFormatException,
+ final boolean allow_errors_in_distance_to_parent,
+ final boolean replace_bell ) throws NHXFormatException,
PhyloXmlDataFormatException {
if ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) && replace_underscores ) {
throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" );
if ( replace_underscores ) {
s = s.replaceAll( "_+", " " );
}
+ s = s.replaceAll( "\\s+", " " ).trim();
boolean is_nhx = false;
final int ob = s.indexOf( "[" );
if ( ob > -1 ) {
final StringTokenizer t = new StringTokenizer( s, ":" );
if ( t.countTokens() > 0 ) {
if ( !s.startsWith( ":" ) ) {
- node_to_annotate.setName( t.nextToken() );
+ if ( ( s.indexOf( BELL ) <= -1 ) || !replace_bell ) {
+ node_to_annotate.setName( t.nextToken() );
+ }
+ else {
+ node_to_annotate.setName( t.nextToken().replace( BELL, ':' ) );
+ }
if ( !replace_underscores && ( !is_nhx && ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) {
ParserUtils.extractTaxonomyDataFromNodeName( node_to_annotate, taxonomy_extraction );
}
}
while ( t.hasMoreTokens() ) {
s = t.nextToken();
- if ( s.startsWith( NHXtags.SPECIES_NAME ) ) {
+ if ( ( s.indexOf( BELL ) > -1 ) && replace_bell ) {
+ s = s.replace( BELL, ':' );
+ }
+ if ( s.indexOf( '=' ) < 0 ) {
+ if ( ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT )
+ && !allow_errors_in_distance_to_parent ) {
+ throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:"
+ + "\"" + s + "\"" );
+ }
+ node_to_annotate.setDistanceToParent( doubleValue( s, allow_errors_in_distance_to_parent ) );
+ }
+ else if ( s.startsWith( NHXtags.SPECIES_NAME ) ) {
if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) {
node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() );
}
}
node_to_annotate.getNodeData().getSequence().setName( s.substring( 3 ) );
}
- else if ( s.indexOf( '=' ) < 0 ) {
- if ( ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT )
- && !allow_errors_in_distance_to_parent ) {
- throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:"
- + "\"" + s + "\"" );
- }
- node_to_annotate.setDistanceToParent( doubleValue( s, allow_errors_in_distance_to_parent ) );
- }
} // while ( t.hasMoreTokens() )
}
}
public final class ParserUtils {
- final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA";
- final public static Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")\\b" );
- final public static Pattern TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
- final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
- + TAX_CODE + ")\\b" );
- final public static Pattern TAXOMONY_SN_PATTERN = Pattern
- .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]{2,30}_[a-z]{3,30}(?:_[a-z][a-z0-9_]+)?)\\b" );
- final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern
- .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}(?:[_ ][a-z]{3,30})?)(?:\\b|_)?" );
- final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_1 = Pattern
- .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
- final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_2 = Pattern
- .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))(?:\\b|_)?" );
- final public static Pattern TAXOMONY_SN_PATTERN_SP = Pattern
- .compile( "\\b([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" );
-
- final public static Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile( "([A-Z][a-z]{2,30})" );
- final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
- + TAX_CODE + ")/\\d+-\\d+\\b" );
- final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern
- .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" );
- final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern
- .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" );
+ final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA";
+ final private static String SN_BN = "[A-Z][a-z]{2,30}[_ ][a-z]{3,30}";
+ final public static Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile( "(?:\\b|_)(" + TAX_CODE
+ + ")\\b" );
+ final public static Pattern TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
+ final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
+ + TAX_CODE + ")\\b" );
+ // final public static Pattern TAXOMONY_SN_PATTERN = Pattern
+ // .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]{2,30}_[a-z]{3,30}(?:_[a-z][a-z0-9_]+)?)\\b" );
+ final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern.compile( "(?:\\b|_)(" + SN_BN
+ + ")(?:(\\s*$)|([_ ][a-z]*[A-Z0-9]))" );
+ final public static Pattern TAXOMONY_SN_PATTERN_SNS = Pattern.compile( "(?:\\b|_)(" + SN_BN
+ + "[_ ][a-z]{3,30}"
+ + ")[_ ][a-z]*[A-Z0-9]" );
+ final public static Pattern TAXOMONY_SN_PATTERN_SNS2 = Pattern.compile( "[A-Z0-9][a-z]*[_ ](" + SN_BN
+ + "[_ ][a-z]{3,30}" + ")\\s*$" );
+ final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_1 = Pattern
+ .compile( "(?:\\b|_)("
+ + SN_BN
+ + "[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
+ final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_2 = Pattern
+ .compile( "(?:\\b|_)("
+ + SN_BN
+ + "[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))" );
+ final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN = Pattern
+ .compile( "(?:\\b|_)("
+ + SN_BN
+ + "[_ ]str[a-z]{0,3}\\.?[_ ]\\S{1,60}[_ ]substr[a-z]{0,3}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
+ final public static Pattern TAXOMONY_SN_PATTERN_SP = Pattern
+ .compile( "(?:\\b|_)([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" );
+ final public static Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile( "([A-Z][a-z]{2,30})" );
+ final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
+ + TAX_CODE + ")/\\d+-\\d+\\b" );
+ final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern
+ .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" );
+ final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern
+ .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" );
final public static PhylogenyParser createParserDependingFileContents( final File file,
final boolean phyloxml_validate_against_xsd )
}
public final static String extractScientificNameFromNodeName( final String name ) {
- final Matcher m = TAXOMONY_SN_PATTERN.matcher( name );
- if ( m.find() ) {
- return m.group( 1 ).replace( '_', ' ' );
+ // final Matcher m = TAXOMONY_SN_PATTERN.matcher( name );
+ // if ( m.find() ) {
+ // return m.group( 1 ).replace( '_', ' ' );
+ // }
+ final Matcher m_ss = TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN.matcher( name );
+ if ( m_ss.find() ) {
+ String s = m_ss.group( 1 ).replace( '_', ' ' );
+ if ( s.indexOf( " str " ) > 4 ) {
+ s = s.replaceFirst( " str ", " str. " );
+ }
+ if ( s.indexOf( " substr " ) > 4 ) {
+ s = s.replaceFirst( " substr ", " substr. " );
+ }
+ return s;
}
final Matcher m_str1 = TAXOMONY_SN_PATTERN_STRAIN_1.matcher( name );
if ( m_str1.find() ) {
- return m_str1.group( 1 ).replace( '_', ' ' );
+ String s = m_str1.group( 1 ).replace( '_', ' ' );
+ if ( s.indexOf( " str " ) > 4 ) {
+ s = s.replaceFirst( " str ", " str. " );
+ }
+ else if ( s.indexOf( " subsp " ) > 4 ) {
+ s = s.replaceFirst( " subsp ", " subsp. " );
+ }
+ else if ( s.indexOf( " var " ) > 4 ) {
+ s = s.replaceFirst( " var ", " var. " );
+ }
+ return s;
}
final Matcher m_str2 = TAXOMONY_SN_PATTERN_STRAIN_2.matcher( name );
if ( m_str2.find() ) {
- return m_str2.group( 1 ).replace( '_', ' ' );
+ String s = m_str2.group( 1 ).replace( '_', ' ' );
+ if ( s.indexOf( " (str " ) > 4 ) {
+ s = s.replaceFirst( " \\(str ", " (str. " );
+ }
+ else if ( s.indexOf( " (subsp " ) > 4 ) {
+ s = s.replaceFirst( " \\(subsp ", " (subsp. " );
+ }
+ else if ( s.indexOf( " (var " ) > 4 ) {
+ s = s.replaceFirst( " \\(var ", " (var. " );
+ }
+ return s;
+ }
+ final Matcher m_sns = TAXOMONY_SN_PATTERN_SNS.matcher( name );
+ if ( m_sns.find() ) {
+ return m_sns.group( 1 ).replace( '_', ' ' );
+ }
+ final Matcher m_sns2 = TAXOMONY_SN_PATTERN_SNS2.matcher( name );
+ if ( m_sns2.find() ) {
+ return m_sns2.group( 1 ).replace( '_', ' ' );
}
final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name );
-
if ( m_sn.find() ) {
return m_sn.group( 1 ).replace( '_', ' ' );
}
-
final Matcher m_sp = TAXOMONY_SN_PATTERN_SP.matcher( name );
-
if ( m_sp.find() ) {
return m_sp.group( 1 ).replace( '_', ' ' );
}
private PhylogenyNode _root;
private boolean _has_next;
private Stack<PostOrderStackObject> _stack;
- private boolean _simple_nh;
private boolean _nh_write_distance_to_parent;
NH_CONVERSION_SUPPORT_VALUE_STYLE _nh_conversion_support_style;
private boolean _indent_phyloxml;
return _saw_comma;
}
- private boolean isSimpleNH() {
- return _simple_nh;
- }
-
private boolean isWriteDistanceToParentInNH() {
return _nh_write_distance_to_parent;
}
_saw_comma = saw_comma;
}
- private void setSimpleNH( final boolean simple_nh ) {
- _simple_nh = simple_nh;
- }
-
private void setStack( final Stack<PostOrderStackObject> stack ) {
_stack = stack;
}
}
public void toNewHampshire( final List<Phylogeny> trees,
- final boolean simple_nh,
final boolean write_distance_to_parent,
final File out_file,
final String separator ) throws IOException {
final Iterator<Phylogeny> it = trees.iterator();
final StringBuffer sb = new StringBuffer();
while ( it.hasNext() ) {
- sb.append( toNewHampshire( it.next(), simple_nh, write_distance_to_parent ) );
+ sb.append( toNewHampshire( it.next(), write_distance_to_parent ) );
sb.append( separator );
}
writeToFile( sb, out_file );
}
public StringBuffer toNewHampshire( final Phylogeny tree,
- final boolean simple_nh,
final boolean nh_write_distance_to_parent,
final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) throws IOException {
setOutputFormt( FORMAT.NH );
setNhConversionSupportStyle( svs );
- setSimpleNH( simple_nh );
setWriteDistanceToParentInNH( nh_write_distance_to_parent );
return getOutput( tree );
}
- public StringBuffer toNewHampshire( final Phylogeny tree,
- final boolean simple_nh,
- final boolean nh_write_distance_to_parent ) throws IOException {
+ public StringBuffer toNewHampshire( final Phylogeny tree, final boolean nh_write_distance_to_parent )
+ throws IOException {
setOutputFormt( FORMAT.NH );
- setSimpleNH( simple_nh );
setWriteDistanceToParentInNH( nh_write_distance_to_parent );
return getOutput( tree );
}
- public void toNewHampshire( final Phylogeny tree,
- final boolean simple_nh,
- final boolean write_distance_to_parent,
- final File out_file ) throws IOException {
- writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent ), out_file );
+ public void toNewHampshire( final Phylogeny tree, final boolean write_distance_to_parent, final File out_file )
+ throws IOException {
+ writeToFile( toNewHampshire( tree, write_distance_to_parent ), out_file );
}
public void toNewHampshire( final Phylogeny tree,
- final boolean simple_nh,
final boolean write_distance_to_parent,
final NH_CONVERSION_SUPPORT_VALUE_STYLE svs,
final File out_file ) throws IOException {
- writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent, svs ), out_file );
+ writeToFile( toNewHampshire( tree, write_distance_to_parent, svs ), out_file );
}
public void toNewHampshire( final Phylogeny[] trees,
- final boolean simple_nh,
final boolean write_distance_to_parent,
final File out_file,
final String separator ) throws IOException {
final StringBuffer sb = new StringBuffer();
for( final Phylogeny element : trees ) {
- sb.append( toNewHampshire( element, simple_nh, write_distance_to_parent ) );
+ sb.append( toNewHampshire( element, write_distance_to_parent ) );
sb.append( separator );
}
writeToFile( sb, out_file );
getBuffer().append( node.toNewHampshireX() );
}
else if ( getOutputFormt() == FORMAT.NH ) {
- getBuffer().append( node.toNewHampshire( isSimpleNH(),
- isWriteDistanceToParentInNH(),
- getNhConversionSupportStyle() ) );
+ getBuffer().append( node.toNewHampshire( isWriteDistanceToParentInNH(), getNhConversionSupportStyle() ) );
}
}
else {
writer.write( "[&U]" );
}
- writer.write( phylogeny.toNewHampshire( false, svs ) );
+ writer.write( phylogeny.toNewHampshire( svs ) );
writer.write( ForesterUtil.LINE_SEPARATOR );
i++;
}
}
public String toNewHampshire() {
- return toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
+ return toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
}
- public String toNewHampshire( final boolean simple_nh,
- final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_style ) {
+ public String toNewHampshire( final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_style ) {
try {
- return new PhylogenyWriter().toNewHampshire( this, simple_nh, true, nh_conversion_support_style )
- .toString();
+ return new PhylogenyWriter().toNewHampshire( this, true, nh_conversion_support_style ).toString();
}
catch ( final IOException e ) {
throw new Error( "this should not have happend: " + e.getMessage() );
private PhylogenyNode( final String nhx,
final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction,
final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException {
- NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores, false );
+ NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores, false, false );
setId( PhylogenyNode.getNodeCount() );
PhylogenyNode.increaseNodeCount();
setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!).
// ---------------------------------------------------------
// Writing of Nodes to Strings
// ---------------------------------------------------------
- final public String toNewHampshire( final boolean simple_nh,
- final boolean write_distance_to_parent,
+ final public String toNewHampshire( final boolean write_distance_to_parent,
final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) {
final StringBuilder sb = new StringBuilder();
String data = "";
data = getNodeData().getSequence().getName();
}
}
+ data = data.trim();
if ( data.length() > 0 ) {
- data = ForesterUtil.replaceIllegalNhCharacters( data );
- if ( simple_nh && ( data.length() > 10 ) ) {
- data = data.substring( 0, 11 );
- }
+ data = data.replaceAll( "'", "_" );
if ( ForesterUtil.isContainsParanthesesableNhCharacter( data ) ) {
sb.append( '\'' );
sb.append( data );
final StringBuffer sb = new StringBuffer();
final StringBuffer s_nhx = new StringBuffer();
if ( !ForesterUtil.isEmpty( getName() ) ) {
- final String name = ForesterUtil.replaceIllegalNhCharacters( getName() );
+ // A single quote inside a single-quoted label would end the quoting early; replace it, as toNewHampshire does.
+ final String name = getName().trim().replace( '\'', '_' );
if ( ForesterUtil.isContainsParanthesesableNhCharacter( name ) ) {
sb.append( '\'' );
sb.append( name );
System.out.println( "failed." );
failed++;
}
- System.exit( -1 );
System.out.print( "Uri for Aptx web sequence accession: " );
if ( Test.testCreateUriForSeqWeb() ) {
System.out.println( "OK." );
System.out.println( "failed." );
failed++;
}
System.out.print( "Nexus characters parsing: " );
if ( Test.testNexusCharactersParsing() ) {
System.out.println( "OK." );
if ( t4.getNumberOfExternalNodes() != 5 ) {
return false;
}
- String s = w.toNewHampshire( t4, false, true ).toString();
+ String s = w.toNewHampshire( t4, true ).toString();
if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) {
return false;
}
if ( !n.getName().equals( "D" ) ) {
return false;
}
- s = w.toNewHampshire( t4, false, true ).toString();
+ s = w.toNewHampshire( t4, true ).toString();
if ( !s.equals( "((A,B12),D);" ) ) {
return false;
}
if ( t5.getNumberOfExternalNodes() != 5 ) {
return false;
}
- s = w.toNewHampshire( t5, false, true ).toString();
+ s = w.toNewHampshire( t5, true ).toString();
if ( !s.equals( "(((B11,B12),B2),(C,D));" ) ) {
return false;
}
if ( t6.getNumberOfExternalNodes() != 5 ) {
return false;
}
- s = w.toNewHampshire( t6, false, false ).toString();
+ s = w.toNewHampshire( t6, false ).toString();
if ( !s.equals( "((A,(B12,B2)),(C,D));" ) ) {
return false;
}
if ( t7.getNumberOfExternalNodes() != 5 ) {
return false;
}
- s = w.toNewHampshire( t7, false, true ).toString();
+ s = w.toNewHampshire( t7, true ).toString();
if ( !s.equals( "((A,(B11,B2)),(C,D));" ) ) {
return false;
}
if ( t8.getNumberOfExternalNodes() != 5 ) {
return false;
}
- s = w.toNewHampshire( t8, false, false ).toString();
+ s = w.toNewHampshire( t8, false ).toString();
if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) {
return false;
}
if ( t9.getNumberOfExternalNodes() != 5 ) {
return false;
}
- s = w.toNewHampshire( t9, false, true ).toString();
+ s = w.toNewHampshire( t9, true ).toString();
if ( !s.equals( "((A,((B11,B12),B2)),D);" ) ) {
return false;
}
if ( t10.getNumberOfExternalNodes() != 5 ) {
return false;
}
- s = w.toNewHampshire( t10, false, true ).toString();
+ s = w.toNewHampshire( t10, true ).toString();
if ( !s.equals( "((A,((B11,B12),B2)),C);" ) ) {
return false;
}
if ( t11.getNumberOfExternalNodes() != 2 ) {
return false;
}
- s = w.toNewHampshire( t11, false, true ).toString();
+ s = w.toNewHampshire( t11, true ).toString();
if ( !s.equals( "(B,C);" ) ) {
return false;
}
if ( t11.getNumberOfExternalNodes() != 1 ) {
return false;
}
- s = w.toNewHampshire( t11, false, false ).toString();
+ s = w.toNewHampshire( t11, false ).toString();
if ( !s.equals( "B;" ) ) {
return false;
}
if ( t12.getNumberOfExternalNodes() != 8 ) {
return false;
}
- s = w.toNewHampshire( t12, false, true ).toString();
+ s = w.toNewHampshire( t12, true ).toString();
if ( !s.equals( "((A1,A2,A3),(B1,B3),(C1,C2,C3));" ) ) {
return false;
}
if ( t12.getNumberOfExternalNodes() != 7 ) {
return false;
}
- s = w.toNewHampshire( t12, false, true ).toString();
+ s = w.toNewHampshire( t12, true ).toString();
if ( !s.equals( "((A1,A2,A3),B1,(C1,C2,C3));" ) ) {
return false;
}
if ( t12.getNumberOfExternalNodes() != 6 ) {
return false;
}
- s = w.toNewHampshire( t12, false, true ).toString();
+ s = w.toNewHampshire( t12, true ).toString();
if ( !s.equals( "((A1,A2,A3),B1,(C1,C2));" ) ) {
return false;
}
if ( t12.getNumberOfExternalNodes() != 5 ) {
return false;
}
- s = w.toNewHampshire( t12, false, true ).toString();
+ s = w.toNewHampshire( t12, true ).toString();
if ( !s.equals( "((A2,A3),B1,(C1,C2));" ) ) {
return false;
}
if ( t12.getNumberOfExternalNodes() != 4 ) {
return false;
}
- s = w.toNewHampshire( t12, false, true ).toString();
+ s = w.toNewHampshire( t12, true ).toString();
if ( !s.equals( "((A2,A3),(C1,C2));" ) ) {
return false;
}
if ( t12.getNumberOfExternalNodes() != 3 ) {
return false;
}
- s = w.toNewHampshire( t12, false, true ).toString();
+ s = w.toNewHampshire( t12, true ).toString();
if ( !s.equals( "(A2,(C1,C2));" ) ) {
return false;
}
if ( t12.getNumberOfExternalNodes() != 2 ) {
return false;
}
- s = w.toNewHampshire( t12, false, true ).toString();
+ s = w.toNewHampshire( t12, true ).toString();
if ( !s.equals( "(C1,C2);" ) ) {
return false;
}
if ( t13.getNumberOfExternalNodes() != 4 ) {
return false;
}
- s = w.toNewHampshire( t13, false, true ).toString();
+ s = w.toNewHampshire( t13, true ).toString();
if ( !s.equals( "(A,B,C,E:5.0);" ) ) {
return false;
}
if ( t14.getNumberOfExternalNodes() != 5 ) {
return false;
}
- s = w.toNewHampshire( t14, false, true ).toString();
+ s = w.toNewHampshire( t14, true ).toString();
if ( !s.equals( "((A,B,C,D:1.1),F);" ) ) {
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus" ).equals( "Mus musculus" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus" )
+ if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCDO2" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus musculus BCDO2" )
+ .equals( "Mus musculus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_BCDO2" )
+ .equals( "Mus musculus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus musculus" )
+ .equals( "Mus musculus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Bcl Mus musculus musculus" )
+ .equals( "Mus musculus musculus" ) ) {
+ return false;
+ }
+ if ( ParserUtils.extractScientificNameFromNodeName( "vcl Mus musculus musculus" ) != null ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_BCDO2" )
.equals( "Mus musculus musculus" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus-12" )
+ if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_Musculus" )
.equals( "Mus musculus musculus" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus-12" ).equals( "Mus musculus" ) ) {
+ if ( ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_musculus" ) != null ) {
+ return false;
+ }
+ if ( ParserUtils.extractScientificNameFromNodeName( "musculus" ) != null ) {
+ return false;
+ }
+ if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus" ) != null ) {
+ return false;
+ }
+ if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus_musculus" ) != null ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus-12 affrre e" )
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_1" )
+ .equals( "Mus musculus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_1" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_bcl" ) != null ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCL" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( ParserUtils.extractScientificNameFromNodeName( "Mus musculus bcl" ) != null ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus BCL" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus xBCL" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus x1" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus_12" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12 affrre e" )
+ .equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12_affrre_e" )
.equals( "Mus musculus" ) ) {
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus" )
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" )
+ .equals( "Mus musculus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" )
.equals( "Mus musculus musculus" ) ) {
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" ).equals( "Pilostyles mexicana" ) ) {
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" )
+ .equals( "Pilostyles mexicana" ) ) {
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_strain_K12/DH10B" )
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K12/DH10B" )
- .equals( "Escherichia coli str K12/DH10B" ) ) {
+ .equals( "Escherichia coli str. K12/DH10B" ) ) {
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K12/DH10B" )
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis_lyrata_subsp_lyrata" )
- .equals( "Arabidopsis lyrata subsp lyrata" ) ) {
+ .equals( "Arabidopsis lyrata subsp. lyrata" ) ) {
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata" )
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp lyrata bcl2" )
- .equals( "Arabidopsis lyrata subsp lyrata" ) ) {
+ .equals( "Arabidopsis lyrata subsp. lyrata" ) ) {
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subspecies lyrata bcl2" )
.equals( "Escherichia coli (str. K12)" ) ) {
return false;
}
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str K12)" )
+ .equals( "Escherichia coli (str. K12)" ) ) {
+ return false;
+ }
if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. K12) bcl2" )
.equals( "Escherichia coli (str. K12)" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." )
- .equals( "Macrocera sp." ) ) {
-
- return false;
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (var K12) bcl2" )
+ .equals( "Escherichia coli (var. K12)" ) ) {
+ return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" )
- .equals( "Macrocera sp." ) ) {
-
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K-12 substr. MG1655star" )
+ .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" )
- .equals( "Macrocera sp." ) ) {
-
-
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star" )
+ .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+ return false;
+ }
+ if ( !ParserUtils
+ .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star" )
+ .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star gene1" )
+ .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+ return false;
+ }
+ if ( !ParserUtils
+ .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star GENE1" )
+ .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" )
+ .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" )
+ .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) {
+ return false;
+ }
+ //
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." ).equals( "Macrocera sp." ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" ).equals( "Macrocera sp." ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" ).equals( "Macrocera sp." ) ) {
return false;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "something Macrocera sp. K12" )
.equals( "Macrocera sp." ) ) {
-
-
return false;
- } if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" )
- .equals( "Macrocera sp" ) ) {
-
-
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp" ) ) {
return false;
}
}
nhxp.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
nhxp.setReplaceUnderscores( true );
final Phylogeny uc0 = factory.create( "(A__A_,_B_B)", nhxp )[ 0 ];
- if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A " ) ) {
+ if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A" ) ) {
return false;
}
- if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( " B B" ) ) {
+ if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( "B B" ) ) {
return false;
}
final Phylogeny p1b = factory
if ( p50.getNode( "A" ) == null ) {
return false;
}
- if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
+ if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
.equals( "((A,B)ab:2.0[88],C);" ) ) {
return false;
}
- if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) {
+ if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) {
return false;
}
- if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES )
+ if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES )
.equals( "((A,B)88:2.0,C);" ) ) {
return false;
}
if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) {
return false;
}
- //
final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ];
if ( p54.getNode( "A" ) == null ) {
return false;
}
- if ( !p54.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
- .equals( "((A,B)[88],C);" ) ) {
+ if ( !p54.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ).equals( "((A,B)[88],C);" ) ) {
+ return false;
+ }
+ //
+ final Phylogeny p55 = factory
+ .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ),
+ new NHXParser() )[ 0 ];
+ if ( !p55
+ .toNewHampshire()
+ .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,lcl|HPV66_L1.1x:0.0798012);" ) ) {
+ System.out.println( p55.toNewHampshire() );
+ return false;
+ }
+ final Phylogeny p56 = factory
+ .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ),
+ new NHXParser() )[ 0 ];
+ if ( !p56
+ .toNewHampshire()
+ .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) {
+ System.out.println( p56.toNewHampshire() );
+ return false;
+ }
+ final Phylogeny p57 = factory
+ .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ),
+ new NHXParser() )[ 0 ];
+ if ( !p57
+ .toNewHampshire()
+ .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) {
+ System.out.println( p57.toNewHampshire() );
return false;
}
}
public final static String OS_ARCH = System.getProperty( "os.arch" );
public final static String OS_NAME = System.getProperty( "os.name" );
public final static String OS_VERSION = System.getProperty( "os.version" );
- public final static Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s]" );
+ public final static Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]'\"]" );
public final static double ZERO_DIFF = 1.0E-9;
public static final BigDecimal NULL_BD = new BigDecimal( 0 );
public static final NumberFormat FORMATTER_9;
return s;
}
- final public static String replaceIllegalNhCharacters( final String nh ) {
- if ( nh == null ) {
- return "";
- }
- return nh.trim().replaceAll( "[\\[\\]:]+", "_" );
- }
-
final public static String replaceIllegalNhxCharacters( final String nhx ) {
if ( nhx == null ) {
return "";
}
- return nhx.trim().replaceAll( "[\\[\\](),:;\\s]+", "_" );
+ return nhx.trim().replaceAll( "[\\[\\]']+", "_" );
}
final public static double round( final double value, final int decimal_place ) {