From 37cb4c1bc15ea30a6f64953d5e23e09694083f06 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Sat, 5 Jan 2013 05:00:34 +0000 Subject: [PATCH] further reduction of NHX fields iterating NHX parser clean up --- .../org/forester/analysis/TaxonomyDataManager.java | 3 +- .../src/org/forester/archaeopteryx/AptxUtil.java | 2 - .../org/forester/archaeopteryx/Configuration.java | 23 +- .../src/org/forester/archaeopteryx/Constants.java | 12 +- .../forester/archaeopteryx/MainFrameApplet.java | 4 +- .../src/org/forester/archaeopteryx/TreePanel.java | 1 - .../src/org/forester/io/parsers/nhx/NHXParser.java | 764 +++++++---------- .../org/forester/io/parsers/nhx/NHXParser2.java | 885 -------------------- .../src/org/forester/io/parsers/nhx/NHXtags.java | 17 +- .../org/forester/phylogeny/data/BranchColor.java | 10 +- .../org/forester/phylogeny/data/BranchData.java | 8 - .../org/forester/phylogeny/data/BranchWidth.java | 6 +- .../src/org/forester/phylogeny/data/NodeData.java | 3 - .../org/forester/phylogeny/data/PropertiesMap.java | 8 +- .../src/org/forester/phylogeny/data/Property.java | 48 +- forester/java/src/org/forester/test/Test.java | 179 ++-- .../src/org/forester/util/ForesterConstants.java | 4 +- 17 files changed, 434 insertions(+), 1543 deletions(-) delete mode 100644 forester/java/src/org/forester/io/parsers/nhx/NHXParser2.java diff --git a/forester/java/src/org/forester/analysis/TaxonomyDataManager.java b/forester/java/src/org/forester/analysis/TaxonomyDataManager.java index 302a93c..cddaa4f 100644 --- a/forester/java/src/org/forester/analysis/TaxonomyDataManager.java +++ b/forester/java/src/org/forester/analysis/TaxonomyDataManager.java @@ -337,7 +337,8 @@ public final class TaxonomyDataManager extends RunnableProcess { for( final UniProtTaxonomy up_taxonomy : up_taxonomies ) { boolean match = true; I: for( int i = 0; i < lineage.size(); ++i ) { - if ( i == up_taxonomy.getLineage().size() || !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) { + if ( ( i == up_taxonomy.getLineage().size() ) + || !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) { match = false; break I; } diff --git a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java index 7dc6243..c2dfc54 100644 --- a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java @@ -23,8 +23,6 @@ // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester - - package org.forester.archaeopteryx; import java.awt.Color; diff --git a/forester/java/src/org/forester/archaeopteryx/Configuration.java b/forester/java/src/org/forester/archaeopteryx/Configuration.java index 7cefee5..7b440a8 100644 --- a/forester/java/src/org/forester/archaeopteryx/Configuration.java +++ b/forester/java/src/org/forester/archaeopteryx/Configuration.java @@ -163,17 +163,18 @@ public final class Configuration { String default_config_filename = Constants.DEFAULT_CONFIGURATION_FILE_NAME; final static String display_options[][] = { { "Phylogram", "display", "?" }, { "Node Name", "display", "yes" }, { "Taxonomy Code", "display", "yes" }, - { "Annotation", "nodisplay", "no" }, { "Confidence Values", "display", "?" }, { "Node Events", "display", "?" }, - { "Taxonomy Colorize", "display", "yes" }, { "Colorize Branches", "display", "no" }, - { "Use Branch-Widths", "display", "no" }, { "Show Custom Nodes", "display", "yes" }, - { "Domains", "nodisplay", "no" }, { "Binary Characters", "nodisplay", "no" }, - { "Binary Char Counts", "nodisplay", "no" }, { "Seq Name", "display", "yes" }, - { "Seq Acc", "display", "no" }, { "Show Internal Data", "display", "yes" }, - { "Dyna Hide", "display", "yes" }, { "Taxonomy Scientific", "display", "yes" }, - { "Taxonomy Common", "display", "no" }, { "Annotation Colorize", "nodisplay", "no" }, - { "Seq Symbol", "display", "yes" }, { "Rollover", "display", "yes" }, - { "Relation Confidence", "nodisplay", "no" }, { "Vector Data", "nodisplay", "no" }, - { "Taxonomy Images", "display", "no" }, { "Properties", "nodisplay", "no" } }; + { "Annotation", "nodisplay", "no" }, { "Confidence Values", "display", "?" }, + { "Node Events", "display", "?" }, { "Taxonomy Colorize", "display", "yes" }, + { "Colorize Branches", "display", "no" }, { "Use Branch-Widths", "display", "no" }, + { "Show Custom Nodes", "display", "yes" }, { "Domains", "nodisplay", "no" }, + { "Binary Characters", "nodisplay", "no" }, { "Binary Char Counts", "nodisplay", "no" }, + { "Seq Name", "display", "yes" }, { "Seq Acc", "display", "no" }, + { "Show Internal Data", "display", "yes" }, { "Dyna Hide", "display", "yes" }, + { "Taxonomy Scientific", "display", "yes" }, { "Taxonomy Common", "display", "no" }, + { "Annotation Colorize", "nodisplay", "no" }, { "Seq Symbol", "display", "yes" }, + { "Rollover", "display", "yes" }, { "Relation Confidence", "nodisplay", "no" }, + { "Vector Data", "nodisplay", "no" }, { "Taxonomy Images", "display", "no" }, + { "Properties", "nodisplay", "no" } }; final static String clickto_options[][] = { { "Display Node Data", "display" }, { "Collapse/Uncollapse", "display" }, { "Root/Reroot", "display" }, { "Sub/Super Tree", "display" }, { "Swap Descendants", "display" }, { "Colorize Subtree", "display" }, diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java index 4bf2baf..48f7447 100644 --- a/forester/java/src/org/forester/archaeopteryx/Constants.java +++ b/forester/java/src/org/forester/archaeopteryx/Constants.java @@ -37,13 +37,13 @@ import org.forester.util.ForesterConstants; public final class Constants { final static boolean __ALLOW_PHYLOGENETIC_INFERENCE = true; - public final static boolean __RELEASE = false; // TODO remove me - public final static boolean __SNAPSHOT_RELEASE = false; // TODO remove me - public final static boolean __SYNTH_LF = false; // TODO remove me + public final static boolean __RELEASE = false; // TODO remove me + public final static boolean __SNAPSHOT_RELEASE = false; // TODO remove me + public final static boolean __SYNTH_LF = false; // TODO remove me public final static boolean ALLOW_DDBJ_BLAST = false; public final static String PRG_NAME = "Archaeopteryx"; final static String VERSION = "0.979"; - final static String PRG_DATE = "121229"; + final static String PRG_DATE = "130104"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Verdana", "Tahoma", "Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" }; @@ -59,7 +59,7 @@ public final class Constants { final static float WHEEL_ZOOM_OUT_FACTOR = 1 / Constants.WHEEL_ZOOM_IN_FACTOR; final static float WHEEL_ZOOM_IN_X_CORRECTION_FACTOR = 1.085f; final static float WHEEL_ZOOM_OUT_X_CORRECTION_FACTOR = 1 / Constants.WHEEL_ZOOM_IN_X_CORRECTION_FACTOR; - static final boolean SPECIAL_CUSTOM = false; //TODO remove me + static final boolean SPECIAL_CUSTOM = false; //TODO remove me static final double EXT_NODE_INFO_LENGTH_MAX_RATIO = 0.95; static final Dimension NODE_PANEL_SPLIT_MINIMUM_SIZE = new Dimension( 100, 50 ); static final Dimension NODE_PANEL_SIZE = new Dimension( 500, 600 ); @@ -78,7 +78,7 @@ public final class Constants { final static String PHYLOXML_WEB_SITE = ForesterConstants.PHYLO_XML_LOCATION; final static String PHYLOXML_REFERENCE_URL = "http://www.biomedcentral.com/1471-2105/10/356/"; final static String APTX_REFERENCE_URL = "http://www.biomedcentral.com/bmcbioinformatics/"; - final static String APTX_REFERENCE = "Zmasek..."; //TODO + final static String APTX_REFERENCE = "Zmasek..."; //TODO final static String PHYLOXML_REFERENCE = ForesterConstants.PHYLO_XML_REFERENCE; final static String PHYLOXML_REFERENCE_SHORT = "Han MV and Zmasek CM (2009), BMC Bioinformatics, 10:356"; final static short NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT = 2; diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java index 492ecc0..871cbc6 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java @@ -52,8 +52,8 @@ import org.forester.util.ForesterUtil; public final class MainFrameApplet extends MainFrame { - private static final long serialVersionUID = 1941019292746717053L; - private final static int DEFAULT_FRAME_X_SIZE = 640; + private static final long serialVersionUID = 1941019292746717053L; + private final static int DEFAULT_FRAME_X_SIZE = 640; private final static int DEFAULT_FRAME_Y_SIZE = 580; private final ArchaeopteryxA _applet; private ButtonGroup _radio_group_1; diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index 18278b7..6b26c29 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -23,7 +23,6 @@ // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester - package org.forester.archaeopteryx; import java.awt.BasicStroke; diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index 4711c8b..ea8470f 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -2,9 +2,7 @@ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research -// All rights reserved +// Copyright (C) 2013 Christian M. Zmasek // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,9 +23,9 @@ package org.forester.io.parsers.nhx; -import java.awt.Color; import java.io.BufferedReader; import java.io.File; +import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; @@ -59,11 +57,15 @@ import org.forester.util.ForesterUtil; public final class NHXParser implements PhylogenyParser { public static final TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = TAXONOMY_EXTRACTION.NO; + public final static Pattern UC_LETTERS_NUMBERS_PATTERN = Pattern.compile( "^[A-Z0-9]+$" ); + public final static Pattern NUMBERS_ONLY_PATTERN = Pattern.compile( "^[0-9\\.]+$" ); + public final static Pattern MB_PROB_PATTERN = Pattern.compile( "prob=([^,]+)" ); + public final static Pattern MB_PROB_SD_PATTERN = Pattern.compile( "prob_stddev=([^,]+)" ); + public final static Pattern MB_BL_PATTERN = Pattern.compile( "length_median=([^,]+)" ); final static private boolean GUESS_ROOTEDNESS_DEFAULT = true; final static private boolean GUESS_IF_SUPPORT_VALUES = true; final static private boolean IGNORE_QUOTES_DEFAULT = false; final static public boolean REPLACE_UNDERSCORES_DEFAULT = false; - private boolean _saw_closing_paren; final static private byte STRING = 0; final static private byte STRING_BUFFER = 1; final static private byte CHAR_ARRAY = 2; @@ -77,82 +79,219 @@ public final class NHXParser implements PhylogenyParser { private StringBuilder _current_anotation; private Object _nhx_source; private int _clade_level; - private List _phylogenies; private Phylogeny _current_phylogeny; private TAXONOMY_EXTRACTION _taxonomy_extraction; private boolean _replace_underscores; - public final static Pattern UC_LETTERS_NUMBERS_PATTERN = Pattern.compile( "^[A-Z0-9]+$" ); - public final static Pattern NUMBERS_ONLY_PATTERN = Pattern.compile( "^[0-9\\.]+$" ); - public final static Pattern MB_PROB_PATTERN = Pattern.compile( "prob=([^,]+)" ); - public final static Pattern MB_PROB_SD_PATTERN = Pattern.compile( "prob_stddev=([^,]+)" ); - public final static Pattern MB_BL_PATTERN = Pattern.compile( "length_median=([^,]+)" ); + private boolean _in_comment = false; + private boolean _saw_colon = false; + private boolean _saw_closing_paren; + private boolean _saw_open_bracket = false; + private boolean _in_open_bracket = false; + private boolean _in_double_quote = false; + private boolean _in_single_quote = false; + private String _my_source_str = null; + private StringBuffer _my_source_sbuff = null; + private StringBuilder _my_source_sbuil = null; + private char[] _my_source_charary = null; + private BufferedReader _my_source_br = null; + private int _i; + private Phylogeny _next; + private Object _source; public NHXParser() { init(); } - public NHXParser.TAXONOMY_EXTRACTION getTaxonomyExtraction() { + public final TAXONOMY_EXTRACTION getTaxonomyExtraction() { return _taxonomy_extraction; } - /** - * Parses the source set with setSource( final Object nhx_source ). Returns - * the Phylogenies found in the source as Phylogeny[]. - * Everything between [ and ] is considered comment and ignored, - * unless: - * "[&&NHX... ]" - * or - * ":digits and/or.[bootstrap]" - * - * @see #setSource( final Object nhx_source ) - * @see org.forester.io.parsers.PhylogenyParser#parse() - * @return Phylogeny[] - * @throws IOException - * @throws NHXFormatException - * @throws PhylogenyParserException - */ + public final boolean hasNext() { + return _next != null; + } + + public final Phylogeny next() throws NHXFormatException, IOException { + final Phylogeny phy = _next; + getNext(); + return phy; + } + @Override - public Phylogeny[] parse() throws IOException, NHXFormatException { - boolean in_comment = false; - boolean saw_colon = false; - boolean saw_open_bracket = false; - boolean in_open_bracket = false; - boolean in_double_quote = false; - boolean in_single_quote = false; - setPhylogenies( new ArrayList() ); - setCladeLevel( 0 ); - newCurrentAnotation(); - setCurrentPhylogeny( null ); - setCurrentNode( null ); - int i = 0; - String my_source_str = null; - StringBuffer my_source_sbuff = null; - StringBuilder my_source_sbuil = null; - char[] my_source_charary = null; - BufferedReader my_source_br = null; - switch ( getInputType() ) { + public final Phylogeny[] parse() throws IOException { + reset(); + final List l = new ArrayList(); + while ( hasNext() ) { + l.add( next() ); + } + final Phylogeny[] p = new Phylogeny[ l.size() ]; + for( int i = 0; i < l.size(); ++i ) { + p[ i ] = l.get( i ); + } + return p; + } + + public final void reset() throws NHXFormatException, IOException { + _i = 0; + _next = null; + _in_comment = false; + _saw_colon = false; + _saw_open_bracket = false; + _in_open_bracket = false; + _in_double_quote = false; + _in_single_quote = false; + _clade_level = 0; + _current_anotation = new StringBuilder(); + _current_phylogeny = null; + _current_node = null; + _my_source_str = null; + _my_source_sbuff = null; + _my_source_sbuil = null; + _my_source_charary = null; + _my_source_br = null; + determineSourceType( _source ); + switch ( _input_type ) { case STRING: - my_source_str = ( String ) getNhxSource(); + _my_source_str = ( String ) _nhx_source; break; case STRING_BUFFER: - my_source_sbuff = ( StringBuffer ) getNhxSource(); + _my_source_sbuff = ( StringBuffer ) _nhx_source; break; case STRING_BUILDER: - my_source_sbuil = ( StringBuilder ) getNhxSource(); + _my_source_sbuil = ( StringBuilder ) _nhx_source; break; case CHAR_ARRAY: - my_source_charary = ( char[] ) getNhxSource(); + _my_source_charary = ( char[] ) _nhx_source; break; case BUFFERED_READER: - my_source_br = ( BufferedReader ) getNhxSource(); + if ( _my_source_br != null ) { + try { + _my_source_br.close(); + } + catch ( final IOException e ) { + //do nothing + } + } + _my_source_br = ( BufferedReader ) _nhx_source; break; default: throw new RuntimeException( "unknown input type" ); } + getNext(); + } + + public final void setGuessRootedness( final boolean guess_rootedness ) { + _guess_rootedness = guess_rootedness; + } + + public final void setIgnoreQuotes( final boolean ignore_quotes ) { + _ignore_quotes = ignore_quotes; + } + + public final void setReplaceUnderscores( final boolean replace_underscores ) { + _replace_underscores = replace_underscores; + } + + @Override + public final void setSource( final Object nhx_source ) throws NHXFormatException, IOException { + _source = nhx_source; + reset(); + } + + public final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) { + _taxonomy_extraction = taxonomy_extraction; + } + + private final void determineSourceType( final Object nhx_source ) throws PhylogenyParserException, + FileNotFoundException { + if ( nhx_source == null ) { + throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); + } + else if ( nhx_source instanceof String ) { + _input_type = NHXParser.STRING; + _source_length = ( ( String ) nhx_source ).length(); + _nhx_source = nhx_source; + } + else if ( nhx_source instanceof StringBuilder ) { + _input_type = NHXParser.STRING_BUILDER; + _source_length = ( ( StringBuilder ) nhx_source ).length(); + _nhx_source = nhx_source; + } + else if ( nhx_source instanceof StringBuffer ) { + _input_type = NHXParser.STRING_BUFFER; + _source_length = ( ( StringBuffer ) nhx_source ).length(); + _nhx_source = nhx_source; + } + else if ( nhx_source instanceof StringBuilder ) { + _input_type = NHXParser.STRING_BUILDER; + _source_length = ( ( StringBuilder ) nhx_source ).length(); + _nhx_source = nhx_source; + } + else if ( nhx_source instanceof char[] ) { + _input_type = NHXParser.CHAR_ARRAY; + _source_length = ( ( char[] ) nhx_source ).length; + _nhx_source = nhx_source; + } + else if ( nhx_source instanceof File ) { + _input_type = NHXParser.BUFFERED_READER; + _source_length = 0; + final File f = ( File ) nhx_source; + final String error = ForesterUtil.isReadableFile( f ); + if ( !ForesterUtil.isEmpty( error ) ) { + throw new PhylogenyParserException( error ); + } + _nhx_source = new BufferedReader( new FileReader( f ) ); + } + else if ( nhx_source instanceof InputStream ) { + _input_type = NHXParser.BUFFERED_READER; + _source_length = 0; + final InputStreamReader isr = new InputStreamReader( ( InputStream ) nhx_source ); + _nhx_source = new BufferedReader( isr ); + } + else { + throw new IllegalArgumentException( getClass() + " can only parse objects of type String," + + " StringBuffer, StringBuilder, char[], File," + " or InputStream " + + " [attempt to parse object of " + nhx_source.getClass() + "]." ); + } + } + + private final Phylogeny finishPhylogeny() throws PhylogenyParserException, NHXFormatException, + PhyloXmlDataFormatException { + if ( _current_phylogeny != null ) { + parseNHX( _current_anotation != null ? _current_anotation.toString() : "", + _current_phylogeny.getRoot(), + getTaxonomyExtraction(), + isReplaceUnderscores() ); + if ( GUESS_IF_SUPPORT_VALUES ) { + if ( isBranchLengthsLikeBootstrapValues( _current_phylogeny ) ) { + moveBranchLengthsToConfidenceValues( _current_phylogeny ); + } + } + if ( isGuessRootedness() ) { + final PhylogenyNode root = _current_phylogeny.getRoot(); + if ( ( root.getDistanceToParent() >= 0.0 ) || !ForesterUtil.isEmpty( root.getName() ) + || !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( root ) ) || root.isHasAssignedEvent() ) { + _current_phylogeny.setRooted( true ); + } + } + return _current_phylogeny; + } + return null; + } + + private final Phylogeny finishSingleNodePhylogeny() throws PhylogenyParserException, NHXFormatException, + PhyloXmlDataFormatException { + final PhylogenyNode new_node = new PhylogenyNode(); + parseNHX( _current_anotation.toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); + _current_phylogeny = new Phylogeny(); + _current_phylogeny.setRoot( new_node ); + return _current_phylogeny; + } + + private final void getNext() throws IOException, NHXFormatException { while ( true ) { char c = '\b'; - if ( getInputType() == NHXParser.BUFFERED_READER ) { - final int ci = my_source_br.read(); + if ( _input_type == BUFFERED_READER ) { + final int ci = _my_source_br.read(); if ( ci >= 0 ) { c = ( char ) ci; } @@ -161,476 +300,221 @@ public final class NHXParser implements PhylogenyParser { } } else { - if ( i >= getSourceLength() ) { + if ( _i >= _source_length ) { break; } else { - switch ( getInputType() ) { + switch ( _input_type ) { case STRING: - c = my_source_str.charAt( i ); + c = _my_source_str.charAt( _i ); break; case STRING_BUFFER: - c = my_source_sbuff.charAt( i ); + c = _my_source_sbuff.charAt( _i ); break; case STRING_BUILDER: - c = my_source_sbuil.charAt( i ); + c = _my_source_sbuil.charAt( _i ); break; case CHAR_ARRAY: - c = my_source_charary[ i ]; + c = _my_source_charary[ _i ]; break; } } } - if ( !in_single_quote && !in_double_quote ) { + if ( !_in_single_quote && !_in_double_quote ) { if ( c == ':' ) { - saw_colon = true; + _saw_colon = true; } - else if ( !( ( c < 33 ) || ( c > 126 ) ) && saw_colon + else if ( !( ( c < 33 ) || ( c > 126 ) ) && _saw_colon && ( ( c != '[' ) && ( c != '.' ) && ( ( c < 48 ) || ( c > 57 ) ) ) ) { - saw_colon = false; + _saw_colon = false; } - if ( in_open_bracket && ( c == ']' ) ) { - in_open_bracket = false; + if ( _in_open_bracket && ( c == ']' ) ) { + _in_open_bracket = false; } } // \n\t is always ignored, // as is " (34) and ' (39) (space is 32): - if ( ( isIgnoreQuotes() && ( ( c < 33 ) || ( c > 126 ) || ( c == 34 ) || ( c == 39 ) || ( ( getCladeLevel() == 0 ) && ( c == ';' ) ) ) ) - || ( !isIgnoreQuotes() && ( ( c < 32 ) || ( c > 126 ) || ( ( getCladeLevel() == 0 ) && ( c == ';' ) ) ) ) ) { - // Do nothing. + if ( ( isIgnoreQuotes() && ( ( c < 33 ) || ( c > 126 ) || ( c == 34 ) || ( c == 39 ) || ( ( _clade_level == 0 ) && ( c == ';' ) ) ) ) + || ( !isIgnoreQuotes() && ( ( c < 32 ) || ( c > 126 ) || ( ( _clade_level == 0 ) && ( c == ';' ) ) ) ) ) { + //do nothing } - else if ( ( c == 32 ) && ( !in_single_quote && !in_double_quote ) ) { - // Do nothing. + else if ( ( c == 32 ) && ( !_in_single_quote && !_in_double_quote ) ) { + //do nothing } - else if ( in_comment ) { + else if ( _in_comment ) { if ( c == ']' ) { - in_comment = false; + _in_comment = false; } } - else if ( in_double_quote ) { + else if ( _in_double_quote ) { if ( c == '"' ) { - in_double_quote = false; + _in_double_quote = false; } else { - getCurrentAnotation().append( c ); + _current_anotation.append( c ); } } else if ( c == '"' ) { - in_double_quote = true; + _in_double_quote = true; } - else if ( in_single_quote ) { + else if ( _in_single_quote ) { if ( c == 39 ) { - in_single_quote = false; + _in_single_quote = false; } else { - getCurrentAnotation().append( c ); + _current_anotation.append( c ); } } else if ( c == 39 ) { - in_single_quote = true; + _in_single_quote = true; } else if ( c == '[' ) { - saw_open_bracket = true; - in_open_bracket = true; + _saw_open_bracket = true; + _in_open_bracket = true; } - else if ( saw_open_bracket ) { + else if ( _saw_open_bracket ) { if ( c != ']' ) { // everything not starting with "[&" is considered a comment // unless ":digits and/or . [bootstrap]": if ( c == '&' ) { - getCurrentAnotation().append( "[&" ); + _current_anotation.append( "[&" ); } - else if ( saw_colon ) { - getCurrentAnotation().append( "[" + c ); + else if ( _saw_colon ) { + _current_anotation.append( "[" + c ); } else { - in_comment = true; + _in_comment = true; } } // comment consisting just of "[]": - saw_open_bracket = false; - } - else if ( ( c == '(' ) && !in_open_bracket ) { - processOpenParen(); + _saw_open_bracket = false; + } + else if ( ( c == '(' ) && !_in_open_bracket ) { + final Phylogeny phy = processOpenParen(); + if ( phy != null ) { + ++_i; + // return phy; + _next = phy; + return; + } } - else if ( ( c == ')' ) && !in_open_bracket ) { + else if ( ( c == ')' ) && !_in_open_bracket ) { processCloseParen(); } - else if ( ( c == ',' ) && !in_open_bracket ) { + else if ( ( c == ',' ) && !_in_open_bracket ) { processComma(); } else { - getCurrentAnotation().append( c ); + _current_anotation.append( c ); } - ++i; - } - if ( getCladeLevel() != 0 ) { - setPhylogenies( null ); - throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of open parens does not equal number of close parens" ); - } - if ( getCurrentPhylogeny() != null ) { - finishPhylogeny(); - } - else if ( getCurrentAnotation().length() > 0 ) { - finishSingleNodePhylogeny(); - } - else if ( getPhylogenies().size() < 1 ) { - getPhylogenies().add( new Phylogeny() ); - } - return getPhylogeniesAsArray(); - } // parse() - - public void setGuessRootedness( final boolean guess_rootedness ) { - _guess_rootedness = guess_rootedness; - } - - public void setIgnoreQuotes( final boolean ignore_quotes ) { - _ignore_quotes = ignore_quotes; - } - - public void setReplaceUnderscores( final boolean replace_underscores ) { - _replace_underscores = replace_underscores; - } - - /** - * This sets the source to be parsed. The source can be: String, - * StringBuffer, char[], File, or InputStream. The source can contain more - * than one phylogenies in either New Hamphshire (NH) or New Hamphshire - * Extended (NHX) format. There is no need to separate phylogenies with any - * special character. White space is always ignored, as are semicolons - * inbetween phylogenies. Example of a source describing two phylogenies - * (source is a String, in this example): "(A,(B,(C,(D,E)de)cde)bcde)abcde - * ((((A,B)ab,C)abc,D)abcd,E)abcde". Everything between a '[' followed by any - * character other than '&' and ']' is considered a comment and ignored - * (example: "[this is a comment]"). NHX tags are surrounded by '[&&NHX' and - * ']' (example: "[&&NHX:S=Varanus_storri]"). A sequence like "[& some - * info]" is ignored, too (at the PhylogenyNode level, though). - * Exception: numbers only between [ and ] (e.g. [90]) are interpreted as support values. - * - * @see #parse() - * @see org.forester.io.parsers.PhylogenyParser#setSource(java.lang.Object) - * @param nhx_source - * the source to be parsed (String, StringBuffer, char[], File, - * or InputStream) - * @throws IOException - * @throws PhylogenyParserException - */ - @Override - public void setSource( final Object nhx_source ) throws PhylogenyParserException, IOException { - if ( nhx_source == null ) { - throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); - } - else if ( nhx_source instanceof String ) { - setInputType( NHXParser.STRING ); - setSourceLength( ( ( String ) nhx_source ).length() ); - setNhxSource( nhx_source ); - } - else if ( nhx_source instanceof StringBuilder ) { - setInputType( NHXParser.STRING_BUILDER ); - setSourceLength( ( ( StringBuilder ) nhx_source ).length() ); - setNhxSource( nhx_source ); - } - else if ( nhx_source instanceof StringBuffer ) { - setInputType( NHXParser.STRING_BUFFER ); - setSourceLength( ( ( StringBuffer ) nhx_source ).length() ); - setNhxSource( nhx_source ); + ++_i; + } // while ( true ) + if ( _clade_level != 0 ) { + throw new PhylogenyParserException( "error in NH (Newick) formatted data: most likely cause: number of open parens does not equal number of close parens" ); } - else if ( nhx_source instanceof StringBuilder ) { - setInputType( NHXParser.STRING_BUILDER ); - setSourceLength( ( ( StringBuilder ) nhx_source ).length() ); - setNhxSource( nhx_source ); + if ( _current_phylogeny != null ) { + _next = finishPhylogeny(); + _current_phylogeny = null; + _current_anotation = null; } - else if ( nhx_source instanceof char[] ) { - setInputType( NHXParser.CHAR_ARRAY ); - setSourceLength( ( ( char[] ) nhx_source ).length ); - setNhxSource( nhx_source ); - } - else if ( nhx_source instanceof File ) { - setInputType( NHXParser.BUFFERED_READER ); - setSourceLength( 0 ); - final File f = ( File ) nhx_source; - final String error = ForesterUtil.isReadableFile( f ); - if ( !ForesterUtil.isEmpty( error ) ) { - throw new PhylogenyParserException( error ); - } - setNhxSource( new BufferedReader( new FileReader( f ) ) ); - } - else if ( nhx_source instanceof InputStream ) { - setInputType( NHXParser.BUFFERED_READER ); - setSourceLength( 0 ); - final InputStreamReader isr = new InputStreamReader( ( InputStream ) nhx_source ); - setNhxSource( new BufferedReader( isr ) ); + else if ( ( _current_anotation != null ) && ( _current_anotation.length() > 0 ) ) { + _next = finishSingleNodePhylogeny(); + _current_anotation = null; } else { - throw new IllegalArgumentException( getClass() + " can only parse objects of type String," - + " StringBuffer, char[], File," + " or InputStream " + " [attempt to parse object of " - + nhx_source.getClass() + "]." ); - } - } - - public void setTaxonomyExtraction( final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction ) { - _taxonomy_extraction = taxonomy_extraction; - } - - /** - * Decreases the clade level by one. - * - * @throws PhylogenyParserException - * if level goes below zero. - */ - private void decreaseCladeLevel() throws PhylogenyParserException { - if ( getCladeLevel() < 0 ) { - throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of close parens is larger than number of open parens" ); - } - --_clade_level; - } - - /** - * Finishes the current Phylogeny and adds it to the list of Phylogenies - * created. - * - * @throws PhylogenyParserException - * @throws NHXFormatException - * @throws PhyloXmlDataFormatException - */ - private void finishPhylogeny() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { - setCladeLevel( 0 ); - if ( getCurrentPhylogeny() != null ) { - parseNHX( getCurrentAnotation().toString(), - getCurrentPhylogeny().getRoot(), - getTaxonomyExtraction(), - isReplaceUnderscores() ); - if ( NHXParser.GUESS_IF_SUPPORT_VALUES ) { - if ( NHXParser.isBranchLengthsLikeBootstrapValues( getCurrentPhylogeny() ) ) { - NHXParser.moveBranchLengthsToConfidenceValues( getCurrentPhylogeny() ); - } - } - if ( isGuessRootedness() ) { - final PhylogenyNode root = getCurrentPhylogeny().getRoot(); - if ( ( root.getDistanceToParent() >= 0.0 ) || !ForesterUtil.isEmpty( root.getName() ) - || !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( root ) ) || root.isHasAssignedEvent() ) { - getCurrentPhylogeny().setRooted( true ); - } - } - getPhylogenies().add( getCurrentPhylogeny() ); - } - } - - private void finishSingleNodePhylogeny() throws PhylogenyParserException, NHXFormatException, - PhyloXmlDataFormatException { - setCladeLevel( 0 ); - final PhylogenyNode new_node = new PhylogenyNode(); - parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); - setCurrentPhylogeny( new Phylogeny() ); - getCurrentPhylogeny().setRoot( new_node ); - getPhylogenies().add( getCurrentPhylogeny() ); - } - - private int getCladeLevel() { - return _clade_level; - } - - private StringBuilder getCurrentAnotation() { - return _current_anotation; - } - - private PhylogenyNode getCurrentNode() { - return _current_node; - } - - private Phylogeny getCurrentPhylogeny() { - return _current_phylogeny; - } - - private byte getInputType() { - return _input_type; - } - - private Object getNhxSource() { - return _nhx_source; - } - - private List getPhylogenies() { - return _phylogenies; - } - - /** - * Returns the Phylogenies created as Array. - * - * @return the Phylogenies created as Array - */ - private Phylogeny[] getPhylogeniesAsArray() { - final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ]; - for( int i = 0; i < getPhylogenies().size(); ++i ) { - p[ i ] = getPhylogenies().get( i ); + _next = null; } - return p; - } - - private int getSourceLength() { - return _source_length; - } - - /** - * Increases the clade level by one. - */ - private void increaseCladeLevel() { - ++_clade_level; } - private void init() { + private final void init() { setTaxonomyExtraction( TAXONOMY_EXTRACTION_DEFAULT ); setReplaceUnderscores( REPLACE_UNDERSCORES_DEFAULT ); setGuessRootedness( GUESS_ROOTEDNESS_DEFAULT ); setIgnoreQuotes( IGNORE_QUOTES_DEFAULT ); } - private boolean isGuessRootedness() { + private final boolean isGuessRootedness() { return _guess_rootedness; } - private boolean isIgnoreQuotes() { + private final boolean isIgnoreQuotes() { return _ignore_quotes; } - private boolean isReplaceUnderscores() { + private final boolean isReplaceUnderscores() { return _replace_underscores; } - private boolean isSawClosingParen() { - return _saw_closing_paren; - } - - /** - * Replaces the current annotation with a new StringBuffer. - */ - private void newCurrentAnotation() { - setCurrentAnotation( new StringBuilder() ); - } - - /** - * Called if a closing paren is encountered. - * - * @throws PhylogenyParserException - * @throws NHXFormatException - * @throws PhyloXmlDataFormatException - */ - private void processCloseParen() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { - decreaseCladeLevel(); - if ( !isSawClosingParen() ) { + private final void processCloseParen() throws PhylogenyParserException, NHXFormatException, + PhyloXmlDataFormatException { + if ( _clade_level < 0 ) { + throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of close parens is larger than number of open parens" ); + } + --_clade_level; + if ( !_saw_closing_paren ) { final PhylogenyNode new_node = new PhylogenyNode(); - parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); - newCurrentAnotation(); - getCurrentNode().addAsChild( new_node ); + parseNHX( _current_anotation.toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); + _current_anotation = new StringBuilder(); + _current_node.addAsChild( new_node ); } else { - parseNHX( getCurrentAnotation().toString(), - getCurrentNode().getLastChildNode(), + parseNHX( _current_anotation.toString(), + _current_node.getLastChildNode(), getTaxonomyExtraction(), isReplaceUnderscores() ); - newCurrentAnotation(); - } - if ( !getCurrentNode().isRoot() ) { - setCurrentNode( getCurrentNode().getParent() ); - } - setSawClosingParen( true ); - } // processCloseParen() - - /** - * Called if a comma is encountered. - * - * @throws PhylogenyParserException - * @throws NHXFormatException - * @throws PhyloXmlDataFormatException - */ - private void processComma() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { - if ( !isSawClosingParen() ) { + _current_anotation = new StringBuilder(); + } + if ( !_current_node.isRoot() ) { + _current_node = _current_node.getParent(); + } + _saw_closing_paren = true; + } + + private final void processComma() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { + if ( !_saw_closing_paren ) { final PhylogenyNode new_node = new PhylogenyNode(); - parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); - if ( getCurrentNode() == null ) { + parseNHX( _current_anotation.toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); + if ( _current_node == null ) { throw new NHXFormatException( "format might not be NH or NHX" ); } - getCurrentNode().addAsChild( new_node ); + _current_node.addAsChild( new_node ); } else { - parseNHX( getCurrentAnotation().toString(), - getCurrentNode().getLastChildNode(), + parseNHX( _current_anotation.toString(), + _current_node.getLastChildNode(), getTaxonomyExtraction(), isReplaceUnderscores() ); } - newCurrentAnotation(); - setSawClosingParen( false ); - } // processComma() - - /** - * Called if a opening paren is encountered. - * - * @throws PhylogenyParserException - * @throws NHXFormatException - * @throws PhyloXmlDataFormatException - */ - private void processOpenParen() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { + _current_anotation = new StringBuilder(); + _saw_closing_paren = false; + } + + private final Phylogeny processOpenParen() throws PhylogenyParserException, NHXFormatException, + PhyloXmlDataFormatException { + Phylogeny phy = null; final PhylogenyNode new_node = new PhylogenyNode(); - if ( getCladeLevel() == 0 ) { - if ( getCurrentPhylogeny() != null ) { - finishPhylogeny(); + if ( _clade_level == 0 ) { + if ( _current_phylogeny != null ) { + phy = finishPhylogeny(); } - setCladeLevel( 1 ); - newCurrentAnotation(); - setCurrentPhylogeny( new Phylogeny() ); - getCurrentPhylogeny().setRoot( new_node ); + _clade_level = 1; + _current_anotation = new StringBuilder(); + _current_phylogeny = new Phylogeny(); + _current_phylogeny.setRoot( new_node ); } else { - increaseCladeLevel(); - getCurrentNode().addAsChild( new_node ); + ++_clade_level; + _current_node.addAsChild( new_node ); } - setCurrentNode( new_node ); - setSawClosingParen( false ); - } - - private void setCladeLevel( final int clade_level ) { - if ( clade_level < 0 ) { - throw new IllegalArgumentException( "Attempt to set clade level to a number smaller than zero." ); - } - _clade_level = clade_level; - } - - private void setCurrentAnotation( final StringBuilder current_anotation ) { - _current_anotation = current_anotation; - } - - private void setCurrentNode( final PhylogenyNode current_node ) { - _current_node = current_node; - } - - private void setCurrentPhylogeny( final Phylogeny current_phylogeny ) { - _current_phylogeny = current_phylogeny; - } - - private void setInputType( final byte input_type ) { - _input_type = input_type; + _current_node = new_node; + _saw_closing_paren = false; + return phy; } - private void setNhxSource( final Object nhx_source ) { - _nhx_source = nhx_source; - } - - private void setPhylogenies( final ArrayList phylogenies ) { - _phylogenies = phylogenies; - } - - private void setSawClosingParen( final boolean saw_closing_paren ) { - _saw_closing_paren = saw_closing_paren; - } - - private void setSourceLength( final int source_length ) { - _source_length = source_length; - } - - public static void parseNHX( String s, - final PhylogenyNode node_to_annotate, - final TAXONOMY_EXTRACTION taxonomy_extraction, - final boolean replace_underscores ) throws NHXFormatException, + public final static void parseNHX( String s, + final PhylogenyNode node_to_annotate, + final TAXONOMY_EXTRACTION taxonomy_extraction, + final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException { if ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) && replace_underscores ) { throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" ); @@ -677,13 +561,13 @@ public final class NHXParser implements PhylogenyParser { } while ( t.hasMoreTokens() ) { s = t.nextToken(); - if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.SPECIES_NAME ) ) { + if ( s.startsWith( NHXtags.SPECIES_NAME ) ) { if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) { node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() ); } node_to_annotate.getNodeData().getTaxonomy().setScientificName( s.substring( 2 ) ); } - else if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.IS_DUPLICATION ) ) { + else if ( s.startsWith( NHXtags.IS_DUPLICATION ) ) { if ( ( s.charAt( 2 ) == 'Y' ) || ( s.charAt( 2 ) == 'T' ) ) { node_to_annotate.getNodeData().setEvent( Event.createSingleDuplicationEvent() ); } @@ -706,15 +590,6 @@ public final class NHXParser implements PhylogenyParser { } node_to_annotate.getNodeData().getTaxonomy().setIdentifier( new Identifier( s.substring( 2 ) ) ); } - else if ( s.startsWith( NHXtags.PARENT_BRANCH_WIDTH ) ) { - PhylogenyMethods.setBranchWidthValue( node_to_annotate, Integer.parseInt( s.substring( 2 ) ) ); - } - else if ( s.startsWith( NHXtags.COLOR ) ) { - final Color c = NHXParser.stringToColor( s.substring( 2 ) ); - if ( c != null ) { - PhylogenyMethods.setBranchColorValue( node_to_annotate, c ); - } - } else if ( s.startsWith( NHXtags.DOMAIN_STRUCTURE ) ) { if ( !node_to_annotate.getNodeData().isHasSequence() ) { node_to_annotate.getNodeData().setSequence( new Sequence() ); @@ -747,7 +622,7 @@ public final class NHXParser implements PhylogenyParser { } } - private static double doubleValue( final String str ) throws NHXFormatException { + private final static double doubleValue( final String str ) throws NHXFormatException { try { return Double.valueOf( str ).doubleValue(); } @@ -757,7 +632,7 @@ public final class NHXParser implements PhylogenyParser { } } - private static boolean isBranchLengthsLikeBootstrapValues( final Phylogeny p ) { + private final static boolean isBranchLengthsLikeBootstrapValues( final Phylogeny p ) { final PhylogenyNodeIterator it = p.iteratorExternalForward(); final double d0 = it.next().getDistanceToParent(); if ( ( d0 < 10 ) || !it.hasNext() ) { @@ -772,7 +647,7 @@ public final class NHXParser implements PhylogenyParser { return true; } - private static void moveBranchLengthsToConfidenceValues( final Phylogeny p ) { + private final static void moveBranchLengthsToConfidenceValues( final Phylogeny p ) { final PhylogenyNodeIterator it = p.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); @@ -781,7 +656,7 @@ public final class NHXParser implements PhylogenyParser { } } - private static void processMrBayes3Data( final String s, final PhylogenyNode node_to_annotate ) + private final static void processMrBayes3Data( final String s, final PhylogenyNode node_to_annotate ) throws NHXFormatException { double sd = -1; final Matcher mb_prob_sd_matcher = MB_PROB_SD_PATTERN.matcher( s ); @@ -829,21 +704,6 @@ public final class NHXParser implements PhylogenyParser { } } - /** - * Parses String s in the format r.g.b (e.g. "12.34.234" ) into red, green, - * and blue and returns the corresponding Color. - */ - private static Color stringToColor( final String s ) { - final StringTokenizer st = new StringTokenizer( s, "." ); - if ( st.countTokens() != 3 ) { - throw new IllegalArgumentException( "illegal format for color: " + s ); - } - final int red = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) ); - final int green = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) ); - final int blu = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) ); - return new Color( red, green, blu ); - } - public static enum TAXONOMY_EXTRACTION { NO, YES, PFAM_STYLE_ONLY; } diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser2.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser2.java deleted file mode 100644 index d30c5d0..0000000 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser2.java +++ /dev/null @@ -1,885 +0,0 @@ -// $Id: -// FORESTER -- software libraries and applications -// for evolutionary biology research and applications. -// -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research -// All rights reserved -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -// -// Contact: phylosoft @ gmail . com -// WWW: https://sites.google.com/site/cmzmasek/home/software/forester - -package org.forester.io.parsers.nhx; - -import java.awt.Color; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.List; -import java.util.StringTokenizer; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.forester.io.parsers.PhylogenyParser; -import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; -import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; -import org.forester.io.parsers.util.ParserUtils; -import org.forester.io.parsers.util.PhylogenyParserException; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Accession; -import org.forester.phylogeny.data.Confidence; -import org.forester.phylogeny.data.DomainArchitecture; -import org.forester.phylogeny.data.Event; -import org.forester.phylogeny.data.Identifier; -import org.forester.phylogeny.data.PhylogenyDataUtil; -import org.forester.phylogeny.data.Sequence; -import org.forester.phylogeny.data.Taxonomy; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; -import org.forester.util.ForesterUtil; - -public final class NHXParser2 implements PhylogenyParser { - - public static final TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = TAXONOMY_EXTRACTION.NO; - final static private boolean GUESS_ROOTEDNESS_DEFAULT = true; - final static private boolean GUESS_IF_SUPPORT_VALUES = true; - final static private boolean IGNORE_QUOTES_DEFAULT = false; - final static public boolean REPLACE_UNDERSCORES_DEFAULT = false; - private boolean _saw_closing_paren; - final static private byte STRING = 0; - final static private byte STRING_BUFFER = 1; - final static private byte CHAR_ARRAY = 2; - final static private byte BUFFERED_READER = 3; - final static private byte STRING_BUILDER = 4; - private boolean _guess_rootedness; - private boolean _ignore_quotes; - private byte _input_type; - private int _source_length; - private PhylogenyNode _current_node; - private StringBuilder _current_anotation; - private Object _nhx_source; - private int _clade_level; - private Phylogeny _current_phylogeny; - private TAXONOMY_EXTRACTION _taxonomy_extraction; - private boolean _replace_underscores; - public final static Pattern UC_LETTERS_NUMBERS_PATTERN = Pattern.compile( "^[A-Z0-9]+$" ); - public final static Pattern NUMBERS_ONLY_PATTERN = Pattern.compile( "^[0-9\\.]+$" ); - public final static Pattern MB_PROB_PATTERN = Pattern.compile( "prob=([^,]+)" ); - public final static Pattern MB_PROB_SD_PATTERN = Pattern.compile( "prob_stddev=([^,]+)" ); - public final static Pattern MB_BL_PATTERN = Pattern.compile( "length_median=([^,]+)" ); - boolean _in_comment = false; - boolean _saw_colon = false; - boolean _saw_open_bracket = false; - boolean _in_open_bracket = false; - boolean _in_double_quote = false; - boolean _in_single_quote = false; - String _my_source_str = null; - StringBuffer _my_source_sbuff = null; - StringBuilder _my_source_sbuil = null; - char[] _my_source_charary = null; - BufferedReader _my_source_br = null; - int _i; - private Phylogeny _next; - private Object _source; - - public NHXParser2() { - init(); - } - - public TAXONOMY_EXTRACTION getTaxonomyExtraction() { - return _taxonomy_extraction; - } - - public boolean hasNext() { - return _next != null; - } - - public Phylogeny next() throws NHXFormatException, IOException { - final Phylogeny phy = _next; - getNext(); - return phy; - } - - @Override - public Phylogeny[] parse() throws IOException { - reset(); - List l = new ArrayList(); - System.out.println( ">> _next=" + _next ); - while ( hasNext() ) { - Phylogeny n = next(); - System.out.println( ">> going to add " + n ); - l.add( n ); - } - final Phylogeny[] p = new Phylogeny[ l.size() ]; - for( int i = 0; i < l.size(); ++i ) { - p[ i ] = l.get( i ); - } - return p; - } - - public void reset() throws NHXFormatException, IOException { - _i = 0; - _next = null; - _in_comment = false; - _saw_colon = false; - _saw_open_bracket = false; - _in_open_bracket = false; - _in_double_quote = false; - _in_single_quote = false; - setCladeLevel( 0 ); - newCurrentAnotation(); - setCurrentPhylogeny( null ); - setCurrentNode( null ); - _my_source_str = null; - _my_source_sbuff = null; - _my_source_sbuil = null; - _my_source_charary = null; - _my_source_br = null; - determineSourceType( _source ); - switch ( getInputType() ) { - case STRING: - _my_source_str = ( String ) getNhxSource(); - break; - case STRING_BUFFER: - _my_source_sbuff = ( StringBuffer ) getNhxSource(); - break; - case STRING_BUILDER: - _my_source_sbuil = ( StringBuilder ) getNhxSource(); - break; - case CHAR_ARRAY: - _my_source_charary = ( char[] ) getNhxSource(); - break; - case BUFFERED_READER: - if ( _my_source_br != null ) { - try { - _my_source_br.close(); - } - catch ( IOException e ) { - //do nothing - } - } - _my_source_br = ( BufferedReader ) getNhxSource(); - break; - default: - throw new RuntimeException( "unknown input type" ); - } - getNext(); - } - - public void setGuessRootedness( final boolean guess_rootedness ) { - _guess_rootedness = guess_rootedness; - } - - public void setIgnoreQuotes( final boolean ignore_quotes ) { - _ignore_quotes = ignore_quotes; - } - - public void setReplaceUnderscores( final boolean replace_underscores ) { - _replace_underscores = replace_underscores; - } - - /** - * This sets the source to be parsed. The source can be: String, - * StringBuffer, char[], File, or InputStream. The source can contain more - * than one phylogenies in either New Hamphshire (NH) or New Hamphshire - * Extended (NHX) format. There is no need to separate phylogenies with any - * special character. White space is always ignored, as are semicolons - * inbetween phylogenies. Example of a source describing two phylogenies - * (source is a String, in this example): "(A,(B,(C,(D,E)de)cde)bcde)abcde - * ((((A,B)ab,C)abc,D)abcd,E)abcde". Everything between a '[' followed by any - * character other than '&' and ']' is considered a comment and ignored - * (example: "[this is a comment]"). NHX tags are surrounded by '[&&NHX' and - * ']' (example: "[&&NHX:S=Varanus_storri]"). A sequence like "[& some - * info]" is ignored, too (at the PhylogenyNode level, though). - * Exception: numbers only between [ and ] (e.g. [90]) are interpreted as support values. - * - * @see #parse() - * @see org.forester.io.parsers.PhylogenyParser#setSource(java.lang.Object) - * @param nhx_source - * the source to be parsed (String, StringBuffer, char[], File, - * or InputStream) - * @throws NHXFormatException - * @throws IOException - * @throws PhylogenyParserException - */ - @Override - public void setSource( final Object nhx_source ) throws NHXFormatException, IOException { - _source = nhx_source; - reset(); - } - - private void determineSourceType( final Object nhx_source ) throws PhylogenyParserException, FileNotFoundException { - if ( nhx_source == null ) { - throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); - } - else if ( nhx_source instanceof String ) { - setInputType( NHXParser2.STRING ); - setSourceLength( ( ( String ) nhx_source ).length() ); - setNhxSource( nhx_source ); - } - else if ( nhx_source instanceof StringBuilder ) { - setInputType( NHXParser2.STRING_BUILDER ); - setSourceLength( ( ( StringBuilder ) nhx_source ).length() ); - setNhxSource( nhx_source ); - } - else if ( nhx_source instanceof StringBuffer ) { - setInputType( NHXParser2.STRING_BUFFER ); - setSourceLength( ( ( StringBuffer ) nhx_source ).length() ); - setNhxSource( nhx_source ); - } - else if ( nhx_source instanceof StringBuilder ) { - setInputType( NHXParser2.STRING_BUILDER ); - setSourceLength( ( ( StringBuilder ) nhx_source ).length() ); - setNhxSource( nhx_source ); - } - else if ( nhx_source instanceof char[] ) { - setInputType( NHXParser2.CHAR_ARRAY ); - setSourceLength( ( ( char[] ) nhx_source ).length ); - setNhxSource( nhx_source ); - } - else if ( nhx_source instanceof File ) { - setInputType( NHXParser2.BUFFERED_READER ); - setSourceLength( 0 ); - final File f = ( File ) nhx_source; - final String error = ForesterUtil.isReadableFile( f ); - if ( !ForesterUtil.isEmpty( error ) ) { - throw new PhylogenyParserException( error ); - } - setNhxSource( new BufferedReader( new FileReader( f ) ) ); - } - else if ( nhx_source instanceof InputStream ) { - setInputType( NHXParser2.BUFFERED_READER ); - setSourceLength( 0 ); - final InputStreamReader isr = new InputStreamReader( ( InputStream ) nhx_source ); - setNhxSource( new BufferedReader( isr ) ); - } - else { - throw new IllegalArgumentException( getClass() + " can only parse objects of type String," - + " StringBuffer, StringBuilder, char[], File," + " or InputStream " - + " [attempt to parse object of " + nhx_source.getClass() + "]." ); - } - } - - public void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) { - _taxonomy_extraction = taxonomy_extraction; - } - - /** - * Decreases the clade level by one. - * - * @throws PhylogenyParserException - * if level goes below zero. - */ - private void decreaseCladeLevel() throws PhylogenyParserException { - if ( getCladeLevel() < 0 ) { - throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of close parens is larger than number of open parens" ); - } - --_clade_level; - } - - private Phylogeny finishPhylogeny2() throws PhylogenyParserException, NHXFormatException, - PhyloXmlDataFormatException { - //setCladeLevel( 0 ); - if ( getCurrentPhylogeny() != null ) { - System.out.println( "fp: cp=" + getCurrentPhylogeny() ); - if ( getCurrentAnotation() != null ) { - System.out.println( "fp: ca=" + getCurrentAnotation().toString() ); - } - else { - System.out.println( "fp: ca=null" ); - } - parseNHX( getCurrentAnotation() != null ? getCurrentAnotation().toString() : "", getCurrentPhylogeny() - .getRoot(), getTaxonomyExtraction(), isReplaceUnderscores() ); - if ( GUESS_IF_SUPPORT_VALUES ) { - if ( isBranchLengthsLikeBootstrapValues( getCurrentPhylogeny() ) ) { - moveBranchLengthsToConfidenceValues( getCurrentPhylogeny() ); - } - } - if ( isGuessRootedness() ) { - final PhylogenyNode root = getCurrentPhylogeny().getRoot(); - if ( ( root.getDistanceToParent() >= 0.0 ) || !ForesterUtil.isEmpty( root.getName() ) - || !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( root ) ) || root.isHasAssignedEvent() ) { - getCurrentPhylogeny().setRooted( true ); - } - } - return getCurrentPhylogeny(); - } - return null; - } - - private Phylogeny finishSingleNodePhylogeny2() throws PhylogenyParserException, NHXFormatException, - PhyloXmlDataFormatException { - // setCladeLevel( 0 ); - final PhylogenyNode new_node = new PhylogenyNode(); - parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); - setCurrentPhylogeny( new Phylogeny() ); - getCurrentPhylogeny().setRoot( new_node ); - return getCurrentPhylogeny(); - } - - private int getCladeLevel() { - return _clade_level; - } - - private StringBuilder getCurrentAnotation() { - return _current_anotation; - } - - private PhylogenyNode getCurrentNode() { - return _current_node; - } - - private Phylogeny getCurrentPhylogeny() { - return _current_phylogeny; - } - - private byte getInputType() { - return _input_type; - } - - private void getNext() throws IOException, NHXFormatException { - while ( true ) { - char c = '\b'; - if ( getInputType() == BUFFERED_READER ) { - final int ci = _my_source_br.read(); - if ( ci >= 0 ) { - c = ( char ) ci; - } - else { - break; - } - } - else { - if ( _i >= getSourceLength() ) { - break; - } - else { - switch ( getInputType() ) { - case STRING: - c = _my_source_str.charAt( _i ); - break; - case STRING_BUFFER: - c = _my_source_sbuff.charAt( _i ); - break; - case STRING_BUILDER: - c = _my_source_sbuil.charAt( _i ); - break; - case CHAR_ARRAY: - c = _my_source_charary[ _i ]; - break; - } - } - } - if ( !_in_single_quote && !_in_double_quote ) { - if ( c == ':' ) { - _saw_colon = true; - } - else if ( !( ( c < 33 ) || ( c > 126 ) ) && _saw_colon - && ( ( c != '[' ) && ( c != '.' ) && ( ( c < 48 ) || ( c > 57 ) ) ) ) { - _saw_colon = false; - } - if ( _in_open_bracket && ( c == ']' ) ) { - _in_open_bracket = false; - } - } - // \n\t is always ignored, - // as is " (34) and ' (39) (space is 32): - if ( ( isIgnoreQuotes() && ( ( c < 33 ) || ( c > 126 ) || ( c == 34 ) || ( c == 39 ) || ( ( getCladeLevel() == 0 ) && ( c == ';' ) ) ) ) - || ( !isIgnoreQuotes() && ( ( c < 32 ) || ( c > 126 ) || ( ( getCladeLevel() == 0 ) && ( c == ';' ) ) ) ) ) { - // Do nothing. - } - else if ( ( c == 32 ) && ( !_in_single_quote && !_in_double_quote ) ) { - // Do nothing. - } - else if ( _in_comment ) { - if ( c == ']' ) { - _in_comment = false; - } - } - else if ( _in_double_quote ) { - if ( c == '"' ) { - _in_double_quote = false; - } - else { - getCurrentAnotation().append( c ); - } - } - else if ( c == '"' ) { - _in_double_quote = true; - } - else if ( _in_single_quote ) { - if ( c == 39 ) { - _in_single_quote = false; - } - else { - getCurrentAnotation().append( c ); - } - } - else if ( c == 39 ) { - _in_single_quote = true; - } - else if ( c == '[' ) { - _saw_open_bracket = true; - _in_open_bracket = true; - } - else if ( _saw_open_bracket ) { - if ( c != ']' ) { - // everything not starting with "[&" is considered a comment - // unless ":digits and/or . [bootstrap]": - if ( c == '&' ) { - getCurrentAnotation().append( "[&" ); - } - else if ( _saw_colon ) { - getCurrentAnotation().append( "[" + c ); - } - else { - _in_comment = true; - } - } - // comment consisting just of "[]": - _saw_open_bracket = false; - } - else if ( ( c == '(' ) && !_in_open_bracket ) { - final Phylogeny phy = processOpenParen2(); - if ( phy != null ) { - ++_i; - // return phy; - _next = phy; - return; - } - } - else if ( ( c == ')' ) && !_in_open_bracket ) { - processCloseParen(); - } - else if ( ( c == ',' ) && !_in_open_bracket ) { - processComma(); - } - else { - getCurrentAnotation().append( c ); - } - ++_i; - } // while ( true ) - System.out.println( "done with loop" ); - if ( getCurrentPhylogeny() == null ) { - System.out.println( "... but is null" ); - } - if ( getCladeLevel() != 0 ) { - throw new PhylogenyParserException( "error in NH (Newick) formatted data: most likely cause: number of open parens does not equal number of close parens" ); - } - if ( getCurrentPhylogeny() != null ) { - System.out.println( "... and current=" + getCurrentPhylogeny() ); - _next = finishPhylogeny2(); - System.out.println( "... _next=" + _next ); - setCurrentPhylogeny( null ); - setCurrentAnotation( null ); - //return finishPhylogeny2(); - } - else if ( ( getCurrentAnotation() != null ) && ( getCurrentAnotation().length() > 0 ) ) { - System.out.println( "1node=" + getCurrentAnotation() ); - _next = finishSingleNodePhylogeny2(); - setCurrentAnotation( null ); - //return finishSingleNodePhylogeny2(); - } - else { - _next = null; - //return null; - } - } - - private Object getNhxSource() { - return _nhx_source; - } - - private int getSourceLength() { - return _source_length; - } - - private void increaseCladeLevel() { - ++_clade_level; - } - - private void init() { - setTaxonomyExtraction( TAXONOMY_EXTRACTION_DEFAULT ); - setReplaceUnderscores( REPLACE_UNDERSCORES_DEFAULT ); - setGuessRootedness( GUESS_ROOTEDNESS_DEFAULT ); - setIgnoreQuotes( IGNORE_QUOTES_DEFAULT ); - } - - private boolean isGuessRootedness() { - return _guess_rootedness; - } - - private boolean isIgnoreQuotes() { - return _ignore_quotes; - } - - private boolean isReplaceUnderscores() { - return _replace_underscores; - } - - private boolean isSawClosingParen() { - return _saw_closing_paren; - } - - /** - * Replaces the current annotation with a new StringBuffer. - */ - private void newCurrentAnotation() { - setCurrentAnotation( new StringBuilder() ); - } - - /** - * Called if a closing paren is encountered. - * - * @throws PhylogenyParserException - * @throws NHXFormatException - * @throws PhyloXmlDataFormatException - */ - private void processCloseParen() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { - decreaseCladeLevel(); - if ( !isSawClosingParen() ) { - final PhylogenyNode new_node = new PhylogenyNode(); - parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); - newCurrentAnotation(); - getCurrentNode().addAsChild( new_node ); - } - else { - parseNHX( getCurrentAnotation().toString(), - getCurrentNode().getLastChildNode(), - getTaxonomyExtraction(), - isReplaceUnderscores() ); - newCurrentAnotation(); - } - if ( !getCurrentNode().isRoot() ) { - setCurrentNode( getCurrentNode().getParent() ); - } - setSawClosingParen( true ); - } - - /** - * Called if a comma is encountered. - * - * @throws PhylogenyParserException - * @throws NHXFormatException - * @throws PhyloXmlDataFormatException - */ - private void processComma() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { - if ( !isSawClosingParen() ) { - final PhylogenyNode new_node = new PhylogenyNode(); - parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() ); - if ( getCurrentNode() == null ) { - throw new NHXFormatException( "format might not be NH or NHX" ); - } - getCurrentNode().addAsChild( new_node ); - } - else { - parseNHX( getCurrentAnotation().toString(), - getCurrentNode().getLastChildNode(), - getTaxonomyExtraction(), - isReplaceUnderscores() ); - } - newCurrentAnotation(); - setSawClosingParen( false ); - } - - private Phylogeny processOpenParen2() throws PhylogenyParserException, NHXFormatException, - PhyloXmlDataFormatException { - Phylogeny phy = null; - final PhylogenyNode new_node = new PhylogenyNode(); - System.out.println( "level=" + getCladeLevel() ); - if ( getCladeLevel() == 0 ) { - if ( getCurrentPhylogeny() != null ) { - phy = finishPhylogeny2(); - } - setCladeLevel( 1 ); - newCurrentAnotation(); - setCurrentPhylogeny( new Phylogeny() ); - getCurrentPhylogeny().setRoot( new_node ); - } - else { - increaseCladeLevel(); - getCurrentNode().addAsChild( new_node ); - } - setCurrentNode( new_node ); - setSawClosingParen( false ); - if ( phy != null ) { - System.out.println( "processOpenParen2 returns " + phy.toString() ); - } - else { - System.out.println( "processOpenParen2 returns null" ); - } - return phy; - } - - private void setCladeLevel( final int clade_level ) { - if ( clade_level < 0 ) { - throw new IllegalArgumentException( "attempt to set clade level to a number smaller than zero" ); - } - _clade_level = clade_level; - } - - private void setCurrentAnotation( final StringBuilder current_anotation ) { - _current_anotation = current_anotation; - } - - private void setCurrentNode( final PhylogenyNode current_node ) { - _current_node = current_node; - } - - private void setCurrentPhylogeny( final Phylogeny current_phylogeny ) { - _current_phylogeny = current_phylogeny; - } - - private void setInputType( final byte input_type ) { - _input_type = input_type; - } - - private void setNhxSource( final Object nhx_source ) { - _nhx_source = nhx_source; - } - - private void setSawClosingParen( final boolean saw_closing_paren ) { - _saw_closing_paren = saw_closing_paren; - } - - private void setSourceLength( final int source_length ) { - _source_length = source_length; - } - - public static void parseNHX( String s, - final PhylogenyNode node_to_annotate, - final TAXONOMY_EXTRACTION taxonomy_extraction, - final boolean replace_underscores ) throws NHXFormatException, - PhyloXmlDataFormatException { - if ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) && replace_underscores ) { - throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" ); - } - if ( ( s != null ) && ( s.length() > 0 ) ) { - if ( replace_underscores ) { - s = s.replaceAll( "_+", " " ); - } - boolean is_nhx = false; - final int ob = s.indexOf( "[" ); - if ( ob > -1 ) { - String b = ""; - is_nhx = true; - final int cb = s.indexOf( "]" ); - if ( cb < 0 ) { - throw new NHXFormatException( "error in NHX formatted data: no closing \"]\" in \"" + s + "\"" ); - } - if ( s.indexOf( "&&NHX" ) == ( ob + 1 ) ) { - b = s.substring( ob + 6, cb ); - } - else { - // No &&NHX and digits only: is likely to be a support value. - final String bracketed = s.substring( ob + 1, cb ); - final Matcher numbers_only = NUMBERS_ONLY_PATTERN.matcher( bracketed ); - if ( numbers_only.matches() ) { - b = ":" + NHXtags.SUPPORT + bracketed; - } - else if ( s.indexOf( "prob=" ) > -1 ) { - processMrBayes3Data( s, node_to_annotate ); - } - } - s = s.substring( 0, ob ) + b; - if ( ( s.indexOf( "[" ) > -1 ) || ( s.indexOf( "]" ) > -1 ) ) { - throw new NHXFormatException( "error in NHX formatted data: more than one \"]\" or \"[\"" ); - } - } - final StringTokenizer t = new StringTokenizer( s, ":" ); - if ( t.countTokens() > 0 ) { - if ( !s.startsWith( ":" ) ) { - node_to_annotate.setName( t.nextToken() ); - if ( !replace_underscores && ( !is_nhx && ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) { - ParserUtils.extractTaxonomyDataFromNodeName( node_to_annotate, taxonomy_extraction ); - } - } - while ( t.hasMoreTokens() ) { - s = t.nextToken(); - if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.SPECIES_NAME ) ) { - if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) { - node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() ); - } - node_to_annotate.getNodeData().getTaxonomy().setScientificName( s.substring( 2 ) ); - } - else if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.IS_DUPLICATION ) ) { - if ( ( s.charAt( 2 ) == 'Y' ) || ( s.charAt( 2 ) == 'T' ) ) { - node_to_annotate.getNodeData().setEvent( Event.createSingleDuplicationEvent() ); - } - else if ( ( s.charAt( 2 ) == 'N' ) || ( s.charAt( 2 ) == 'F' ) ) { - node_to_annotate.getNodeData().setEvent( Event.createSingleSpeciationEvent() ); - } - else if ( s.charAt( 2 ) == '?' ) { - node_to_annotate.getNodeData().setEvent( Event.createSingleSpeciationOrDuplicationEvent() ); - } - else { - throw new NHXFormatException( "error in NHX formatted data: :D=Y or :D=N or :D=?" ); - } - } - else if ( s.startsWith( NHXtags.SUPPORT ) ) { - PhylogenyMethods.setConfidence( node_to_annotate, doubleValue( s.substring( 2 ) ) ); - } - else if ( s.startsWith( NHXtags.TAXONOMY_ID ) ) { - if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) { - node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() ); - } - node_to_annotate.getNodeData().getTaxonomy().setIdentifier( new Identifier( s.substring( 2 ) ) ); - } - else if ( s.startsWith( NHXtags.PARENT_BRANCH_WIDTH ) ) { - PhylogenyMethods.setBranchWidthValue( node_to_annotate, Integer.parseInt( s.substring( 2 ) ) ); - } - else if ( s.startsWith( NHXtags.COLOR ) ) { - final Color c = NHXParser2.stringToColor( s.substring( 2 ) ); - if ( c != null ) { - PhylogenyMethods.setBranchColorValue( node_to_annotate, c ); - } - } - else if ( s.startsWith( NHXtags.DOMAIN_STRUCTURE ) ) { - if ( !node_to_annotate.getNodeData().isHasSequence() ) { - node_to_annotate.getNodeData().setSequence( new Sequence() ); - } - node_to_annotate.getNodeData().getSequence() - .setDomainArchitecture( new DomainArchitecture( s.substring( 3 ) ) ); - } - else if ( s.startsWith( NHXtags.SEQUENCE_ACCESSION ) ) { - if ( !node_to_annotate.getNodeData().isHasSequence() ) { - node_to_annotate.getNodeData().setSequence( new Sequence() ); - } - node_to_annotate.getNodeData().getSequence() - .setAccession( new Accession( s.substring( 3 ), "?" ) ); - } - else if ( s.startsWith( NHXtags.GENE_NAME ) ) { - if ( !node_to_annotate.getNodeData().isHasSequence() ) { - node_to_annotate.getNodeData().setSequence( new Sequence() ); - } - node_to_annotate.getNodeData().getSequence().setName( s.substring( 3 ) ); - } - else if ( s.indexOf( '=' ) < 0 ) { - if ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { - throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:" - + "\"" + s + "\"" ); - } - node_to_annotate.setDistanceToParent( doubleValue( s ) ); - } - } // while ( t.hasMoreTokens() ) - } - } - } - - private static double doubleValue( final String str ) throws NHXFormatException { - try { - return Double.valueOf( str ).doubleValue(); - } - catch ( final NumberFormatException ex ) { - throw new NHXFormatException( "error in NH/NHX formatted data: failed to parse number from " + "\"" + str - + "\"" ); - } - } - - private static boolean isBranchLengthsLikeBootstrapValues( final Phylogeny p ) { - final PhylogenyNodeIterator it = p.iteratorExternalForward(); - final double d0 = it.next().getDistanceToParent(); - if ( ( d0 < 10 ) || !it.hasNext() ) { - return false; - } - while ( it.hasNext() ) { - final double d = it.next().getDistanceToParent(); - if ( ( d != d0 ) || ( d < 10 ) ) { - return false; - } - } - return true; - } - - private static void moveBranchLengthsToConfidenceValues( final Phylogeny p ) { - final PhylogenyNodeIterator it = p.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - PhylogenyMethods.setBootstrapConfidence( n, n.getDistanceToParent() ); - n.setDistanceToParent( PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ); - } - } - - private static void processMrBayes3Data( final String s, final PhylogenyNode node_to_annotate ) - throws NHXFormatException { - double sd = -1; - final Matcher mb_prob_sd_matcher = MB_PROB_SD_PATTERN.matcher( s ); - if ( mb_prob_sd_matcher.find() ) { - try { - sd = Double.parseDouble( mb_prob_sd_matcher.group( 1 ) ); - } - catch ( final NumberFormatException e ) { - throw new NHXFormatException( "failed to parse probability standard deviation (Mr Bayes output) from \"" - + s + "\"" ); - } - } - final Matcher mb_prob_matcher = MB_PROB_PATTERN.matcher( s ); - if ( mb_prob_matcher.find() ) { - double prob = -1; - try { - prob = Double.parseDouble( mb_prob_matcher.group( 1 ) ); - } - catch ( final NumberFormatException e ) { - throw new NHXFormatException( "failed to parse probability (Mr Bayes output) from \"" + s + "\"" ); - } - if ( prob >= 0.0 ) { - if ( sd >= 0.0 ) { - node_to_annotate.getBranchData() - .addConfidence( new Confidence( prob, "posterior probability", sd ) ); - } - else { - node_to_annotate.getBranchData().addConfidence( new Confidence( prob, "posterior probability" ) ); - } - } - } - final Matcher mb_bl_matcher = MB_BL_PATTERN.matcher( s ); - if ( mb_bl_matcher.find() ) { - double bl = -1; - try { - bl = Double.parseDouble( mb_bl_matcher.group( 1 ) ); - } - catch ( final NumberFormatException e ) { - throw new NHXFormatException( "failed to parse median branch length (Mr Bayes output) from \"" + s - + "\"" ); - } - if ( bl >= 0.0 ) { - node_to_annotate.setDistanceToParent( bl ); - } - } - } - - /** - * Parses String s in the format r.g.b (e.g. "12.34.234" ) into red, green, - * and blue and returns the corresponding Color. - */ - private static Color stringToColor( final String s ) { - final StringTokenizer st = new StringTokenizer( s, "." ); - if ( st.countTokens() != 3 ) { - throw new IllegalArgumentException( "illegal format for color: " + s ); - } - final int red = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) ); - final int green = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) ); - final int blu = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) ); - return new Color( red, green, blu ); - } -} diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXtags.java b/forester/java/src/org/forester/io/parsers/nhx/NHXtags.java index ca8e386..7cf98e6 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXtags.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXtags.java @@ -27,14 +27,11 @@ package org.forester.io.parsers.nhx; public final class NHXtags { - public static final String COLOR = "C="; - public static final String PARENT_BRANCH_WIDTH = "W="; - public static final String TAXONOMY_ID = "T="; - public static final String SUPPORT = "B="; - public static final String IS_DUPLICATION = "D="; - public static final String SPECIES_NAME = "S="; - public static final String DOMAIN_STRUCTURE = "DS="; - public static final String GENE_NAME = "GN="; - public static final String SEQUENCE_ACCESSION = "AC="; - public static final Object BRANCH_WIDTH = "W="; + public static final String TAXONOMY_ID = "T="; + public static final String SUPPORT = "B="; + public static final String IS_DUPLICATION = "D="; + public static final String SPECIES_NAME = "S="; + public static final String DOMAIN_STRUCTURE = "DS="; + public static final String GENE_NAME = "GN="; + public static final String SEQUENCE_ACCESSION = "AC="; } diff --git a/forester/java/src/org/forester/phylogeny/data/BranchColor.java b/forester/java/src/org/forester/phylogeny/data/BranchColor.java index 50d7054..e3ceaf0 100644 --- a/forester/java/src/org/forester/phylogeny/data/BranchColor.java +++ b/forester/java/src/org/forester/phylogeny/data/BranchColor.java @@ -29,7 +29,6 @@ import java.awt.Color; import java.io.IOException; import java.io.Writer; -import org.forester.io.parsers.nhx.NHXtags; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.util.ForesterUtil; @@ -81,14 +80,7 @@ public class BranchColor implements PhylogenyData { @Override public StringBuffer toNHX() { - final StringBuffer sb = new StringBuffer(); - sb.append( NHXtags.COLOR ); - sb.append( getValue().getRed() ); - sb.append( "." ); - sb.append( getValue().getGreen() ); - sb.append( "." ); - sb.append( getValue().getBlue() ); - return sb; + throw new UnsupportedOperationException(); } @Override diff --git a/forester/java/src/org/forester/phylogeny/data/BranchData.java b/forester/java/src/org/forester/phylogeny/data/BranchData.java index 68f8e2c..51d895d 100644 --- a/forester/java/src/org/forester/phylogeny/data/BranchData.java +++ b/forester/java/src/org/forester/phylogeny/data/BranchData.java @@ -128,14 +128,6 @@ public class BranchData implements PhylogenyData { sb.append( ":" ); sb.append( getConfidence( 0 ).toNHX() ); } - if ( isHasBranchWidth() && ( getBranchWidth().getValue() != BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE ) ) { - sb.append( ":" ); - sb.append( getBranchWidth().toNHX() ); - } - if ( isHasBranchColor() && ( getBranchColor().getValue() != null ) ) { - sb.append( ":" ); - sb.append( getBranchColor().toNHX() ); - } return sb; } diff --git a/forester/java/src/org/forester/phylogeny/data/BranchWidth.java b/forester/java/src/org/forester/phylogeny/data/BranchWidth.java index cc7d853..4e7f0bb 100644 --- a/forester/java/src/org/forester/phylogeny/data/BranchWidth.java +++ b/forester/java/src/org/forester/phylogeny/data/BranchWidth.java @@ -28,7 +28,6 @@ package org.forester.phylogeny.data; import java.io.IOException; import java.io.Writer; -import org.forester.io.parsers.nhx.NHXtags; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.util.ForesterUtil; @@ -71,10 +70,7 @@ public class BranchWidth implements PhylogenyData { @Override public StringBuffer toNHX() { - final StringBuffer sb = new StringBuffer(); - sb.append( NHXtags.PARENT_BRANCH_WIDTH ); - sb.append( getValue() ); - return sb; + throw new UnsupportedOperationException(); } @Override diff --git a/forester/java/src/org/forester/phylogeny/data/NodeData.java b/forester/java/src/org/forester/phylogeny/data/NodeData.java index d845897..fd8e460 100644 --- a/forester/java/src/org/forester/phylogeny/data/NodeData.java +++ b/forester/java/src/org/forester/phylogeny/data/NodeData.java @@ -449,9 +449,6 @@ public class NodeData implements PhylogenyData { if ( isHasEvent() ) { sb.append( getEvent().toNHX() ); } - if ( isHasProperties() ) { - sb.append( getProperties().toNHX() ); - } return sb; } diff --git a/forester/java/src/org/forester/phylogeny/data/PropertiesMap.java b/forester/java/src/org/forester/phylogeny/data/PropertiesMap.java index 0f47f27..4b404be 100644 --- a/forester/java/src/org/forester/phylogeny/data/PropertiesMap.java +++ b/forester/java/src/org/forester/phylogeny/data/PropertiesMap.java @@ -180,13 +180,7 @@ public class PropertiesMap implements PhylogenyData { @Override public StringBuffer toNHX() { - final StringBuffer sb = new StringBuffer(); - if ( getProperties() != null ) { - for( final String ref : getProperties().keySet() ) { - sb.append( getProperties().get( ref ).toNHX() ); - } - } - return sb; + throw new UnsupportedOperationException(); } @Override diff --git a/forester/java/src/org/forester/phylogeny/data/Property.java b/forester/java/src/org/forester/phylogeny/data/Property.java index b7b7c17..a1fba96 100644 --- a/forester/java/src/org/forester/phylogeny/data/Property.java +++ b/forester/java/src/org/forester/phylogeny/data/Property.java @@ -155,53 +155,7 @@ public class Property implements PhylogenyData { @Override public StringBuffer toNHX() { - final StringBuffer nhx = new StringBuffer(); - nhx.append( ":X" ); - switch ( getAppliesTo() ) { - case CLADE: - nhx.append( "C=" ); - break; - case NODE: - nhx.append( "N=" ); - break; - case PARENT_BRANCH: - nhx.append( "B=" ); - break; - case PHYLOGENY: - nhx.append( "P=" ); - break; - case ANNOTATION: - nhx.append( "S=" ); - break; - default: - nhx.append( "O=" ); - break; - } - if ( !getDataType().equals( "" ) ) { - if ( getDataType().equals( "xsd:string" ) ) { - nhx.append( "S=" ); - } - else if ( getDataType().equals( "xsd:long" ) ) { - nhx.append( "L=" ); - } - else if ( getDataType().equals( "xsd:decimal" ) ) { - nhx.append( "D=" ); - } - else if ( getDataType().equals( "xsd:boolean" ) ) { - nhx.append( "B=" ); - } - else if ( getDataType().equals( "xsd:anyUR" ) ) { - nhx.append( "U=" ); - } - } - nhx.append( getRef() ); - nhx.append( "=" ); - nhx.append( getValue() ); - if ( !getUnit().equals( "" ) ) { - nhx.append( "=" ); - nhx.append( getUnit() ); - } - return nhx; + throw new UnsupportedOperationException(); } @Override diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 6a1744d..b74c3e1 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -52,7 +52,6 @@ import org.forester.io.parsers.nexus.NexusCharactersParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; -import org.forester.io.parsers.nhx.NHXParser2; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.tol.TolParser; import org.forester.io.parsers.util.ParserUtils; @@ -244,7 +243,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.exit( 0 ); System.out.print( "Conversion to NHX (node level): " ); if ( Test.testNHXconversion() ) { System.out.println( "OK." ); @@ -1912,7 +1910,7 @@ public final class Test { if ( t8.getHeight() != 15 ) { return false; } - final char[] a9 = new char[] {}; + final char[] a9 = new char[] { 'a' }; final Phylogeny t9 = factory.create( a9, new NHXParser() )[ 0 ]; if ( t9.getHeight() != 0 ) { return false; @@ -4569,7 +4567,7 @@ public final class Test { private static boolean testNHParsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny p1 = factory.create( "(A,B1)", new NHXParser2() )[ 0 ]; + final Phylogeny p1 = factory.create( "(A,B1)", new NHXParser() )[ 0 ]; if ( !p1.toNewHampshireX().equals( "(A,B1)" ) ) { return false; } @@ -4592,20 +4590,20 @@ public final class Test { if ( !p1b.toNewHampshire().equals( "(';A;',';B;1;');" ) ) { return false; } - final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ), new NHXParser2() )[ 0 ]; - final Phylogeny p3 = factory.create( new char[] { '(', 'A', ',', 'B', '3', ')' }, new NHXParser2() )[ 0 ]; - final Phylogeny p4 = factory.create( "(A,B4);", new NHXParser2() )[ 0 ]; - final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ), new NHXParser2() )[ 0 ]; - final Phylogeny[] p7 = factory.create( "(A,B7);(C,D7)", new NHXParser2() ); - final Phylogeny[] p8 = factory.create( "(A,B8) (C,D8)", new NHXParser2() ); - final Phylogeny[] p9 = factory.create( "(A,B9)\n(C,D9)", new NHXParser2() ); - final Phylogeny[] p10 = factory.create( "(A,B10);(C,D10);", new NHXParser2() ); - final Phylogeny[] p11 = factory.create( "(A,B11);(C,D11) (E,F11)\t(G,H11)", new NHXParser2() ); - final Phylogeny[] p12 = factory.create( "(A,B12) (C,D12) (E,F12) (G,H12)", new NHXParser2() ); + final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ), new NHXParser() )[ 0 ]; + final Phylogeny p3 = factory.create( new char[] { '(', 'A', ',', 'B', '3', ')' }, new NHXParser() )[ 0 ]; + final Phylogeny p4 = factory.create( "(A,B4);", new NHXParser() )[ 0 ]; + final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ), new NHXParser() )[ 0 ]; + final Phylogeny[] p7 = factory.create( "(A,B7);(C,D7)", new NHXParser() ); + final Phylogeny[] p8 = factory.create( "(A,B8) (C,D8)", new NHXParser() ); + final Phylogeny[] p9 = factory.create( "(A,B9)\n(C,D9)", new NHXParser() ); + final Phylogeny[] p10 = factory.create( "(A,B10);(C,D10);", new NHXParser() ); + final Phylogeny[] p11 = factory.create( "(A,B11);(C,D11) (E,F11)\t(G,H11)", new NHXParser() ); + final Phylogeny[] p12 = factory.create( "(A,B12) (C,D12) (E,F12) (G,H12)", new NHXParser() ); final Phylogeny[] p13 = factory.create( " ; (;A; , ; B ; 1 3 ; \n)\t ( \n ;" + " C ; ,; D;13;);;;;;;(;E;,;F;13 ;) ; " + "; ; ( \t\n\r\b; G ;, ;H ;1 3; ) ; ; ;", - new NHXParser2() ); + new NHXParser() ); if ( !p13[ 0 ].toNewHampshireX().equals( "(';A;',';B;13;')" ) ) { return false; } @@ -4618,10 +4616,10 @@ public final class Test { if ( !p13[ 3 ].toNewHampshireX().equals( "(';G;',';H;13;')" ) ) { return false; } - final Phylogeny[] p14 = factory.create( "(A,B14)ab", new NHXParser2() ); - final Phylogeny[] p15 = factory.create( "(A,B15)ab;", new NHXParser2() ); + final Phylogeny[] p14 = factory.create( "(A,B14)ab", new NHXParser() ); + final Phylogeny[] p15 = factory.create( "(A,B15)ab;", new NHXParser() ); final String p16_S = "((A,B),C)"; - final Phylogeny[] p16 = factory.create( p16_S, new NHXParser2() ); + final Phylogeny[] p16 = factory.create( p16_S, new NHXParser() ); if ( p16.length != 1 ) { return false; } @@ -4629,7 +4627,7 @@ public final class Test { return false; } final String p17_S = "(C,(A,B))"; - final Phylogeny[] p17 = factory.create( p17_S, new NHXParser2() ); + final Phylogeny[] p17 = factory.create( p17_S, new NHXParser() ); if ( p17.length != 1 ) { return false; } @@ -4637,7 +4635,7 @@ public final class Test { return false; } final String p18_S = "((A,B),(C,D))"; - final Phylogeny[] p18 = factory.create( p18_S, new NHXParser2() ); + final Phylogeny[] p18 = factory.create( p18_S, new NHXParser() ); if ( p18.length != 1 ) { return false; } @@ -4645,7 +4643,7 @@ public final class Test { return false; } final String p19_S = "(((A,B),C),D)"; - final Phylogeny[] p19 = factory.create( p19_S, new NHXParser2() ); + final Phylogeny[] p19 = factory.create( p19_S, new NHXParser() ); if ( p19.length != 1 ) { return false; } @@ -4653,7 +4651,7 @@ public final class Test { return false; } final String p20_S = "(A,(B,(C,D)))"; - final Phylogeny[] p20 = factory.create( p20_S, new NHXParser2() ); + final Phylogeny[] p20 = factory.create( p20_S, new NHXParser() ); if ( p20.length != 1 ) { return false; } @@ -4661,7 +4659,7 @@ public final class Test { return false; } final String p21_S = "(A,(B,(C,(D,E))))"; - final Phylogeny[] p21 = factory.create( p21_S, new NHXParser2() ); + final Phylogeny[] p21 = factory.create( p21_S, new NHXParser() ); if ( p21.length != 1 ) { return false; } @@ -4669,7 +4667,7 @@ public final class Test { return false; } final String p22_S = "((((A,B),C),D),E)"; - final Phylogeny[] p22 = factory.create( p22_S, new NHXParser2() ); + final Phylogeny[] p22 = factory.create( p22_S, new NHXParser() ); if ( p22.length != 1 ) { return false; } @@ -4677,7 +4675,7 @@ public final class Test { return false; } final String p23_S = "(A,(B,(C,(D,E)de)cde)bcde)abcde"; - final Phylogeny[] p23 = factory.create( p23_S, new NHXParser2() ); + final Phylogeny[] p23 = factory.create( p23_S, new NHXParser() ); if ( p23.length != 1 ) { System.out.println( "xl=" + p23.length ); System.exit( -1 ); @@ -4687,7 +4685,7 @@ public final class Test { return false; } final String p24_S = "((((A,B)ab,C)abc,D)abcd,E)abcde"; - final Phylogeny[] p24 = factory.create( p24_S, new NHXParser2() ); + final Phylogeny[] p24 = factory.create( p24_S, new NHXParser() ); if ( p24.length != 1 ) { return false; } @@ -4696,7 +4694,7 @@ public final class Test { } final String p241_S1 = "(A,(B,(C,(D,E)de)cde)bcde)abcde"; final String p241_S2 = "((((A,B)ab,C)abc,D)abcd,E)abcde"; - final Phylogeny[] p241 = factory.create( p241_S1 + p241_S2, new NHXParser2() ); + final Phylogeny[] p241 = factory.create( p241_S1 + p241_S2, new NHXParser() ); if ( p241.length != 2 ) { return false; } @@ -4714,17 +4712,17 @@ public final class Test { + "E)abcde)abcd,E)abcde,((((A,B)ab,C)abc,D)abcd,E)abcde)" + "ab,C)abc,((((A,B)ab,C)abc,D)abcd,E)abcde)abcd,E)abcde" + ")ab,C)abc,D)abcd,E)abcde)ab,C)abc,((((A,B)ab,C)abc,D)" + "abcd,E)abcde)abcd,E)abcde"; - final Phylogeny[] p25 = factory.create( p25_S, new NHXParser2() ); + final Phylogeny[] p25 = factory.create( p25_S, new NHXParser() ); if ( !p25[ 0 ].toNewHampshireX().equals( p25_S ) ) { return false; } final String p26_S = "(A,B)ab"; - final Phylogeny[] p26 = factory.create( p26_S, new NHXParser2() ); + final Phylogeny[] p26 = factory.create( p26_S, new NHXParser() ); if ( !p26[ 0 ].toNewHampshireX().equals( p26_S ) ) { return false; } final String p27_S = "((((A,B)ab,C)abc,D)abcd,E)abcde"; - final Phylogeny[] p27s = factory.create( p27_S, new NHXParser2() ); + final Phylogeny[] p27s = factory.create( p27_S, new NHXParser() ); if ( p27s.length != 1 ) { System.out.println( "xxl=" + p27s.length ); System.exit( -1 ); @@ -4736,7 +4734,7 @@ public final class Test { return false; } final Phylogeny[] p27 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phylogeny27.nhx" ), - new NHXParser2() ); + new NHXParser() ); if ( p27.length != 1 ) { System.out.println( "yl=" + p27.length ); System.exit( -1 ); @@ -4752,7 +4750,7 @@ public final class Test { final String p28_S3 = "(A,B)ab"; final String p28_S4 = "((((A,B),C),D),;E;)"; final Phylogeny[] p28 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phylogeny28.nhx" ), - new NHXParser2() ); + new NHXParser() ); if ( !p28[ 0 ].toNewHampshireX().equals( p28_S1 ) ) { return false; } @@ -4769,105 +4767,105 @@ public final class Test { return false; } final String p29_S = "((((A:0.01,B:0.684)ab:0.345,C:0.3451)abc:0.3451,D:1.5)abcd:0.134,E:0.32)abcde:0.1345"; - final Phylogeny[] p29 = factory.create( p29_S, new NHXParser2() ); + final Phylogeny[] p29 = factory.create( p29_S, new NHXParser() ); if ( !p29[ 0 ].toNewHampshireX().equals( p29_S ) ) { return false; } final String p30_S = "((((A:0.01,B:0.02):0.93,C:0.04):0.05,D:1.4):0.06,E):0.72"; - final Phylogeny[] p30 = factory.create( p30_S, new NHXParser2() ); + final Phylogeny[] p30 = factory.create( p30_S, new NHXParser() ); if ( !p30[ 0 ].toNewHampshireX().equals( p30_S ) ) { return false; } final String p32_S = " ; ; \n \t \b \f \r ;;;;;; "; - final Phylogeny[] p32 = factory.create( p32_S, new NHXParser2() ); + final Phylogeny[] p32 = factory.create( p32_S, new NHXParser() ); if ( ( p32.length != 0 ) ) { return false; } final String p33_S = "A"; - final Phylogeny[] p33 = factory.create( p33_S, new NHXParser2() ); + final Phylogeny[] p33 = factory.create( p33_S, new NHXParser() ); if ( !p33[ 0 ].toNewHampshireX().equals( p33_S ) ) { return false; } final String p34_S = "B;"; - final Phylogeny[] p34 = factory.create( p34_S, new NHXParser2() ); + final Phylogeny[] p34 = factory.create( p34_S, new NHXParser() ); if ( !p34[ 0 ].toNewHampshireX().equals( "B" ) ) { return false; } final String p35_S = "B:0.2"; - final Phylogeny[] p35 = factory.create( p35_S, new NHXParser2() ); + final Phylogeny[] p35 = factory.create( p35_S, new NHXParser() ); if ( !p35[ 0 ].toNewHampshireX().equals( p35_S ) ) { return false; } final String p36_S = "(A)"; - final Phylogeny[] p36 = factory.create( p36_S, new NHXParser2() ); + final Phylogeny[] p36 = factory.create( p36_S, new NHXParser() ); if ( !p36[ 0 ].toNewHampshireX().equals( p36_S ) ) { return false; } final String p37_S = "((A))"; - final Phylogeny[] p37 = factory.create( p37_S, new NHXParser2() ); + final Phylogeny[] p37 = factory.create( p37_S, new NHXParser() ); if ( !p37[ 0 ].toNewHampshireX().equals( p37_S ) ) { return false; } final String p38_S = "(((((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8"; - final Phylogeny[] p38 = factory.create( p38_S, new NHXParser2() ); + final Phylogeny[] p38 = factory.create( p38_S, new NHXParser() ); if ( !p38[ 0 ].toNewHampshireX().equals( p38_S ) ) { return false; } final String p39_S = "(((B,((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8"; - final Phylogeny[] p39 = factory.create( p39_S, new NHXParser2() ); + final Phylogeny[] p39 = factory.create( p39_S, new NHXParser() ); if ( !p39[ 0 ].toNewHampshireX().equals( p39_S ) ) { return false; } final String p40_S = "(A,B,C)"; - final Phylogeny[] p40 = factory.create( p40_S, new NHXParser2() ); + final Phylogeny[] p40 = factory.create( p40_S, new NHXParser() ); if ( !p40[ 0 ].toNewHampshireX().equals( p40_S ) ) { return false; } final String p41_S = "(A,B,C,D,E,F,G,H,I,J,K)"; - final Phylogeny[] p41 = factory.create( p41_S, new NHXParser2() ); + final Phylogeny[] p41 = factory.create( p41_S, new NHXParser() ); if ( !p41[ 0 ].toNewHampshireX().equals( p41_S ) ) { return false; } final String p42_S = "(A,B,(X,Y,Z),D,E,F,G,H,I,J,K)"; - final Phylogeny[] p42 = factory.create( p42_S, new NHXParser2() ); + final Phylogeny[] p42 = factory.create( p42_S, new NHXParser() ); if ( !p42[ 0 ].toNewHampshireX().equals( p42_S ) ) { return false; } final String p43_S = "(A,B,C,(AA,BB,CC,(CCC,DDD,EEE,(FFFF,GGGG)x)y,DD,EE,FF,GG,HH),D,E,(EE,FF),F,G,H,(((((5)4)3)2)1),I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,(XX,(YY)),Y,Z)"; - final Phylogeny[] p43 = factory.create( p43_S, new NHXParser2() ); + final Phylogeny[] p43 = factory.create( p43_S, new NHXParser() ); if ( !p43[ 0 ].toNewHampshireX().equals( p43_S ) ) { return false; } final String p44_S = "(((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)))"; - final Phylogeny[] p44 = factory.create( p44_S, new NHXParser2() ); + final Phylogeny[] p44 = factory.create( p44_S, new NHXParser() ); if ( !p44[ 0 ].toNewHampshireX().equals( p44_S ) ) { return false; } final String p45_S = "((((((((((A))))))))),(((((((((B))))))))),(((((((((C))))))))))"; - final Phylogeny[] p45 = factory.create( p45_S, new NHXParser2() ); + final Phylogeny[] p45 = factory.create( p45_S, new NHXParser() ); if ( !p45[ 0 ].toNewHampshireX().equals( p45_S ) ) { return false; } final String p46_S = ""; - final Phylogeny[] p46 = factory.create( p46_S, new NHXParser2() ); + final Phylogeny[] p46 = factory.create( p46_S, new NHXParser() ); if ( p46.length != 0 ) { return false; } - final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ), new NHXParser2() )[ 0 ]; + final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ), new NHXParser() )[ 0 ]; if ( !isEqual( 0.44, p47.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } - final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ), new NHXParser2() )[ 0 ]; + final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( !isEqual( 88, p48.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } final Phylogeny p49 = factory .create( new StringBuffer( "((A,B)a[comment:a,b;(a)]b:2[0.44][comment(a,b,b);],C)" ), - new NHXParser2() )[ 0 ]; + new NHXParser() )[ 0 ]; if ( !isEqual( 0.44, p49.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } - final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ), new NHXParser2() )[ 0 ]; + final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( p50.getNode( "A" ) == null ) { return false; } @@ -4882,22 +4880,22 @@ public final class Test { .equals( "((A,B)88:2.0,C);" ) ) { return false; } - final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ), new NHXParser2() )[ 0 ]; + final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( p51.getNode( "A(A" ) == null ) { return false; } - final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ), new NHXParser2() )[ 0 ]; + final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( p52.getNode( "A(A" ) == null ) { return false; } final Phylogeny p53 = factory .create( new StringBuffer( "(('A(A',\"B (x (a' ,b) f(x);\"[com])[ment]ab:2[88],C)" ), - new NHXParser2() )[ 0 ]; + new NHXParser() )[ 0 ]; if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) { return false; } // - final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser2() )[ 0 ]; + final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ]; if ( p54.getNode( "A" ) == null ) { return false; } @@ -4915,13 +4913,13 @@ public final class Test { private static boolean testNHParsingIter() { try { - String p0_str = "(A,B);"; - NHXParser2 p = new NHXParser2(); + final String p0_str = "(A,B);"; + final NHXParser p = new NHXParser(); p.setSource( p0_str ); if ( !p.hasNext() ) { return false; } - Phylogeny p0 = p.next(); + final Phylogeny p0 = p.next(); if ( !p0.toNewHampshire().equals( p0_str ) ) { System.out.println( p0.toNewHampshire() ); return false; @@ -4933,43 +4931,43 @@ public final class Test { return false; } // - String p00_str = "(A,B)root;"; + final String p00_str = "(A,B)root;"; p.setSource( p00_str ); - Phylogeny p00 = p.next(); + final Phylogeny p00 = p.next(); if ( !p00.toNewHampshire().equals( p00_str ) ) { System.out.println( p00.toNewHampshire() ); return false; } // - String p000_str = "A;"; + final String p000_str = "A;"; p.setSource( p000_str ); - Phylogeny p000 = p.next(); + final Phylogeny p000 = p.next(); if ( !p000.toNewHampshire().equals( p000_str ) ) { System.out.println( p000.toNewHampshire() ); return false; } // - String p0000_str = "A"; + final String p0000_str = "A"; p.setSource( p0000_str ); - Phylogeny p0000 = p.next(); + final Phylogeny p0000 = p.next(); if ( !p0000.toNewHampshire().equals( "A;" ) ) { System.out.println( p0000.toNewHampshire() ); return false; } // p.setSource( "(A)" ); - Phylogeny p00000 = p.next(); + final Phylogeny p00000 = p.next(); if ( !p00000.toNewHampshire().equals( "(A);" ) ) { System.out.println( p00000.toNewHampshire() ); return false; } // - String p1_str = "(A,B)(C,D)(E,F)(G,H)"; + final String p1_str = "(A,B)(C,D)(E,F)(G,H)"; p.setSource( p1_str ); if ( !p.hasNext() ) { return false; } - Phylogeny p1_0 = p.next(); + final Phylogeny p1_0 = p.next(); if ( !p1_0.toNewHampshire().equals( "(A,B);" ) ) { System.out.println( p1_0.toNewHampshire() ); return false; @@ -4977,7 +4975,7 @@ public final class Test { if ( !p.hasNext() ) { return false; } - Phylogeny p1_1 = p.next(); + final Phylogeny p1_1 = p.next(); if ( !p1_1.toNewHampshire().equals( "(C,D);" ) ) { System.out.println( "(C,D) != " + p1_1.toNewHampshire() ); return false; @@ -4985,7 +4983,7 @@ public final class Test { if ( !p.hasNext() ) { return false; } - Phylogeny p1_2 = p.next(); + final Phylogeny p1_2 = p.next(); if ( !p1_2.toNewHampshire().equals( "(E,F);" ) ) { System.out.println( "(E,F) != " + p1_2.toNewHampshire() ); return false; @@ -4993,7 +4991,7 @@ public final class Test { if ( !p.hasNext() ) { return false; } - Phylogeny p1_3 = p.next(); + final Phylogeny p1_3 = p.next(); if ( !p1_3.toNewHampshire().equals( "(G,H);" ) ) { System.out.println( "(G,H) != " + p1_3.toNewHampshire() ); return false; @@ -5005,7 +5003,7 @@ public final class Test { return false; } // - String p2_str = "((1,2,3),B);(C,D) (E,F)root;(G,H); ;(X)"; + final String p2_str = "((1,2,3),B);(C,D) (E,F)root;(G,H); ;(X)"; p.setSource( p2_str ); if ( !p.hasNext() ) { return false; @@ -5102,12 +5100,12 @@ public final class Test { return false; } // - String p3_str = "((A,B),C)abc"; + final String p3_str = "((A,B),C)abc"; p.setSource( p3_str ); if ( !p.hasNext() ) { return false; } - Phylogeny p3_0 = p.next(); + final Phylogeny p3_0 = p.next(); if ( !p3_0.toNewHampshire().equals( "((A,B),C)abc;" ) ) { return false; } @@ -5118,12 +5116,12 @@ public final class Test { return false; } // - String p4_str = "((A,B)ab,C)abc"; + final String p4_str = "((A,B)ab,C)abc"; p.setSource( p4_str ); if ( !p.hasNext() ) { return false; } - Phylogeny p4_0 = p.next(); + final Phylogeny p4_0 = p.next(); if ( !p4_0.toNewHampshire().equals( "((A,B)ab,C)abc;" ) ) { return false; } @@ -5134,12 +5132,12 @@ public final class Test { return false; } // - String p5_str = "(((A,B)ab,C)abc,D)abcd"; + final String p5_str = "(((A,B)ab,C)abc,D)abcd"; p.setSource( p5_str ); if ( !p.hasNext() ) { return false; } - Phylogeny p5_0 = p.next(); + final Phylogeny p5_0 = p.next(); if ( !p5_0.toNewHampshire().equals( "(((A,B)ab,C)abc,D)abcd;" ) ) { return false; } @@ -5150,7 +5148,7 @@ public final class Test { return false; } // - String p6_str = "(A,(B,(C,(D,E)de)cde)bcde)abcde"; + final String p6_str = "(A,(B,(C,(D,E)de)cde)bcde)abcde"; p.setSource( p6_str ); if ( !p.hasNext() ) { return false; @@ -5180,7 +5178,7 @@ public final class Test { return false; } // - String p7_str = "((((A,B)ab,C)abc,D)abcd,E)abcde"; + final String p7_str = "((((A,B)ab,C)abc,D)abcd,E)abcde"; p.setSource( p7_str ); if ( !p.hasNext() ) { return false; @@ -5210,7 +5208,7 @@ public final class Test { return false; } // - String p8_str = "((((A,B)ab,C)abc,D)abcd,E)abcde ((((a,b)ab,c)abc,d)abcd,e)abcde"; + final String p8_str = "((((A,B)ab,C)abc,D)abcd,E)abcde ((((a,b)ab,c)abc,d)abcd,e)abcde"; p.setSource( p8_str ); if ( !p.hasNext() ) { return false; @@ -5310,9 +5308,9 @@ public final class Test { final PhylogenyNode n3 = PhylogenyNode.createInstanceFromNhxString( "n3" ); final PhylogenyNode n4 = PhylogenyNode.createInstanceFromNhxString( "n4:0.01" ); final PhylogenyNode n5 = PhylogenyNode - .createInstanceFromNhxString( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:Co=Y:B=56:T=1:W=2:C=10.20.30]" ); + .createInstanceFromNhxString( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:Co=Y:B=56:T=1]" ); final PhylogenyNode n6 = PhylogenyNode - .createInstanceFromNhxString( "n6:0.000001[&&NHX:S=Ecoli:E=1.1.1.1:D=N:Co=N:B=100:T=1:W=2:C=0.0.0]" ); + .createInstanceFromNhxString( "n6:0.000001[&&NHX:S=Ecoli:E=1.1.1.1:D=N:Co=N:B=100:T=1]" ); if ( !n1.toNewHampshireX().equals( "" ) ) { return false; } @@ -5325,10 +5323,10 @@ public final class Test { if ( !n4.toNewHampshireX().equals( "n4:0.01" ) ) { return false; } - if ( !n5.toNewHampshireX().equals( "n5:0.1[&&NHX:T=1:S=Ecoli:D=Y:B=56:W=2.0:C=10.20.30]" ) ) { + if ( !n5.toNewHampshireX().equals( "n5:0.1[&&NHX:T=1:S=Ecoli:D=Y:B=56]" ) ) { return false; } - if ( !n6.toNewHampshireX().equals( "n6:1.0E-6[&&NHX:T=1:S=Ecoli:D=N:B=100:W=2.0:C=0.0.0]" ) ) { + if ( !n6.toNewHampshireX().equals( "n6:1.0E-6[&&NHX:T=1:S=Ecoli:D=N:B=100]" ) ) { return false; } } @@ -5503,9 +5501,6 @@ public final class Test { if ( !n5.isHasAssignedEvent() ) { return false; } - if ( PhylogenyMethods.getBranchWidthValue( n5 ) != 2 ) { - return false; - } final PhylogenyNode n8 = PhylogenyNode .createInstanceFromNhxString( "n8_ECOLI/12:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n8.getName().equals( "n8_ECOLI/12" ) ) { @@ -5733,7 +5728,7 @@ public final class Test { return false; } final PhylogenyNode n00 = PhylogenyNode - .createInstanceFromNhxString( "n7:0.000001[&&NHX:GN=gene_name:AC=accession123:S=Ecoli:D=N:Co=N:B=100:T=1:W=2:C=0.0.0]" ); + .createInstanceFromNhxString( "n7:0.000001[&&NHX:GN=gene_name:AC=accession123:S=Ecoli:D=N:Co=N:B=100:T=1]" ); if ( !n00.getNodeData().getSequence().getName().equals( "gene_name" ) ) { return false; } @@ -6024,7 +6019,7 @@ public final class Test { + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + "prob+-sd=\"100+-0\"]:6.375699999999999e-02[&length_mean=6.395210411945065e-02," + "length_median=6.375699999999999e-02,length_95%HPD={5.388600000000000e-02," - + "7.369400000000000e-02}])", new NHXParser2() )[ 0 ]; + + "7.369400000000000e-02}])", new NHXParser() )[ 0 ]; if ( !isEqual( p1.getNode( "1" ).getDistanceToParent(), 4.129e-02 ) ) { return false; } @@ -6051,7 +6046,7 @@ public final class Test { + "prob+-sd=\"100+-0\"]:6.375699999999999e-02[&length_mean=6.395210411945065e-02," + "length_median=6.375699999999999e-02,length_95%HPD={5.388600000000000e-02," + "7.369400000000000e-02}])", - new NHXParser2() )[ 0 ]; + new NHXParser() )[ 0 ]; if ( p2.getNode( "1" ) == null ) { return false; } diff --git a/forester/java/src/org/forester/util/ForesterConstants.java b/forester/java/src/org/forester/util/ForesterConstants.java index 0778147..e8ba376 100644 --- a/forester/java/src/org/forester/util/ForesterConstants.java +++ b/forester/java/src/org/forester/util/ForesterConstants.java @@ -27,8 +27,8 @@ package org.forester.util; public final class ForesterConstants { - public final static String FORESTER_VERSION = "1.014"; - public final static String FORESTER_DATE = "121229"; + public final static String FORESTER_VERSION = "1.015"; + public final static String FORESTER_DATE = "130104"; public final static String PHYLO_XML_VERSION = "1.10"; public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org"; public final static String PHYLO_XML_XSD = "phyloxml.xsd"; -- 1.7.10.2