import org.forester.analysis.TaxonomyDataManager;
import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.io.parsers.tol.TolParser;
import org.forester.io.parsers.util.ParserUtils;
System.out.println( "[" + applet_name + "] > " + message );
}
- final static Phylogeny[] readPhylogeniesFromUrl( final URL url, final boolean phyloxml_validate_against_xsd )
+ /**
+ * Reads all phylogenies available at the given URL.
+ * <p>
+ * A TolParser is used when the host name contains "tolweb"; otherwise the
+ * parser is chosen by ParserUtils.createParserDependingOnUrlContents. The
+ * NH/Nexus specific settings below are applied only when the chosen parser
+ * is an NHXParser or a NexusPhylogeniesParser.
+ * <p>
+ * NOTE(review): NHXParser.parseNHX rejects the combination of taxonomy
+ * extraction other than NO together with replace_underscores; callers
+ * presumably have to avoid passing both -- confirm.
+ *
+ * @param url
+ * the location to read from
+ * @param phyloxml_validate_against_xsd
+ * passed on to the parser selection
+ * @param replace_underscores
+ * handed to the NHX or Nexus parser via setReplaceUnderscores
+ * @param internal_numbers_are_confidences
+ * if true, and an NHX or Nexus parser was used, internal node
+ * names are transferred to confidence values after parsing
+ * @param taxonomy_extraction
+ * handed to the NHX parser via setTaxonomyExtraction (not
+ * applied to the Nexus parser)
+ * @return the phylogenies produced by the factory
+ * @throws FileNotFoundException
+ * @throws IOException
+ */
+ final static Phylogeny[] readPhylogeniesFromUrl( final URL url,
+ final boolean phyloxml_validate_against_xsd,
+ final boolean replace_underscores,
+ final boolean internal_numbers_are_confidences,
+ final TAXONOMY_EXTRACTION taxonomy_extraction )
throws FileNotFoundException, IOException {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- PhylogenyParser parser = null;
+ final PhylogenyParser parser;
+ boolean nhx_or_nexus = false;
if ( url.getHost().toLowerCase().indexOf( "tolweb" ) >= 0 ) {
parser = new TolParser();
}
else {
parser = ParserUtils.createParserDependingOnUrlContents( url, phyloxml_validate_against_xsd );
+ if ( parser instanceof NHXParser ) {
+ nhx_or_nexus = true;
+ final NHXParser nhx = ( NHXParser ) parser;
+ nhx.setReplaceUnderscores( replace_underscores );
+ nhx.setIgnoreQuotes( false );
+ nhx.setTaxonomyExtraction( taxonomy_extraction );
+ }
+ else if ( parser instanceof NexusPhylogeniesParser ) {
+ nhx_or_nexus = true;
+ final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) parser;
+ nex.setReplaceUnderscores( replace_underscores );
+ nex.setIgnoreQuotes( false );
+ }
+ }
+ // Hold on to the stream so it can be closed even when parsing fails;
+ // previously the stream returned by openStream() was never released.
+ final java.io.InputStream is = url.openStream();
+ final Phylogeny[] phys;
+ try {
+ phys = factory.create( is, parser );
+ }
+ finally {
+ is.close();
+ }
+ if ( nhx_or_nexus && internal_numbers_are_confidences ) {
+ for( final Phylogeny phy : phys ) {
+ PhylogenyMethods.transferInternalNodeNamesToConfidence( phy );
+ }
}
- return factory.create( url.openStream(), parser );
+ return phys;
}
final static void removeBranchColors( final Phylogeny phy ) {
final NHXParser nhx = ( NHXParser ) p;
nhx.setReplaceUnderscores( conf.isReplaceUnderscoresInNhParsing() );
nhx.setIgnoreQuotes( false );
- NHXParser.TAXONOMY_EXTRACTION te = NHXParser.TAXONOMY_EXTRACTION.NO;
- if ( conf.isExtractPfamTaxonomyCodesInNhParsing() ) {
- te = NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY;
- }
- nhx.setTaxonomyExtraction( te );
+ nhx.setTaxonomyExtraction( conf.getTaxonomyExtraction() );
}
else if ( p instanceof NexusPhylogeniesParser ) {
nhx_or_nexus = true;
_mainframe_applet = new MainFrameApplet( this, configuration );
URL url = null;
url = new URL( getUrlString() );
- final Phylogeny[] phys = AptxUtil.readPhylogeniesFromUrl( url,
- configuration.isValidatePhyloXmlAgainstSchema() );
+ final Phylogeny[] phys = AptxUtil.readPhylogeniesFromUrl( url, configuration
+ .isValidatePhyloXmlAgainstSchema(), configuration.isReplaceUnderscoresInNhParsing(), configuration
+ .isInternalNumberAreConfidenceForNhParsing(), configuration.getTaxonomyExtraction() );
AptxUtil.addPhylogeniesToTabs( phys,
new File( url.getFile() ).getName(),
getUrlString(),
// Load the tree from URL
if ( phys_url != null ) {
try {
- phys = AptxUtil.readPhylogeniesFromUrl( phys_url, getConfiguration().isValidatePhyloXmlAgainstSchema() );
+ phys = AptxUtil.readPhylogeniesFromUrl( phys_url,
+ getConfiguration().isValidatePhyloXmlAgainstSchema(),
+ getConfiguration().isReplaceUnderscoresInNhParsing(),
+ getConfiguration().isInternalNumberAreConfidenceForNhParsing(),
+ getConfiguration().getTaxonomyExtraction() );
}
catch ( final Exception e ) {
ForesterUtil.printErrorMessage( NAME, e.toString() );
import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION;
import org.forester.archaeopteryx.Options.OVERVIEW_PLACEMENT_TYPE;
import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.phylogeny.data.NodeData.NODE_DATA;
import org.forester.phylogeny.data.NodeVisualization;
import org.forester.phylogeny.data.NodeVisualization.NodeFill;
private short _number_of_digits_after_comma_for_branch_length_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT;
private boolean _editable = true;
private boolean _nh_parsing_replace_underscores = false;
- private boolean _nh_parsing_extract_pfam_taxonomy_codes = false;
+ private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY;
private boolean _internal_number_are_confidence_for_nh_parsing = false;
private boolean _display_sequence_relations = false;
private boolean _validate_against_phyloxml_xsd_schema = Constants.VALIDATE_AGAINST_PHYLOXML_XSD_SCJEMA_DEFAULT;
return _editable;
}
- boolean isExtractPfamTaxonomyCodesInNhParsing() {
- return _nh_parsing_extract_pfam_taxonomy_codes;
+ /**
+ * Returns the taxonomy extraction mode held by this object.
+ *
+ * @return the stored TAXONOMY_EXTRACTION value
+ */
+ final TAXONOMY_EXTRACTION getTaxonomyExtraction() {
+ return this._taxonomy_extraction;
}
boolean isHasWebLink( final String source ) {
_editable = editable;
}
- public void setExtractPfamTaxonomyCodesInNhParsing( final boolean nh_parsing_extract_pfam_taxonomy_codes ) {
- _nh_parsing_extract_pfam_taxonomy_codes = nh_parsing_extract_pfam_taxonomy_codes;
+ /**
+ * Stores the taxonomy extraction mode in this object.
+ *
+ * @param taxonomy_extraction
+ * the TAXONOMY_EXTRACTION value to keep
+ */
+ final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
+ this._taxonomy_extraction = taxonomy_extraction;
}
private void setGraphicsExportX( final int graphics_export_x ) {
}
else if ( key.equals( "replace_underscores_in_nh_parsing" ) ) {
final boolean r = parseBoolean( ( String ) st.nextElement() );
- if ( r && isExtractPfamTaxonomyCodesInNhParsing() ) {
+ if ( r && ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) ) {
ForesterUtil
.printWarningMessage( Constants.PRG_NAME,
"attempt to extract taxonomies and replace underscores at the same time" );
setReplaceUnderscoresInNhParsing( r );
}
}
- else if ( key.equals( "extract_taxonomy_codes_in_nh_parsing" ) ) {
- final boolean e = parseBoolean( ( String ) st.nextElement() );
- if ( e && isReplaceUnderscoresInNhParsing() ) {
+ else if ( key.equals( "taxonomy_extraction_in_nh_parsing" ) ) {
+ final String s = ( String ) st.nextElement();
+ if ( s.equalsIgnoreCase( "no" ) ) {
+ setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
+ }
+ else if ( s.equalsIgnoreCase( "yes" ) ) {
+ setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES );
+ }
+ else if ( s.equalsIgnoreCase( "pfam_only" ) ) {
+ setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+ }
+ else {
+ ForesterUtil.printWarningMessage( Constants.PRG_NAME,
+ "unknown value for \"taxonomy_extraction_in_nh_parsing\": " + s );
+ }
+ if ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) && isReplaceUnderscoresInNhParsing() ) {
ForesterUtil
.printWarningMessage( Constants.PRG_NAME,
"attempt to extract taxonomies and replace underscores at the same time" );
}
- else {
- setExtractPfamTaxonomyCodesInNhParsing( e );
- }
}
else if ( key.equals( "internal_labels_are_confidence_values" ) ) {
setInternalNumberAreConfidenceForNhParsing( parseBoolean( ( String ) st.nextElement() ) );
import org.forester.archaeopteryx.tools.InferenceManager;
import org.forester.archaeopteryx.tools.ProcessPool;
import org.forester.archaeopteryx.tools.ProcessRunning;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
JMenuItem _choose_pdf_width_mi;
// _ parsing
JCheckBoxMenuItem _internal_number_are_confidence_for_nh_parsing_cbmi;
- JCheckBoxMenuItem _extract_pfam_style_tax_codes_cbmi;
+ JRadioButtonMenuItem _extract_taxonomy_no_rbmi;
+ JRadioButtonMenuItem _extract_taxonomy_yes_rbmi;
+ JRadioButtonMenuItem _extract_taxonomy_pfam_rbmi;
JCheckBoxMenuItem _replace_underscores_cbmi;
JCheckBoxMenuItem _use_brackets_for_conf_in_nh_export_cbmi;
JCheckBoxMenuItem _use_internal_names_for_conf_in_nh_export_cbmi;
&& _print_black_and_white_cbmi.isSelected() );
options.setInternalNumberAreConfidenceForNhParsing( ( _internal_number_are_confidence_for_nh_parsing_cbmi != null )
&& _internal_number_are_confidence_for_nh_parsing_cbmi.isSelected() );
- options.setExtractPfamTaxonomyCodesInNhParsing( ( _extract_pfam_style_tax_codes_cbmi != null )
- && _extract_pfam_style_tax_codes_cbmi.isSelected() );
+ if ( ( _extract_taxonomy_yes_rbmi != null ) && _extract_taxonomy_yes_rbmi.isSelected() ) {
+ options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.YES );
+ }
+ else if ( ( _extract_taxonomy_pfam_rbmi != null ) && _extract_taxonomy_pfam_rbmi.isSelected() ) {
+ options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+ }
+ else if ( ( _extract_taxonomy_no_rbmi != null ) && _extract_taxonomy_no_rbmi.isSelected() ) {
+ options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.NO );
+ }
options.setReplaceUnderscoresInNhParsing( ( _replace_underscores_cbmi != null )
&& _replace_underscores_cbmi.isSelected() );
options.setMatchWholeTermsOnly( ( _search_whole_words_only_cbmi != null )
// Load the tree from URL
if ( url != null ) {
try {
- phys = AptxUtil.readPhylogeniesFromUrl( url, getConfiguration().isValidatePhyloXmlAgainstSchema() );
+ phys = AptxUtil.readPhylogeniesFromUrl( url,
+ configuration.isValidatePhyloXmlAgainstSchema(),
+ configuration.isReplaceUnderscoresInNhParsing(),
+ configuration.isInternalNumberAreConfidenceForNhParsing(),
+ configuration.getTaxonomyExtraction() );
}
catch ( final Exception e ) {
ForesterUtil.printErrorMessage( ArchaeopteryxA.NAME, e.toString() );
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
private Phylogeny _species_tree;
private File _current_dir;
private ButtonGroup _radio_group_1;
+ private ButtonGroup _radio_group_2;
// Others:
double _min_not_collapse = Constants.MIN_NOT_COLLAPSE_DEFAULT;
// Phylogeny Inference menu
updateOptions( getOptions() );
}
else if ( o == _replace_underscores_cbmi ) {
- if ( ( _extract_pfam_style_tax_codes_cbmi != null ) && _replace_underscores_cbmi.isSelected() ) {
- _extract_pfam_style_tax_codes_cbmi.setSelected( false );
+ if ( ( _extract_taxonomy_no_rbmi != null ) && !_extract_taxonomy_no_rbmi.isSelected() ) {
+ _extract_taxonomy_no_rbmi.setSelected( true );
}
updateOptions( getOptions() );
}
}
collapseBelowThreshold();
}
- else if ( o == _extract_pfam_style_tax_codes_cbmi ) {
- if ( ( _replace_underscores_cbmi != null ) && _extract_pfam_style_tax_codes_cbmi.isSelected() ) {
+ else if ( ( o == _extract_taxonomy_pfam_rbmi ) || ( o == _extract_taxonomy_yes_rbmi ) ) {
+ if ( _replace_underscores_cbmi != null ) {
_replace_underscores_cbmi.setSelected( false );
}
updateOptions( getOptions() );
_options_jmenu
.add( _internal_number_are_confidence_for_nh_parsing_cbmi = new JCheckBoxMenuItem( "Internal Node Names are Confidence Values" ) );
_options_jmenu.add( _replace_underscores_cbmi = new JCheckBoxMenuItem( "Replace Underscores with Spaces" ) );
+ //
+ _options_jmenu.add( _extract_taxonomy_no_rbmi = new JRadioButtonMenuItem( "No Taxonomy Extraction" ) );
+ _options_jmenu
+ .add( _extract_taxonomy_pfam_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes from Pfam-style Node Names" ) );
+ _extract_taxonomy_pfam_rbmi
+ .setToolTipText( "To extract 5-letter taxonomy codes from node names in the form of \"BCL2_MOUSE/134-298\"" );
_options_jmenu
- .add( _extract_pfam_style_tax_codes_cbmi = new JCheckBoxMenuItem( "Extract Taxonomy Codes from Pfam-style Labels" ) );
+ .add( _extract_taxonomy_yes_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes from Node Names" ) );
+ _extract_taxonomy_yes_rbmi
+ .setToolTipText( "To extract 5-letter taxonomy codes from node names in the form of \"BCL2_MOUSE\" or \"BCL2_MOUSE B-cell lymphoma 2...\"" );
+ _radio_group_2 = new ButtonGroup();
+ _radio_group_2.add( _extract_taxonomy_no_rbmi );
+ _radio_group_2.add( _extract_taxonomy_pfam_rbmi );
+ _radio_group_2.add( _extract_taxonomy_yes_rbmi );
+ //
_options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/Nexus Output:" ), getConfiguration() ) );
_options_jmenu
.add( _use_brackets_for_conf_in_nh_export_cbmi = new JCheckBoxMenuItem( USE_BRACKETS_FOR_CONF_IN_NH_LABEL ) );
customizeCheckBoxMenuItem( _print_black_and_white_cbmi, getOptions().isPrintBlackAndWhite() );
customizeCheckBoxMenuItem( _internal_number_are_confidence_for_nh_parsing_cbmi, getOptions()
.isInternalNumberAreConfidenceForNhParsing() );
- customizeCheckBoxMenuItem( _extract_pfam_style_tax_codes_cbmi, getOptions()
- .isExtractPfamTaxonomyCodesInNhParsing() );
+ customizeRadioButtonMenuItem( _extract_taxonomy_no_rbmi,
+ getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.NO );
+ customizeRadioButtonMenuItem( _extract_taxonomy_yes_rbmi,
+ getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.YES );
+ customizeRadioButtonMenuItem( _extract_taxonomy_pfam_rbmi,
+ getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
customizeCheckBoxMenuItem( _replace_underscores_cbmi, getOptions().isReplaceUnderscoresInNhParsing() );
customizeCheckBoxMenuItem( _search_whole_words_only_cbmi, getOptions().isMatchWholeTermsOnly() );
customizeCheckBoxMenuItem( _inverse_search_result_cbmi, getOptions().isInverseSearchResult() );
private void setSpecialOptionsForNhxParser( final NHXParser nhx ) {
nhx.setReplaceUnderscores( getOptions().isReplaceUnderscoresInNhParsing() );
- NHXParser.TAXONOMY_EXTRACTION te = NHXParser.TAXONOMY_EXTRACTION.NO;
- if ( getOptions().isExtractPfamTaxonomyCodesInNhParsing() ) {
- te = NHXParser.TAXONOMY_EXTRACTION.YES;
- }
- nhx.setTaxonomyExtraction( te );
+ // The options now carry the extraction mode directly; pass it through unchanged.
+ final NHXParser.TAXONOMY_EXTRACTION extraction_mode = getOptions().getTaxonomyExtraction();
+ nhx.setTaxonomyExtraction( extraction_mode );
}
private void writeAllToFile() {
import java.awt.Font;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
import org.forester.phylogeny.data.NodeData;
import org.forester.phylogeny.data.NodeData.NODE_DATA;
private short _number_of_digits_after_comma_for_confidence_values;
private short _number_of_digits_after_comma_for_branch_length_values;
private boolean _nh_parsing_replace_underscores;
- private boolean _nh_parsing_extract_pfam_taxonomy_codes;
+ private TAXONOMY_EXTRACTION _taxonomy_extraction;
private boolean _editable;
private boolean _background_color_gradient;
private boolean _show_domain_labels;
_number_of_digits_after_comma_for_branch_length_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT;
_number_of_digits_after_comma_for_confidence_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_CONFIDENCE_VALUES_DEFAULT;
_nh_parsing_replace_underscores = false;
- _nh_parsing_extract_pfam_taxonomy_codes = false;
+ _taxonomy_extraction = TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY;
_cladogram_type = Constants.CLADOGRAM_TYPE_DEFAULT;
_show_domain_labels = true;
setAbbreviateScientificTaxonNames( false );
return _editable;
}
- final boolean isExtractPfamTaxonomyCodesInNhParsing() {
- return _nh_parsing_extract_pfam_taxonomy_codes;
+ /**
+ * Returns the taxonomy extraction mode stored in these options.
+ *
+ * @return the stored TAXONOMY_EXTRACTION value
+ */
+ final TAXONOMY_EXTRACTION getTaxonomyExtraction() {
+ return this._taxonomy_extraction;
}
final boolean isGraphicsExportUsingActualSize() {
_editable = editable;
}
- final void setExtractPfamTaxonomyCodesInNhParsing( final boolean nh_parsing_extract_pfam_taxonomy_codes ) {
- _nh_parsing_extract_pfam_taxonomy_codes = nh_parsing_extract_pfam_taxonomy_codes;
+ /**
+ * Stores the taxonomy extraction mode in these options.
+ *
+ * @param taxonomy_extraction
+ * the TAXONOMY_EXTRACTION value to keep
+ */
+ final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
+ _taxonomy_extraction = taxonomy_extraction;
+ }
+
+ /**
+ * Misspelled alias of setTaxonomyExtraction, kept so that callers using
+ * this name continue to compile. Prefer the correctly spelled method.
+ *
+ * @param taxonomy_extraction
+ * the TAXONOMY_EXTRACTION value to keep
+ */
+ final void setTaxonomyExtractio( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
+ setTaxonomyExtraction( taxonomy_extraction );
}
final void setGraphicsExportUsingActualSize( final boolean graphics_export_using_actual_size ) {
instance.setNumberOfDigitsAfterCommaForConfidenceValues( configuration
.getNumberOfDigitsAfterCommaForConfidenceValues() );
}
- instance.setExtractPfamTaxonomyCodesInNhParsing( configuration.isExtractPfamTaxonomyCodesInNhParsing() );
+ instance.setTaxonomyExtractio( configuration.getTaxonomyExtraction() );
instance.setReplaceUnderscoresInNhParsing( configuration.isReplaceUnderscoresInNhParsing() );
instance.setInternalNumberAreConfidenceForNhParsing( configuration
.isInternalNumberAreConfidenceForNhParsing() );
public final class NHXParser implements PhylogenyParser {
- public static final TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = NHXParser.TAXONOMY_EXTRACTION.NO;
+ public static final TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = TAXONOMY_EXTRACTION.NO;
final static private boolean GUESS_ROOTEDNESS_DEFAULT = true;
final static private boolean GUESS_IF_SUPPORT_VALUES = true;
final static private boolean IGNORE_QUOTES_DEFAULT = false;
private int _clade_level;
private List<Phylogeny> _phylogenies;
private Phylogeny _current_phylogeny;
- private NHXParser.TAXONOMY_EXTRACTION _taxonomy_extraction;
+ private TAXONOMY_EXTRACTION _taxonomy_extraction;
private boolean _replace_underscores;
public final static Pattern UC_LETTERS_NUMBERS_PATTERN = Pattern.compile( "^[A-Z0-9]+$" );
public final static Pattern NUMBERS_ONLY_PATTERN = Pattern.compile( "^[0-9\\.]+$" );
init();
}
- /**
- * Decreases the clade level by one.
- *
- * @throws PhylogenyParserException
- * if level goes below zero.
- */
- private void decreaseCladeLevel() throws PhylogenyParserException {
- if ( getCladeLevel() < 0 ) {
- throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of close parens is larger than number of open parens" );
- }
- --_clade_level;
- }
-
- /**
- * Finishes the current Phylogeny and adds it to the list of Phylogenies
- * created.
- *
- * @throws PhylogenyParserException
- * @throws NHXFormatException
- * @throws PhyloXmlDataFormatException
- */
- private void finishPhylogeny() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException {
- setCladeLevel( 0 );
- if ( getCurrentPhylogeny() != null ) {
- parseNHX( getCurrentAnotation().toString(),
- getCurrentPhylogeny().getRoot(),
- getTaxonomyExtraction(),
- isReplaceUnderscores() );
- if ( NHXParser.GUESS_IF_SUPPORT_VALUES ) {
- if ( NHXParser.isBranchLengthsLikeBootstrapValues( getCurrentPhylogeny() ) ) {
- NHXParser.moveBranchLengthsToConfidenceValues( getCurrentPhylogeny() );
- }
- }
- if ( isGuessRootedness() ) {
- final PhylogenyNode root = getCurrentPhylogeny().getRoot();
- if ( ( root.getDistanceToParent() >= 0.0 ) || !ForesterUtil.isEmpty( root.getName() )
- || !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( root ) ) || root.isHasAssignedEvent() ) {
- getCurrentPhylogeny().setRooted( true );
- }
- }
- getPhylogenies().add( getCurrentPhylogeny() );
- }
- }
-
- private void finishSingleNodePhylogeny() throws PhylogenyParserException, NHXFormatException,
- PhyloXmlDataFormatException {
- setCladeLevel( 0 );
- final PhylogenyNode new_node = new PhylogenyNode();
- parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() );
- setCurrentPhylogeny( new Phylogeny() );
- getCurrentPhylogeny().setRoot( new_node );
- getPhylogenies().add( getCurrentPhylogeny() );
- }
-
- private int getCladeLevel() {
- return _clade_level;
- }
-
- private StringBuilder getCurrentAnotation() {
- return _current_anotation;
- }
-
- private PhylogenyNode getCurrentNode() {
- return _current_node;
- }
-
- private Phylogeny getCurrentPhylogeny() {
- return _current_phylogeny;
- }
-
- private byte getInputType() {
- return _input_type;
- }
-
- private Object getNhxSource() {
- return _nhx_source;
- }
-
- private List<Phylogeny> getPhylogenies() {
- return _phylogenies;
- }
-
- /**
- * Returns the Phylogenies created as Array.
- *
- * @return the Phylogenies created as Array
- */
- private Phylogeny[] getPhylogeniesAsArray() {
- final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ];
- for( int i = 0; i < getPhylogenies().size(); ++i ) {
- p[ i ] = getPhylogenies().get( i );
- }
- return p;
- }
-
- private int getSourceLength() {
- return _source_length;
- }
-
public NHXParser.TAXONOMY_EXTRACTION getTaxonomyExtraction() {
return _taxonomy_extraction;
}
}
/**
- * Increases the clade level by one.
- */
- private void increaseCladeLevel() {
- ++_clade_level;
- }
-
- private void init() {
- setTaxonomyExtraction( TAXONOMY_EXTRACTION_DEFAULT );
- setReplaceUnderscores( REPLACE_UNDERSCORES_DEFAULT );
- setGuessRootedness( GUESS_ROOTEDNESS_DEFAULT );
- setIgnoreQuotes( IGNORE_QUOTES_DEFAULT );
- setHasNext( false );
- }
-
- private boolean isGuessRootedness() {
- return _guess_rootedness;
- }
-
- private boolean isIgnoreQuotes() {
- return _ignore_quotes;
- }
-
- private boolean isReplaceUnderscores() {
- return _replace_underscores;
- }
-
- private boolean isSawClosingParen() {
- return _saw_closing_paren;
- }
-
- /**
- * Replaces the current annotation with a new StringBuffer.
- */
- private void newCurrentAnotation() {
- setCurrentAnotation( new StringBuilder() );
- }
-
- /**
* Parses the source set with setSource( final Object nhx_source ). Returns
* the Phylogenies found in the source as Phylogeny[].
* Everything between [ and ] is considered comment and ignored,
return getPhylogeniesAsArray();
} // parse()
+ /**
+ * Sets the flag that finishPhylogeny() consults before trying to decide
+ * whether a parsed phylogeny is rooted.
+ *
+ * @param guess_rootedness
+ * the new flag value
+ */
+ public void setGuessRootedness( final boolean guess_rootedness ) {
+ this._guess_rootedness = guess_rootedness;
+ }
+
+ /**
+ * Sets the ignore-quotes flag of this parser.
+ *
+ * @param ignore_quotes
+ * the new flag value
+ */
+ public void setIgnoreQuotes( final boolean ignore_quotes ) {
+ this._ignore_quotes = ignore_quotes;
+ }
+
+ /**
+ * Sets the replace-underscores flag, which is passed to parseNHX when a
+ * phylogeny is finished.
+ *
+ * @param replace_underscores
+ * the new flag value
+ */
+ public void setReplaceUnderscores( final boolean replace_underscores ) {
+ this._replace_underscores = replace_underscores;
+ }
+
+ /**
+ * Sets the source to be parsed. Accepted source types are String,
+ * StringBuffer, char[], File, and InputStream. A source may contain more
+ * than one phylogeny in either New Hampshire (NH) or New Hampshire
+ * Extended (NHX) format. Phylogenies need not be separated by any special
+ * character. White space is always ignored, as are semicolons in between
+ * phylogenies. Example of a source describing two phylogenies (the source
+ * is a String in this example): "(A,(B,(C,(D,E)de)cde)bcde)abcde
+ * ((((A,B)ab,C)abc,D)abcd,E)abcde". Everything between a '[' followed by any
+ * character other than '&' and ']' is considered a comment and ignored
+ * (example: "[this is a comment]"). NHX tags are surrounded by '[&&NHX' and
+ * ']' (example: "[&&NHX:S=Varanus_storri]"). A sequence like "[& some
+ * info]" is ignored, too (at the PhylogenyNode level, though).
+ * Exception: numbers only between [ and ] (e.g. [90]) are interpreted as support values.
+ * <p>
+ * File and InputStream sources are wrapped in a BufferedReader and their
+ * source length is recorded as 0.
+ *
+ * @see #parse()
+ * @see org.forester.io.parsers.PhylogenyParser#setSource(java.lang.Object)
+ * @param nhx_source
+ * the source to be parsed (String, StringBuffer, char[], File,
+ * or InputStream)
+ * @throws IOException
+ * @throws PhylogenyParserException
+ * if the source is null or is a File that is reported as not
+ * readable
+ * @throws IllegalArgumentException
+ * if the source is of any other type
+ */
+ @Override
+ public void setSource( final Object nhx_source ) throws PhylogenyParserException, IOException {
+ // Guard: a null source can never be parsed.
+ if ( nhx_source == null ) {
+ throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
+ }
+ if ( nhx_source instanceof String ) {
+ final String text = ( String ) nhx_source;
+ setInputType( NHXParser.STRING );
+ setSourceLength( text.length() );
+ setNhxSource( nhx_source );
+ }
+ else if ( nhx_source instanceof StringBuffer ) {
+ final StringBuffer buffer = ( StringBuffer ) nhx_source;
+ setInputType( NHXParser.STRING_BUFFER );
+ setSourceLength( buffer.length() );
+ setNhxSource( nhx_source );
+ }
+ else if ( nhx_source instanceof char[] ) {
+ final char[] chars = ( char[] ) nhx_source;
+ setInputType( NHXParser.CHAR_ARRAY );
+ setSourceLength( chars.length );
+ setNhxSource( nhx_source );
+ }
+ else if ( nhx_source instanceof File ) {
+ setInputType( NHXParser.BUFFERED_READER );
+ setSourceLength( 0 );
+ final File file = ( File ) nhx_source;
+ final String problem = ForesterUtil.isReadableFile( file );
+ if ( !ForesterUtil.isEmpty( problem ) ) {
+ throw new PhylogenyParserException( problem );
+ }
+ final FileReader file_reader = new FileReader( file );
+ setNhxSource( new BufferedReader( file_reader ) );
+ }
+ else if ( nhx_source instanceof InputStream ) {
+ setInputType( NHXParser.BUFFERED_READER );
+ setSourceLength( 0 );
+ final InputStream stream = ( InputStream ) nhx_source;
+ setNhxSource( new BufferedReader( new InputStreamReader( stream ) ) );
+ }
+ else {
+ throw new IllegalArgumentException( getClass() + " can only parse objects of type String,"
+ + " StringBuffer, char[], File," + " or InputStream " + " [attempt to parse object of "
+ + nhx_source.getClass() + "]." );
+ }
+ setHasNext( true );
+ }
+
+ /**
+ * Sets the taxonomy extraction mode, which is passed to parseNHX when a
+ * phylogeny is finished.
+ *
+ * @param taxonomy_extraction
+ * the mode to use
+ */
+ public void setTaxonomyExtraction( final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction ) {
+ this._taxonomy_extraction = taxonomy_extraction;
+ }
+
+ /**
+ * Decreases the clade level by one.
+ * <p>
+ * The check runs before the decrement: the exception is raised only when
+ * the level is already negative on entry, so a call made at level zero
+ * succeeds and leaves the level at -1.
+ *
+ * @throws PhylogenyParserException
+ * if the level is already below zero when this method is called
+ */
+ private void decreaseCladeLevel() throws PhylogenyParserException {
+ if ( getCladeLevel() < 0 ) {
+ throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of close parens is larger than number of open parens" );
+ }
+ --_clade_level;
+ }
+
+ /**
+ * Finishes the current Phylogeny and adds it to the list of Phylogenies
+ * created.
+ *
+ * @throws PhylogenyParserException
+ * @throws NHXFormatException
+ * @throws PhyloXmlDataFormatException
+ */
+ private void finishPhylogeny() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException {
+ setCladeLevel( 0 );
+ if ( getCurrentPhylogeny() != null ) {
+ parseNHX( getCurrentAnotation().toString(),
+ getCurrentPhylogeny().getRoot(),
+ getTaxonomyExtraction(),
+ isReplaceUnderscores() );
+ if ( NHXParser.GUESS_IF_SUPPORT_VALUES ) {
+ if ( NHXParser.isBranchLengthsLikeBootstrapValues( getCurrentPhylogeny() ) ) {
+ NHXParser.moveBranchLengthsToConfidenceValues( getCurrentPhylogeny() );
+ }
+ }
+ if ( isGuessRootedness() ) {
+ final PhylogenyNode root = getCurrentPhylogeny().getRoot();
+ if ( ( root.getDistanceToParent() >= 0.0 ) || !ForesterUtil.isEmpty( root.getName() )
+ || !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( root ) ) || root.isHasAssignedEvent() ) {
+ getCurrentPhylogeny().setRooted( true );
+ }
+ }
+ getPhylogenies().add( getCurrentPhylogeny() );
+ }
+ }
+
+ /**
+ * Builds a phylogeny consisting of a single node annotated with the
+ * pending annotation, makes it the current phylogeny, and appends it to
+ * the list of parsed phylogenies. The clade level is reset to zero first.
+ *
+ * @throws PhylogenyParserException
+ * @throws NHXFormatException
+ * @throws PhyloXmlDataFormatException
+ */
+ private void finishSingleNodePhylogeny() throws PhylogenyParserException, NHXFormatException,
+ PhyloXmlDataFormatException {
+ setCladeLevel( 0 );
+ final PhylogenyNode only_node = new PhylogenyNode();
+ parseNHX( getCurrentAnotation().toString(), only_node, getTaxonomyExtraction(), isReplaceUnderscores() );
+ // The phylogeny is created only after the annotation parsed successfully.
+ final Phylogeny single = new Phylogeny();
+ setCurrentPhylogeny( single );
+ single.setRoot( only_node );
+ getPhylogenies().add( single );
+ }
+
+ /**
+ * Returns the current value of the clade level counter.
+ */
+ private int getCladeLevel() {
+ return this._clade_level;
+ }
+
+ /**
+ * Returns the StringBuilder holding the annotation currently being collected.
+ */
+ private StringBuilder getCurrentAnotation() {
+ return this._current_anotation;
+ }
+
+ /**
+ * Returns the node stored as the current node.
+ */
+ private PhylogenyNode getCurrentNode() {
+ return this._current_node;
+ }
+
+ /**
+ * Returns the phylogeny stored as the current phylogeny (may be null, as
+ * finishPhylogeny() checks for that case).
+ */
+ private Phylogeny getCurrentPhylogeny() {
+ return this._current_phylogeny;
+ }
+
+ /**
+ * Returns the input type code recorded by setSource().
+ */
+ private byte getInputType() {
+ return this._input_type;
+ }
+
+ /**
+ * Returns the source object recorded by setSource().
+ */
+ private Object getNhxSource() {
+ return this._nhx_source;
+ }
+
+ /**
+ * Returns the list to which finished phylogenies are appended.
+ */
+ private List<Phylogeny> getPhylogenies() {
+ return this._phylogenies;
+ }
+
+ /**
+ * Copies the phylogenies collected so far into a new array, preserving
+ * list order.
+ *
+ * @return a newly allocated array holding the collected phylogenies
+ */
+ private Phylogeny[] getPhylogeniesAsArray() {
+ final List<Phylogeny> collected = getPhylogenies();
+ return collected.toArray( new Phylogeny[ collected.size() ] );
+ }
+
+ /**
+ * Returns the source length recorded by setSource().
+ */
+ private int getSourceLength() {
+ return this._source_length;
+ }
+
+ /**
+ * Adds one to the clade level counter.
+ */
+ private void increaseCladeLevel() {
+ this._clade_level++;
+ }
+
+ /**
+ * Resets the parser settings to their defaults and marks the parser as
+ * having nothing to read yet.
+ */
+ private void init() {
+ setHasNext( false );
+ setIgnoreQuotes( IGNORE_QUOTES_DEFAULT );
+ setGuessRootedness( GUESS_ROOTEDNESS_DEFAULT );
+ setReplaceUnderscores( REPLACE_UNDERSCORES_DEFAULT );
+ setTaxonomyExtraction( TAXONOMY_EXTRACTION_DEFAULT );
+ }
+
+ /**
+ * Returns the guess-rootedness flag.
+ */
+ private boolean isGuessRootedness() {
+ return this._guess_rootedness;
+ }
+
+ /**
+ * Returns the ignore-quotes flag.
+ */
+ private boolean isIgnoreQuotes() {
+ return this._ignore_quotes;
+ }
+
+ /**
+ * Returns the replace-underscores flag.
+ */
+ private boolean isReplaceUnderscores() {
+ return this._replace_underscores;
+ }
+
+ /**
+ * Returns the saw-closing-paren flag.
+ */
+ private boolean isSawClosingParen() {
+ return this._saw_closing_paren;
+ }
+
+ /**
+ * Replaces the current annotation with a new, empty StringBuilder.
+ */
+ private void newCurrentAnotation() {
+ setCurrentAnotation( new StringBuilder() );
+ }
+
/**
* Called if a closing paren is encountered.
*
_current_phylogeny = current_phylogeny;
}
- public void setGuessRootedness( final boolean guess_rootedness ) {
- _guess_rootedness = guess_rootedness;
- }
-
private void setHasNext( final boolean has_next ) {
_has_next = has_next;
}
- public void setIgnoreQuotes( final boolean ignore_quotes ) {
- _ignore_quotes = ignore_quotes;
- }
-
private void setInputType( final byte input_type ) {
_input_type = input_type;
}
_phylogenies = phylogenies;
}
- public void setReplaceUnderscores( final boolean replace_underscores ) {
- _replace_underscores = replace_underscores;
- }
-
private void setSawClosingParen( final boolean saw_closing_paren ) {
_saw_closing_paren = saw_closing_paren;
}
- /**
- * This sets the source to be parsed. The source can be: String,
- * StringBuffer, char[], File, or InputStream. The source can contain more
- * than one phylogenies in either New Hamphshire (NH) or New Hamphshire
- * Extended (NHX) format. There is no need to separate phylogenies with any
- * special character. White space is always ignored, as are semicolons
- * inbetween phylogenies. Example of a source describing two phylogenies
- * (source is a String, in this example): "(A,(B,(C,(D,E)de)cde)bcde)abcde
- * ((((A,B)ab,C)abc,D)abcd,E)abcde". Everything between a '[' followed by any
- * character other than '&' and ']' is considered a comment and ignored
- * (example: "[this is a comment]"). NHX tags are surrounded by '[&&NHX' and
- * ']' (example: "[&&NHX:S=Varanus_storri]"). A sequence like "[& some
- * info]" is ignored, too (at the PhylogenyNode level, though).
- * Exception: numbers only between [ and ] (e.g. [90]) are interpreted as support values.
- *
- * @see #parse()
- * @see org.forester.io.parsers.PhylogenyParser#setSource(java.lang.Object)
- * @param nhx_source
- * the source to be parsed (String, StringBuffer, char[], File,
- * or InputStream)
- * @throws IOException
- * @throws PhylogenyParserException
- */
- @Override
- public void setSource( final Object nhx_source ) throws PhylogenyParserException, IOException {
- if ( nhx_source == null ) {
- throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
- }
- else if ( nhx_source instanceof String ) {
- setInputType( NHXParser.STRING );
- setSourceLength( ( ( String ) nhx_source ).length() );
- setNhxSource( nhx_source );
- }
- else if ( nhx_source instanceof StringBuffer ) {
- setInputType( NHXParser.STRING_BUFFER );
- setSourceLength( ( ( StringBuffer ) nhx_source ).length() );
- setNhxSource( nhx_source );
- }
- else if ( nhx_source instanceof char[] ) {
- setInputType( NHXParser.CHAR_ARRAY );
- setSourceLength( ( ( char[] ) nhx_source ).length );
- setNhxSource( nhx_source );
- }
- else if ( nhx_source instanceof File ) {
- setInputType( NHXParser.BUFFERED_READER );
- setSourceLength( 0 );
- final File f = ( File ) nhx_source;
- final String error = ForesterUtil.isReadableFile( f );
- if ( !ForesterUtil.isEmpty( error ) ) {
- throw new PhylogenyParserException( error );
- }
- setNhxSource( new BufferedReader( new FileReader( f ) ) );
- }
- else if ( nhx_source instanceof InputStream ) {
- setInputType( NHXParser.BUFFERED_READER );
- setSourceLength( 0 );
- final InputStreamReader isr = new InputStreamReader( ( InputStream ) nhx_source );
- setNhxSource( new BufferedReader( isr ) );
- }
- else {
- throw new IllegalArgumentException( getClass() + " can only parse objects of type String,"
- + " StringBuffer, char[], File," + " or InputStream " + " [attempt to parse object of "
- + nhx_source.getClass() + "]." );
- }
- setHasNext( true );
- }
-
+ /**
+  * Stores the given value in the _source_length field.
+  *
+  * @param source_length the length to remember for the current source
+  */
private void setSourceLength( final int source_length ) {
_source_length = source_length;
}
- public void setTaxonomyExtraction( final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction ) {
- _taxonomy_extraction = taxonomy_extraction;
- }
-
- private static double doubleValue( final String str ) throws NHXFormatException {
- try {
- return Double.valueOf( str ).doubleValue();
- }
- catch ( final NumberFormatException ex ) {
- throw new NHXFormatException( "error in NH/NHX formatted data: failed to parse number from " + "\"" + str
- + "\"" );
- }
- }
-
- private static boolean isBranchLengthsLikeBootstrapValues( final Phylogeny p ) {
- final PhylogenyNodeIterator it = p.iteratorExternalForward();
- final double d0 = it.next().getDistanceToParent();
- if ( ( d0 < 10 ) || !it.hasNext() ) {
- return false;
- }
- while ( it.hasNext() ) {
- final double d = it.next().getDistanceToParent();
- if ( ( d != d0 ) || ( d < 10 ) ) {
- return false;
- }
- }
- return true;
- }
-
- private static void moveBranchLengthsToConfidenceValues( final Phylogeny p ) {
- final PhylogenyNodeIterator it = p.iteratorPostorder();
- while ( it.hasNext() ) {
- final PhylogenyNode n = it.next();
- PhylogenyMethods.setBootstrapConfidence( n, n.getDistanceToParent() );
- n.setDistanceToParent( PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT );
- }
- }
-
public static void parseNHX( String s,
final PhylogenyNode node_to_annotate,
- final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction,
+ final TAXONOMY_EXTRACTION taxonomy_extraction,
final boolean replace_underscores ) throws NHXFormatException,
PhyloXmlDataFormatException {
- if ( ( taxonomy_extraction != NHXParser.TAXONOMY_EXTRACTION.NO ) && replace_underscores ) {
+ if ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) && replace_underscores ) {
throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" );
}
if ( ( s != null ) && ( s.length() > 0 ) ) {
if ( t.countTokens() > 0 ) {
if ( !s.startsWith( ":" ) ) {
node_to_annotate.setName( t.nextToken() );
- if ( !replace_underscores
- && ( !is_nhx && ( taxonomy_extraction != NHXParser.TAXONOMY_EXTRACTION.NO ) ) ) {
+ if ( !replace_underscores && ( !is_nhx && ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) {
final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node_to_annotate.getName(),
taxonomy_extraction );
if ( !ForesterUtil.isEmpty( tax ) ) {
}
}
+ private static double doubleValue( final String str ) throws NHXFormatException {
+ try {
+ return Double.valueOf( str ).doubleValue();
+ }
+ catch ( final NumberFormatException ex ) {
+ throw new NHXFormatException( "error in NH/NHX formatted data: failed to parse number from " + "\"" + str
+ + "\"" );
+ }
+ }
+
+ /**
+  * Tells whether the distances to parent of the nodes visited by
+  * p.iteratorExternalForward() look like bootstrap values rather than
+  * branch lengths: more than one node is visited, and all visited nodes
+  * share one and the same distance to parent which is not below 10.
+  *
+  * @param p the phylogeny to inspect
+  * @return true if all inspected distances are identical and at least 10,
+  *         false otherwise (including the case of a single visited node)
+  */
+ private static boolean isBranchLengthsLikeBootstrapValues( final Phylogeny p ) {
+     final PhylogenyNodeIterator externals = p.iteratorExternalForward();
+     final double first = externals.next().getDistanceToParent();
+     if ( first < 10 ) {
+         return false;
+     }
+     if ( !externals.hasNext() ) {
+         // One value alone is not sufficient to decide.
+         return false;
+     }
+     boolean all_alike = true;
+     while ( all_alike && externals.hasNext() ) {
+         final double current = externals.next().getDistanceToParent();
+         all_alike = ( current == first ) && !( current < 10 );
+     }
+     return all_alike;
+ }
+
+ /**
+  * For each node visited by p.iteratorPostorder(): hands the node's
+  * distance to parent to PhylogenyMethods.setBootstrapConfidence and
+  * afterwards resets the distance to parent to
+  * PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT.
+  *
+  * @param p the phylogeny to be modified in place
+  */
+ private static void moveBranchLengthsToConfidenceValues( final Phylogeny p ) {
+     for( final PhylogenyNodeIterator nodes = p.iteratorPostorder(); nodes.hasNext(); ) {
+         final PhylogenyNode node = nodes.next();
+         final double former_distance = node.getDistanceToParent();
+         PhylogenyMethods.setBootstrapConfidence( node, former_distance );
+         node.setDistanceToParent( PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT );
+     }
+ }
+
private static void processMrBayes3Data( final String s, final PhylogenyNode node_to_annotate )
throws NHXFormatException {
double sd = -1;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.tol.TolParser;
import org.forester.phylogeny.Phylogeny;
public final class ParserUtils {
- final private static Pattern TAXOMONY_CODE_PATTERN_1 = Pattern.compile( "[A-Z0-9]{5}" );
- final private static Pattern TAXOMONY_CODE_PATTERN_2 = Pattern.compile( "([A-Z0-9]{5})[^A-Z].*" );
+ // A taxonomy code on its own: either exactly five characters out of A-Z and 0-9,
+ // or one of the three-letter codes RAT, PIG, PEA (used below with Matcher.matches()).
+ final private static Pattern TAXOMONY_CODE_PATTERN_1 = Pattern.compile( "[A-Z0-9]{5}|RAT|PIG|PEA" );
+ // A taxonomy code (group 1) directly followed by one character which is not a letter,
+ // and then by any further characters.
+ final private static Pattern TAXOMONY_CODE_PATTERN_2 = Pattern.compile( "([A-Z0-9]{5}|RAT|PIG|PEA)[^A-Za-z].*" );
+ // A taxonomy code (group 1) directly followed by "/", digits, "-", digits (e.g. "MOUSE/1-3").
+ final private static Pattern TAXOMONY_CODE_PATTERN_PF = Pattern.compile( "([A-Z0-9]{5}|RAT|PIG|PEA)/\\d+-\\d+" );
final public static PhylogenyParser createParserDependingFileContents( final File file,
final boolean phyloxml_validate_against_xsd )
return reader;
}
- /**
- * Extracts a code if and only if:
- * one and only one _,
- * shorter than 25,
- * no |,
- * no .,
- * if / present it has to be after the _,
- * if PFAM_STYLE_ONLY: / must be present,
- * tax code can only contain uppercase letters and numbers,
- * and must contain at least one uppercase letter.
- * Return null if no code extractable.
- *
- * @param name
- * @return
- */
- public static String extractTaxonomyCodeFromNodeName( final String name,
- final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction ) {
+ public final static String extractTaxonomyCodeFromNodeName( final String name,
+ final TAXONOMY_EXTRACTION taxonomy_extraction ) {
if ( ( name.indexOf( "_" ) > 0 )
- && ( name.length() < 31 )
- // && ( name.lastIndexOf( "_" ) == name.indexOf( "_" ) )
- && ( name.indexOf( "|" ) < 0 )
- && ( name.indexOf( "." ) < 0 )
- && ( ( taxonomy_extraction != NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name.indexOf( "/" ) >= 0 ) )
- && ( ( ( name.indexOf( "/" ) ) < 0 ) || ( name.indexOf( "/" ) > name.indexOf( "_" ) ) ) ) {
- final String[] s = name.split( "[_/]" );
+ && ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name.indexOf( "/" ) > 4 ) ) ) {
+ final String[] s = name.split( "[_\\s]" );
if ( s.length > 1 ) {
final String str = s[ 1 ];
- // if ( str.length() < 6 ) {
- if ( ( str.length() < 5 )
- && ( str.startsWith( "RAT" ) || str.startsWith( "PIG" ) || str.startsWith( "CAP" ) ) ) {
- return str.substring( 0, 3 );
- }
- final Matcher m1 = TAXOMONY_CODE_PATTERN_1.matcher( str );
- if ( m1.matches() ) {
- return m1.group();
- }
- final Matcher m2 = TAXOMONY_CODE_PATTERN_2.matcher( str );
- if ( m2.matches() ) {
- return m2.group( 1 );
+ if ( !ForesterUtil.isEmpty( str ) ) {
+ if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) {
+ final Matcher m = TAXOMONY_CODE_PATTERN_PF.matcher( str );
+ if ( m.matches() ) {
+ return m.group( 1 );
+ }
+ }
+ else {
+ final Matcher m1 = TAXOMONY_CODE_PATTERN_1.matcher( str );
+ if ( m1.matches() ) {
+ return m1.group();
+ }
+ final Matcher m2 = TAXOMONY_CODE_PATTERN_2.matcher( str );
+ if ( m2.matches() ) {
+ return m2.group( 1 );
+ }
+ }
}
- // return null;
- // final Matcher uc_letters_and_numbers = NHXParser.UC_LETTERS_NUMBERS_PATTERN.matcher( str );
- // if ( !uc_letters_and_numbers.matches() ) {
- // return null;
- // }
- // final Matcher numbers_only = NHXParser.NUMBERS_ONLY_PATTERN.matcher( str );
- // if ( numbers_only.matches() ) {
- // return null;
- // }
- // return str;
- // }
+ }
+ }
+ else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.YES ) {
+ final Matcher m1 = TAXOMONY_CODE_PATTERN_1.matcher( name );
+ if ( m1.matches() ) {
+ return name;
}
}
return null;
import org.forester.io.parsers.nexus.NexusCharactersParser;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.tol.TolParser;
+import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.msa.BasicMsa;
import org.forester.msa.Mafft;
}
else {
System.out.println( "failed." );
- System.exit( -1 ); //TODO FIXME remove me!! ~
failed++;
}
System.out.print( "Hmmscan output parser: " );
System.out.println( "failed." );
failed++;
}
+ System.out.print( "Taxonomy extraction: " );
+ if ( Test.testExtractTaxonomyCodeFromNodeName() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
System.out.print( "Basic node construction and parsing of NHX (node level): " );
if ( Test.testNHXNodeParsing() ) {
System.out.println( "OK." );
System.out.println( "failed." );
failed++;
}
- // System.out.print( "WABI TxSearch: " );
- // if ( Test.testWabiTxSearch() ) {
- // System.out.println( "OK." );
- // succeeded++;
- // }
- // else {
- // System.out
- // .println( "failed [will not count towards failed tests since it might be due to absence internet connection]" );
- // }
System.out.println();
final Runtime rt = java.lang.Runtime.getRuntime();
final long free_memory = rt.freeMemory() / 1000000;
else {
System.out.println( "Not OK." );
}
- // System.out.println();
- // Development.setTime( true );
- //try {
- // final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- // final String clc = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator()
- // + "examples" + ForesterUtil.getFileSeparator() + "CLC.nhx";
- // final String multi = Test.PATH_TO_EXAMPLE_FILES +
- // "multifurcations_ex_1.nhx";
- // final String domains = Test.PATH_TO_EXAMPLE_FILES + "domains1.nhx";
- // final Phylogeny t1 = factory.create( new File( domains ), new
- // NHXParser() )[ 0 ];
- // final Phylogeny t2 = factory.create( new File( clc ), new NHXParser() )[ 0 ];
- // }
- // catch ( final Exception e ) {
- // e.printStackTrace();
- // }
- // t1.getRoot().preorderPrint();
- // final PhylogenyFactory factory = ParserBasedPhylogenyFactory
- // .getInstance();
- // try {
- //
- // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
- // + "\\AtNBSpos.nhx" ) );
- // factory.create(
- // new File( PATH_TO_EXAMPLE_FILES + "\\AtNBSpos.nhx" ),
- // new NHXParser() );
- // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
- // + "\\AtNBSpos.nhx" ) );
- // factory.create(
- // new File( PATH_TO_EXAMPLE_FILES + "\\AtNBSpos.nhx" ),
- // new NHXParser() );
- //
- //
- // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
- // + "\\big_tree.nhx" ) );
- // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
- // + "\\big_tree.nhx" ) );
- // factory.create(
- // new File( PATH_TO_EXAMPLE_FILES + "\\big_tree.nhx" ),
- // new NHXParser() );
- // factory.create(
- // new File( PATH_TO_EXAMPLE_FILES + "\\big_tree.nhx" ),
- // new NHXParser() );
- //
- // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
- // + "\\big_tree.nhx" ) );
- // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
- // + "\\big_tree.nhx" ) );
- //
- // factory.create(
- // new File( PATH_TO_EXAMPLE_FILES + "\\big_tree.nhx" ),
- // new NHXParser() );
- // factory.create(
- // new File( PATH_TO_EXAMPLE_FILES + "\\big_tree.nhx" ),
- // new NHXParser() );
- //
- // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
- // + "\\AtNBSpos.nhx" ) );
- // factory.create(
- // new File( PATH_TO_EXAMPLE_FILES + "\\AtNBSpos.nhx" ),
- // new NHXParser() );
- //
- // }
- // catch ( IOException e ) {
- // // TODO Auto-generated catch block
- // e.printStackTrace();
- // }
+ }
+
+ private static boolean testExtractTaxonomyCodeFromNodeName() {
+ try {
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", TAXONOMY_EXTRACTION.YES ).equals( "MOUSE" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.YES ).equals( "RAT" ) ) {
+ return false;
+ }
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "RAT1", TAXONOMY_EXTRACTION.YES ) != null ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE function = 23445", TAXONOMY_EXTRACTION.YES )
+ .equals( "MOUSE" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE_function = 23445", TAXONOMY_EXTRACTION.YES )
+ .equals( "MOUSE" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE|function = 23445", TAXONOMY_EXTRACTION.YES )
+ .equals( "MOUSE" ) ) {
+ return false;
+ }
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSEfunction = 23445", TAXONOMY_EXTRACTION.YES ) != null ) {
+ return false;
+ }
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSEFunction = 23445", TAXONOMY_EXTRACTION.YES ) != null ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT function = 23445", TAXONOMY_EXTRACTION.YES )
+ .equals( "RAT" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT_function = 23445", TAXONOMY_EXTRACTION.YES )
+ .equals( "RAT" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT|function = 23445", TAXONOMY_EXTRACTION.YES )
+ .equals( "RAT" ) ) {
+ return false;
+ }
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATfunction = 23445", TAXONOMY_EXTRACTION.YES ) != null ) {
+ return false;
+ }
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATFunction = 23445", TAXONOMY_EXTRACTION.YES ) != null ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT/1-3", TAXONOMY_EXTRACTION.YES ).equals( "RAT" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_PIG/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY )
+ .equals( "PIG" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE/1-3", TAXONOMY_EXTRACTION.YES )
+ .equals( "MOUSE" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY )
+ .equals( "MOUSE" ) ) {
+ return false;
+ }
+ }
+ catch ( final Exception e ) {
+ e.printStackTrace( System.out );
+ return false;
+ }
+ return true;
}
private static boolean testBasicNodeMethods() {
if ( !n8.getName().equals( "n8_ECOLI/12" ) ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( n8 ).equals( "ECOLI" ) ) {
+ if ( PhylogenyMethods.getSpecies( n8 ).equals( "ECOLI" ) ) {
return false;
}
final PhylogenyNode n9 = PhylogenyNode
if ( !n9.getName().equals( "n9_ECOLI/12=12" ) ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( n9 ).equals( "ECOLI" ) ) {
+ if ( PhylogenyMethods.getSpecies( n9 ).equals( "ECOLI" ) ) {
return false;
}
final PhylogenyNode n10 = PhylogenyNode
if ( !b.getName().equals( "n10_ECOLI1/1-2" ) ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( b ).equals( "ECOLI" ) ) {
+ if ( PhylogenyMethods.getSpecies( b ).equals( "ECOLI" ) ) {
return false;
}
final PhylogenyNode c = PhylogenyNode
if ( !c.getName().equals( "n10_RATAF12/1000-2000" ) ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( c ).equals( "RATAF" ) ) {
+ if ( PhylogenyMethods.getSpecies( c ).equals( "RATAF" ) ) {
return false;
}
final PhylogenyNode c1 = PhylogenyNode
if ( !c1.getName().equals( "n10_BOVIN_1/1000-2000" ) ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( c1 ).equals( "BOVIN" ) ) {
+ if ( PhylogenyMethods.getSpecies( c1 ).equals( "BOVIN" ) ) {
return false;
}
final PhylogenyNode c2 = PhylogenyNode
if ( !d.getName().equals( "n10_RAT1/1-2" ) ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( d ).equals( "RAT" ) ) {
+ if ( PhylogenyMethods.getSpecies( d ).equals( "RAT" ) ) {
return false;
}
final PhylogenyNode e = PhylogenyNode
if ( n11.getDistanceToParent() != 0.4 ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( n11 ).equals( "ECOLI" ) ) {
+ if ( PhylogenyMethods.getSpecies( n11 ).equals( "ECOLI" ) ) {
return false;
}
final PhylogenyNode n12 = PhylogenyNode
if ( !m.getName().equals( "n10_MOUSEa" ) ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( m ).equals( "MOUSE" ) ) {
+ if ( PhylogenyMethods.getSpecies( m ).equals( "MOUSE" ) ) {
return false;
}
final PhylogenyNode o = PhylogenyNode.createInstanceFromNhxString( "n10_MOUSE_",