From: cmzmasek@gmail.com Date: Fri, 25 Jan 2013 23:36:16 +0000 (+0000) Subject: added AGRESSIVE tax extraction ^^ X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=3d6a64e0371ffedf725bf8467211ed860f298550;p=jalview.git added AGRESSIVE tax extraction ^^ --- diff --git a/forester/java/src/org/forester/application/phyloxml_converter.java b/forester/java/src/org/forester/application/phyloxml_converter.java index f82779c..164bfee 100644 --- a/forester/java/src/org/forester/application/phyloxml_converter.java +++ b/forester/java/src/org/forester/application/phyloxml_converter.java @@ -209,11 +209,13 @@ public class phyloxml_converter { && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME ) && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) { if ( extr_taxonomy_pf_only ) { - ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + ( ( NHXParser ) parser ) + .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); replace_underscores = false; } else if ( extr_taxonomy ) { - ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + ( ( NHXParser ) parser ) + .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); replace_underscores = false; } } diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 6ef7350..3ccacf5 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -308,13 +308,13 @@ public class rio { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( false ); nex.setIgnoreQuotes( true ); - nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES ); + nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE ); } else { throw new RuntimeException( "unknown parser type: " + p ); diff --git a/forester/java/src/org/forester/application/support_transfer.java b/forester/java/src/org/forester/application/support_transfer.java index 59090c2..927da4f 100644 --- a/forester/java/src/org/forester/application/support_transfer.java +++ b/forester/java/src/org/forester/application/support_transfer.java @@ -95,7 +95,7 @@ public final class support_transfer { final PhylogenyParser pp_bl = ParserUtils.createParserDependingOnFileType( infile_bl, true ); final PhylogenyParser pp_s = ParserUtils.createParserDependingOnFileType( infile_support_vals, true ); if ( pp_bl instanceof NHXParser ) { - ( ( NHXParser ) pp_bl ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + ( ( NHXParser ) pp_bl ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } phylogeny_w_bl = factory.create( infile_bl, pp_bl )[ index_of_tree_w_bl ]; phylogeny_w_support_vals = factory.create( infile_support_vals, pp_s )[ 0 ]; diff --git a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java index f84d8c4..0dd82d7 100644 --- a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java @@ -43,6 +43,7 @@ import java.net.URL; import java.text.ParseException; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -474,7 +475,7 @@ public final class AptxUtil { for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() && !n.isCollapse() && ( n.getNumberOfDescendants() > 1 ) ) { - final Set taxs = PhylogenyMethods.obtainDistinctTaxonomies( n ); + final Set taxs = obtainDistinctTaxonomies( n ); if ( ( taxs != null ) && ( taxs.size() == 1 ) ) { AptxUtil.collapseSubtree( n, true ); if ( !n.getNodeData().isHasTaxonomy() ) { @@ -493,6 +494,40 @@ public final class AptxUtil { } } + /** + * Returns the set of distinct taxonomies of + * all external nodes of node. + * If at least one the external nodes has no taxonomy, + * null is returned. + * + */ + public static Set obtainDistinctTaxonomies( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + final Set tax_set = new HashSet(); + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + return null; + } + tax_set.add( n.getNodeData().getTaxonomy() ); + } + return tax_set; + } + + public static Set obtainAllDistinctTaxonomies( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + final Set tax_set = new HashSet(); + for( final PhylogenyNode n : descs ) { + if ( n.getNodeData().isHasTaxonomy() && !n.getNodeData().getTaxonomy().isEmpty() ) { + tax_set.add( n.getNodeData().getTaxonomy() ); + System.out.println( n.getNodeData().getTaxonomy() ); + } + } + for( final Taxonomy taxonomy : tax_set ) { + System.out.println( taxonomy ); + } + return tax_set; + } + final static void collapseSubtree( final PhylogenyNode node, final boolean collapse ) { node.setCollapse( collapse ); if ( node.isExternal() ) { @@ -636,7 +671,22 @@ public final class AptxUtil { } if ( phy.getIdentifier() != null ) { desc.append( "Id: " ); - desc.append( phy.getIdentifier() ); + desc.append( phy.getIdentifier().toString() ); + desc.append( "\n" ); + } + if ( !ForesterUtil.isEmpty( phy.getDescription() ) ) { + desc.append( "Description: " ); + desc.append( phy.getDescription() ); + desc.append( "\n" ); + } + if ( !ForesterUtil.isEmpty( phy.getDistanceUnit() ) ) { + desc.append( "Distance Unit: " ); + desc.append( phy.getDistanceUnit() ); + desc.append( "\n" ); + } + if ( !ForesterUtil.isEmpty( phy.getType() ) ) { + desc.append( "Type: " ); + desc.append( phy.getType() ); desc.append( "\n" ); } desc.append( "Rooted: " ); @@ -663,7 +713,7 @@ public final class AptxUtil { desc.append( "Maximum distance to root: " ); desc.append( ForesterUtil.round( PhylogenyMethods.calculateMaxDistanceToRoot( phy ), 6 ) ); desc.append( "\n" ); - final Set taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() ); + final Set taxs = obtainAllDistinctTaxonomies( phy.getRoot() ); if ( taxs != null ) { desc.append( "Distinct external taxonomies: " ); desc.append( taxs.size() ); diff --git a/forester/java/src/org/forester/archaeopteryx/Configuration.java b/forester/java/src/org/forester/archaeopteryx/Configuration.java index c563795..855ded5 100644 --- a/forester/java/src/org/forester/archaeopteryx/Configuration.java +++ b/forester/java/src/org/forester/archaeopteryx/Configuration.java @@ -86,7 +86,7 @@ public final class Configuration { private short _number_of_digits_after_comma_for_branch_length_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT; private boolean _editable = true; private boolean _nh_parsing_replace_underscores = false; - private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY; + private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT; private boolean _internal_number_are_confidence_for_nh_parsing = false; private boolean _display_sequence_relations = false; private boolean _validate_against_phyloxml_xsd_schema = Constants.VALIDATE_AGAINST_PHYLOXML_XSD_SCJEMA_DEFAULT; @@ -1342,10 +1342,10 @@ public final class Configuration { setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO ); } else if ( s.equalsIgnoreCase( "yes" ) ) { - setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES ); + setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } else if ( s.equalsIgnoreCase( "pfam" ) ) { - setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index 0cc981e..37f84e4 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -1121,10 +1121,10 @@ public abstract class MainFrame extends JFrame implements ActionListener { options.setInternalNumberAreConfidenceForNhParsing( ( _internal_number_are_confidence_for_nh_parsing_cbmi != null ) && _internal_number_are_confidence_for_nh_parsing_cbmi.isSelected() ); if ( ( _extract_taxonomy_yes_rbmi != null ) && _extract_taxonomy_yes_rbmi.isSelected() ) { - options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.YES ); + options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } else if ( ( _extract_taxonomy_pfam_rbmi != null ) && _extract_taxonomy_pfam_rbmi.isSelected() ) { - options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); } else if ( ( _extract_taxonomy_no_rbmi != null ) && _extract_taxonomy_no_rbmi.isSelected() ) { options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.NO ); diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index e2f4737..8a6730a 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -1040,9 +1040,9 @@ public final class MainFrameApplication extends MainFrame { customizeRadioButtonMenuItem( _extract_taxonomy_no_rbmi, getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.NO ); customizeRadioButtonMenuItem( _extract_taxonomy_yes_rbmi, - getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.YES ); + getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); customizeRadioButtonMenuItem( _extract_taxonomy_pfam_rbmi, - getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); customizeCheckBoxMenuItem( _replace_underscores_cbmi, getOptions().isReplaceUnderscoresInNhParsing() ); customizeCheckBoxMenuItem( _search_whole_words_only_cbmi, getOptions().isMatchWholeTermsOnly() ); customizeCheckBoxMenuItem( _inverse_search_result_cbmi, getOptions().isInverseSearchResult() ); @@ -2026,7 +2026,8 @@ public final class MainFrameApplication extends MainFrame { final PhylogenyNode n = it.next(); final String name = n.getName().trim(); if ( !ForesterUtil.isEmpty( name ) ) { - final String nt = ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.YES ); + final String nt = ParserUtils + .extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !ForesterUtil.isEmpty( nt ) ) { if ( counter < 15 ) { sb.append( name + ": " + nt + "\n" ); @@ -2576,7 +2577,12 @@ public final class MainFrameApplication extends MainFrame { final int count = getMainPanel().getTabbedPane().getTabCount(); final List trees = new ArrayList(); for( int i = 0; i < count; ++i ) { - trees.add( getMainPanel().getPhylogeny( i ) ); + final Phylogeny phy = getMainPanel().getPhylogeny( i ); + if ( ForesterUtil.isEmpty( phy.getName() ) + && !ForesterUtil.isEmpty( getMainPanel().getTabbedPane().getTitleAt( i ) ) ) { + phy.setName( getMainPanel().getTabbedPane().getTitleAt( i ) ); + } + trees.add( phy ); getMainPanel().getTreePanels().get( i ).setEdited( false ); } final PhylogenyWriter writer = new PhylogenyWriter(); diff --git a/forester/java/src/org/forester/archaeopteryx/Options.java b/forester/java/src/org/forester/archaeopteryx/Options.java index 9857b51..a0c70f3 100644 --- a/forester/java/src/org/forester/archaeopteryx/Options.java +++ b/forester/java/src/org/forester/archaeopteryx/Options.java @@ -458,7 +458,7 @@ final public class Options { _number_of_digits_after_comma_for_branch_length_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT; _number_of_digits_after_comma_for_confidence_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_CONFIDENCE_VALUES_DEFAULT; _nh_parsing_replace_underscores = false; - _taxonomy_extraction = TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY; + _taxonomy_extraction = TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT; _cladogram_type = Constants.CLADOGRAM_TYPE_DEFAULT; _show_domain_labels = true; setAbbreviateScientificTaxonNames( false ); diff --git a/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java b/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java index ea4bca0..7d88a31 100644 --- a/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java +++ b/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java @@ -109,13 +109,15 @@ public class UrlTreeReader implements Runnable { break; case NH_EXTRACT_TAXONOMY: parser = new NHXParser(); - ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + ( ( NHXParser ) parser ) + .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); ( ( NHXParser ) parser ).setReplaceUnderscores( false ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; case PFAM: parser = new NHXParser(); - ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + ( ( NHXParser ) parser ) + .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); ( ( NHXParser ) parser ).setReplaceUnderscores( false ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index 89a7d8f..397c14a 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -712,6 +712,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } public static enum TAXONOMY_EXTRACTION { - NO, YES, PFAM_STYLE_ONLY; + NO, AGRESSIVE, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT; } } diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index 51f7f89..4ba38fa 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -57,11 +57,12 @@ public final class ParserUtils { final public static Pattern TAXOMONY_SN_PATTERN = Pattern .compile( "[^_]{2,}_([A-Z][a-z]+_[a-z]{2,}(_[A-Za-z]\\w+|))\\b" ); - final public static Pattern TAXOMONY_CODE_PATTERN_1 = Pattern.compile( "\\b[A-Z0-9]{5}|RAT|PIG|PEA|CAP\\b" ); + final public static Pattern TAXOMONY_CODE_PATTERN_1 = Pattern + .compile( "\\b[A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP\\b" ); final private static Pattern TAXOMONY_CODE_PATTERN_2 = Pattern - .compile( "([A-Z0-9]{5}|RAT|PIG|PEA|CAP)[^0-9A-Za-z].*" ); + .compile( "([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)[^0-9A-Za-z].*" ); final private static Pattern TAXOMONY_CODE_PATTERN_PF = Pattern - .compile( "([A-Z0-9]{5}|RAT|PIG|PEA|CAP)/\\d+-\\d+" ); + .compile( "([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)/\\d+-\\d+" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_1 = Pattern.compile( "\\b\\d{1,7}\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_2 = Pattern.compile( "(\\d{1,7})[^0-9A-Za-z].*" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PF = Pattern.compile( "(\\d{1,7})/\\d+-\\d+" ); @@ -225,12 +226,12 @@ public final class ParserUtils { public final static String extractTaxonomyCodeFromNodeName( final String name, final TAXONOMY_EXTRACTION taxonomy_extraction ) { if ( ( name.indexOf( "_" ) > 0 ) - && ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name.indexOf( "/" ) > 4 ) ) ) { + && ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) || ( name.indexOf( "/" ) > 4 ) ) ) { final String[] s = name.split( "[_\\s]" ); if ( s.length > 1 ) { final String str = s[ 1 ]; if ( !ForesterUtil.isEmpty( str ) ) { - if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) { + if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) { final Matcher m = TAXOMONY_CODE_PATTERN_PF.matcher( str ); if ( m.matches() ) { return m.group( 1 ); @@ -249,7 +250,7 @@ public final class ParserUtils { } } } - else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.YES ) { + else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) { final Matcher m1 = TAXOMONY_CODE_PATTERN_1.matcher( name ); if ( m1.matches() ) { return name; @@ -291,7 +292,8 @@ public final class ParserUtils { return code; } } - else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.YES ) { + else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) + || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) ) { final String sn = extractScientificNameFromNodeName( node.getName() ); if ( !ForesterUtil.isEmpty( sn ) ) { if ( !node.getNodeData().isHasTaxonomy() ) { @@ -310,12 +312,12 @@ public final class ParserUtils { public final static String extractUniprotTaxonomyIdFromNodeName( final String name, final TAXONOMY_EXTRACTION taxonomy_extraction ) { if ( ( name.indexOf( "_" ) > 0 ) - && ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name.indexOf( "/" ) > 4 ) ) ) { + && ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) || ( name.indexOf( "/" ) > 4 ) ) ) { final String[] s = name.split( "[_\\s]" ); if ( s.length > 1 ) { final String str = s[ 1 ]; if ( !ForesterUtil.isEmpty( str ) ) { - if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) { + if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) { final Matcher m = TAXOMONY_UNIPROT_ID_PATTERN_PF.matcher( str ); if ( m.matches() ) { return m.group( 1 ); @@ -334,7 +336,7 @@ public final class ParserUtils { } } } - else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.YES ) { + if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) { final Matcher m1 = TAXOMONY_UNIPROT_ID_PATTERN_1.matcher( name ); if ( m1.matches() ) { return name; diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index a3efa67..27f2a33 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -768,25 +768,6 @@ public class PhylogenyMethods { } /** - * Returns the set of distinct taxonomies of - * all external nodes of node. - * If at least one the external nodes has no taxonomy, - * null is returned. - * - */ - public static Set obtainDistinctTaxonomies( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - final Set tax_set = new HashSet(); - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - return null; - } - tax_set.add( n.getNodeData().getTaxonomy() ); - } - return tax_set; - } - - /** * Returns a map of distinct taxonomies of * all external nodes of node. * If at least one of the external nodes has no taxonomy, diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index d360a1f..cde7804 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -826,13 +826,13 @@ public final class RIO { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( false ); nex.setIgnoreQuotes( true ); - nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES ); + nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE ); } return factory.create( gene_trees_file, p ); } diff --git a/forester/java/src/org/forester/rio/TestRIO.java b/forester/java/src/org/forester/rio/TestRIO.java index a0604d6..8160446 100644 --- a/forester/java/src/org/forester/rio/TestRIO.java +++ b/forester/java/src/org/forester/rio/TestRIO.java @@ -48,7 +48,7 @@ public final class TestRIO { final NHXParser nhx = new NHXParser(); nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);" + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));" + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);"; @@ -134,7 +134,7 @@ public final class TestRIO { final NHXParser nhx = new NHXParser(); nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); // final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);"; final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index e2e93c7..f04b445 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -817,63 +817,74 @@ public final class Test { private static boolean testExtractTaxonomyCodeFromNodeName() { try { - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", TAXONOMY_EXTRACTION.YES ).equals( "MOUSE" ) ) { + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) + .equals( "MOUSE" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.YES ).equals( "RAT" ) ) { + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) + .equals( "RAT" ) ) { return false; } - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "RAT1", TAXONOMY_EXTRACTION.YES ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "RAT1", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE function = 23445", TAXONOMY_EXTRACTION.YES ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE function = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE_function = 23445", TAXONOMY_EXTRACTION.YES ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE_function = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE|function = 23445", TAXONOMY_EXTRACTION.YES ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE|function = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSEfunction = 23445", TAXONOMY_EXTRACTION.YES ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSEfunction = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSEFunction = 23445", TAXONOMY_EXTRACTION.YES ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSEFunction = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT function = 23445", TAXONOMY_EXTRACTION.YES ) - .equals( "RAT" ) ) { + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT function = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT_function = 23445", TAXONOMY_EXTRACTION.YES ) - .equals( "RAT" ) ) { + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT_function = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT|function = 23445", TAXONOMY_EXTRACTION.YES ) - .equals( "RAT" ) ) { + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT|function = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { return false; } - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATfunction = 23445", TAXONOMY_EXTRACTION.YES ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATfunction = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATFunction = 23445", TAXONOMY_EXTRACTION.YES ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATFunction = 23445", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT/1-3", TAXONOMY_EXTRACTION.YES ).equals( "RAT" ) ) { + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) + .equals( "RAT" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_PIG/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_PIG/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) .equals( "PIG" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE/1-3", TAXONOMY_EXTRACTION.YES ) + if ( !ParserUtils + .extractTaxonomyCodeFromNodeName( "BCL2_MOUSE/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) .equals( "MOUSE" ) ) { return false; } @@ -892,11 +903,11 @@ public final class Test { } final PhylogenyNode n1 = new PhylogenyNode(); final PhylogenyNode n2 = PhylogenyNode - .createInstanceFromNhxString( "", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); final PhylogenyNode n3 = PhylogenyNode - .createInstanceFromNhxString( "n3", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n3", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); final PhylogenyNode n4 = PhylogenyNode - .createInstanceFromNhxString( "n4:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n4:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( n1.isHasAssignedEvent() ) { return false; } @@ -5912,109 +5923,132 @@ public final class Test { private static boolean testTaxonomyExtraction() { try { - final PhylogenyNode n0 = PhylogenyNode.createInstanceFromNhxString( "sd_12345678", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n0 = PhylogenyNode + .createInstanceFromNhxString( "sd_12345678", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n0.getNodeData().isHasTaxonomy() ) { return false; } - final PhylogenyNode n1 = PhylogenyNode.createInstanceFromNhxString( "sd_12345x", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n1 = PhylogenyNode + .createInstanceFromNhxString( "sd_12345x", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n1.getNodeData().isHasTaxonomy() ) { System.out.println( n1.toString() ); return false; } - final PhylogenyNode n2 = PhylogenyNode.createInstanceFromNhxString( "12345", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n2 = PhylogenyNode + .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE ); if ( !n2.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n2.toString() ); return false; } - final PhylogenyNode n3 = PhylogenyNode.createInstanceFromNhxString( "blag_12345", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n2x = PhylogenyNode + .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n2x.getNodeData().isHasTaxonomy() ) { + return false; + } + final PhylogenyNode n3 = PhylogenyNode + .createInstanceFromNhxString( "blag_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n3.toString() ); return false; } - final PhylogenyNode n4 = PhylogenyNode.createInstanceFromNhxString( "blag-12345", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n4 = PhylogenyNode + .createInstanceFromNhxString( "blag-12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n4.getNodeData().isHasTaxonomy() ) { System.out.println( n4.toString() ); return false; } - final PhylogenyNode n5 = PhylogenyNode.createInstanceFromNhxString( "12345-blag", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n5 = PhylogenyNode + .createInstanceFromNhxString( "12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n5.getNodeData().isHasTaxonomy() ) { System.out.println( n5.toString() ); return false; } - final PhylogenyNode n6 = PhylogenyNode.createInstanceFromNhxString( "blag-12345-blag", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n6 = PhylogenyNode + .createInstanceFromNhxString( "blag-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n6.getNodeData().isHasTaxonomy() ) { System.out.println( n6.toString() ); return false; } - final PhylogenyNode n7 = PhylogenyNode.createInstanceFromNhxString( "blag-12345_blag", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n7 = PhylogenyNode + .createInstanceFromNhxString( "blag-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n7.getNodeData().isHasTaxonomy() ) { System.out.println( n7.toString() ); return false; } - final PhylogenyNode n8 = PhylogenyNode.createInstanceFromNhxString( "blag_12345-blag", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n8 = PhylogenyNode + .createInstanceFromNhxString( "blag_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n8.toString() ); return false; } - final PhylogenyNode n9 = PhylogenyNode.createInstanceFromNhxString( "blag_12345_blag", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n9 = PhylogenyNode + .createInstanceFromNhxString( "blag_12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n9.toString() ); return false; } - final PhylogenyNode n10 = PhylogenyNode.createInstanceFromNhxString( "blag_12X45-blag", - NHXParser.TAXONOMY_EXTRACTION.YES ); - if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "12X45" ) ) { + final PhylogenyNode n10x = PhylogenyNode + .createInstanceFromNhxString( "blag_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n10x.getNodeData().isHasTaxonomy() ) { + System.out.println( n10x.toString() ); + return false; + } + final PhylogenyNode n10xx = PhylogenyNode + .createInstanceFromNhxString( "blag_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n10xx.getNodeData().isHasTaxonomy() ) { + System.out.println( n10xx.toString() ); + return false; + } + final PhylogenyNode n10 = PhylogenyNode + .createInstanceFromNhxString( "blag_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) { System.out.println( n10.toString() ); return false; } - final PhylogenyNode n11 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n11 = PhylogenyNode + .createInstanceFromNhxString( "blag_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n11.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { System.out.println( n11.toString() ); return false; } - final PhylogenyNode n12 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_musculus", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n12 = PhylogenyNode + .createInstanceFromNhxString( "blag_Mus_musculus_musculus", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n12.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { System.out.println( n12.toString() ); return false; } - final PhylogenyNode n13 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus1", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n13 = PhylogenyNode + .createInstanceFromNhxString( "blag_Mus_musculus1", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n13.getNodeData().isHasTaxonomy() ) { System.out.println( n13.toString() ); return false; } - final PhylogenyNode n14 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_11", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n14 = PhylogenyNode + .createInstanceFromNhxString( "blag_Mus_musculus_11", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n14.getNodeData().isHasTaxonomy() ) { System.out.println( n14.toString() ); return false; } - final PhylogenyNode n15 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_v11", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n15 = PhylogenyNode + .createInstanceFromNhxString( "blag_Mus_musculus_v11", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n15.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus v11" ) ) { System.out.println( n15.toString() ); return false; } - final PhylogenyNode n16 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_/11", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n16 = PhylogenyNode + .createInstanceFromNhxString( "blag_Mus_musculus_/11", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n16.getNodeData().isHasTaxonomy() ) { System.out.println( n16.toString() ); return false; } - final PhylogenyNode n17 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_v", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n17 = PhylogenyNode + .createInstanceFromNhxString( "blag_Mus_musculus_v", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n17.getNodeData().isHasTaxonomy() ) { System.out.println( n17.toString() ); return false; @@ -6075,7 +6109,7 @@ public final class Test { return false; } final PhylogenyNode n8 = PhylogenyNode - .createInstanceFromNhxString( "n8_ECOLI/12:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n8_ECOLI/12:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n8.getName().equals( "n8_ECOLI/12" ) ) { return false; } @@ -6083,7 +6117,8 @@ public final class Test { return false; } final PhylogenyNode n9 = PhylogenyNode - .createInstanceFromNhxString( "n9_ECOLI/12=12:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n9_ECOLI/12=12:0.01", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n9.getName().equals( "n9_ECOLI/12=12" ) ) { return false; } @@ -6091,20 +6126,20 @@ public final class Test { return false; } final PhylogenyNode n10 = PhylogenyNode - .createInstanceFromNhxString( "n10.ECOLI", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10.ECOLI", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n10.getName().equals( "n10.ECOLI" ) ) { return false; } final PhylogenyNode n20 = PhylogenyNode - .createInstanceFromNhxString( "n20_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n20_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n20.getName().equals( "n20_ECOLI/1-2" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( n20 ).equals( "ECOLI" ) ) { return false; } - final PhylogenyNode n20x = PhylogenyNode.createInstanceFromNhxString( "n20_ECOL1/1-2", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n20x = PhylogenyNode + .createInstanceFromNhxString( "n20_ECOL1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n20x.getName().equals( "n20_ECOL1/1-2" ) ) { return false; } @@ -6112,7 +6147,7 @@ public final class Test { return false; } final PhylogenyNode n20xx = PhylogenyNode - .createInstanceFromNhxString( "n20_eCOL1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n20_eCOL1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n20xx.getName().equals( "n20_eCOL1/1-2" ) ) { return false; } @@ -6120,7 +6155,7 @@ public final class Test { return false; } final PhylogenyNode n20xxx = PhylogenyNode - .createInstanceFromNhxString( "n20_ecoli/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n20_ecoli/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n20xxx.getName().equals( "n20_ecoli/1-2" ) ) { return false; } @@ -6128,15 +6163,15 @@ public final class Test { return false; } final PhylogenyNode n20xxxx = PhylogenyNode - .createInstanceFromNhxString( "n20_Ecoli/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n20_Ecoli/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n20xxxx.getName().equals( "n20_Ecoli/1-2" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n20xxxx ).length() > 0 ) { return false; } - final PhylogenyNode n21 = PhylogenyNode.createInstanceFromNhxString( "n21_PIG", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n21 = PhylogenyNode + .createInstanceFromNhxString( "n21_PIG", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n21.getName().equals( "n21_PIG" ) ) { return false; } @@ -6144,7 +6179,7 @@ public final class Test { return false; } final PhylogenyNode n21x = PhylogenyNode - .createInstanceFromNhxString( "n21_PIG", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n21_PIG", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n21x.getName().equals( "n21_PIG" ) ) { return false; } @@ -6152,7 +6187,7 @@ public final class Test { return false; } final PhylogenyNode n22 = PhylogenyNode - .createInstanceFromNhxString( "n22/PIG", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n22/PIG", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n22.getName().equals( "n22/PIG" ) ) { return false; } @@ -6160,7 +6195,7 @@ public final class Test { return false; } final PhylogenyNode n23 = PhylogenyNode - .createInstanceFromNhxString( "n23/PIG_1", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n23/PIG_1", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n23.getName().equals( "n23/PIG_1" ) ) { return false; } @@ -6168,7 +6203,7 @@ public final class Test { return false; } final PhylogenyNode a = PhylogenyNode - .createInstanceFromNhxString( "n10_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !a.getName().equals( "n10_ECOLI/1-2" ) ) { return false; } @@ -6176,7 +6211,7 @@ public final class Test { return false; } final PhylogenyNode b = PhylogenyNode - .createInstanceFromNhxString( "n10_ECOLI1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10_ECOLI1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !b.getName().equals( "n10_ECOLI1/1-2" ) ) { return false; } @@ -6185,7 +6220,7 @@ public final class Test { } final PhylogenyNode c = PhylogenyNode .createInstanceFromNhxString( "n10_RATAF12/1000-2000", - NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !c.getName().equals( "n10_RATAF12/1000-2000" ) ) { return false; } @@ -6194,7 +6229,7 @@ public final class Test { } final PhylogenyNode c1 = PhylogenyNode .createInstanceFromNhxString( "n10_BOVIN_1/1000-2000", - NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !c1.getName().equals( "n10_BOVIN_1/1000-2000" ) ) { return false; } @@ -6203,7 +6238,7 @@ public final class Test { } final PhylogenyNode c2 = PhylogenyNode .createInstanceFromNhxString( "n10_Bovin_1/1000-2000", - NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !c2.getName().equals( "n10_Bovin_1/1000-2000" ) ) { return false; } @@ -6211,7 +6246,7 @@ public final class Test { return false; } final PhylogenyNode d = PhylogenyNode - .createInstanceFromNhxString( "n10_RAT1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10_RAT1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !d.getName().equals( "n10_RAT1/1-2" ) ) { return false; } @@ -6219,23 +6254,23 @@ public final class Test { return false; } final PhylogenyNode e = PhylogenyNode - .createInstanceFromNhxString( "n10_RAT1", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10_RAT1", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !e.getName().equals( "n10_RAT1" ) ) { return false; } if ( !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( e ) ) ) { return false; } - final PhylogenyNode e2 = PhylogenyNode.createInstanceFromNhxString( "n10_RAT1", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode e2 = PhylogenyNode + .createInstanceFromNhxString( "n10_RAT1", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !e2.getName().equals( "n10_RAT1" ) ) { return false; } if ( PhylogenyMethods.getSpecies( e2 ).equals( "RAT" ) ) { return false; } - final PhylogenyNode e3 = PhylogenyNode.createInstanceFromNhxString( "n10_RAT~", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode e3 = PhylogenyNode + .createInstanceFromNhxString( "n10_RAT~", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !e3.getName().equals( "n10_RAT~" ) ) { return false; } @@ -6244,7 +6279,7 @@ public final class Test { } final PhylogenyNode n11 = PhylogenyNode .createInstanceFromNhxString( "n111111_ECOLI/jdj:0.4", - NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n11.getName().equals( "n111111_ECOLI/jdj" ) ) { return false; } @@ -6256,7 +6291,7 @@ public final class Test { } final PhylogenyNode n12 = PhylogenyNode .createInstanceFromNhxString( "n111111-ECOLI---/jdj:0.4", - NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n12.getName().equals( "n111111-ECOLI---/jdj" ) ) { return false; } @@ -6266,16 +6301,16 @@ public final class Test { if ( PhylogenyMethods.getSpecies( n12 ).length() > 0 ) { return false; } - final PhylogenyNode m = PhylogenyNode.createInstanceFromNhxString( "n10_MOUSEa", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode m = PhylogenyNode + .createInstanceFromNhxString( "n10_MOUSEa", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !m.getName().equals( "n10_MOUSEa" ) ) { return false; } if ( PhylogenyMethods.getSpecies( m ).equals( "MOUSE" ) ) { return false; } - final PhylogenyNode o = PhylogenyNode.createInstanceFromNhxString( "n10_MOUSE_", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode o = PhylogenyNode + .createInstanceFromNhxString( "n10_MOUSE_", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !o.getName().equals( "n10_MOUSE_" ) ) { return false; } @@ -6313,7 +6348,7 @@ public final class Test { return false; } final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE ); if ( !n13.getName().equals( "blah_12345/1-2" ) ) { return false; } @@ -6327,16 +6362,16 @@ public final class Test { return false; } final PhylogenyNode n14 = PhylogenyNode - .createInstanceFromNhxString( "blah_12X45/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); - if ( !n14.getName().equals( "blah_12X45/1-2" ) ) { + .createInstanceFromNhxString( "blah_9QX45/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + if ( !n14.getName().equals( "blah_9QX45/1-2" ) ) { return false; } - if ( !PhylogenyMethods.getSpecies( n14 ).equals( "12X45" ) ) { + if ( !PhylogenyMethods.getSpecies( n14 ).equals( "9QX45" ) ) { return false; } final PhylogenyNode n15 = PhylogenyNode .createInstanceFromNhxString( "something_wicked[123]", - NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n15.getName().equals( "something_wicked" ) ) { return false; } @@ -6347,7 +6382,8 @@ public final class Test { return false; } final PhylogenyNode n16 = PhylogenyNode - .createInstanceFromNhxString( "something_wicked2[9]", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "something_wicked2[9]", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n16.getName().equals( "something_wicked2" ) ) { return false; } @@ -6358,7 +6394,8 @@ public final class Test { return false; } final PhylogenyNode n17 = PhylogenyNode - .createInstanceFromNhxString( "something_wicked3[a]", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "something_wicked3[a]", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n17.getName().equals( "something_wicked3" ) ) { return false; } @@ -6366,7 +6403,7 @@ public final class Test { return false; } final PhylogenyNode n18 = PhylogenyNode - .createInstanceFromNhxString( ":0.5[91]", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( ":0.5[91]", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !isEqual( n18.getDistanceToParent(), 0.5 ) ) { return false; } @@ -6376,29 +6413,31 @@ public final class Test { if ( !isEqual( n18.getBranchData().getConfidence( 0 ).getValue(), 91 ) ) { return false; } - final PhylogenyNode n19 = PhylogenyNode.createInstanceFromNhxString( "blah_1-roejojoej", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n19 = PhylogenyNode + .createInstanceFromNhxString( "blah_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n19.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { return false; } if ( !n19.getNodeData().getTaxonomy().getIdentifier().getProvider().equals( "uniprot" ) ) { return false; } - final PhylogenyNode n30 = PhylogenyNode.createInstanceFromNhxString( "blah_1234567-roejojoej", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n30 = PhylogenyNode + .createInstanceFromNhxString( "blah_1234567-roejojoej", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n30.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1234567" ) ) { return false; } if ( !n30.getNodeData().getTaxonomy().getIdentifier().getProvider().equals( "uniprot" ) ) { return false; } - final PhylogenyNode n31 = PhylogenyNode.createInstanceFromNhxString( "blah_12345678-roejojoej", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n31 = PhylogenyNode + .createInstanceFromNhxString( "blah_12345678-roejojoej", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n31.getNodeData().isHasTaxonomy() ) { return false; } - final PhylogenyNode n32 = PhylogenyNode.createInstanceFromNhxString( "sd_12345678", - NHXParser.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n32 = PhylogenyNode + .createInstanceFromNhxString( "sd_12345678", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n32.getNodeData().isHasTaxonomy() ) { return false; }