final NHXParser nhx = ( NHXParser ) p;
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
}
else if ( p instanceof NexusPhylogeniesParser ) {
final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
nex.setReplaceUnderscores( false );
nex.setIgnoreQuotes( true );
- nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
}
else {
throw new RuntimeException( "unknown parser type: " + p );
final PhylogenyParser pp_bl = ParserUtils.createParserDependingOnFileType( infile_bl, true );
final PhylogenyParser pp_s = ParserUtils.createParserDependingOnFileType( infile_support_vals, true );
if ( pp_bl instanceof NHXParser ) {
- ( ( NHXParser ) pp_bl ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ ( ( NHXParser ) pp_bl ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
}
phylogeny_w_bl = factory.create( infile_bl, pp_bl )[ index_of_tree_w_bl ];
phylogeny_w_support_vals = factory.create( infile_support_vals, pp_s )[ 0 ];
if ( s.equalsIgnoreCase( "no" ) ) {
setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
}
- else if ( s.equalsIgnoreCase( "yes" ) ) {
+ else if ( s.equalsIgnoreCase( "pfam_relaxed" ) ) {
setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
}
- else if ( s.equalsIgnoreCase( "pfam" ) ) {
+ else if ( s.equalsIgnoreCase( "pfam_strict" ) ) {
setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
}
+ else if ( s.equalsIgnoreCase( "aggressive" ) ) {
+ setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
+ }
else {
- ForesterUtil.printWarningMessage( Constants.PRG_NAME,
- "unknown value for \"taxonomy_extraction_in_nh_parsing\": " + s );
+ ForesterUtil
+ .printWarningMessage( Constants.PRG_NAME,
+ "unknown value for \"taxonomy_extraction_in_nh_parsing\": "
+ + s
+ + " (must be either: no, pfam_relaxed, pfam_strict, or aggressive)" );
}
if ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) && isReplaceUnderscoresInNhParsing() ) {
ForesterUtil
public final static boolean __SYNTH_LF = false; // TODO remove me
public final static boolean ALLOW_DDBJ_BLAST = false;
public final static String PRG_NAME = "Archaeopteryx";
- final static String VERSION = "0.9812 A1ST";
- final static String PRG_DATE = "130403";
+ final static String VERSION = "0.9812+ A1ST";
+ final static String PRG_DATE = "130409";
final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file";
final static String[] DEFAULT_FONT_CHOICES = { "Arial", "Helvetica",
"Verdana", "Tahoma", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" };
// _ parsing
JCheckBoxMenuItem _internal_number_are_confidence_for_nh_parsing_cbmi;
JRadioButtonMenuItem _extract_taxonomy_no_rbmi;
- JRadioButtonMenuItem _extract_taxonomy_yes_rbmi;
- JRadioButtonMenuItem _extract_taxonomy_pfam_rbmi;
+ JRadioButtonMenuItem _extract_taxonomy_agressive_rbmi;
+ JRadioButtonMenuItem _extract_taxonomy_pfam_strict_rbmi;
+ JRadioButtonMenuItem _extract_taxonomy_pfam_relaxed_rbmi;
JCheckBoxMenuItem _replace_underscores_cbmi;
JCheckBoxMenuItem _use_brackets_for_conf_in_nh_export_cbmi;
JCheckBoxMenuItem _use_internal_names_for_conf_in_nh_export_cbmi;
&& _print_black_and_white_cbmi.isSelected() );
options.setInternalNumberAreConfidenceForNhParsing( ( _internal_number_are_confidence_for_nh_parsing_cbmi != null )
&& _internal_number_are_confidence_for_nh_parsing_cbmi.isSelected() );
- if ( ( _extract_taxonomy_yes_rbmi != null ) && _extract_taxonomy_yes_rbmi.isSelected() ) {
- options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ if ( ( _extract_taxonomy_pfam_strict_rbmi != null ) && _extract_taxonomy_pfam_strict_rbmi.isSelected() ) {
+ options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
}
- else if ( ( _extract_taxonomy_pfam_rbmi != null ) && _extract_taxonomy_pfam_rbmi.isSelected() ) {
- options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ else if ( ( _extract_taxonomy_pfam_relaxed_rbmi != null ) && _extract_taxonomy_pfam_relaxed_rbmi.isSelected() ) {
+ options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ }
+ else if ( ( _extract_taxonomy_agressive_rbmi != null ) && _extract_taxonomy_agressive_rbmi.isSelected() ) {
+ options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
}
else if ( ( _extract_taxonomy_no_rbmi != null ) && _extract_taxonomy_no_rbmi.isSelected() ) {
- options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.NO );
+ options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
}
options.setReplaceUnderscoresInNhParsing( ( _replace_underscores_cbmi != null )
&& _replace_underscores_cbmi.isSelected() );
}
collapseBelowThreshold();
}
- else if ( ( o == _extract_taxonomy_pfam_rbmi ) || ( o == _extract_taxonomy_yes_rbmi ) ) {
+ else if ( ( o == _extract_taxonomy_pfam_strict_rbmi ) || ( o == _extract_taxonomy_pfam_relaxed_rbmi )
+ || ( o == _extract_taxonomy_agressive_rbmi ) ) {
if ( _replace_underscores_cbmi != null ) {
_replace_underscores_cbmi.setSelected( false );
}
//
_options_jmenu.add( _extract_taxonomy_no_rbmi = new JRadioButtonMenuItem( "No Taxonomy Extraction" ) );
_options_jmenu
- .add( _extract_taxonomy_pfam_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes from Pfam-style Node Names" ) );
- _extract_taxonomy_pfam_rbmi
- .setToolTipText( "To extract 5-letter taxonomy codes from node names in the form of \"BCL2_MOUSE/134-298\"" );
+ .add( _extract_taxonomy_pfam_strict_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes/Ids from Pfam-style Node Names" ) );
_options_jmenu
- .add( _extract_taxonomy_yes_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes from Node Names" ) );
- _extract_taxonomy_yes_rbmi
- .setToolTipText( "To extract 5-letter taxonomy codes from node names in the form of \"BCL2_MOUSE\" or \"BCL2_MOUSE B-cell lymphoma 2...\"" );
+ .add( _extract_taxonomy_pfam_relaxed_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes/Ids from Pfam-style like Node Names" ) );
+ _options_jmenu
+ .add( _extract_taxonomy_agressive_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes/Ids/Scientific Names from Node Names" ) );
+ _extract_taxonomy_pfam_strict_rbmi
+ .setToolTipText( "To extract taxonomy codes/ids from node names in the form of e.g. \"BCL2_MOUSE/123-304\" or \"BCL2_10090/123-304\"" );
+ _extract_taxonomy_pfam_relaxed_rbmi
+ .setToolTipText( "To extract taxonomy codes/ids from node names in the form of e.g. \"bax_MOUSE\" or \"bax_10090\"" );
+ _extract_taxonomy_agressive_rbmi
+ .setToolTipText( "To extract taxonomy codes/ids or scientific names from node names in the form of e.g. \"MOUSE\" or \"10090\" or \"xyz_Nematostella_vectensis\"" );
_radio_group_2 = new ButtonGroup();
_radio_group_2.add( _extract_taxonomy_no_rbmi );
- _radio_group_2.add( _extract_taxonomy_pfam_rbmi );
- _radio_group_2.add( _extract_taxonomy_yes_rbmi );
+ _radio_group_2.add( _extract_taxonomy_pfam_strict_rbmi );
+ _radio_group_2.add( _extract_taxonomy_pfam_relaxed_rbmi );
+ _radio_group_2.add( _extract_taxonomy_agressive_rbmi );
//
_options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/Nexus Output:" ), getConfiguration() ) );
_options_jmenu
.isInternalNumberAreConfidenceForNhParsing() );
customizeRadioButtonMenuItem( _extract_taxonomy_no_rbmi,
getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.NO );
- customizeRadioButtonMenuItem( _extract_taxonomy_yes_rbmi,
- getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
- customizeRadioButtonMenuItem( _extract_taxonomy_pfam_rbmi,
+ customizeRadioButtonMenuItem( _extract_taxonomy_pfam_strict_rbmi,
getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ customizeRadioButtonMenuItem( _extract_taxonomy_pfam_relaxed_rbmi,
+ getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ customizeRadioButtonMenuItem( _extract_taxonomy_agressive_rbmi,
+ getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.AGGRESSIVE );
customizeCheckBoxMenuItem( _replace_underscores_cbmi, getOptions().isReplaceUnderscoresInNhParsing() );
customizeCheckBoxMenuItem( _search_whole_words_only_cbmi, getOptions().isMatchWholeTermsOnly() );
customizeCheckBoxMenuItem( _inverse_search_result_cbmi, getOptions().isInverseSearchResult() );
final PhylogenyNode n = it.next();
final String name = n.getName().trim();
if ( !ForesterUtil.isEmpty( name ) ) {
- final String nt = ParserUtils
- .extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ final String nt = ParserUtils.extractTaxonomyDataFromNodeName( n,
+ TAXONOMY_EXTRACTION.AGGRESSIVE );
if ( !ForesterUtil.isEmpty( nt ) ) {
if ( counter < 15 ) {
sb.append( name + ": " + nt + "\n" );
_taxonomy_colorize_node_shapes = taxonomy_colorize_node_shapes;
}
- final void setTaxonomyExtractio( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
+ final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) { // setter for _taxonomy_extraction; replaces the misspelled name setTaxonomyExtractio
_taxonomy_extraction = taxonomy_extraction;
}
instance.setNumberOfDigitsAfterCommaForConfidenceValues( configuration
.getNumberOfDigitsAfterCommaForConfidenceValues() );
}
- instance.setTaxonomyExtractio( configuration.getTaxonomyExtraction() );
+ instance.setTaxonomyExtraction( configuration.getTaxonomyExtraction() );
instance.setReplaceUnderscoresInNhParsing( configuration.isReplaceUnderscoresInNhParsing() );
instance.setInternalNumberAreConfidenceForNhParsing( configuration
.isInternalNumberAreConfidenceForNhParsing() );
_webservice_client_index = webservice_client_index;
}
+ @Override
+ public void run() { // NOTE(review): presumably the Runnable/Thread entry point of this class -- confirm against the class declaration
+ readPhylogeniesFromWebservice(); // the whole job is delegated to the synchronized readPhylogeniesFromWebservice()
+ }
+
synchronized void readPhylogeniesFromWebservice() {
final long start_time = new Date().getTime();
URL url = null;
break;
case NH_EXTRACT_TAXONOMY:
parser = new NHXParser();
- ( ( NHXParser ) parser )
- .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
( ( NHXParser ) parser ).setReplaceUnderscores( false );
( ( NHXParser ) parser ).setGuessRootedness( true );
break;
_main_frame.activateSaveAllIfNeeded();
System.gc();
}
-
- @Override
- public void run() {
- readPhylogeniesFromWebservice();
- }
}
public final class ParserUtils {
- final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|CAP";
- final public static Pattern TAXOMONY_SN_PATTERN = Pattern
- .compile( "[A-Z0-9]{2,}_([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)\\b" );
- final public static Pattern TAXOMONY_CODE_PATTERN_R1 = Pattern.compile( "[A-Z0-9]+_(" + TAX_CODE + ")\\b" );
- final public static Pattern TAXOMONY_CODE_PATTERN_R2 = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")\\b" );
- final private static Pattern TAXOMONY_CODE_PATTERN_PF = Pattern.compile( "[A-Z0-9]{2,}_(" + TAX_CODE
- + ")/\\d+-\\d+" );
- final public static Pattern TAXOMONY_CODE_PATTERN_4 = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
- final public static Pattern TAXOMONY_CODE_PATTERN_6 = Pattern.compile( "\\[([A-Z9][A-Z]{2}[A-Z0-9]{3})\\]" );
- final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_1 = Pattern.compile( "\\b\\d{1,7}\\b" );
- final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_2 = Pattern.compile( "(\\d{1,7})[^0-9A-Za-z].*" );
- final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PF = Pattern.compile( "(\\d{1,7})/\\d+-\\d+" );
+ final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|CAP"; // regex fragment: a 5-character code ([A-Z9], two [A-Z], two [A-Z0-9]) or one of RAT, PIG, PEA, CAP; the alternation is only grouped once embedded in "(" ... ")"
+ final public static Pattern TAXOMONY_SN_PATTERN = Pattern
+ .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)\\b" ); // group 1: Genus_species with an optional third part, preceded by an alphanumeric prefix of at least 3 characters and '_'
+ final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
+ + TAX_CODE + ")/\\d+-\\d+\\b" ); // strict Pfam style: upper-case/digit prefix of at least 4 characters, '_', code (group 1), then "/digits-digits"
+ final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
+ + TAX_CODE + ")\\b" ); // relaxed Pfam style: alphanumeric prefix of at least 3 characters, '_', code (group 1); no "/digits-digits" suffix required
+ final public static Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")\\b" ); // aggressive: a bare code (group 1) at a word boundary or directly after '_'
+ final public static Pattern TAXOMONY_CODE_PATTERN_4 = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" ); // a TAX_CODE (group 1) enclosed in square brackets
+ final public static Pattern TAXOMONY_CODE_PATTERN_6 = Pattern
+ .compile( "\\[([A-Z9][A-Z]{2}[A-Z0-9]{3})\\]" ); // a 6-character code (group 1) enclosed in square brackets
+ final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A = Pattern.compile( "(?:\\b|_)(\\d{1,7})\\b" ); // aggressive: a bare 1-7 digit number (group 1) at a word boundary or directly after '_'
+ final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern
+ .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" ); // strict Pfam style: same shape as TAXOMONY_CODE_PATTERN_PFS with a 1-7 digit number (group 1) in place of the code
+ final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern
+ .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(\\d{1,7})\\b" ); // relaxed Pfam style: same shape as TAXOMONY_CODE_PATTERN_PFR with a 1-7 digit number (group 1) in place of the code
final public static PhylogenyParser createParserDependingFileContents( final File file,
final boolean phyloxml_validate_against_xsd )
return reader;
}
+ public final static String extractScientificNameFromNodeName( final String name ) { // returns the scientific name embedded in a node name, or null when TAXOMONY_SN_PATTERN does not match
+ final Matcher m = TAXOMONY_SN_PATTERN.matcher( name );
+ if ( m.find() ) { // find(), not matches(): the name may sit anywhere inside the node name
+ return m.group( 1 ).replace( '_', ' ' ); // group 1 is the underscore-joined name; underscores become spaces in the result
+ }
+ return null; // no match; callers must handle null
+ }
+
public final static String extractTaxonomyCodeFromNodeName( final String name,
final TAXONOMY_EXTRACTION taxonomy_extraction ) {
- if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) {
- final Matcher m = TAXOMONY_CODE_PATTERN_PF.matcher( name );
- if ( m.find() ) {
- return m.group( 1 );
- }
+ Matcher m = TAXOMONY_CODE_PATTERN_PFS.matcher( name ); // strict Pfam-style pattern is tried first for every value of taxonomy_extraction; NOTE(review): that includes TAXONOMY_EXTRACTION.NO -- confirm callers never pass NO here
+ if ( m.find() ) {
+ return m.group( 1 ); // group 1 is the taxonomy code
}
else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
|| ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) ) {
- final Matcher m1 = TAXOMONY_CODE_PATTERN_R1.matcher( name );
- if ( m1.find() ) {
- return m1.group( 1 );
+ m = TAXOMONY_CODE_PATTERN_PFR.matcher( name ); // relaxed pattern: code after a prefix and '_', no "/digits-digits" suffix needed
+ if ( m.find() ) {
+ return m.group( 1 );
}
- final Matcher m2 = TAXOMONY_CODE_PATTERN_R2.matcher( name );
- if ( m2.find() ) {
- return m2.group( 1 );
+ else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) { // only AGGRESSIVE falls back to a bare code; PFAM_STYLE_RELAXED no longer does
+ m = TAXOMONY_CODE_PATTERN_A.matcher( name );
+ if ( m.find() ) {
+ return m.group( 1 );
+ }
}
}
return null;
}
- public final static String extractScientificNameFromNodeName( final String name ) {
- final Matcher m1 = TAXOMONY_SN_PATTERN.matcher( name );
- if ( m1.find() ) {
- return m1.group( 1 ).replace( '_', ' ' );
- }
- return null;
- }
-
public final static String extractTaxonomyDataFromNodeName( final PhylogenyNode node,
final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction )
throws PhyloXmlDataFormatException {
node.getNodeData().getTaxonomy().setTaxonomyCode( code );
return code;
}
- else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED || taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) ) {
+ else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
final String sn = extractScientificNameFromNodeName( node.getName() );
if ( !ForesterUtil.isEmpty( sn ) ) {
if ( !node.getNodeData().isHasTaxonomy() ) {
public final static String extractUniprotTaxonomyIdFromNodeName( final String name,
final TAXONOMY_EXTRACTION taxonomy_extraction ) {
- if ( ( name.indexOf( "_" ) > 0 )
- && ( ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) ) || ( ( ( name
- .indexOf( "/" ) > 4 ) && ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) ) ) ) ) {
- final String[] s = name.split( "[_\\s]" );
- if ( s.length > 1 ) {
- final String str = s[ 1 ];
- if ( !ForesterUtil.isEmpty( str ) ) {
- if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) {
- final Matcher m = TAXOMONY_UNIPROT_ID_PATTERN_PF.matcher( str );
- if ( m.matches() ) {
- return m.group( 1 );
- }
- }
- else {
- final Matcher m1 = TAXOMONY_UNIPROT_ID_PATTERN_1.matcher( str );
- if ( m1.matches() ) {
- return m1.group();
- }
- final Matcher m2 = TAXOMONY_UNIPROT_ID_PATTERN_2.matcher( str );
- if ( m2.matches() ) {
- return m2.group( 1 );
- }
- }
- }
- }
+ Matcher m = TAXOMONY_UNIPROT_ID_PATTERN_PFS.matcher( name );
+ if ( m.find() ) {
+ return m.group( 1 );
}
- if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
- final Matcher m1 = TAXOMONY_UNIPROT_ID_PATTERN_1.matcher( name );
- if ( m1.matches() ) {
- return name;
+ else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) ) {
+ m = TAXOMONY_UNIPROT_ID_PATTERN_PFR.matcher( name );
+ if ( m.find() ) {
+ return m.group( 1 );
+ }
+ else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
+ m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name );
+ if ( m.find() ) {
+ return m.group( 1 );
+ }
}
}
return null;
final NHXParser nhx = new NHXParser();
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
//
final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);";
final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx );
final NHXParser nhx = new NHXParser();
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"
+ "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));"
+ "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);";
System.out.println( "failed." );
failed++;
}
- System.exit( 0 );
System.out.print( "UniProtKB id extraction: " );
if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) {
System.out.println( "OK." );
.equals( "Mus musculus musculus" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( " -XS_Mus_musculus-12" ).equals( "Mus musculus" ) ) {
+ if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus-12" ).equals( "Mus musculus" ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( " -XS_Mus_musculus-12 affrre e" )
+ if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus-12 affrre e" )
.equals( "Mus musculus" ) ) {
return false;
}
private static boolean testExtractTaxonomyCodeFromNodeName() {
try {
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "MOUSE" ) ) {
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) {
return false;
}
if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " ARATH ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " ARATH ", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "ARATH" ) ) {
return false;
}
.equals( "ARATH" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "RAT" ) ) {
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "RAT" ) ) {
return false;
}
if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "RAT" ) ) {
if ( ParserUtils.extractTaxonomyCodeFromNodeName( "RAT1", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " _MOUSE", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "MOUSE" ) ) {
- return false;
- }
if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " _SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "SOYBN" ) ) {
- return false;
- }
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " SOYBN", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "SOYBN" ) ) {
- return false;
- }
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_SOYBN", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "SOYBN" ) ) {
- return false;
- }
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "qwerty SOYBN", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "SOYBN" ) ) {
- return false;
- }
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "qwerty_SOYBN", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "qwerty SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "qwerty_SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN qwerty", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "ABCD_SOYBN ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN qwerty", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( ",SOYBN,", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( ",SOYBN,", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "xxx,SOYBN,xxx", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "xxx,SOYBN,xxx", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( ParserUtils.extractTaxonomyCodeFromNodeName( "xxxSOYBNxxx", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) {
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "xxxSOYBNxxx", TAXONOMY_EXTRACTION.AGGRESSIVE ) != null ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "-SOYBN~", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "-SOYBN~", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "SOYBN" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "N8_ECOLI/1-2:0.01",
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "NNN8_ECOLI/1-2:0.01",
TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ).equals( "ECOLI" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "blag_9YX45-blag",
- TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "blag_9YX45-blag", TAXONOMY_EXTRACTION.AGGRESSIVE )
.equals( "9YX45" ) ) {
return false;
}
.equals( "MOUSE" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "MOUSE" ) ) {
- return false;
- }
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE^", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "MOUSE" ) ) {
- return false;
- }
- if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE*", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) != null ) {
- return false;
- }
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE=x", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- .equals( "MOUSE" ) ) {
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) {
return false;
}
}
return false;
}
final PhylogenyNode n9 = PhylogenyNode
- .createInstanceFromNhxString( "blag_12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "blag_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
System.out.println( n9.toString() );
return false;
return false;
}
final PhylogenyNode n11 = PhylogenyNode
- .createInstanceFromNhxString( "BLAG_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
if ( !n11.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) {
System.out.println( n11.toString() );
return false;
}
final PhylogenyNode n12 = PhylogenyNode
.createInstanceFromNhxString( "BLAG_Mus_musculus_musculus",
- NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
if ( !n12.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) {
System.out.println( n12.toString() );
return false;
}
final PhylogenyNode n13 = PhylogenyNode
- .createInstanceFromNhxString( "BLAG_Mus_musculus1",
- NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG_Mus_musculus1", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
if ( n13.getNodeData().isHasTaxonomy() ) {
System.out.println( n13.toString() );
return false;
}
- final PhylogenyNode n14 = PhylogenyNode
- .createInstanceFromNhxString( "BLAG_Mus_musculus_11",
- NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
- if ( n14.getNodeData().isHasTaxonomy() ) {
- System.out.println( n14.toString() );
- return false;
- }
- final PhylogenyNode n16 = PhylogenyNode
- .createInstanceFromNhxString( "BLAG_Mus_musculus_/11",
- NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
- if ( n16.getNodeData().isHasTaxonomy() ) {
- System.out.println( n16.toString() );
- return false;
- }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
return false;
}
final PhylogenyNode n8 = PhylogenyNode
- .createInstanceFromNhxString( "N8_ECOLI/1-2:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
- if ( !n8.getName().equals( "N8_ECOLI/1-2" ) ) {
+ .createInstanceFromNhxString( "ABCD_ECOLI/1-2:0.01",
+ NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ if ( !n8.getName().equals( "ABCD_ECOLI/1-2" ) ) {
return false;
}
if ( !PhylogenyMethods.getSpecies( n8 ).equals( "ECOLI" ) ) {
return false;
}
final PhylogenyNode n9 = PhylogenyNode
- .createInstanceFromNhxString( "N9_ECOLI/1-12:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
- if ( !n9.getName().equals( "N9_ECOLI/1-12" ) ) {
+ .createInstanceFromNhxString( "ABCD_ECOLI/1-12:0.01",
+ NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ if ( !n9.getName().equals( "ABCD_ECOLI/1-12" ) ) {
return false;
}
if ( !PhylogenyMethods.getSpecies( n9 ).equals( "ECOLI" ) ) {
return false;
}
final PhylogenyNode n20 = PhylogenyNode
- .createInstanceFromNhxString( "N20_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
- if ( !n20.getName().equals( "N20_ECOLI/1-2" ) ) {
+ .createInstanceFromNhxString( "ABCD_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ if ( !n20.getName().equals( "ABCD_ECOLI/1-2" ) ) {
return false;
}
if ( !PhylogenyMethods.getSpecies( n20 ).equals( "ECOLI" ) ) {
return false;
}
final PhylogenyNode a = PhylogenyNode
- .createInstanceFromNhxString( "N10_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
- if ( !a.getName().equals( "N10_ECOLI/1-2" ) ) {
+ .createInstanceFromNhxString( "ABCD_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ if ( !a.getName().equals( "ABCD_ECOLI/1-2" ) ) {
return false;
}
if ( !PhylogenyMethods.getSpecies( a ).equals( "ECOLI" ) ) {
return false;
}
final PhylogenyNode c1 = PhylogenyNode
- .createInstanceFromNhxString( "n10_BOVIN_1/1000-2000",
+ .createInstanceFromNhxString( "n10_BOVIN/1000-2000",
NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
- if ( !c1.getName().equals( "n10_BOVIN_1/1000-2000" ) ) {
+ if ( !c1.getName().equals( "n10_BOVIN/1000-2000" ) ) {
return false;
}
if ( !PhylogenyMethods.getSpecies( c1 ).equals( "BOVIN" ) ) {
return false;
}
final PhylogenyNode c2 = PhylogenyNode
- .createInstanceFromNhxString( "n10_Bovin_1/1000-2000",
+ .createInstanceFromNhxString( "N10_Bovin_1/1000-2000",
NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
- if ( !c2.getName().equals( "n10_Bovin_1/1000-2000" ) ) {
+ if ( !c2.getName().equals( "N10_Bovin_1/1000-2000" ) ) {
return false;
}
- if ( !PhylogenyMethods.getSpecies( c2 ).equals( "" ) ) {
+ if ( PhylogenyMethods.getSpecies( c2 ).length() > 0 ) {
return false;
}
final PhylogenyNode e3 = PhylogenyNode
return false;
}
final PhylogenyNode n12 = PhylogenyNode
- .createInstanceFromNhxString( "n111111-ECOLI---/jdj:0.4",
+ .createInstanceFromNhxString( "N111111-ECOLI---/jdj:0.4",
NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
- if ( !n12.getName().equals( "n111111-ECOLI---/jdj" ) ) {
+ if ( !n12.getName().equals( "N111111-ECOLI---/jdj" ) ) {
return false;
}
if ( n12.getDistanceToParent() != 0.4 ) {
return false;
}
final PhylogenyNode o = PhylogenyNode
- .createInstanceFromNhxString( "n10_MOUSE_", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
- if ( !o.getName().equals( "n10_MOUSE_" ) ) {
+ .createInstanceFromNhxString( "ABCD_MOUSE", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ if ( !o.getName().equals( "ABCD_MOUSE" ) ) {
return false;
}
if ( !PhylogenyMethods.getSpecies( o ).equals( "MOUSE" ) ) {
return false;
}
final PhylogenyNode n14 = PhylogenyNode
- .createInstanceFromNhxString( "BLA_9QX45/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
- if ( !n14.getName().equals( "BLA_9QX45/1-2" ) ) {
+ .createInstanceFromNhxString( "BLA1_9QX45/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ if ( !n14.getName().equals( "BLA1_9QX45/1-2" ) ) {
return false;
}
if ( !PhylogenyMethods.getSpecies( n14 ).equals( "9QX45" ) ) {
ForesterUtil.ensurePresenceOfTaxonomy( node );
try {
if ( tc.length() == 6 ) {
- String t = tc.substring( 0, 5 );
+ final String t = tc.substring( 0, 5 );
System.out.println( "WARNING: taxonomy code " + tc + " -> " + t );
tc = t;
}
public final class ForesterConstants {
- public final static String FORESTER_VERSION = "1.027";
- public final static String FORESTER_DATE = "130401";
+ public final static String FORESTER_VERSION = "1.027+";
+ public final static String FORESTER_DATE = "130409";
public final static String PHYLO_XML_VERSION = "1.10";
public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org";
public final static String PHYLO_XML_XSD = "phyloxml.xsd";