options.setInternalNumberAreConfidenceForNhParsing( ( _internal_number_are_confidence_for_nh_parsing_cbmi != null )
&& _internal_number_are_confidence_for_nh_parsing_cbmi.isSelected() );
if ( ( _extract_taxonomy_yes_rbmi != null ) && _extract_taxonomy_yes_rbmi.isSelected() ) {
- options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.AGRESSIVE );
}
else if ( ( _extract_taxonomy_pfam_rbmi != null ) && _extract_taxonomy_pfam_rbmi.isSelected() ) {
options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
.compile( "\\b[A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP\\b" );
final private static Pattern TAXOMONY_CODE_PATTERN_2 = Pattern
.compile( "([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)[^0-9A-Za-z].*" );
+ final private static Pattern TAXOMONY_CODE_PATTERN_3 = Pattern
+ .compile( "_([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)_" );
final private static Pattern TAXOMONY_CODE_PATTERN_PF = Pattern
.compile( "([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)/\\d+-\\d+" );
final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_1 = Pattern.compile( "\\b\\d{1,7}\\b" );
if ( m1.matches() ) {
return name;
}
+ final Matcher m3 = TAXOMONY_CODE_PATTERN_3.matcher( name );
+ if ( m3.matches() ) {
+ return m3.group( 1 );
+ }
}
return null;
}
.equals( "MOUSE" ) ) {
return false;
}
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.AGRESSIVE )
+ .equals( "MOUSE" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ .equals( "MOUSE" ) ) {
+ return false;
+ }
+ if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) != null ) {
+ return false;
+ }
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.AGRESSIVE )
+ .equals( "MOUSE" ) ) {
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );