// Precompiled regular expressions for picking taxonomy codes and scientific
// names out of name strings.
// NOTE(review): this chunk appears to be a unified-diff excerpt -- a line starting
// with '-' looks like the superseded form of a pattern and the following '+' line
// its replacement; confirm against the real file. The leading '+' on the two
// "+ TAX_CODE + ..." lines is ordinary string concatenation continuing the
// preceding line, not a diff marker.
// NOTE(review): "TAXOMONY" looks like a misspelling of "TAXONOMY"; most of these
// names are public, so renaming would require updating callers.

// Three or more alphanumerics, an underscore, then TAX_CODE (declared outside
// this chunk) captured as group 1, ending at a word boundary.
final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
+ TAX_CODE + ")\\b" );
// An alphanumeric prefix (3+ chars) and underscore, followed by an
// underscore-separated name captured as group 1: capitalized word, lowercase
// word, and an optional trailing part of lowercase letters, digits and underscores.
// The '+' form bounds the first word to 2-30 lowercase letters after the capital
// (was one or more) and requires at least 3 letters in the second word (was 2).
final public static Pattern TAXOMONY_SN_PATTERN = Pattern
- .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]+_[a-z]{2,30}(?:_[a-z][a-z0-9_]+)?)\\b" );
+ .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]{2,30}_[a-z]{3,30}(?:_[a-z][a-z0-9_]+)?)\\b" );
// Group 1: capitalized word, then one or two lowercase words, each separated by
// an underscore or a space. The '+' form tightens the word lengths as above and
// makes the trailing boundary/underscore optional.
final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern
- .compile( "\\b([A-Z][a-z]+[_ ][a-z]{2,30}(?:[_ ][a-z]{2,30})?)(?:\\b|_)" );
+ .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}(?:[_ ][a-z]{3,30})?)(?:\\b|_)?" );
// Group 1: capitalized word, lowercase word, a qualifier beginning with
// "str", "subsp" or "var" (up to 5 more lowercase letters and an optional dot),
// then 1-60 non-whitespace characters; parts separated by underscore or space.
final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_1 = Pattern
- .compile( "\\b([A-Z][a-z]+[_ ][a-z]{2,30}[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
+ .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
// Same shape as STRAIN_1, but the qualifier and its value are enclosed in
// literal parentheses; the trailing boundary/underscore is optional.
final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_2 = Pattern
- .compile( "\\b([A-Z][a-z]+[_ ][a-z]{2,30}[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))(?:\\b|_)?" );
+ .compile( "\\b([A-Z][a-z]{2,30}[_ ][a-z]{3,30}[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))(?:\\b|_)?" );
// Group 1: capitalized word, an underscore or space, then the literal "sp."
// (the dot is escaped, so it must be an actual '.').
+ final public static Pattern TAXOMONY_SN_PATTERN_SP = Pattern
+ .compile( "\\b([A-Z][a-z]{2,30}[_ ]sp\\.)(?:\\b|_)?" );
+
// Group 1: one capital letter followed by 2-30 lowercase letters; the pattern
// itself carries no anchors or word boundaries.
final public static Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile( "([A-Z][a-z]{2,30})" );
// Four or more uppercase letters/digits, an underscore, TAX_CODE captured as
// group 1, then "/<digits>-<digits>" ending at a word boundary.
final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
+ TAX_CODE + ")/\\d+-\\d+\\b" );
return m_str2.group( 1 ).replace( '_', ' ' );
}
final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name );
+
if ( m_sn.find() ) {
return m_sn.group( 1 ).replace( '_', ' ' );
}
+
+ final Matcher m_sp = TAXOMONY_SN_PATTERN_SP.matcher( name );
+
+ if ( m_sp.find() ) {
+ return m_sp.group( 1 ).replace( '_', ' ' );
+ }
return null;
}
if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) {
return false;
}
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" ).equals( "Pilostyles mexicana" ) ) {
+ return false;
+ }
if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_strain_K12/DH10B" )
.equals( "Escherichia coli strain K12/DH10B" ) ) {
return false;
.equals( "Escherichia coli (str. K12)" ) ) {
return false;
}
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." )
+ .equals( "Macrocera sp." ) ) {
+
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" )
+ .equals( "Macrocera sp." ) ) {
+
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" )
+ .equals( "Macrocera sp." ) ) {
+
+
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "something Macrocera sp. K12" )
+ .equals( "Macrocera sp." ) ) {
+
+
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );