+ TAX_CODE + ")\\b" );
// Matches, starting at a word boundary or an underscore, a prefix of three or more
// alphanumeric characters followed by an underscore and then a name of the form
// "Xxxx_yyyy" (one uppercase letter plus lowercase letters, an underscore, two or more
// lowercase letters), optionally extended by one more underscore-separated part that
// starts with a lowercase letter. Group 1 captures the name without the prefix.
// NOTE(review): "SN" presumably stands for "scientific name" -- confirm against callers.
final public static Pattern TAXOMONY_SN_PATTERN = Pattern
.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)\\b" );
// Matches a bare "Xxxx_yyyy" name (one uppercase letter plus lowercase letters, an
// underscore, two or more lowercase letters, optionally one more underscore-separated
// part starting with a lowercase letter) that begins at a word boundary; no leading
// identifier prefix is required. The match must be followed by a word boundary or an
// underscore, so a trailing part such as "_123" is left out of group 1.
+ final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern
+ .compile( "\\b([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)(?:\\b|_)" );
// Matches, starting at a word boundary or an underscore, a prefix of four or more
// uppercase letters/digits, an underscore, the TAX_CODE sub-pattern (captured as
// group 1; TAX_CODE is defined elsewhere in this class) and a mandatory
// "/<digits>-<digits>" suffix.
// NOTE(review): "PFS" presumably means "Pfam style, strict" -- confirm.
final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
+ TAX_CODE + ")/\\d+-\\d+\\b" );
// Pattern for a bare run of 1 to 7 digits (group 1). This change disables it: the
// declaration is kept only as a comment, and the AGGRESSIVE branch that used it is
// commented out in the same patch.
- final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A = Pattern.compile( "(?:\\b|_)(\\d{1,7})\\b" );
+ // final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A = Pattern.compile( "(?:\\b|(?:[A-Z]_))(\\d{1,7})\\b" );
// Matches, starting at a word boundary or an underscore, an identifier prefix, an
// underscore and a numeric id of 1 to 7 digits (group 1) that ends at a word boundary.
// The prefix rule is changed here: previously three or more alphanumeric characters of
// either case, now one or more uppercase letters/digits -- so lowercase prefixes such
// as "blah_" no longer match.
// NOTE(review): "PFR" presumably means "Pfam style, relaxed" -- confirm.
final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern
- .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(\\d{1,7})\\b" );
+ .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" );
// Matches, starting at a word boundary or an underscore, a prefix of four or more
// uppercase letters/digits, an underscore, a numeric id of 1 to 7 digits (group 1)
// and a mandatory "/<digits>-<digits>" suffix.
final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern
.compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" );
if ( m.find() ) {
return m.group( 1 ).replace( '_', ' ' );
}
+ final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name );
+ if ( m_sn.find() ) {
+ return m_sn.group( 1 ).replace( '_', ' ' );
+ }
return null;
}
if ( m.find() ) {
return m.group( 1 );
}
- else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
- m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name );
- if ( m.find() ) {
- return m.group( 1 );
- }
- }
+ //else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
+ // m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name );
+ // if ( m.find() ) {
+ // return m.group( 1 );
+ // }
+ //}
}
return null;
}
.equals( "Mus musculus" ) ) {
return false;
}
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus" )
+ .equals( "Mus musculus musculus" ) ) {
+ return false;
+ }
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) {
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
return false;
}
final PhylogenyNode n13 = PhylogenyNode
- .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
- if ( !n13.getName().equals( "blah_12345/1-2" ) ) {
+ .createInstanceFromNhxString( "BLAH_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ if ( !n13.getName().equals( "BLAH_12345/1-2" ) ) {
return false;
}
if ( PhylogenyMethods.getSpecies( n13 ).equals( "12345" ) ) {
return false;
}
final PhylogenyNode n19 = PhylogenyNode
- .createInstanceFromNhxString( "blah_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAH_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n19.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) {
return false;
}
return false;
}
final PhylogenyNode n30 = PhylogenyNode
- .createInstanceFromNhxString( "blah_1234567-roejojoej",
+ .createInstanceFromNhxString( "BLAH_1234567-roejojoej",
NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n30.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1234567" ) ) {
return false;
return false;
}
final PhylogenyNode n31 = PhylogenyNode
- .createInstanceFromNhxString( "blah_12345678-roejojoej",
+ .createInstanceFromNhxString( "BLAH_12345678-roejojoej",
NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( n31.getNodeData().isHasTaxonomy() ) {
return false;
return false;
}
final PhylogenyNode n40 = PhylogenyNode
- .createInstanceFromNhxString( "bcl2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BCL2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
return false;
}
return false;
}
final PhylogenyNode n3 = PhylogenyNode
- .createInstanceFromNhxString( "blag_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
System.out.println( n3.toString() );
return false;
return false;
}
final PhylogenyNode n6 = PhylogenyNode
- .createInstanceFromNhxString( "blag-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( n6.getNodeData().isHasTaxonomy() ) {
System.out.println( n6.toString() );
return false;
}
final PhylogenyNode n7 = PhylogenyNode
- .createInstanceFromNhxString( "blag-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( n7.getNodeData().isHasTaxonomy() ) {
System.out.println( n7.toString() );
return false;
}
final PhylogenyNode n8 = PhylogenyNode
- .createInstanceFromNhxString( "blag_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
System.out.println( n8.toString() );
return false;
}
final PhylogenyNode n9 = PhylogenyNode
- .createInstanceFromNhxString( "blag_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
System.out.println( n9.toString() );
return false;
}
final PhylogenyNode n10x = PhylogenyNode
- .createInstanceFromNhxString( "blag_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( n10x.getNodeData().isHasTaxonomy() ) {
System.out.println( n10x.toString() );
return false;
}
final PhylogenyNode n10xx = PhylogenyNode
- .createInstanceFromNhxString( "blag_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( n10xx.getNodeData().isHasTaxonomy() ) {
System.out.println( n10xx.toString() );
return false;
}
final PhylogenyNode n10 = PhylogenyNode
- .createInstanceFromNhxString( "blag_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ .createInstanceFromNhxString( "BLAG_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) {
System.out.println( n10.toString() );
return false;