// Matches "PharmGKB;" followed by whitespace and an identifier made of digits
// and uppercase letters that is terminated by ';'. Group 1 is the identifier.
public final static Pattern PharmGKB_PATTERN = Pattern.compile( "PharmGKB;\\s+([0-9A-Z]+);" );
// Matches "Reactome;" followed by whitespace and an identifier made of digits
// and uppercase letters terminated by ';'. Group 1 is the identifier; group 2
// is the text after the following whitespace, up to (not including) the next '.'.
public final static Pattern Reactome_PATTERN = Pattern.compile( "Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)" );
// Matches "HGNC;" followed by whitespace and "HGNC:<digits>;".
// Group 1 is the digit run after the "HGNC:" prefix.
public final static Pattern HGNC_PATTERN = Pattern.compile( "HGNC;\\s+HGNC:(\\d+);" );
// Matches "NCBI_TaxID=" immediately followed by digits. Group 1 is the digit
// run only; the match stops at the first non-digit character, so anything
// that follows the number on the line is not captured.
+ public final static Pattern NCBI_TAXID_PATTERN= Pattern.compile( "NCBI_TaxID=(\\d+)" );
+
private String _ac;
private SortedSet<Accession> _cross_references;
private String _gene_name;
}
else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceName() ) ) {
if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
- e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) );
+ if ( line.indexOf( "{" ) > 0 ) {
+ e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", "{" ) );
+ }
+ else {
+ e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) );
+ }
}
else if ( ( line.indexOf( "SubName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
- e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) );
+ if ( line.indexOf( "{" ) > 0 ) {
+ e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", "{" ) );
+ }
+ else {
+ e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) );
+ }
+
}
}
else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) {
+
if ( line.indexOf( "Short=" ) > 0 ) {
- e.setSequenceSymbol( SequenceDbWsTools.extractFromTo( line, "Short=", ";" ) );
+ if ( line.indexOf( "{" ) > 0 ) {
+ e.setSequenceSymbol( SequenceDbWsTools.extractFromTo( line, "Short=", "{" ) );
+ }
+ else {
+ e.setSequenceSymbol( SequenceDbWsTools.extractFromTo( line, "Short=", ";" ) );
+ }
+
}
}
else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) {
if ( line.indexOf( "Name=" ) > 0 ) {
- e.setGeneName( SequenceDbWsTools.extractFromTo( line, "Name=", ";" ) );
+ if ( line.indexOf( "{" ) > 0 ) {
+ e.setGeneName( SequenceDbWsTools.extractFromTo( line, "Name=", "{" ) );
+ }
+ else {
+ e.setGeneName( SequenceDbWsTools.extractFromTo( line, "Name=", ";" ) );
+ }
}
}
else if ( line.startsWith( "DR" ) ) {
}
else if ( line.startsWith( "OX" ) ) {
if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
- e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
+ final Matcher m = NCBI_TAXID_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.setTaxId( m.group( 1 ) );
+ }
}
}
else if ( line.startsWith( "SQ" ) ) {