From 5384c741e4f4cb66468f2fbbf6e2e0e88e9e4626 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 14 Jan 2015 01:40:46 +0000 Subject: [PATCH] in progress --- forester/java/src/org/forester/ws/seqdb/UniProtEntry.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 57652e5..3f0df32 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -54,6 +54,8 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public final static Pattern PharmGKB_PATTERN = Pattern.compile( "PharmGKB;\\s+([0-9A-Z]+);" ); public final static Pattern Reactome_PATTERN = Pattern.compile( "Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)" ); public final static Pattern HGNC_PATTERN = Pattern.compile( "HGNC;\\s+HGNC:(\\d+);" ); + public final static Pattern NCBI_TAXID_PATTERN= Pattern.compile( "NCBI_TaxID=(\\d+)" ); + private String _ac; private SortedSet _cross_references; private String _gene_name; @@ -323,7 +325,10 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } else if ( line.startsWith( "OX" ) ) { if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) { - e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) ); + final Matcher m = NCBI_TAXID_PATTERN.matcher( line ); + if ( m.find() ) { + e.setTaxId( m.group( 1 ) ); + } } } else if ( line.startsWith( "SQ" ) ) { -- 1.7.10.2