From ab61239b20e51f02341b658a0f74a8c8acd904ec Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Wed, 29 Sep 2021 15:32:09 +0100 Subject: [PATCH] JAL-3881 split on any symbol other than alphanumerics and '_' to generate putative sequence identifiers with which to query databases --- src/jalview/ws/DBRefFetcher.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/jalview/ws/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java index 9420465..67b44d7 100644 --- a/src/jalview/ws/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -31,6 +31,8 @@ import java.util.List; import java.util.Map; import java.util.StringTokenizer; import java.util.Vector; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import jalview.analysis.AlignSeq; import jalview.api.FeatureSettingsModelI; @@ -404,12 +406,14 @@ public class DBRefFetcher implements Runnable } else { + Pattern possibleIds = Pattern.compile("[A-Za-z0-9_]+"); // generate queries from sequence ID string - StringTokenizer st = new StringTokenizer(sequence.getName(), - "|"); - while (st.hasMoreTokens()) + Matcher tokens = possibleIds.matcher(sequence.getName()); + int p=0; + while (tokens.find(p)) { - String token = st.nextToken(); + String token = tokens.group(); + p = tokens.end(); UPEntry[] presp = null; if (picrClient != null) { -- 1.7.10.2