From 8503d69aa0d56c0eec95d3c89bc90ada7272997b Mon Sep 17 00:00:00 2001
From: Jim Procter
Date: Thu, 12 Nov 2015 17:58:42 +0000
Subject: [PATCH] JAL-1965 case insensitive & word matching SequenceIdMatcher .. nearly passes tests

Signed-off-by: Jim Procter
---
 src/jalview/analysis/SequenceIdMatcher.java |   51 ++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/jalview/analysis/SequenceIdMatcher.java b/src/jalview/analysis/SequenceIdMatcher.java
index 454bc09..8dda255 100755
--- a/src/jalview/analysis/SequenceIdMatcher.java
+++ b/src/jalview/analysis/SequenceIdMatcher.java
@@ -280,7 +280,7 @@ public class SequenceIdMatcher
     {
       if (s != null)
       {
-        id = new String(s);
+        id = new String(s).toLowerCase();
       }
       else
       {
@@ -341,27 +341,60 @@ public class SequenceIdMatcher
       // match contains one ID flanked
       if (id.length() > s.id.length())
       {
-        return id.startsWith(s.id) ? (WORD_SEP.indexOf(id.charAt(s.id
-                .length())) > -1) : false;
+        return check_wordmatch(id, s.id);
       }
       else
       {
-        return s.id.startsWith(id) ? (s.id.equals(id) ? true : (WORD_SEP
-                .indexOf(s.id.charAt(id.length())) > -1)) : false;
+        return check_wordmatch(s.id, id);
       }
     }
 
+    /**
+     * Test whether one ID occurs within the other as a complete word.
+     *
+     * @param longer
+     *          the ID to search in (never shorter than the other ID)
+     * @param shorter
+     *          the ID to search for
+     * @return true if some occurrence of shorter in longer is flanked on both
+     *         sides by the end of the string or by a WORD_SEP character
+     */
+    private boolean check_wordmatch(String longer, String shorter)
+    {
+      int mlen = shorter.length();
+      // try every occurrence: the first may lie inside another word while a
+      // later one is properly delimited (e.g. "ab" in "xab|ab")
+      for (int sp = longer.indexOf(shorter); sp > -1; sp = longer.indexOf(
+              shorter, sp + 1))
+      {
+        int ep = sp + mlen;
+        boolean wordStart = sp == 0
+                || WORD_SEP.indexOf(longer.charAt(sp - 1)) > -1;
+        boolean wordEnd = ep == longer.length()
+                || WORD_SEP.indexOf(longer.charAt(ep)) > -1;
+        if (wordStart && wordEnd)
+        {
+          return true;
+        }
+        if (mlen == 0)
+        {
+          // an empty ID is only tested at the start of the string
+          return false;
+        }
+      }
+      return false;
+    }
+
     public boolean equals(String s)
     {
+      s = s.toLowerCase(); // TODO: employ faster to lower case operation
       if (id.length() > s.length())
       {
-        return id.startsWith(s) ? (WORD_SEP.indexOf(id.charAt(s.length())) > -1)
-                : false;
+        return check_wordmatch(id, s);
       }
       else
       {
-        return s.startsWith(id) ? (s.equals(id) ? true : (WORD_SEP
-                .indexOf(s.charAt(id.length())) > -1)) : false;
+        return check_wordmatch(s, id);
       }
     }
   }
-- 
1.7.10.2