X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=inline;f=src%2Fcom%2Fstevesoft%2Fpat%2FSkipBMH.java;h=82575486fc4f298a9a85a517a60449b573124f7f;hb=620d3204a1b556ba249595be908fbc190cf7e1cf;hp=4fbe3818e66075c203f7580c80799504f9507dac;hpb=c40cf903f740a72ab63dd1abc10fa33450ce660d;p=jalview.git
diff --git a/src/com/stevesoft/pat/SkipBMH.java b/src/com/stevesoft/pat/SkipBMH.java
index 4fbe381..8257548 100755
--- a/src/com/stevesoft/pat/SkipBMH.java
+++ b/src/com/stevesoft/pat/SkipBMH.java
@@ -1,183 +1,279 @@
-//
-// This software is now distributed according to
-// the Lesser Gnu Public License. Please see
-// http://www.gnu.org/copyleft/lesser.txt for
-// the details.
-// -- Happy Computing!
-//
-package com.stevesoft.pat;
-
-import com.stevesoft.pat.wrap.StringWrap;
-
-/** Like Skip, but implements a
-
- Boyer-Moore-Horspool type search
- method that has been modified to be more like a "T-search" (see
- the Michael Tamm''s article in C'T, magazin fuer computer und technic, August 97
- p 292). Yet another important source of information for me was
- the
- Deep Magic article on string searching. As of this writing, I can
- beat String's indexOf method in many cases.
- @see com.stevesoft.pat.Skip
- @see com.stevesoft.pat.Skip2
- */
-public class SkipBMH extends Skip {
- // This number could be 256, but I think it's
- // big enough. Note, it must be a power of 2.
- final int MAX_CHAR = 64;
- final char[] skip = new char[MAX_CHAR];
- int sm1;
- int jump_ahead = 0;
- char uc,lc,tc,x;
- final boolean exact(char c) {
- return (ign && anyc(c))||c==x;
- }
- final boolean anyc(char c) {
- return c==uc||c==lc||c==tc;
- }
- public SkipBMH(String pt,boolean ign) { this(pt,ign,0); }
- public SkipBMH(String pt) { this(pt,false,0); }
- public SkipBMH(String pt,boolean ign,int offset) {
- super(pt,ign,offset);
- for(int k=0;k vend) return -1;
- }
- }
- } else {
- for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {
- // table look-up is expensive, avoid it if possible
- if( x==s.charAt(k) ) {
- //if(src.regionMatches(0,s,k-sm1,sm1))
- if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))
- return k-sm1-offset;
- k += jump_ahead;
- }
- }
- for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {
- // table look-up is expensive, avoid it if possible
- if( x==s.charAt(k) ) {
- //if(src.regionMatches(0,s,k-sm1,sm1))
- if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))
- return k-sm1-offset;
- k += jump_ahead;
- if(k > vend) return -1;
- }
- }
- }
-
- return -1;
- }
- public int find(StringLike s,int start,int end) {
- if(s instanceof StringWrap)
- return find(s.toString(),start,end);
- start += offset+sm1;
- int vend = min(s.length()-1,end+sm1+offset),k;
- int vend1 = vend-jump_ahead;
- if(ign) {
- for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {
- // table look-up is expensive, avoid it if possible
- if( anyc(s.charAt(k)) ) {
- if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))
- return k-sm1-offset;
- k += jump_ahead;
- }
- }
- for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {
- // table look-up is expensive, avoid it if possible
- if( anyc(s.charAt(k)) ) {
- if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))
- return k-sm1-offset;
- k += jump_ahead;
- if(k > vend) return -1;
- }
- }
- } else {
- for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {
- // table look-up is expensive, avoid it if possible
- if( x==s.charAt(k) ) {
- //if(src.regionMatches(0,s,k-sm1,sm1))
- if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))
- return k-sm1-offset;
- k += jump_ahead;
- }
- }
- for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {
- // table look-up is expensive, avoid it if possible
- if( x==s.charAt(k) ) {
- //if(src.regionMatches(0,s,k-sm1,sm1))
- if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))
- return k-sm1-offset;
- k += jump_ahead;
- if(k > vend) return -1;
- }
- }
- }
-
- return -1;
- }
-}
+//
+// This software is now distributed according to
+// the Lesser Gnu Public License. Please see
+// http://www.gnu.org/copyleft/lesser.txt for
+// the details.
+// -- Happy Computing!
+//
+package com.stevesoft.pat;
+
+import com.stevesoft.pat.wrap.StringWrap;
+
+/**
+ * Like Skip, but implements a
+ * Boyer-Moore-Horspool type search method that has been modified to be more
+ * like a "T-search" (see the Michael Tamm''s article in C'T, magazin fuer
+ * computer und technic, August 97 p 292). Yet another important source of
+ * information for me was the Deep Magic
+ * article on string searching. As of this writing, I can beat String's indexOf
+ * method in many cases.
+ *
+ * @see com.stevesoft.pat.Skip
+ * @see com.stevesoft.pat.Skip2
+ */
+public class SkipBMH extends Skip
+{
+ // This number could be 256, but I think it's
+ // big enough. Note, it must be a power of 2.
+ final int MAX_CHAR = 64;
+
+ final char[] skip = new char[MAX_CHAR];
+
+ int sm1;
+
+ int jump_ahead = 0;
+
+ char uc, lc, tc, x;
+
+ final boolean exact(char c)
+ {
+ return (ign && anyc(c)) || c == x;
+ }
+
+ final boolean anyc(char c)
+ {
+ return c == uc || c == lc || c == tc;
+ }
+
+ public SkipBMH(String pt, boolean ign)
+ {
+ this(pt, ign, 0);
+ }
+
+ public SkipBMH(String pt)
+ {
+ this(pt, false, 0);
+ }
+
+ public SkipBMH(String pt, boolean ign, int offset)
+ {
+ super(pt, ign, offset);
+ for (int k = 0; k < MAX_CHAR; k++)
+ {
+ skip[k] = (char) src.length();
+ }
+
+ sm1 = src.length() - 1;
+ x = src.charAt(sm1);
+ uc = CaseMgr.toUpperCase(x);
+ lc = CaseMgr.toLowerCase(x);
+ tc = CaseMgr.toTitleCase(x);
+
+ // We don't really want 65536 long arrays in skip[],
+ // so we mask of the higher bits. This can be combined
+ // with ignore case, so accounting for upper
+ // case costs us nothing extra.
+ for (int k = 0; k < src.length() - 1; k++)
+ {
+ char x_ = src.charAt(k);
+ if (ign)
+ {
+ char uc_ = CaseMgr.toUpperCase(x_);
+ char lc_ = CaseMgr.toLowerCase(x_);
+ char tc_ = CaseMgr.toTitleCase(x_);
+ skip[uc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
+ skip[lc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
+ skip[tc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
+ }
+ else
+ {
+ skip[x_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
+ }
+ }
+
+ // This trick can be found in the July issue of
+ // C-T magazine. This makes the method a type of
+ // "T-search."
+ jump_ahead = src.length() - 1;
+ for (int k = 0; k < src.length() - 1; k++)
+ {
+ char y = src.charAt(sm1 - k - 1);
+ if (exact(y))
+ {
+ jump_ahead = k;
+ break;
+ }
+ }
+ }
+
+ /**
+ * Set to true if you only want to compare two of the characters in the
+ * String.
+ */
+ final public int searchRegion(String s, int start, int end)
+ {
+ return find(s, start, end);
+ }
+
+ final public int searchFrom(String s, int start)
+ {
+ return find(s, start, s.length());
+ }
+
+ final public int search(String s)
+ {
+ return find(s, 0, s.length());
+ }
+
+ public int find(String s, int start, int end)
+ {
+ start += offset + sm1;
+ int vend = min(s.length() - 1, end + sm1 + offset), k;
+ int vend1 = vend - jump_ahead;
+ if (ign)
+ {
+ for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+ {
+ // table look-up is expensive, avoid it if possible
+ if (anyc(s.charAt(k)))
+ {
+ if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
+ {
+ return k - sm1 - offset;
+ }
+ k += jump_ahead;
+ }
+ }
+ for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+ {
+ // table look-up is expensive, avoid it if possible
+ if (anyc(s.charAt(k)))
+ {
+ if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
+ {
+ return k - sm1 - offset;
+ }
+ k += jump_ahead;
+ if (k > vend)
+ {
+ return -1;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+ {
+ // table look-up is expensive, avoid it if possible
+ if (x == s.charAt(k))
+ {
+ // if(src.regionMatches(0,s,k-sm1,sm1))
+ if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
+ {
+ return k - sm1 - offset;
+ }
+ k += jump_ahead;
+ }
+ }
+ for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+ {
+ // table look-up is expensive, avoid it if possible
+ if (x == s.charAt(k))
+ {
+ // if(src.regionMatches(0,s,k-sm1,sm1))
+ if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
+ {
+ return k - sm1 - offset;
+ }
+ k += jump_ahead;
+ if (k > vend)
+ {
+ return -1;
+ }
+ }
+ }
+ }
+
+ return -1;
+ }
+
+ public int find(StringLike s, int start, int end)
+ {
+ if (s instanceof StringWrap)
+ {
+ return find(s.toString(), start, end);
+ }
+ start += offset + sm1;
+ int vend = min(s.length() - 1, end + sm1 + offset), k;
+ int vend1 = vend - jump_ahead;
+ if (ign)
+ {
+ for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+ {
+ // table look-up is expensive, avoid it if possible
+ if (anyc(s.charAt(k)))
+ {
+ if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
+ {
+ return k - sm1 - offset;
+ }
+ k += jump_ahead;
+ }
+ }
+ for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+ {
+ // table look-up is expensive, avoid it if possible
+ if (anyc(s.charAt(k)))
+ {
+ if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
+ {
+ return k - sm1 - offset;
+ }
+ k += jump_ahead;
+ if (k > vend)
+ {
+ return -1;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+ {
+ // table look-up is expensive, avoid it if possible
+ if (x == s.charAt(k))
+ {
+ // if(src.regionMatches(0,s,k-sm1,sm1))
+ if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
+ {
+ return k - sm1 - offset;
+ }
+ k += jump_ahead;
+ }
+ }
+ for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+ {
+ // table look-up is expensive, avoid it if possible
+ if (x == s.charAt(k))
+ {
+ // if(src.regionMatches(0,s,k-sm1,sm1))
+ if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
+ {
+ return k - sm1 - offset;
+ }
+ k += jump_ahead;
+ if (k > vend)
+ {
+ return -1;
+ }
+ }
+ }
+ }
+
+ return -1;
+ }
+}