//\r
package com.stevesoft.pat;\r
\r
-import com.stevesoft.pat.wrap.StringWrap;\r
+import com.stevesoft.pat.wrap.*;\r
\r
/** Like Skip, but implements a\r
- <a href="http://www.dcc.uchile.cl/~rbaeza/handbook/algs/7/713b.srch.p.html">\r
+ <a href="http://www.dcc.uchile.cl/~rbaeza/handbook/algs/7/713b.srch.p.html">\r
Boyer-Moore-Horspool</a> type search\r
method that has been modified to be more like a "T-search" (see\r
the Michael Tamm''s article in <i>C'T, magazin fuer computer und technic</i>, August 97\r
beat String's indexOf method in many cases.\r
@see com.stevesoft.pat.Skip\r
@see com.stevesoft.pat.Skip2\r
- */\r
-public class SkipBMH extends Skip {\r
- // This number could be 256, but I think it's\r
- // big enough. Note, it must be a power of 2.\r
- final int MAX_CHAR = 64;\r
- final char[] skip = new char[MAX_CHAR];\r
- int sm1;\r
- int jump_ahead = 0;\r
- char uc,lc,tc,x;\r
- final boolean exact(char c) {\r
- return (ign && anyc(c))||c==x;\r
+ */\r
+public class SkipBMH\r
+ extends Skip\r
+{\r
+ // This number could be 256, but I think it's\r
+ // big enough. Note, it must be a power of 2.\r
+ final int MAX_CHAR = 64;\r
+ final char[] skip = new char[MAX_CHAR];\r
+ int sm1;\r
+ int jump_ahead = 0;\r
+ char uc, lc, tc, x;\r
+ final boolean exact(char c)\r
+ {\r
+ return (ign && anyc(c)) || c == x;\r
+ }\r
+\r
+ final boolean anyc(char c)\r
+ {\r
+ return c == uc || c == lc || c == tc;\r
+ }\r
+\r
+ public SkipBMH(String pt, boolean ign)\r
+ {\r
+ this(pt, ign, 0);\r
+ }\r
+\r
+ public SkipBMH(String pt)\r
+ {\r
+ this(pt, false, 0);\r
+ }\r
+\r
+ public SkipBMH(String pt, boolean ign, int offset)\r
+ {\r
+ super(pt, ign, offset);\r
+ for (int k = 0; k < MAX_CHAR; k++)\r
+ {\r
+ skip[k] = (char) src.length();\r
}\r
- final boolean anyc(char c) {\r
- return c==uc||c==lc||c==tc;\r
+\r
+ sm1 = src.length() - 1;\r
+ x = src.charAt(sm1);\r
+ uc = CaseMgr.toUpperCase(x);\r
+ lc = CaseMgr.toLowerCase(x);\r
+ tc = CaseMgr.toTitleCase(x);\r
+\r
+ // We don't really want 65536 long arrays in skip[],\r
+ // so we mask of the higher bits. This can be combined\r
+ // with ignore case, so accounting for upper\r
+ // case costs us nothing extra.\r
+ for (int k = 0; k < src.length() - 1; k++)\r
+ {\r
+ char x_ = src.charAt(k);\r
+ if (ign)\r
+ {\r
+ char uc_ = CaseMgr.toUpperCase(x_);\r
+ char lc_ = CaseMgr.toLowerCase(x_);\r
+ char tc_ = CaseMgr.toTitleCase(x_);\r
+ skip[uc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);\r
+ skip[lc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);\r
+ skip[tc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);\r
+ }\r
+ else\r
+ {\r
+ skip[x_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);\r
+ }\r
}\r
- public SkipBMH(String pt,boolean ign) { this(pt,ign,0); }\r
- public SkipBMH(String pt) { this(pt,false,0); }\r
- public SkipBMH(String pt,boolean ign,int offset) {\r
- super(pt,ign,offset);\r
- for(int k=0;k<MAX_CHAR;k++)\r
- skip[k] = (char)src.length();\r
-\r
- sm1 = src.length()-1;\r
- x = src.charAt(sm1);\r
- uc=CaseMgr.toUpperCase(x);\r
- lc=CaseMgr.toLowerCase(x);\r
- tc=CaseMgr.toTitleCase(x);\r
-\r
- // We don't really want 65536 long arrays in skip[],\r
- // so we mask of the higher bits. This can be combined\r
- // with ignore case, so accounting for upper\r
- // case costs us nothing extra.\r
- for(int k=0;k<src.length()-1;k++) {\r
- char x_ = src.charAt(k);\r
- if(ign) {\r
- char uc_ = CaseMgr.toUpperCase(x_);\r
- char lc_ = CaseMgr.toLowerCase(x_);\r
- char tc_ = CaseMgr.toTitleCase(x_);\r
- skip[uc_ & (MAX_CHAR-1)]=(char)(src.length()-k-1);\r
- skip[lc_ & (MAX_CHAR-1)]=(char)(src.length()-k-1);\r
- skip[tc_ & (MAX_CHAR-1)]=(char)(src.length()-k-1);\r
- } else\r
- skip[x_ & (MAX_CHAR-1)] = (char)(src.length()-k-1);\r
- }\r
\r
- // This trick can be found in the July issue of\r
- // C-T magazine. This makes the method a type of\r
- // "T-search."\r
- jump_ahead = src.length()-1;\r
- for(int k=0;k<src.length()-1;k++) {\r
- char y=src.charAt(sm1-k-1);\r
- if(exact(y)) {\r
- jump_ahead = k;\r
- break;\r
- }\r
+ // This trick can be found in the July issue of\r
+ // C-T magazine. This makes the method a type of\r
+ // "T-search."\r
+ jump_ahead = src.length() - 1;\r
+ for (int k = 0; k < src.length() - 1; k++)\r
+ {\r
+ char y = src.charAt(sm1 - k - 1);\r
+ if (exact(y))\r
+ {\r
+ jump_ahead = k;\r
+ break;\r
+ }\r
+ }\r
+ }\r
+\r
+ /** Set to true if you only want to compare two of the\r
+ characters in the String. */\r
+ final public int searchRegion(String s, int start, int end)\r
+ {\r
+ return find(s, start, end);\r
+ }\r
+\r
+ final public int searchFrom(String s, int start)\r
+ {\r
+ return find(s, start, s.length());\r
+ }\r
+\r
+ final public int search(String s)\r
+ {\r
+ return find(s, 0, s.length());\r
+ }\r
+\r
+ public int find(String s, int start, int end)\r
+ {\r
+ start += offset + sm1;\r
+ int vend = min(s.length() - 1, end + sm1 + offset), k;\r
+ int vend1 = vend - jump_ahead;\r
+ if (ign)\r
+ {\r
+ for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])\r
+ {\r
+ // table look-up is expensive, avoid it if possible\r
+ if (anyc(s.charAt(k)))\r
+ {\r
+ if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))\r
+ {\r
+ return k - sm1 - offset;\r
+ }\r
+ k += jump_ahead;\r
+ }\r
+ }\r
+ for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])\r
+ {\r
+ // table look-up is expensive, avoid it if possible\r
+ if (anyc(s.charAt(k)))\r
+ {\r
+ if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))\r
+ {\r
+ return k - sm1 - offset;\r
+ }\r
+ k += jump_ahead;\r
+ if (k > vend)\r
+ {\r
+ return -1;\r
+ }\r
}\r
+ }\r
}\r
- /** Set to true if you only want to compare two of the\r
- characters in the String. */\r
- final public int searchRegion(String s,int start,int end) {\r
- return find(s,start,end);\r
+ else\r
+ {\r
+ for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])\r
+ {\r
+ // table look-up is expensive, avoid it if possible\r
+ if (x == s.charAt(k))\r
+ {\r
+ //if(src.regionMatches(0,s,k-sm1,sm1))\r
+ if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))\r
+ {\r
+ return k - sm1 - offset;\r
+ }\r
+ k += jump_ahead;\r
+ }\r
+ }\r
+ for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])\r
+ {\r
+ // table look-up is expensive, avoid it if possible\r
+ if (x == s.charAt(k))\r
+ {\r
+ //if(src.regionMatches(0,s,k-sm1,sm1))\r
+ if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))\r
+ {\r
+ return k - sm1 - offset;\r
+ }\r
+ k += jump_ahead;\r
+ if (k > vend)\r
+ {\r
+ return -1;\r
+ }\r
+ }\r
+ }\r
}\r
- final public int searchFrom(String s,int start) {\r
- return find(s,start,s.length());\r
+\r
+ return -1;\r
+ }\r
+\r
+ public int find(StringLike s, int start, int end)\r
+ {\r
+ if (s instanceof StringWrap)\r
+ {\r
+ return find(s.toString(), start, end);\r
}\r
- final public int search(String s) { return find(s,0,s.length()); }\r
- public int find(String s,int start,int end) {\r
- start += offset+sm1;\r
- int vend = min(s.length()-1,end+sm1+offset),k;\r
- int vend1 = vend-jump_ahead;\r
- if(ign) {\r
- for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
- // table look-up is expensive, avoid it if possible\r
- if( anyc(s.charAt(k)) ) {\r
- if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))\r
- return k-sm1-offset;\r
- k += jump_ahead;\r
- }\r
- }\r
- for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
- // table look-up is expensive, avoid it if possible\r
- if( anyc(s.charAt(k)) ) {\r
- if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))\r
- return k-sm1-offset;\r
- k += jump_ahead;\r
- if(k > vend) return -1;\r
- }\r
- }\r
- } else {\r
- for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
- // table look-up is expensive, avoid it if possible\r
- if( x==s.charAt(k) ) {\r
- //if(src.regionMatches(0,s,k-sm1,sm1))\r
- if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))\r
- return k-sm1-offset;\r
- k += jump_ahead;\r
- }\r
- }\r
- for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
- // table look-up is expensive, avoid it if possible\r
- if( x==s.charAt(k) ) {\r
- //if(src.regionMatches(0,s,k-sm1,sm1))\r
- if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))\r
- return k-sm1-offset;\r
- k += jump_ahead;\r
- if(k > vend) return -1;\r
- }\r
- }\r
+ start += offset + sm1;\r
+ int vend = min(s.length() - 1, end + sm1 + offset), k;\r
+ int vend1 = vend - jump_ahead;\r
+ if (ign)\r
+ {\r
+ for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])\r
+ {\r
+ // table look-up is expensive, avoid it if possible\r
+ if (anyc(s.charAt(k)))\r
+ {\r
+ if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))\r
+ {\r
+ return k - sm1 - offset;\r
+ }\r
+ k += jump_ahead;\r
}\r
-\r
- return -1;\r
+ }\r
+ for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])\r
+ {\r
+ // table look-up is expensive, avoid it if possible\r
+ if (anyc(s.charAt(k)))\r
+ {\r
+ if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))\r
+ {\r
+ return k - sm1 - offset;\r
+ }\r
+ k += jump_ahead;\r
+ if (k > vend)\r
+ {\r
+ return -1;\r
+ }\r
+ }\r
+ }\r
}\r
- public int find(StringLike s,int start,int end) {\r
- if(s instanceof StringWrap)\r
- return find(s.toString(),start,end);\r
- start += offset+sm1;\r
- int vend = min(s.length()-1,end+sm1+offset),k;\r
- int vend1 = vend-jump_ahead;\r
- if(ign) {\r
- for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
- // table look-up is expensive, avoid it if possible\r
- if( anyc(s.charAt(k)) ) {\r
- if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))\r
- return k-sm1-offset;\r
- k += jump_ahead;\r
- }\r
- }\r
- for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
- // table look-up is expensive, avoid it if possible\r
- if( anyc(s.charAt(k)) ) {\r
- if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))\r
- return k-sm1-offset;\r
- k += jump_ahead;\r
- if(k > vend) return -1;\r
- }\r
- }\r
- } else {\r
- for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
- // table look-up is expensive, avoid it if possible\r
- if( x==s.charAt(k) ) {\r
- //if(src.regionMatches(0,s,k-sm1,sm1))\r
- if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))\r
- return k-sm1-offset;\r
- k += jump_ahead;\r
- }\r
- }\r
- for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
- // table look-up is expensive, avoid it if possible\r
- if( x==s.charAt(k) ) {\r
- //if(src.regionMatches(0,s,k-sm1,sm1))\r
- if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))\r
- return k-sm1-offset;\r
- k += jump_ahead;\r
- if(k > vend) return -1;\r
- }\r
- }\r
+ else\r
+ {\r
+ for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])\r
+ {\r
+ // table look-up is expensive, avoid it if possible\r
+ if (x == s.charAt(k))\r
+ {\r
+ //if(src.regionMatches(0,s,k-sm1,sm1))\r
+ if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))\r
+ {\r
+ return k - sm1 - offset;\r
+ }\r
+ k += jump_ahead;\r
}\r
-\r
- return -1;\r
+ }\r
+ for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])\r
+ {\r
+ // table look-up is expensive, avoid it if possible\r
+ if (x == s.charAt(k))\r
+ {\r
+ //if(src.regionMatches(0,s,k-sm1,sm1))\r
+ if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))\r
+ {\r
+ return k - sm1 - offset;\r
+ }\r
+ k += jump_ahead;\r
+ if (k > vend)\r
+ {\r
+ return -1;\r
+ }\r
+ }\r
+ }\r
}\r
+\r
+ return -1;\r
+ }\r
}\r