// // This software is now distributed according to // the Lesser Gnu Public License. Please see // http://www.gnu.org/copyleft/lesser.txt for // the details. // -- Happy Computing! // package com.stevesoft.pat; import com.stevesoft.pat.wrap.StringWrap; /** Like Skip, but implements a Boyer-Moore-Horspool type search method that has been modified to be more like a "T-search" (see the Michael Tamm''s article in C'T, magazin fuer computer und technic, August 97 p 292). Yet another important source of information for me was the Deep Magic article on string searching. As of this writing, I can beat String's indexOf method in many cases. @see com.stevesoft.pat.Skip @see com.stevesoft.pat.Skip2 */ public class SkipBMH extends Skip { // This number could be 256, but I think it's // big enough. Note, it must be a power of 2. final int MAX_CHAR = 64; final char[] skip = new char[MAX_CHAR]; int sm1; int jump_ahead = 0; char uc,lc,tc,x; final boolean exact(char c) { return (ign && anyc(c))||c==x; } final boolean anyc(char c) { return c==uc||c==lc||c==tc; } public SkipBMH(String pt,boolean ign) { this(pt,ign,0); } public SkipBMH(String pt) { this(pt,false,0); } public SkipBMH(String pt,boolean ign,int offset) { super(pt,ign,offset); for(int k=0;k vend) return -1; } } } else { for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { // table look-up is expensive, avoid it if possible if( x==s.charAt(k) ) { //if(src.regionMatches(0,s,k-sm1,sm1)) if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) return k-sm1-offset; k += jump_ahead; } } for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { // table look-up is expensive, avoid it if possible if( x==s.charAt(k) ) { //if(src.regionMatches(0,s,k-sm1,sm1)) if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) return k-sm1-offset; k += jump_ahead; if(k > vend) return -1; } } } return -1; } public int find(StringLike s,int start,int end) { if(s instanceof StringWrap) return find(s.toString(),start,end); start += offset+sm1; int vend = min(s.length()-1,end+sm1+offset),k; int vend1 = vend-jump_ahead; if(ign) { for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { // table look-up is expensive, avoid it if possible if( anyc(s.charAt(k)) ) { if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1)) return k-sm1-offset; k += jump_ahead; } } for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { // table look-up is expensive, avoid it if possible if( anyc(s.charAt(k)) ) { if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1)) return k-sm1-offset; k += jump_ahead; if(k > vend) return -1; } } } else { for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { // table look-up is expensive, avoid it if possible if( x==s.charAt(k) ) { //if(src.regionMatches(0,s,k-sm1,sm1)) if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) return k-sm1-offset; k += jump_ahead; } } for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { // table look-up is expensive, avoid it if possible if( x==s.charAt(k) ) { //if(src.regionMatches(0,s,k-sm1,sm1)) if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) return k-sm1-offset; k += jump_ahead; if(k > vend) return -1; } } } return -1; } }