//
// This software is now distributed according to
// the Lesser Gnu Public License. Please see
// http://www.gnu.org/copyleft/lesser.txt for
// the details.
// -- Happy Computing!
//
package com.stevesoft.pat;
import com.stevesoft.pat.wrap.StringWrap;
/**
* Like Skip, but implements a
* Boyer-Moore-Horspool type search method that has been modified to be more
* like a "T-search" (see the Michael Tamm''s article in C'T, magazin fuer
* computer und technic, August 97 p 292). Yet another important source of
* information for me was the Deep Magic
* article on string searching. As of this writing, I can beat String's indexOf
* method in many cases.
*
* @see com.stevesoft.pat.Skip
* @see com.stevesoft.pat.Skip2
*/
public class SkipBMH extends Skip
{
// This number could be 256, but I think it's
// big enough. Note, it must be a power of 2.
final int MAX_CHAR = 64;
final char[] skip = new char[MAX_CHAR];
int sm1;
int jump_ahead = 0;
char uc, lc, tc, x;
final boolean exact(char c)
{
return (ign && anyc(c)) || c == x;
}
final boolean anyc(char c)
{
return c == uc || c == lc || c == tc;
}
public SkipBMH(String pt, boolean ign)
{
this(pt, ign, 0);
}
public SkipBMH(String pt)
{
this(pt, false, 0);
}
public SkipBMH(String pt, boolean ign, int offset)
{
super(pt, ign, offset);
for (int k = 0; k < MAX_CHAR; k++)
{
skip[k] = (char) src.length();
}
sm1 = src.length() - 1;
x = src.charAt(sm1);
uc = CaseMgr.toUpperCase(x);
lc = CaseMgr.toLowerCase(x);
tc = CaseMgr.toTitleCase(x);
// We don't really want 65536 long arrays in skip[],
// so we mask of the higher bits. This can be combined
// with ignore case, so accounting for upper
// case costs us nothing extra.
for (int k = 0; k < src.length() - 1; k++)
{
char x_ = src.charAt(k);
if (ign)
{
char uc_ = CaseMgr.toUpperCase(x_);
char lc_ = CaseMgr.toLowerCase(x_);
char tc_ = CaseMgr.toTitleCase(x_);
skip[uc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
skip[lc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
skip[tc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
}
else
{
skip[x_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
}
}
// This trick can be found in the July issue of
// C-T magazine. This makes the method a type of
// "T-search."
jump_ahead = src.length() - 1;
for (int k = 0; k < src.length() - 1; k++)
{
char y = src.charAt(sm1 - k - 1);
if (exact(y))
{
jump_ahead = k;
break;
}
}
}
/**
* Set to true if you only want to compare two of the characters in the
* String.
*/
final public int searchRegion(String s, int start, int end)
{
return find(s, start, end);
}
final public int searchFrom(String s, int start)
{
return find(s, start, s.length());
}
final public int search(String s)
{
return find(s, 0, s.length());
}
public int find(String s, int start, int end)
{
start += offset + sm1;
int vend = min(s.length() - 1, end + sm1 + offset), k;
int vend1 = vend - jump_ahead;
if (ign)
{
for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
{
// table look-up is expensive, avoid it if possible
if (anyc(s.charAt(k)))
{
if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
{
return k - sm1 - offset;
}
k += jump_ahead;
}
}
for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
{
// table look-up is expensive, avoid it if possible
if (anyc(s.charAt(k)))
{
if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
{
return k - sm1 - offset;
}
k += jump_ahead;
if (k > vend)
{
return -1;
}
}
}
}
else
{
for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
{
// table look-up is expensive, avoid it if possible
if (x == s.charAt(k))
{
// if(src.regionMatches(0,s,k-sm1,sm1))
if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
{
return k - sm1 - offset;
}
k += jump_ahead;
}
}
for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
{
// table look-up is expensive, avoid it if possible
if (x == s.charAt(k))
{
// if(src.regionMatches(0,s,k-sm1,sm1))
if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
{
return k - sm1 - offset;
}
k += jump_ahead;
if (k > vend)
{
return -1;
}
}
}
}
return -1;
}
public int find(StringLike s, int start, int end)
{
if (s instanceof StringWrap)
{
return find(s.toString(), start, end);
}
start += offset + sm1;
int vend = min(s.length() - 1, end + sm1 + offset), k;
int vend1 = vend - jump_ahead;
if (ign)
{
for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
{
// table look-up is expensive, avoid it if possible
if (anyc(s.charAt(k)))
{
if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
{
return k - sm1 - offset;
}
k += jump_ahead;
}
}
for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
{
// table look-up is expensive, avoid it if possible
if (anyc(s.charAt(k)))
{
if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
{
return k - sm1 - offset;
}
k += jump_ahead;
if (k > vend)
{
return -1;
}
}
}
}
else
{
for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
{
// table look-up is expensive, avoid it if possible
if (x == s.charAt(k))
{
// if(src.regionMatches(0,s,k-sm1,sm1))
if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
{
return k - sm1 - offset;
}
k += jump_ahead;
}
}
for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
{
// table look-up is expensive, avoid it if possible
if (x == s.charAt(k))
{
// if(src.regionMatches(0,s,k-sm1,sm1))
if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
{
return k - sm1 - offset;
}
k += jump_ahead;
if (k > vend)
{
return -1;
}
}
}
}
return -1;
}
}