1 /*******************************************************************************
2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $(date) The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
20 *******************************************************************************/
22 // This software is now distributed according to
23 // the Lesser Gnu Public License. Please see
24 // http://www.gnu.org/copyleft/lesser.txt for
26 // -- Happy Computing!
28 package com.stevesoft.pat;
30 import com.stevesoft.pat.wrap.StringWrap;
33 * Like Skip, but implements a <a
34 * href="http://www.dcc.uchile.cl/~rbaeza/handbook/algs/7/713b.srch.p.html">
35 * Boyer-Moore-Horspool</a> type search method that has been modified to be more
36 * like a "T-search" (see the Michael Tamm''s article in <i>C'T, magazin fuer
37 * computer und technic</i>, August 97 p 292). Yet another important source of
38 * information for me was the <a
39 * href="http://www.go2net.com/people/paulp/deep/1997/05/14/"> Deep Magic</a>
40 * article on string searching. As of this writing, I can beat String's indexOf
41 * method in many cases.
43 * @see com.stevesoft.pat.Skip
44 * @see com.stevesoft.pat.Skip2
46 public class SkipBMH extends Skip
48 // This number could be 256, but I think it's
49 // big enough. Note, it must be a power of 2.
50 final int MAX_CHAR = 64;
52 final char[] skip = new char[MAX_CHAR];
60 final boolean exact(char c)
62 return (ign && anyc(c)) || c == x;
65 final boolean anyc(char c)
67 return c == uc || c == lc || c == tc;
70 public SkipBMH(String pt, boolean ign)
75 public SkipBMH(String pt)
80 public SkipBMH(String pt, boolean ign, int offset)
82 super(pt, ign, offset);
83 for (int k = 0; k < MAX_CHAR; k++)
85 skip[k] = (char) src.length();
88 sm1 = src.length() - 1;
90 uc = CaseMgr.toUpperCase(x);
91 lc = CaseMgr.toLowerCase(x);
92 tc = CaseMgr.toTitleCase(x);
94 // We don't really want 65536 long arrays in skip[],
95 // so we mask of the higher bits. This can be combined
96 // with ignore case, so accounting for upper
97 // case costs us nothing extra.
98 for (int k = 0; k < src.length() - 1; k++)
100 char x_ = src.charAt(k);
103 char uc_ = CaseMgr.toUpperCase(x_);
104 char lc_ = CaseMgr.toLowerCase(x_);
105 char tc_ = CaseMgr.toTitleCase(x_);
106 skip[uc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
107 skip[lc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
108 skip[tc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
112 skip[x_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
116 // This trick can be found in the July issue of
117 // C-T magazine. This makes the method a type of
119 jump_ahead = src.length() - 1;
120 for (int k = 0; k < src.length() - 1; k++)
122 char y = src.charAt(sm1 - k - 1);
132 * Set to true if you only want to compare two of the characters in the
135 final public int searchRegion(String s, int start, int end)
137 return find(s, start, end);
140 final public int searchFrom(String s, int start)
142 return find(s, start, s.length());
145 final public int search(String s)
147 return find(s, 0, s.length());
150 public int find(String s, int start, int end)
152 start += offset + sm1;
153 int vend = min(s.length() - 1, end + sm1 + offset), k;
154 int vend1 = vend - jump_ahead;
157 for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
159 // table look-up is expensive, avoid it if possible
160 if (anyc(s.charAt(k)))
162 if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
164 return k - sm1 - offset;
169 for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
171 // table look-up is expensive, avoid it if possible
172 if (anyc(s.charAt(k)))
174 if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
176 return k - sm1 - offset;
188 for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
190 // table look-up is expensive, avoid it if possible
191 if (x == s.charAt(k))
193 // if(src.regionMatches(0,s,k-sm1,sm1))
194 if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
196 return k - sm1 - offset;
201 for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
203 // table look-up is expensive, avoid it if possible
204 if (x == s.charAt(k))
206 // if(src.regionMatches(0,s,k-sm1,sm1))
207 if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
209 return k - sm1 - offset;
223 public int find(StringLike s, int start, int end)
225 if (s instanceof StringWrap)
227 return find(s.toString(), start, end);
229 start += offset + sm1;
230 int vend = min(s.length() - 1, end + sm1 + offset), k;
231 int vend1 = vend - jump_ahead;
234 for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
236 // table look-up is expensive, avoid it if possible
237 if (anyc(s.charAt(k)))
239 if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
241 return k - sm1 - offset;
246 for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
248 // table look-up is expensive, avoid it if possible
249 if (anyc(s.charAt(k)))
251 if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
253 return k - sm1 - offset;
265 for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
267 // table look-up is expensive, avoid it if possible
268 if (x == s.charAt(k))
270 // if(src.regionMatches(0,s,k-sm1,sm1))
271 if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
273 return k - sm1 - offset;
278 for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
280 // table look-up is expensive, avoid it if possible
281 if (x == s.charAt(k))
283 // if(src.regionMatches(0,s,k-sm1,sm1))
284 if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
286 return k - sm1 - offset;