2 // This software is now distributed according to
3 // the Lesser Gnu Public License. Please see
4 // http://www.gnu.org/copyleft/lesser.txt for
8 package com.stevesoft.pat;
12 /** This class is just like oneChar, but doesn't worry about case. */
13 class FastChar extends oneChar
20 public int matchInternal(int p, Pthings pt)
22 return (p < pt.src.length() && pt.src.charAt(p) == c) ? nextMatch(
26 Pattern clone1(Hashtable h)
28 return new FastChar(c);
33 * This class is a hashtable keyed by Character Objects. It is used to match
34 * things of the form (?:a..|b..|c..|..) match with greater efficiency -- by
35 * using a Hashtable that indexes into the group of patterns.
37 class Branch extends Pattern
39 Hashtable h = new Hashtable();
41 // We need to keep track of the order
42 // of the keys -- if we don't then
43 // recompiling the output of toString
44 // may produce errors by re-ordering
45 // ()'s and changing the id number of
46 // the backreference associated with
48 Vector keys = new Vector();
54 Pattern clone1(Hashtable x)
56 Branch b = new Branch();
57 b.keys = (Vector) keys.clone();
61 for (int i = 0; i < keys.size(); i++)
63 Pattern p = (Pattern) h.get(keys.elementAt(i));
64 b.h.put(keys.elementAt(i), p.clone(x));
69 // this function eliminates Branches with 0 or 1 elements.
70 final Pattern reduce(boolean ignoreCase, boolean dontMinQ)
74 Enumeration e = h.keys();
75 Character c = (Character) e.nextElement();
77 if (ignoreCase || dontMinQ)
79 oc = new oneChar(c.charValue());
83 oc = new FastChar(c.charValue());
85 oc.next = (Pattern) h.get(c);
89 else if (h.size() == 0)
96 public patInt maxChars()
98 Enumeration e = h.keys();
99 patInt count = new patInt(0);
100 while (e.hasMoreElements())
102 Object key = e.nextElement();
103 Pattern pa = (Pattern) h.get(key);
104 patInt pi = pa.maxChars();
111 public patInt minChars()
113 Enumeration e = h.keys();
114 patInt count = new patInt(0);
115 while (e.hasMoreElements())
117 Object key = e.nextElement();
118 Pattern pa = (Pattern) h.get(key);
119 patInt pi = pa.minChars();
126 // adds a oneChar object to this Branch
127 void addc(oneChar o, boolean ignoreCase, boolean dontMinQ)
132 n = new NullPattern();
136 n = RegOpt.opt(n, ignoreCase, dontMinQ);
139 set(new Character(o.c), n, ignoreCase, dontMinQ);
144 set(new Character(o.altc), n, ignoreCase, dontMinQ);
146 if (o.c != o.altc2 && o.altc != o.altc2)
148 set(new Character(o.altc2), n, ignoreCase, dontMinQ);
153 void set(Character c, Pattern n, boolean igc, boolean dontMinQ)
155 Pattern p = (Pattern) h.get(c);
157 // This letter is not yet used in the Branch object.
158 // We need to add it.
163 // A NullPattern is prepended to an Or
164 // to prevent confusing this object.
165 // For example: (boo|bug) => (b(?:oo|ug))
166 // during this process. However, we
167 // want (b(?:oo|ell)|bug)
168 NullPattern np = new NullPattern();
176 // Make sure we remember the order things were
177 // added into the Branch object so that we can
178 // properly convert it to a String.
181 else if (p instanceof Or)
185 else if (p instanceof oneChar && n instanceof oneChar
186 && ((oneChar) p).c != ((oneChar) n).c)
188 Branch b = new Branch();
189 b.addc((oneChar) p, igc, dontMinQ);
190 b.addc((oneChar) n, igc, dontMinQ);
194 else if (p instanceof Branch && n instanceof oneChar)
196 ((Branch) p).addc((oneChar) n, igc, dontMinQ);
201 // Create an Or object to receive the variety
202 // of branches in the pattern if the current letter
203 // is matched. We do not attempt to make these
204 // sub-branches into a Branch object yet.
208 // Remove NullPattern from p -- it's no longer needed.
209 if (p instanceof NullPattern && p.parent == null && p.next != null)
219 Pattern optpat = RegOpt.opt(o, igc, dontMinQ);
221 optpat.setParent(this);
225 public String toString()
227 StringBuffer sb = new StringBuffer();
228 // should protect this...
229 sb.append("(?:(?#branch)"); // Hashtable)");
230 for (int i = 0; i < keys.size(); i++)
232 Character c = (Character) keys.elementAt(i);
235 if (i + 1 < keys.size())
241 sb.append(nextString());
242 return sb.toString();
245 public int matchInternal(int pos, Pthings pt)
247 if (pos >= pt.src.length())
251 Pattern n = (Pattern) h.get(new Character(pt.src.charAt(pos)));
256 if (pt.cbits != null && pt.cbits.get(pos))
260 return n.matchInternal(pos + 1, pt);
265 * This is just a place to put the optimizing function. It is never instantiated
266 * as an Object. It just sorts through the RegOpt looking for things it can
267 * change and make faster.
271 static Pattern opt(Pattern p, boolean ignoreCase, boolean dontMinQ)
277 if (p instanceof Bracket)
279 Bracket b = (Bracket) p;
280 // FastBracket is the only special
281 // optimized class to have its own
283 p = FastBracket.process(b, ignoreCase);
284 // if(!(p instanceof FastBracket)
285 // p = Switch.process(b,ignoreCase);
289 else if (p instanceof oneChar && !ignoreCase && !dontMinQ)
291 oneChar o = (oneChar) p;
292 p = new FastChar(o.c);
296 else if (p instanceof Or && ((Or) p).leftForm().equals("(?:")
297 && ((Or) p).v.size() == 1)
298 { // Eliminate this Or Object.
300 p = (Pattern) o.v.elementAt(0);
302 p = RegOpt.opt(p, ignoreCase, dontMinQ);
305 else if (p instanceof Or)
311 Branch b = new Branch();
313 for (int i = 0; i < v.size(); i++)
315 Pattern pp = (Pattern) v.elementAt(i);
316 // We want to have at least two oneChar's in
317 // the Or Object to consider making a Branch.
318 if (pp instanceof oneChar
319 && (b.h.size() >= 1 || (i + 1 < v.size() && v
320 .elementAt(i + 1) instanceof oneChar)))
322 b.addc((oneChar) pp, ignoreCase, dontMinQ);
326 if (b.keys.size() > 0)
328 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
336 o.addOr(opt(pp, ignoreCase, dontMinQ));
339 if (b.keys.size() > 0)
341 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
347 if (o.v.size() == 1 && o.leftForm().equals("(?:"))
348 { // Eliminate Or Object
349 p = (Pattern) o.v.elementAt(0);
351 p = RegOpt.opt(p, ignoreCase, dontMinQ);
355 else if (p instanceof FastMulti)
357 PatternSub ps = (PatternSub) p;
358 ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ);
360 else if (p instanceof Multi && safe4fm(((PatternSub) p).sub))
366 fm = new FastMulti(m.a, m.b, opt(m.sub, ignoreCase, dontMinQ));
367 } catch (RegSyntax rs)
370 fm.parent = m.parent;
371 fm.matchFewest = m.matchFewest;
377 p.next = opt(p.next, ignoreCase, dontMinQ);
382 final static boolean safe4fm(Pattern x)
386 if (x instanceof Bracket)
390 else if (x instanceof Range)
394 else if (x instanceof oneChar)
398 else if (x instanceof Any)
402 else if (x instanceof Custom
403 && ((Custom) x).v instanceof UniValidator)
407 else if (x instanceof Or)
410 if (!o.leftForm().equals("(?:"))
414 patInt lo = o.countMinChars();
415 patInt hi = o.countMaxChars();
420 for (int i = 0; i < o.v.size(); i++)
422 if (!safe4fm((Pattern) o.v.elementAt(i)))
437 * public static void setParents(Regex r) { setParents(r.thePattern,null); }
438 * static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub &&
439 * !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents(
440 * ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof
441 * Bracket)) { Or o = (Or)p; for(int i=0;i<o.v.size();i++)
442 * RegOpt.setParents((Pattern)o.v.elementAt(i),o); } else if(p instanceof
443 * Branch) { Branch b = (Branch)p; Enumeration e = b.h.keys();
444 * while(e.hasMoreElements()) { Object o = e.nextElement(); RegOpt.setParents(
445 * (Pattern)b.h.get(o), b); } } if(p.next == null) p.parent = x; else {
446 * p.parent = null; RegOpt.setParents(p.next,x); } }