2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
12 /** This class is just like oneChar, but doesn't worry about case. */
\r
13 class FastChar extends oneChar
\r
20 public int matchInternal(int p, Pthings pt)
\r
22 return (p < pt.src.length() && pt.src.charAt(p) == c) ? nextMatch(
\r
26 Pattern clone1(Hashtable h)
\r
28 return new FastChar(c);
\r
33 * This class is a hashtable keyed by Character Objects. It is used to match
\r
34 * things of the form (?:a..|b..|c..|..) match with greater efficiency -- by
\r
35 * using a Hashtable that indexes into the group of patterns.
\r
37 class Branch extends Pattern
\r
39 Hashtable h = new Hashtable();
\r
41 // We need to keep track of the order
\r
42 // of the keys -- if we don't then
\r
43 // recompiling the output of toString
\r
44 // may produce errors by re-ordering
\r
45 // ()'s and changing the id number of
\r
46 // the backreference associated with
\r
48 Vector keys = new Vector();
\r
54 Pattern clone1(Hashtable x)
\r
56 Branch b = new Branch();
\r
57 b.keys = (Vector) keys.clone();
\r
61 for (int i = 0; i < keys.size(); i++)
\r
63 Pattern p = (Pattern) h.get(keys.elementAt(i));
\r
64 b.h.put(keys.elementAt(i), p.clone(x));
\r
69 // this function eliminates Branches with 0 or 1 elements.
\r
70 final Pattern reduce(boolean ignoreCase, boolean dontMinQ)
\r
74 Enumeration e = h.keys();
\r
75 Character c = (Character) e.nextElement();
\r
77 if (ignoreCase || dontMinQ)
\r
79 oc = new oneChar(c.charValue());
\r
83 oc = new FastChar(c.charValue());
\r
85 oc.next = (Pattern) h.get(c);
\r
89 else if (h.size() == 0)
\r
96 public patInt maxChars()
\r
98 Enumeration e = h.keys();
\r
99 patInt count = new patInt(0);
\r
100 while (e.hasMoreElements())
\r
102 Object key = e.nextElement();
\r
103 Pattern pa = (Pattern) h.get(key);
\r
104 patInt pi = pa.maxChars();
\r
111 public patInt minChars()
\r
113 Enumeration e = h.keys();
\r
114 patInt count = new patInt(0);
\r
115 while (e.hasMoreElements())
\r
117 Object key = e.nextElement();
\r
118 Pattern pa = (Pattern) h.get(key);
\r
119 patInt pi = pa.minChars();
\r
126 // adds a oneChar object to this Branch
\r
127 void addc(oneChar o, boolean ignoreCase, boolean dontMinQ)
\r
129 Pattern n = o.next;
\r
132 n = new NullPattern();
\r
136 n = RegOpt.opt(n, ignoreCase, dontMinQ);
\r
139 set(new Character(o.c), n, ignoreCase, dontMinQ);
\r
144 set(new Character(o.altc), n, ignoreCase, dontMinQ);
\r
146 if (o.c != o.altc2 && o.altc != o.altc2)
\r
148 set(new Character(o.altc2), n, ignoreCase, dontMinQ);
\r
153 void set(Character c, Pattern n, boolean igc, boolean dontMinQ)
\r
155 Pattern p = (Pattern) h.get(c);
\r
157 // This letter is not yet used in the Branch object.
\r
158 // We need to add it.
\r
161 if (n instanceof Or)
\r
163 // A NullPattern is prepended to an Or
\r
164 // to prevent confusing this object.
\r
165 // For example: (boo|bug) => (b(?:oo|ug))
\r
166 // during this process. However, we
\r
167 // want (b(?:oo|ell)|bug)
\r
168 NullPattern np = new NullPattern();
\r
176 // Make sure we remember the order things were
\r
177 // added into the Branch object so that we can
\r
178 // properly convert it to a String.
\r
179 keys.addElement(c);
\r
181 else if (p instanceof Or)
\r
185 else if (p instanceof oneChar && n instanceof oneChar
\r
186 && ((oneChar) p).c != ((oneChar) n).c)
\r
188 Branch b = new Branch();
\r
189 b.addc((oneChar) p, igc, dontMinQ);
\r
190 b.addc((oneChar) n, igc, dontMinQ);
\r
194 else if (p instanceof Branch && n instanceof oneChar)
\r
196 ((Branch) p).addc((oneChar) n, igc, dontMinQ);
\r
201 // Create an Or object to receive the variety
\r
202 // of branches in the pattern if the current letter
\r
203 // is matched. We do not attempt to make these
\r
204 // sub-branches into a Branch object yet.
\r
208 // Remove NullPattern from p -- it's no longer needed.
\r
209 if (p instanceof NullPattern && p.parent == null && p.next != null)
\r
219 Pattern optpat = RegOpt.opt(o, igc, dontMinQ);
\r
221 optpat.setParent(this);
\r
225 public String toString()
\r
227 javajs.util.SB sb = new javajs.util.SB();
\r
228 // should protect this...
\r
229 sb.append("(?:(?#branch)"); // Hashtable)");
\r
230 for (int i = 0; i < keys.size(); i++)
\r
232 Character c = (Character) keys.elementAt(i);
\r
234 sb.append((String) h.get(c));
\r
235 if (i + 1 < keys.size())
\r
241 sb.append(nextString());
\r
242 return sb.toString();
\r
245 public int matchInternal(int pos, Pthings pt)
\r
247 if (pos >= pt.src.length())
\r
251 Pattern n = (Pattern) h.get(new Character(pt.src.charAt(pos)));
\r
256 if (pt.cbits != null && pt.cbits.get(pos))
\r
260 return n.matchInternal(pos + 1, pt);
\r
265 * This is just a place to put the optimizing function. It is never instantiated
\r
266 * as an Object. It just sorts through the RegOpt looking for things it can
\r
267 * change and make faster.
\r
269 public class RegOpt
\r
271 static Pattern opt(Pattern p, boolean ignoreCase, boolean dontMinQ)
\r
277 if (p instanceof Bracket)
\r
279 Bracket b = (Bracket) p;
\r
280 // FastBracket is the only special
\r
281 // optimized class to have its own
\r
283 p = FastBracket.process(b, ignoreCase);
\r
284 // if(!(p instanceof FastBracket)
\r
285 // p = Switch.process(b,ignoreCase);
\r
287 p.parent = b.parent;
\r
289 else if (p instanceof oneChar && !ignoreCase && !dontMinQ)
\r
291 oneChar o = (oneChar) p;
\r
292 p = new FastChar(o.c);
\r
294 p.parent = o.parent;
\r
296 else if (p instanceof Or && ((Or) p).leftForm().equals("(?:")
\r
297 && ((Or) p).v.size() == 1)
\r
298 { // Eliminate this Or Object.
\r
300 p = (Pattern) o.v.elementAt(0);
\r
302 p = RegOpt.opt(p, ignoreCase, dontMinQ);
\r
305 else if (p instanceof Or)
\r
310 o.v = new Vector();
\r
311 Branch b = new Branch();
\r
312 b.parent = o.parent;
\r
313 for (int i = 0; i < v.size(); i++)
\r
315 Pattern pp = (Pattern) v.elementAt(i);
\r
316 // We want to have at least two oneChar's in
\r
317 // the Or Object to consider making a Branch.
\r
318 if (pp instanceof oneChar
\r
319 && (b.h.size() >= 1 || (i + 1 < v.size() && v
\r
320 .elementAt(i + 1) instanceof oneChar)))
\r
322 b.addc((oneChar) pp, ignoreCase, dontMinQ);
\r
326 if (b.keys.size() > 0)
\r
328 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
\r
333 b.parent = o.parent;
\r
336 o.addOr(opt(pp, ignoreCase, dontMinQ));
\r
339 if (b.keys.size() > 0)
\r
341 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
\r
347 if (o.v.size() == 1 && o.leftForm().equals("(?:"))
\r
348 { // Eliminate Or Object
\r
349 p = (Pattern) o.v.elementAt(0);
\r
351 p = RegOpt.opt(p, ignoreCase, dontMinQ);
\r
355 else if (p instanceof FastMulti)
\r
357 PatternSub ps = (PatternSub) p;
\r
358 ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ);
\r
360 else if (p instanceof Multi && safe4fm(((PatternSub) p).sub))
\r
362 Multi m = (Multi) p;
\r
363 FastMulti fm = null;
\r
366 fm = new FastMulti(m.a, m.b, opt(m.sub, ignoreCase, dontMinQ));
\r
367 } catch (RegSyntax rs)
\r
370 fm.parent = m.parent;
\r
371 fm.matchFewest = m.matchFewest;
\r
375 if (p.next != null)
\r
377 p.next = opt(p.next, ignoreCase, dontMinQ);
\r
382 final static boolean safe4fm(Pattern x)
\r
386 if (x instanceof Bracket)
\r
390 else if (x instanceof Range)
\r
394 else if (x instanceof oneChar)
\r
398 else if (x instanceof Any)
\r
402 else if (x instanceof Custom
\r
403 && ((Custom) x).v instanceof UniValidator)
\r
407 else if (x instanceof Or)
\r
410 if (!o.leftForm().equals("(?:"))
\r
414 patInt lo = o.countMinChars();
\r
415 patInt hi = o.countMaxChars();
\r
416 if (!lo.equals(hi))
\r
420 for (int i = 0; i < o.v.size(); i++)
\r
422 if (!safe4fm((Pattern) o.v.elementAt(i)))
\r
437 * public static void setParents(Regex r) { setParents(r.thePattern,null); }
\r
438 * static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub &&
\r
439 * !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents(
\r
440 * ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof
\r
441 * Bracket)) { Or o = (Or)p; for(int i=0;i<o.v.size();i++)
\r
442 * RegOpt.setParents((Pattern)o.v.elementAt(i),o); } else if(p instanceof
\r
443 * Branch) { Branch b = (Branch)p; Enumeration e = b.h.keys();
\r
444 * while(e.hasMoreElements()) { Object o = e.nextElement(); RegOpt.setParents(
\r
445 * (Pattern)b.h.get(o), b); } } if(p.next == null) p.parent = x; else {
\r
446 * p.parent = null; RegOpt.setParents(p.next,x); } }
\r