2 // This software is now distributed according to
3 // the Lesser Gnu Public License. Please see
4 // http://www.gnu.org/copyleft/lesser.txt for
8 package com.stevesoft.pat;
10 import java.util.Enumeration;
11 import java.util.Hashtable;
12 import java.util.Vector;
14 /** This class is just like oneChar, but doesn't worry about case. */
15 class FastChar extends oneChar
22 public int matchInternal(int p, Pthings pt)
24 return (p < pt.src.length() && pt.src.charAt(p) == c)
25 ? nextMatch(p + 1, pt)
29 Pattern clone1(Hashtable h)
31 return new FastChar(c);
36 * This class is a hashtable keyed by Character Objects. It is used to match
37 * things of the form (?:a..|b..|c..|..) match with greater efficiency -- by
38 * using a Hashtable that indexes into the group of patterns.
40 class Branch extends Pattern
42 Hashtable h = new Hashtable();
44 // We need to keep track of the order
45 // of the keys -- if we don't then
46 // recompiling the output of toString
47 // may produce errors by re-ordering
48 // ()'s and changing the id number of
49 // the backreference associated with
51 Vector keys = new Vector();
57 Pattern clone1(Hashtable x)
59 Branch b = new Branch();
60 b.keys = (Vector) keys.clone();
64 for (int i = 0; i < keys.size(); i++)
66 Pattern p = (Pattern) h.get(keys.elementAt(i));
67 b.h.put(keys.elementAt(i), p.clone(x));
72 // this function eliminates Branches with 0 or 1 elements.
73 final Pattern reduce(boolean ignoreCase, boolean dontMinQ)
77 Enumeration e = h.keys();
78 Character c = (Character) e.nextElement();
80 if (ignoreCase || dontMinQ)
82 oc = new oneChar(c.charValue());
86 oc = new FastChar(c.charValue());
88 oc.next = (Pattern) h.get(c);
92 else if (h.size() == 0)
99 public patInt maxChars()
101 Enumeration e = h.keys();
102 patInt count = new patInt(0);
103 while (e.hasMoreElements())
105 Object key = e.nextElement();
106 Pattern pa = (Pattern) h.get(key);
107 patInt pi = pa.maxChars();
114 public patInt minChars()
116 Enumeration e = h.keys();
117 patInt count = new patInt(0);
118 while (e.hasMoreElements())
120 Object key = e.nextElement();
121 Pattern pa = (Pattern) h.get(key);
122 patInt pi = pa.minChars();
129 // adds a oneChar object to this Branch
130 void addc(oneChar o, boolean ignoreCase, boolean dontMinQ)
135 n = new NullPattern();
139 n = RegOpt.opt(n, ignoreCase, dontMinQ);
142 set(Character.valueOf(o.c), n, ignoreCase, dontMinQ);
147 set(Character.valueOf(o.altc), n, ignoreCase, dontMinQ);
149 if (o.c != o.altc2 && o.altc != o.altc2)
151 set(Character.valueOf(o.altc2), n, ignoreCase, dontMinQ);
156 void set(Character c, Pattern n, boolean igc, boolean dontMinQ)
158 Pattern p = (Pattern) h.get(c);
160 // This letter is not yet used in the Branch object.
161 // We need to add it.
166 // A NullPattern is prepended to an Or
167 // to prevent confusing this object.
168 // For example: (boo|bug) => (b(?:oo|ug))
169 // during this process. However, we
170 // want (b(?:oo|ell)|bug)
171 NullPattern np = new NullPattern();
179 // Make sure we remember the order things were
180 // added into the Branch object so that we can
181 // properly convert it to a String.
184 else if (p instanceof Or)
188 else if (p instanceof oneChar && n instanceof oneChar
189 && ((oneChar) p).c != ((oneChar) n).c)
191 Branch b = new Branch();
192 b.addc((oneChar) p, igc, dontMinQ);
193 b.addc((oneChar) n, igc, dontMinQ);
197 else if (p instanceof Branch && n instanceof oneChar)
199 ((Branch) p).addc((oneChar) n, igc, dontMinQ);
204 // Create an Or object to receive the variety
205 // of branches in the pattern if the current letter
206 // is matched. We do not attempt to make these
207 // sub-branches into a Branch object yet.
211 // Remove NullPattern from p -- it's no longer needed.
212 if (p instanceof NullPattern && p.parent == null && p.next != null)
222 Pattern optpat = RegOpt.opt(o, igc, dontMinQ);
224 optpat.setParent(this);
228 public String toString()
230 StringBuffer sb = new StringBuffer();
231 // should protect this...
232 sb.append("(?:(?#branch)"); // Hashtable)");
233 for (int i = 0; i < keys.size(); i++)
235 Character c = (Character) keys.elementAt(i);
238 if (i + 1 < keys.size())
244 sb.append(nextString());
245 return sb.toString();
248 public int matchInternal(int pos, Pthings pt)
250 if (pos >= pt.src.length())
254 Pattern n = (Pattern) h.get(Character.valueOf(pt.src.charAt(pos)));
259 if (pt.cbits != null && pt.cbits.get(pos))
263 return n.matchInternal(pos + 1, pt);
268 * This is just a place to put the optimizing function. It is never instantiated
269 * as an Object. It just sorts through the RegOpt looking for things it can
270 * change and make faster.
274 static Pattern opt(Pattern p, boolean ignoreCase, boolean dontMinQ)
280 if (p instanceof Bracket)
282 Bracket b = (Bracket) p;
283 // FastBracket is the only special
284 // optimized class to have its own
286 p = FastBracket.process(b, ignoreCase);
287 // if(!(p instanceof FastBracket)
288 // p = Switch.process(b,ignoreCase);
292 else if (p instanceof oneChar && !ignoreCase && !dontMinQ)
294 oneChar o = (oneChar) p;
295 p = new FastChar(o.c);
299 else if (p instanceof Or && ((Or) p).leftForm().equals("(?:")
300 && ((Or) p).v.size() == 1)
301 { // Eliminate this Or Object.
303 p = (Pattern) o.v.elementAt(0);
305 p = RegOpt.opt(p, ignoreCase, dontMinQ);
308 else if (p instanceof Or)
314 Branch b = new Branch();
316 for (int i = 0; i < v.size(); i++)
318 Pattern pp = (Pattern) v.elementAt(i);
319 // We want to have at least two oneChar's in
320 // the Or Object to consider making a Branch.
321 if (pp instanceof oneChar && (b.h.size() >= 1 || (i + 1 < v.size()
322 && v.elementAt(i + 1) instanceof oneChar)))
324 b.addc((oneChar) pp, ignoreCase, dontMinQ);
328 if (b.keys.size() > 0)
330 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
338 o.addOr(opt(pp, ignoreCase, dontMinQ));
341 if (b.keys.size() > 0)
343 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
349 if (o.v.size() == 1 && o.leftForm().equals("(?:"))
350 { // Eliminate Or Object
351 p = (Pattern) o.v.elementAt(0);
353 p = RegOpt.opt(p, ignoreCase, dontMinQ);
357 else if (p instanceof FastMulti)
359 PatternSub ps = (PatternSub) p;
360 ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ);
362 else if (p instanceof Multi && safe4fm(((PatternSub) p).sub))
368 fm = new FastMulti(m.a, m.b, opt(m.sub, ignoreCase, dontMinQ));
369 } catch (RegSyntax rs)
372 fm.parent = m.parent;
373 fm.matchFewest = m.matchFewest;
379 p.next = opt(p.next, ignoreCase, dontMinQ);
384 final static boolean safe4fm(Pattern x)
388 if (x instanceof Bracket)
392 else if (x instanceof Range)
396 else if (x instanceof oneChar)
400 else if (x instanceof Any)
404 else if (x instanceof Custom
405 && ((Custom) x).v instanceof UniValidator)
409 else if (x instanceof Or)
412 if (!o.leftForm().equals("(?:"))
416 patInt lo = o.countMinChars();
417 patInt hi = o.countMaxChars();
422 for (int i = 0; i < o.v.size(); i++)
424 if (!safe4fm((Pattern) o.v.elementAt(i)))
439 * public static void setParents(Regex r) { setParents(r.thePattern,null); }
440 * static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub &&
441 * !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents(
442 * ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof
443 * Bracket)) { Or o = (Or)p; for(int i=0;i<o.v.size();i++)
444 * RegOpt.setParents((Pattern)o.v.elementAt(i),o); } else if(p instanceof
445 * Branch) { Branch b = (Branch)p; Enumeration e = b.h.keys();
446 * while(e.hasMoreElements()) { Object o = e.nextElement(); RegOpt.setParents(
447 * (Pattern)b.h.get(o), b); } } if(p.next == null) p.parent = x; else {
448 * p.parent = null; RegOpt.setParents(p.next,x); } }