2 // This software is now distributed according to
3 // the Lesser Gnu Public License. Please see
4 // http://www.gnu.org/copyleft/lesser.txt for
8 package com.stevesoft.pat;
10 import java.util.Enumeration;
11 import java.util.Hashtable;
12 import java.util.Vector;
14 /** This class is just like oneChar, but doesn't worry about case. */
15 class FastChar extends oneChar
22 public int matchInternal(int p, Pthings pt)
24 return (p < pt.src.length() && pt.src.charAt(p) == c) ? nextMatch(
28 Pattern clone1(Hashtable h)
30 return new FastChar(c);
35 * This class is a hashtable keyed by Character Objects. It is used to match
36 * things of the form (?:a..|b..|c..|..) match with greater efficiency -- by
37 * using a Hashtable that indexes into the group of patterns.
39 class Branch extends Pattern
41 Hashtable h = new Hashtable();
43 // We need to keep track of the order
44 // of the keys -- if we don't then
45 // recompiling the output of toString
46 // may produce errors by re-ordering
47 // ()'s and changing the id number of
48 // the backreference associated with
50 Vector keys = new Vector();
56 Pattern clone1(Hashtable x)
58 Branch b = new Branch();
59 b.keys = (Vector) keys.clone();
63 for (int i = 0; i < keys.size(); i++)
65 Pattern p = (Pattern) h.get(keys.elementAt(i));
66 b.h.put(keys.elementAt(i), p.clone(x));
71 // this function eliminates Branches with 0 or 1 elements.
72 final Pattern reduce(boolean ignoreCase, boolean dontMinQ)
76 Enumeration e = h.keys();
77 Character c = (Character) e.nextElement();
79 if (ignoreCase || dontMinQ)
81 oc = new oneChar(c.charValue());
85 oc = new FastChar(c.charValue());
87 oc.next = (Pattern) h.get(c);
91 else if (h.size() == 0)
98 public patInt maxChars()
100 Enumeration e = h.keys();
101 patInt count = new patInt(0);
102 while (e.hasMoreElements())
104 Object key = e.nextElement();
105 Pattern pa = (Pattern) h.get(key);
106 patInt pi = pa.maxChars();
113 public patInt minChars()
115 Enumeration e = h.keys();
116 patInt count = new patInt(0);
117 while (e.hasMoreElements())
119 Object key = e.nextElement();
120 Pattern pa = (Pattern) h.get(key);
121 patInt pi = pa.minChars();
128 // adds a oneChar object to this Branch
129 void addc(oneChar o, boolean ignoreCase, boolean dontMinQ)
134 n = new NullPattern();
138 n = RegOpt.opt(n, ignoreCase, dontMinQ);
141 set(Character.valueOf(o.c), n, ignoreCase, dontMinQ);
146 set(Character.valueOf(o.altc), n, ignoreCase, dontMinQ);
148 if (o.c != o.altc2 && o.altc != o.altc2)
150 set(Character.valueOf(o.altc2), n, ignoreCase, dontMinQ);
155 void set(Character c, Pattern n, boolean igc, boolean dontMinQ)
157 Pattern p = (Pattern) h.get(c);
159 // This letter is not yet used in the Branch object.
160 // We need to add it.
165 // A NullPattern is prepended to an Or
166 // to prevent confusing this object.
167 // For example: (boo|bug) => (b(?:oo|ug))
168 // during this process. However, we
169 // want (b(?:oo|ell)|bug)
170 NullPattern np = new NullPattern();
178 // Make sure we remember the order things were
179 // added into the Branch object so that we can
180 // properly convert it to a String.
183 else if (p instanceof Or)
187 else if (p instanceof oneChar && n instanceof oneChar
188 && ((oneChar) p).c != ((oneChar) n).c)
190 Branch b = new Branch();
191 b.addc((oneChar) p, igc, dontMinQ);
192 b.addc((oneChar) n, igc, dontMinQ);
196 else if (p instanceof Branch && n instanceof oneChar)
198 ((Branch) p).addc((oneChar) n, igc, dontMinQ);
203 // Create an Or object to receive the variety
204 // of branches in the pattern if the current letter
205 // is matched. We do not attempt to make these
206 // sub-branches into a Branch object yet.
210 // Remove NullPattern from p -- it's no longer needed.
211 if (p instanceof NullPattern && p.parent == null && p.next != null)
221 Pattern optpat = RegOpt.opt(o, igc, dontMinQ);
223 optpat.setParent(this);
227 public String toString()
229 StringBuffer sb = new StringBuffer();
230 // should protect this...
231 sb.append("(?:(?#branch)"); // Hashtable)");
232 for (int i = 0; i < keys.size(); i++)
234 Character c = (Character) keys.elementAt(i);
237 if (i + 1 < keys.size())
243 sb.append(nextString());
244 return sb.toString();
247 public int matchInternal(int pos, Pthings pt)
249 if (pos >= pt.src.length())
253 Pattern n = (Pattern) h.get(Character.valueOf(pt.src.charAt(pos)));
258 if (pt.cbits != null && pt.cbits.get(pos))
262 return n.matchInternal(pos + 1, pt);
267 * This is just a place to put the optimizing function. It is never instantiated
268 * as an Object. It just sorts through the RegOpt looking for things it can
269 * change and make faster.
273 static Pattern opt(Pattern p, boolean ignoreCase, boolean dontMinQ)
279 if (p instanceof Bracket)
281 Bracket b = (Bracket) p;
282 // FastBracket is the only special
283 // optimized class to have its own
285 p = FastBracket.process(b, ignoreCase);
286 // if(!(p instanceof FastBracket)
287 // p = Switch.process(b,ignoreCase);
291 else if (p instanceof oneChar && !ignoreCase && !dontMinQ)
293 oneChar o = (oneChar) p;
294 p = new FastChar(o.c);
298 else if (p instanceof Or && ((Or) p).leftForm().equals("(?:")
299 && ((Or) p).v.size() == 1)
300 { // Eliminate this Or Object.
302 p = (Pattern) o.v.elementAt(0);
304 p = RegOpt.opt(p, ignoreCase, dontMinQ);
307 else if (p instanceof Or)
313 Branch b = new Branch();
315 for (int i = 0; i < v.size(); i++)
317 Pattern pp = (Pattern) v.elementAt(i);
318 // We want to have at least two oneChar's in
319 // the Or Object to consider making a Branch.
320 if (pp instanceof oneChar
321 && (b.h.size() >= 1 || (i + 1 < v.size() && v
322 .elementAt(i + 1) instanceof oneChar)))
324 b.addc((oneChar) pp, ignoreCase, dontMinQ);
328 if (b.keys.size() > 0)
330 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
338 o.addOr(opt(pp, ignoreCase, dontMinQ));
341 if (b.keys.size() > 0)
343 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
349 if (o.v.size() == 1 && o.leftForm().equals("(?:"))
350 { // Eliminate Or Object
351 p = (Pattern) o.v.elementAt(0);
353 p = RegOpt.opt(p, ignoreCase, dontMinQ);
357 else if (p instanceof FastMulti)
359 PatternSub ps = (PatternSub) p;
360 ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ);
362 else if (p instanceof Multi && safe4fm(((PatternSub) p).sub))
368 fm = new FastMulti(m.a, m.b, opt(m.sub, ignoreCase, dontMinQ));
369 } catch (RegSyntax rs)
372 fm.parent = m.parent;
373 fm.matchFewest = m.matchFewest;
379 p.next = opt(p.next, ignoreCase, dontMinQ);
384 final static boolean safe4fm(Pattern x)
388 if (x instanceof Bracket)
392 else if (x instanceof Range)
396 else if (x instanceof oneChar)
400 else if (x instanceof Any)
404 else if (x instanceof Custom
405 && ((Custom) x).v instanceof UniValidator)
409 else if (x instanceof Or)
412 if (!o.leftForm().equals("(?:"))
416 patInt lo = o.countMinChars();
417 patInt hi = o.countMaxChars();
422 for (int i = 0; i < o.v.size(); i++)
424 if (!safe4fm((Pattern) o.v.elementAt(i)))
439 * public static void setParents(Regex r) { setParents(r.thePattern,null); }
440 * static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub &&
441 * !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents(
442 * ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof
443 * Bracket)) { Or o = (Or)p; for(int i=0;i<o.v.size();i++)
444 * RegOpt.setParents((Pattern)o.v.elementAt(i),o); } else if(p instanceof
445 * Branch) { Branch b = (Branch)p; Enumeration e = b.h.keys();
446 * while(e.hasMoreElements()) { Object o = e.nextElement(); RegOpt.setParents(
447 * (Pattern)b.h.get(o), b); } } if(p.next == null) p.parent = x; else {
448 * p.parent = null; RegOpt.setParents(p.next,x); } }