2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
12 /** This class is just like oneChar, but doesn't worry about case. */
\r
21 public int matchInternal(int p, Pthings pt)
\r
23 return (p < pt.src.length()
\r
24 && pt.src.charAt(p) == c) ?
\r
25 nextMatch(p + 1, pt) : -1;
\r
28 Pattern clone1(Hashtable h)
\r
30 return new FastChar(c);
\r
34 /** This class is a hashtable keyed by Character
\r
35 * Objects. It is used to match things of the
\r
36 * form (?:a..|b..|c..|..) match with greater efficiency --
\r
37 * by using a Hashtable that indexes into the group
\r
43 Hashtable h = new Hashtable();
\r
44 // We need to keep track of the order
\r
45 // of the keys -- if we don't then
\r
46 // recompiling the output of toString
\r
47 // may produce errors by re-ordering
\r
48 // ()'s and changing the id number of
\r
49 // the backreference associated with
\r
51 Vector keys = new Vector();
\r
55 Pattern clone1(Hashtable x)
\r
57 Branch b = new Branch();
\r
58 b.keys = (Vector) keys.clone();
\r
62 for (int i = 0; i < keys.size(); i++)
\r
64 Pattern p = (Pattern) h.get(keys.elementAt(i));
\r
65 b.h.put(keys.elementAt(i), p.clone(x));
\r
70 // this function eliminates Branches with 0 or 1 elements.
\r
71 final Pattern reduce(boolean ignoreCase, boolean dontMinQ)
\r
75 Enumeration e = h.keys();
\r
76 Character c = (Character) e.nextElement();
\r
78 if (ignoreCase || dontMinQ)
\r
80 oc = new oneChar(c.charValue());
\r
84 oc = new FastChar(c.charValue());
\r
86 oc.next = (Pattern) h.get(c);
\r
90 else if (h.size() == 0)
\r
97 public patInt maxChars()
\r
99 Enumeration e = h.keys();
\r
100 patInt count = new patInt(0);
\r
101 while (e.hasMoreElements())
\r
103 Object key = e.nextElement();
\r
104 Pattern pa = (Pattern) h.get(key);
\r
105 patInt pi = pa.maxChars();
\r
112 public patInt minChars()
\r
114 Enumeration e = h.keys();
\r
115 patInt count = new patInt(0);
\r
116 while (e.hasMoreElements())
\r
118 Object key = e.nextElement();
\r
119 Pattern pa = (Pattern) h.get(key);
\r
120 patInt pi = pa.minChars();
\r
127 // adds a oneChar object to this Branch
\r
128 void addc(oneChar o, boolean ignoreCase, boolean dontMinQ)
\r
130 Pattern n = o.next;
\r
133 n = new NullPattern();
\r
137 n = RegOpt.opt(n, ignoreCase, dontMinQ);
\r
140 set(new Character(o.c), n, ignoreCase, dontMinQ);
\r
145 set(new Character(o.altc), n, ignoreCase, dontMinQ);
\r
147 if (o.c != o.altc2 && o.altc != o.altc2)
\r
149 set(new Character(o.altc2), n, ignoreCase, dontMinQ);
\r
154 void set(Character c, Pattern n, boolean igc, boolean dontMinQ)
\r
156 Pattern p = (Pattern) h.get(c);
\r
158 // This letter is not yet used in the Branch object.
\r
159 // We need to add it.
\r
162 if (n instanceof Or)
\r
164 // A NullPattern is prepended to an Or
\r
165 // to prevent confusing this object.
\r
166 // For example: (boo|bug) => (b(?:oo|ug))
\r
167 // during this process. However, we
\r
168 // want (b(?:oo|ell)|bug)
\r
169 NullPattern np = new NullPattern();
\r
177 // Make sure we remember the order things were
\r
178 // added into the Branch object so that we can
\r
179 // properly convert it to a String.
\r
180 keys.addElement(c);
\r
182 else if (p instanceof Or)
\r
184 ( (Or) p).addOr(n);
\r
186 else if (p instanceof oneChar && n instanceof oneChar
\r
187 && ( (oneChar) p).c != ( (oneChar) n).c)
\r
189 Branch b = new Branch();
\r
190 b.addc( (oneChar) p, igc, dontMinQ);
\r
191 b.addc( (oneChar) n, igc, dontMinQ);
\r
195 else if (p instanceof Branch && n instanceof oneChar)
\r
197 ( (Branch) p).addc( (oneChar) n, igc, dontMinQ);
\r
202 // Create an Or object to receive the variety
\r
203 // of branches in the pattern if the current letter
\r
204 // is matched. We do not attempt to make these
\r
205 // sub-branches into a Branch object yet.
\r
209 // Remove NullPattern from p -- it's no longer needed.
\r
210 if (p instanceof NullPattern
\r
211 && p.parent == null && p.next != null)
\r
221 Pattern optpat = RegOpt.opt(o, igc, dontMinQ);
\r
223 optpat.setParent(this);
\r
227 public String toString()
\r
229 StringBuffer sb = new StringBuffer();
\r
230 // should protect this...
\r
231 sb.append("(?:(?#branch)"); // Hashtable)");
\r
232 for (int i = 0; i < keys.size(); i++)
\r
234 Character c = (Character) keys.elementAt(i);
\r
236 sb.append(h.get(c));
\r
237 if (i + 1 < keys.size())
\r
243 sb.append(nextString());
\r
244 return sb.toString();
\r
247 public int matchInternal(int pos, Pthings pt)
\r
249 if (pos >= pt.src.length())
\r
253 Pattern n = (Pattern) h.get(new Character(pt.src.charAt(pos)));
\r
258 if (pt.cbits != null && pt.cbits.get(pos))
\r
262 return n.matchInternal(pos + 1, pt);
\r
266 /** This is just a place to put the optimizing function.
\r
267 It is never instantiated as an Object. It just sorts
\r
268 through the RegOpt looking for things it can change
\r
269 and make faster. */
\r
270 public class RegOpt
\r
272 static Pattern opt(Pattern p, boolean ignoreCase,
\r
279 if (p instanceof Bracket)
\r
281 Bracket b = (Bracket) p;
\r
282 // FastBracket is the only special
\r
283 // optimized class to have its own
\r
285 p = FastBracket.process(b, ignoreCase);
\r
286 //if(!(p instanceof FastBracket)
\r
287 //p = Switch.process(b,ignoreCase);
\r
289 p.parent = b.parent;
\r
291 else if (p instanceof oneChar && !ignoreCase
\r
294 oneChar o = (oneChar) p;
\r
295 p = new FastChar(o.c);
\r
297 p.parent = o.parent;
\r
299 else if (p instanceof Or
\r
300 && ( (Or) p).leftForm().equals("(?:")
\r
301 && ( (Or) p).v.size() == 1)
\r
302 { // Eliminate this Or Object.
\r
304 p = (Pattern) o.v.elementAt(0);
\r
306 p = RegOpt.opt(p, ignoreCase, dontMinQ);
\r
309 else if (p instanceof Or)
\r
314 o.v = new Vector();
\r
315 Branch b = new Branch();
\r
316 b.parent = o.parent;
\r
317 for (int i = 0; i < v.size(); i++)
\r
319 Pattern pp = (Pattern) v.elementAt(i);
\r
320 // We want to have at least two oneChar's in
\r
321 // the Or Object to consider making a Branch.
\r
322 if (pp instanceof oneChar && (b.h.size() >= 1 ||
\r
323 (i + 1 < v.size() &&
\r
324 v.elementAt(i + 1) instanceof oneChar)))
\r
326 b.addc( (oneChar) pp, ignoreCase, dontMinQ);
\r
330 if (b.keys.size() > 0)
\r
332 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
\r
337 b.parent = o.parent;
\r
340 o.addOr(opt(pp, ignoreCase, dontMinQ));
\r
343 if (b.keys.size() > 0)
\r
345 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
\r
351 if (o.v.size() == 1
\r
352 && o.leftForm().equals("(?:"))
\r
353 { // Eliminate Or Object
\r
354 p = (Pattern) o.v.elementAt(0);
\r
356 p = RegOpt.opt(p, ignoreCase, dontMinQ);
\r
360 else if (p instanceof FastMulti)
\r
362 PatternSub ps = (PatternSub) p;
\r
363 ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ);
\r
365 else if (p instanceof Multi && safe4fm( ( (PatternSub) p).sub))
\r
367 Multi m = (Multi) p;
\r
368 FastMulti fm = null;
\r
371 fm = new FastMulti(m.a, m.b,
\r
372 opt(m.sub, ignoreCase, dontMinQ));
\r
374 catch (RegSyntax rs)
\r
376 fm.parent = m.parent;
\r
377 fm.matchFewest = m.matchFewest;
\r
381 if (p.next != null)
\r
383 p.next = opt(p.next, ignoreCase, dontMinQ);
\r
388 final static boolean safe4fm(Pattern x)
\r
392 if (x instanceof Bracket)
\r
396 else if (x instanceof Range)
\r
400 else if (x instanceof oneChar)
\r
404 else if (x instanceof Any)
\r
408 else if (x instanceof Custom
\r
409 && ( (Custom) x).v instanceof UniValidator)
\r
413 else if (x instanceof Or)
\r
416 if (!o.leftForm().equals("(?:"))
\r
420 patInt lo = o.countMinChars();
\r
421 patInt hi = o.countMaxChars();
\r
422 if (!lo.equals(hi))
\r
426 for (int i = 0; i < o.v.size(); i++)
\r
428 if (!safe4fm( (Pattern) o.v.elementAt(i)))
\r
443 public static void setParents(Regex r) {
\r
444 setParents(r.thePattern,null);
\r
446 static void setParents(Pattern p,Pattern x) {
\r
447 if(p instanceof PatternSub && !(p instanceof FastMulti)
\r
448 && !(p instanceof DotMulti))
\r
449 RegOpt.setParents( ((PatternSub)p).sub, p);
\r
450 else if(p instanceof Or && !(p instanceof Bracket)) {
\r
452 for(int i=0;i<o.v.size();i++)
\r
453 RegOpt.setParents((Pattern)o.v.elementAt(i),o);
\r
454 } else if(p instanceof Branch) {
\r
455 Branch b = (Branch)p;
\r
456 Enumeration e = b.h.keys();
\r
457 while(e.hasMoreElements()) {
\r
458 Object o = e.nextElement();
\r
459 RegOpt.setParents( (Pattern)b.h.get(o), b);
\r
466 RegOpt.setParents(p.next,x);
\r