2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
11 /** This class is just like oneChar, but doesn't worry about case. */
\r
12 class FastChar extends oneChar {
\r
13 FastChar(char c) { super(c); }
\r
14 public int matchInternal(int p,Pthings pt) {
\r
15 return (p < pt.src.length()
\r
16 && pt.src.charAt(p)==c) ?
\r
17 nextMatch(p+1,pt) : -1;
\r
19 Pattern clone1(Hashtable h) {
\r
20 return new FastChar(c);
\r
24 /** This class is a hashtable keyed by Character
\r
25 * Objects. It is used to match things of the
\r
26 * form (?:a..|b..|c..|..) match with greater efficiency --
\r
27 * by using a Hashtable that indexes into the group
\r
30 class Branch extends Pattern {
\r
31 Hashtable h = new Hashtable();
\r
32 // We need to keep track of the order
\r
33 // of the keys -- if we don't then
\r
34 // recompiling the output of toString
\r
35 // may produce errors by re-ordering
\r
36 // ()'s and changing the id number of
\r
37 // the backreference associated with
\r
39 Vector keys = new Vector();
\r
41 Pattern clone1(Hashtable x) {
\r
42 Branch b = new Branch();
\r
43 b.keys = (Vector)keys.clone();
\r
47 for(int i=0;i<keys.size();i++) {
\r
48 Pattern p = (Pattern)h.get(keys.elementAt(i));
\r
49 b.h.put(keys.elementAt(i),p.clone(x));
\r
54 // this function eliminates Branches with 0 or 1 elements.
\r
55 final Pattern reduce(boolean ignoreCase,boolean dontMinQ) {
\r
57 Enumeration e = h.keys();
\r
58 Character c = (Character)e.nextElement();
\r
60 if(ignoreCase||dontMinQ)
\r
61 oc=new oneChar(c.charValue());
\r
62 else oc=new FastChar(c.charValue());
\r
63 oc.next = (Pattern)h.get(c);
\r
66 } else if(h.size()==0) return null;
\r
69 public patInt maxChars() {
\r
70 Enumeration e = h.keys();
\r
71 patInt count = new patInt(0);
\r
72 while(e.hasMoreElements()) {
\r
73 Object key = e.nextElement();
\r
74 Pattern pa = (Pattern)h.get(key);
\r
75 patInt pi = pa.maxChars();
\r
81 public patInt minChars() {
\r
82 Enumeration e = h.keys();
\r
83 patInt count = new patInt(0);
\r
84 while(e.hasMoreElements()) {
\r
85 Object key = e.nextElement();
\r
86 Pattern pa = (Pattern)h.get(key);
\r
87 patInt pi = pa.minChars();
\r
94 // adds a oneChar object to this Branch
\r
95 void addc(oneChar o,boolean ignoreCase,boolean dontMinQ) {
\r
98 n = new NullPattern();
\r
100 n = RegOpt.opt(n,ignoreCase,dontMinQ);
\r
102 set(new Character(o.c),n,ignoreCase,dontMinQ);
\r
105 set(new Character(o.altc),n,ignoreCase,dontMinQ);
\r
106 if(o.c != o.altc2 && o.altc != o.altc2)
\r
107 set(new Character(o.altc2),n,ignoreCase,dontMinQ);
\r
110 void set(Character c,Pattern n,boolean igc,boolean dontMinQ) {
\r
111 Pattern p = (Pattern)h.get(c);
\r
113 // This letter is not yet used in the Branch object.
\r
114 // We need to add it.
\r
116 if(n instanceof Or) {
\r
117 // A NullPattern is prepended to an Or
\r
118 // to prevent confusing this object.
\r
119 // For example: (boo|bug) => (b(?:oo|ug))
\r
120 // during this process. However, we
\r
121 // want (b(?:oo|ell)|bug)
\r
122 NullPattern np = new NullPattern();
\r
128 // Make sure we remember the order things were
\r
129 // added into the Branch object so that we can
\r
130 // properly convert it to a String.
\r
131 keys.addElement(c);
\r
132 } else if(p instanceof Or) {
\r
134 } else if(p instanceof oneChar && n instanceof oneChar
\r
135 && ((oneChar)p).c != ((oneChar)n).c) {
\r
136 Branch b = new Branch();
\r
137 b.addc((oneChar)p,igc,dontMinQ);
\r
138 b.addc((oneChar)n,igc,dontMinQ);
\r
141 } else if(p instanceof Branch && n instanceof oneChar) {
\r
142 ((Branch)p).addc((oneChar)n,igc,dontMinQ);
\r
145 // Create an Or object to receive the variety
\r
146 // of branches in the pattern if the current letter
\r
147 // is matched. We do not attempt to make these
\r
148 // sub-branches into a Branch object yet.
\r
152 // Remove NullPattern from p -- it's no longer needed.
\r
153 if(p instanceof NullPattern
\r
154 && p.parent == null && p.next != null) {
\r
161 Pattern optpat = RegOpt.opt(o,igc,dontMinQ);
\r
163 optpat.setParent(this);
\r
166 public String toString() {
\r
167 StringBuffer sb = new StringBuffer();
\r
168 // should protect this...
\r
169 sb.append("(?:(?#branch)");// Hashtable)");
\r
170 for(int i=0;i<keys.size();i++) {
\r
171 Character c = (Character)keys.elementAt(i);
\r
173 sb.append(h.get(c));
\r
174 if(i+1<keys.size())
\r
178 sb.append(nextString());
\r
179 return sb.toString();
\r
181 public int matchInternal(int pos,Pthings pt) {
\r
182 if(pos >= pt.src.length()) return -1;
\r
183 Pattern n = (Pattern)h.get(new Character(pt.src.charAt(pos)));
\r
184 if(n == null) return -1;
\r
185 if(pt.cbits != null && pt.cbits.get(pos)) return -1;
\r
186 return n.matchInternal(pos+1,pt);
\r
190 /** This is just a place to put the optimizing function.
\r
191 It is never instantiated as an Object. It just sorts
\r
192 through the RegOpt looking for things it can change
\r
193 and make faster. */
\r
194 public class RegOpt {
\r
195 static Pattern opt(Pattern p,boolean ignoreCase,
\r
196 boolean dontMinQ) {
\r
197 if(p == null) return p;
\r
198 if(p instanceof Bracket) {
\r
199 Bracket b = (Bracket)p;
\r
200 // FastBracket is the only special
\r
201 // optimized class to have its own
\r
203 p = FastBracket.process(b,ignoreCase);
\r
204 //if(!(p instanceof FastBracket)
\r
205 //p = Switch.process(b,ignoreCase);
\r
207 p.parent = b.parent;
\r
208 } else if(p instanceof oneChar && !ignoreCase
\r
210 oneChar o = (oneChar)p;
\r
211 p = new FastChar(o.c);
\r
213 p.parent = o.parent;
\r
214 } else if(p instanceof Or
\r
215 && ((Or)p).leftForm().equals("(?:")
\r
216 && ((Or)p).v.size()==1) { // Eliminate this Or Object.
\r
218 p = (Pattern)o.v.elementAt(0);
\r
220 p = RegOpt.opt(p,ignoreCase,dontMinQ);
\r
222 } else if(p instanceof Or) {
\r
226 o.v = new Vector();
\r
227 Branch b = new Branch();
\r
228 b.parent = o.parent;
\r
229 for(int i=0;i<v.size();i++) {
\r
230 Pattern pp = (Pattern)v.elementAt(i);
\r
231 // We want to have at least two oneChar's in
\r
232 // the Or Object to consider making a Branch.
\r
233 if(pp instanceof oneChar && (b.h.size()>=1 ||
\r
234 (i+1<v.size() && v.elementAt(i+1) instanceof oneChar)))
\r
235 b.addc((oneChar)pp,ignoreCase,dontMinQ);
\r
237 if(b.keys.size() > 0) {
\r
238 Pattern p2 = (Pattern)b.reduce(ignoreCase,dontMinQ);
\r
242 b.parent = o.parent;
\r
245 o.addOr(opt(pp,ignoreCase,dontMinQ));
\r
248 if(b.keys.size()>0) {
\r
249 Pattern p2=(Pattern)b.reduce(ignoreCase,dontMinQ);
\r
254 && o.leftForm().equals("(?:")) { // Eliminate Or Object
\r
255 p = (Pattern)o.v.elementAt(0);
\r
257 p = RegOpt.opt(p,ignoreCase,dontMinQ);
\r
260 } else if(p instanceof FastMulti) {
\r
261 PatternSub ps = (PatternSub)p;
\r
262 ps.sub = RegOpt.opt(ps.sub,ignoreCase,dontMinQ);
\r
263 } else if(p instanceof Multi && safe4fm( ((PatternSub)p).sub )) {
\r
264 Multi m = (Multi)p;
\r
265 FastMulti fm = null;
\r
267 fm = new FastMulti(m.a,m.b,
\r
268 opt(m.sub,ignoreCase,dontMinQ));
\r
269 } catch(RegSyntax rs) {}
\r
270 fm.parent = m.parent;
\r
271 fm.matchFewest = m.matchFewest;
\r
276 p.next = opt(p.next,ignoreCase,dontMinQ);
\r
279 final static boolean safe4fm(Pattern x) {
\r
281 if(x instanceof Bracket)
\r
283 else if(x instanceof Range)
\r
285 else if(x instanceof oneChar)
\r
287 else if(x instanceof Any)
\r
289 else if(x instanceof Custom
\r
290 && ((Custom)x).v instanceof UniValidator)
\r
292 else if(x instanceof Or) {
\r
294 if(!o.leftForm().equals("(?:"))
\r
296 patInt lo = o.countMinChars();
\r
297 patInt hi = o.countMaxChars();
\r
300 for(int i=0;i<o.v.size();i++)
\r
301 if(!safe4fm((Pattern)o.v.elementAt(i)) )
\r
303 } else return false;
\r
309 public static void setParents(Regex r) {
\r
310 setParents(r.thePattern,null);
\r
312 static void setParents(Pattern p,Pattern x) {
\r
313 if(p instanceof PatternSub && !(p instanceof FastMulti)
\r
314 && !(p instanceof DotMulti))
\r
315 RegOpt.setParents( ((PatternSub)p).sub, p);
\r
316 else if(p instanceof Or && !(p instanceof Bracket)) {
\r
318 for(int i=0;i<o.v.size();i++)
\r
319 RegOpt.setParents((Pattern)o.v.elementAt(i),o);
\r
320 } else if(p instanceof Branch) {
\r
321 Branch b = (Branch)p;
\r
322 Enumeration e = b.h.keys();
\r
323 while(e.hasMoreElements()) {
\r
324 Object o = e.nextElement();
\r
325 RegOpt.setParents( (Pattern)b.h.get(o), b);
\r
332 RegOpt.setParents(p.next,x);
\r