2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
12 /** This class is just like oneChar, but doesn't worry about case. */
\r
13 class FastChar extends oneChar {
\r
14 FastChar(char c) { super(c); }
\r
15 public int matchInternal(int p,Pthings pt) {
\r
16 return (p < pt.src.length()
\r
17 && pt.src.charAt(p)==c) ?
\r
18 nextMatch(p+1,pt) : -1;
\r
20 Pattern clone1(Hashtable h) {
\r
21 return new FastChar(c);
\r
25 /** This class is a hashtable keyed by Character
\r
26 * Objects. It is used to match things of the
\r
27 * form (?:a..|b..|c..|..) match with greater efficiency --
\r
28 * by using a Hashtable that indexes into the group
\r
31 class Branch extends Pattern {
\r
32 Hashtable h = new Hashtable();
\r
33 // We need to keep track of the order
\r
34 // of the keys -- if we don't then
\r
35 // recompiling the output of toString
\r
36 // may produce errors by re-ordering
\r
37 // ()'s and changing the id number of
\r
38 // the backreference associated with
\r
40 Vector keys = new Vector();
\r
42 Pattern clone1(Hashtable x) {
\r
43 Branch b = new Branch();
\r
44 b.keys = (Vector)keys.clone();
\r
48 for(int i=0;i<keys.size();i++) {
\r
49 Pattern p = (Pattern)h.get(keys.elementAt(i));
\r
50 b.h.put(keys.elementAt(i),p.clone(x));
\r
55 // this function eliminates Branches with 0 or 1 elements.
\r
56 final Pattern reduce(boolean ignoreCase,boolean dontMinQ) {
\r
58 Enumeration e = h.keys();
\r
59 Character c = (Character)e.nextElement();
\r
61 if(ignoreCase||dontMinQ)
\r
62 oc=new oneChar(c.charValue());
\r
63 else oc=new FastChar(c.charValue());
\r
64 oc.next = (Pattern)h.get(c);
\r
67 } else if(h.size()==0) return null;
\r
70 public patInt maxChars() {
\r
71 Enumeration e = h.keys();
\r
72 patInt count = new patInt(0);
\r
73 while(e.hasMoreElements()) {
\r
74 Object key = e.nextElement();
\r
75 Pattern pa = (Pattern)h.get(key);
\r
76 patInt pi = pa.maxChars();
\r
82 public patInt minChars() {
\r
83 Enumeration e = h.keys();
\r
84 patInt count = new patInt(0);
\r
85 while(e.hasMoreElements()) {
\r
86 Object key = e.nextElement();
\r
87 Pattern pa = (Pattern)h.get(key);
\r
88 patInt pi = pa.minChars();
\r
95 // adds a oneChar object to this Branch
\r
96 void addc(oneChar o,boolean ignoreCase,boolean dontMinQ) {
\r
99 n = new NullPattern();
\r
101 n = RegOpt.opt(n,ignoreCase,dontMinQ);
\r
103 set(new Character(o.c),n,ignoreCase,dontMinQ);
\r
106 set(new Character(o.altc),n,ignoreCase,dontMinQ);
\r
107 if(o.c != o.altc2 && o.altc != o.altc2)
\r
108 set(new Character(o.altc2),n,ignoreCase,dontMinQ);
\r
111 void set(Character c,Pattern n,boolean igc,boolean dontMinQ) {
\r
112 Pattern p = (Pattern)h.get(c);
\r
114 // This letter is not yet used in the Branch object.
\r
115 // We need to add it.
\r
117 if(n instanceof Or) {
\r
118 // A NullPattern is prepended to an Or
\r
119 // to prevent confusing this object.
\r
120 // For example: (boo|bug) => (b(?:oo|ug))
\r
121 // during this process. However, we
\r
122 // want (b(?:oo|ell)|bug)
\r
123 NullPattern np = new NullPattern();
\r
129 // Make sure we remember the order things were
\r
130 // added into the Branch object so that we can
\r
131 // properly convert it to a String.
\r
132 keys.addElement(c);
\r
133 } else if(p instanceof Or) {
\r
135 } else if(p instanceof oneChar && n instanceof oneChar
\r
136 && ((oneChar)p).c != ((oneChar)n).c) {
\r
137 Branch b = new Branch();
\r
138 b.addc((oneChar)p,igc,dontMinQ);
\r
139 b.addc((oneChar)n,igc,dontMinQ);
\r
142 } else if(p instanceof Branch && n instanceof oneChar) {
\r
143 ((Branch)p).addc((oneChar)n,igc,dontMinQ);
\r
146 // Create an Or object to receive the variety
\r
147 // of branches in the pattern if the current letter
\r
148 // is matched. We do not attempt to make these
\r
149 // sub-branches into a Branch object yet.
\r
153 // Remove NullPattern from p -- it's no longer needed.
\r
154 if(p instanceof NullPattern
\r
155 && p.parent == null && p.next != null) {
\r
162 Pattern optpat = RegOpt.opt(o,igc,dontMinQ);
\r
164 optpat.setParent(this);
\r
167 public String toString() {
\r
168 StringBuffer sb = new StringBuffer();
\r
169 // should protect this...
\r
170 sb.append("(?:(?#branch)");// Hashtable)");
\r
171 for(int i=0;i<keys.size();i++) {
\r
172 Character c = (Character)keys.elementAt(i);
\r
174 sb.append(h.get(c));
\r
175 if(i+1<keys.size())
\r
179 sb.append(nextString());
\r
180 return sb.toString();
\r
182 public int matchInternal(int pos,Pthings pt) {
\r
183 if(pos >= pt.src.length()) return -1;
\r
184 Pattern n = (Pattern)h.get(new Character(pt.src.charAt(pos)));
\r
185 if(n == null) return -1;
\r
186 if(pt.cbits != null && pt.cbits.get(pos)) return -1;
\r
187 return n.matchInternal(pos+1,pt);
\r
191 /** This is just a place to put the optimizing function.
\r
192 It is never instantiated as an Object. It just sorts
\r
193 through the RegOpt looking for things it can change
\r
194 and make faster. */
\r
195 public class RegOpt {
\r
196 static Pattern opt(Pattern p,boolean ignoreCase,
\r
197 boolean dontMinQ) {
\r
198 if(p == null) return p;
\r
199 if(p instanceof Bracket) {
\r
200 Bracket b = (Bracket)p;
\r
201 // FastBracket is the only special
\r
202 // optimized class to have its own
\r
204 p = FastBracket.process(b,ignoreCase);
\r
205 //if(!(p instanceof FastBracket)
\r
206 //p = Switch.process(b,ignoreCase);
\r
208 p.parent = b.parent;
\r
209 } else if(p instanceof oneChar && !ignoreCase
\r
211 oneChar o = (oneChar)p;
\r
212 p = new FastChar(o.c);
\r
214 p.parent = o.parent;
\r
215 } else if(p instanceof Or
\r
216 && ((Or)p).leftForm().equals("(?:")
\r
217 && ((Or)p).v.size()==1) { // Eliminate this Or Object.
\r
219 p = (Pattern)o.v.elementAt(0);
\r
221 p = RegOpt.opt(p,ignoreCase,dontMinQ);
\r
223 } else if(p instanceof Or) {
\r
227 o.v = new Vector();
\r
228 Branch b = new Branch();
\r
229 b.parent = o.parent;
\r
230 for(int i=0;i<v.size();i++) {
\r
231 Pattern pp = (Pattern)v.elementAt(i);
\r
232 // We want to have at least two oneChar's in
\r
233 // the Or Object to consider making a Branch.
\r
234 if(pp instanceof oneChar && (b.h.size()>=1 ||
\r
235 (i+1<v.size() && v.elementAt(i+1) instanceof oneChar)))
\r
236 b.addc((oneChar)pp,ignoreCase,dontMinQ);
\r
238 if(b.keys.size() > 0) {
\r
239 Pattern p2 = (Pattern)b.reduce(ignoreCase,dontMinQ);
\r
243 b.parent = o.parent;
\r
246 o.addOr(opt(pp,ignoreCase,dontMinQ));
\r
249 if(b.keys.size()>0) {
\r
250 Pattern p2=(Pattern)b.reduce(ignoreCase,dontMinQ);
\r
255 && o.leftForm().equals("(?:")) { // Eliminate Or Object
\r
256 p = (Pattern)o.v.elementAt(0);
\r
258 p = RegOpt.opt(p,ignoreCase,dontMinQ);
\r
261 } else if(p instanceof FastMulti) {
\r
262 PatternSub ps = (PatternSub)p;
\r
263 ps.sub = RegOpt.opt(ps.sub,ignoreCase,dontMinQ);
\r
264 } else if(p instanceof Multi && safe4fm( ((PatternSub)p).sub )) {
\r
265 Multi m = (Multi)p;
\r
266 FastMulti fm = null;
\r
268 fm = new FastMulti(m.a,m.b,
\r
269 opt(m.sub,ignoreCase,dontMinQ));
\r
270 } catch(RegSyntax rs) {}
\r
271 fm.parent = m.parent;
\r
272 fm.matchFewest = m.matchFewest;
\r
277 p.next = opt(p.next,ignoreCase,dontMinQ);
\r
280 final static boolean safe4fm(Pattern x) {
\r
282 if(x instanceof Bracket)
\r
284 else if(x instanceof Range)
\r
286 else if(x instanceof oneChar)
\r
288 else if(x instanceof Any)
\r
290 else if(x instanceof Custom
\r
291 && ((Custom)x).v instanceof UniValidator)
\r
293 else if(x instanceof Or) {
\r
295 if(!o.leftForm().equals("(?:"))
\r
297 patInt lo = o.countMinChars();
\r
298 patInt hi = o.countMaxChars();
\r
301 for(int i=0;i<o.v.size();i++)
\r
302 if(!safe4fm((Pattern)o.v.elementAt(i)) )
\r
304 } else return false;
\r
310 public static void setParents(Regex r) {
\r
311 setParents(r.thePattern,null);
\r
313 static void setParents(Pattern p,Pattern x) {
\r
314 if(p instanceof PatternSub && !(p instanceof FastMulti)
\r
315 && !(p instanceof DotMulti))
\r
316 RegOpt.setParents( ((PatternSub)p).sub, p);
\r
317 else if(p instanceof Or && !(p instanceof Bracket)) {
\r
319 for(int i=0;i<o.v.size();i++)
\r
320 RegOpt.setParents((Pattern)o.v.elementAt(i),o);
\r
321 } else if(p instanceof Branch) {
\r
322 Branch b = (Branch)p;
\r
323 Enumeration e = b.h.keys();
\r
324 while(e.hasMoreElements()) {
\r
325 Object o = e.nextElement();
\r
326 RegOpt.setParents( (Pattern)b.h.get(o), b);
\r
333 RegOpt.setParents(p.next,x);
\r