1 /*******************************************************************************
2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $(date) The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
20 *******************************************************************************/
22 // This software is now distributed according to
23 // the Lesser Gnu Public License. Please see
24 // http://www.gnu.org/copyleft/lesser.txt for
26 // -- Happy Computing!
28 package com.stevesoft.pat;
30 import java.util.Enumeration;
31 import java.util.Hashtable;
32 import java.util.Vector;
34 /** This class is just like oneChar, but doesn't worry about case. */
35 class FastChar extends oneChar
42 public int matchInternal(int p, Pthings pt)
44 return (p < pt.src.length() && pt.src.charAt(p) == c) ? nextMatch(
48 Pattern clone1(Hashtable h)
50 return new FastChar(c);
55 * This class is a hashtable keyed by Character Objects. It is used to match
56 * things of the form (?:a..|b..|c..|..) match with greater efficiency -- by
57 * using a Hashtable that indexes into the group of patterns.
59 class Branch extends Pattern
61 Hashtable h = new Hashtable();
63 // We need to keep track of the order
64 // of the keys -- if we don't then
65 // recompiling the output of toString
66 // may produce errors by re-ordering
67 // ()'s and changing the id number of
68 // the backreference associated with
70 Vector keys = new Vector();
76 Pattern clone1(Hashtable x)
78 Branch b = new Branch();
79 b.keys = (Vector) keys.clone();
83 for (int i = 0; i < keys.size(); i++)
85 Pattern p = (Pattern) h.get(keys.elementAt(i));
86 b.h.put(keys.elementAt(i), p.clone(x));
91 // this function eliminates Branches with 0 or 1 elements.
92 final Pattern reduce(boolean ignoreCase, boolean dontMinQ)
96 Enumeration e = h.keys();
97 Character c = (Character) e.nextElement();
99 if (ignoreCase || dontMinQ)
101 oc = new oneChar(c.charValue());
105 oc = new FastChar(c.charValue());
107 oc.next = (Pattern) h.get(c);
111 else if (h.size() == 0)
118 public patInt maxChars()
120 Enumeration e = h.keys();
121 patInt count = new patInt(0);
122 while (e.hasMoreElements())
124 Object key = e.nextElement();
125 Pattern pa = (Pattern) h.get(key);
126 patInt pi = pa.maxChars();
133 public patInt minChars()
135 Enumeration e = h.keys();
136 patInt count = new patInt(0);
137 while (e.hasMoreElements())
139 Object key = e.nextElement();
140 Pattern pa = (Pattern) h.get(key);
141 patInt pi = pa.minChars();
148 // adds a oneChar object to this Branch
149 void addc(oneChar o, boolean ignoreCase, boolean dontMinQ)
154 n = new NullPattern();
158 n = RegOpt.opt(n, ignoreCase, dontMinQ);
161 set(new Character(o.c), n, ignoreCase, dontMinQ);
166 set(new Character(o.altc), n, ignoreCase, dontMinQ);
168 if (o.c != o.altc2 && o.altc != o.altc2)
170 set(new Character(o.altc2), n, ignoreCase, dontMinQ);
175 void set(Character c, Pattern n, boolean igc, boolean dontMinQ)
177 Pattern p = (Pattern) h.get(c);
179 // This letter is not yet used in the Branch object.
180 // We need to add it.
185 // A NullPattern is prepended to an Or
186 // to prevent confusing this object.
187 // For example: (boo|bug) => (b(?:oo|ug))
188 // during this process. However, we
189 // want (b(?:oo|ell)|bug)
190 NullPattern np = new NullPattern();
198 // Make sure we remember the order things were
199 // added into the Branch object so that we can
200 // properly convert it to a String.
203 else if (p instanceof Or)
207 else if (p instanceof oneChar && n instanceof oneChar
208 && ((oneChar) p).c != ((oneChar) n).c)
210 Branch b = new Branch();
211 b.addc((oneChar) p, igc, dontMinQ);
212 b.addc((oneChar) n, igc, dontMinQ);
216 else if (p instanceof Branch && n instanceof oneChar)
218 ((Branch) p).addc((oneChar) n, igc, dontMinQ);
223 // Create an Or object to receive the variety
224 // of branches in the pattern if the current letter
225 // is matched. We do not attempt to make these
226 // sub-branches into a Branch object yet.
230 // Remove NullPattern from p -- it's no longer needed.
231 if (p instanceof NullPattern && p.parent == null && p.next != null)
241 Pattern optpat = RegOpt.opt(o, igc, dontMinQ);
243 optpat.setParent(this);
247 public String toString()
249 StringBuffer sb = new StringBuffer();
250 // should protect this...
251 sb.append("(?:(?#branch)"); // Hashtable)");
252 for (int i = 0; i < keys.size(); i++)
254 Character c = (Character) keys.elementAt(i);
257 if (i + 1 < keys.size())
263 sb.append(nextString());
264 return sb.toString();
267 public int matchInternal(int pos, Pthings pt)
269 if (pos >= pt.src.length())
273 Pattern n = (Pattern) h.get(new Character(pt.src.charAt(pos)));
278 if (pt.cbits != null && pt.cbits.get(pos))
282 return n.matchInternal(pos + 1, pt);
287 * This is just a place to put the optimizing function. It is never instantiated
288 * as an Object. It just sorts through the RegOpt looking for things it can
289 * change and make faster.
293 static Pattern opt(Pattern p, boolean ignoreCase, boolean dontMinQ)
299 if (p instanceof Bracket)
301 Bracket b = (Bracket) p;
302 // FastBracket is the only special
303 // optimized class to have its own
305 p = FastBracket.process(b, ignoreCase);
306 // if(!(p instanceof FastBracket)
307 // p = Switch.process(b,ignoreCase);
311 else if (p instanceof oneChar && !ignoreCase && !dontMinQ)
313 oneChar o = (oneChar) p;
314 p = new FastChar(o.c);
318 else if (p instanceof Or && ((Or) p).leftForm().equals("(?:")
319 && ((Or) p).v.size() == 1)
320 { // Eliminate this Or Object.
322 p = (Pattern) o.v.elementAt(0);
324 p = RegOpt.opt(p, ignoreCase, dontMinQ);
327 else if (p instanceof Or)
333 Branch b = new Branch();
335 for (int i = 0; i < v.size(); i++)
337 Pattern pp = (Pattern) v.elementAt(i);
338 // We want to have at least two oneChar's in
339 // the Or Object to consider making a Branch.
340 if (pp instanceof oneChar
341 && (b.h.size() >= 1 || (i + 1 < v.size() && v
342 .elementAt(i + 1) instanceof oneChar)))
344 b.addc((oneChar) pp, ignoreCase, dontMinQ);
348 if (b.keys.size() > 0)
350 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
358 o.addOr(opt(pp, ignoreCase, dontMinQ));
361 if (b.keys.size() > 0)
363 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ);
369 if (o.v.size() == 1 && o.leftForm().equals("(?:"))
370 { // Eliminate Or Object
371 p = (Pattern) o.v.elementAt(0);
373 p = RegOpt.opt(p, ignoreCase, dontMinQ);
377 else if (p instanceof FastMulti)
379 PatternSub ps = (PatternSub) p;
380 ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ);
382 else if (p instanceof Multi && safe4fm(((PatternSub) p).sub))
388 fm = new FastMulti(m.a, m.b, opt(m.sub, ignoreCase, dontMinQ));
389 } catch (RegSyntax rs)
392 fm.parent = m.parent;
393 fm.matchFewest = m.matchFewest;
399 p.next = opt(p.next, ignoreCase, dontMinQ);
404 final static boolean safe4fm(Pattern x)
408 if (x instanceof Bracket)
412 else if (x instanceof Range)
416 else if (x instanceof oneChar)
420 else if (x instanceof Any)
424 else if (x instanceof Custom
425 && ((Custom) x).v instanceof UniValidator)
429 else if (x instanceof Or)
432 if (!o.leftForm().equals("(?:"))
436 patInt lo = o.countMinChars();
437 patInt hi = o.countMaxChars();
442 for (int i = 0; i < o.v.size(); i++)
444 if (!safe4fm((Pattern) o.v.elementAt(i)))
459 * public static void setParents(Regex r) { setParents(r.thePattern,null); }
460 * static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub &&
461 * !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents(
462 * ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof
463 * Bracket)) { Or o = (Or)p; for(int i=0;i<o.v.size();i++)
464 * RegOpt.setParents((Pattern)o.v.elementAt(i),o); } else if(p instanceof
465 * Branch) { Branch b = (Branch)p; Enumeration e = b.h.keys();
466 * while(e.hasMoreElements()) { Object o = e.nextElement(); RegOpt.setParents(
467 * (Pattern)b.h.get(o), b); } } if(p.next == null) p.parent = x; else {
468 * p.parent = null; RegOpt.setParents(p.next,x); } }