X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fcom%2Fstevesoft%2Fpat%2FRegOpt.java;h=0d651390d1d13620fdce66bc42d7cd79ba5086b0;hb=506d60f0e188723ddc91c26824b41ac7034df3fe;hp=f70df39d23592e2100076b233ded3ba3159af20b;hpb=60f2d6c034560415fd0139c8bc7df0c19cae1186;p=jalview.git diff --git a/src/com/stevesoft/pat/RegOpt.java b/src/com/stevesoft/pat/RegOpt.java index f70df39..0d65139 100755 --- a/src/com/stevesoft/pat/RegOpt.java +++ b/src/com/stevesoft/pat/RegOpt.java @@ -1,469 +1,448 @@ -// -// This software is now distributed according to -// the Lesser Gnu Public License. Please see -// http://www.gnu.org/copyleft/lesser.txt for -// the details. -// -- Happy Computing! -// -package com.stevesoft.pat; - -import java.util.*; - -/** This class is just like oneChar, but doesn't worry about case. */ -class FastChar - extends oneChar -{ - FastChar(char c) - { - super(c); - } - - public int matchInternal(int p, Pthings pt) - { - return (p < pt.src.length() - && pt.src.charAt(p) == c) ? - nextMatch(p + 1, pt) : -1; - } - - Pattern clone1(Hashtable h) - { - return new FastChar(c); - } -} - -/** This class is a hashtable keyed by Character - * Objects. It is used to match things of the - * form (?:a..|b..|c..|..) match with greater efficiency -- - * by using a Hashtable that indexes into the group - * of patterns. - */ -class Branch - extends Pattern -{ - Hashtable h = new Hashtable(); - // We need to keep track of the order - // of the keys -- if we don't then - // recompiling the output of toString - // may produce errors by re-ordering - // ()'s and changing the id number of - // the backreference associated with - // a subpattern. - Vector keys = new Vector(); - Branch() - {} - - Pattern clone1(Hashtable x) - { - Branch b = new Branch(); - b.keys = (Vector) keys.clone(); - x.put(this, b); - x.put(b, b); - - for (int i = 0; i < keys.size(); i++) - { - Pattern p = (Pattern) h.get(keys.elementAt(i)); - b.h.put(keys.elementAt(i), p.clone(x)); - } - return b; - } - - // this function eliminates Branches with 0 or 1 elements. - final Pattern reduce(boolean ignoreCase, boolean dontMinQ) - { - if (h.size() == 1) - { - Enumeration e = h.keys(); - Character c = (Character) e.nextElement(); - Pattern oc; - if (ignoreCase || dontMinQ) - { - oc = new oneChar(c.charValue()); - } - else - { - oc = new FastChar(c.charValue()); - } - oc.next = (Pattern) h.get(c); - oc.add(next); - return oc; - } - else if (h.size() == 0) - { - return null; - } - return this; - } - - public patInt maxChars() - { - Enumeration e = h.keys(); - patInt count = new patInt(0); - while (e.hasMoreElements()) - { - Object key = e.nextElement(); - Pattern pa = (Pattern) h.get(key); - patInt pi = pa.maxChars(); - pi.inc(); - count.maxeq(pi); - } - return count; - } - - public patInt minChars() - { - Enumeration e = h.keys(); - patInt count = new patInt(0); - while (e.hasMoreElements()) - { - Object key = e.nextElement(); - Pattern pa = (Pattern) h.get(key); - patInt pi = pa.minChars(); - pi.inc(); - count.mineq(pi); - } - return count; - } - - // adds a oneChar object to this Branch - void addc(oneChar o, boolean ignoreCase, boolean dontMinQ) - { - Pattern n = o.next; - if (n == null) - { - n = new NullPattern(); - } - else - { - n = RegOpt.opt(n, ignoreCase, dontMinQ); - } - n.setParent(this); - set(new Character(o.c), n, ignoreCase, dontMinQ); - if (ignoreCase) - { - if (o.c != o.altc) - { - set(new Character(o.altc), n, ignoreCase, dontMinQ); - } - if (o.c != o.altc2 && o.altc != o.altc2) - { - set(new Character(o.altc2), n, ignoreCase, dontMinQ); - } - } - } - - void set(Character c, Pattern n, boolean igc, boolean dontMinQ) - { - Pattern p = (Pattern) h.get(c); - next = null; - // This letter is not yet used in the Branch object. - // We need to add it. - if (p == null) - { - if (n instanceof Or) - { - // A NullPattern is prepended to an Or - // to prevent confusing this object. - // For example: (boo|bug) => (b(?:oo|ug)) - // during this process. However, we - // want (b(?:oo|ell)|bug) - NullPattern np = new NullPattern(); - np.add(n); - h.put(c, np); - } - else - { - h.put(c, n); - } - // Make sure we remember the order things were - // added into the Branch object so that we can - // properly convert it to a String. - keys.addElement(c); - } - else if (p instanceof Or) - { - ( (Or) p).addOr(n); - } - else if (p instanceof oneChar && n instanceof oneChar - && ( (oneChar) p).c != ( (oneChar) n).c) - { - Branch b = new Branch(); - b.addc( (oneChar) p, igc, dontMinQ); - b.addc( (oneChar) n, igc, dontMinQ); - h.put(c, b); - b.setParent(this); - } - else if (p instanceof Branch && n instanceof oneChar) - { - ( (Branch) p).addc( (oneChar) n, igc, dontMinQ); - n.setParent(p); - } - else - { - // Create an Or object to receive the variety - // of branches in the pattern if the current letter - // is matched. We do not attempt to make these - // sub-branches into a Branch object yet. - Or o = new Or(); - o.setParent(this); - - // Remove NullPattern from p -- it's no longer needed. - if (p instanceof NullPattern - && p.parent == null && p.next != null) - { - o.addOr(p.next); - } - else - { - o.addOr(p); - } - o.addOr(n); - - Pattern optpat = RegOpt.opt(o, igc, dontMinQ); - h.put(c, optpat); - optpat.setParent(this); - } - } - - public String toString() - { - StringBuffer sb = new StringBuffer(); - // should protect this... - sb.append("(?:(?#branch)"); // Hashtable)"); - for (int i = 0; i < keys.size(); i++) - { - Character c = (Character) keys.elementAt(i); - sb.append(c); - sb.append(h.get(c)); - if (i + 1 < keys.size()) - { - sb.append("|"); - } - } - sb.append(")"); - sb.append(nextString()); - return sb.toString(); - } - - public int matchInternal(int pos, Pthings pt) - { - if (pos >= pt.src.length()) - { - return -1; - } - Pattern n = (Pattern) h.get(new Character(pt.src.charAt(pos))); - if (n == null) - { - return -1; - } - if (pt.cbits != null && pt.cbits.get(pos)) - { - return -1; - } - return n.matchInternal(pos + 1, pt); - } -} - -/** This is just a place to put the optimizing function. - It is never instantiated as an Object. It just sorts - through the RegOpt looking for things it can change - and make faster. */ -public class RegOpt -{ - static Pattern opt(Pattern p, boolean ignoreCase, - boolean dontMinQ) - { - if (p == null) - { - return p; - } - if (p instanceof Bracket) - { - Bracket b = (Bracket) p; - // FastBracket is the only special - // optimized class to have its own - // source file. - p = FastBracket.process(b, ignoreCase); - //if(!(p instanceof FastBracket) - //p = Switch.process(b,ignoreCase); - p.next = b.next; - p.parent = b.parent; - } - else if (p instanceof oneChar && !ignoreCase - && !dontMinQ) - { - oneChar o = (oneChar) p; - p = new FastChar(o.c); - p.next = o.next; - p.parent = o.parent; - } - else if (p instanceof Or - && ( (Or) p).leftForm().equals("(?:") - && ( (Or) p).v.size() == 1) - { // Eliminate this Or Object. - Or o = (Or) p; - p = (Pattern) o.v.elementAt(0); - p.setParent(null); - p = RegOpt.opt(p, ignoreCase, dontMinQ); - p.add(o.next); - } - else if (p instanceof Or) - { - Or o = (Or) p; - o.pv = null; - Vector v = o.v; - o.v = new Vector(); - Branch b = new Branch(); - b.parent = o.parent; - for (int i = 0; i < v.size(); i++) - { - Pattern pp = (Pattern) v.elementAt(i); - // We want to have at least two oneChar's in - // the Or Object to consider making a Branch. - if (pp instanceof oneChar && (b.h.size() >= 1 || - (i + 1 < v.size() && - v.elementAt(i + 1) instanceof oneChar))) - { - b.addc( (oneChar) pp, ignoreCase, dontMinQ); - } - else - { - if (b.keys.size() > 0) - { - Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ); - if (p2 != null) - { - o.addOr(p2); - b = new Branch(); - b.parent = o.parent; - } - } - o.addOr(opt(pp, ignoreCase, dontMinQ)); - } - } - if (b.keys.size() > 0) - { - Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ); - if (p2 != null) - { - o.addOr(p2); - } - } - if (o.v.size() == 1 - && o.leftForm().equals("(?:")) - { // Eliminate Or Object - p = (Pattern) o.v.elementAt(0); - p.setParent(null); - p = RegOpt.opt(p, ignoreCase, dontMinQ); - p.add(o.next); - } - } - else if (p instanceof FastMulti) - { - PatternSub ps = (PatternSub) p; - ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ); - } - else if (p instanceof Multi && safe4fm( ( (PatternSub) p).sub)) - { - Multi m = (Multi) p; - FastMulti fm = null; - try - { - fm = new FastMulti(m.a, m.b, - opt(m.sub, ignoreCase, dontMinQ)); - } - catch (RegSyntax rs) - {} - fm.parent = m.parent; - fm.matchFewest = m.matchFewest; - fm.next = m.next; - p = fm; - } - if (p.next != null) - { - p.next = opt(p.next, ignoreCase, dontMinQ); - } - return p; - } - - final static boolean safe4fm(Pattern x) - { - while (x != null) - { - if (x instanceof Bracket) - { - ; - } - else if (x instanceof Range) - { - ; - } - else if (x instanceof oneChar) - { - ; - } - else if (x instanceof Any) - { - ; - } - else if (x instanceof Custom - && ( (Custom) x).v instanceof UniValidator) - { - ; - } - else if (x instanceof Or) - { - Or o = (Or) x; - if (!o.leftForm().equals("(?:")) - { - return false; - } - patInt lo = o.countMinChars(); - patInt hi = o.countMaxChars(); - if (!lo.equals(hi)) - { - return false; - } - for (int i = 0; i < o.v.size(); i++) - { - if (!safe4fm( (Pattern) o.v.elementAt(i))) - { - return false; - } - } - } - else - { - return false; - } - x = x.next; - } - return true; - } - /* - public static void setParents(Regex r) { - setParents(r.thePattern,null); - } - static void setParents(Pattern p,Pattern x) { - if(p instanceof PatternSub && !(p instanceof FastMulti) - && !(p instanceof DotMulti)) - RegOpt.setParents( ((PatternSub)p).sub, p); - else if(p instanceof Or && !(p instanceof Bracket)) { - Or o = (Or)p; - for(int i=0;i (b(?:oo|ug)) + // during this process. However, we + // want (b(?:oo|ell)|bug) + NullPattern np = new NullPattern(); + np.add(n); + h.put(c, np); + } + else + { + h.put(c, n); + } + // Make sure we remember the order things were + // added into the Branch object so that we can + // properly convert it to a String. + keys.addElement(c); + } + else if (p instanceof Or) + { + ((Or) p).addOr(n); + } + else if (p instanceof oneChar && n instanceof oneChar + && ((oneChar) p).c != ((oneChar) n).c) + { + Branch b = new Branch(); + b.addc((oneChar) p, igc, dontMinQ); + b.addc((oneChar) n, igc, dontMinQ); + h.put(c, b); + b.setParent(this); + } + else if (p instanceof Branch && n instanceof oneChar) + { + ((Branch) p).addc((oneChar) n, igc, dontMinQ); + n.setParent(p); + } + else + { + // Create an Or object to receive the variety + // of branches in the pattern if the current letter + // is matched. We do not attempt to make these + // sub-branches into a Branch object yet. + Or o = new Or(); + o.setParent(this); + + // Remove NullPattern from p -- it's no longer needed. + if (p instanceof NullPattern && p.parent == null && p.next != null) + { + o.addOr(p.next); + } + else + { + o.addOr(p); + } + o.addOr(n); + + Pattern optpat = RegOpt.opt(o, igc, dontMinQ); + h.put(c, optpat); + optpat.setParent(this); + } + } + + public String toString() + { + StringBuffer sb = new StringBuffer(); + // should protect this... + sb.append("(?:(?#branch)"); // Hashtable)"); + for (int i = 0; i < keys.size(); i++) + { + Character c = (Character) keys.elementAt(i); + sb.append(c); + sb.append(h.get(c)); + if (i + 1 < keys.size()) + { + sb.append("|"); + } + } + sb.append(")"); + sb.append(nextString()); + return sb.toString(); + } + + public int matchInternal(int pos, Pthings pt) + { + if (pos >= pt.src.length()) + { + return -1; + } + Pattern n = (Pattern) h.get(new Character(pt.src.charAt(pos))); + if (n == null) + { + return -1; + } + if (pt.cbits != null && pt.cbits.get(pos)) + { + return -1; + } + return n.matchInternal(pos + 1, pt); + } +} + +/** + * This is just a place to put the optimizing function. It is never instantiated + * as an Object. It just sorts through the RegOpt looking for things it can + * change and make faster. + */ +public class RegOpt +{ + static Pattern opt(Pattern p, boolean ignoreCase, boolean dontMinQ) + { + if (p == null) + { + return p; + } + if (p instanceof Bracket) + { + Bracket b = (Bracket) p; + // FastBracket is the only special + // optimized class to have its own + // source file. + p = FastBracket.process(b, ignoreCase); + // if(!(p instanceof FastBracket) + // p = Switch.process(b,ignoreCase); + p.next = b.next; + p.parent = b.parent; + } + else if (p instanceof oneChar && !ignoreCase && !dontMinQ) + { + oneChar o = (oneChar) p; + p = new FastChar(o.c); + p.next = o.next; + p.parent = o.parent; + } + else if (p instanceof Or && ((Or) p).leftForm().equals("(?:") + && ((Or) p).v.size() == 1) + { // Eliminate this Or Object. + Or o = (Or) p; + p = (Pattern) o.v.elementAt(0); + p.setParent(null); + p = RegOpt.opt(p, ignoreCase, dontMinQ); + p.add(o.next); + } + else if (p instanceof Or) + { + Or o = (Or) p; + o.pv = null; + Vector v = o.v; + o.v = new Vector(); + Branch b = new Branch(); + b.parent = o.parent; + for (int i = 0; i < v.size(); i++) + { + Pattern pp = (Pattern) v.elementAt(i); + // We want to have at least two oneChar's in + // the Or Object to consider making a Branch. + if (pp instanceof oneChar + && (b.h.size() >= 1 || (i + 1 < v.size() && v + .elementAt(i + 1) instanceof oneChar))) + { + b.addc((oneChar) pp, ignoreCase, dontMinQ); + } + else + { + if (b.keys.size() > 0) + { + Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ); + if (p2 != null) + { + o.addOr(p2); + b = new Branch(); + b.parent = o.parent; + } + } + o.addOr(opt(pp, ignoreCase, dontMinQ)); + } + } + if (b.keys.size() > 0) + { + Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ); + if (p2 != null) + { + o.addOr(p2); + } + } + if (o.v.size() == 1 && o.leftForm().equals("(?:")) + { // Eliminate Or Object + p = (Pattern) o.v.elementAt(0); + p.setParent(null); + p = RegOpt.opt(p, ignoreCase, dontMinQ); + p.add(o.next); + } + } + else if (p instanceof FastMulti) + { + PatternSub ps = (PatternSub) p; + ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ); + } + else if (p instanceof Multi && safe4fm(((PatternSub) p).sub)) + { + Multi m = (Multi) p; + FastMulti fm = null; + try + { + fm = new FastMulti(m.a, m.b, opt(m.sub, ignoreCase, dontMinQ)); + } catch (RegSyntax rs) + { + } + fm.parent = m.parent; + fm.matchFewest = m.matchFewest; + fm.next = m.next; + p = fm; + } + if (p.next != null) + { + p.next = opt(p.next, ignoreCase, dontMinQ); + } + return p; + } + + final static boolean safe4fm(Pattern x) + { + while (x != null) + { + if (x instanceof Bracket) + { + ; + } + else if (x instanceof Range) + { + ; + } + else if (x instanceof oneChar) + { + ; + } + else if (x instanceof Any) + { + ; + } + else if (x instanceof Custom + && ((Custom) x).v instanceof UniValidator) + { + ; + } + else if (x instanceof Or) + { + Or o = (Or) x; + if (!o.leftForm().equals("(?:")) + { + return false; + } + patInt lo = o.countMinChars(); + patInt hi = o.countMaxChars(); + if (!lo.equals(hi)) + { + return false; + } + for (int i = 0; i < o.v.size(); i++) + { + if (!safe4fm((Pattern) o.v.elementAt(i))) + { + return false; + } + } + } + else + { + return false; + } + x = x.next; + } + return true; + } + /* + * public static void setParents(Regex r) { setParents(r.thePattern,null); } + * static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub && + * !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents( + * ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof + * Bracket)) { Or o = (Or)p; for(int i=0;i