needed for applet search
[jalview.git] / src / com / stevesoft / pat / RegOpt.java
diff --git a/src/com/stevesoft/pat/RegOpt.java b/src/com/stevesoft/pat/RegOpt.java
new file mode 100755 (executable)
index 0000000..1033500
--- /dev/null
@@ -0,0 +1,336 @@
+//\r
+// This software is now distributed according to\r
+// the Lesser Gnu Public License.  Please see\r
+// http://www.gnu.org/copyleft/lesser.txt for\r
+// the details.\r
+//    -- Happy Computing!\r
+//\r
+package com.stevesoft.pat;\r
+import java.util.*;\r
+import java.io.*;\r
+\r
+/** This class is just like oneChar, but doesn't worry about case. */\r
+class FastChar extends oneChar {\r
+    FastChar(char c) { super(c); }\r
+    public int matchInternal(int p,Pthings pt) {\r
+        return (p < pt.src.length()\r
+            && pt.src.charAt(p)==c) ?\r
+            nextMatch(p+1,pt) : -1;\r
+    }\r
+    Pattern clone1(Hashtable h) {\r
+        return new FastChar(c);\r
+    }\r
+}\r
+\r
+/** This class is a hashtable keyed by Character\r
+  * Objects.  It is used to match things of the\r
+  * form (?:a..|b..|c..|..) match with greater efficiency --\r
+  * by using a Hashtable that indexes into the group\r
+  * of patterns.\r
+  */\r
+class Branch extends Pattern {\r
+    Hashtable h = new Hashtable();\r
+    // We need to keep track of the order\r
+    // of the keys -- if we don't then\r
+    // recompiling the output of toString\r
+    // may produce errors by re-ordering\r
+    // ()'s and changing the id number of\r
+    // the backreference associated with\r
+    // a subpattern.\r
+    Vector keys = new Vector();\r
+    Branch() {}\r
+    Pattern clone1(Hashtable x) {\r
+        Branch b = new Branch();\r
+        b.keys = (Vector)keys.clone();\r
+        x.put(this,b);\r
+        x.put(b,b);\r
+\r
+        for(int i=0;i<keys.size();i++) {\r
+            Pattern p = (Pattern)h.get(keys.elementAt(i));\r
+            b.h.put(keys.elementAt(i),p.clone(x));\r
+        }\r
+        return b;\r
+    }\r
+\r
+    // this function eliminates Branches with 0 or 1 elements.\r
+    final Pattern reduce(boolean ignoreCase,boolean dontMinQ) {\r
+        if(h.size()==1) {\r
+            Enumeration e = h.keys();\r
+            Character c = (Character)e.nextElement();\r
+            Pattern oc;\r
+            if(ignoreCase||dontMinQ)\r
+                oc=new oneChar(c.charValue());\r
+            else oc=new FastChar(c.charValue());\r
+            oc.next = (Pattern)h.get(c);\r
+            oc.add(next);\r
+            return oc;\r
+        } else if(h.size()==0) return null;\r
+        return this;\r
+    }\r
+    public patInt maxChars() {\r
+        Enumeration e = h.keys();\r
+        patInt count = new patInt(0);\r
+        while(e.hasMoreElements()) {\r
+            Object key = e.nextElement();\r
+            Pattern pa = (Pattern)h.get(key);\r
+            patInt pi = pa.maxChars();\r
+            pi.inc();\r
+            count.maxeq(pi);\r
+        }\r
+        return count;\r
+    }\r
+    public patInt minChars() {\r
+        Enumeration e = h.keys();\r
+        patInt count = new patInt(0);\r
+        while(e.hasMoreElements()) {\r
+            Object key = e.nextElement();\r
+            Pattern pa = (Pattern)h.get(key);\r
+            patInt pi = pa.minChars();\r
+            pi.inc();\r
+            count.mineq(pi);\r
+        }\r
+        return count;\r
+    }\r
+\r
+    // adds a oneChar object to this Branch\r
+    void addc(oneChar o,boolean ignoreCase,boolean dontMinQ) {\r
+        Pattern n = o.next;\r
+        if(n == null)\r
+            n = new NullPattern();\r
+        else\r
+            n = RegOpt.opt(n,ignoreCase,dontMinQ);\r
+        n.setParent(this);\r
+        set(new Character(o.c),n,ignoreCase,dontMinQ);\r
+        if(ignoreCase) {\r
+            if(o.c != o.altc)\r
+                set(new Character(o.altc),n,ignoreCase,dontMinQ);\r
+            if(o.c != o.altc2 && o.altc != o.altc2)\r
+                set(new Character(o.altc2),n,ignoreCase,dontMinQ);\r
+        }\r
+    }\r
+    void set(Character c,Pattern n,boolean igc,boolean dontMinQ) {\r
+        Pattern p = (Pattern)h.get(c);\r
+        next = null;\r
+        // This letter is not yet used in the Branch object.\r
+        // We need to add it.\r
+        if(p==null) {\r
+            if(n instanceof Or) {\r
+                // A NullPattern is prepended to an Or\r
+                // to prevent confusing this object.\r
+                // For example: (boo|bug) => (b(?:oo|ug))\r
+                // during this process.  However, we\r
+                // want (b(?:oo|ell)|bug)\r
+                NullPattern np = new NullPattern();\r
+                np.add(n);\r
+                h.put(c,np);\r
+            } else {\r
+                h.put(c,n);\r
+            }\r
+            // Make sure we remember the order things were\r
+            // added into the Branch object so that we can\r
+            // properly convert it to a String.\r
+            keys.addElement(c);\r
+        } else if(p instanceof Or) {\r
+            ((Or)p).addOr(n);\r
+        } else if(p instanceof oneChar && n instanceof oneChar\r
+                && ((oneChar)p).c != ((oneChar)n).c) {\r
+            Branch b = new Branch();\r
+            b.addc((oneChar)p,igc,dontMinQ);\r
+            b.addc((oneChar)n,igc,dontMinQ);\r
+            h.put(c,b);\r
+            b.setParent(this);\r
+        } else if(p instanceof Branch && n instanceof oneChar) {\r
+            ((Branch)p).addc((oneChar)n,igc,dontMinQ);\r
+            n.setParent(p);\r
+        } else {\r
+            // Create an Or object to receive the variety\r
+            // of branches in the pattern if the current letter\r
+            // is matched.  We do not attempt to make these\r
+            // sub-branches into a Branch object yet.\r
+            Or o = new Or();\r
+            o.setParent(this);\r
+\r
+            // Remove NullPattern from p -- it's no longer needed.\r
+            if(p instanceof NullPattern\r
+                    && p.parent == null && p.next != null) {\r
+                o.addOr(p.next);\r
+            } else {\r
+                o.addOr(p);\r
+            }\r
+            o.addOr(n);\r
+\r
+            Pattern optpat = RegOpt.opt(o,igc,dontMinQ);\r
+            h.put(c,optpat);\r
+            optpat.setParent(this);\r
+        }\r
+    }\r
+    public String toString() {\r
+        StringBuffer sb = new StringBuffer();\r
+        // should protect this...\r
+        sb.append("(?:(?#branch)");// Hashtable)");\r
+        for(int i=0;i<keys.size();i++) {\r
+            Character c = (Character)keys.elementAt(i);\r
+            sb.append(c);\r
+            sb.append(h.get(c));\r
+            if(i+1<keys.size())\r
+                sb.append("|");\r
+        }\r
+        sb.append(")");\r
+        sb.append(nextString());\r
+        return sb.toString();\r
+    }\r
+    public int matchInternal(int pos,Pthings pt) {\r
+        if(pos >= pt.src.length()) return -1;\r
+        Pattern n = (Pattern)h.get(new Character(pt.src.charAt(pos)));\r
+        if(n == null) return -1;\r
+        if(pt.cbits != null && pt.cbits.get(pos)) return -1;\r
+        return n.matchInternal(pos+1,pt);\r
+    }\r
+}\r
+\r
+/** This is just a place to put the optimizing function.\r
+    It is never instantiated as an Object.  It just sorts\r
+    through the RegOpt looking for things it can change\r
+    and make faster. */\r
+public class RegOpt {\r
+    static Pattern opt(Pattern p,boolean ignoreCase,\r
+        boolean dontMinQ) {\r
+        if(p == null) return p;\r
+        if(p instanceof Bracket) {\r
+            Bracket b = (Bracket)p;\r
+            // FastBracket is the only special\r
+            // optimized class to have its own\r
+            // source file.\r
+            p = FastBracket.process(b,ignoreCase);\r
+            //if(!(p instanceof FastBracket)\r
+            //p = Switch.process(b,ignoreCase);\r
+            p.next = b.next;\r
+            p.parent = b.parent;\r
+        } else if(p instanceof oneChar && !ignoreCase\r
+                && !dontMinQ) {\r
+            oneChar o = (oneChar)p;\r
+            p = new FastChar(o.c);\r
+            p.next = o.next;\r
+            p.parent = o.parent;\r
+        } else if(p instanceof Or\r
+                && ((Or)p).leftForm().equals("(?:")\r
+                && ((Or)p).v.size()==1) { // Eliminate this Or Object.\r
+            Or o = (Or)p;\r
+            p = (Pattern)o.v.elementAt(0);\r
+            p.setParent(null);\r
+            p = RegOpt.opt(p,ignoreCase,dontMinQ);\r
+            p.add(o.next);\r
+        } else if(p instanceof Or) {\r
+            Or o = (Or)p;\r
+            o.pv = null;\r
+            Vector v = o.v;\r
+            o.v = new Vector();\r
+            Branch b = new Branch();\r
+            b.parent = o.parent;\r
+            for(int i=0;i<v.size();i++) {\r
+                Pattern pp = (Pattern)v.elementAt(i);\r
+                // We want to have at least two oneChar's in\r
+                // the Or Object to consider making a Branch.\r
+                if(pp instanceof oneChar && (b.h.size()>=1 ||\r
+                        (i+1<v.size() && v.elementAt(i+1) instanceof oneChar)))\r
+                    b.addc((oneChar)pp,ignoreCase,dontMinQ);\r
+                else {\r
+                    if(b.keys.size() > 0) {\r
+                        Pattern p2 = (Pattern)b.reduce(ignoreCase,dontMinQ);\r
+                        if(p2 != null) {\r
+                            o.addOr(p2);\r
+                            b = new Branch();\r
+                            b.parent = o.parent;\r
+                        }\r
+                    }\r
+                    o.addOr(opt(pp,ignoreCase,dontMinQ));\r
+                }\r
+            }\r
+            if(b.keys.size()>0) {\r
+                Pattern p2=(Pattern)b.reduce(ignoreCase,dontMinQ);\r
+                if(p2 != null)\r
+                    o.addOr(p2);\r
+            }\r
+            if(o.v.size()==1\r
+                    && o.leftForm().equals("(?:")) { // Eliminate Or Object\r
+                p = (Pattern)o.v.elementAt(0);\r
+                p.setParent(null);\r
+                p = RegOpt.opt(p,ignoreCase,dontMinQ);\r
+                p.add(o.next);\r
+            }\r
+        } else if(p instanceof FastMulti) {\r
+            PatternSub ps = (PatternSub)p;\r
+            ps.sub = RegOpt.opt(ps.sub,ignoreCase,dontMinQ);\r
+        } else if(p instanceof Multi && safe4fm( ((PatternSub)p).sub )) {\r
+            Multi m = (Multi)p;\r
+            FastMulti fm = null;\r
+            try {\r
+                fm = new FastMulti(m.a,m.b,\r
+                    opt(m.sub,ignoreCase,dontMinQ));\r
+            } catch(RegSyntax rs) {}\r
+            fm.parent = m.parent;\r
+            fm.matchFewest = m.matchFewest;\r
+            fm.next = m.next;\r
+            p = fm;\r
+        }\r
+        if(p.next != null)\r
+            p.next = opt(p.next,ignoreCase,dontMinQ);\r
+        return p;\r
+    }\r
+    final static boolean safe4fm(Pattern x) {\r
+        while(x != null) {\r
+            if(x instanceof Bracket)\r
+                ;\r
+            else if(x instanceof Range)\r
+                ;\r
+            else if(x instanceof oneChar)\r
+                ;\r
+            else if(x instanceof Any)\r
+                ;\r
+            else if(x instanceof Custom\r
+                    && ((Custom)x).v instanceof UniValidator)\r
+                ;\r
+            else if(x instanceof Or) {\r
+                Or o = (Or)x;\r
+                if(!o.leftForm().equals("(?:"))\r
+                    return false;\r
+                patInt lo = o.countMinChars();\r
+                patInt hi = o.countMaxChars();\r
+                if(!lo.equals(hi))\r
+                    return false;\r
+                for(int i=0;i<o.v.size();i++)\r
+                    if(!safe4fm((Pattern)o.v.elementAt(i)) )\r
+                        return false;\r
+            } else return false;\r
+            x = x.next;\r
+        }\r
+        return true;\r
+    }\r
+    /*\r
+    public static void setParents(Regex r) {\r
+      setParents(r.thePattern,null);\r
+    }\r
+    static void setParents(Pattern p,Pattern x) {\r
+      if(p instanceof PatternSub && !(p instanceof FastMulti)\r
+      && !(p instanceof DotMulti))\r
+        RegOpt.setParents( ((PatternSub)p).sub, p);\r
+      else if(p instanceof Or && !(p instanceof Bracket)) {\r
+        Or o = (Or)p;\r
+        for(int i=0;i<o.v.size();i++)\r
+          RegOpt.setParents((Pattern)o.v.elementAt(i),o);\r
+      } else if(p instanceof Branch) {\r
+        Branch b = (Branch)p;\r
+        Enumeration e = b.h.keys();\r
+        while(e.hasMoreElements()) {\r
+          Object o = e.nextElement();\r
+          RegOpt.setParents( (Pattern)b.h.get(o), b);\r
+        }\r
+      }\r
+      if(p.next == null)\r
+        p.parent = x;\r
+      else {\r
+        p.parent = null;\r
+        RegOpt.setParents(p.next,x);\r
+      }\r
+    }*/\r
+}\r