needed for applet search
[jalview.git] / src / com / stevesoft / pat / FastBracket.java
diff --git a/src/com/stevesoft/pat/FastBracket.java b/src/com/stevesoft/pat/FastBracket.java
new file mode 100755 (executable)
index 0000000..2720cc4
--- /dev/null
@@ -0,0 +1,191 @@
+//\r
+// This software is now distributed according to\r
+// the Lesser Gnu Public License.  Please see\r
+// http://www.gnu.org/copyleft/lesser.txt for\r
+// the details.\r
+//    -- Happy Computing!\r
+//\r
+package com.stevesoft.pat;\r
+\r
+import java.util.*;\r
+\r
+/** Uses table lookup to match [] type constructs, but\r
+    only if it can use a lookup table 256 bits in size.\r
+    It is impractical to make a table if it is too large.\r
+    */\r
+public class FastBracket extends Bracket {\r
+    int min, max;\r
+    BitSet bs;\r
+    FastBracket(boolean n) { super(n); }\r
+    // This routine can optimize a bracket, possibly\r
+    // it will replace it with a FastBracket.\r
+    static Bracket process(Bracket b,boolean ignc) {\r
+        Vector v = b.v;\r
+        b.pv = null;\r
+        try {\r
+\r
+            // Expand out the vector to make separate\r
+            // entries for other cases if ignoreCase is\r
+            // turned on.\r
+            Vector nv = v;\r
+            if(ignc) {\r
+                nv = new Vector();\r
+                for(int i=0;i<v.size();i++) {\r
+                    Pattern p = (Pattern)v.elementAt(i);\r
+                    nv.addElement(p);\r
+                    if(p instanceof oneChar) {\r
+                        oneChar oc = (oneChar)p;\r
+                        nv.addElement(new oneChar(oc.altc));\r
+                    } else if(p instanceof Range) {\r
+                        Range ra = (Range)p;\r
+                        nv.addElement(new Range(ra.altlo,ra.althi));\r
+                    }\r
+                }\r
+            }\r
+            v = nv;\r
+\r
+            // Bubble sort, make sure elements\r
+            // are in order.  This will allow us\r
+            // to merge them.\r
+            for(int i=0;i<v.size()-1;i++) {\r
+                for(int j=0;j<v.size()-1;j++) {\r
+                    char c1 = getl(v.elementAt(j));\r
+                    char c2 = getl(v.elementAt(j+1));\r
+                    if(c2 < c1) {\r
+                        Object o = v.elementAt(j);\r
+                        v.setElementAt(v.elementAt(j+1),j);\r
+                        v.setElementAt(o,j+1);\r
+                    }\r
+                }\r
+            }\r
+\r
+            nv = new Vector();\r
+            // merge -- remove overlaps\r
+            Pattern p = (Pattern)v.elementAt(0);\r
+            nv.addElement(p);\r
+            for(int i=1;i<v.size();i++) {\r
+                if(geth(p)+1 >= getl(v.elementAt(i))) {\r
+                    Pattern p2 = (Pattern)v.elementAt(i);\r
+                    char lo = min(getl(p),getl(p2));\r
+                    char hi = max(geth(p),geth(p2));\r
+                    nv.setElementAt(p=mkelem(lo,hi),nv.size()-1);\r
+                } else {\r
+                    p = (Pattern)v.elementAt(i);\r
+                    nv.addElement(p);\r
+                }\r
+            }\r
+\r
+            b.v = v = nv;\r
+        } catch(RegSyntax e) {\r
+            e.printStackTrace();\r
+        }\r
+\r
+        // We don't want these things to be empty.\r
+        Vector negv = neg(v);\r
+        if(v.size()==1) return b;\r
+        if(negv.size()==1) {\r
+            b.v = negv;\r
+            b.neg = !b.neg;\r
+            return b;\r
+        }\r
+\r
+        // Now consider if we can make a FastBracket.\r
+        // Uses a BitSet to do a lookup.\r
+        FastBracket fb = newbrack(v,b.neg);\r
+        if(fb == null)\r
+            fb = newbrack(negv,!b.neg);\r
+        if(fb != null) {\r
+            fb.parent = b.parent;\r
+            fb.next = b.next;\r
+            return fb;\r
+        }\r
+\r
+        // return the normal Bracket.\r
+        return b;\r
+    }\r
+\r
+    // Build a FastBracket and set bits.  If this can't\r
+    // be done, return null.\r
+    final static FastBracket newbrack(Vector v,boolean neg) {\r
+        FastBracket fb = new FastBracket(neg);\r
+        fb.v = v;\r
+        if(v.size()==0) return null;\r
+        fb.min = getl(v.elementAt(0));\r
+        fb.max = geth(v.elementAt(v.size()-1));\r
+        if(fb.max-fb.min <= 256) {\r
+            fb.bs = new BitSet(fb.max-fb.min+1);\r
+            for(int i=0;i<v.size();i++) {\r
+                Object o = v.elementAt(i);\r
+                int min0 = getl(o)-fb.min;\r
+                int max0 = geth(o)-fb.min;\r
+                for(int j=min0;j<=max0;j++)\r
+                    fb.bs.set(j);\r
+            }\r
+            return fb;\r
+        }\r
+        return null;\r
+    }\r
+\r
+    // Negate a sorted Vector.  Applying this\r
+    // operation twice should yield the same Vector\r
+    // back.\r
+    final static Vector neg(Vector v) {\r
+        try {\r
+            Vector nv = new Vector();\r
+            if(v.size()==0) {\r
+                nv.addElement(new Range((char)0,(char)65535));\r
+                return nv;\r
+            }\r
+            int p0 = getl(v.elementAt(0));\r
+            if(p0!=0)\r
+                nv.addElement(mkelem((char)0,(char)(p0-1) ));\r
+            for(int i=0;i<v.size()-1;i++) {\r
+                int hi = getl(v.elementAt(i+1))-1;\r
+                int lo = geth(v.elementAt(i))+1;\r
+                nv.addElement(mkelem((char)lo,(char)hi));\r
+            }\r
+            int pN = geth(v.lastElement());\r
+            if(pN != 65535)\r
+                nv.addElement(mkelem((char)(pN+1),(char)65535));\r
+            return nv;\r
+        } catch(RegSyntax rs) {\r
+            return null;\r
+        }\r
+    }\r
+    // Make either a Range or oneChar Object, depending on which\r
+    // is appropriate.\r
+    final static Pattern mkelem(char lo,char hi) throws RegSyntax {\r
+        return lo==hi ? (Pattern)(new oneChar(lo)) : (Pattern)(new Range(lo,hi));\r
+    }\r
+    static final char min(char a,char b) {\r
+        return a<b ? a : b;\r
+    }\r
+    static final char max(char a,char b) {\r
+        return a>b ? a : b;\r
+    }\r
+\r
+    // getl -- get lower value of Range object,\r
+    // or get value of oneChar object.\r
+    final static char getl(Object o) {\r
+        Pattern p = (Pattern)o;\r
+        if(p instanceof Range)\r
+            return ((Range)p).lo;\r
+        return ((oneChar)p).c;\r
+    }\r
+    // geth -- get higher value of Range object,\r
+    // or get value of oneChar object.\r
+    final static char geth(Object o) {\r
+        Pattern p = (Pattern)o;\r
+        if(p instanceof Range)\r
+            return ((Range)p).hi;\r
+        return ((oneChar)p).c;\r
+    }\r
+\r
+    // This is the easy part!\r
+    public int matchInternal(int pos,Pthings pt) {\r
+        if(pos >= pt.src.length() || Masked(pos,pt)) return -1;\r
+        char c = pt.src.charAt(pos);\r
+        return (neg ^ (c >= min && c <= max && bs.get(c-min)) ) ?\r
+            nextMatch(pos+1,pt) : -1;\r
+    }\r
+}\r