JAL-1807
[jalview.git] / src / com / stevesoft / pat / SkipBMH.java
index 4fbe381..8529fdb 100755 (executable)
-//\r
-// This software is now distributed according to\r
-// the Lesser Gnu Public License.  Please see\r
-// http://www.gnu.org/copyleft/lesser.txt for\r
-// the details.\r
-//    -- Happy Computing!\r
-//\r
-package com.stevesoft.pat;\r
-\r
-import com.stevesoft.pat.wrap.StringWrap;\r
-\r
-/** Like Skip, but implements a\r
-    <a href="http://www.dcc.uchile.cl/~rbaeza/handbook/algs/7/713b.srch.p.html">\r
-    Boyer-Moore-Horspool</a> type search\r
-    method that has been modified to be more like a "T-search" (see\r
-    the Michael Tamm''s article in <i>C'T, magazin fuer computer und technic</i>, August 97\r
-    p 292).  Yet another important source of information for me was\r
-    the <a href="http://www.go2net.com/people/paulp/deep/1997/05/14/">\r
-    Deep Magic</a> article on string searching.  As of this writing, I can\r
-    beat String's indexOf method in many cases.\r
-    @see com.stevesoft.pat.Skip\r
-    @see com.stevesoft.pat.Skip2\r
-    */\r
-public class SkipBMH extends Skip {\r
-    // This number could be 256, but I think it's\r
-    // big enough.  Note, it must be a power of 2.\r
-    final int MAX_CHAR = 64;\r
-    final char[] skip = new char[MAX_CHAR];\r
-    int sm1;\r
-    int jump_ahead = 0;\r
-    char uc,lc,tc,x;\r
-    final boolean exact(char c) {\r
-        return (ign && anyc(c))||c==x;\r
-    }\r
-    final boolean anyc(char c) {\r
-        return c==uc||c==lc||c==tc;\r
-    }\r
-    public SkipBMH(String pt,boolean ign) { this(pt,ign,0); }\r
-    public SkipBMH(String pt) { this(pt,false,0); }\r
-    public SkipBMH(String pt,boolean ign,int offset) {\r
-        super(pt,ign,offset);\r
-        for(int k=0;k<MAX_CHAR;k++)\r
-            skip[k] = (char)src.length();\r
-\r
-        sm1 = src.length()-1;\r
-        x = src.charAt(sm1);\r
-        uc=CaseMgr.toUpperCase(x);\r
-        lc=CaseMgr.toLowerCase(x);\r
-        tc=CaseMgr.toTitleCase(x);\r
-\r
-        // We don't really want 65536 long arrays in skip[],\r
-        // so we mask of the higher bits.  This can be combined\r
-        // with ignore case, so accounting for upper\r
-        // case costs us nothing extra.\r
-        for(int k=0;k<src.length()-1;k++) {\r
-            char x_ = src.charAt(k);\r
-            if(ign) {\r
-                char uc_ = CaseMgr.toUpperCase(x_);\r
-                char lc_ = CaseMgr.toLowerCase(x_);\r
-                char tc_ = CaseMgr.toTitleCase(x_);\r
-                skip[uc_ & (MAX_CHAR-1)]=(char)(src.length()-k-1);\r
-                skip[lc_ & (MAX_CHAR-1)]=(char)(src.length()-k-1);\r
-                skip[tc_ & (MAX_CHAR-1)]=(char)(src.length()-k-1);\r
-            } else\r
-                skip[x_ & (MAX_CHAR-1)] = (char)(src.length()-k-1);\r
-        }\r
-\r
-        // This trick can be found in the July issue of\r
-        // C-T magazine.  This makes the method a type of\r
-        // "T-search."\r
-        jump_ahead = src.length()-1;\r
-        for(int k=0;k<src.length()-1;k++) {\r
-            char y=src.charAt(sm1-k-1);\r
-            if(exact(y)) {\r
-                jump_ahead = k;\r
-                break;\r
-            }\r
-        }\r
-    }\r
-    /** Set to true if you only want to compare two of the\r
-        characters in the String. */\r
-    final public int searchRegion(String s,int start,int end) {\r
-        return find(s,start,end);\r
-    }\r
-    final public int searchFrom(String s,int start) {\r
-        return find(s,start,s.length());\r
-    }\r
-    final public int search(String s) { return find(s,0,s.length()); }\r
-    public int find(String s,int start,int end) {\r
-        start += offset+sm1;\r
-        int vend = min(s.length()-1,end+sm1+offset),k;\r
-        int vend1 = vend-jump_ahead;\r
-        if(ign) {\r
-            for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
-                // table look-up is expensive, avoid it if possible\r
-                if( anyc(s.charAt(k)) ) {\r
-                    if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))\r
-                        return k-sm1-offset;\r
-                    k += jump_ahead;\r
-                }\r
-            }\r
-            for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
-                // table look-up is expensive, avoid it if possible\r
-                if( anyc(s.charAt(k)) ) {\r
-                    if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))\r
-                        return k-sm1-offset;\r
-                    k += jump_ahead;\r
-                    if(k > vend) return -1;\r
-                }\r
-            }\r
-        } else {\r
-            for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
-                // table look-up is expensive, avoid it if possible\r
-                if( x==s.charAt(k) ) {\r
-                    //if(src.regionMatches(0,s,k-sm1,sm1))\r
-                    if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))\r
-                        return k-sm1-offset;\r
-                    k += jump_ahead;\r
-                }\r
-            }\r
-            for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
-                // table look-up is expensive, avoid it if possible\r
-                if( x==s.charAt(k) ) {\r
-                    //if(src.regionMatches(0,s,k-sm1,sm1))\r
-                    if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))\r
-                        return k-sm1-offset;\r
-                    k += jump_ahead;\r
-                    if(k > vend) return -1;\r
-                }\r
-            }\r
-        }\r
-\r
-        return -1;\r
-    }\r
-    public int find(StringLike s,int start,int end) {\r
-        if(s instanceof StringWrap)\r
-          return find(s.toString(),start,end);\r
-        start += offset+sm1;\r
-        int vend = min(s.length()-1,end+sm1+offset),k;\r
-        int vend1 = vend-jump_ahead;\r
-        if(ign) {\r
-            for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
-                // table look-up is expensive, avoid it if possible\r
-                if( anyc(s.charAt(k)) ) {\r
-                    if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))\r
-                        return k-sm1-offset;\r
-                    k += jump_ahead;\r
-                }\r
-            }\r
-            for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
-                // table look-up is expensive, avoid it if possible\r
-                if( anyc(s.charAt(k)) ) {\r
-                    if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1))\r
-                        return k-sm1-offset;\r
-                    k += jump_ahead;\r
-                    if(k > vend) return -1;\r
-                }\r
-            }\r
-        } else {\r
-            for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
-                // table look-up is expensive, avoid it if possible\r
-                if( x==s.charAt(k) ) {\r
-                    //if(src.regionMatches(0,s,k-sm1,sm1))\r
-                    if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))\r
-                        return k-sm1-offset;\r
-                    k += jump_ahead;\r
-                }\r
-            }\r
-            for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) {\r
-                // table look-up is expensive, avoid it if possible\r
-                if( x==s.charAt(k) ) {\r
-                    //if(src.regionMatches(0,s,k-sm1,sm1))\r
-                    if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1))\r
-                        return k-sm1-offset;\r
-                    k += jump_ahead;\r
-                    if(k > vend) return -1;\r
-                }\r
-            }\r
-        }\r
-\r
-        return -1;\r
-    }\r
-}\r
+//
+// This software is now distributed according to
+// the Lesser Gnu Public License.  Please see
+// http://www.gnu.org/copyleft/lesser.txt for
+// the details.
+//    -- Happy Computing!
+//
+package com.stevesoft.pat;
+
+import com.stevesoft.pat.wrap.*;
+
+/**
+ * Like Skip, but implements a <a
+ * href="http://www.dcc.uchile.cl/~rbaeza/handbook/algs/7/713b.srch.p.html">
+ * Boyer-Moore-Horspool</a> type search method that has been modified to be
+ * more like a "T-search" (see the Michael Tamm''s article in <i>C'T, magazin
+ * fuer computer und technic</i>, August 97 p 292). Yet another important
+ * source of information for me was the <a
+ * href="http://www.go2net.com/people/paulp/deep/1997/05/14/"> Deep Magic</a>
+ * article on string searching. As of this writing, I can beat String's indexOf
+ * method in many cases.
+ * 
+ * @see com.stevesoft.pat.Skip
+ * @see com.stevesoft.pat.Skip2
+ */
+public class SkipBMH extends Skip
+{
+  // This number could be 256, but I think it's
+  // big enough. Note, it must be a power of 2.
+  final int MAX_CHAR = 64;
+
+  final char[] skip = new char[MAX_CHAR];
+
+  int sm1;
+
+  int jump_ahead = 0;
+
+  char uc, lc, tc, x;
+
+  final boolean exact(char c)
+  {
+    return (ign && anyc(c)) || c == x;
+  }
+
+  final boolean anyc(char c)
+  {
+    return c == uc || c == lc || c == tc;
+  }
+
+  public SkipBMH(String pt, boolean ign)
+  {
+    this(pt, ign, 0);
+  }
+
+  public SkipBMH(String pt)
+  {
+    this(pt, false, 0);
+  }
+
+  public SkipBMH(String pt, boolean ign, int offset)
+  {
+    super(pt, ign, offset);
+    for (int k = 0; k < MAX_CHAR; k++)
+    {
+      skip[k] = (char) src.length();
+    }
+
+    sm1 = src.length() - 1;
+    x = src.charAt(sm1);
+    uc = CaseMgr.toUpperCaseC(x);
+    lc = CaseMgr.toLowerCaseC(x);
+    tc = CaseMgr.toTitleCaseC(x);
+
+    // We don't really want 65536 long arrays in skip[],
+    // so we mask of the higher bits. This can be combined
+    // with ignore case, so accounting for upper
+    // case costs us nothing extra.
+    for (int k = 0; k < src.length() - 1; k++)
+    {
+      char x_ = src.charAt(k);
+      if (ign)
+      {
+        char uc_ = CaseMgr.toUpperCaseC(x_);
+        char lc_ = CaseMgr.toLowerCaseC(x_);
+        char tc_ = CaseMgr.toTitleCaseC(x_);
+        skip[uc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
+        skip[lc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
+        skip[tc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
+      }
+      else
+      {
+        skip[x_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
+      }
+    }
+
+    // This trick can be found in the July issue of
+    // C-T magazine. This makes the method a type of
+    // "T-search."
+    jump_ahead = src.length() - 1;
+    for (int k = 0; k < src.length() - 1; k++)
+    {
+      char y = src.charAt(sm1 - k - 1);
+      if (exact(y))
+      {
+        jump_ahead = k;
+        break;
+      }
+    }
+  }
+
+  /**
+   * Set to true if you only want to compare two of the characters in the
+   * String.
+   */
+  final public int searchRegion(String s, int start, int end)
+  {
+    return find(s, start, end);
+  }
+
+  final public int searchFrom(String s, int start)
+  {
+    return find(s, start, s.length());
+  }
+
+  final public int search(String s)
+  {
+    return find(s, 0, s.length());
+  }
+
+  public int find(String s, int start, int end)
+  {
+    start += offset + sm1;
+    int vend = min(s.length() - 1, end + sm1 + offset), k;
+    int vend1 = vend - jump_ahead;
+    if (ign)
+    {
+      for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+      {
+        // table look-up is expensive, avoid it if possible
+        if (anyc(s.charAt(k)))
+        {
+          if (CaseMgr.regionMatches2(src, ign, 0, s, k - sm1, sm1))
+          {
+            return k - sm1 - offset;
+          }
+          k += jump_ahead;
+        }
+      }
+      for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+      {
+        // table look-up is expensive, avoid it if possible
+        if (anyc(s.charAt(k)))
+        {
+          if (CaseMgr.regionMatches2(src, ign, 0, s, k - sm1, sm1))
+          {
+            return k - sm1 - offset;
+          }
+          k += jump_ahead;
+          if (k > vend)
+          {
+            return -1;
+          }
+        }
+      }
+    }
+    else
+    {
+      for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+      {
+        // table look-up is expensive, avoid it if possible
+        if (x == s.charAt(k))
+        {
+          // if(src.regionMatches(0,s,k-sm1,sm1))
+          if (CaseMgr.regionMatches2(src, false, 0, s, k - sm1, sm1))
+          {
+            return k - sm1 - offset;
+          }
+          k += jump_ahead;
+        }
+      }
+      for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+      {
+        // table look-up is expensive, avoid it if possible
+        if (x == s.charAt(k))
+        {
+          // if(src.regionMatches(0,s,k-sm1,sm1))
+          if (CaseMgr.regionMatches2(src, false, 0, s, k - sm1, sm1))
+          {
+            return k - sm1 - offset;
+          }
+          k += jump_ahead;
+          if (k > vend)
+          {
+            return -1;
+          }
+        }
+      }
+    }
+
+    return -1;
+  }
+
+  public int find(StringLike s, int start, int end)
+  {
+    if (s instanceof StringWrap)
+    {
+      return find(s.toString(), start, end);
+    }
+    start += offset + sm1;
+    int vend = min(s.length() - 1, end + sm1 + offset), k;
+    int vend1 = vend - jump_ahead;
+    if (ign)
+    {
+      for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+      {
+        // table look-up is expensive, avoid it if possible
+        if (anyc(s.charAt(k)))
+        {
+          if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
+          {
+            return k - sm1 - offset;
+          }
+          k += jump_ahead;
+        }
+      }
+      for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+      {
+        // table look-up is expensive, avoid it if possible
+        if (anyc(s.charAt(k)))
+        {
+          if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
+          {
+            return k - sm1 - offset;
+          }
+          k += jump_ahead;
+          if (k > vend)
+          {
+            return -1;
+          }
+        }
+      }
+    }
+    else
+    {
+      for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+      {
+        // table look-up is expensive, avoid it if possible
+        if (x == s.charAt(k))
+        {
+          // if(src.regionMatches(0,s,k-sm1,sm1))
+          if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
+          {
+            return k - sm1 - offset;
+          }
+          k += jump_ahead;
+        }
+      }
+      for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
+      {
+        // table look-up is expensive, avoid it if possible
+        if (x == s.charAt(k))
+        {
+          // if(src.regionMatches(0,s,k-sm1,sm1))
+          if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
+          {
+            return k - sm1 - offset;
+          }
+          k += jump_ahead;
+          if (k > vend)
+          {
+            return -1;
+          }
+        }
+      }
+    }
+
+    return -1;
+  }
+}