--- /dev/null
+//\r
+// This software is now distributed according to\r
+// the Lesser Gnu Public License. Please see\r
+// http://www.gnu.org/copyleft/lesser.txt for\r
+// the details.\r
+// -- Happy Computing!\r
+//\r
+package com.stevesoft.pat;\r
+\r
+/** This class is used internally to search ahead for some\r
+ optimized Regex objects. It searches within a String\r
+ for occurrences of a given String -- like a more flexible\r
+ version of String.indexOf.\r
+ @see com.stevesoft.pat.Skip2\r
+ @see com.stevesoft.pat.SkipBMH\r
+ */\r
+public class Skip {\r
+    /** Build the case-insensitive filter mask for a character: the\r
+        bitwise complement of the OR of its upper, lower and title\r
+        case forms. A text character ch can only be equal to one of\r
+        those forms when (ch & mask) == 0, so the test is a cheap\r
+        pre-filter; it can still let through characters that are not\r
+        any of the three forms. */\r
+    static int mkmask(int c) {\r
+        char x = (char)c;\r
+        return ~( CaseMgr.toUpperCase(x) |\r
+                  CaseMgr.toLowerCase(x) |\r
+                  CaseMgr.toTitleCase(x));\r
+    }\r
+    // Reads Regex.BackRefOffset and discards the value.\r
+    // NOTE(review): presumably here to force Regex to be initialized\r
+    // before Skip is used -- confirm before removing.\r
+    static { int x = Regex.BackRefOffset; }\r
+    /** The String being searched for. */\r
+    String src;\r
+    /** c is the first character of src; mask is mkmask(c) when\r
+        ignoring case and 0 otherwise. */\r
+    int c,mask;\r
+    /** Added to the start/end positions before scanning and\r
+        subtracted again from the position that find reports. */\r
+    int offset;\r
+    /** ign is true when case is ignored; m1 is true when src is a\r
+        single character, in which case no region comparison is made. */\r
+    boolean ign,m1;\r
+    /** Examine a Regex to determine what String it will\r
+        attempt to skip to when searching for patterns.\r
+        Return null if we aren't doing this. */\r
+    public static String string(Regex r) {\r
+        return r.skipper == null ? null : r.skipper.src;\r
+    }\r
+    /** Determine the offset of the String within the pattern\r
+        that we are skipping to. Return -1 if we aren't doing\r
+        this. */\r
+    public static int offset(Regex r) {\r
+        return r.skipper == null ? -1 : r.skipper.offset;\r
+    }\r
+    /** Initialize, give it a String to search for, tell it\r
+        whether or not to ignoreCase, and what the offset is\r
+        of the String within the pattern.\r
+        @param s the String to search for; charAt(0) is read, so it\r
+                 must not be empty\r
+        @param ign true to ignore case while searching\r
+        @param o the offset stored in this object (see find) */\r
+    public Skip(String s,boolean ign,int o) {\r
+        src = s;\r
+        c = s.charAt(0);\r
+        // Subclasses and find() rely on mask being 0 when case matters.\r
+        mask = ign ? mkmask(c) : 0;\r
+        offset = o;\r
+        this.ign = ign;\r
+        m1 = (s.length()==1);\r
+    }\r
+    /** The same as find(s,0,s.length()) */\r
+    public final int find(StringLike s) {\r
+        return find(s,0,s.length());\r
+    }\r
+    /** Return the smaller of a and b. */\r
+    static final int min(int a,int b) { return a<b ? a : b; }\r
+    /** Searches a given region of text beginning at position start\r
+        and ending at position end for the skip object.\r
+        The stored offset is added to both bounds before scanning\r
+        (the upper bound is also clipped to the last index of s) and\r
+        subtracted from the index that is found.\r
+        When case is ignored and the skip String is a single\r
+        character, the result is the first position that passes the\r
+        mask filter, i.e. a candidate rather than a verified match.\r
+        @return the found index minus offset, or -1 if start is\r
+                greater than end or nothing was found */\r
+    public int find(StringLike s,int start,int end) {\r
+        if(start > end) {\r
+            return -1;\r
+        }\r
+        start += offset;\r
+        int vend = min(s.length()-1,end+offset);\r
+        // Choose the scan on ign. The earlier test (mask != c) was\r
+        // true for practically every input, so case-sensitive\r
+        // searches went through the mask filter with mask == 0,\r
+        // which accepts every character and skips nothing.\r
+        if(ign) {\r
+            for(int i=start;i<=vend;i++) {\r
+                if(0 == (s.charAt(i) & mask)) {\r
+                    if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) ) {\r
+                        return i-offset;\r
+                    }\r
+                }\r
+            }\r
+        } else {\r
+            for(int i=start;i<=vend;i++) {\r
+                if(c == s.charAt(i)) {\r
+                    if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) ) {\r
+                        return i-offset;\r
+                    }\r
+                }\r
+            }\r
+        }\r
+        return -1;\r
+    }\r
+    /** Build a Skip for a Regex from its pattern chain and its\r
+        ignoreCase flag; the pattern may be rewritten (see the\r
+        three-argument form) unless dontMatchInQuotes is set. */\r
+    static Skip findSkip(Regex r) {\r
+        return findSkip(r.thePattern,r.ignoreCase,!r.dontMatchInQuotes);\r
+    }\r
+    /** Look for things that can be skipped. Walks the chain of\r
+        Patterns starting at p:\r
+        a run of two or more oneChar elements yields a Skip2 (two\r
+        characters) or a SkipBMH (more than two);\r
+        an Or with exactly one alternative whose leftForm() is not\r
+        "(?!" is searched recursively and the offset accumulated so\r
+        far is added to the result;\r
+        any other element whose minChars() equals its maxChars()\r
+        just advances the offset;\r
+        the first element of any other kind ends the walk, giving a\r
+        single-character Skip for the last oneChar seen, or null if\r
+        there was none.\r
+        @param trnc when true and the run is longer than two\r
+               characters, the elements after the first one of the\r
+               run are replaced in the chain by a single Skipped node\r
+        @return the Skip found, or null if there is nothing to skip to */\r
+    static Skip findSkip(Pattern p,boolean ignoreCase,boolean trnc) {\r
+        StringBuffer sb = new StringBuffer();\r
+        Skip subsk = null;\r
+        int offset = 0;\r
+        // Last single character seen and the offset it was seen at.\r
+        int skipc = -1,skipoff=0;\r
+        for(;p != null;p = p.next) {\r
+            if(p instanceof oneChar) {\r
+                skipc = ((oneChar)p).c;\r
+                skipoff = offset;\r
+            }\r
+            if(p instanceof oneChar && p.next instanceof oneChar) {\r
+                // Collect the whole run of literal characters.\r
+                Pattern psav = p;\r
+                sb.append(((oneChar)p).c);\r
+                while(p.next instanceof oneChar) {\r
+                    sb.append(((oneChar)p.next).c);\r
+                    p = p.next;\r
+                }\r
+                String st = sb.toString();\r
+                Skip sk;\r
+                if(st.length()>2) {\r
+                    sk = new SkipBMH(st,ignoreCase,offset);\r
+                } else {\r
+                    sk = new Skip2(st,ignoreCase,offset);\r
+                }\r
+                if(trnc && st.length()>2) { // chop out a whole string...\r
+                    psav.next = new Skipped(st.substring(1));\r
+                    psav.next.next = p.next;\r
+                    psav.next.parent = p.parent;\r
+                }\r
+                return sk;\r
+            } else if(p instanceof Or && ((Or)p).v.size()==1\r
+                    && !((Or)p).leftForm().equals("(?!")\r
+                    && null != (subsk=\r
+                    findSkip( (Pattern)((Or)p).v.elementAt(0),ignoreCase,trnc) )) {\r
+                subsk.offset += offset;\r
+                return subsk;\r
+            } else if(p.minChars().equals(p.maxChars())) {\r
+                // Fixed-width element: it only shifts the offset.\r
+                offset += p.minChars().intValue();\r
+            } else {\r
+                return skipc < 0 ? null :\r
+                    new Skip(""+(char)skipc,ignoreCase,skipoff);\r
+            }\r
+        }\r
+        return null;\r
+    }\r
+}\r