// -- Happy Computing!\r
//\r
package com.stevesoft.pat;\r
-import java.util.*;\r
+\r
import java.io.*;\r
-import com.stevesoft.pat.wrap.StringWrap;\r
+import java.util.*;\r
\r
+import com.stevesoft.pat.wrap.*;\r
\r
/** Matches a Unicode punctuation character. */\r
-class UnicodePunct extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;\r
- }\r
+class UnicodePunct\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches a Unicode white space character. */\r
-class UnicodeWhite extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;\r
- }\r
+class UnicodeWhite\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches a character that is not a Unicode punctuation\r
- * character.\r
- */\r
-class NUnicodePunct extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;\r
- }\r
+ * character.\r
+ */\r
+class NUnicodePunct\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches a character that is not a\r
- * Unicode white space character.\r
- */\r
-class NUnicodeWhite extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;\r
- }\r
+ * Unicode white space character.\r
+ */\r
+class NUnicodeWhite\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches a Unicode word character: an alphanumeric or underscore. */\r
-class UnicodeW extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- if(from >= s.length()) return -1;\r
- char c = s.charAt(from);\r
- return (Prop.isAlphabetic(c)||Prop.isDecimalDigit(c)||c=='_') ? to : -1;\r
- }\r
+class UnicodeW\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ if (from >= s.length())\r
+ {\r
+ return -1;\r
+ }\r
+ char c = s.charAt(from);\r
+ return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to :\r
+ -1;\r
+ }\r
}\r
\r
/** Matches a character that is not a Unicode alphanumeric or underscore. */\r
-class NUnicodeW extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- if(from >= s.length()) return -1;\r
- char c = s.charAt(from);\r
- return !(Prop.isAlphabetic(c)||Prop.isDecimalDigit(c)||c=='_') ? to : -1;\r
- }\r
+class NUnicodeW\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ if (from >= s.length())\r
+ {\r
+ return -1;\r
+ }\r
+ char c = s.charAt(from);\r
+ return! (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to :\r
+ -1;\r
+ }\r
}\r
\r
/** Matches a Unicode decimal digit. */\r
-class UnicodeDigit extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to : -1;\r
- }\r
+class UnicodeDigit\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
+\r
/** Matches a character that is not a Unicode digit.*/\r
-class NUnicodeDigit extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to : -1;\r
- }\r
+class NUnicodeDigit\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches a Unicode math character. */\r
-class UnicodeMath extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && Prop.isMath(s.charAt(from)) ? to : -1;\r
- }\r
+class UnicodeMath\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
+\r
/** Matches a non-math Unicode character. */\r
-class NUnicodeMath extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;\r
- }\r
+class NUnicodeMath\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches a Unicode currency symbol. */\r
-class UnicodeCurrency extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;\r
- }\r
+class UnicodeCurrency\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
+\r
/** Matches a non-currency symbol Unicode character. */\r
-class NUnicodeCurrency extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;\r
- }\r
+class NUnicodeCurrency\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches a Unicode alphabetic character. */\r
-class UnicodeAlpha extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;\r
- }\r
+class UnicodeAlpha\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches a non-alphabetic Unicode character. */\r
-class NUnicodeAlpha extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to : -1;\r
- }\r
+class NUnicodeAlpha\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to : -1;\r
+ }\r
}\r
\r
/** Matches an upper case Unicode character. */\r
-class UnicodeUpper extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && isUpper(s.charAt(from)) ? to : -1;\r
+class UnicodeUpper\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && isUpper(s.charAt(from)) ? to : -1;\r
}\r
- final boolean isUpper(char c) {\r
+\r
+ final boolean isUpper(char c)\r
+ {\r
return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);\r
}\r
}\r
\r
/** Matches an upper case Unicode character. */\r
-class UnicodeLower extends UniValidator {\r
- public int validate(StringLike s,int from,int to) {\r
- return from<s.length() && isLower(s.charAt(from)) ? to : -1;\r
+class UnicodeLower\r
+ extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && isLower(s.charAt(from)) ? to : -1;\r
}\r
- final boolean isLower(char c) {\r
+\r
+ final boolean isLower(char c)\r
+ {\r
return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);\r
}\r
}\r
\r
/**\r
-Regex provides the parser which constructs the linked list of\r
-Pattern classes from a String.\r
-<p>\r
-For the purpose of this documentation, the fact that java interprets the\r
-backslash will be ignored. In practice, however, you will need a\r
-double backslash to obtain a string that contains a single backslash\r
-character. Thus, the example pattern "\b" should really be typed\r
-as "\\b" inside java code.\r
-<p>\r
-Note that Regex is part of package "com.stevesoft.pat".\r
-To use it, simply import\r
-com.stevesoft.pat.Regex at the top of your file.\r
-<p>\r
-Regex is made with a constructor that takes a String that defines\r
-the regular expression. Thus, for example\r
-<pre>\r
+ Regex provides the parser which constructs the linked list of\r
+ Pattern classes from a String.\r
+ <p>\r
+ For the purpose of this documentation, the fact that java interprets the\r
+ backslash will be ignored. In practice, however, you will need a\r
+ double backslash to obtain a string that contains a single backslash\r
+ character. Thus, the example pattern "\b" should really be typed\r
+ as "\\b" inside java code.\r
+ <p>\r
+ Note that Regex is part of package "com.stevesoft.pat".\r
+ To use it, simply import\r
+ com.stevesoft.pat.Regex at the top of your file.\r
+ <p>\r
+ Regex is made with a constructor that takes a String that defines\r
+ the regular expression. Thus, for example\r
+ <pre>\r
Regex r = new Regex("[a-c]*");\r
-</pre>\r
-matches any number of characters so long as the are 'a', 'b', or 'c').\r
-<p>\r
-To attempt to match the Pattern to a given string, you can use either\r
-the search(String) member function, or the matchAt(String,int position)\r
-member function. These functions return a boolean which tells you\r
-whether or not the thing worked, and sets the methods "charsMatched()"\r
-and "matchedFrom()" in the Regex object appropriately.\r
-<p>\r
-The portion of the string before the match can be obtained by the\r
-left() member, and the portion after the match can be obtained\r
-by the right() member.\r
-<p>\r
-Essentially, this package implements a syntax that is very much\r
-like the perl 5 regular expression syntax.\r
-\r
-Longer example:\r
-<pre>\r
+ </pre>\r
+ matches any number of characters so long as they are 'a', 'b', or 'c'.\r
+ <p>\r
+ To attempt to match the Pattern to a given string, you can use either\r
+ the search(String) member function, or the matchAt(String,int position)\r
+ member function. These functions return a boolean which tells you\r
+ whether or not the match succeeded, and update the values returned by\r
+ "charsMatched()" and "matchedFrom()" in the Regex object accordingly.\r
+ <p>\r
+ The portion of the string before the match can be obtained by the\r
+ left() member, and the portion after the match can be obtained\r
+ by the right() member.\r
+ <p>\r
+ Essentially, this package implements a syntax that is very much\r
+ like the perl 5 regular expression syntax.\r
+\r
+ Longer example:\r
+ <pre>\r
Regex r = new Regex("x(a|b)y");\r
r.matchAt("xay",0);\r
System.out.println("sub = "+r.stringMatched(1));\r
-</pre>\r
-The above would print "sub = a".\r
-<pre>\r
+ </pre>\r
+ The above would print "sub = a".\r
+ <pre>\r
r.left() // would return "x"\r
r.right() // would return "y"\r
-</pre>\r
-<p>\r
-Differences between this package and perl5:<br>\r
-The extended Pattern for setting flags, is now supported,\r
-but the flags are different. "(?i)" tells the pattern to\r
-ignore case, "(?Q)" sets the "dontMatchInQuotes" flag, and\r
-"(?iQ)" sets them both. You can change the escape character.\r
-The pattern <pre>(?e=#)#d+</pre> is the same as <pre>\d+</pre>,\r
-but note that the sequence <pre>(?e=#)</pre> <b>must</b> occur\r
-at the very beginning of the pattern. There may be other small\r
-differences as well. I will either make my package conform\r
-or note them as I become aware of them.\r
-<p>\r
-This package supports additional patterns not in perl5:\r
-<center>\r
-<table border=1>\r
-<tr><td>(?@())</td><td>Group</td><td>This matches all characters between\r
-the '(' character and the balancing ')' character. Thus, it will\r
-match "()" as well as "(())". The balancing characters are\r
-arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>\r
-<tr><td>(?<1)</td><td>Backup</td><td>Moves the pointer backwards within the text.\r
-This allows you to make a "look behind." It fails if it\r
-attempts to move to a position before the beginning of the string.\r
-"x(?<1)" is equivalent to "(?=x)". The number, 1 in this example,\r
-is the number of characters to move backwards.</td>\r
-</table>\r
-</center>\r
-</dl>\r
-@author Steven R. Brandt\r
-@version package com.stevesoft.pat, release 1.5.3\r
-@see Pattern\r
-*/\r
-public class Regex extends RegRes implements FilenameFilter {\r
- /** BackRefOffset gives the identity number of the first\r
- pattern. Version 1.0 used zero, version 1.1 uses 1 to be\r
- more compatible with perl. */\r
- static int BackRefOffset = 1;\r
- private static Pattern none = new NoPattern();\r
- Pattern thePattern = none;\r
- patInt minMatch = new patInt(0);\r
-\r
- static Hashtable validators = new Hashtable();\r
- static {\r
- define("p","(?>1)",new UnicodePunct());\r
- define("P","(?>1)",new NUnicodePunct());\r
- define("s","(?>1)",new UnicodeWhite());\r
- define("S","(?>1)",new NUnicodeWhite());\r
- define("w","(?>1)",new UnicodeW());\r
- define("W","(?>1)",new NUnicodeW());\r
- define("d","(?>1)",new UnicodeDigit());\r
- define("D","(?>1)",new NUnicodeDigit());\r
- define("m","(?>1)",new UnicodeMath());\r
- define("M","(?>1)",new NUnicodeMath());\r
- define("c","(?>1)",new UnicodeCurrency());\r
- define("C","(?>1)",new NUnicodeCurrency());\r
- define("a","(?>1)",new UnicodeAlpha());\r
- define("A","(?>1)",new NUnicodeAlpha());\r
- define("uc","(?>1)",new UnicodeUpper());\r
- define("lc","(?>1)",new UnicodeLower());\r
- }\r
-\r
- /** Set the dontMatch in quotes flag. */\r
- public void setDontMatchInQuotes(boolean b) {\r
- dontMatchInQuotes = b;\r
- }\r
- /** Find out if the dontMatchInQuotes flag is enabled. */\r
- public boolean getDontMatchInQuotes() {\r
- return dontMatchInQuotes;\r
- }\r
- boolean dontMatchInQuotes = false;\r
-\r
- /** Set the state of the ignoreCase flag. If set to true, then\r
- the pattern matcher will ignore case when searching for a\r
- match. */\r
- public void setIgnoreCase(boolean b) {\r
- ignoreCase = b;\r
- }\r
- /** Get the state of the ignoreCase flag. Returns true if we\r
- are ignoring the case of the pattern, false otherwise. */\r
- public boolean getIgnoreCase() {\r
- return ignoreCase;\r
- }\r
- boolean ignoreCase = false;\r
-\r
- static boolean defaultMFlag = false;\r
- /** Set the default value of the m flag. If it\r
- is set to true, then the MFlag will be on\r
- for any regex search executed. */\r
- public static void setDefaultMFlag(boolean mFlag) {\r
- defaultMFlag = mFlag;\r
- }\r
- /** Get the default value of the m flag. If it\r
- is set to true, then the MFlag will be on\r
- for any regex search executed. */\r
- public static boolean getDefaultMFlag() {\r
- return defaultMFlag;\r
- }\r
-\r
- /** Initializes the object without a Pattern. To supply a Pattern\r
- use compile(String s).\r
- @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
- */\r
- public Regex() {}\r
- /** Create and compile a Regex, but do not throw any exceptions.\r
- If you wish to have exceptions thrown for syntax errors,\r
- you must use the Regex(void) constructor to create the\r
- Regex object, and then call the compile method. Therefore, you\r
- should only call this method when you know your pattern is right.\r
- I will probably become more like\r
- @see com.stevesoft.pat.Regex#search(java.lang.String)\r
- @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
- */\r
- public Regex(String s) {\r
- try {\r
- compile(s);\r
- } catch(RegSyntax rs) {}\r
- }\r
-\r
- ReplaceRule rep = null;\r
- /** Create and compile both a Regex and a ReplaceRule.\r
- @see com.stevesoft.pat.ReplaceRule\r
- @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
- */\r
- public Regex(String s,String rp) {\r
- this(s);\r
- rep = ReplaceRule.perlCode(rp);\r
- }\r
- /** Create and compile a Regex, but give it the ReplaceRule\r
- specified. This allows the user finer control of the\r
- Replacement process, if that is desired.\r
- @see com.stevesoft.pat.ReplaceRule\r
- @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
- */\r
- public Regex(String s,ReplaceRule rp) {\r
- this(s);\r
- rep = rp;\r
- }\r
-\r
- /** Change the ReplaceRule of this Regex by compiling\r
- a new one using String rp. */\r
- public void setReplaceRule(String rp) {\r
- rep = ReplaceRule.perlCode(rp);\r
- repr = null; // Clear Replacer history\r
- }\r
-\r
- /** Change the ReplaceRule of this Regex to rp. */\r
- public void setReplaceRule(ReplaceRule rp) {\r
- rep = rp;\r
- }\r
- /** Test to see if a custom defined rule exists.\r
- @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator)\r
- */\r
- public static boolean isDefined(String nm) {\r
- return validators.get(nm) != null;\r
- }\r
- /** Removes a custom defined rule.\r
- @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator)\r
- */\r
- public static void undefine(String nm) {\r
- validators.remove(nm);\r
- }\r
- /** Defines a method to create a new rule. See test/deriv2.java\r
- and test/deriv3.java for examples of how to use it. */\r
- public static void define(String nm,String pat,Validator v) {\r
- v.pattern = pat;\r
- validators.put(nm,v);\r
- }\r
- /** Defines a shorthand for a pattern. The pattern will be\r
- invoked by a string that has the form "(??"+nm+")".\r
- */\r
- public static void define(String nm,String pat) {\r
- validators.put(nm,pat);\r
- }\r
-\r
- /** Get the current ReplaceRule. */\r
- public ReplaceRule getReplaceRule() { return rep; }\r
-\r
- Replacer repr = null;\r
- final Replacer _getReplacer() {\r
- return repr==null ? repr=new Replacer() : repr;\r
- }\r
- public Replacer getReplacer() {\r
- if(repr == null)\r
- repr = new Replacer();\r
- repr.rh.me = this;\r
- repr.rh.prev = null;\r
- return repr;\r
- }\r
- /** Replace the first occurence of this pattern in String s\r
- according to the ReplaceRule.\r
- @see com.stevesoft.pat.ReplaceRule\r
- @see com.stevesoft.pat.Regex#getReplaceRule()\r
- */\r
- public String replaceFirst(String s) {\r
- return _getReplacer().replaceFirstRegion(s,this,0,s.length()).toString();\r
- }\r
- /** Replace the first occurence of this pattern in String s\r
- beginning with position pos according to the ReplaceRule.\r
- @see com.stevesoft.pat.ReplaceRule\r
- @see com.stevesoft.pat.Regex#getReplaceRule()\r
- */\r
- public String replaceFirstFrom(String s,int pos) {\r
- return _getReplacer().replaceFirstRegion(s,this,pos,s.length()).toString();\r
- }\r
- /** Replace the first occurence of this pattern in String s\r
- beginning with position start and ending with end\r
- according to the ReplaceRule.\r
- @see com.stevesoft.pat.ReplaceRule\r
- @see com.stevesoft.pat.Regex#getReplaceRule()\r
- */\r
- public String replaceFirstRegion(String s,int start,int end) {\r
- return _getReplacer().replaceFirstRegion(s,this,start,end).toString();\r
- }\r
-\r
- /** Replace all occurences of this pattern in String s\r
- according to the ReplaceRule.\r
- @see com.stevesoft.pat.ReplaceRule\r
- @see com.stevesoft.pat.Regex#getReplaceRule()\r
- */\r
- public String replaceAll(String s) {\r
- return _getReplacer().replaceAllRegion(s,this,0,s.length()).toString();\r
- }\r
- public StringLike replaceAll(StringLike s) {\r
- return _getReplacer().replaceAllRegion(s,this,0,s.length());\r
- }\r
- /** Replace all occurences of this pattern in String s\r
- beginning with position pos according to the ReplaceRule.\r
- @see com.stevesoft.pat.ReplaceRule\r
- @see com.stevesoft.pat.Regex#getReplaceRule()\r
- */\r
- public String replaceAllFrom(String s,int pos) {\r
- return _getReplacer().replaceAllRegion(s,this,pos,s.length()).toString();\r
- }\r
- /** Replace all occurences of this pattern in String s\r
- beginning with position start and ending with end\r
- according to the ReplaceRule.\r
- @see com.stevesoft.pat.ReplaceRule\r
- @see com.stevesoft.pat.Regex#getReplaceRule()\r
- */\r
- public String replaceAllRegion(String s,int start,int end) {\r
- return _getReplacer().replaceAllRegion(s,this,start,end).toString();\r
- }\r
-\r
-\r
- /** Essentially clones the Regex object */\r
- public Regex(Regex r) {\r
- super((RegRes)r);\r
- dontMatchInQuotes = r.dontMatchInQuotes;\r
- esc = r.esc;\r
- ignoreCase = r.ignoreCase;\r
- gFlag = r.gFlag;\r
- if(r.rep==null)\r
- rep = null;\r
- else\r
- rep = (ReplaceRule)r.rep.clone();\r
- /* try {\r
- compile(r.toString());\r
- } catch(RegSyntax r_) {} */\r
- thePattern = r.thePattern.clone(new Hashtable());\r
- minMatch = r.minMatch;\r
- skipper = r.skipper;\r
- }\r
-\r
- /** By default,\r
- the escape character is the backslash, but you can\r
- make it anything you want by setting this variable. */\r
- public char esc = Pattern.ESC;\r
- /** This method compiles a regular expression, making it\r
- possible to call the search or matchAt methods.\r
- @exception com.stevesoft.pat.RegSyntax\r
- is thrown if a syntax error is encountered\r
- in the pattern.\r
- For example, "x{3,1}" or "*a" are not valid\r
- patterns.\r
- @see com.stevesoft.pat.Regex#search\r
- @see com.stevesoft.pat.Regex#matchAt\r
- */\r
- public void compile(String prepat) throws RegSyntax {\r
- String postpat = parsePerl.codify(prepat,true);\r
- String pat = postpat==null ? prepat : postpat;\r
- minMatch = null;\r
- ignoreCase = false;\r
- dontMatchInQuotes = false;\r
- Rthings mk = new Rthings(this);\r
- int offset = mk.val;\r
- String newpat = pat;\r
- thePattern = none;\r
- p = null;\r
- or = null;\r
- minMatch = new patInt(0);\r
- StrPos sp = new StrPos(pat,0);\r
- if(sp.incMatch("(?e=")) {\r
- char newEsc = sp.c;\r
- sp.inc();\r
- if(sp.match(')'))\r
- newpat = reEscape(pat.substring(6),\r
- newEsc,Pattern.ESC);\r
- } else if(esc != Pattern.ESC)\r
- newpat = reEscape(pat,esc,Pattern.ESC);\r
- thePattern = _compile(newpat,mk);\r
- numSubs_ = mk.val-offset;\r
- mk.set(this);\r
- }\r
-\r
- /* If a Regex is compared against a Regex, a check is\r
- done to see that the patterns are equal as well as\r
- the most recent match. If a Regex is compare with\r
- a RegRes, only the result of the most recent match\r
- is compared. */\r
- public boolean equals(Object o) {\r
- if(o instanceof Regex) {\r
- if(toString().equals(o.toString()))\r
- return super.equals(o);\r
- else\r
- return false;\r
- } else return super.equals(o);\r
- }\r
-\r
- /** A clone by any other name would smell as sweet. */\r
- public Object clone() {\r
- return new Regex(this);\r
- }\r
- /** Return a clone of the underlying RegRes object. */\r
- public RegRes result() {\r
- return (RegRes)super.clone();\r
- }\r
-\r
- // prep sets global variables of class\r
- // Pattern so that it can access them\r
- // during an attempt at a match\r
- Pthings pt = new Pthings();\r
- final Pthings prep(StringLike s) {\r
- //if(gFlag)\r
- pt.lastPos = matchedTo();\r
- if(pt.lastPos < 0) pt.lastPos = 0;\r
- if( (s==null ? null : s.unwrap()) != (src==null ? null : s.unwrap()) )\r
- pt.lastPos = 0;\r
- src = s;\r
- pt.dotDoesntMatchCR=dotDoesntMatchCR && (!sFlag);\r
- pt.mFlag = (mFlag | defaultMFlag);\r
- pt.ignoreCase = ignoreCase;\r
- pt.no_check = false;\r
- if(pt.marks != null)\r
- for(int i=0;i<pt.marks.length;i++)\r
- pt.marks[i]=-1;\r
- pt.marks = null;\r
- pt.nMarks = numSubs_;\r
- pt.src = s;\r
- if(dontMatchInQuotes)\r
- setCbits(s,pt);\r
- else\r
- pt.cbits = null;\r
- return pt;\r
- }\r
- /** Attempt to match a Pattern beginning\r
- at a specified location within the string.\r
- @see com.stevesoft.pat.Regex#search\r
- */\r
- public boolean matchAt(String s,int start_pos) {\r
- return _search(s,start_pos,start_pos);\r
- }\r
- /** Attempt to match a Pattern beginning\r
- at a specified location within the StringLike.\r
- @see com.stevesoft.pat.Regex#search\r
- */\r
- public boolean matchAt(StringLike s,int start_pos) {\r
- return _search(s,start_pos,start_pos);\r
- }\r
-\r
-\r
- /** Search through a String for the first\r
- occurrence of a match.\r
- @see com.stevesoft.pat.Regex#searchFrom\r
- @see com.stevesoft.pat.Regex#matchAt\r
- */\r
- public boolean search(String s) {\r
- if(s==null)\r
- throw new NullPointerException("Null String Given to Regex.search");\r
- return _search(s,0,s.length());\r
- }\r
- public boolean search(StringLike sl) {\r
- if(sl==null)\r
- throw new NullPointerException("Null StringLike Given to Regex.search");\r
- return _search(sl,0,sl.length());\r
- }\r
- public boolean reverseSearch(String s) {\r
- if(s==null)\r
- throw new NullPointerException("Null String Given to Regex.reverseSearch");\r
- return _reverseSearch(s,0,s.length());\r
- }\r
- public boolean reverseSearch(StringLike sl) {\r
- if(sl==null)\r
- throw new NullPointerException("Null StringLike Given to Regex.reverseSearch");\r
- return _reverseSearch(sl,0,sl.length());\r
- }\r
- /** Search through a String for the first\r
- occurence of a match, but start at position <pre>start</pre>*/\r
- public boolean searchFrom(String s,int start) {\r
- if(s==null)\r
- throw new NullPointerException("Null String Given to Regex.searchFrom");\r
- return _search(s,start,s.length());\r
- }\r
- public boolean searchFrom(StringLike s,int start) {\r
- if(s==null)\r
- throw new NullPointerException("Null String Given to Regex.searchFrom");\r
- return _search(s,start,s.length());\r
- }\r
- /** Search through a region of a String\r
- for the first occurence of a match. */\r
- public boolean searchRegion(String s,int start,int end) {\r
- if(s==null)\r
- throw new NullPointerException("Null String Given to Regex.searchRegion");\r
- return _search(s,start,end);\r
- }\r
- /** Set this to change the default behavior of the "." pattern.\r
- By default it now matches perl's behavior and fails to\r
- match the '\n' character. */\r
- public static boolean dotDoesntMatchCR = true;\r
- StringLike gFlags;\r
- int gFlagto = 0;\r
- boolean gFlag = false;\r
- /** Set the 'g' flag */\r
- public void setGFlag(boolean b) {\r
- gFlag = b;\r
- }\r
- /** Get the state of the 'g' flag. */\r
- public boolean getGFlag() {\r
- return gFlag;\r
- }\r
- boolean sFlag = false;\r
- /** Get the state of the sFlag */\r
- public boolean getSFlag() {\r
- return sFlag;\r
- }\r
- boolean mFlag = false;\r
- /** Get the state of the sFlag */\r
- public boolean getMFlag() {\r
- return mFlag;\r
- }\r
-\r
- final boolean _search(String s,int start,int end) {\r
- return _search(new StringWrap(s),start,end);\r
- }\r
- final boolean _search(StringLike s,int start,int end) {\r
- if(gFlag && gFlagto > 0 && gFlags!=null && s.unwrap()==gFlags.unwrap())\r
- start = gFlagto;\r
- gFlags = null;\r
-\r
- Pthings pt=prep(s);\r
-\r
- int up = (minMatch == null ? end : end-minMatch.i);\r
-\r
- if(up < start && end >= start) up = start;\r
-\r
- if(skipper == null) {\r
- for(int i=start;i<=up;i++) {\r
- charsMatched_ = thePattern.matchAt(s,i,pt);\r
- if(charsMatched_ >= 0) {\r
- matchFrom_ = thePattern.mfrom;\r
- marks = pt.marks;\r
- gFlagto = matchFrom_+charsMatched_;\r
- gFlags = s;\r
- return didMatch_=true;\r
- }\r
- }\r
- } else {\r
- pt.no_check = true;\r
- for(int i=start;i<=up;i++) {\r
- i = skipper.find(src,i,up);\r
- if(i<0) {\r
- charsMatched_ = matchFrom_ = -1;\r
- return didMatch_ = false;\r
- }\r
- charsMatched_ = thePattern.matchAt(s,i,pt);\r
- if(charsMatched_ >= 0) {\r
- matchFrom_ = thePattern.mfrom;\r
- marks = pt.marks;\r
- gFlagto = matchFrom_+charsMatched_;\r
- gFlags = s;\r
- return didMatch_=true;\r
- }\r
- }\r
- }\r
- return didMatch_=false;\r
- }\r
- /*final boolean _search(LongStringLike s,long start,long end) {\r
- if(gFlag && gFlagto > 0 && s==gFlags)\r
- start = gFlagto;\r
- gFlags = null;\r
-\r
- Pthings pt=prep(s);\r
-\r
- int up = end;//(minMatch == null ? end : end-minMatch.i);\r
-\r
- if(up < start && end >= start) up = start;\r
-\r
- if(skipper == null) {\r
- for(long i=start;i<=up;i++) {\r
- charsMatched_ = thePattern.matchAt(s,i,pt);\r
- if(charsMatched_ >= 0) {\r
- matchFrom_ = thePattern.mfrom;\r
- marks = pt.marks;\r
- gFlagto = matchFrom_+charsMatched_;\r
- return didMatch_=true;\r
- }\r
- }\r
- } else {\r
- pt.no_check = true;\r
- for(long i=start;i<=up;i++) {\r
- i = skipper.find(src,i,up);\r
- if(i<0) {\r
- charsMatched_ = matchFrom_ = -1;\r
- return didMatch_ = false;\r
- }\r
- charsMatched_ = thePattern.matchAt(s,i,pt);\r
- if(charsMatched_ >= 0) {\r
- matchFrom_ = thePattern.mfrom;\r
- marks = pt.marks;\r
- gFlagto = matchFrom_+charsMatched_;\r
- gFlags = s;\r
- return didMatch_=true;\r
- } else {\r
- i = s.adjustIndex(i);\r
- up = s.adjustEnd(i);\r
- }\r
- }\r
- }\r
- return didMatch_=false;\r
- }*/\r
-\r
- boolean _reverseSearch(String s,int start,int end) {\r
- return _reverseSearch(new StringWrap(s),start,end);\r
- }\r
- boolean _reverseSearch(StringLike s,int start,int end) {\r
- if(gFlag && gFlagto > 0 && s.unwrap()==gFlags.unwrap())\r
- end = gFlagto;\r
- gFlags = null;\r
- Pthings pt=prep(s);\r
- for(int i=end;i>=start;i--) {\r
- charsMatched_ = thePattern.matchAt(s,i,pt);\r
- if(charsMatched_ >= 0) {\r
- matchFrom_ = thePattern.mfrom;\r
- marks = pt.marks;\r
- gFlagto = matchFrom_-1;\r
- gFlags = s;\r
- return didMatch_=true;\r
- }\r
- }\r
- return didMatch_=false;\r
- }\r
-\r
- // This routine sets the cbits variable\r
- // of class Pattern. Cbits is true for\r
- // the bit corresponding to a character inside\r
- // a set of quotes.\r
- static StringLike lasts=null;\r
- static BitSet lastbs=null;\r
- static void setCbits(StringLike s,Pthings pt) {\r
- if(s == lasts) {\r
- pt.cbits = lastbs;\r
- return;\r
+ </pre>\r
+ <p>\r
+ Differences between this package and perl5:<br>\r
+ The extended Pattern syntax for setting flags is now supported,\r
+ but the flags are different. "(?i)" tells the pattern to\r
+ ignore case, "(?Q)" sets the "dontMatchInQuotes" flag, and\r
+ "(?iQ)" sets them both. You can change the escape character.\r
+ The pattern <code>(?e=#)#d+</code> is the same as <code>\d+</code>,\r
+ but note that the sequence <code>(?e=#)</code> <b>must</b> occur\r
+ at the very beginning of the pattern. There may be other small\r
+ differences as well. I will either make my package conform\r
+ or note them as I become aware of them.\r
+ <p>\r
+ This package supports additional patterns not in perl5:\r
+ <center>\r
+ <table border=1>\r
+ <tr><td>(?@())</td><td>Group</td><td>This matches all characters between\r
+ the '(' character and the balancing ')' character. Thus, it will\r
+ match "()" as well as "(())". The balancing characters are\r
+ arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>\r
+ <tr><td>(?&lt;1)</td><td>Backup</td><td>Moves the pointer backwards within the text.\r
+ This allows you to make a "look behind." It fails if it\r
+ attempts to move to a position before the beginning of the string.\r
+ "x(?&lt;1)" is equivalent to "(?=x)". The number, 1 in this example,\r
+ is the number of characters to move backwards.</td>\r
+ </table>\r
+ </center>\r
+ </dl>\r
+ @author Steven R. Brandt\r
+ @version package com.stevesoft.pat, release 1.5.3\r
+ @see Pattern\r
+ */\r
+public class Regex\r
+ extends RegRes implements FilenameFilter\r
+{\r
+ /** BackRefOffset gives the identity number of the first\r
+ pattern. Version 1.0 used zero, version 1.1 uses 1 to be\r
+ more compatible with perl. */\r
+ static int BackRefOffset = 1;\r
+ private static Pattern none = new NoPattern();\r
+ Pattern thePattern = none;\r
+ patInt minMatch = new patInt(0);\r
+\r
+ static Hashtable validators = new Hashtable();\r
+ static\r
+ {\r
+ define("p", "(?>1)", new UnicodePunct());\r
+ define("P", "(?>1)", new NUnicodePunct());\r
+ define("s", "(?>1)", new UnicodeWhite());\r
+ define("S", "(?>1)", new NUnicodeWhite());\r
+ define("w", "(?>1)", new UnicodeW());\r
+ define("W", "(?>1)", new NUnicodeW());\r
+ define("d", "(?>1)", new UnicodeDigit());\r
+ define("D", "(?>1)", new NUnicodeDigit());\r
+ define("m", "(?>1)", new UnicodeMath());\r
+ define("M", "(?>1)", new NUnicodeMath());\r
+ define("c", "(?>1)", new UnicodeCurrency());\r
+ define("C", "(?>1)", new NUnicodeCurrency());\r
+ define("a", "(?>1)", new UnicodeAlpha());\r
+ define("A", "(?>1)", new NUnicodeAlpha());\r
+ define("uc", "(?>1)", new UnicodeUpper());\r
+ define("lc", "(?>1)", new UnicodeLower());\r
+ }\r
+\r
+  /** Enables or disables the dontMatchInQuotes flag.\r
+      Note that compile(String) resets this flag to false before it\r
+      parses a pattern.\r
+      @param b the new value of the flag */\r
+  public void setDontMatchInQuotes(boolean b)\r
+  {\r
+    this.dontMatchInQuotes = b;\r
+  }\r
+\r
+  /** Reports the current state of the dontMatchInQuotes flag.\r
+      @return true if the flag is enabled */\r
+  public boolean getDontMatchInQuotes()\r
+  {\r
+    return this.dontMatchInQuotes;\r
+  }\r
+\r
+ boolean dontMatchInQuotes = false;\r
+\r
+  /** Sets the ignoreCase flag. When it is true the pattern matcher\r
+      ignores case while searching for a match. Note that\r
+      compile(String) resets this flag to false before it parses a\r
+      pattern.\r
+      @param b the new value of the flag */\r
+  public void setIgnoreCase(boolean b)\r
+  {\r
+    this.ignoreCase = b;\r
+  }\r
+\r
+  /** Reports the state of the ignoreCase flag.\r
+      @return true if the case of the pattern is being ignored,\r
+      false otherwise */\r
+  public boolean getIgnoreCase()\r
+  {\r
+    return this.ignoreCase;\r
+  }\r
+\r
+ boolean ignoreCase = false;\r
+\r
+ static boolean defaultMFlag = false;\r
+  /** Sets the class-wide default for the m flag. prep() ORs this\r
+      default with each Regex's own m flag, so setting it to true\r
+      turns the m flag on for every search.\r
+      @param mFlag the new default value */\r
+  public static void setDefaultMFlag(boolean mFlag)\r
+  {\r
+    Regex.defaultMFlag = mFlag;\r
+  }\r
+\r
+  /** Reports the class-wide default for the m flag. When it is\r
+      true the m flag is on for every search.\r
+      @return the current default value */\r
+  public static boolean getDefaultMFlag()\r
+  {\r
+    return Regex.defaultMFlag;\r
+  }\r
+\r
+ /** Initializes the object without a Pattern; thePattern keeps its\r
+ initial NoPattern placeholder value. To supply a Pattern\r
+ use compile(String s).\r
+ @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
+ */\r
+ public Regex()\r
+ {}\r
+\r
+ /** Create and compile a Regex, but do not throw any exceptions.\r
+ A RegSyntax error raised while compiling s is caught and\r
+ silently discarded. If you wish to have exceptions thrown for\r
+ syntax errors, you must use the no-argument Regex() constructor\r
+ to create the Regex object, and then call the compile method.\r
+ Therefore, you should only call this constructor when you know\r
+ your pattern is right.\r
+ @param s the pattern to compile\r
+ @see com.stevesoft.pat.Regex#search(java.lang.String)\r
+ @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
+ */\r
+ public Regex(String s)\r
+ {\r
+ try\r
+ {\r
+ compile(s);\r
+ }\r
+ catch (RegSyntax rs)\r
+ {}\r
+ }\r
+\r
+ ReplaceRule rep = null;\r
+  /** Creates and compiles a Regex from s, then builds its ReplaceRule\r
+      from rp with ReplaceRule.perlCode. Syntax errors in s are not\r
+      reported (see the single-String constructor).\r
+      @param s the pattern to compile\r
+      @param rp the replacement text handed to ReplaceRule.perlCode\r
+      @see com.stevesoft.pat.ReplaceRule\r
+      @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
+   */\r
+  public Regex(String s, String rp)\r
+  {\r
+    this(s);\r
+    ReplaceRule compiledRule = ReplaceRule.perlCode(rp);\r
+    this.rep = compiledRule;\r
+  }\r
+\r
+  /** Creates and compiles a Regex from s and installs the supplied\r
+      ReplaceRule as is. This gives the caller finer control over the\r
+      replacement process, if that is desired.\r
+      @param s the pattern to compile\r
+      @param rp the ReplaceRule to use\r
+      @see com.stevesoft.pat.ReplaceRule\r
+      @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
+   */\r
+  public Regex(String s, ReplaceRule rp)\r
+  {\r
+    this(s);\r
+    this.rep = rp;\r
+  }\r
+\r
+  /** Replaces the ReplaceRule of this Regex with one compiled from\r
+      rp by ReplaceRule.perlCode, and discards the cached Replacer.\r
+      @param rp the replacement text to compile */\r
+  public void setReplaceRule(String rp)\r
+  {\r
+    this.rep = ReplaceRule.perlCode(rp);\r
+    // Drop the cached Replacer so that its history is cleared.\r
+    this.repr = null;\r
+  }\r
+\r
+  /** Installs rp as the ReplaceRule of this Regex. Unlike the String\r
+      variant, this does not discard the cached Replacer.\r
+      @param rp the new ReplaceRule */\r
+  public void setReplaceRule(ReplaceRule rp)\r
+  {\r
+    this.rep = rp;\r
+  }\r
+\r
+ /** Test to see if a custom defined rule exists. Returns true when\r
+ any entry, Validator or shorthand pattern String, is registered\r
+ under nm.\r
+ @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,com.stevesoft.pat.Validator)\r
+ */\r
+ public static boolean isDefined(String nm)\r
+ {\r
+ return validators.get(nm) != null;\r
+ }\r
+\r
+ /** Removes the custom defined rule registered under nm, if any.\r
+ @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,com.stevesoft.pat.Validator)\r
+ */\r
+ public static void undefine(String nm)\r
+ {\r
+ validators.remove(nm);\r
+ }\r
+\r
+ /** Defines a method to create a new rule. The Validator's pattern\r
+ field is overwritten with pat, and the Validator is then stored\r
+ under nm, replacing any rule previously registered with that name.\r
+ See test/deriv2.java\r
+ and test/deriv3.java for examples of how to use it. */\r
+ public static void define(String nm, String pat, Validator v)\r
+ {\r
+ v.pattern = pat;\r
+ validators.put(nm, v);\r
+ }\r
+\r
+ /** Defines a shorthand for a pattern. The pattern String is stored\r
+ under nm, replacing any rule previously registered with that name,\r
+ and will be invoked by a string that has the form "(??"+nm+")".\r
+ */\r
+ public static void define(String nm, String pat)\r
+ {\r
+ validators.put(nm, pat);\r
+ }\r
+\r
+  /** Returns the ReplaceRule currently attached to this Regex.\r
+      @return the current rule, or null if none has been set */\r
+  public ReplaceRule getReplaceRule()\r
+  {\r
+    return this.rep;\r
+  }\r
+\r
+ Replacer repr = null;\r
+  /** Returns the cached Replacer, creating it on first use. */\r
+  final Replacer _getReplacer()\r
+  {\r
+    if (repr == null)\r
+    {\r
+      repr = new Replacer();\r
+    }\r
+    return repr;\r
+  }\r
+\r
+ public Replacer getReplacer()\r
+ {\r
+ if (repr == null)\r
+ {\r
+ repr = new Replacer();\r
+ }\r
+ repr.rh.me = this;\r
+ repr.rh.prev = null;\r
+ return repr;\r
+ }\r
+\r
+ /** Replace the first occurence of this pattern in String s\r
+ according to the ReplaceRule.\r
+ @see com.stevesoft.pat.ReplaceRule\r
+ @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceFirst(String s)\r
+ {\r
+ return _getReplacer().replaceFirstRegion(s, this, 0, s.length()).toString();\r
+ }\r
+\r
+ /** Replace the first occurence of this pattern in String s\r
+ beginning with position pos according to the ReplaceRule.\r
+ @see com.stevesoft.pat.ReplaceRule\r
+ @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceFirstFrom(String s, int pos)\r
+ {\r
+ return _getReplacer().replaceFirstRegion(s, this, pos, s.length()).toString();\r
+ }\r
+\r
+ /** Replace the first occurence of this pattern in String s\r
+ beginning with position start and ending with end\r
+ according to the ReplaceRule.\r
+ @see com.stevesoft.pat.ReplaceRule\r
+ @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceFirstRegion(String s, int start, int end)\r
+ {\r
+ return _getReplacer().replaceFirstRegion(s, this, start, end).toString();\r
+ }\r
+\r
+ /** Replace all occurences of this pattern in String s\r
+ according to the ReplaceRule.\r
+ @see com.stevesoft.pat.ReplaceRule\r
+ @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceAll(String s)\r
+ {\r
+ return _getReplacer().replaceAllRegion(s, this, 0, s.length()).toString();\r
+ }\r
+\r
+ public StringLike replaceAll(StringLike s)\r
+ {\r
+ return _getReplacer().replaceAllRegion(s, this, 0, s.length());\r
+ }\r
+\r
+ /** Replace all occurences of this pattern in String s\r
+ beginning with position pos according to the ReplaceRule.\r
+ @see com.stevesoft.pat.ReplaceRule\r
+ @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceAllFrom(String s, int pos)\r
+ {\r
+ return _getReplacer().replaceAllRegion(s, this, pos, s.length()).toString();\r
+ }\r
+\r
+ /** Replace all occurences of this pattern in String s\r
+ beginning with position start and ending with end\r
+ according to the ReplaceRule.\r
+ @see com.stevesoft.pat.ReplaceRule\r
+ @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceAllRegion(String s, int start, int end)\r
+ {\r
+ return _getReplacer().replaceAllRegion(s, this, start, end).toString();\r
+ }\r
+\r
+  /** Copy constructor; essentially clones the Regex object.\r
+      The RegRes part is handled by the superclass constructor. The\r
+      option flags and the escape character are copied, the\r
+      ReplaceRule (if any) is cloned, and the compiled pattern is\r
+      cloned via Pattern.clone(Hashtable). minMatch and skipper are\r
+      shared with the original rather than copied.\r
+      @param r the Regex to copy; must not be null */\r
+  public Regex(Regex r)\r
+  {\r
+    super( (RegRes) r);\r
+    dontMatchInQuotes = r.dontMatchInQuotes;\r
+    esc = r.esc;\r
+    ignoreCase = r.ignoreCase;\r
+    gFlag = r.gFlag;\r
+    // The s and m flags are read by prep() for every match, so a copy\r
+    // that dropped them would match differently from the original.\r
+    sFlag = r.sFlag;\r
+    mFlag = r.mFlag;\r
+    rep = (r.rep == null) ? null : (ReplaceRule) r.rep.clone();\r
+    thePattern = r.thePattern.clone(new Hashtable());\r
+    minMatch = r.minMatch;\r
+    skipper = r.skipper;\r
+  }\r
+\r
+ /** By default,\r
+ the escape character is the backslash, but you can\r
+ make it anything you want by setting this variable. */\r
+ public char esc = Pattern.ESC;\r
+ /** This method compiles a regular expression, making it\r
+ possible to call the search or matchAt methods.\r
+ The pattern is first passed through parsePerl.codify; if that\r
+ returns null the pattern is used unchanged. The ignoreCase and\r
+ dontMatchInQuotes flags and the previous pattern state are reset\r
+ before compiling. A pattern starting with "(?e=X)" is re-escaped\r
+ using X as the escape character; otherwise, if the esc field\r
+ differs from Pattern.ESC, the whole pattern is re-escaped from esc.\r
+ @param prepat the pattern to compile\r
+ @exception com.stevesoft.pat.RegSyntax\r
+ is thrown if a syntax error is encountered\r
+ in the pattern.\r
+ For example, "x{3,1}" or "*a" are not valid\r
+ patterns.\r
+ @see com.stevesoft.pat.Regex#search\r
+ @see com.stevesoft.pat.Regex#matchAt\r
+ */\r
+ public void compile(String prepat)\r
+ throws RegSyntax\r
+ {\r
+ String postpat = parsePerl.codify(prepat, true);\r
+ String pat = postpat == null ? prepat : postpat;\r
+ minMatch = null;\r
+ ignoreCase = false;\r
+ dontMatchInQuotes = false;\r
+ Rthings mk = new Rthings(this);\r
+ int offset = mk.val;\r
+ String newpat = pat;\r
+ thePattern = none;\r
+ p = null;\r
+ or = null;\r
+ minMatch = new patInt(0);\r
+ StrPos sp = new StrPos(pat, 0);\r
+ if (sp.incMatch("(?e="))\r
+ {\r
+ char newEsc = sp.c;\r
+ sp.inc();\r
+ if (sp.match(')'))\r
+ {\r
+ newpat = reEscape(pat.substring(6),\r
+ newEsc, Pattern.ESC);\r
+ }\r
+ }\r
+ else if (esc != Pattern.ESC)\r
+ {\r
+ newpat = reEscape(pat, esc, Pattern.ESC);\r
+ }\r
+ thePattern = _compile(newpat, mk);\r
+ numSubs_ = mk.val - offset;\r
+ mk.set(this);\r
+ }\r
+\r
+ /* If a Regex is compared against a Regex, a check is\r
+ done to see that the patterns are equal as well as\r
+ the most recent match. If a Regex is compare with\r
+ a RegRes, only the result of the most recent match\r
+ is compared. */\r
+ public boolean equals(Object o)\r
+ {\r
+ if (o instanceof Regex)\r
+ {\r
+ if (toString().equals(o.toString()))\r
+ {\r
+ return super.equals(o);\r
+ }\r
+ else\r
+ {\r
+ return false;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ return super.equals(o);\r
+ }\r
+ }\r
+\r
+ /** Returns a copy of this Regex created with the copy constructor Regex(Regex). */\r
+ public Object clone()\r
+ {\r
+ return new Regex(this);\r
+ }\r
+\r
+ /** Return a clone of the underlying RegRes object. */\r
+ public RegRes result()\r
+ {\r
+ return (RegRes)super.clone();\r
+ }\r
+\r
+  // Match-time state handed to the Pattern elements. prep() refreshes\r
+  // it so that the Pattern can access it during an attempt at a match.\r
+  Pthings pt = new Pthings();\r
+\r
+  /** Prepares the shared Pthings for a match attempt against s.\r
+      lastPos is taken from matchedTo() (never below zero) and is reset\r
+      to zero when the text being searched is not the same underlying\r
+      object as the previously searched text. The option flags are\r
+      copied in, the previous marks array is wiped and released, and\r
+      the quote bits are computed when dontMatchInQuotes is set.\r
+      @param s the text about to be searched; becomes the new src\r
+      @return the prepared Pthings (always the pt field) */\r
+  final Pthings prep(StringLike s)\r
+  {\r
+    //if(gFlag)\r
+    pt.lastPos = matchedTo();\r
+    if (pt.lastPos < 0)\r
+    {\r
+      pt.lastPos = 0;\r
+    }\r
+    // Compare the new text with the PREVIOUS source (src). The old code\r
+    // unwrapped s on both sides, so the reset below could never happen.\r
+    Object newText = (s == null ? null : s.unwrap());\r
+    Object oldText = (src == null ? null : src.unwrap());\r
+    if (newText != oldText)\r
+    {\r
+      pt.lastPos = 0;\r
+    }\r
+    src = s;\r
+    pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);\r
+    pt.mFlag = (mFlag | defaultMFlag);\r
+    pt.ignoreCase = ignoreCase;\r
+    pt.no_check = false;\r
+    if (pt.marks != null)\r
+    {\r
+      // Wipe the old array in place before letting go of it.\r
+      for (int i = 0; i < pt.marks.length; i++)\r
+      {\r
+        pt.marks[i] = -1;\r
+      }\r
+    }\r
+    pt.marks = null;\r
+    pt.nMarks = numSubs_;\r
+    pt.src = s;\r
+    if (dontMatchInQuotes)\r
+    {\r
+      setCbits(s, pt);\r
+    }\r
+    else\r
+    {\r
+      pt.cbits = null;\r
+    }\r
+    return pt;\r
+  }\r
+\r
+ /** Attempt to match a Pattern beginning\r
+ at a specified location within the string. This delegates to\r
+ _search with start_pos as both the start and the end position.\r
+ @see com.stevesoft.pat.Regex#search\r
+ */\r
+ public boolean matchAt(String s, int start_pos)\r
+ {\r
+ return _search(s, start_pos, start_pos);\r
+ }\r
+\r
+ /** Attempt to match a Pattern beginning\r
+ at a specified location within the StringLike. This delegates to\r
+ _search with start_pos as both the start and the end position.\r
+ @see com.stevesoft.pat.Regex#search\r
+ */\r
+ public boolean matchAt(StringLike s, int start_pos)\r
+ {\r
+ return _search(s, start_pos, start_pos);\r
+ }\r
+\r
+  /** Searches the whole of String s for the first\r
+      occurrence of a match.\r
+      @param s the text to search; must not be null\r
+      @return true if a match was found\r
+      @exception NullPointerException if s is null\r
+      @see com.stevesoft.pat.Regex#searchFrom\r
+      @see com.stevesoft.pat.Regex#matchAt\r
+   */\r
+  public boolean search(String s)\r
+  {\r
+    if (s != null)\r
+    {\r
+      return _search(s, 0, s.length());\r
+    }\r
+    throw new NullPointerException("Null String Given to Regex.search");\r
+  }\r
+\r
+  /** Searches the whole of StringLike sl for the first\r
+      occurrence of a match.\r
+      @param sl the text to search; must not be null\r
+      @return true if a match was found\r
+      @exception NullPointerException if sl is null */\r
+  public boolean search(StringLike sl)\r
+  {\r
+    if (sl != null)\r
+    {\r
+      return _search(sl, 0, sl.length());\r
+    }\r
+    throw new NullPointerException("Null StringLike Given to Regex.search");\r
+  }\r
+\r
+  /** Searches String s for a match, trying start positions from the\r
+      end of the string back towards the beginning.\r
+      @param s the text to search; must not be null\r
+      @return true if a match was found\r
+      @exception NullPointerException if s is null */\r
+  public boolean reverseSearch(String s)\r
+  {\r
+    if (s != null)\r
+    {\r
+      return _reverseSearch(s, 0, s.length());\r
+    }\r
+    throw new NullPointerException("Null String Given to Regex.reverseSearch");\r
+  }\r
+\r
+  /** Searches StringLike sl for a match, trying start positions from\r
+      the end of the text back towards the beginning.\r
+      @param sl the text to search; must not be null\r
+      @return true if a match was found\r
+      @exception NullPointerException if sl is null */\r
+  public boolean reverseSearch(StringLike sl)\r
+  {\r
+    if (sl != null)\r
+    {\r
+      return _reverseSearch(sl, 0, sl.length());\r
+    }\r
+    throw new NullPointerException(\r
+        "Null StringLike Given to Regex.reverseSearch");\r
+  }\r
+\r
+  /** Searches String s for the first occurrence of a match,\r
+      beginning at position <pre>start</pre>.\r
+      @param s the text to search; must not be null\r
+      @param start the position at which the search begins\r
+      @return true if a match was found\r
+      @exception NullPointerException if s is null */\r
+  public boolean searchFrom(String s, int start)\r
+  {\r
+    if (s != null)\r
+    {\r
+      return _search(s, start, s.length());\r
+    }\r
+    throw new NullPointerException("Null String Given to Regex.searchFrom");\r
+  }\r
+\r
+ public boolean searchFrom(StringLike s, int start)\r
+ {\r
+ if (s == null)\r
+ {\r
+ throw new NullPointerException("Null String Given to Regex.searchFrom");\r
+ }\r
+ return _search(s, start, s.length());\r
+ }\r
+\r
+  /** Searches the region of String s between start and end\r
+      for the first occurrence of a match.\r
+      @param s the text to search; must not be null\r
+      @param start the position at which the region begins\r
+      @param end the position at which the region ends\r
+      @return true if a match was found\r
+      @exception NullPointerException if s is null */\r
+  public boolean searchRegion(String s, int start, int end)\r
+  {\r
+    if (s != null)\r
+    {\r
+      return _search(s, start, end);\r
+    }\r
+    throw new NullPointerException("Null String Given to Regex.searchRegion");\r
+  }\r
+\r
+ /** Set this to change the default behavior of the "." pattern.\r
+ By default it now matches perl's behavior and fails to\r
+ match the '\n' character. */\r
+ public static boolean dotDoesntMatchCR = true;\r
+ StringLike gFlags;\r
+ int gFlagto = 0;\r
+ boolean gFlag = false;\r
+  /** Sets the 'g' flag. While it is set, a search of the same text\r
+      that matched previously resumes from where that match ended.\r
+      @param b the new value of the flag */\r
+  public void setGFlag(boolean b)\r
+  {\r
+    this.gFlag = b;\r
+  }\r
+\r
+  /** Reports the state of the 'g' flag.\r
+      @return true if the flag is set */\r
+  public boolean getGFlag()\r
+  {\r
+    return this.gFlag;\r
+  }\r
+\r
+ boolean sFlag = false;\r
+  /** Reports the state of the sFlag. When it is set, prep() turns\r
+      off the dotDoesntMatchCR behaviour for the match.\r
+      @return true if the flag is set */\r
+  public boolean getSFlag()\r
+  {\r
+    return this.sFlag;\r
+  }\r
+\r
+ boolean mFlag = false;\r
+ /** Get the state of the mFlag */\r
+ public boolean getMFlag()\r
+ {\r
+ return mFlag;\r
+ }\r
+\r
+ final boolean _search(String s, int start, int end)\r
+ {\r
+ return _search(new StringWrap(s), start, end);\r
+ }\r
+\r
+ final boolean _search(StringLike s, int start, int end)\r
+ {\r
+ if (gFlag && gFlagto > 0 && gFlags != null && s.unwrap() == gFlags.unwrap())\r
+ {\r
+ start = gFlagto;\r
+ }\r
+ gFlags = null;\r
+\r
+ Pthings pt = prep(s);\r
+\r
+ int up = (minMatch == null ? end : end - minMatch.i);\r
+\r
+ if (up < start && end >= start)\r
+ {\r
+ up = start;\r
+ }\r
+\r
+ if (skipper == null)\r
+ {\r
+ for (int i = start; i <= up; i++)\r
+ {\r
+ charsMatched_ = thePattern.matchAt(s, i, pt);\r
+ if (charsMatched_ >= 0)\r
+ {\r
+ matchFrom_ = thePattern.mfrom;\r
+ marks = pt.marks;\r
+ gFlagto = matchFrom_ + charsMatched_;\r
+ gFlags = s;\r
+ return didMatch_ = true;\r
}\r
- BitSet bs = new BitSet(s.length());\r
- char qc = ' ';\r
- boolean setBit = false;\r
- for(int i=0;i<s.length();i++) {\r
- if(setBit) bs.set(i);\r
- char c = s.charAt(i);\r
- if(!setBit && c == '"') {\r
- qc = c;\r
- setBit = true;\r
- bs.set(i);\r
- } else if(!setBit && c == '\'') {\r
- qc = c;\r
- setBit = true;\r
- bs.set(i);\r
- } else if(setBit && c == qc) {\r
- setBit = false;\r
- } else if(setBit && c == '\\' && i+1<s.length()) {\r
- i++;\r
- if(setBit) bs.set(i);\r
- }\r
+ }\r
+ }\r
+ else\r
+ {\r
+ pt.no_check = true;\r
+ for (int i = start; i <= up; i++)\r
+ {\r
+ i = skipper.find(src, i, up);\r
+ if (i < 0)\r
+ {\r
+ charsMatched_ = matchFrom_ = -1;\r
+ return didMatch_ = false;\r
}\r
- pt.cbits = lastbs = bs;\r
- lasts = s;\r
- }\r
-\r
- // Wanted user to over-ride this in alpha version,\r
- // but it wasn't really necessary because of this trick:\r
- Regex newRegex() {\r
- try {\r
- return (Regex)getClass().newInstance();\r
- } catch(InstantiationException ie) {\r
- return null;\r
- } catch(IllegalAccessException iae) {\r
- return null;\r
+ charsMatched_ = thePattern.matchAt(s, i, pt);\r
+ if (charsMatched_ >= 0)\r
+ {\r
+ matchFrom_ = thePattern.mfrom;\r
+ marks = pt.marks;\r
+ gFlagto = matchFrom_ + charsMatched_;\r
+ gFlags = s;\r
+ return didMatch_ = true;\r
}\r
+ }\r
}\r
- /** Only needed for creating your own extensions of\r
- Regex. This method adds the next Pattern in the chain\r
- of patterns or sets the Pattern if it is the first call. */\r
- protected void add(Pattern p2) {\r
- if(p == null)\r
- p = p2;\r
- else {\r
- p.add(p2);\r
- p2 = p;\r
+ return didMatch_ = false;\r
+ }\r
+\r
+ /*final boolean _search(LongStringLike s,long start,long end) {\r
+ if(gFlag && gFlagto > 0 && s==gFlags)\r
+ start = gFlagto;\r
+ gFlags = null;\r
+\r
+ Pthings pt=prep(s);\r
+\r
+ int up = end;//(minMatch == null ? end : end-minMatch.i);\r
+\r
+ if(up < start && end >= start) up = start;\r
+\r
+ if(skipper == null) {\r
+ for(long i=start;i<=up;i++) {\r
+ charsMatched_ = thePattern.matchAt(s,i,pt);\r
+ if(charsMatched_ >= 0) {\r
+ matchFrom_ = thePattern.mfrom;\r
+ marks = pt.marks;\r
+ gFlagto = matchFrom_+charsMatched_;\r
+ return didMatch_=true;\r
+ }\r
+ }\r
+ } else {\r
+ pt.no_check = true;\r
+ for(long i=start;i<=up;i++) {\r
+ i = skipper.find(src,i,up);\r
+ if(i<0) {\r
+ charsMatched_ = matchFrom_ = -1;\r
+ return didMatch_ = false;\r
+ }\r
+ charsMatched_ = thePattern.matchAt(s,i,pt);\r
+ if(charsMatched_ >= 0) {\r
+ matchFrom_ = thePattern.mfrom;\r
+ marks = pt.marks;\r
+ gFlagto = matchFrom_+charsMatched_;\r
+ gFlags = s;\r
+ return didMatch_=true;\r
+ } else {\r
+ i = s.adjustIndex(i);\r
+ up = s.adjustEnd(i);\r
+ }\r
+ }\r
+ }\r
+ return didMatch_=false;\r
+ }*/\r
+\r
+  /** Wraps s in a StringWrap and delegates to the StringLike\r
+      version of _reverseSearch. */\r
+  boolean _reverseSearch(String s, int start, int end)\r
+  {\r
+    StringLike wrapped = new StringWrap(s);\r
+    return _reverseSearch(wrapped, start, end);\r
+  }\r
+\r
+  /** Tries to match the pattern at each position from end down to\r
+      start, stopping at the first position that matches. When the\r
+      'g' flag is set and s is the same underlying text as the last\r
+      successful search, the scan starts at the remembered gFlagto\r
+      position instead of end. On success the match position, the\r
+      marks and the g-flag bookkeeping are recorded.\r
+      @param s the text to search\r
+      @param start the lowest position to try\r
+      @param end the highest position to try\r
+      @return true if a match was found */\r
+  boolean _reverseSearch(StringLike s, int start, int end)\r
+  {\r
+    // gFlags is null until a search has matched, and again after a\r
+    // failed search, while gFlagto keeps its old value; check it before\r
+    // dereferencing, exactly as _search does.\r
+    if (gFlag && gFlagto > 0 && gFlags != null\r
+        && s.unwrap() == gFlags.unwrap())\r
+    {\r
+      end = gFlagto;\r
+    }\r
+    gFlags = null;\r
+    Pthings pt = prep(s);\r
+    for (int i = end; i >= start; i--)\r
+    {\r
+      charsMatched_ = thePattern.matchAt(s, i, pt);\r
+      if (charsMatched_ >= 0)\r
+      {\r
+        matchFrom_ = thePattern.mfrom;\r
+        marks = pt.marks;\r
+        // The next reverse search resumes just before this match.\r
+        gFlagto = matchFrom_ - 1;\r
+        gFlags = s;\r
+        return didMatch_ = true;\r
+      }\r
+    }\r
+    return didMatch_ = false;\r
+  }\r
+\r
+ // Sets pt.cbits to a BitSet whose bits mark the characters of s that lie\r
+ // inside single- or double-quoted text (quote characters and backslash-escaped\r
+ // characters included). The most recent result is cached in lasts/lastbs\r
+ // and reused when the same StringLike instance is passed again.\r
+ static StringLike lasts = null;\r
+ static BitSet lastbs = null;\r
+ static void setCbits(StringLike s, Pthings pt)\r
+ {\r
+ if (s == lasts)\r
+ {\r
+ pt.cbits = lastbs;\r
+ return;\r
+ }\r
+ BitSet bs = new BitSet(s.length());\r
+ char qc = ' ';\r
+ boolean setBit = false;\r
+ for (int i = 0; i < s.length(); i++)\r
+ {\r
+ if (setBit)\r
+ {\r
+ bs.set(i);\r
+ }\r
+ char c = s.charAt(i);\r
+ if (!setBit && c == '"')\r
+ {\r
+ qc = c;\r
+ setBit = true;\r
+ bs.set(i);\r
+ }\r
+ else if (!setBit && c == '\'')\r
+ {\r
+ qc = c;\r
+ setBit = true;\r
+ bs.set(i);\r
+ }\r
+ else if (setBit && c == qc)\r
+ {\r
+ setBit = false;\r
+ }\r
+ else if (setBit && c == '\\' && i + 1 < s.length())\r
+ {\r
+ i++;\r
+ if (setBit)\r
+ {\r
+ bs.set(i);\r
}\r
+ }\r
}\r
+ pt.cbits = lastbs = bs;\r
+ lasts = s;\r
+ }\r
\r
- /** You only need to use this method if you are creating\r
- your own extentions to Regex.\r
- compile1 compiles one Pattern element, it can be\r
- over-ridden to allow the Regex compiler to understand\r
- new syntax. See deriv.java for an example. This routine\r
- is the heart of class Regex. Rthings has one integer\r
- member called intValue, it is used to keep track of the number\r
- of ()'s in the Pattern.\r
- @exception com.stevesoft.pat.RegSyntax is thrown when a nonsensensical\r
- pattern is supplied. For example, a pattern beginning\r
- with *. */\r
- protected void compile1(StrPos sp,Rthings mk) throws RegSyntax {\r
- if(sp.match('[')) {\r
- sp.inc();\r
- add(matchBracket(sp));\r
- } else if(sp.match('|')) {\r
- if(or == null)\r
- or = new Or();\r
- if(p == null) p=new NullPattern();\r
- or.addOr(p);\r
- p = null;\r
- } else if(sp.incMatch("(?<")) {\r
- patInt i = sp.getPatInt();\r
- if(i==null) RegSyntaxError.endItAll("No int after (?<");\r
- add(new Backup(i.intValue()));\r
- if(!sp.match(')')) RegSyntaxError.endItAll("No ) after (?<");\r
- } else if(sp.incMatch("(?>")) {\r
- patInt i = sp.getPatInt();\r
- if(i==null) RegSyntaxError.endItAll("No int after (?>");\r
- add(new Backup(-i.intValue()));\r
- if(!sp.match(')')) RegSyntaxError.endItAll("No ) after (?<");\r
- } else if(sp.incMatch("(?@")) {\r
- char op = sp.c;\r
- sp.inc();\r
- char cl = sp.c;\r
- sp.inc();\r
- if(!sp.match(')'))\r
- RegSyntaxError.endItAll(\r
- "(?@ does not have closing paren");\r
- add(new Group(op,cl));\r
- } else if(sp.incMatch("(?#")) {\r
- while(!sp.match(')'))\r
- sp.inc();\r
- } else if(sp.dontMatch && sp.c == 'w') {\r
- //Regex r = new Regex();\r
- //r._compile("[a-zA-Z0-9_]",mk);\r
- //add(new Goop("\\w",r.thePattern));\r
- Bracket b = new Bracket(false);\r
- b.addOr(new Range('a','z'));\r
- b.addOr(new Range('A','Z'));\r
- b.addOr(new Range('0','9'));\r
- b.addOr(new oneChar('_'));\r
- add(b);\r
- } else if(sp.dontMatch && sp.c == 'G') {\r
- add(new BackG());\r
- } else if(sp.dontMatch && sp.c == 's') {\r
- //Regex r = new Regex();\r
- //r._compile("[ \t\n\r\b]",mk);\r
- //add(new Goop("\\s",r.thePattern));\r
- Bracket b = new Bracket(false);\r
- b.addOr(new oneChar((char)32));\r
- b.addOr(new Range((char)8,(char)10));\r
- b.addOr(new oneChar((char)13));\r
- add(b);\r
- } else if(sp.dontMatch && sp.c == 'd') {\r
- //Regex r = new Regex();\r
- //r._compile("[0-9]",mk);\r
- //add(new Goop("\\d",r.thePattern));\r
- Range digit = new Range('0','9');\r
- digit.printBrackets = true;\r
- add(digit);\r
- } else if(sp.dontMatch && sp.c == 'W') {\r
- //Regex r = new Regex();\r
- //r._compile("[^a-zA-Z0-9_]",mk);\r
- //add(new Goop("\\W",r.thePattern));\r
- Bracket b = new Bracket(true);\r
- b.addOr(new Range('a','z'));\r
- b.addOr(new Range('A','Z'));\r
- b.addOr(new Range('0','9'));\r
- b.addOr(new oneChar('_'));\r
- add(b);\r
- } else if(sp.dontMatch && sp.c == 'S') {\r
- //Regex r = new Regex();\r
- //r._compile("[^ \t\n\r\b]",mk);\r
- //add(new Goop("\\S",r.thePattern));\r
- Bracket b = new Bracket(true);\r
- b.addOr(new oneChar((char)32));\r
- b.addOr(new Range((char)8,(char)10));\r
- b.addOr(new oneChar((char)13));\r
- add(b);\r
- } else if(sp.dontMatch && sp.c == 'D') {\r
- //Regex r = new Regex();\r
- //r._compile("[^0-9]",mk);\r
- //add(new Goop("\\D",r.thePattern));\r
- Bracket b = new Bracket(true);\r
- b.addOr(new Range('0','9'));\r
- add(b);\r
- } else if(sp.dontMatch && sp.c == 'B') {\r
- Regex r = new Regex();\r
- r._compile("(?!"+back_slash+"b)",mk);\r
- add(r.thePattern);\r
- } else if(isOctalString(sp)) {\r
- int d = sp.c - '0';\r
- sp.inc();\r
- d = 8*d + sp.c - '0';\r
- StrPos sp2 = new StrPos(sp);\r
- sp2.inc();\r
- if(isOctalDigit(sp2,false)) {\r
- sp.inc();\r
- d = 8*d + sp.c - '0';\r
- }\r
- add(new oneChar((char)d));\r
- } else if(sp.dontMatch && sp.c >= '1' && sp.c <= '9') {\r
- int iv = sp.c-'0';\r
- StrPos s2 = new StrPos(sp);\r
- s2.inc();\r
- if(!s2.dontMatch && s2.c >= '0' && s2.c <= '9') {\r
- iv = 10*iv+(s2.c-'0');\r
- sp.inc();\r
- }\r
- add(new BackMatch(iv));\r
- } else if(sp.dontMatch && sp.c == 'b') {\r
- add(new Boundary());\r
- } else if(sp.match('\b')) {\r
- add(new Boundary());\r
- } else if(sp.match('$')) {\r
- add(new End(true));\r
- } else if(sp.dontMatch && sp.c == 'Z') {\r
- add(new End(false));\r
- } else if(sp.match('.')) {\r
- add(new Any());\r
- } else if(sp.incMatch("(??")) {\r
- StringBuffer sb = new StringBuffer();\r
- StringBuffer sb2 = new StringBuffer();\r
- while(!sp.match(')') && !sp.match(':')) {\r
- sb.append(sp.c);\r
- sp.inc();\r
- }\r
- if(sp.incMatch(":")) {\r
- while(!sp.match(')')) {\r
- sb2.append(sp.c);\r
- sp.inc();\r
- }\r
- }\r
- String sbs = sb.toString();\r
- if(validators.get(sbs) instanceof String) {\r
- String pat = (String)validators.get(sbs);\r
- Regex r = newRegex();\r
- Rthings rth = new Rthings(this);\r
- rth.noBackRefs = true;\r
- r._compile(pat,rth);\r
- add(r.thePattern);\r
- } else {\r
- Custom cm = new Custom(sb.toString());\r
- if(cm.v != null) {\r
- Validator v2 = cm.v.arg(sb2.toString());\r
- if(v2 != null) {\r
- v2.argsave = sb2.toString();\r
- String p = cm.v.pattern;\r
- cm.v = v2;\r
- v2.pattern = p;\r
- }\r
- Regex r = newRegex();\r
- Rthings rth = new Rthings(this);\r
- rth.noBackRefs = true;\r
- r._compile(cm.v.pattern,rth);\r
- cm.sub = r.thePattern;\r
- cm.sub.add(new CustomEndpoint(cm));\r
- cm.sub.setParent(cm);\r
- add(cm);\r
- }\r
- }\r
- } else if(sp.match('(')) {\r
- mk.parenLevel++;\r
- Regex r = newRegex();\r
- // r.or = new Or();\r
- sp.inc();\r
- if(sp.incMatch("?:")) {\r
- r.or = new Or();\r
- } else if(sp.incMatch("?=")) {\r
- r.or = new lookAhead(false);\r
- } else if(sp.incMatch("?!")) {\r
- r.or = new lookAhead(true);\r
- } else if(sp.match('?')) {\r
- sp.inc();\r
- do {\r
- if(sp.c=='i')mk.ignoreCase = true;\r
- if(sp.c=='Q')mk.dontMatchInQuotes = true;\r
- if(sp.c=='o')mk.optimizeMe = true;\r
- if(sp.c=='g')mk.gFlag = true;\r
- if(sp.c=='s')mk.sFlag = true;\r
- if(sp.c=='m')mk.mFlag = true;\r
- sp.inc();\r
- } while(!sp.match(')') && !sp.eos);\r
- r = null;\r
- mk.parenLevel--;\r
- if(sp.eos) //throw new RegSyntax\r
- RegSyntaxError.endItAll("Unclosed ()");\r
- } else { // just ordinary parenthesis\r
- r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);\r
- }\r
- if(r != null) add(r._compile(sp,mk));\r
- } else if(sp.match('^')) {\r
- add(new Start(true));\r
- } else if(sp.dontMatch && sp.c=='A') {\r
- add(new Start(false));\r
- } else if(sp.match('*')) {\r
- addMulti(new patInt(0),new patInf());\r
- } else if(sp.match('+')) {\r
- addMulti(new patInt(1),new patInf());\r
- } else if(sp.match('?')) {\r
- addMulti(new patInt(0),new patInt(1));\r
- } else if(sp.match('{')) {\r
- boolean bad = false;\r
- StrPos sp2 = new StrPos(sp);\r
- //StringBuffer sb = new StringBuffer();\r
- sp.inc();\r
- patInt i1 = sp.getPatInt();\r
- patInt i2 = null;\r
- if(sp.match('}')) {\r
- i2 = i1;\r
- } else {\r
- if(!sp.match(','))/*\r
- RegSyntaxError.endItAll(\r
- "String \"{"+i2+\r
- "\" should be followed with , or }");*/\r
- bad = true;\r
- sp.inc();\r
- if(sp.match('}'))\r
- i2 = new patInf();\r
- else\r
- i2 = sp.getPatInt();\r
- }\r
- if(i1 == null || i2 == null) /*\r
- throw new RegSyntax("Badly formatted Multi: "\r
- +"{"+i1+","+i2+"}"); */ bad = true;\r
- if(bad) {\r
- sp.dup(sp2);\r
- add(new oneChar(sp.c));\r
- } else\r
- addMulti(i1,i2);\r
- } else if(sp.escMatch('x') && next2Hex(sp)) {\r
- sp.inc();\r
- int d = getHexDigit(sp);\r
- sp.inc();\r
- d = 16*d + getHexDigit(sp);\r
- add(new oneChar((char)d));\r
- } else if(sp.escMatch('c')) {\r
- sp.inc();\r
- if(sp.c < Ctrl.cmap.length)\r
- add(new oneChar(Ctrl.cmap[sp.c]));\r
- else\r
- add(new oneChar(sp.c));\r
- } else if(sp.escMatch('f')) {\r
- add(new oneChar((char)12));\r
- } else if(sp.escMatch('a')) {\r
- add(new oneChar((char)7));\r
- } else if(sp.escMatch('t')) {\r
- add(new oneChar('\t'));\r
- } else if(sp.escMatch('n')) {\r
- add(new oneChar('\n'));\r
- } else if(sp.escMatch('r')) {\r
- add(new oneChar('\r'));\r
- } else if(sp.escMatch('b')) {\r
- add(new oneChar('\b'));\r
- } else if(sp.escMatch('e')) {\r
- add(new oneChar((char)27));\r
- } else {\r
- add(new oneChar(sp.c));\r
- if(sp.match(')'))\r
- RegSyntaxError.endItAll("Unmatched right paren in pattern");\r
- }\r
+ // Creates an instance of this object's runtime class reflectively, so subclasses\r
+ // need not override this method; returns null if the instantiation fails.\r
+ Regex newRegex()\r
+ {\r
+ try\r
+ {\r
+ return (Regex) getClass().newInstance();\r
+ }\r
+ catch (InstantiationException ie)\r
+ {\r
+ return null;\r
+ }\r
+ catch (IllegalAccessException iae)\r
+ {\r
+ return null;\r
}\r
+ }\r
\r
- // compiles all Pattern elements, internal method\r
- private Pattern _compile(String pat,Rthings mk) throws RegSyntax {\r
- minMatch = null;\r
- sFlag = mFlag = ignoreCase = gFlag = false;\r
- StrPos sp = new StrPos(pat,0);\r
- thePattern = _compile(sp,mk);\r
- pt.marks = null;\r
- return thePattern;\r
+ /** Only needed for creating your own extensions of\r
+ Regex. This method adds the next Pattern in the chain\r
+ of patterns or sets the Pattern if it is the first call.\r
+ When a chain already exists, p2 is handed to p.add; the\r
+ assignment to the parameter p2 that follows has no effect\r
+ outside this method. */\r
+ protected void add(Pattern p2)\r
+ {\r
+ if (p == null)\r
+ {\r
+ p = p2;\r
+ }\r
+ else\r
+ {\r
+ p.add(p2);\r
+ p2 = p;\r
}\r
+ }\r
\r
- Pattern p = null;\r
- Or or = null;\r
- Pattern _compile(StrPos sp,Rthings mk) throws RegSyntax {\r
- while(!(sp.eos || (or != null && sp.match(')')) )) {\r
- compile1(sp,mk);\r
- sp.inc();\r
+ /** You only need to use this method if you are creating\r
+ your own extentions to Regex.\r
+ compile1 compiles one Pattern element, it can be\r
+ over-ridden to allow the Regex compiler to understand\r
+ new syntax. See deriv.java for an example. This routine\r
+ is the heart of class Regex. Rthings has one integer\r
+ member called intValue, it is used to keep track of the number\r
+ of ()'s in the Pattern.\r
+ @exception com.stevesoft.pat.RegSyntax is thrown when a nonsensensical\r
+ pattern is supplied. For example, a pattern beginning\r
+ with *. */\r
+ protected void compile1(StrPos sp, Rthings mk)\r
+ throws RegSyntax\r
+ {\r
+ if (sp.match('['))\r
+ {\r
+ sp.inc();\r
+ add(matchBracket(sp));\r
+ }\r
+ else if (sp.match('|'))\r
+ {\r
+ if (or == null)\r
+ {\r
+ or = new Or();\r
+ }\r
+ if (p == null)\r
+ {\r
+ p = new NullPattern();\r
+ }\r
+ or.addOr(p);\r
+ p = null;\r
+ }\r
+ else if (sp.incMatch("(?<"))\r
+ {\r
+ patInt i = sp.getPatInt();\r
+ if (i == null)\r
+ {\r
+ RegSyntaxError.endItAll("No int after (?<");\r
+ }\r
+ add(new Backup(i.intValue()));\r
+ if (!sp.match(')'))\r
+ {\r
+ RegSyntaxError.endItAll("No ) after (?<");\r
+ }\r
+ }\r
+ else if (sp.incMatch("(?>"))\r
+ {\r
+ patInt i = sp.getPatInt();\r
+ if (i == null)\r
+ {\r
+ RegSyntaxError.endItAll("No int after (?>");\r
+ }\r
+ add(new Backup( -i.intValue()));\r
+ if (!sp.match(')'))\r
+ {\r
+ RegSyntaxError.endItAll("No ) after (?<");\r
+ }\r
+ }\r
+ else if (sp.incMatch("(?@"))\r
+ {\r
+ char op = sp.c;\r
+ sp.inc();\r
+ char cl = sp.c;\r
+ sp.inc();\r
+ if (!sp.match(')'))\r
+ {\r
+ RegSyntaxError.endItAll(\r
+ "(?@ does not have closing paren");\r
+ }\r
+ add(new Group(op, cl));\r
+ }\r
+ else if (sp.incMatch("(?#"))\r
+ {\r
+ while (!sp.match(')'))\r
+ {\r
+ sp.inc();\r
+ }\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'w')\r
+ {\r
+ //Regex r = new Regex();\r
+ //r._compile("[a-zA-Z0-9_]",mk);\r
+ //add(new Goop("\\w",r.thePattern));\r
+ Bracket b = new Bracket(false);\r
+ b.addOr(new Range('a', 'z'));\r
+ b.addOr(new Range('A', 'Z'));\r
+ b.addOr(new Range('0', '9'));\r
+ b.addOr(new oneChar('_'));\r
+ add(b);\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'G')\r
+ {\r
+ add(new BackG());\r
+ }\r
+ else if (sp.dontMatch && sp.c == 's')\r
+ {\r
+ //Regex r = new Regex();\r
+ //r._compile("[ \t\n\r\b]",mk);\r
+ //add(new Goop("\\s",r.thePattern));\r
+ Bracket b = new Bracket(false);\r
+ b.addOr(new oneChar( (char) 32));\r
+ b.addOr(new Range( (char) 8, (char) 10));\r
+ b.addOr(new oneChar( (char) 13));\r
+ add(b);\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'd')\r
+ {\r
+ //Regex r = new Regex();\r
+ //r._compile("[0-9]",mk);\r
+ //add(new Goop("\\d",r.thePattern));\r
+ Range digit = new Range('0', '9');\r
+ digit.printBrackets = true;\r
+ add(digit);\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'W')\r
+ {\r
+ //Regex r = new Regex();\r
+ //r._compile("[^a-zA-Z0-9_]",mk);\r
+ //add(new Goop("\\W",r.thePattern));\r
+ Bracket b = new Bracket(true);\r
+ b.addOr(new Range('a', 'z'));\r
+ b.addOr(new Range('A', 'Z'));\r
+ b.addOr(new Range('0', '9'));\r
+ b.addOr(new oneChar('_'));\r
+ add(b);\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'S')\r
+ {\r
+ //Regex r = new Regex();\r
+ //r._compile("[^ \t\n\r\b]",mk);\r
+ //add(new Goop("\\S",r.thePattern));\r
+ Bracket b = new Bracket(true);\r
+ b.addOr(new oneChar( (char) 32));\r
+ b.addOr(new Range( (char) 8, (char) 10));\r
+ b.addOr(new oneChar( (char) 13));\r
+ add(b);\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'D')\r
+ {\r
+ //Regex r = new Regex();\r
+ //r._compile("[^0-9]",mk);\r
+ //add(new Goop("\\D",r.thePattern));\r
+ Bracket b = new Bracket(true);\r
+ b.addOr(new Range('0', '9'));\r
+ add(b);\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'B')\r
+ {\r
+ Regex r = new Regex();\r
+ r._compile("(?!" + back_slash + "b)", mk);\r
+ add(r.thePattern);\r
+ }\r
+ else if (isOctalString(sp))\r
+ {\r
+ int d = sp.c - '0';\r
+ sp.inc();\r
+ d = 8 * d + sp.c - '0';\r
+ StrPos sp2 = new StrPos(sp);\r
+ sp2.inc();\r
+ if (isOctalDigit(sp2, false))\r
+ {\r
+ sp.inc();\r
+ d = 8 * d + sp.c - '0';\r
+ }\r
+ add(new oneChar( (char) d));\r
+ }\r
+ else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')\r
+ {\r
+ int iv = sp.c - '0';\r
+ StrPos s2 = new StrPos(sp);\r
+ s2.inc();\r
+ if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')\r
+ {\r
+ iv = 10 * iv + (s2.c - '0');\r
+ sp.inc();\r
+ }\r
+ add(new BackMatch(iv));\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'b')\r
+ {\r
+ add(new Boundary());\r
+ }\r
+ else if (sp.match('\b'))\r
+ {\r
+ add(new Boundary());\r
+ }\r
+ else if (sp.match('$'))\r
+ {\r
+ add(new End(true));\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'Z')\r
+ {\r
+ add(new End(false));\r
+ }\r
+ else if (sp.match('.'))\r
+ {\r
+ add(new Any());\r
+ }\r
+ else if (sp.incMatch("(??"))\r
+ {\r
+ StringBuffer sb = new StringBuffer();\r
+ StringBuffer sb2 = new StringBuffer();\r
+ while (!sp.match(')') && !sp.match(':'))\r
+ {\r
+ sb.append(sp.c);\r
+ sp.inc();\r
+ }\r
+ if (sp.incMatch(":"))\r
+ {\r
+ while (!sp.match(')'))\r
+ {\r
+ sb2.append(sp.c);\r
+ sp.inc();\r
+ }\r
+ }\r
+ String sbs = sb.toString();\r
+ if (validators.get(sbs) instanceof String)\r
+ {\r
+ String pat = (String) validators.get(sbs);\r
+ Regex r = newRegex();\r
+ Rthings rth = new Rthings(this);\r
+ rth.noBackRefs = true;\r
+ r._compile(pat, rth);\r
+ add(r.thePattern);\r
+ }\r
+ else\r
+ {\r
+ Custom cm = new Custom(sb.toString());\r
+ if (cm.v != null)\r
+ {\r
+ Validator v2 = cm.v.arg(sb2.toString());\r
+ if (v2 != null)\r
+ {\r
+ v2.argsave = sb2.toString();\r
+ String p = cm.v.pattern;\r
+ cm.v = v2;\r
+ v2.pattern = p;\r
+ }\r
+ Regex r = newRegex();\r
+ Rthings rth = new Rthings(this);\r
+ rth.noBackRefs = true;\r
+ r._compile(cm.v.pattern, rth);\r
+ cm.sub = r.thePattern;\r
+ cm.sub.add(new CustomEndpoint(cm));\r
+ cm.sub.setParent(cm);\r
+ add(cm);\r
+ }\r
+ }\r
+ }\r
+ else if (sp.match('('))\r
+ {\r
+ mk.parenLevel++;\r
+ Regex r = newRegex();\r
+ // r.or = new Or();\r
+ sp.inc();\r
+ if (sp.incMatch("?:"))\r
+ {\r
+ r.or = new Or();\r
+ }\r
+ else if (sp.incMatch("?="))\r
+ {\r
+ r.or = new lookAhead(false);\r
+ }\r
+ else if (sp.incMatch("?!"))\r
+ {\r
+ r.or = new lookAhead(true);\r
+ }\r
+ else if (sp.match('?'))\r
+ {\r
+ sp.inc();\r
+ do\r
+ {\r
+ if (sp.c == 'i')\r
+ {\r
+ mk.ignoreCase = true;\r
+ }\r
+ if (sp.c == 'Q')\r
+ {\r
+ mk.dontMatchInQuotes = true;\r
+ }\r
+ if (sp.c == 'o')\r
+ {\r
+ mk.optimizeMe = true;\r
+ }\r
+ if (sp.c == 'g')\r
+ {\r
+ mk.gFlag = true;\r
+ }\r
+ if (sp.c == 's')\r
+ {\r
+ mk.sFlag = true;\r
+ }\r
+ if (sp.c == 'm')\r
+ {\r
+ mk.mFlag = true;\r
+ }\r
+ sp.inc();\r
}\r
- if(sp.match(')')) mk.parenLevel--;\r
- else if(sp.eos && mk.parenLevel != 0) {\r
- RegSyntaxError.endItAll("Unclosed Parenthesis! lvl="+mk.parenLevel);\r
- } if(or != null) {\r
- if(p == null) p = new NullPattern();\r
- or.addOr(p);\r
- return or;\r
+ while (!sp.match(')') && !sp.eos);\r
+ r = null;\r
+ mk.parenLevel--;\r
+ if (sp.eos) //throw new RegSyntax\r
+ {\r
+ RegSyntaxError.endItAll("Unclosed ()");\r
}\r
- return p==null ? new NullPattern() : p;\r
+ }\r
+ else\r
+ { // just ordinary parenthesis\r
+ r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);\r
+ }\r
+ if (r != null)\r
+ {\r
+ add(r._compile(sp, mk));\r
+ }\r
+ }\r
+ else if (sp.match('^'))\r
+ {\r
+ add(new Start(true));\r
+ }\r
+ else if (sp.dontMatch && sp.c == 'A')\r
+ {\r
+ add(new Start(false));\r
+ }\r
+ else if (sp.match('*'))\r
+ {\r
+ addMulti(new patInt(0), new patInf());\r
+ }\r
+ else if (sp.match('+'))\r
+ {\r
+ addMulti(new patInt(1), new patInf());\r
+ }\r
+ else if (sp.match('?'))\r
+ {\r
+ addMulti(new patInt(0), new patInt(1));\r
+ }\r
+ else if (sp.match('{'))\r
+ {\r
+ boolean bad = false;\r
+ StrPos sp2 = new StrPos(sp);\r
+ //StringBuffer sb = new StringBuffer();\r
+ sp.inc();\r
+ patInt i1 = sp.getPatInt();\r
+ patInt i2 = null;\r
+ if (sp.match('}'))\r
+ {\r
+ i2 = i1;\r
+ }\r
+ else\r
+ {\r
+ if (!sp.match(','))\r
+ {\r
+ /*\r
+ RegSyntaxError.endItAll(\r
+ "String \"{"+i2+\r
+ "\" should be followed with , or }");\r
+ */\r
+ bad = true;\r
+ }\r
+ sp.inc();\r
+ if (sp.match('}'))\r
+ {\r
+ i2 = new patInf();\r
+ }\r
+ else\r
+ {\r
+ i2 = sp.getPatInt();\r
+ }\r
+ }\r
+ if (i1 == null || i2 == null)\r
+ {\r
+ /*\r
+ throw new RegSyntax("Badly formatted Multi: "\r
+ +"{"+i1+","+i2+"}"); */\r
+ bad = true;\r
+ }\r
+ if (bad)\r
+ {\r
+ sp.dup(sp2);\r
+ add(new oneChar(sp.c));\r
+ }\r
+ else\r
+ {\r
+ addMulti(i1, i2);\r
+ }\r
+ }\r
+ else if (sp.escMatch('x') && next2Hex(sp))\r
+ {\r
+ sp.inc();\r
+ int d = getHexDigit(sp);\r
+ sp.inc();\r
+ d = 16 * d + getHexDigit(sp);\r
+ add(new oneChar( (char) d));\r
+ }\r
+ else if (sp.escMatch('c'))\r
+ {\r
+ sp.inc();\r
+ if (sp.c < Ctrl.cmap.length)\r
+ {\r
+ add(new oneChar(Ctrl.cmap[sp.c]));\r
+ }\r
+ else\r
+ {\r
+ add(new oneChar(sp.c));\r
+ }\r
+ }\r
+ else if (sp.escMatch('f'))\r
+ {\r
+ add(new oneChar( (char) 12));\r
+ }\r
+ else if (sp.escMatch('a'))\r
+ {\r
+ add(new oneChar( (char) 7));\r
+ }\r
+ else if (sp.escMatch('t'))\r
+ {\r
+ add(new oneChar('\t'));\r
+ }\r
+ else if (sp.escMatch('n'))\r
+ {\r
+ add(new oneChar('\n'));\r
+ }\r
+ else if (sp.escMatch('r'))\r
+ {\r
+ add(new oneChar('\r'));\r
+ }\r
+ else if (sp.escMatch('b'))\r
+ {\r
+ add(new oneChar('\b'));\r
+ }\r
+ else if (sp.escMatch('e'))\r
+ {\r
+ add(new oneChar( (char) 27));\r
+ }\r
+ else\r
+ {\r
+ add(new oneChar(sp.c));\r
+ if (sp.match(')'))\r
+ {\r
+ RegSyntaxError.endItAll("Unmatched right paren in pattern");\r
+ }\r
}\r
+ }\r
\r
- // add a multi object to the end of the chain\r
- // which applies to the last object\r
- void addMulti(patInt i1,patInt i2) throws RegSyntax {\r
- Pattern last,last2;\r
- for(last = p;last != null && last.next != null;last=last.next)\r
- ;\r
- if(last == null || last == p)\r
- last2 = null;\r
- else\r
- for(last2 = p;last2.next != last;last2=last2.next)\r
- ;\r
- if(last instanceof Multi && i1.intValue()==0 &&\r
- i2.intValue()==1)\r
- ((Multi)last).matchFewest = true;\r
- else if(last instanceof FastMulti && i1.intValue()==0 &&\r
- i2.intValue()==1)\r
- ((FastMulti)last).matchFewest = true;\r
- else if(last instanceof DotMulti && i1.intValue()==0 &&\r
- i2.intValue()==1)\r
- ((DotMulti)last).matchFewest = true;\r
- else if(last instanceof Multi\r
+ // compiles all Pattern elements, internal method\r
+ private Pattern _compile(String pat, Rthings mk)\r
+ throws RegSyntax\r
+ {\r
+ minMatch = null;\r
+ sFlag = mFlag = ignoreCase = gFlag = false;\r
+ StrPos sp = new StrPos(pat, 0);\r
+ thePattern = _compile(sp, mk);\r
+ pt.marks = null;\r
+ return thePattern;\r
+ }\r
+\r
+ Pattern p = null;\r
+ Or or = null;\r
+ Pattern _compile(StrPos sp, Rthings mk)\r
+ throws RegSyntax\r
+ {\r
+ while (! (sp.eos || (or != null && sp.match(')'))))\r
+ {\r
+ compile1(sp, mk);\r
+ sp.inc();\r
+ }\r
+ if (sp.match(')'))\r
+ {\r
+ mk.parenLevel--;\r
+ }\r
+ else if (sp.eos && mk.parenLevel != 0)\r
+ {\r
+ RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);\r
+ }\r
+ if (or != null)\r
+ {\r
+ if (p == null)\r
+ {\r
+ p = new NullPattern();\r
+ }\r
+ or.addOr(p);\r
+ return or;\r
+ }\r
+ return p == null ? new NullPattern() : p;\r
+ }\r
+\r
+ // add a multi object to the end of the chain\r
+ // which applies to the last object\r
+ void addMulti(patInt i1, patInt i2)\r
+ throws RegSyntax\r
+ {\r
+ Pattern last, last2;\r
+ for (last = p; last != null && last.next != null; last = last.next)\r
+ {\r
+ ;\r
+ }\r
+ if (last == null || last == p)\r
+ {\r
+ last2 = null;\r
+ }\r
+ else\r
+ {\r
+ for (last2 = p; last2.next != last; last2 = last2.next)\r
+ {\r
+ ;\r
+ }\r
+ }\r
+ if (last instanceof Multi && i1.intValue() == 0 &&\r
+ i2.intValue() == 1)\r
+ {\r
+ ( (Multi) last).matchFewest = true;\r
+ }\r
+ else if (last instanceof FastMulti && i1.intValue() == 0 &&\r
+ i2.intValue() == 1)\r
+ {\r
+ ( (FastMulti) last).matchFewest = true;\r
+ }\r
+ else if (last instanceof DotMulti && i1.intValue() == 0 &&\r
+ i2.intValue() == 1)\r
+ {\r
+ ( (DotMulti) last).matchFewest = true;\r
+ }\r
+ else if (last instanceof Multi\r
|| last instanceof DotMulti\r
|| last instanceof FastMulti)\r
- throw new RegSyntax("Syntax error.");\r
- else if(last2 == null)\r
- p = mkMulti(i1,i2,p);\r
+ {\r
+ throw new RegSyntax("Syntax error.");\r
+ }\r
+ else if (last2 == null)\r
+ {\r
+ p = mkMulti(i1, i2, p);\r
+ }\r
+ else\r
+ {\r
+ last2.next = mkMulti(i1, i2, last);\r
+ }\r
+ }\r
+\r
+ final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)\r
+ throws RegSyntax\r
+ {\r
+ if (p instanceof Any && p.next == null)\r
+ {\r
+ return (Pattern)new DotMulti(lo, hi);\r
+ }\r
+ return RegOpt.safe4fm(p) ? (Pattern)new FastMulti(lo, hi, p) :\r
+ (Pattern)new Multi(lo, hi, p);\r
+ }\r
+\r
+ // process the bracket operator\r
+ Pattern matchBracket(StrPos sp)\r
+ throws RegSyntax\r
+ {\r
+ Bracket ret;\r
+ if (sp.match('^'))\r
+ {\r
+ ret = new Bracket(true);\r
+ sp.inc();\r
+ }\r
+ else\r
+ {\r
+ ret = new Bracket(false);\r
+ }\r
+ if (sp.match(']'))\r
+ {\r
+ //throw new RegSyntax\r
+ RegSyntaxError.endItAll("Unmatched []");\r
+ }\r
+\r
+ while (!sp.eos && !sp.match(']'))\r
+ {\r
+ StrPos s1 = new StrPos(sp);\r
+ s1.inc();\r
+ StrPos s1_ = new StrPos(s1);\r
+ s1_.inc();\r
+ if (s1.match('-') && !s1_.match(']'))\r
+ {\r
+ StrPos s2 = new StrPos(s1);\r
+ s2.inc();\r
+ if (!s2.eos)\r
+ {\r
+ ret.addOr(new Range(sp.c, s2.c));\r
+ }\r
+ sp.inc();\r
+ sp.inc();\r
+ }\r
+ else if (sp.escMatch('Q'))\r
+ {\r
+ sp.inc();\r
+ while (!sp.escMatch('E'))\r
+ {\r
+ ret.addOr(new oneChar(sp.c));\r
+ sp.inc();\r
+ }\r
+ }\r
+ else if (sp.escMatch('d'))\r
+ {\r
+ ret.addOr(new Range('0', '9'));\r
+ }\r
+ else if (sp.escMatch('s'))\r
+ {\r
+ ret.addOr(new oneChar( (char) 32));\r
+ ret.addOr(new Range( (char) 8, (char) 10));\r
+ ret.addOr(new oneChar( (char) 13));\r
+ }\r
+ else if (sp.escMatch('w'))\r
+ {\r
+ ret.addOr(new Range('a', 'z'));\r
+ ret.addOr(new Range('A', 'Z'));\r
+ ret.addOr(new Range('0', '9'));\r
+ ret.addOr(new oneChar('_'));\r
+ }\r
+ else if (sp.escMatch('D'))\r
+ {\r
+ ret.addOr(new Range( (char) 0, (char) 47));\r
+ ret.addOr(new Range( (char) 58, (char) 65535));\r
+ }\r
+ else if (sp.escMatch('S'))\r
+ {\r
+ ret.addOr(new Range( (char) 0, (char) 7));\r
+ ret.addOr(new Range( (char) 11, (char) 12));\r
+ ret.addOr(new Range( (char) 14, (char) 31));\r
+ ret.addOr(new Range( (char) 33, (char) 65535));\r
+ }\r
+ else if (sp.escMatch('W'))\r
+ {\r
+ ret.addOr(new Range( (char) 0, (char) 64));\r
+ ret.addOr(new Range( (char) 91, (char) 94));\r
+ ret.addOr(new oneChar( (char) 96));\r
+ ret.addOr(new Range( (char) 123, (char) 65535));\r
+ }\r
+ else if (sp.escMatch('x') && next2Hex(sp))\r
+ {\r
+ sp.inc();\r
+ int d = getHexDigit(sp);\r
+ sp.inc();\r
+ d = 16 * d + getHexDigit(sp);\r
+ ret.addOr(new oneChar( (char) d));\r
+ }\r
+ else if (sp.escMatch('a'))\r
+ {\r
+ ret.addOr(new oneChar( (char) 7));\r
+ }\r
+ else if (sp.escMatch('f'))\r
+ {\r
+ ret.addOr(new oneChar( (char) 12));\r
+ }\r
+ else if (sp.escMatch('e'))\r
+ {\r
+ ret.addOr(new oneChar( (char) 27));\r
+ }\r
+ else if (sp.escMatch('n'))\r
+ {\r
+ ret.addOr(new oneChar('\n'));\r
+ }\r
+ else if (sp.escMatch('t'))\r
+ {\r
+ ret.addOr(new oneChar('\t'));\r
+ }\r
+ else if (sp.escMatch('r'))\r
+ {\r
+ ret.addOr(new oneChar('\r'));\r
+ }\r
+ else if (sp.escMatch('c'))\r
+ {\r
+ sp.inc();\r
+ if (sp.c < Ctrl.cmap.length)\r
+ {\r
+ ret.addOr(new oneChar(Ctrl.cmap[sp.c]));\r
+ }\r
else\r
- last2.next = mkMulti(i1,i2,last);\r
- }\r
- final static Pattern mkMulti(patInt lo,patInt hi,Pattern p) throws RegSyntax {\r
- if(p instanceof Any && p.next == null)\r
- return (Pattern)new DotMulti(lo,hi);\r
- return RegOpt.safe4fm(p) ? (Pattern)new FastMulti(lo,hi,p) :\r
- (Pattern)new Multi(lo,hi,p);\r
- }\r
- // process the bracket operator\r
- Pattern matchBracket(StrPos sp) throws RegSyntax {\r
- Bracket ret;\r
- if(sp.match('^')) {\r
- ret = new Bracket(true);\r
- sp.inc();\r
- } else\r
- ret = new Bracket(false);\r
- if(sp.match(']'))\r
- //throw new RegSyntax\r
- RegSyntaxError.endItAll("Unmatched []");\r
-\r
- while(!sp.eos && !sp.match(']')) {\r
- StrPos s1 = new StrPos(sp);\r
- s1.inc();\r
- StrPos s1_ = new StrPos(s1);\r
- s1_.inc();\r
- if(s1.match('-') && !s1_.match(']')) {\r
- StrPos s2 = new StrPos(s1);\r
- s2.inc();\r
- if(!s2.eos)\r
- ret.addOr(new Range(sp.c,s2.c));\r
- sp.inc();\r
- sp.inc();\r
- } else if(sp.escMatch('Q')) {\r
- sp.inc();\r
- while(!sp.escMatch('E')) {\r
- ret.addOr(new oneChar(sp.c));\r
- sp.inc();\r
- }\r
- } else if(sp.escMatch('d')) {\r
- ret.addOr(new Range('0','9'));\r
- } else if(sp.escMatch('s')) {\r
- ret.addOr(new oneChar((char)32));\r
- ret.addOr(new Range((char)8,(char)10));\r
- ret.addOr(new oneChar((char)13));\r
- } else if(sp.escMatch('w')) {\r
- ret.addOr(new Range('a','z'));\r
- ret.addOr(new Range('A','Z'));\r
- ret.addOr(new Range('0','9'));\r
- ret.addOr(new oneChar('_'));\r
- } else if(sp.escMatch('D')) {\r
- ret.addOr(new Range((char)0,(char)47));\r
- ret.addOr(new Range((char)58,(char)65535));\r
- } else if(sp.escMatch('S')) {\r
- ret.addOr(new Range((char)0,(char)7));\r
- ret.addOr(new Range((char)11,(char)12));\r
- ret.addOr(new Range((char)14,(char)31));\r
- ret.addOr(new Range((char)33,(char)65535));\r
- } else if(sp.escMatch('W')) {\r
- ret.addOr(new Range((char)0,(char)64));\r
- ret.addOr(new Range((char)91,(char)94));\r
- ret.addOr(new oneChar((char)96));\r
- ret.addOr(new Range((char)123,(char)65535));\r
- } else if(sp.escMatch('x') && next2Hex(sp)) {\r
- sp.inc();\r
- int d = getHexDigit(sp);\r
- sp.inc();\r
- d = 16*d + getHexDigit(sp);\r
- ret.addOr(new oneChar((char)d));\r
- } else if(sp.escMatch('a')) {\r
- ret.addOr(new oneChar((char)7));\r
- } else if(sp.escMatch('f')) {\r
- ret.addOr(new oneChar((char)12));\r
- } else if(sp.escMatch('e')) {\r
- ret.addOr(new oneChar((char)27));\r
- } else if(sp.escMatch('n')) {\r
- ret.addOr(new oneChar('\n'));\r
- } else if(sp.escMatch('t')) {\r
- ret.addOr(new oneChar('\t'));\r
- } else if(sp.escMatch('r')) {\r
- ret.addOr(new oneChar('\r'));\r
- } else if(sp.escMatch('c')) {\r
- sp.inc();\r
- if(sp.c < Ctrl.cmap.length)\r
- ret.addOr(new oneChar(Ctrl.cmap[sp.c]));\r
- else\r
- ret.addOr(new oneChar(sp.c));\r
- } else if(isOctalString(sp)) {\r
- int d = sp.c - '0';\r
- sp.inc();\r
- d = 8*d + sp.c - '0';\r
- StrPos sp2 = new StrPos(sp);\r
- sp2.inc();\r
- if(isOctalDigit(sp2,false)) {\r
- sp.inc();\r
- d = 8*d + sp.c - '0';\r
- }\r
- ret.addOr(new oneChar((char)d));\r
- } else\r
- ret.addOr(new oneChar(sp.c));\r
- sp.inc();\r
+ {\r
+ ret.addOr(new oneChar(sp.c));\r
+ }\r
+ }\r
+ else if (isOctalString(sp))\r
+ {\r
+ int d = sp.c - '0';\r
+ sp.inc();\r
+ d = 8 * d + sp.c - '0';\r
+ StrPos sp2 = new StrPos(sp);\r
+ sp2.inc();\r
+ if (isOctalDigit(sp2, false))\r
+ {\r
+ sp.inc();\r
+ d = 8 * d + sp.c - '0';\r
+ }\r
+ ret.addOr(new oneChar( (char) d));\r
+ }\r
+ else\r
+ {\r
+ ret.addOr(new oneChar(sp.c));\r
+ }\r
+ sp.inc();\r
+ }\r
+ return ret;\r
+ }\r
+\r
+ /** Converts the stored Pattern to a String -- this is a\r
+ decompile. Note that \t and \n will really print out here,\r
+ Not just the two character representations.\r
+ Also be prepared to see some strange output if your characters\r
+ are not printable. */\r
+ public String toString()\r
+ {\r
+ if (false && thePattern == null)\r
+ {\r
+ return "";\r
+ }\r
+ else\r
+ {\r
+ StringBuffer sb = new StringBuffer();\r
+ if (esc != Pattern.ESC)\r
+ {\r
+ sb.append("(?e=");\r
+ sb.append(esc);\r
+ sb.append(")");\r
+ }\r
+ if (gFlag\r
+ || mFlag\r
+ || !dotDoesntMatchCR\r
+ || sFlag\r
+ || ignoreCase\r
+ || dontMatchInQuotes\r
+ || optimized())\r
+ {\r
+ sb.append("(?");\r
+ if (ignoreCase)\r
+ {\r
+ sb.append("i");\r
+ }\r
+ if (mFlag)\r
+ {\r
+ sb.append("m");\r
+ }\r
+ if (sFlag || !dotDoesntMatchCR)\r
+ {\r
+ sb.append("s");\r
+ }\r
+ if (dontMatchInQuotes)\r
+ {\r
+ sb.append("Q");\r
}\r
- return ret;\r
- }\r
-\r
- /** Converts the stored Pattern to a String -- this is a\r
- decompile. Note that \t and \n will really print out here,\r
- Not just the two character representations.\r
- Also be prepared to see some strange output if your characters\r
- are not printable. */\r
- public String toString() {\r
- if( false && thePattern == null )\r
- return "";\r
- else {\r
- StringBuffer sb = new StringBuffer();\r
- if(esc != Pattern.ESC) {\r
- sb.append("(?e=");\r
- sb.append(esc);\r
- sb.append(")");\r
- }\r
- if(gFlag\r
- ||mFlag\r
- ||!dotDoesntMatchCR\r
- ||sFlag\r
- ||ignoreCase\r
- ||dontMatchInQuotes\r
- ||optimized()) {\r
- sb.append("(?");\r
- if(ignoreCase)sb.append("i");\r
- if(mFlag)sb.append("m");\r
- if(sFlag||!dotDoesntMatchCR)sb.append("s");\r
- if(dontMatchInQuotes)sb.append("Q");\r
- if(optimized())sb.append("o");\r
- if(gFlag)sb.append("g");\r
- sb.append(")");\r
- }\r
- String patstr = thePattern.toString();\r
- if(esc != Pattern.ESC)\r
- patstr = reEscape(patstr,Pattern.ESC,esc);\r
- sb.append(patstr);\r
- return sb.toString();\r
+ if (optimized())\r
+ {\r
+ sb.append("o");\r
}\r
+ if (gFlag)\r
+ {\r
+ sb.append("g");\r
+ }\r
+ sb.append(")");\r
+ }\r
+ String patstr = thePattern.toString();\r
+ if (esc != Pattern.ESC)\r
+ {\r
+ patstr = reEscape(patstr, Pattern.ESC, esc);\r
+ }\r
+ sb.append(patstr);\r
+ return sb.toString();\r
}\r
- // Re-escape Pattern, allows us to use a different escape\r
- // character.\r
- static String reEscape(String s,char oldEsc,char newEsc) {\r
- if(oldEsc == newEsc) return s;\r
- int i;\r
- StringBuffer sb = new StringBuffer();\r
- for(i=0;i<s.length();i++) {\r
- if(s.charAt(i)==oldEsc && i+1 < s.length()) {\r
- if(s.charAt(i+1)==oldEsc) {\r
- sb.append(oldEsc);\r
- } else {\r
- sb.append(newEsc);\r
- sb.append(s.charAt(i+1));\r
- }\r
- i++;\r
- } else if(s.charAt(i)==newEsc) {\r
- sb.append(newEsc);\r
- sb.append(newEsc);\r
- } else {\r
- sb.append(s.charAt(i));\r
- }\r
+ }\r
+\r
+ // Re-escape Pattern, allows us to use a different escape\r
+ // character.\r
+ static String reEscape(String s, char oldEsc, char newEsc)\r
+ {\r
+ if (oldEsc == newEsc)\r
+ {\r
+ return s;\r
+ }\r
+ int i;\r
+ StringBuffer sb = new StringBuffer();\r
+ for (i = 0; i < s.length(); i++)\r
+ {\r
+ if (s.charAt(i) == oldEsc && i + 1 < s.length())\r
+ {\r
+ if (s.charAt(i + 1) == oldEsc)\r
+ {\r
+ sb.append(oldEsc);\r
}\r
- return sb.toString();\r
- }\r
- /** This method implements FilenameFilter, allowing one\r
- to use a Regex to search through a directory using File.list.\r
- There is a FileRegex now that does this better.\r
- @see com.stevesoft.pat.FileRegex\r
- */\r
- public boolean accept(File dir,String s) {\r
- return search(s);\r
- }\r
- /** The version of this package */\r
- final static public String version() {\r
- return "lgpl release 1.5.3";\r
- }\r
- /** Once this method is called, the state of variables\r
- ignoreCase and dontMatchInQuotes should not be changed as the\r
- results will be unpredictable. However,\r
- search and matchAt will run more quickly. Note that you\r
- can check to see if the pattern has been optimized by calling\r
- the optimized() method.<p>This method will attempt to rewrite\r
- your pattern in a way that makes it faster (not all patterns\r
- execute at the same speed). In general, "(?: ... )" will be\r
- faster than "( ... )" so if you don't need the backreference,\r
- you should group using the former pattern.<p>It will also\r
- introduce new pattern elements that you can't get to otherwise,\r
- for example if you have a large table of strings, i.e. the\r
- months of the year "(January|February|...)" optimize() will make\r
- a Hashtable that takes it to the next appropriate pattern\r
- element -- eliminating the need for a linear search.\r
- @see com.stevesoft.pat.Regex#optimized\r
- @see com.stevesoft.pat.Regex#ignoreCase\r
- @see com.stevesoft.pat.Regex#dontMatchInQuotes\r
- @see com.stevesoft.pat.Regex#matchAt\r
- @see com.stevesoft.pat.Regex#search\r
- */\r
- public void optimize() {\r
- if(optimized()||thePattern==null) return;\r
- minMatch = new patInt(0);//thePattern.countMinChars();\r
- thePattern = RegOpt.opt(thePattern,ignoreCase,\r
- dontMatchInQuotes);\r
- skipper = Skip.findSkip(this);\r
- //RegOpt.setParents(this);\r
- return;\r
- }\r
- Skip skipper;\r
- /** This function returns true if the optimize method has\r
- been called. */\r
- public boolean optimized() {\r
- return minMatch != null;\r
- }\r
-\r
- /** A bit of syntactic surgar for those who want to make\r
- their code look more perl-like. To use this initialize\r
- your Regex object by saying:\r
- <pre>\r
- Regex r1 = Regex.perlCode("s/hello/goodbye/");\r
- Regex r2 = Regex.perlCode("s'fish'frog'i");\r
- Regex r3 = Regex.perlCode("m'hello');\r
- </pre>\r
- The i for ignoreCase is supported in\r
- this syntax, as well as m, s, and x. The g flat\r
- is a bit of a special case.<p>\r
- If you wish to replace all occurences of a pattern, you\r
- do not put a 'g' in the perlCode, but call Regex's\r
- replaceAll method.<p>\r
- If you wish to simply\r
- and only do a search for r2's pattern, you can do this\r
- by calling the searchFrom method method repeatedly, or\r
- by calling search repeatedly if the g flag is set.\r
- <p>\r
- Note: Currently perlCode does <em>not</em>\r
- support the (?e=#) syntax for\r
- changing the escape character.\r
- */\r
-\r
- public static Regex perlCode(String s) {\r
- // this file is big enough, see parsePerl.java\r
- // for this function.\r
- return parsePerl.parse(s);\r
- }\r
- static final char back_slash = '\\';\r
-\r
- /** Checks to see if there are only literal and no special\r
- pattern elements in this Regex. */\r
- public boolean isLiteral() {\r
- Pattern x = thePattern;\r
- while(x != null) {\r
- if(x instanceof oneChar)\r
- ;\r
- else if(x instanceof Skipped)\r
- ;\r
- else\r
- return false;\r
- x = x.next;\r
+ else\r
+ {\r
+ sb.append(newEsc);\r
+ sb.append(s.charAt(i + 1));\r
}\r
- return true;\r
+ i++;\r
+ }\r
+ else if (s.charAt(i) == newEsc)\r
+ {\r
+ sb.append(newEsc);\r
+ sb.append(newEsc);\r
+ }\r
+ else\r
+ {\r
+ sb.append(s.charAt(i));\r
+ }\r
+ }\r
+ return sb.toString();\r
+ }\r
+\r
+ /** This method implements FilenameFilter, allowing one\r
+ to use a Regex to search through a directory using File.list.\r
+ There is a FileRegex now that does this better.\r
+ @see com.stevesoft.pat.FileRegex\r
+ */\r
+ public boolean accept(File dir, String s)\r
+ {\r
+ return search(s);\r
+ }\r
+\r
+ /** The version of this package */\r
+ final static public String version()\r
+ {\r
+ return "lgpl release 1.5.3";\r
+ }\r
+\r
+ /** Once this method is called, the state of variables\r
+ ignoreCase and dontMatchInQuotes should not be changed as the\r
+ results will be unpredictable. However,\r
+ search and matchAt will run more quickly. Note that you\r
+ can check to see if the pattern has been optimized by calling\r
+ the optimized() method.<p>This method will attempt to rewrite\r
+ your pattern in a way that makes it faster (not all patterns\r
+ execute at the same speed). In general, "(?: ... )" will be\r
+ faster than "( ... )" so if you don't need the backreference,\r
+ you should group using the former pattern.<p>It will also\r
+ introduce new pattern elements that you can't get to otherwise,\r
+ for example if you have a large table of strings, such as the\r
+ months of the year "(January|February|...)" optimize() will make\r
+ a Hashtable that takes it to the next appropriate pattern\r
+ element -- eliminating the need for a linear search.\r
+ @see com.stevesoft.pat.Regex#optimized\r
+ @see com.stevesoft.pat.Regex#ignoreCase\r
+ @see com.stevesoft.pat.Regex#dontMatchInQuotes\r
+ @see com.stevesoft.pat.Regex#matchAt\r
+ @see com.stevesoft.pat.Regex#search\r
+ */\r
+ public void optimize()\r
+ {\r
+ if (optimized() || thePattern == null)\r
+ {\r
+ return;\r
+ }\r
+ minMatch = new patInt(0); //thePattern.countMinChars();\r
+ thePattern = RegOpt.opt(thePattern, ignoreCase,\r
+ dontMatchInQuotes);\r
+ skipper = Skip.findSkip(this);\r
+ //RegOpt.setParents(this);\r
+ return;\r
+ }\r
+\r
+ Skip skipper;\r
+ /** This function returns true if the optimize method has\r
+ been called. */\r
+ public boolean optimized()\r
+ {\r
+ return minMatch != null;\r
+ }\r
+\r
+ /** A bit of syntactic sugar for those who want to make\r
+ their code look more perl-like. To use this initialize\r
+ your Regex object by saying:\r
+ <pre>\r
+ Regex r1 = Regex.perlCode("s/hello/goodbye/");\r
+ Regex r2 = Regex.perlCode("s'fish'frog'i");\r
+ Regex r3 = Regex.perlCode("m'hello'");\r
+ </pre>\r
+ The i for ignoreCase is supported in\r
+ this syntax, as well as m, s, and x. The g flag\r
+ is a bit of a special case.<p>\r
+ If you wish to replace all occurrences of a pattern, you\r
+ do not put a 'g' in the perlCode, but call Regex's\r
+ replaceAll method.<p>\r
+ If you wish to simply\r
+ and only do a search for r2's pattern, you can do this\r
+ by calling the searchFrom method repeatedly, or\r
+ by calling search repeatedly if the g flag is set.\r
+ <p>\r
+ Note: Currently perlCode does <em>not</em>\r
+ support the (?e=#) syntax for\r
+ changing the escape character.\r
+ */\r
+\r
+ public static Regex perlCode(String s)\r
+ {\r
+ // this file is big enough, see parsePerl.java\r
+ // for this function.\r
+ return parsePerl.parse(s);\r
+ }\r
+\r
+ static final char back_slash = '\\';\r
+\r
+ /** Checks to see if there are only literal and no special\r
+ pattern elements in this Regex. */\r
+ public boolean isLiteral()\r
+ {\r
+ Pattern x = thePattern;\r
+ while (x != null)\r
+ {\r
+ if (x instanceof oneChar)\r
+ {\r
+ ;\r
+ }\r
+ else if (x instanceof Skipped)\r
+ {\r
+ ;\r
+ }\r
+ else\r
+ {\r
+ return false;\r
+ }\r
+ x = x.next;\r
}\r
+ return true;\r
+ }\r
+\r
+ /** You only need to know about this if you are inventing\r
+ your own pattern elements. */\r
+ public patInt countMinChars()\r
+ {\r
+ return thePattern.countMinChars();\r
+ }\r
\r
- /** You only need to know about this if you are inventing\r
- your own pattern elements. */\r
- public patInt countMinChars() { return thePattern.countMinChars(); }\r
- /** You only need to know about this if you are inventing\r
- your own pattern elements. */\r
- public patInt countMaxChars() { return thePattern.countMaxChars(); }\r
+ /** You only need to know about this if you are inventing\r
+ your own pattern elements. */\r
+ public patInt countMaxChars()\r
+ {\r
+ return thePattern.countMaxChars();\r
+ }\r
\r
- boolean isHexDigit(StrPos sp) {\r
- boolean r =\r
+ boolean isHexDigit(StrPos sp)\r
+ {\r
+ boolean r =\r
!sp.eos && !sp.dontMatch\r
- && ((sp.c>='0'&&sp.c<='9')\r
- ||(sp.c>='a'&&sp.c<='f')\r
- ||(sp.c>='A'&&sp.c<='F'));\r
- return r;\r
- }\r
- boolean isOctalDigit(StrPos sp,boolean first) {\r
- boolean r =\r
- !sp.eos && !(first^sp.dontMatch)\r
- && sp.c>='0'&&sp.c<='7';\r
- return r;\r
- }\r
- int getHexDigit(StrPos sp) {\r
- if(sp.c >= '0' && sp.c <= '9')\r
- return sp.c - '0';\r
- if(sp.c >= 'a' && sp.c <= 'f')\r
- return sp.c - 'a' + 10;\r
- return sp.c - 'A' + 10;\r
- }\r
- boolean next2Hex(StrPos sp) {\r
- StrPos sp2 = new StrPos(sp);\r
- sp2.inc();\r
- if(!isHexDigit(sp2))\r
- return false;\r
- sp2.inc();\r
- if(!isHexDigit(sp2))\r
- return false;\r
- return true;\r
+ && ( (sp.c >= '0' && sp.c <= '9')\r
+ || (sp.c >= 'a' && sp.c <= 'f')\r
+ || (sp.c >= 'A' && sp.c <= 'F'));\r
+ return r;\r
+ }\r
+\r
+ boolean isOctalDigit(StrPos sp, boolean first)\r
+ {\r
+ boolean r =\r
+ !sp.eos && ! (first ^ sp.dontMatch)\r
+ && sp.c >= '0' && sp.c <= '7';\r
+ return r;\r
+ }\r
+\r
+ int getHexDigit(StrPos sp)\r
+ {\r
+ if (sp.c >= '0' && sp.c <= '9')\r
+ {\r
+ return sp.c - '0';\r
}\r
- boolean isOctalString(StrPos sp) {\r
- if(!isOctalDigit(sp,true))\r
- return false;\r
- StrPos sp2 = new StrPos(sp);\r
- sp2.inc();\r
- if(!isOctalDigit(sp2,false))\r
- return false;\r
- return true;\r
+ if (sp.c >= 'a' && sp.c <= 'f')\r
+ {\r
+ return sp.c - 'a' + 10;\r
}\r
+ return sp.c - 'A' + 10;\r
+ }\r
+\r
+ boolean next2Hex(StrPos sp)\r
+ {\r
+ StrPos sp2 = new StrPos(sp);\r
+ sp2.inc();\r
+ if (!isHexDigit(sp2))\r
+ {\r
+ return false;\r
+ }\r
+ sp2.inc();\r
+ if (!isHexDigit(sp2))\r
+ {\r
+ return false;\r
+ }\r
+ return true;\r
+ }\r
+\r
+ boolean isOctalString(StrPos sp)\r
+ {\r
+ if (!isOctalDigit(sp, true))\r
+ {\r
+ return false;\r
+ }\r
+ StrPos sp2 = new StrPos(sp);\r
+ sp2.inc();\r
+ if (!isOctalDigit(sp2, false))\r
+ {\r
+ return false;\r
+ }\r
+ return true;\r
+ }\r
}\r