X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fcom%2Fstevesoft%2Fpat%2FRegex.java;h=2b38ee73ecc8018ffd926d2218b29ca06ec25983;hb=055eda0580313a9449ec41004f26e5b06ebdfc07;hp=c7379de94b82971bbb78697aab704dea425038b0;hpb=c40cf903f740a72ab63dd1abc10fa33450ce660d;p=jalview.git diff --git a/src/com/stevesoft/pat/Regex.java b/src/com/stevesoft/pat/Regex.java index c7379de..2b38ee7 100755 --- a/src/com/stevesoft/pat/Regex.java +++ b/src/com/stevesoft/pat/Regex.java @@ -6,1424 +6,2024 @@ // -- Happy Computing! // package com.stevesoft.pat; -import java.util.*; + import java.io.*; -import com.stevesoft.pat.wrap.StringWrap; +import java.util.*; +import com.stevesoft.pat.wrap.*; /** Matches a Unicode punctuation character. */ -class UnicodePunct extends UniValidator { - public int validate(StringLike s,int from,int to) { - return from= s.length()) return -1; - char c = s.charAt(from); - return (Prop.isAlphabetic(c)||Prop.isDecimalDigit(c)||c=='_') ? to : -1; - } +class UnicodeW + extends UniValidator +{ + public int validate(StringLike s, int from, int to) + { + if (from >= s.length()) + { + return -1; + } + char c = s.charAt(from); + return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to : + -1; + } } /** Matches a character that is not a Unicode alphanumeric or underscore. */ -class NUnicodeW extends UniValidator { - public int validate(StringLike s,int from,int to) { - if(from >= s.length()) return -1; - char c = s.charAt(from); - return !(Prop.isAlphabetic(c)||Prop.isDecimalDigit(c)||c=='_') ? to : -1; - } +class NUnicodeW + extends UniValidator +{ + public int validate(StringLike s, int from, int to) + { + if (from >= s.length()) + { + return -1; + } + char c = s.charAt(from); + return! (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to : + -1; + } } /** Matches a Unicode decimal digit. */ -class UnicodeDigit extends UniValidator { - public int validate(StringLike s,int from,int to) { - return from -For the purpose of this documentation, the fact that java interprets the -backslash will be ignored. In practice, however, you will need a -double backslash to obtain a string that contains a single backslash -character. Thus, the example pattern "\b" should really be typed -as "\\b" inside java code. -

-Note that Regex is part of package "com.stevesoft.pat". -To use it, simply import -com.stevesoft.pat.Regex at the top of your file. -

-Regex is made with a constructor that takes a String that defines -the regular expression. Thus, for example -

+ Regex provides the parser which constructs the linked list of
+ Pattern classes from a String.
+ 

+ For the purpose of this documentation, the fact that java interprets the + backslash will be ignored. In practice, however, you will need a + double backslash to obtain a string that contains a single backslash + character. Thus, the example pattern "\b" should really be typed + as "\\b" inside java code. +

+ Note that Regex is part of package "com.stevesoft.pat". + To use it, simply import + com.stevesoft.pat.Regex at the top of your file. +

+ Regex is made with a constructor that takes a String that defines + the regular expression. Thus, for example +

       Regex r = new Regex("[a-c]*");
-
-matches any number of characters so long as the are 'a', 'b', or 'c'). -

-To attempt to match the Pattern to a given string, you can use either -the search(String) member function, or the matchAt(String,int position) -member function. These functions return a boolean which tells you -whether or not the thing worked, and sets the methods "charsMatched()" -and "matchedFrom()" in the Regex object appropriately. -

-The portion of the string before the match can be obtained by the -left() member, and the portion after the match can be obtained -by the right() member. -

-Essentially, this package implements a syntax that is very much -like the perl 5 regular expression syntax. - -Longer example: -

+ 
+ matches any number of characters so long as the are 'a', 'b', or 'c'). +

+ To attempt to match the Pattern to a given string, you can use either + the search(String) member function, or the matchAt(String,int position) + member function. These functions return a boolean which tells you + whether or not the thing worked, and sets the methods "charsMatched()" + and "matchedFrom()" in the Regex object appropriately. +

+ The portion of the string before the match can be obtained by the + left() member, and the portion after the match can be obtained + by the right() member. +

+ Essentially, this package implements a syntax that is very much + like the perl 5 regular expression syntax. + + Longer example: +

         Regex r = new Regex("x(a|b)y");
         r.matchAt("xay",0);
         System.out.println("sub = "+r.stringMatched(1));
-
-The above would print "sub = a". -
+ 
+ The above would print "sub = a". +
         r.left() // would return "x"
         r.right() // would return "y"
-
-

-Differences between this package and perl5:
-The extended Pattern for setting flags, is now supported, -but the flags are different. "(?i)" tells the pattern to -ignore case, "(?Q)" sets the "dontMatchInQuotes" flag, and -"(?iQ)" sets them both. You can change the escape character. -The pattern

(?e=#)#d+
is the same as
\d+
, -but note that the sequence
(?e=#)
must occur -at the very beginning of the pattern. There may be other small -differences as well. I will either make my package conform -or note them as I become aware of them. -

-This package supports additional patterns not in perl5: -

- - - -
(?@())GroupThis matches all characters between -the '(' character and the balancing ')' character. Thus, it will -match "()" as well as "(())". The balancing characters are -arbitrary, thus (?@{}) matches on "{}" and "{{}}".
(?<1)BackupMoves the pointer backwards within the text. -This allows you to make a "look behind." It fails if it -attempts to move to a position before the beginning of the string. -"x(?<1)" is equivalent to "(?=x)". The number, 1 in this example, -is the number of characters to move backwards.
-
- -@author Steven R. Brandt -@version package com.stevesoft.pat, release 1.5.3 -@see Pattern -*/ -public class Regex extends RegRes implements FilenameFilter { - /** BackRefOffset gives the identity number of the first - pattern. Version 1.0 used zero, version 1.1 uses 1 to be - more compatible with perl. */ - static int BackRefOffset = 1; - private static Pattern none = new NoPattern(); - Pattern thePattern = none; - patInt minMatch = new patInt(0); - - static Hashtable validators = new Hashtable(); - static { - define("p","(?>1)",new UnicodePunct()); - define("P","(?>1)",new NUnicodePunct()); - define("s","(?>1)",new UnicodeWhite()); - define("S","(?>1)",new NUnicodeWhite()); - define("w","(?>1)",new UnicodeW()); - define("W","(?>1)",new NUnicodeW()); - define("d","(?>1)",new UnicodeDigit()); - define("D","(?>1)",new NUnicodeDigit()); - define("m","(?>1)",new UnicodeMath()); - define("M","(?>1)",new NUnicodeMath()); - define("c","(?>1)",new UnicodeCurrency()); - define("C","(?>1)",new NUnicodeCurrency()); - define("a","(?>1)",new UnicodeAlpha()); - define("A","(?>1)",new NUnicodeAlpha()); - define("uc","(?>1)",new UnicodeUpper()); - define("lc","(?>1)",new UnicodeLower()); - } - - /** Set the dontMatch in quotes flag. */ - public void setDontMatchInQuotes(boolean b) { - dontMatchInQuotes = b; - } - /** Find out if the dontMatchInQuotes flag is enabled. */ - public boolean getDontMatchInQuotes() { - return dontMatchInQuotes; - } - boolean dontMatchInQuotes = false; - - /** Set the state of the ignoreCase flag. If set to true, then - the pattern matcher will ignore case when searching for a - match. */ - public void setIgnoreCase(boolean b) { - ignoreCase = b; - } - /** Get the state of the ignoreCase flag. Returns true if we - are ignoring the case of the pattern, false otherwise. */ - public boolean getIgnoreCase() { - return ignoreCase; - } - boolean ignoreCase = false; - - static boolean defaultMFlag = false; - /** Set the default value of the m flag. If it - is set to true, then the MFlag will be on - for any regex search executed. */ - public static void setDefaultMFlag(boolean mFlag) { - defaultMFlag = mFlag; - } - /** Get the default value of the m flag. If it - is set to true, then the MFlag will be on - for any regex search executed. */ - public static boolean getDefaultMFlag() { - return defaultMFlag; - } - - /** Initializes the object without a Pattern. To supply a Pattern - use compile(String s). - @see com.stevesoft.pat.Regex#compile(java.lang.String) - */ - public Regex() {} - /** Create and compile a Regex, but do not throw any exceptions. - If you wish to have exceptions thrown for syntax errors, - you must use the Regex(void) constructor to create the - Regex object, and then call the compile method. Therefore, you - should only call this method when you know your pattern is right. - I will probably become more like - @see com.stevesoft.pat.Regex#search(java.lang.String) - @see com.stevesoft.pat.Regex#compile(java.lang.String) - */ - public Regex(String s) { - try { - compile(s); - } catch(RegSyntax rs) {} - } - - ReplaceRule rep = null; - /** Create and compile both a Regex and a ReplaceRule. - @see com.stevesoft.pat.ReplaceRule - @see com.stevesoft.pat.Regex#compile(java.lang.String) - */ - public Regex(String s,String rp) { - this(s); - rep = ReplaceRule.perlCode(rp); - } - /** Create and compile a Regex, but give it the ReplaceRule - specified. This allows the user finer control of the - Replacement process, if that is desired. - @see com.stevesoft.pat.ReplaceRule - @see com.stevesoft.pat.Regex#compile(java.lang.String) - */ - public Regex(String s,ReplaceRule rp) { - this(s); - rep = rp; - } - - /** Change the ReplaceRule of this Regex by compiling - a new one using String rp. */ - public void setReplaceRule(String rp) { - rep = ReplaceRule.perlCode(rp); - repr = null; // Clear Replacer history - } - - /** Change the ReplaceRule of this Regex to rp. */ - public void setReplaceRule(ReplaceRule rp) { - rep = rp; - } - /** Test to see if a custom defined rule exists. - @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator) - */ - public static boolean isDefined(String nm) { - return validators.get(nm) != null; - } - /** Removes a custom defined rule. - @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator) - */ - public static void undefine(String nm) { - validators.remove(nm); - } - /** Defines a method to create a new rule. See test/deriv2.java - and test/deriv3.java for examples of how to use it. */ - public static void define(String nm,String pat,Validator v) { - v.pattern = pat; - validators.put(nm,v); - } - /** Defines a shorthand for a pattern. The pattern will be - invoked by a string that has the form "(??"+nm+")". - */ - public static void define(String nm,String pat) { - validators.put(nm,pat); - } - - /** Get the current ReplaceRule. */ - public ReplaceRule getReplaceRule() { return rep; } - - Replacer repr = null; - final Replacer _getReplacer() { - return repr==null ? repr=new Replacer() : repr; - } - public Replacer getReplacer() { - if(repr == null) - repr = new Replacer(); - repr.rh.me = this; - repr.rh.prev = null; - return repr; - } - /** Replace the first occurence of this pattern in String s - according to the ReplaceRule. - @see com.stevesoft.pat.ReplaceRule - @see com.stevesoft.pat.Regex#getReplaceRule() - */ - public String replaceFirst(String s) { - return _getReplacer().replaceFirstRegion(s,this,0,s.length()).toString(); - } - /** Replace the first occurence of this pattern in String s - beginning with position pos according to the ReplaceRule. - @see com.stevesoft.pat.ReplaceRule - @see com.stevesoft.pat.Regex#getReplaceRule() - */ - public String replaceFirstFrom(String s,int pos) { - return _getReplacer().replaceFirstRegion(s,this,pos,s.length()).toString(); - } - /** Replace the first occurence of this pattern in String s - beginning with position start and ending with end - according to the ReplaceRule. - @see com.stevesoft.pat.ReplaceRule - @see com.stevesoft.pat.Regex#getReplaceRule() - */ - public String replaceFirstRegion(String s,int start,int end) { - return _getReplacer().replaceFirstRegion(s,this,start,end).toString(); - } - - /** Replace all occurences of this pattern in String s - according to the ReplaceRule. - @see com.stevesoft.pat.ReplaceRule - @see com.stevesoft.pat.Regex#getReplaceRule() - */ - public String replaceAll(String s) { - return _getReplacer().replaceAllRegion(s,this,0,s.length()).toString(); - } - public StringLike replaceAll(StringLike s) { - return _getReplacer().replaceAllRegion(s,this,0,s.length()); - } - /** Replace all occurences of this pattern in String s - beginning with position pos according to the ReplaceRule. - @see com.stevesoft.pat.ReplaceRule - @see com.stevesoft.pat.Regex#getReplaceRule() - */ - public String replaceAllFrom(String s,int pos) { - return _getReplacer().replaceAllRegion(s,this,pos,s.length()).toString(); - } - /** Replace all occurences of this pattern in String s - beginning with position start and ending with end - according to the ReplaceRule. - @see com.stevesoft.pat.ReplaceRule - @see com.stevesoft.pat.Regex#getReplaceRule() - */ - public String replaceAllRegion(String s,int start,int end) { - return _getReplacer().replaceAllRegion(s,this,start,end).toString(); - } - - - /** Essentially clones the Regex object */ - public Regex(Regex r) { - super((RegRes)r); - dontMatchInQuotes = r.dontMatchInQuotes; - esc = r.esc; - ignoreCase = r.ignoreCase; - gFlag = r.gFlag; - if(r.rep==null) - rep = null; - else - rep = (ReplaceRule)r.rep.clone(); - /* try { - compile(r.toString()); - } catch(RegSyntax r_) {} */ - thePattern = r.thePattern.clone(new Hashtable()); - minMatch = r.minMatch; - skipper = r.skipper; - } - - /** By default, - the escape character is the backslash, but you can - make it anything you want by setting this variable. */ - public char esc = Pattern.ESC; - /** This method compiles a regular expression, making it - possible to call the search or matchAt methods. - @exception com.stevesoft.pat.RegSyntax - is thrown if a syntax error is encountered - in the pattern. - For example, "x{3,1}" or "*a" are not valid - patterns. - @see com.stevesoft.pat.Regex#search - @see com.stevesoft.pat.Regex#matchAt - */ - public void compile(String prepat) throws RegSyntax { - String postpat = parsePerl.codify(prepat,true); - String pat = postpat==null ? prepat : postpat; - minMatch = null; - ignoreCase = false; - dontMatchInQuotes = false; - Rthings mk = new Rthings(this); - int offset = mk.val; - String newpat = pat; - thePattern = none; - p = null; - or = null; - minMatch = new patInt(0); - StrPos sp = new StrPos(pat,0); - if(sp.incMatch("(?e=")) { - char newEsc = sp.c; - sp.inc(); - if(sp.match(')')) - newpat = reEscape(pat.substring(6), - newEsc,Pattern.ESC); - } else if(esc != Pattern.ESC) - newpat = reEscape(pat,esc,Pattern.ESC); - thePattern = _compile(newpat,mk); - numSubs_ = mk.val-offset; - mk.set(this); - } - - /* If a Regex is compared against a Regex, a check is - done to see that the patterns are equal as well as - the most recent match. If a Regex is compare with - a RegRes, only the result of the most recent match - is compared. */ - public boolean equals(Object o) { - if(o instanceof Regex) { - if(toString().equals(o.toString())) - return super.equals(o); - else - return false; - } else return super.equals(o); - } - - /** A clone by any other name would smell as sweet. */ - public Object clone() { - return new Regex(this); - } - /** Return a clone of the underlying RegRes object. */ - public RegRes result() { - return (RegRes)super.clone(); - } - - // prep sets global variables of class - // Pattern so that it can access them - // during an attempt at a match - Pthings pt = new Pthings(); - final Pthings prep(StringLike s) { - //if(gFlag) - pt.lastPos = matchedTo(); - if(pt.lastPos < 0) pt.lastPos = 0; - if( (s==null ? null : s.unwrap()) != (src==null ? null : s.unwrap()) ) - pt.lastPos = 0; - src = s; - pt.dotDoesntMatchCR=dotDoesntMatchCR && (!sFlag); - pt.mFlag = (mFlag | defaultMFlag); - pt.ignoreCase = ignoreCase; - pt.no_check = false; - if(pt.marks != null) - for(int i=0;istart
*/ - public boolean searchFrom(String s,int start) { - if(s==null) - throw new NullPointerException("Null String Given to Regex.searchFrom"); - return _search(s,start,s.length()); - } - public boolean searchFrom(StringLike s,int start) { - if(s==null) - throw new NullPointerException("Null String Given to Regex.searchFrom"); - return _search(s,start,s.length()); - } - /** Search through a region of a String - for the first occurence of a match. */ - public boolean searchRegion(String s,int start,int end) { - if(s==null) - throw new NullPointerException("Null String Given to Regex.searchRegion"); - return _search(s,start,end); - } - /** Set this to change the default behavior of the "." pattern. - By default it now matches perl's behavior and fails to - match the '\n' character. */ - public static boolean dotDoesntMatchCR = true; - StringLike gFlags; - int gFlagto = 0; - boolean gFlag = false; - /** Set the 'g' flag */ - public void setGFlag(boolean b) { - gFlag = b; - } - /** Get the state of the 'g' flag. */ - public boolean getGFlag() { - return gFlag; - } - boolean sFlag = false; - /** Get the state of the sFlag */ - public boolean getSFlag() { - return sFlag; - } - boolean mFlag = false; - /** Get the state of the sFlag */ - public boolean getMFlag() { - return mFlag; - } - - final boolean _search(String s,int start,int end) { - return _search(new StringWrap(s),start,end); - } - final boolean _search(StringLike s,int start,int end) { - if(gFlag && gFlagto > 0 && gFlags!=null && s.unwrap()==gFlags.unwrap()) - start = gFlagto; - gFlags = null; - - Pthings pt=prep(s); - - int up = (minMatch == null ? end : end-minMatch.i); - - if(up < start && end >= start) up = start; - - if(skipper == null) { - for(int i=start;i<=up;i++) { - charsMatched_ = thePattern.matchAt(s,i,pt); - if(charsMatched_ >= 0) { - matchFrom_ = thePattern.mfrom; - marks = pt.marks; - gFlagto = matchFrom_+charsMatched_; - gFlags = s; - return didMatch_=true; - } - } - } else { - pt.no_check = true; - for(int i=start;i<=up;i++) { - i = skipper.find(src,i,up); - if(i<0) { - charsMatched_ = matchFrom_ = -1; - return didMatch_ = false; - } - charsMatched_ = thePattern.matchAt(s,i,pt); - if(charsMatched_ >= 0) { - matchFrom_ = thePattern.mfrom; - marks = pt.marks; - gFlagto = matchFrom_+charsMatched_; - gFlags = s; - return didMatch_=true; - } - } - } - return didMatch_=false; - } - /*final boolean _search(LongStringLike s,long start,long end) { - if(gFlag && gFlagto > 0 && s==gFlags) - start = gFlagto; - gFlags = null; - - Pthings pt=prep(s); - - int up = end;//(minMatch == null ? end : end-minMatch.i); - - if(up < start && end >= start) up = start; - - if(skipper == null) { - for(long i=start;i<=up;i++) { - charsMatched_ = thePattern.matchAt(s,i,pt); - if(charsMatched_ >= 0) { - matchFrom_ = thePattern.mfrom; - marks = pt.marks; - gFlagto = matchFrom_+charsMatched_; - return didMatch_=true; - } - } - } else { - pt.no_check = true; - for(long i=start;i<=up;i++) { - i = skipper.find(src,i,up); - if(i<0) { - charsMatched_ = matchFrom_ = -1; - return didMatch_ = false; - } - charsMatched_ = thePattern.matchAt(s,i,pt); - if(charsMatched_ >= 0) { - matchFrom_ = thePattern.mfrom; - marks = pt.marks; - gFlagto = matchFrom_+charsMatched_; - gFlags = s; - return didMatch_=true; - } else { - i = s.adjustIndex(i); - up = s.adjustEnd(i); - } - } - } - return didMatch_=false; - }*/ - - boolean _reverseSearch(String s,int start,int end) { - return _reverseSearch(new StringWrap(s),start,end); - } - boolean _reverseSearch(StringLike s,int start,int end) { - if(gFlag && gFlagto > 0 && s.unwrap()==gFlags.unwrap()) - end = gFlagto; - gFlags = null; - Pthings pt=prep(s); - for(int i=end;i>=start;i--) { - charsMatched_ = thePattern.matchAt(s,i,pt); - if(charsMatched_ >= 0) { - matchFrom_ = thePattern.mfrom; - marks = pt.marks; - gFlagto = matchFrom_-1; - gFlags = s; - return didMatch_=true; - } - } - return didMatch_=false; - } - - // This routine sets the cbits variable - // of class Pattern. Cbits is true for - // the bit corresponding to a character inside - // a set of quotes. - static StringLike lasts=null; - static BitSet lastbs=null; - static void setCbits(StringLike s,Pthings pt) { - if(s == lasts) { - pt.cbits = lastbs; - return; + +

+ Differences between this package and perl5:
+ The extended Pattern for setting flags, is now supported, + but the flags are different. "(?i)" tells the pattern to + ignore case, "(?Q)" sets the "dontMatchInQuotes" flag, and + "(?iQ)" sets them both. You can change the escape character. + The pattern

(?e=#)#d+
is the same as
\d+
, + but note that the sequence
(?e=#)
must occur + at the very beginning of the pattern. There may be other small + differences as well. I will either make my package conform + or note them as I become aware of them. +

+ This package supports additional patterns not in perl5: +

+ + + +
(?@())GroupThis matches all characters between + the '(' character and the balancing ')' character. Thus, it will + match "()" as well as "(())". The balancing characters are + arbitrary, thus (?@{}) matches on "{}" and "{{}}".
(?<1)BackupMoves the pointer backwards within the text. + This allows you to make a "look behind." It fails if it + attempts to move to a position before the beginning of the string. + "x(?<1)" is equivalent to "(?=x)". The number, 1 in this example, + is the number of characters to move backwards.
+
+ + @author Steven R. Brandt + @version package com.stevesoft.pat, release 1.5.3 + @see Pattern + */ +public class Regex + extends RegRes implements FilenameFilter +{ + /** BackRefOffset gives the identity number of the first + pattern. Version 1.0 used zero, version 1.1 uses 1 to be + more compatible with perl. */ + static int BackRefOffset = 1; + private static Pattern none = new NoPattern(); + Pattern thePattern = none; + patInt minMatch = new patInt(0); + + static Hashtable validators = new Hashtable(); + static + { + define("p", "(?>1)", new UnicodePunct()); + define("P", "(?>1)", new NUnicodePunct()); + define("s", "(?>1)", new UnicodeWhite()); + define("S", "(?>1)", new NUnicodeWhite()); + define("w", "(?>1)", new UnicodeW()); + define("W", "(?>1)", new NUnicodeW()); + define("d", "(?>1)", new UnicodeDigit()); + define("D", "(?>1)", new NUnicodeDigit()); + define("m", "(?>1)", new UnicodeMath()); + define("M", "(?>1)", new NUnicodeMath()); + define("c", "(?>1)", new UnicodeCurrency()); + define("C", "(?>1)", new NUnicodeCurrency()); + define("a", "(?>1)", new UnicodeAlpha()); + define("A", "(?>1)", new NUnicodeAlpha()); + define("uc", "(?>1)", new UnicodeUpper()); + define("lc", "(?>1)", new UnicodeLower()); + } + + /** Set the dontMatch in quotes flag. */ + public void setDontMatchInQuotes(boolean b) + { + dontMatchInQuotes = b; + } + + /** Find out if the dontMatchInQuotes flag is enabled. */ + public boolean getDontMatchInQuotes() + { + return dontMatchInQuotes; + } + + boolean dontMatchInQuotes = false; + + /** Set the state of the ignoreCase flag. If set to true, then + the pattern matcher will ignore case when searching for a + match. */ + public void setIgnoreCase(boolean b) + { + ignoreCase = b; + } + + /** Get the state of the ignoreCase flag. Returns true if we + are ignoring the case of the pattern, false otherwise. */ + public boolean getIgnoreCase() + { + return ignoreCase; + } + + boolean ignoreCase = false; + + static boolean defaultMFlag = false; + /** Set the default value of the m flag. If it + is set to true, then the MFlag will be on + for any regex search executed. */ + public static void setDefaultMFlag(boolean mFlag) + { + defaultMFlag = mFlag; + } + + /** Get the default value of the m flag. If it + is set to true, then the MFlag will be on + for any regex search executed. */ + public static boolean getDefaultMFlag() + { + return defaultMFlag; + } + + /** Initializes the object without a Pattern. To supply a Pattern + use compile(String s). + @see com.stevesoft.pat.Regex#compile(java.lang.String) + */ + public Regex() + {} + + /** Create and compile a Regex, but do not throw any exceptions. + If you wish to have exceptions thrown for syntax errors, + you must use the Regex(void) constructor to create the + Regex object, and then call the compile method. Therefore, you + should only call this method when you know your pattern is right. + I will probably become more like + @see com.stevesoft.pat.Regex#search(java.lang.String) + @see com.stevesoft.pat.Regex#compile(java.lang.String) + */ + public Regex(String s) + { + try + { + compile(s); + } + catch (RegSyntax rs) + {} + } + + ReplaceRule rep = null; + /** Create and compile both a Regex and a ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#compile(java.lang.String) + */ + public Regex(String s, String rp) + { + this(s); + rep = ReplaceRule.perlCode(rp); + } + + /** Create and compile a Regex, but give it the ReplaceRule + specified. This allows the user finer control of the + Replacement process, if that is desired. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#compile(java.lang.String) + */ + public Regex(String s, ReplaceRule rp) + { + this(s); + rep = rp; + } + + /** Change the ReplaceRule of this Regex by compiling + a new one using String rp. */ + public void setReplaceRule(String rp) + { + rep = ReplaceRule.perlCode(rp); + repr = null; // Clear Replacer history + } + + /** Change the ReplaceRule of this Regex to rp. */ + public void setReplaceRule(ReplaceRule rp) + { + rep = rp; + } + + /** Test to see if a custom defined rule exists. + @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator) + */ + public static boolean isDefined(String nm) + { + return validators.get(nm) != null; + } + + /** Removes a custom defined rule. + @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator) + */ + public static void undefine(String nm) + { + validators.remove(nm); + } + + /** Defines a method to create a new rule. See test/deriv2.java + and test/deriv3.java for examples of how to use it. */ + public static void define(String nm, String pat, Validator v) + { + v.pattern = pat; + validators.put(nm, v); + } + + /** Defines a shorthand for a pattern. The pattern will be + invoked by a string that has the form "(??"+nm+")". + */ + public static void define(String nm, String pat) + { + validators.put(nm, pat); + } + + /** Get the current ReplaceRule. */ + public ReplaceRule getReplaceRule() + { + return rep; + } + + Replacer repr = null; + final Replacer _getReplacer() + { + return repr == null ? repr = new Replacer() : repr; + } + + public Replacer getReplacer() + { + if (repr == null) + { + repr = new Replacer(); + } + repr.rh.me = this; + repr.rh.prev = null; + return repr; + } + + /** Replace the first occurence of this pattern in String s + according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceFirst(String s) + { + return _getReplacer().replaceFirstRegion(s, this, 0, s.length()).toString(); + } + + /** Replace the first occurence of this pattern in String s + beginning with position pos according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceFirstFrom(String s, int pos) + { + return _getReplacer().replaceFirstRegion(s, this, pos, s.length()).toString(); + } + + /** Replace the first occurence of this pattern in String s + beginning with position start and ending with end + according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceFirstRegion(String s, int start, int end) + { + return _getReplacer().replaceFirstRegion(s, this, start, end).toString(); + } + + /** Replace all occurences of this pattern in String s + according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceAll(String s) + { + return _getReplacer().replaceAllRegion(s, this, 0, s.length()).toString(); + } + + public StringLike replaceAll(StringLike s) + { + return _getReplacer().replaceAllRegion(s, this, 0, s.length()); + } + + /** Replace all occurences of this pattern in String s + beginning with position pos according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceAllFrom(String s, int pos) + { + return _getReplacer().replaceAllRegion(s, this, pos, s.length()).toString(); + } + + /** Replace all occurences of this pattern in String s + beginning with position start and ending with end + according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceAllRegion(String s, int start, int end) + { + return _getReplacer().replaceAllRegion(s, this, start, end).toString(); + } + + /** Essentially clones the Regex object */ + public Regex(Regex r) + { + super( (RegRes) r); + dontMatchInQuotes = r.dontMatchInQuotes; + esc = r.esc; + ignoreCase = r.ignoreCase; + gFlag = r.gFlag; + if (r.rep == null) + { + rep = null; + } + else + { + rep = (ReplaceRule) r.rep.clone(); + } + /* try { + compile(r.toString()); + } catch(RegSyntax r_) {} */ + thePattern = r.thePattern.clone(new Hashtable()); + minMatch = r.minMatch; + skipper = r.skipper; + } + + /** By default, + the escape character is the backslash, but you can + make it anything you want by setting this variable. */ + public char esc = Pattern.ESC; + /** This method compiles a regular expression, making it + possible to call the search or matchAt methods. + @exception com.stevesoft.pat.RegSyntax + is thrown if a syntax error is encountered + in the pattern. + For example, "x{3,1}" or "*a" are not valid + patterns. + @see com.stevesoft.pat.Regex#search + @see com.stevesoft.pat.Regex#matchAt + */ + public void compile(String prepat) + throws RegSyntax + { + String postpat = parsePerl.codify(prepat, true); + String pat = postpat == null ? prepat : postpat; + minMatch = null; + ignoreCase = false; + dontMatchInQuotes = false; + Rthings mk = new Rthings(this); + int offset = mk.val; + String newpat = pat; + thePattern = none; + p = null; + or = null; + minMatch = new patInt(0); + StrPos sp = new StrPos(pat, 0); + if (sp.incMatch("(?e=")) + { + char newEsc = sp.c; + sp.inc(); + if (sp.match(')')) + { + newpat = reEscape(pat.substring(6), + newEsc, Pattern.ESC); + } + } + else if (esc != Pattern.ESC) + { + newpat = reEscape(pat, esc, Pattern.ESC); + } + thePattern = _compile(newpat, mk); + numSubs_ = mk.val - offset; + mk.set(this); + } + + /* If a Regex is compared against a Regex, a check is + done to see that the patterns are equal as well as + the most recent match. If a Regex is compare with + a RegRes, only the result of the most recent match + is compared. */ + public boolean equals(Object o) + { + if (o instanceof Regex) + { + if (toString().equals(o.toString())) + { + return super.equals(o); + } + else + { + return false; + } + } + else + { + return super.equals(o); + } + } + + /** A clone by any other name would smell as sweet. */ + public Object clone() + { + return new Regex(this); + } + + /** Return a clone of the underlying RegRes object. */ + public RegRes result() + { + return (RegRes)super.clone(); + } + + // prep sets global variables of class + // Pattern so that it can access them + // during an attempt at a match + Pthings pt = new Pthings(); + final Pthings prep(StringLike s) + { + //if(gFlag) + pt.lastPos = matchedTo(); + if (pt.lastPos < 0) + { + pt.lastPos = 0; + } + if ( (s == null ? null : s.unwrap()) != (src == null ? null : s.unwrap())) + { + pt.lastPos = 0; + } + src = s; + pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag); + pt.mFlag = (mFlag | defaultMFlag); + pt.ignoreCase = ignoreCase; + pt.no_check = false; + if (pt.marks != null) + { + for (int i = 0; i < pt.marks.length; i++) + { + pt.marks[i] = -1; + } + } + pt.marks = null; + pt.nMarks = numSubs_; + pt.src = s; + if (dontMatchInQuotes) + { + setCbits(s, pt); + } + else + { + pt.cbits = null; + } + return pt; + } + + /** Attempt to match a Pattern beginning + at a specified location within the string. + @see com.stevesoft.pat.Regex#search + */ + public boolean matchAt(String s, int start_pos) + { + return _search(s, start_pos, start_pos); + } + + /** Attempt to match a Pattern beginning + at a specified location within the StringLike. + @see com.stevesoft.pat.Regex#search + */ + public boolean matchAt(StringLike s, int start_pos) + { + return _search(s, start_pos, start_pos); + } + + /** Search through a String for the first + occurrence of a match. + @see com.stevesoft.pat.Regex#searchFrom + @see com.stevesoft.pat.Regex#matchAt + */ + public boolean search(String s) + { + if (s == null) + { + throw new NullPointerException("Null String Given to Regex.search"); + } + return _search(s, 0, s.length()); + } + + public boolean search(StringLike sl) + { + if (sl == null) + { + throw new NullPointerException("Null StringLike Given to Regex.search"); + } + return _search(sl, 0, sl.length()); + } + + public boolean reverseSearch(String s) + { + if (s == null) + { + throw new NullPointerException("Null String Given to Regex.reverseSearch"); + } + return _reverseSearch(s, 0, s.length()); + } + + public boolean reverseSearch(StringLike sl) + { + if (sl == null) + { + throw new NullPointerException( + "Null StringLike Given to Regex.reverseSearch"); + } + return _reverseSearch(sl, 0, sl.length()); + } + + /** Search through a String for the first + occurence of a match, but start at position
start
*/ + public boolean searchFrom(String s, int start) + { + if (s == null) + { + throw new NullPointerException("Null String Given to Regex.searchFrom"); + } + return _search(s, start, s.length()); + } + + public boolean searchFrom(StringLike s, int start) + { + if (s == null) + { + throw new NullPointerException("Null String Given to Regex.searchFrom"); + } + return _search(s, start, s.length()); + } + + /** Search through a region of a String + for the first occurence of a match. */ + public boolean searchRegion(String s, int start, int end) + { + if (s == null) + { + throw new NullPointerException("Null String Given to Regex.searchRegion"); + } + return _search(s, start, end); + } + + /** Set this to change the default behavior of the "." pattern. + By default it now matches perl's behavior and fails to + match the '\n' character. */ + public static boolean dotDoesntMatchCR = true; + StringLike gFlags; + int gFlagto = 0; + boolean gFlag = false; + /** Set the 'g' flag */ + public void setGFlag(boolean b) + { + gFlag = b; + } + + /** Get the state of the 'g' flag. */ + public boolean getGFlag() + { + return gFlag; + } + + boolean sFlag = false; + /** Get the state of the sFlag */ + public boolean getSFlag() + { + return sFlag; + } + + boolean mFlag = false; + /** Get the state of the sFlag */ + public boolean getMFlag() + { + return mFlag; + } + + final boolean _search(String s, int start, int end) + { + return _search(new StringWrap(s), start, end); + } + + final boolean _search(StringLike s, int start, int end) + { + if (gFlag && gFlagto > 0 && gFlags != null && s.unwrap() == gFlags.unwrap()) + { + start = gFlagto; + } + gFlags = null; + + Pthings pt = prep(s); + + int up = (minMatch == null ? end : end - minMatch.i); + + if (up < start && end >= start) + { + up = start; + } + + if (skipper == null) + { + for (int i = start; i <= up; i++) + { + charsMatched_ = thePattern.matchAt(s, i, pt); + if (charsMatched_ >= 0) + { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_ + charsMatched_; + gFlags = s; + return didMatch_ = true; } - BitSet bs = new BitSet(s.length()); - char qc = ' '; - boolean setBit = false; - for(int i=0;i= 0) + { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_ + charsMatched_; + gFlags = s; + return didMatch_ = true; } + } } - /** Only needed for creating your own extensions of - Regex. This method adds the next Pattern in the chain - of patterns or sets the Pattern if it is the first call. */ - protected void add(Pattern p2) { - if(p == null) - p = p2; - else { - p.add(p2); - p2 = p; + return didMatch_ = false; + } + + /*final boolean _search(LongStringLike s,long start,long end) { + if(gFlag && gFlagto > 0 && s==gFlags) + start = gFlagto; + gFlags = null; + + Pthings pt=prep(s); + + int up = end;//(minMatch == null ? end : end-minMatch.i); + + if(up < start && end >= start) up = start; + + if(skipper == null) { + for(long i=start;i<=up;i++) { + charsMatched_ = thePattern.matchAt(s,i,pt); + if(charsMatched_ >= 0) { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_+charsMatched_; + return didMatch_=true; + } + } + } else { + pt.no_check = true; + for(long i=start;i<=up;i++) { + i = skipper.find(src,i,up); + if(i<0) { + charsMatched_ = matchFrom_ = -1; + return didMatch_ = false; + } + charsMatched_ = thePattern.matchAt(s,i,pt); + if(charsMatched_ >= 0) { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_+charsMatched_; + gFlags = s; + return didMatch_=true; + } else { + i = s.adjustIndex(i); + up = s.adjustEnd(i); + } + } + } + return didMatch_=false; + }*/ + + boolean _reverseSearch(String s, int start, int end) + { + return _reverseSearch(new StringWrap(s), start, end); + } + + boolean _reverseSearch(StringLike s, int start, int end) + { + if (gFlag && gFlagto > 0 && s.unwrap() == gFlags.unwrap()) + { + end = gFlagto; + } + gFlags = null; + Pthings pt = prep(s); + for (int i = end; i >= start; i--) + { + charsMatched_ = thePattern.matchAt(s, i, pt); + if (charsMatched_ >= 0) + { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_ - 1; + gFlags = s; + return didMatch_ = true; + } + } + return didMatch_ = false; + } + + // This routine sets the cbits variable + // of class Pattern. Cbits is true for + // the bit corresponding to a character inside + // a set of quotes. + static StringLike lasts = null; + static BitSet lastbs = null; + static void setCbits(StringLike s, Pthings pt) + { + if (s == lasts) + { + pt.cbits = lastbs; + return; + } + BitSet bs = new BitSet(s.length()); + char qc = ' '; + boolean setBit = false; + for (int i = 0; i < s.length(); i++) + { + if (setBit) + { + bs.set(i); + } + char c = s.charAt(i); + if (!setBit && c == '"') + { + qc = c; + setBit = true; + bs.set(i); + } + else if (!setBit && c == '\'') + { + qc = c; + setBit = true; + bs.set(i); + } + else if (setBit && c == qc) + { + setBit = false; + } + else if (setBit && c == '\\' && i + 1 < s.length()) + { + i++; + if (setBit) + { + bs.set(i); } + } } + pt.cbits = lastbs = bs; + lasts = s; + } - /** You only need to use this method if you are creating - your own extentions to Regex. - compile1 compiles one Pattern element, it can be - over-ridden to allow the Regex compiler to understand - new syntax. See deriv.java for an example. This routine - is the heart of class Regex. Rthings has one integer - member called intValue, it is used to keep track of the number - of ()'s in the Pattern. - @exception com.stevesoft.pat.RegSyntax is thrown when a nonsensensical - pattern is supplied. For example, a pattern beginning - with *. */ - protected void compile1(StrPos sp,Rthings mk) throws RegSyntax { - if(sp.match('[')) { - sp.inc(); - add(matchBracket(sp)); - } else if(sp.match('|')) { - if(or == null) - or = new Or(); - if(p == null) p=new NullPattern(); - or.addOr(p); - p = null; - } else if(sp.incMatch("(?<")) { - patInt i = sp.getPatInt(); - if(i==null) RegSyntaxError.endItAll("No int after (?<"); - add(new Backup(i.intValue())); - if(!sp.match(')')) RegSyntaxError.endItAll("No ) after (?<"); - } else if(sp.incMatch("(?>")) { - patInt i = sp.getPatInt(); - if(i==null) RegSyntaxError.endItAll("No int after (?>"); - add(new Backup(-i.intValue())); - if(!sp.match(')')) RegSyntaxError.endItAll("No ) after (?<"); - } else if(sp.incMatch("(?@")) { - char op = sp.c; - sp.inc(); - char cl = sp.c; - sp.inc(); - if(!sp.match(')')) - RegSyntaxError.endItAll( - "(?@ does not have closing paren"); - add(new Group(op,cl)); - } else if(sp.incMatch("(?#")) { - while(!sp.match(')')) - sp.inc(); - } else if(sp.dontMatch && sp.c == 'w') { - Regex r = new Regex(); - //r._compile("[a-zA-Z0-9_]",mk); - //add(new Goop("\\w",r.thePattern)); - Bracket b = new Bracket(false); - b.addOr(new Range('a','z')); - b.addOr(new Range('A','Z')); - b.addOr(new Range('0','9')); - b.addOr(new oneChar('_')); - add(b); - } else if(sp.dontMatch && sp.c == 'G') { - add(new BackG()); - } else if(sp.dontMatch && sp.c == 's') { - //Regex r = new Regex(); - //r._compile("[ \t\n\r\b]",mk); - //add(new Goop("\\s",r.thePattern)); - Bracket b = new Bracket(false); - b.addOr(new oneChar((char)32)); - b.addOr(new Range((char)8,(char)10)); - b.addOr(new oneChar((char)13)); - add(b); - } else if(sp.dontMatch && sp.c == 'd') { - Regex r = new Regex(); - //r._compile("[0-9]",mk); - //add(new Goop("\\d",r.thePattern)); - Range digit = new Range('0','9'); - digit.printBrackets = true; - add(digit); - } else if(sp.dontMatch && sp.c == 'W') { - Regex r = new Regex(); - //r._compile("[^a-zA-Z0-9_]",mk); - //add(new Goop("\\W",r.thePattern)); - Bracket b = new Bracket(true); - b.addOr(new Range('a','z')); - b.addOr(new Range('A','Z')); - b.addOr(new Range('0','9')); - b.addOr(new oneChar('_')); - add(b); - } else if(sp.dontMatch && sp.c == 'S') { - //Regex r = new Regex(); - //r._compile("[^ \t\n\r\b]",mk); - //add(new Goop("\\S",r.thePattern)); - Bracket b = new Bracket(true); - b.addOr(new oneChar((char)32)); - b.addOr(new Range((char)8,(char)10)); - b.addOr(new oneChar((char)13)); - add(b); - } else if(sp.dontMatch && sp.c == 'D') { - //Regex r = new Regex(); - //r._compile("[^0-9]",mk); - //add(new Goop("\\D",r.thePattern)); - Bracket b = new Bracket(true); - b.addOr(new Range('0','9')); - add(b); - } else if(sp.dontMatch && sp.c == 'B') { - Regex r = new Regex(); - r._compile("(?!"+back_slash+"b)",mk); - add(r.thePattern); - } else if(isOctalString(sp)) { - int d = sp.c - '0'; - sp.inc(); - d = 8*d + sp.c - '0'; - StrPos sp2 = new StrPos(sp); - sp2.inc(); - if(isOctalDigit(sp2,false)) { - sp.inc(); - d = 8*d + sp.c - '0'; - } - add(new oneChar((char)d)); - } else if(sp.dontMatch && sp.c >= '1' && sp.c <= '9') { - int iv = sp.c-'0'; - StrPos s2 = new StrPos(sp); - s2.inc(); - if(!s2.dontMatch && s2.c >= '0' && s2.c <= '9') { - iv = 10*iv+(s2.c-'0'); - sp.inc(); - } - add(new BackMatch(iv)); - } else if(sp.dontMatch && sp.c == 'b') { - add(new Boundary()); - } else if(sp.match('\b')) { - add(new Boundary()); - } else if(sp.match('$')) { - add(new End(true)); - } else if(sp.dontMatch && sp.c == 'Z') { - add(new End(false)); - } else if(sp.match('.')) { - add(new Any()); - } else if(sp.incMatch("(??")) { - StringBuffer sb = new StringBuffer(); - StringBuffer sb2 = new StringBuffer(); - while(!sp.match(')') && !sp.match(':')) { - sb.append(sp.c); - sp.inc(); - } - if(sp.incMatch(":")) { - while(!sp.match(')')) { - sb2.append(sp.c); - sp.inc(); - } - } - String sbs = sb.toString(); - if(validators.get(sbs) instanceof String) { - String pat = (String)validators.get(sbs); - Regex r = newRegex(); - Rthings rth = new Rthings(this); - rth.noBackRefs = true; - r._compile(pat,rth); - add(r.thePattern); - } else { - Custom cm = new Custom(sb.toString()); - if(cm.v != null) { - Validator v2 = cm.v.arg(sb2.toString()); - if(v2 != null) { - v2.argsave = sb2.toString(); - String p = cm.v.pattern; - cm.v = v2; - v2.pattern = p; - } - Regex r = newRegex(); - Rthings rth = new Rthings(this); - rth.noBackRefs = true; - r._compile(cm.v.pattern,rth); - cm.sub = r.thePattern; - cm.sub.add(new CustomEndpoint(cm)); - cm.sub.setParent(cm); - add(cm); - } - } - } else if(sp.match('(')) { - mk.parenLevel++; - Regex r = newRegex(); - // r.or = new Or(); - sp.inc(); - if(sp.incMatch("?:")) { - r.or = new Or(); - } else if(sp.incMatch("?=")) { - r.or = new lookAhead(false); - } else if(sp.incMatch("?!")) { - r.or = new lookAhead(true); - } else if(sp.match('?')) { - sp.inc(); - do { - if(sp.c=='i')mk.ignoreCase = true; - if(sp.c=='Q')mk.dontMatchInQuotes = true; - if(sp.c=='o')mk.optimizeMe = true; - if(sp.c=='g')mk.gFlag = true; - if(sp.c=='s')mk.sFlag = true; - if(sp.c=='m')mk.mFlag = true; - sp.inc(); - } while(!sp.match(')') && !sp.eos); - r = null; - mk.parenLevel--; - if(sp.eos) //throw new RegSyntax - RegSyntaxError.endItAll("Unclosed ()"); - } else { // just ordinary parenthesis - r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++); - } - if(r != null) add(r._compile(sp,mk)); - } else if(sp.match('^')) { - add(new Start(true)); - } else if(sp.dontMatch && sp.c=='A') { - add(new Start(false)); - } else if(sp.match('*')) { - addMulti(new patInt(0),new patInf()); - } else if(sp.match('+')) { - addMulti(new patInt(1),new patInf()); - } else if(sp.match('?')) { - addMulti(new patInt(0),new patInt(1)); - } else if(sp.match('{')) { - boolean bad = false; - StrPos sp2 = new StrPos(sp); - StringBuffer sb = new StringBuffer(); - sp.inc(); - patInt i1 = sp.getPatInt(); - patInt i2 = null; - if(sp.match('}')) { - i2 = i1; - } else { - if(!sp.match(','))/* - RegSyntaxError.endItAll( - "String \"{"+i2+ - "\" should be followed with , or }");*/ - bad = true; - sp.inc(); - if(sp.match('}')) - i2 = new patInf(); - else - i2 = sp.getPatInt(); - } - if(i1 == null || i2 == null) /* - throw new RegSyntax("Badly formatted Multi: " - +"{"+i1+","+i2+"}"); */ bad = true; - if(bad) { - sp.dup(sp2); - add(new oneChar(sp.c)); - } else - addMulti(i1,i2); - } else if(sp.escMatch('x') && next2Hex(sp)) { - sp.inc(); - int d = getHexDigit(sp); - sp.inc(); - d = 16*d + getHexDigit(sp); - add(new oneChar((char)d)); - } else if(sp.escMatch('c')) { - sp.inc(); - if(sp.c < Ctrl.cmap.length) - add(new oneChar(Ctrl.cmap[sp.c])); - else - add(new oneChar(sp.c)); - } else if(sp.escMatch('f')) { - add(new oneChar((char)12)); - } else if(sp.escMatch('a')) { - add(new oneChar((char)7)); - } else if(sp.escMatch('t')) { - add(new oneChar('\t')); - } else if(sp.escMatch('n')) { - add(new oneChar('\n')); - } else if(sp.escMatch('r')) { - add(new oneChar('\r')); - } else if(sp.escMatch('b')) { - add(new oneChar('\b')); - } else if(sp.escMatch('e')) { - add(new oneChar((char)27)); - } else { - add(new oneChar(sp.c)); - if(sp.match(')')) - RegSyntaxError.endItAll("Unmatched right paren in pattern"); - } + // Wanted user to over-ride this in alpha version, + // but it wasn't really necessary because of this trick: + Regex newRegex() + { + try + { + return (Regex) getClass().newInstance(); + } + catch (InstantiationException ie) + { + return null; + } + catch (IllegalAccessException iae) + { + return null; } + } - // compiles all Pattern elements, internal method - private Pattern _compile(String pat,Rthings mk) throws RegSyntax { - minMatch = null; - sFlag = mFlag = ignoreCase = gFlag = false; - StrPos sp = new StrPos(pat,0); - thePattern = _compile(sp,mk); - pt.marks = null; - return thePattern; + /** Only needed for creating your own extensions of + Regex. This method adds the next Pattern in the chain + of patterns or sets the Pattern if it is the first call. */ + protected void add(Pattern p2) + { + if (p == null) + { + p = p2; + } + else + { + p.add(p2); + p2 = p; } + } - Pattern p = null; - Or or = null; - Pattern _compile(StrPos sp,Rthings mk) throws RegSyntax { - while(!(sp.eos || (or != null && sp.match(')')) )) { - compile1(sp,mk); - sp.inc(); + /** You only need to use this method if you are creating + your own extentions to Regex. + compile1 compiles one Pattern element, it can be + over-ridden to allow the Regex compiler to understand + new syntax. See deriv.java for an example. This routine + is the heart of class Regex. Rthings has one integer + member called intValue, it is used to keep track of the number + of ()'s in the Pattern. + @exception com.stevesoft.pat.RegSyntax is thrown when a nonsensensical + pattern is supplied. For example, a pattern beginning + with *. */ + protected void compile1(StrPos sp, Rthings mk) + throws RegSyntax + { + if (sp.match('[')) + { + sp.inc(); + add(matchBracket(sp)); + } + else if (sp.match('|')) + { + if (or == null) + { + or = new Or(); + } + if (p == null) + { + p = new NullPattern(); + } + or.addOr(p); + p = null; + } + else if (sp.incMatch("(?<")) + { + patInt i = sp.getPatInt(); + if (i == null) + { + RegSyntaxError.endItAll("No int after (?<"); + } + add(new Backup(i.intValue())); + if (!sp.match(')')) + { + RegSyntaxError.endItAll("No ) after (?<"); + } + } + else if (sp.incMatch("(?>")) + { + patInt i = sp.getPatInt(); + if (i == null) + { + RegSyntaxError.endItAll("No int after (?>"); + } + add(new Backup( -i.intValue())); + if (!sp.match(')')) + { + RegSyntaxError.endItAll("No ) after (?<"); + } + } + else if (sp.incMatch("(?@")) + { + char op = sp.c; + sp.inc(); + char cl = sp.c; + sp.inc(); + if (!sp.match(')')) + { + RegSyntaxError.endItAll( + "(?@ does not have closing paren"); + } + add(new Group(op, cl)); + } + else if (sp.incMatch("(?#")) + { + while (!sp.match(')')) + { + sp.inc(); + } + } + else if (sp.dontMatch && sp.c == 'w') + { + //Regex r = new Regex(); + //r._compile("[a-zA-Z0-9_]",mk); + //add(new Goop("\\w",r.thePattern)); + Bracket b = new Bracket(false); + b.addOr(new Range('a', 'z')); + b.addOr(new Range('A', 'Z')); + b.addOr(new Range('0', '9')); + b.addOr(new oneChar('_')); + add(b); + } + else if (sp.dontMatch && sp.c == 'G') + { + add(new BackG()); + } + else if (sp.dontMatch && sp.c == 's') + { + //Regex r = new Regex(); + //r._compile("[ \t\n\r\b]",mk); + //add(new Goop("\\s",r.thePattern)); + Bracket b = new Bracket(false); + b.addOr(new oneChar( (char) 32)); + b.addOr(new Range( (char) 8, (char) 10)); + b.addOr(new oneChar( (char) 13)); + add(b); + } + else if (sp.dontMatch && sp.c == 'd') + { + //Regex r = new Regex(); + //r._compile("[0-9]",mk); + //add(new Goop("\\d",r.thePattern)); + Range digit = new Range('0', '9'); + digit.printBrackets = true; + add(digit); + } + else if (sp.dontMatch && sp.c == 'W') + { + //Regex r = new Regex(); + //r._compile("[^a-zA-Z0-9_]",mk); + //add(new Goop("\\W",r.thePattern)); + Bracket b = new Bracket(true); + b.addOr(new Range('a', 'z')); + b.addOr(new Range('A', 'Z')); + b.addOr(new Range('0', '9')); + b.addOr(new oneChar('_')); + add(b); + } + else if (sp.dontMatch && sp.c == 'S') + { + //Regex r = new Regex(); + //r._compile("[^ \t\n\r\b]",mk); + //add(new Goop("\\S",r.thePattern)); + Bracket b = new Bracket(true); + b.addOr(new oneChar( (char) 32)); + b.addOr(new Range( (char) 8, (char) 10)); + b.addOr(new oneChar( (char) 13)); + add(b); + } + else if (sp.dontMatch && sp.c == 'D') + { + //Regex r = new Regex(); + //r._compile("[^0-9]",mk); + //add(new Goop("\\D",r.thePattern)); + Bracket b = new Bracket(true); + b.addOr(new Range('0', '9')); + add(b); + } + else if (sp.dontMatch && sp.c == 'B') + { + Regex r = new Regex(); + r._compile("(?!" + back_slash + "b)", mk); + add(r.thePattern); + } + else if (isOctalString(sp)) + { + int d = sp.c - '0'; + sp.inc(); + d = 8 * d + sp.c - '0'; + StrPos sp2 = new StrPos(sp); + sp2.inc(); + if (isOctalDigit(sp2, false)) + { + sp.inc(); + d = 8 * d + sp.c - '0'; + } + add(new oneChar( (char) d)); + } + else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9') + { + int iv = sp.c - '0'; + StrPos s2 = new StrPos(sp); + s2.inc(); + if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9') + { + iv = 10 * iv + (s2.c - '0'); + sp.inc(); + } + add(new BackMatch(iv)); + } + else if (sp.dontMatch && sp.c == 'b') + { + add(new Boundary()); + } + else if (sp.match('\b')) + { + add(new Boundary()); + } + else if (sp.match('$')) + { + add(new End(true)); + } + else if (sp.dontMatch && sp.c == 'Z') + { + add(new End(false)); + } + else if (sp.match('.')) + { + add(new Any()); + } + else if (sp.incMatch("(??")) + { + StringBuffer sb = new StringBuffer(); + StringBuffer sb2 = new StringBuffer(); + while (!sp.match(')') && !sp.match(':')) + { + sb.append(sp.c); + sp.inc(); + } + if (sp.incMatch(":")) + { + while (!sp.match(')')) + { + sb2.append(sp.c); + sp.inc(); + } + } + String sbs = sb.toString(); + if (validators.get(sbs) instanceof String) + { + String pat = (String) validators.get(sbs); + Regex r = newRegex(); + Rthings rth = new Rthings(this); + rth.noBackRefs = true; + r._compile(pat, rth); + add(r.thePattern); + } + else + { + Custom cm = new Custom(sb.toString()); + if (cm.v != null) + { + Validator v2 = cm.v.arg(sb2.toString()); + if (v2 != null) + { + v2.argsave = sb2.toString(); + String p = cm.v.pattern; + cm.v = v2; + v2.pattern = p; + } + Regex r = newRegex(); + Rthings rth = new Rthings(this); + rth.noBackRefs = true; + r._compile(cm.v.pattern, rth); + cm.sub = r.thePattern; + cm.sub.add(new CustomEndpoint(cm)); + cm.sub.setParent(cm); + add(cm); + } + } + } + else if (sp.match('(')) + { + mk.parenLevel++; + Regex r = newRegex(); + // r.or = new Or(); + sp.inc(); + if (sp.incMatch("?:")) + { + r.or = new Or(); + } + else if (sp.incMatch("?=")) + { + r.or = new lookAhead(false); + } + else if (sp.incMatch("?!")) + { + r.or = new lookAhead(true); + } + else if (sp.match('?')) + { + sp.inc(); + do + { + if (sp.c == 'i') + { + mk.ignoreCase = true; + } + if (sp.c == 'Q') + { + mk.dontMatchInQuotes = true; + } + if (sp.c == 'o') + { + mk.optimizeMe = true; + } + if (sp.c == 'g') + { + mk.gFlag = true; + } + if (sp.c == 's') + { + mk.sFlag = true; + } + if (sp.c == 'm') + { + mk.mFlag = true; + } + sp.inc(); + } + while (!sp.match(')') && !sp.eos); + r = null; + mk.parenLevel--; + if (sp.eos) //throw new RegSyntax + { + RegSyntaxError.endItAll("Unclosed ()"); + } + } + else + { // just ordinary parenthesis + r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++); + } + if (r != null) + { + add(r._compile(sp, mk)); + } + } + else if (sp.match('^')) + { + add(new Start(true)); + } + else if (sp.dontMatch && sp.c == 'A') + { + add(new Start(false)); + } + else if (sp.match('*')) + { + addMulti(new patInt(0), new patInf()); + } + else if (sp.match('+')) + { + addMulti(new patInt(1), new patInf()); + } + else if (sp.match('?')) + { + addMulti(new patInt(0), new patInt(1)); + } + else if (sp.match('{')) + { + boolean bad = false; + StrPos sp2 = new StrPos(sp); + //StringBuffer sb = new StringBuffer(); + sp.inc(); + patInt i1 = sp.getPatInt(); + patInt i2 = null; + if (sp.match('}')) + { + i2 = i1; + } + else + { + if (!sp.match(',')) + { + /* + RegSyntaxError.endItAll( + "String \"{"+i2+ + "\" should be followed with , or }"); + */ + bad = true; } - if(sp.match(')')) mk.parenLevel--; - else if(sp.eos && mk.parenLevel != 0) { - RegSyntaxError.endItAll("Unclosed Parenthesis! lvl="+mk.parenLevel); - } if(or != null) { - if(p == null) p = new NullPattern(); - or.addOr(p); - return or; + sp.inc(); + if (sp.match('}')) + { + i2 = new patInf(); } - return p==null ? new NullPattern() : p; + else + { + i2 = sp.getPatInt(); + } + } + if (i1 == null || i2 == null) + { + /* + throw new RegSyntax("Badly formatted Multi: " + +"{"+i1+","+i2+"}"); */ + bad = true; + } + if (bad) + { + sp.dup(sp2); + add(new oneChar(sp.c)); + } + else + { + addMulti(i1, i2); + } + } + else if (sp.escMatch('x') && next2Hex(sp)) + { + sp.inc(); + int d = getHexDigit(sp); + sp.inc(); + d = 16 * d + getHexDigit(sp); + add(new oneChar( (char) d)); + } + else if (sp.escMatch('c')) + { + sp.inc(); + if (sp.c < Ctrl.cmap.length) + { + add(new oneChar(Ctrl.cmap[sp.c])); + } + else + { + add(new oneChar(sp.c)); + } + } + else if (sp.escMatch('f')) + { + add(new oneChar( (char) 12)); + } + else if (sp.escMatch('a')) + { + add(new oneChar( (char) 7)); + } + else if (sp.escMatch('t')) + { + add(new oneChar('\t')); + } + else if (sp.escMatch('n')) + { + add(new oneChar('\n')); + } + else if (sp.escMatch('r')) + { + add(new oneChar('\r')); + } + else if (sp.escMatch('b')) + { + add(new oneChar('\b')); + } + else if (sp.escMatch('e')) + { + add(new oneChar( (char) 27)); + } + else + { + add(new oneChar(sp.c)); + if (sp.match(')')) + { + RegSyntaxError.endItAll("Unmatched right paren in pattern"); + } } + } - // add a multi object to the end of the chain - // which applies to the last object - void addMulti(patInt i1,patInt i2) throws RegSyntax { - Pattern last,last2; - for(last = p;last != null && last.next != null;last=last.next) - ; - if(last == null || last == p) - last2 = null; - else - for(last2 = p;last2.next != last;last2=last2.next) - ; - if(last instanceof Multi && i1.intValue()==0 && - i2.intValue()==1) - ((Multi)last).matchFewest = true; - else if(last instanceof FastMulti && i1.intValue()==0 && - i2.intValue()==1) - ((FastMulti)last).matchFewest = true; - else if(last instanceof DotMulti && i1.intValue()==0 && - i2.intValue()==1) - ((DotMulti)last).matchFewest = true; - else if(last instanceof Multi - || last instanceof DotMulti - || last instanceof FastMulti) - throw new RegSyntax("Syntax error."); - else if(last2 == null) - p = mkMulti(i1,i2,p); + // compiles all Pattern elements, internal method + private Pattern _compile(String pat, Rthings mk) + throws RegSyntax + { + minMatch = null; + sFlag = mFlag = ignoreCase = gFlag = false; + StrPos sp = new StrPos(pat, 0); + thePattern = _compile(sp, mk); + pt.marks = null; + return thePattern; + } + + Pattern p = null; + Or or = null; + Pattern _compile(StrPos sp, Rthings mk) + throws RegSyntax + { + while (! (sp.eos || (or != null && sp.match(')')))) + { + compile1(sp, mk); + sp.inc(); + } + if (sp.match(')')) + { + mk.parenLevel--; + } + else if (sp.eos && mk.parenLevel != 0) + { + RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel); + } + if (or != null) + { + if (p == null) + { + p = new NullPattern(); + } + or.addOr(p); + return or; + } + return p == null ? new NullPattern() : p; + } + + // add a multi object to the end of the chain + // which applies to the last object + void addMulti(patInt i1, patInt i2) + throws RegSyntax + { + Pattern last, last2; + for (last = p; last != null && last.next != null; last = last.next) + { + ; + } + if (last == null || last == p) + { + last2 = null; + } + else + { + for (last2 = p; last2.next != last; last2 = last2.next) + { + ; + } + } + if (last instanceof Multi && i1.intValue() == 0 && + i2.intValue() == 1) + { + ( (Multi) last).matchFewest = true; + } + else if (last instanceof FastMulti && i1.intValue() == 0 && + i2.intValue() == 1) + { + ( (FastMulti) last).matchFewest = true; + } + else if (last instanceof DotMulti && i1.intValue() == 0 && + i2.intValue() == 1) + { + ( (DotMulti) last).matchFewest = true; + } + else if (last instanceof Multi + || last instanceof DotMulti + || last instanceof FastMulti) + { + throw new RegSyntax("Syntax error."); + } + else if (last2 == null) + { + p = mkMulti(i1, i2, p); + } + else + { + last2.next = mkMulti(i1, i2, last); + } + } + + final static Pattern mkMulti(patInt lo, patInt hi, Pattern p) + throws RegSyntax + { + if (p instanceof Any && p.next == null) + { + return (Pattern)new DotMulti(lo, hi); + } + return RegOpt.safe4fm(p) ? (Pattern)new FastMulti(lo, hi, p) : + (Pattern)new Multi(lo, hi, p); + } + + // process the bracket operator + Pattern matchBracket(StrPos sp) + throws RegSyntax + { + Bracket ret; + if (sp.match('^')) + { + ret = new Bracket(true); + sp.inc(); + } + else + { + ret = new Bracket(false); + } + if (sp.match(']')) + { + //throw new RegSyntax + RegSyntaxError.endItAll("Unmatched []"); + } + + while (!sp.eos && !sp.match(']')) + { + StrPos s1 = new StrPos(sp); + s1.inc(); + StrPos s1_ = new StrPos(s1); + s1_.inc(); + if (s1.match('-') && !s1_.match(']')) + { + StrPos s2 = new StrPos(s1); + s2.inc(); + if (!s2.eos) + { + ret.addOr(new Range(sp.c, s2.c)); + } + sp.inc(); + sp.inc(); + } + else if (sp.escMatch('Q')) + { + sp.inc(); + while (!sp.escMatch('E')) + { + ret.addOr(new oneChar(sp.c)); + sp.inc(); + } + } + else if (sp.escMatch('d')) + { + ret.addOr(new Range('0', '9')); + } + else if (sp.escMatch('s')) + { + ret.addOr(new oneChar( (char) 32)); + ret.addOr(new Range( (char) 8, (char) 10)); + ret.addOr(new oneChar( (char) 13)); + } + else if (sp.escMatch('w')) + { + ret.addOr(new Range('a', 'z')); + ret.addOr(new Range('A', 'Z')); + ret.addOr(new Range('0', '9')); + ret.addOr(new oneChar('_')); + } + else if (sp.escMatch('D')) + { + ret.addOr(new Range( (char) 0, (char) 47)); + ret.addOr(new Range( (char) 58, (char) 65535)); + } + else if (sp.escMatch('S')) + { + ret.addOr(new Range( (char) 0, (char) 7)); + ret.addOr(new Range( (char) 11, (char) 12)); + ret.addOr(new Range( (char) 14, (char) 31)); + ret.addOr(new Range( (char) 33, (char) 65535)); + } + else if (sp.escMatch('W')) + { + ret.addOr(new Range( (char) 0, (char) 64)); + ret.addOr(new Range( (char) 91, (char) 94)); + ret.addOr(new oneChar( (char) 96)); + ret.addOr(new Range( (char) 123, (char) 65535)); + } + else if (sp.escMatch('x') && next2Hex(sp)) + { + sp.inc(); + int d = getHexDigit(sp); + sp.inc(); + d = 16 * d + getHexDigit(sp); + ret.addOr(new oneChar( (char) d)); + } + else if (sp.escMatch('a')) + { + ret.addOr(new oneChar( (char) 7)); + } + else if (sp.escMatch('f')) + { + ret.addOr(new oneChar( (char) 12)); + } + else if (sp.escMatch('e')) + { + ret.addOr(new oneChar( (char) 27)); + } + else if (sp.escMatch('n')) + { + ret.addOr(new oneChar('\n')); + } + else if (sp.escMatch('t')) + { + ret.addOr(new oneChar('\t')); + } + else if (sp.escMatch('r')) + { + ret.addOr(new oneChar('\r')); + } + else if (sp.escMatch('c')) + { + sp.inc(); + if (sp.c < Ctrl.cmap.length) + { + ret.addOr(new oneChar(Ctrl.cmap[sp.c])); + } else - last2.next = mkMulti(i1,i2,last); - } - final static Pattern mkMulti(patInt lo,patInt hi,Pattern p) throws RegSyntax { - if(p instanceof Any && p.next == null) - return (Pattern)new DotMulti(lo,hi); - return RegOpt.safe4fm(p) ? (Pattern)new FastMulti(lo,hi,p) : - (Pattern)new Multi(lo,hi,p); - } - // process the bracket operator - Pattern matchBracket(StrPos sp) throws RegSyntax { - Bracket ret; - if(sp.match('^')) { - ret = new Bracket(true); - sp.inc(); - } else - ret = new Bracket(false); - if(sp.match(']')) - //throw new RegSyntax - RegSyntaxError.endItAll("Unmatched []"); - - while(!sp.eos && !sp.match(']')) { - StrPos s1 = new StrPos(sp); - s1.inc(); - StrPos s1_ = new StrPos(s1); - s1_.inc(); - if(s1.match('-') && !s1_.match(']')) { - StrPos s2 = new StrPos(s1); - s2.inc(); - if(!s2.eos) - ret.addOr(new Range(sp.c,s2.c)); - sp.inc(); - sp.inc(); - } else if(sp.escMatch('Q')) { - sp.inc(); - while(!sp.escMatch('E')) { - ret.addOr(new oneChar(sp.c)); - sp.inc(); - } - } else if(sp.escMatch('d')) { - ret.addOr(new Range('0','9')); - } else if(sp.escMatch('s')) { - ret.addOr(new oneChar((char)32)); - ret.addOr(new Range((char)8,(char)10)); - ret.addOr(new oneChar((char)13)); - } else if(sp.escMatch('w')) { - ret.addOr(new Range('a','z')); - ret.addOr(new Range('A','Z')); - ret.addOr(new Range('0','9')); - ret.addOr(new oneChar('_')); - } else if(sp.escMatch('D')) { - ret.addOr(new Range((char)0,(char)47)); - ret.addOr(new Range((char)58,(char)65535)); - } else if(sp.escMatch('S')) { - ret.addOr(new Range((char)0,(char)7)); - ret.addOr(new Range((char)11,(char)12)); - ret.addOr(new Range((char)14,(char)31)); - ret.addOr(new Range((char)33,(char)65535)); - } else if(sp.escMatch('W')) { - ret.addOr(new Range((char)0,(char)64)); - ret.addOr(new Range((char)91,(char)94)); - ret.addOr(new oneChar((char)96)); - ret.addOr(new Range((char)123,(char)65535)); - } else if(sp.escMatch('x') && next2Hex(sp)) { - sp.inc(); - int d = getHexDigit(sp); - sp.inc(); - d = 16*d + getHexDigit(sp); - ret.addOr(new oneChar((char)d)); - } else if(sp.escMatch('a')) { - ret.addOr(new oneChar((char)7)); - } else if(sp.escMatch('f')) { - ret.addOr(new oneChar((char)12)); - } else if(sp.escMatch('e')) { - ret.addOr(new oneChar((char)27)); - } else if(sp.escMatch('n')) { - ret.addOr(new oneChar('\n')); - } else if(sp.escMatch('t')) { - ret.addOr(new oneChar('\t')); - } else if(sp.escMatch('r')) { - ret.addOr(new oneChar('\r')); - } else if(sp.escMatch('c')) { - sp.inc(); - if(sp.c < Ctrl.cmap.length) - ret.addOr(new oneChar(Ctrl.cmap[sp.c])); - else - ret.addOr(new oneChar(sp.c)); - } else if(isOctalString(sp)) { - int d = sp.c - '0'; - sp.inc(); - d = 8*d + sp.c - '0'; - StrPos sp2 = new StrPos(sp); - sp2.inc(); - if(isOctalDigit(sp2,false)) { - sp.inc(); - d = 8*d + sp.c - '0'; - } - ret.addOr(new oneChar((char)d)); - } else - ret.addOr(new oneChar(sp.c)); - sp.inc(); + { + ret.addOr(new oneChar(sp.c)); + } + } + else if (isOctalString(sp)) + { + int d = sp.c - '0'; + sp.inc(); + d = 8 * d + sp.c - '0'; + StrPos sp2 = new StrPos(sp); + sp2.inc(); + if (isOctalDigit(sp2, false)) + { + sp.inc(); + d = 8 * d + sp.c - '0'; + } + ret.addOr(new oneChar( (char) d)); + } + else + { + ret.addOr(new oneChar(sp.c)); + } + sp.inc(); + } + return ret; + } + + /** Converts the stored Pattern to a String -- this is a + decompile. Note that \t and \n will really print out here, + Not just the two character representations. + Also be prepared to see some strange output if your characters + are not printable. */ + public String toString() + { + if (false && thePattern == null) + { + return ""; + } + else + { + StringBuffer sb = new StringBuffer(); + if (esc != Pattern.ESC) + { + sb.append("(?e="); + sb.append(esc); + sb.append(")"); + } + if (gFlag + || mFlag + || !dotDoesntMatchCR + || sFlag + || ignoreCase + || dontMatchInQuotes + || optimized()) + { + sb.append("(?"); + if (ignoreCase) + { + sb.append("i"); } - return ret; - } - - /** Converts the stored Pattern to a String -- this is a - decompile. Note that \t and \n will really print out here, - Not just the two character representations. - Also be prepared to see some strange output if your characters - are not printable. */ - public String toString() { - if( false && thePattern == null ) - return ""; - else { - StringBuffer sb = new StringBuffer(); - if(esc != Pattern.ESC) { - sb.append("(?e="); - sb.append(esc); - sb.append(")"); - } - if(gFlag - ||mFlag - ||!dotDoesntMatchCR - ||sFlag - ||ignoreCase - ||dontMatchInQuotes - ||optimized()) { - sb.append("(?"); - if(ignoreCase)sb.append("i"); - if(mFlag)sb.append("m"); - if(sFlag||!dotDoesntMatchCR)sb.append("s"); - if(dontMatchInQuotes)sb.append("Q"); - if(optimized())sb.append("o"); - if(gFlag)sb.append("g"); - sb.append(")"); - } - String patstr = thePattern.toString(); - if(esc != Pattern.ESC) - patstr = reEscape(patstr,Pattern.ESC,esc); - sb.append(patstr); - return sb.toString(); + if (mFlag) + { + sb.append("m"); } + if (sFlag || !dotDoesntMatchCR) + { + sb.append("s"); + } + if (dontMatchInQuotes) + { + sb.append("Q"); + } + if (optimized()) + { + sb.append("o"); + } + if (gFlag) + { + sb.append("g"); + } + sb.append(")"); + } + String patstr = thePattern.toString(); + if (esc != Pattern.ESC) + { + patstr = reEscape(patstr, Pattern.ESC, esc); + } + sb.append(patstr); + return sb.toString(); } - // Re-escape Pattern, allows us to use a different escape - // character. - static String reEscape(String s,char oldEsc,char newEsc) { - if(oldEsc == newEsc) return s; - int i; - StringBuffer sb = new StringBuffer(); - for(i=0;iThis method will attempt to rewrite - your pattern in a way that makes it faster (not all patterns - execute at the same speed). In general, "(?: ... )" will be - faster than "( ... )" so if you don't need the backreference, - you should group using the former pattern.

It will also - introduce new pattern elements that you can't get to otherwise, - for example if you have a large table of strings, i.e. the - months of the year "(January|February|...)" optimize() will make - a Hashtable that takes it to the next appropriate pattern - element -- eliminating the need for a linear search. - @see com.stevesoft.pat.Regex#optimized - @see com.stevesoft.pat.Regex#ignoreCase - @see com.stevesoft.pat.Regex#dontMatchInQuotes - @see com.stevesoft.pat.Regex#matchAt - @see com.stevesoft.pat.Regex#search - */ - public void optimize() { - if(optimized()||thePattern==null) return; - minMatch = new patInt(0);//thePattern.countMinChars(); - thePattern = RegOpt.opt(thePattern,ignoreCase, - dontMatchInQuotes); - skipper = Skip.findSkip(this); - //RegOpt.setParents(this); - return; - } - Skip skipper; - /** This function returns true if the optimize method has - been called. */ - public boolean optimized() { - return minMatch != null; - } - - /** A bit of syntactic surgar for those who want to make - their code look more perl-like. To use this initialize - your Regex object by saying: -

-        Regex r1 = Regex.perlCode("s/hello/goodbye/");
-        Regex r2 = Regex.perlCode("s'fish'frog'i");
-        Regex r3 = Regex.perlCode("m'hello');
-        
- The i for ignoreCase is supported in - this syntax, as well as m, s, and x. The g flat - is a bit of a special case.

- If you wish to replace all occurences of a pattern, you - do not put a 'g' in the perlCode, but call Regex's - replaceAll method.

- If you wish to simply - and only do a search for r2's pattern, you can do this - by calling the searchFrom method method repeatedly, or - by calling search repeatedly if the g flag is set. -

- Note: Currently perlCode does not - support the (?e=#) syntax for - changing the escape character. - */ - - public static Regex perlCode(String s) { - // this file is big enough, see parsePerl.java - // for this function. - return parsePerl.parse(s); - } - static final char back_slash = '\\'; - - /** Checks to see if there are only literal and no special - pattern elements in this Regex. */ - public boolean isLiteral() { - Pattern x = thePattern; - while(x != null) { - if(x instanceof oneChar) - ; - else if(x instanceof Skipped) - ; - else - return false; - x = x.next; + else + { + sb.append(newEsc); + sb.append(s.charAt(i + 1)); } - return true; + i++; + } + else if (s.charAt(i) == newEsc) + { + sb.append(newEsc); + sb.append(newEsc); + } + else + { + sb.append(s.charAt(i)); + } + } + return sb.toString(); + } + + /** This method implements FilenameFilter, allowing one + to use a Regex to search through a directory using File.list. + There is a FileRegex now that does this better. + @see com.stevesoft.pat.FileRegex + */ + public boolean accept(File dir, String s) + { + return search(s); + } + + /** The version of this package */ + final static public String version() + { + return "lgpl release 1.5.3"; + } + + /** Once this method is called, the state of variables + ignoreCase and dontMatchInQuotes should not be changed as the + results will be unpredictable. However, + search and matchAt will run more quickly. Note that you + can check to see if the pattern has been optimized by calling + the optimized() method.

This method will attempt to rewrite + your pattern in a way that makes it faster (not all patterns + execute at the same speed). In general, "(?: ... )" will be + faster than "( ... )" so if you don't need the backreference, + you should group using the former pattern.

It will also + introduce new pattern elements that you can't get to otherwise, + for example if you have a large table of strings, i.e. the + months of the year "(January|February|...)" optimize() will make + a Hashtable that takes it to the next appropriate pattern + element -- eliminating the need for a linear search. + @see com.stevesoft.pat.Regex#optimized + @see com.stevesoft.pat.Regex#ignoreCase + @see com.stevesoft.pat.Regex#dontMatchInQuotes + @see com.stevesoft.pat.Regex#matchAt + @see com.stevesoft.pat.Regex#search + */ + public void optimize() + { + if (optimized() || thePattern == null) + { + return; + } + minMatch = new patInt(0); //thePattern.countMinChars(); + thePattern = RegOpt.opt(thePattern, ignoreCase, + dontMatchInQuotes); + skipper = Skip.findSkip(this); + //RegOpt.setParents(this); + return; + } + + Skip skipper; + /** This function returns true if the optimize method has + been called. */ + public boolean optimized() + { + return minMatch != null; + } + + /** A bit of syntactic surgar for those who want to make + their code look more perl-like. To use this initialize + your Regex object by saying: +

+      Regex r1 = Regex.perlCode("s/hello/goodbye/");
+      Regex r2 = Regex.perlCode("s'fish'frog'i");
+      Regex r3 = Regex.perlCode("m'hello');
+      
+ The i for ignoreCase is supported in + this syntax, as well as m, s, and x. The g flat + is a bit of a special case.

+ If you wish to replace all occurences of a pattern, you + do not put a 'g' in the perlCode, but call Regex's + replaceAll method.

+ If you wish to simply + and only do a search for r2's pattern, you can do this + by calling the searchFrom method method repeatedly, or + by calling search repeatedly if the g flag is set. +

+ Note: Currently perlCode does not + support the (?e=#) syntax for + changing the escape character. + */ + + public static Regex perlCode(String s) + { + // this file is big enough, see parsePerl.java + // for this function. + return parsePerl.parse(s); + } + + static final char back_slash = '\\'; + + /** Checks to see if there are only literal and no special + pattern elements in this Regex. */ + public boolean isLiteral() + { + Pattern x = thePattern; + while (x != null) + { + if (x instanceof oneChar) + { + ; + } + else if (x instanceof Skipped) + { + ; + } + else + { + return false; + } + x = x.next; } + return true; + } - /** You only need to know about this if you are inventing - your own pattern elements. */ - public patInt countMinChars() { return thePattern.countMinChars(); } - /** You only need to know about this if you are inventing - your own pattern elements. */ - public patInt countMaxChars() { return thePattern.countMaxChars(); } + /** You only need to know about this if you are inventing + your own pattern elements. */ + public patInt countMinChars() + { + return thePattern.countMinChars(); + } + + /** You only need to know about this if you are inventing + your own pattern elements. */ + public patInt countMaxChars() + { + return thePattern.countMaxChars(); + } - boolean isHexDigit(StrPos sp) { - boolean r = + boolean isHexDigit(StrPos sp) + { + boolean r = !sp.eos && !sp.dontMatch - && ((sp.c>='0'&&sp.c<='9') - ||(sp.c>='a'&&sp.c<='f') - ||(sp.c>='A'&&sp.c<='F')); - return r; - } - boolean isOctalDigit(StrPos sp,boolean first) { - boolean r = - !sp.eos && !(first^sp.dontMatch) - && sp.c>='0'&&sp.c<='7'; - return r; - } - int getHexDigit(StrPos sp) { - if(sp.c >= '0' && sp.c <= '9') - return sp.c - '0'; - if(sp.c >= 'a' && sp.c <= 'f') - return sp.c - 'a' + 10; - return sp.c - 'A' + 10; - } - boolean next2Hex(StrPos sp) { - StrPos sp2 = new StrPos(sp); - sp2.inc(); - if(!isHexDigit(sp2)) - return false; - sp2.inc(); - if(!isHexDigit(sp2)) - return false; - return true; + && ( (sp.c >= '0' && sp.c <= '9') + || (sp.c >= 'a' && sp.c <= 'f') + || (sp.c >= 'A' && sp.c <= 'F')); + return r; + } + + boolean isOctalDigit(StrPos sp, boolean first) + { + boolean r = + !sp.eos && ! (first ^ sp.dontMatch) + && sp.c >= '0' && sp.c <= '7'; + return r; + } + + int getHexDigit(StrPos sp) + { + if (sp.c >= '0' && sp.c <= '9') + { + return sp.c - '0'; } - boolean isOctalString(StrPos sp) { - if(!isOctalDigit(sp,true)) - return false; - StrPos sp2 = new StrPos(sp); - sp2.inc(); - if(!isOctalDigit(sp2,false)) - return false; - return true; + if (sp.c >= 'a' && sp.c <= 'f') + { + return sp.c - 'a' + 10; } + return sp.c - 'A' + 10; + } + + boolean next2Hex(StrPos sp) + { + StrPos sp2 = new StrPos(sp); + sp2.inc(); + if (!isHexDigit(sp2)) + { + return false; + } + sp2.inc(); + if (!isHexDigit(sp2)) + { + return false; + } + return true; + } + + boolean isOctalString(StrPos sp) + { + if (!isOctalDigit(sp, true)) + { + return false; + } + StrPos sp2 = new StrPos(sp); + sp2.inc(); + if (!isOctalDigit(sp2, false)) + { + return false; + } + return true; + } }