2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
11 import com.stevesoft.pat.wrap.StringWrap;
\r
14 /** Matches a Unicode punctuation character. */
\r
15 class UnicodePunct extends UniValidator {
\r
16 public int validate(StringLike s,int from,int to) {
\r
17 return from<s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;
\r
21 /** Matches a Unicode white space character. */
\r
22 class UnicodeWhite extends UniValidator {
\r
23 public int validate(StringLike s,int from,int to) {
\r
24 return from<s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;
\r
28 /** Matches a character that is not a Unicode punctuation
\r
31 class NUnicodePunct extends UniValidator {
\r
32 public int validate(StringLike s,int from,int to) {
\r
33 return from<s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;
\r
37 /** Matches a character that is not a
\r
38 * Unicode white space character.
\r
40 class NUnicodeWhite extends UniValidator {
\r
41 public int validate(StringLike s,int from,int to) {
\r
42 return from<s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;
\r
46 /** Matches a Unicode word character: an alphanumeric or underscore. */
\r
47 class UnicodeW extends UniValidator {
\r
48 public int validate(StringLike s,int from,int to) {
\r
49 if(from >= s.length()) return -1;
\r
50 char c = s.charAt(from);
\r
51 return (Prop.isAlphabetic(c)||Prop.isDecimalDigit(c)||c=='_') ? to : -1;
\r
55 /** Matches a character that is not a Unicode alphanumeric or underscore. */
\r
56 class NUnicodeW extends UniValidator {
\r
57 public int validate(StringLike s,int from,int to) {
\r
58 if(from >= s.length()) return -1;
\r
59 char c = s.charAt(from);
\r
60 return !(Prop.isAlphabetic(c)||Prop.isDecimalDigit(c)||c=='_') ? to : -1;
\r
64 /** Matches a Unicode decimal digit. */
\r
65 class UnicodeDigit extends UniValidator {
\r
66 public int validate(StringLike s,int from,int to) {
\r
67 return from<s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to : -1;
\r
70 /** Matches a character that is not a Unicode digit.*/
\r
71 class NUnicodeDigit extends UniValidator {
\r
72 public int validate(StringLike s,int from,int to) {
\r
73 return from<s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to : -1;
\r
77 /** Matches a Unicode math character. */
\r
78 class UnicodeMath extends UniValidator {
\r
79 public int validate(StringLike s,int from,int to) {
\r
80 return from<s.length() && Prop.isMath(s.charAt(from)) ? to : -1;
\r
83 /** Matches a non-math Unicode character. */
\r
84 class NUnicodeMath extends UniValidator {
\r
85 public int validate(StringLike s,int from,int to) {
\r
86 return from<s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;
\r
90 /** Matches a Unicode currency symbol. */
\r
91 class UnicodeCurrency extends UniValidator {
\r
92 public int validate(StringLike s,int from,int to) {
\r
93 return from<s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;
\r
96 /** Matches a non-currency symbol Unicode character. */
\r
97 class NUnicodeCurrency extends UniValidator {
\r
98 public int validate(StringLike s,int from,int to) {
\r
99 return from<s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;
\r
103 /** Matches a Unicode alphabetic character. */
\r
104 class UnicodeAlpha extends UniValidator {
\r
105 public int validate(StringLike s,int from,int to) {
\r
106 return from<s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;
\r
110 /** Matches a non-alphabetic Unicode character. */
\r
111 class NUnicodeAlpha extends UniValidator {
\r
112 public int validate(StringLike s,int from,int to) {
\r
113 return from<s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to : -1;
\r
117 /** Matches an upper case Unicode character. */
\r
118 class UnicodeUpper extends UniValidator {
\r
119 public int validate(StringLike s,int from,int to) {
\r
120 return from<s.length() && isUpper(s.charAt(from)) ? to : -1;
\r
122 final boolean isUpper(char c) {
\r
123 return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);
\r
127 /** Matches a lower case Unicode character. */
\r
128 class UnicodeLower extends UniValidator {
\r
129 public int validate(StringLike s,int from,int to) {
\r
130 return from<s.length() && isLower(s.charAt(from)) ? to : -1;
\r
132 final boolean isLower(char c) {
\r
133 return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);
\r
138 Regex provides the parser which constructs the linked list of
\r
139 Pattern classes from a String.
\r
141 For the purpose of this documentation, the fact that java interprets the
\r
142 backslash will be ignored. In practice, however, you will need a
\r
143 double backslash to obtain a string that contains a single backslash
\r
144 character. Thus, the example pattern "\b" should really be typed
\r
145 as "\\b" inside java code.
\r
147 Note that Regex is part of package "com.stevesoft.pat".
\r
148 To use it, simply import
\r
149 com.stevesoft.pat.Regex at the top of your file.
\r
151 Regex is made with a constructor that takes a String that defines
\r
152 the regular expression. Thus, for example
\r
154 Regex r = new Regex("[a-c]*");
\r
156 matches any number of characters so long as they are 'a', 'b', or 'c'.
\r
158 To attempt to match the Pattern to a given string, you can use either
\r
159 the search(String) member function, or the matchAt(String,int position)
\r
160 member function. These functions return a boolean which tells you
\r
161 whether or not the match succeeded, and set the results of "charsMatched()"
\r
162 and "matchedFrom()" in the Regex object appropriately.
\r
164 The portion of the string before the match can be obtained by the
\r
165 left() member, and the portion after the match can be obtained
\r
166 by the right() member.
\r
168 Essentially, this package implements a syntax that is very much
\r
169 like the perl 5 regular expression syntax.
\r
173 Regex r = new Regex("x(a|b)y");
\r
174 r.matchAt("xay",0);
\r
175 System.out.println("sub = "+r.stringMatched(1));
\r
177 The above would print "sub = a".
\r
179 r.left() // would return "x"
\r
180 r.right() // would return "y"
\r
183 Differences between this package and perl5:<br>
\r
184 The extended Pattern for setting flags is now supported,
\r
185 but the flags are different. "(?i)" tells the pattern to
\r
186 ignore case, "(?Q)" sets the "dontMatchInQuotes" flag, and
\r
187 "(?iQ)" sets them both. You can change the escape character.
\r
188 The pattern <pre>(?e=#)#d+</pre> is the same as <pre>\d+</pre>,
\r
189 but note that the sequence <pre>(?e=#)</pre> <b>must</b> occur
\r
190 at the very beginning of the pattern. There may be other small
\r
191 differences as well. I will either make my package conform
\r
192 or note them as I become aware of them.
\r
194 This package supports additional patterns not in perl5:
\r
197 <tr><td>(?@())</td><td>Group</td><td>This matches all characters between
\r
198 the '(' character and the balancing ')' character. Thus, it will
\r
199 match "()" as well as "(())". The balancing characters are
\r
200 arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>
\r
201 <tr><td>(?<1)</td><td>Backup</td><td>Moves the pointer backwards within the text.
\r
202 This allows you to make a "look behind." It fails if it
\r
203 attempts to move to a position before the beginning of the string.
\r
204 "x(?<1)" is equivalent to "(?=x)". The number, 1 in this example,
\r
205 is the number of characters to move backwards.</td>
\r
209 @author Steven R. Brandt
\r
210 @version package com.stevesoft.pat, release 1.5.3
\r
213 public class Regex extends RegRes implements FilenameFilter {
\r
214 /** BackRefOffset gives the identity number of the first
\r
215 pattern. Version 1.0 used zero, version 1.1 uses 1 to be
\r
216 more compatible with perl. */
\r
217 static int BackRefOffset = 1;
\r
218 private static Pattern none = new NoPattern();
\r
219 Pattern thePattern = none;
\r
220 patInt minMatch = new patInt(0);
\r
222 static Hashtable validators = new Hashtable();
\r
224 define("p","(?>1)",new UnicodePunct());
\r
225 define("P","(?>1)",new NUnicodePunct());
\r
226 define("s","(?>1)",new UnicodeWhite());
\r
227 define("S","(?>1)",new NUnicodeWhite());
\r
228 define("w","(?>1)",new UnicodeW());
\r
229 define("W","(?>1)",new NUnicodeW());
\r
230 define("d","(?>1)",new UnicodeDigit());
\r
231 define("D","(?>1)",new NUnicodeDigit());
\r
232 define("m","(?>1)",new UnicodeMath());
\r
233 define("M","(?>1)",new NUnicodeMath());
\r
234 define("c","(?>1)",new UnicodeCurrency());
\r
235 define("C","(?>1)",new NUnicodeCurrency());
\r
236 define("a","(?>1)",new UnicodeAlpha());
\r
237 define("A","(?>1)",new NUnicodeAlpha());
\r
238 define("uc","(?>1)",new UnicodeUpper());
\r
239 define("lc","(?>1)",new UnicodeLower());
\r
242 /** Set the dontMatchInQuotes flag. */
\r
243 public void setDontMatchInQuotes(boolean b) {
\r
244 dontMatchInQuotes = b;
\r
246 /** Find out if the dontMatchInQuotes flag is enabled. */
\r
247 public boolean getDontMatchInQuotes() {
\r
248 return dontMatchInQuotes;
\r
250 boolean dontMatchInQuotes = false;
\r
252 /** Set the state of the ignoreCase flag. If set to true, then
\r
253 the pattern matcher will ignore case when searching for a
\r
255 public void setIgnoreCase(boolean b) {
\r
258 /** Get the state of the ignoreCase flag. Returns true if we
\r
259 are ignoring the case of the pattern, false otherwise. */
\r
260 public boolean getIgnoreCase() {
\r
263 boolean ignoreCase = false;
\r
265 static boolean defaultMFlag = false;
\r
266 /** Set the default value of the m flag. If it
\r
267 is set to true, then the MFlag will be on
\r
268 for any regex search executed. */
\r
269 public static void setDefaultMFlag(boolean mFlag) {
\r
270 defaultMFlag = mFlag;
\r
272 /** Get the default value of the m flag. If it
\r
273 is set to true, then the MFlag will be on
\r
274 for any regex search executed. */
\r
275 public static boolean getDefaultMFlag() {
\r
276 return defaultMFlag;
\r
279 /** Initializes the object without a Pattern. To supply a Pattern
\r
280 use compile(String s).
\r
281 @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
284 /** Create and compile a Regex, but do not throw any exceptions.
\r
285 If you wish to have exceptions thrown for syntax errors,
\r
286 you must use the Regex(void) constructor to create the
\r
287 Regex object, and then call the compile method. Therefore, you
\r
288 should only call this method when you know your pattern is right.
\r
289 I will probably become more like
\r
290 @see com.stevesoft.pat.Regex#search(java.lang.String)
\r
291 @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
293 public Regex(String s) {
\r
296 } catch(RegSyntax rs) {}
\r
299 ReplaceRule rep = null;
\r
300 /** Create and compile both a Regex and a ReplaceRule.
\r
301 @see com.stevesoft.pat.ReplaceRule
\r
302 @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
304 public Regex(String s,String rp) {
\r
306 rep = ReplaceRule.perlCode(rp);
\r
308 /** Create and compile a Regex, but give it the ReplaceRule
\r
309 specified. This allows the user finer control of the
\r
310 Replacement process, if that is desired.
\r
311 @see com.stevesoft.pat.ReplaceRule
\r
312 @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
314 public Regex(String s,ReplaceRule rp) {
\r
319 /** Change the ReplaceRule of this Regex by compiling
\r
320 a new one using String rp. */
\r
321 public void setReplaceRule(String rp) {
\r
322 rep = ReplaceRule.perlCode(rp);
\r
323 repr = null; // Clear Replacer history
\r
326 /** Change the ReplaceRule of this Regex to rp. */
\r
327 public void setReplaceRule(ReplaceRule rp) {
\r
330 /** Test to see if a custom defined rule exists.
\r
331 @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
\r
333 public static boolean isDefined(String nm) {
\r
334 return validators.get(nm) != null;
\r
336 /** Removes a custom defined rule.
\r
337 @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
\r
339 public static void undefine(String nm) {
\r
340 validators.remove(nm);
\r
342 /** Defines a method to create a new rule. See test/deriv2.java
\r
343 and test/deriv3.java for examples of how to use it. */
\r
344 public static void define(String nm,String pat,Validator v) {
\r
346 validators.put(nm,v);
\r
348 /** Defines a shorthand for a pattern. The pattern will be
\r
349 invoked by a string that has the form "(??"+nm+")".
\r
351 public static void define(String nm,String pat) {
\r
352 validators.put(nm,pat);
\r
355 /** Get the current ReplaceRule. */
\r
356 public ReplaceRule getReplaceRule() { return rep; }
\r
358 Replacer repr = null;
\r
359 final Replacer _getReplacer() {
\r
360 return repr==null ? repr=new Replacer() : repr;
\r
362 public Replacer getReplacer() {
\r
364 repr = new Replacer();
\r
366 repr.rh.prev = null;
\r
369 /** Replace the first occurrence of this pattern in String s
\r
370 according to the ReplaceRule.
\r
371 @see com.stevesoft.pat.ReplaceRule
\r
372 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
374 public String replaceFirst(String s) {
\r
375 return _getReplacer().replaceFirstRegion(s,this,0,s.length()).toString();
\r
377 /** Replace the first occurrence of this pattern in String s
\r
378 beginning with position pos according to the ReplaceRule.
\r
379 @see com.stevesoft.pat.ReplaceRule
\r
380 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
382 public String replaceFirstFrom(String s,int pos) {
\r
383 return _getReplacer().replaceFirstRegion(s,this,pos,s.length()).toString();
\r
385 /** Replace the first occurrence of this pattern in String s
\r
386 beginning with position start and ending with end
\r
387 according to the ReplaceRule.
\r
388 @see com.stevesoft.pat.ReplaceRule
\r
389 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
391 public String replaceFirstRegion(String s,int start,int end) {
\r
392 return _getReplacer().replaceFirstRegion(s,this,start,end).toString();
\r
395 /** Replace all occurrences of this pattern in String s
\r
396 according to the ReplaceRule.
\r
397 @see com.stevesoft.pat.ReplaceRule
\r
398 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
400 public String replaceAll(String s) {
\r
401 return _getReplacer().replaceAllRegion(s,this,0,s.length()).toString();
\r
403 public StringLike replaceAll(StringLike s) {
\r
404 return _getReplacer().replaceAllRegion(s,this,0,s.length());
\r
406 /** Replace all occurrences of this pattern in String s
\r
407 beginning with position pos according to the ReplaceRule.
\r
408 @see com.stevesoft.pat.ReplaceRule
\r
409 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
411 public String replaceAllFrom(String s,int pos) {
\r
412 return _getReplacer().replaceAllRegion(s,this,pos,s.length()).toString();
\r
414 /** Replace all occurrences of this pattern in String s
\r
415 beginning with position start and ending with end
\r
416 according to the ReplaceRule.
\r
417 @see com.stevesoft.pat.ReplaceRule
\r
418 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
420 public String replaceAllRegion(String s,int start,int end) {
\r
421 return _getReplacer().replaceAllRegion(s,this,start,end).toString();
\r
425 /** Essentially clones the Regex object */
\r
426 public Regex(Regex r) {
\r
428 dontMatchInQuotes = r.dontMatchInQuotes;
\r
430 ignoreCase = r.ignoreCase;
\r
435 rep = (ReplaceRule)r.rep.clone();
\r
437 compile(r.toString());
\r
438 } catch(RegSyntax r_) {} */
\r
439 thePattern = r.thePattern.clone(new Hashtable());
\r
440 minMatch = r.minMatch;
\r
441 skipper = r.skipper;
\r
445 the escape character is the backslash, but you can
\r
446 make it anything you want by setting this variable. */
\r
447 public char esc = Pattern.ESC;
\r
448 /** This method compiles a regular expression, making it
\r
449 possible to call the search or matchAt methods.
\r
450 @exception com.stevesoft.pat.RegSyntax
\r
451 is thrown if a syntax error is encountered
\r
453 For example, "x{3,1}" or "*a" are not valid
\r
455 @see com.stevesoft.pat.Regex#search
\r
456 @see com.stevesoft.pat.Regex#matchAt
\r
458 public void compile(String prepat) throws RegSyntax {
\r
459 String postpat = parsePerl.codify(prepat,true);
\r
460 String pat = postpat==null ? prepat : postpat;
\r
462 ignoreCase = false;
\r
463 dontMatchInQuotes = false;
\r
464 Rthings mk = new Rthings(this);
\r
465 int offset = mk.val;
\r
466 String newpat = pat;
\r
470 minMatch = new patInt(0);
\r
471 StrPos sp = new StrPos(pat,0);
\r
472 if(sp.incMatch("(?e=")) {
\r
473 char newEsc = sp.c;
\r
476 newpat = reEscape(pat.substring(6),
\r
477 newEsc,Pattern.ESC);
\r
478 } else if(esc != Pattern.ESC)
\r
479 newpat = reEscape(pat,esc,Pattern.ESC);
\r
480 thePattern = _compile(newpat,mk);
\r
481 numSubs_ = mk.val-offset;
\r
485 /* If a Regex is compared against a Regex, a check is
\r
486 done to see that the patterns are equal as well as
\r
487 the most recent match. If a Regex is compared with
\r
488 a RegRes, only the result of the most recent match
\r
490 public boolean equals(Object o) {
\r
491 if(o instanceof Regex) {
\r
492 if(toString().equals(o.toString()))
\r
493 return super.equals(o);
\r
496 } else return super.equals(o);
\r
499 /** A clone by any other name would smell as sweet. */
\r
500 public Object clone() {
\r
501 return new Regex(this);
\r
503 /** Return a clone of the underlying RegRes object. */
\r
504 public RegRes result() {
\r
505 return (RegRes)super.clone();
\r
508 // prep sets global variables of class
\r
509 // Pattern so that it can access them
\r
510 // during an attempt at a match
\r
511 Pthings pt = new Pthings();
\r
512 final Pthings prep(StringLike s) {
\r
514 pt.lastPos = matchedTo();
\r
515 if(pt.lastPos < 0) pt.lastPos = 0;
\r
516 if( (s==null ? null : s.unwrap()) != (src==null ? null : s.unwrap()) )
\r
519 pt.dotDoesntMatchCR=dotDoesntMatchCR && (!sFlag);
\r
520 pt.mFlag = (mFlag | defaultMFlag);
\r
521 pt.ignoreCase = ignoreCase;
\r
522 pt.no_check = false;
\r
523 if(pt.marks != null)
\r
524 for(int i=0;i<pt.marks.length;i++)
\r
527 pt.nMarks = numSubs_;
\r
529 if(dontMatchInQuotes)
\r
535 /** Attempt to match a Pattern beginning
\r
536 at a specified location within the string.
\r
537 @see com.stevesoft.pat.Regex#search
\r
539 public boolean matchAt(String s,int start_pos) {
\r
540 return _search(s,start_pos,start_pos);
\r
542 /** Attempt to match a Pattern beginning
\r
543 at a specified location within the StringLike.
\r
544 @see com.stevesoft.pat.Regex#search
\r
546 public boolean matchAt(StringLike s,int start_pos) {
\r
547 return _search(s,start_pos,start_pos);
\r
551 /** Search through a String for the first
\r
552 occurrence of a match.
\r
553 @see com.stevesoft.pat.Regex#searchFrom
\r
554 @see com.stevesoft.pat.Regex#matchAt
\r
556 public boolean search(String s) {
\r
558 throw new NullPointerException("Null String Given to Regex.search");
\r
559 return _search(s,0,s.length());
\r
561 public boolean search(StringLike sl) {
\r
563 throw new NullPointerException("Null StringLike Given to Regex.search");
\r
564 return _search(sl,0,sl.length());
\r
566 public boolean reverseSearch(String s) {
\r
568 throw new NullPointerException("Null String Given to Regex.reverseSearch");
\r
569 return _reverseSearch(s,0,s.length());
\r
571 public boolean reverseSearch(StringLike sl) {
\r
573 throw new NullPointerException("Null StringLike Given to Regex.reverseSearch");
\r
574 return _reverseSearch(sl,0,sl.length());
\r
576 /** Search through a String for the first
\r
577 occurrence of a match, but start at position <pre>start</pre>*/
\r
578 public boolean searchFrom(String s,int start) {
\r
580 throw new NullPointerException("Null String Given to Regex.searchFrom");
\r
581 return _search(s,start,s.length());
\r
583 public boolean searchFrom(StringLike s,int start) {
\r
585 throw new NullPointerException("Null String Given to Regex.searchFrom");
\r
586 return _search(s,start,s.length());
\r
588 /** Search through a region of a String
\r
589 for the first occurrence of a match. */
\r
590 public boolean searchRegion(String s,int start,int end) {
\r
592 throw new NullPointerException("Null String Given to Regex.searchRegion");
\r
593 return _search(s,start,end);
\r
595 /** Set this to change the default behavior of the "." pattern.
\r
596 By default it now matches perl's behavior and fails to
\r
597 match the '\n' character. */
\r
598 public static boolean dotDoesntMatchCR = true;
\r
601 boolean gFlag = false;
\r
602 /** Set the 'g' flag */
\r
603 public void setGFlag(boolean b) {
\r
606 /** Get the state of the 'g' flag. */
\r
607 public boolean getGFlag() {
\r
610 boolean sFlag = false;
\r
611 /** Get the state of the sFlag */
\r
612 public boolean getSFlag() {
\r
615 boolean mFlag = false;
\r
616 /** Get the state of the mFlag */
\r
617 public boolean getMFlag() {
\r
621 final boolean _search(String s,int start,int end) {
\r
622 return _search(new StringWrap(s),start,end);
\r
624 final boolean _search(StringLike s,int start,int end) {
\r
625 if(gFlag && gFlagto > 0 && gFlags!=null && s.unwrap()==gFlags.unwrap())
\r
629 Pthings pt=prep(s);
\r
631 int up = (minMatch == null ? end : end-minMatch.i);
\r
633 if(up < start && end >= start) up = start;
\r
635 if(skipper == null) {
\r
636 for(int i=start;i<=up;i++) {
\r
637 charsMatched_ = thePattern.matchAt(s,i,pt);
\r
638 if(charsMatched_ >= 0) {
\r
639 matchFrom_ = thePattern.mfrom;
\r
641 gFlagto = matchFrom_+charsMatched_;
\r
643 return didMatch_=true;
\r
647 pt.no_check = true;
\r
648 for(int i=start;i<=up;i++) {
\r
649 i = skipper.find(src,i,up);
\r
651 charsMatched_ = matchFrom_ = -1;
\r
652 return didMatch_ = false;
\r
654 charsMatched_ = thePattern.matchAt(s,i,pt);
\r
655 if(charsMatched_ >= 0) {
\r
656 matchFrom_ = thePattern.mfrom;
\r
658 gFlagto = matchFrom_+charsMatched_;
\r
660 return didMatch_=true;
\r
664 return didMatch_=false;
\r
666 /*final boolean _search(LongStringLike s,long start,long end) {
\r
667 if(gFlag && gFlagto > 0 && s==gFlags)
\r
671 Pthings pt=prep(s);
\r
673 int up = end;//(minMatch == null ? end : end-minMatch.i);
\r
675 if(up < start && end >= start) up = start;
\r
677 if(skipper == null) {
\r
678 for(long i=start;i<=up;i++) {
\r
679 charsMatched_ = thePattern.matchAt(s,i,pt);
\r
680 if(charsMatched_ >= 0) {
\r
681 matchFrom_ = thePattern.mfrom;
\r
683 gFlagto = matchFrom_+charsMatched_;
\r
684 return didMatch_=true;
\r
688 pt.no_check = true;
\r
689 for(long i=start;i<=up;i++) {
\r
690 i = skipper.find(src,i,up);
\r
692 charsMatched_ = matchFrom_ = -1;
\r
693 return didMatch_ = false;
\r
695 charsMatched_ = thePattern.matchAt(s,i,pt);
\r
696 if(charsMatched_ >= 0) {
\r
697 matchFrom_ = thePattern.mfrom;
\r
699 gFlagto = matchFrom_+charsMatched_;
\r
701 return didMatch_=true;
\r
703 i = s.adjustIndex(i);
\r
704 up = s.adjustEnd(i);
\r
708 return didMatch_=false;
\r
711 boolean _reverseSearch(String s,int start,int end) {
\r
712 return _reverseSearch(new StringWrap(s),start,end);
\r
714 boolean _reverseSearch(StringLike s,int start,int end) {
\r
715 if(gFlag && gFlagto > 0 && s.unwrap()==gFlags.unwrap())
\r
718 Pthings pt=prep(s);
\r
719 for(int i=end;i>=start;i--) {
\r
720 charsMatched_ = thePattern.matchAt(s,i,pt);
\r
721 if(charsMatched_ >= 0) {
\r
722 matchFrom_ = thePattern.mfrom;
\r
724 gFlagto = matchFrom_-1;
\r
726 return didMatch_=true;
\r
729 return didMatch_=false;
\r
732 // This routine sets the cbits variable
\r
733 // of class Pattern. Cbits is true for
\r
734 // the bit corresponding to a character inside
\r
735 // a set of quotes.
\r
736 static StringLike lasts=null;
\r
737 static BitSet lastbs=null;
\r
738 static void setCbits(StringLike s,Pthings pt) {
\r
743 BitSet bs = new BitSet(s.length());
\r
745 boolean setBit = false;
\r
746 for(int i=0;i<s.length();i++) {
\r
747 if(setBit) bs.set(i);
\r
748 char c = s.charAt(i);
\r
749 if(!setBit && c == '"') {
\r
753 } else if(!setBit && c == '\'') {
\r
757 } else if(setBit && c == qc) {
\r
759 } else if(setBit && c == '\\' && i+1<s.length()) {
\r
761 if(setBit) bs.set(i);
\r
764 pt.cbits = lastbs = bs;
\r
768 // Wanted user to over-ride this in alpha version,
\r
769 // but it wasn't really necessary because of this trick:
\r
772 return (Regex)getClass().newInstance();
\r
773 } catch(InstantiationException ie) {
\r
775 } catch(IllegalAccessException iae) {
\r
779 /** Only needed for creating your own extensions of
\r
780 Regex. This method adds the next Pattern in the chain
\r
781 of patterns or sets the Pattern if it is the first call. */
\r
782 protected void add(Pattern p2) {
\r
791 /** You only need to use this method if you are creating
\r
792 your own extensions to Regex.
\r
793 compile1 compiles one Pattern element, it can be
\r
794 over-ridden to allow the Regex compiler to understand
\r
795 new syntax. See deriv.java for an example. This routine
\r
796 is the heart of class Regex. Rthings has one integer
\r
797 member called val; it is used to keep track of the number
\r
798 of ()'s in the Pattern.
\r
799 @exception com.stevesoft.pat.RegSyntax is thrown when a nonsensical
\r
800 pattern is supplied. For example, a pattern beginning
\r
802 protected void compile1(StrPos sp,Rthings mk) throws RegSyntax {
\r
803 if(sp.match('[')) {
\r
805 add(matchBracket(sp));
\r
806 } else if(sp.match('|')) {
\r
809 if(p == null) p=new NullPattern();
\r
812 } else if(sp.incMatch("(?<")) {
\r
813 patInt i = sp.getPatInt();
\r
814 if(i==null) RegSyntaxError.endItAll("No int after (?<");
\r
815 add(new Backup(i.intValue()));
\r
816 if(!sp.match(')')) RegSyntaxError.endItAll("No ) after (?<");
\r
817 } else if(sp.incMatch("(?>")) {
\r
818 patInt i = sp.getPatInt();
\r
819 if(i==null) RegSyntaxError.endItAll("No int after (?>");
\r
820 add(new Backup(-i.intValue()));
\r
821 if(!sp.match(')')) RegSyntaxError.endItAll("No ) after (?<");
\r
822 } else if(sp.incMatch("(?@")) {
\r
828 RegSyntaxError.endItAll(
\r
829 "(?@ does not have closing paren");
\r
830 add(new Group(op,cl));
\r
831 } else if(sp.incMatch("(?#")) {
\r
832 while(!sp.match(')'))
\r
834 } else if(sp.dontMatch && sp.c == 'w') {
\r
835 //Regex r = new Regex();
\r
836 //r._compile("[a-zA-Z0-9_]",mk);
\r
837 //add(new Goop("\\w",r.thePattern));
\r
838 Bracket b = new Bracket(false);
\r
839 b.addOr(new Range('a','z'));
\r
840 b.addOr(new Range('A','Z'));
\r
841 b.addOr(new Range('0','9'));
\r
842 b.addOr(new oneChar('_'));
\r
844 } else if(sp.dontMatch && sp.c == 'G') {
\r
846 } else if(sp.dontMatch && sp.c == 's') {
\r
847 //Regex r = new Regex();
\r
848 //r._compile("[ \t\n\r\b]",mk);
\r
849 //add(new Goop("\\s",r.thePattern));
\r
850 Bracket b = new Bracket(false);
\r
851 b.addOr(new oneChar((char)32));
\r
852 b.addOr(new Range((char)8,(char)10));
\r
853 b.addOr(new oneChar((char)13));
\r
855 } else if(sp.dontMatch && sp.c == 'd') {
\r
856 //Regex r = new Regex();
\r
857 //r._compile("[0-9]",mk);
\r
858 //add(new Goop("\\d",r.thePattern));
\r
859 Range digit = new Range('0','9');
\r
860 digit.printBrackets = true;
\r
862 } else if(sp.dontMatch && sp.c == 'W') {
\r
863 //Regex r = new Regex();
\r
864 //r._compile("[^a-zA-Z0-9_]",mk);
\r
865 //add(new Goop("\\W",r.thePattern));
\r
866 Bracket b = new Bracket(true);
\r
867 b.addOr(new Range('a','z'));
\r
868 b.addOr(new Range('A','Z'));
\r
869 b.addOr(new Range('0','9'));
\r
870 b.addOr(new oneChar('_'));
\r
872 } else if(sp.dontMatch && sp.c == 'S') {
\r
873 //Regex r = new Regex();
\r
874 //r._compile("[^ \t\n\r\b]",mk);
\r
875 //add(new Goop("\\S",r.thePattern));
\r
876 Bracket b = new Bracket(true);
\r
877 b.addOr(new oneChar((char)32));
\r
878 b.addOr(new Range((char)8,(char)10));
\r
879 b.addOr(new oneChar((char)13));
\r
881 } else if(sp.dontMatch && sp.c == 'D') {
\r
882 //Regex r = new Regex();
\r
883 //r._compile("[^0-9]",mk);
\r
884 //add(new Goop("\\D",r.thePattern));
\r
885 Bracket b = new Bracket(true);
\r
886 b.addOr(new Range('0','9'));
\r
888 } else if(sp.dontMatch && sp.c == 'B') {
\r
889 Regex r = new Regex();
\r
890 r._compile("(?!"+back_slash+"b)",mk);
\r
892 } else if(isOctalString(sp)) {
\r
893 int d = sp.c - '0';
\r
895 d = 8*d + sp.c - '0';
\r
896 StrPos sp2 = new StrPos(sp);
\r
898 if(isOctalDigit(sp2,false)) {
\r
900 d = 8*d + sp.c - '0';
\r
902 add(new oneChar((char)d));
\r
903 } else if(sp.dontMatch && sp.c >= '1' && sp.c <= '9') {
\r
905 StrPos s2 = new StrPos(sp);
\r
907 if(!s2.dontMatch && s2.c >= '0' && s2.c <= '9') {
\r
908 iv = 10*iv+(s2.c-'0');
\r
911 add(new BackMatch(iv));
\r
912 } else if(sp.dontMatch && sp.c == 'b') {
\r
913 add(new Boundary());
\r
914 } else if(sp.match('\b')) {
\r
915 add(new Boundary());
\r
916 } else if(sp.match('$')) {
\r
917 add(new End(true));
\r
918 } else if(sp.dontMatch && sp.c == 'Z') {
\r
919 add(new End(false));
\r
920 } else if(sp.match('.')) {
\r
922 } else if(sp.incMatch("(??")) {
\r
923 StringBuffer sb = new StringBuffer();
\r
924 StringBuffer sb2 = new StringBuffer();
\r
925 while(!sp.match(')') && !sp.match(':')) {
\r
929 if(sp.incMatch(":")) {
\r
930 while(!sp.match(')')) {
\r
935 String sbs = sb.toString();
\r
936 if(validators.get(sbs) instanceof String) {
\r
937 String pat = (String)validators.get(sbs);
\r
938 Regex r = newRegex();
\r
939 Rthings rth = new Rthings(this);
\r
940 rth.noBackRefs = true;
\r
941 r._compile(pat,rth);
\r
944 Custom cm = new Custom(sb.toString());
\r
946 Validator v2 = cm.v.arg(sb2.toString());
\r
948 v2.argsave = sb2.toString();
\r
949 String p = cm.v.pattern;
\r
953 Regex r = newRegex();
\r
954 Rthings rth = new Rthings(this);
\r
955 rth.noBackRefs = true;
\r
956 r._compile(cm.v.pattern,rth);
\r
957 cm.sub = r.thePattern;
\r
958 cm.sub.add(new CustomEndpoint(cm));
\r
959 cm.sub.setParent(cm);
\r
963 } else if(sp.match('(')) {
\r
965 Regex r = newRegex();
\r
966 // r.or = new Or();
\r
968 if(sp.incMatch("?:")) {
\r
970 } else if(sp.incMatch("?=")) {
\r
971 r.or = new lookAhead(false);
\r
972 } else if(sp.incMatch("?!")) {
\r
973 r.or = new lookAhead(true);
\r
974 } else if(sp.match('?')) {
\r
977 if(sp.c=='i')mk.ignoreCase = true;
\r
978 if(sp.c=='Q')mk.dontMatchInQuotes = true;
\r
979 if(sp.c=='o')mk.optimizeMe = true;
\r
980 if(sp.c=='g')mk.gFlag = true;
\r
981 if(sp.c=='s')mk.sFlag = true;
\r
982 if(sp.c=='m')mk.mFlag = true;
\r
984 } while(!sp.match(')') && !sp.eos);
\r
987 if(sp.eos) //throw new RegSyntax
\r
988 RegSyntaxError.endItAll("Unclosed ()");
\r
989 } else { // just ordinary parenthesis
\r
990 r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
\r
992 if(r != null) add(r._compile(sp,mk));
\r
993 } else if(sp.match('^')) {
\r
994 add(new Start(true));
\r
995 } else if(sp.dontMatch && sp.c=='A') {
\r
996 add(new Start(false));
\r
997 } else if(sp.match('*')) {
\r
998 addMulti(new patInt(0),new patInf());
\r
999 } else if(sp.match('+')) {
\r
1000 addMulti(new patInt(1),new patInf());
\r
1001 } else if(sp.match('?')) {
\r
1002 addMulti(new patInt(0),new patInt(1));
\r
1003 } else if(sp.match('{')) {
\r
1004 boolean bad = false;
\r
1005 StrPos sp2 = new StrPos(sp);
\r
1006 //StringBuffer sb = new StringBuffer();
\r
1008 patInt i1 = sp.getPatInt();
\r
1010 if(sp.match('}')) {
\r
1013 if(!sp.match(','))/*
\r
1014 RegSyntaxError.endItAll(
\r
1016 "\" should be followed with , or }");*/
\r
1020 i2 = new patInf();
\r
1022 i2 = sp.getPatInt();
\r
1024 if(i1 == null || i2 == null) /*
\r
1025 throw new RegSyntax("Badly formatted Multi: "
\r
1026 +"{"+i1+","+i2+"}"); */ bad = true;
\r
1029 add(new oneChar(sp.c));
\r
1032 } else if(sp.escMatch('x') && next2Hex(sp)) {
\r
1034 int d = getHexDigit(sp);
\r
1036 d = 16*d + getHexDigit(sp);
\r
1037 add(new oneChar((char)d));
\r
1038 } else if(sp.escMatch('c')) {
\r
1040 if(sp.c < Ctrl.cmap.length)
\r
1041 add(new oneChar(Ctrl.cmap[sp.c]));
\r
1043 add(new oneChar(sp.c));
\r
1044 } else if(sp.escMatch('f')) {
\r
1045 add(new oneChar((char)12));
\r
1046 } else if(sp.escMatch('a')) {
\r
1047 add(new oneChar((char)7));
\r
1048 } else if(sp.escMatch('t')) {
\r
1049 add(new oneChar('\t'));
\r
1050 } else if(sp.escMatch('n')) {
\r
1051 add(new oneChar('\n'));
\r
1052 } else if(sp.escMatch('r')) {
\r
1053 add(new oneChar('\r'));
\r
1054 } else if(sp.escMatch('b')) {
\r
1055 add(new oneChar('\b'));
\r
1056 } else if(sp.escMatch('e')) {
\r
1057 add(new oneChar((char)27));
\r
1059 add(new oneChar(sp.c));
\r
1061 RegSyntaxError.endItAll("Unmatched right paren in pattern");
\r
1065 // compiles all Pattern elements, internal method
\r
1066 private Pattern _compile(String pat,Rthings mk) throws RegSyntax {
\r
1068 sFlag = mFlag = ignoreCase = gFlag = false;
\r
1069 StrPos sp = new StrPos(pat,0);
\r
1070 thePattern = _compile(sp,mk);
\r
1072 return thePattern;
\r
1077 Pattern _compile(StrPos sp,Rthings mk) throws RegSyntax {
\r
1078 while(!(sp.eos || (or != null && sp.match(')')) )) {
\r
1082 if(sp.match(')')) mk.parenLevel--;
\r
1083 else if(sp.eos && mk.parenLevel != 0) {
\r
1084 RegSyntaxError.endItAll("Unclosed Parenthesis! lvl="+mk.parenLevel);
\r
1085 } if(or != null) {
\r
1086 if(p == null) p = new NullPattern();
\r
1090 return p==null ? new NullPattern() : p;
\r
1093 // add a multi object to the end of the chain
\r
1094 // which applies to the last object
\r
1095 void addMulti(patInt i1,patInt i2) throws RegSyntax {
\r
1096 Pattern last,last2;
\r
1097 for(last = p;last != null && last.next != null;last=last.next)
\r
1099 if(last == null || last == p)
\r
1102 for(last2 = p;last2.next != last;last2=last2.next)
\r
1104 if(last instanceof Multi && i1.intValue()==0 &&
\r
1106 ((Multi)last).matchFewest = true;
\r
1107 else if(last instanceof FastMulti && i1.intValue()==0 &&
\r
1109 ((FastMulti)last).matchFewest = true;
\r
1110 else if(last instanceof DotMulti && i1.intValue()==0 &&
\r
1112 ((DotMulti)last).matchFewest = true;
\r
1113 else if(last instanceof Multi
\r
1114 || last instanceof DotMulti
\r
1115 || last instanceof FastMulti)
\r
1116 throw new RegSyntax("Syntax error.");
\r
1117 else if(last2 == null)
\r
1118 p = mkMulti(i1,i2,p);
\r
1120 last2.next = mkMulti(i1,i2,last);
\r
// Builds the repetition node that applies the bounds lo..hi to p.
//  - p is an Any with no successor (p.next == null): a DotMulti,
//    which is constructed from the bounds alone.
//  - RegOpt.safe4fm(p) returns true: a FastMulti wrapping p.
//  - otherwise: a general Multi wrapping p.
// NOTE(review): nothing in this body throws RegSyntax directly; the
// throws clause presumably covers the constructors -- confirm.
1122 final static Pattern mkMulti(patInt lo,patInt hi,Pattern p) throws RegSyntax {
\r
1123 if(p instanceof Any && p.next == null)
\r
1124 return (Pattern)new DotMulti(lo,hi);
\r
1125 return RegOpt.safe4fm(p) ? (Pattern)new FastMulti(lo,hi,p) :
\r
1126 (Pattern)new Multi(lo,hi,p);
\r
1128 // process the bracket operator
\r
1129 Pattern matchBracket(StrPos sp) throws RegSyntax {
\r
1131 if(sp.match('^')) {
\r
1132 ret = new Bracket(true);
\r
1135 ret = new Bracket(false);
\r
1137 //throw new RegSyntax
\r
1138 RegSyntaxError.endItAll("Unmatched []");
\r
1140 while(!sp.eos && !sp.match(']')) {
\r
1141 StrPos s1 = new StrPos(sp);
\r
1143 StrPos s1_ = new StrPos(s1);
\r
1145 if(s1.match('-') && !s1_.match(']')) {
\r
1146 StrPos s2 = new StrPos(s1);
\r
1149 ret.addOr(new Range(sp.c,s2.c));
\r
1152 } else if(sp.escMatch('Q')) {
\r
1154 while(!sp.escMatch('E')) {
\r
1155 ret.addOr(new oneChar(sp.c));
\r
1158 } else if(sp.escMatch('d')) {
\r
1159 ret.addOr(new Range('0','9'));
\r
1160 } else if(sp.escMatch('s')) {
\r
1161 ret.addOr(new oneChar((char)32));
\r
1162 ret.addOr(new Range((char)8,(char)10));
\r
1163 ret.addOr(new oneChar((char)13));
\r
1164 } else if(sp.escMatch('w')) {
\r
1165 ret.addOr(new Range('a','z'));
\r
1166 ret.addOr(new Range('A','Z'));
\r
1167 ret.addOr(new Range('0','9'));
\r
1168 ret.addOr(new oneChar('_'));
\r
1169 } else if(sp.escMatch('D')) {
\r
1170 ret.addOr(new Range((char)0,(char)47));
\r
1171 ret.addOr(new Range((char)58,(char)65535));
\r
1172 } else if(sp.escMatch('S')) {
\r
1173 ret.addOr(new Range((char)0,(char)7));
\r
1174 ret.addOr(new Range((char)11,(char)12));
\r
1175 ret.addOr(new Range((char)14,(char)31));
\r
1176 ret.addOr(new Range((char)33,(char)65535));
\r
1177 } else if(sp.escMatch('W')) {
\r
1178 ret.addOr(new Range((char)0,(char)64));
\r
1179 ret.addOr(new Range((char)91,(char)94));
\r
1180 ret.addOr(new oneChar((char)96));
\r
1181 ret.addOr(new Range((char)123,(char)65535));
\r
1182 } else if(sp.escMatch('x') && next2Hex(sp)) {
\r
1184 int d = getHexDigit(sp);
\r
1186 d = 16*d + getHexDigit(sp);
\r
1187 ret.addOr(new oneChar((char)d));
\r
1188 } else if(sp.escMatch('a')) {
\r
1189 ret.addOr(new oneChar((char)7));
\r
1190 } else if(sp.escMatch('f')) {
\r
1191 ret.addOr(new oneChar((char)12));
\r
1192 } else if(sp.escMatch('e')) {
\r
1193 ret.addOr(new oneChar((char)27));
\r
1194 } else if(sp.escMatch('n')) {
\r
1195 ret.addOr(new oneChar('\n'));
\r
1196 } else if(sp.escMatch('t')) {
\r
1197 ret.addOr(new oneChar('\t'));
\r
1198 } else if(sp.escMatch('r')) {
\r
1199 ret.addOr(new oneChar('\r'));
\r
1200 } else if(sp.escMatch('c')) {
\r
1202 if(sp.c < Ctrl.cmap.length)
\r
1203 ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
\r
1205 ret.addOr(new oneChar(sp.c));
\r
1206 } else if(isOctalString(sp)) {
\r
1207 int d = sp.c - '0';
\r
1209 d = 8*d + sp.c - '0';
\r
1210 StrPos sp2 = new StrPos(sp);
\r
1212 if(isOctalDigit(sp2,false)) {
\r
1214 d = 8*d + sp.c - '0';
\r
1216 ret.addOr(new oneChar((char)d));
\r
1218 ret.addOr(new oneChar(sp.c));
\r
1224 /** Converts the stored Pattern to a String -- this is a
\r
1225 decompile. Note that \t and \n will really print out here,
\r
1226 Not just the two character representations.
\r
1227 Also be prepared to see some strange output if your characters
\r
1228 are not printable. */
\r
1229 public String toString() {
\r
1230 if( false && thePattern == null )
\r
1233 StringBuffer sb = new StringBuffer();
\r
1234 if(esc != Pattern.ESC) {
\r
1235 sb.append("(?e=");
\r
1241 ||!dotDoesntMatchCR
\r
1244 ||dontMatchInQuotes
\r
1247 if(ignoreCase)sb.append("i");
\r
1248 if(mFlag)sb.append("m");
\r
1249 if(sFlag||!dotDoesntMatchCR)sb.append("s");
\r
1250 if(dontMatchInQuotes)sb.append("Q");
\r
1251 if(optimized())sb.append("o");
\r
1252 if(gFlag)sb.append("g");
\r
1255 String patstr = thePattern.toString();
\r
1256 if(esc != Pattern.ESC)
\r
1257 patstr = reEscape(patstr,Pattern.ESC,esc);
\r
1258 sb.append(patstr);
\r
1259 return sb.toString();
\r
1262 // Re-escape Pattern, allows us to use a different escape
\r
1264 static String reEscape(String s,char oldEsc,char newEsc) {
\r
1265 if(oldEsc == newEsc) return s;
\r
1267 StringBuffer sb = new StringBuffer();
\r
1268 for(i=0;i<s.length();i++) {
\r
1269 if(s.charAt(i)==oldEsc && i+1 < s.length()) {
\r
1270 if(s.charAt(i+1)==oldEsc) {
\r
1271 sb.append(oldEsc);
\r
1273 sb.append(newEsc);
\r
1274 sb.append(s.charAt(i+1));
\r
1277 } else if(s.charAt(i)==newEsc) {
\r
1278 sb.append(newEsc);
\r
1279 sb.append(newEsc);
\r
1281 sb.append(s.charAt(i));
\r
1284 return sb.toString();
\r
1286 /** This method implements FilenameFilter, allowing one
\r
1287 to use a Regex to search through a directory using File.list.
\r
1288 There is a FileRegex now that does this better.
\r
1289 @see com.stevesoft.pat.FileRegex
\r
1291 public boolean accept(File dir,String s) {
\r
1294 /** The version of this package.
@return the release identifier of this package, a fixed string literal
*/
\r
1295 final static public String version() {
\r
1296 return "lgpl release 1.5.3";
\r
1298 /** Once this method is called, the state of variables
\r
1299 ignoreCase and dontMatchInQuotes should not be changed as the
\r
1300 results will be unpredictable. However,
\r
1301 search and matchAt will run more quickly. Note that you
\r
1302 can check to see if the pattern has been optimized by calling
\r
1303 the optimized() method.<p>This method will attempt to rewrite
\r
1304 your pattern in a way that makes it faster (not all patterns
\r
1305 execute at the same speed). In general, "(?: ... )" will be
\r
1306 faster than "( ... )" so if you don't need the backreference,
\r
1307 you should group using the former pattern.<p>It will also
\r
1308 introduce new pattern elements that you can't get to otherwise,
\r
1309 for example if you have a large table of strings, i.e. the
\r
1310 months of the year "(January|February|...)" optimize() will make
\r
1311 a Hashtable that takes it to the next appropriate pattern
\r
1312 element -- eliminating the need for a linear search.
\r
1313 @see com.stevesoft.pat.Regex#optimized
\r
1314 @see com.stevesoft.pat.Regex#ignoreCase
\r
1315 @see com.stevesoft.pat.Regex#dontMatchInQuotes
\r
1316 @see com.stevesoft.pat.Regex#matchAt
\r
1317 @see com.stevesoft.pat.Regex#search
\r
1319 public void optimize() {
\r
1320 if(optimized()||thePattern==null) return;
\r
1321 minMatch = new patInt(0);//thePattern.countMinChars();
\r
1322 thePattern = RegOpt.opt(thePattern,ignoreCase,
\r
1323 dontMatchInQuotes);
\r
1324 skipper = Skip.findSkip(this);
\r
1325 //RegOpt.setParents(this);
\r
1329 /** This function returns true if the optimize method has
\r
1331 public boolean optimized() {
\r
1332 return minMatch != null;
\r
1335 /** A bit of syntactic sugar for those who want to make
\r
1336 their code look more perl-like. To use this initialize
\r
1337 your Regex object by saying:
\r
1339 Regex r1 = Regex.perlCode("s/hello/goodbye/");
\r
1340 Regex r2 = Regex.perlCode("s'fish'frog'i");
\r
1341 Regex r3 = Regex.perlCode("m'hello'");
\r
1343 The i for ignoreCase is supported in
\r
1344 this syntax, as well as m, s, and x. The g flag
\r
1345 is a bit of a special case.<p>
\r
1346 If you wish to replace all occurrences of a pattern, you
\r
1347 do not put a 'g' in the perlCode, but call Regex's
\r
1348 replaceAll method.<p>
\r
1349 If you wish to simply
\r
1350 and only do a search for r2's pattern, you can do this
\r
1351 by calling the searchFrom method repeatedly, or
\r
1352 by calling search repeatedly if the g flag is set.
\r
1354 Note: Currently perlCode does <em>not</em>
\r
1355 support the (?e=#) syntax for
\r
1356 changing the escape character.
\r
1359 public static Regex perlCode(String s) {
\r
1360 // Thin wrapper: this file is big enough already, so the parsing
\r
1361 // itself lives in parsePerl.java; s is passed through unchanged.
\r
1362 return parsePerl.parse(s);
\r
1364 static final char back_slash = '\\';
\r
1366 /** Checks to see if there are only literal and no special
\r
1367 pattern elements in this Regex. */
\r
1368 public boolean isLiteral() {
\r
1369 Pattern x = thePattern;
\r
1370 while(x != null) {
\r
1371 if(x instanceof oneChar)
\r
1373 else if(x instanceof Skipped)
\r
1382 /** You only need to know about this if you are inventing
\r
1383 your own pattern elements.
Simply forwards to thePattern.countMinChars() and returns its result.
NOTE(review): thePattern is dereferenced without a null check here,
while optimize() in this class tests thePattern==null first -- confirm
that callers only use this once a pattern has been compiled.
*/
\r
1384 public patInt countMinChars() { return thePattern.countMinChars(); }
\r
1385 /** You only need to know about this if you are inventing
\r
1386 your own pattern elements.
Simply forwards to thePattern.countMaxChars() and returns its result.
NOTE(review): thePattern is dereferenced without a null check here,
while optimize() in this class tests thePattern==null first -- confirm
that callers only use this once a pattern has been compiled.
*/
\r
1387 public patInt countMaxChars() { return thePattern.countMaxChars(); }
\r
1389 boolean isHexDigit(StrPos sp) {
\r
1391 !sp.eos && !sp.dontMatch
\r
1392 && ((sp.c>='0'&&sp.c<='9')
\r
1393 ||(sp.c>='a'&&sp.c<='f')
\r
1394 ||(sp.c>='A'&&sp.c<='F'));
\r
1397 boolean isOctalDigit(StrPos sp,boolean first) {
\r
1399 !sp.eos && !(first^sp.dontMatch)
\r
1400 && sp.c>='0'&&sp.c<='7';
\r
// Returns the numeric value of the hex digit held in sp.c:
// '0'-'9' give 0-9 and 'a'-'f' give 10-15. Any other character falls
// through to the upper-case formula with no range check, so the result
// is only meaningful when sp.c really is a hex digit. The visible
// callers test next2Hex(sp) in the same branch condition before
// calling this. Reads sp.c only; sp is not advanced here.
1403 int getHexDigit(StrPos sp) {
\r
1404 if(sp.c >= '0' && sp.c <= '9')
\r
1405 return sp.c - '0';
\r
1406 if(sp.c >= 'a' && sp.c <= 'f')
\r
1407 return sp.c - 'a' + 10;
// no check for 'A'-'F': every remaining character is handled as upper-case hex
\r
1408 return sp.c - 'A' + 10;
\r
1410 boolean next2Hex(StrPos sp) {
\r
1411 StrPos sp2 = new StrPos(sp);
\r
1413 if(!isHexDigit(sp2))
\r
1416 if(!isHexDigit(sp2))
\r
1420 boolean isOctalString(StrPos sp) {
\r
1421 if(!isOctalDigit(sp,true))
\r
1423 StrPos sp2 = new StrPos(sp);
\r
1425 if(!isOctalDigit(sp2,false))
\r