2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
13 import com.stevesoft.pat.wrap.*;
\r
15 /** Matches a Unicode punctuation character. */
\r
17 extends UniValidator
\r
19 public int validate(StringLike s, int from, int to)
\r
21 return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;
\r
25 /** Matches a Unicode white space character. */
\r
27 extends UniValidator
\r
29 public int validate(StringLike s, int from, int to)
\r
31 return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;
\r
35 /** Matches a character that is not a Unicode punctuation
\r
39 extends UniValidator
\r
41 public int validate(StringLike s, int from, int to)
\r
43 return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;
\r
47 /** Matches a character that is not a
\r
48 * Unicode white space character.
\r
51 extends UniValidator
\r
53 public int validate(StringLike s, int from, int to)
\r
55 return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;
\r
59 /** Matches a Unicode word character: an alphanumeric or underscore. */
\r
61 extends UniValidator
\r
63 public int validate(StringLike s, int from, int to)
\r
65 if (from >= s.length())
\r
69 char c = s.charAt(from);
\r
70 return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to :
\r
75 /** Matches a character that is not a Unicode alphanumeric or underscore. */
\r
77 extends UniValidator
\r
79 public int validate(StringLike s, int from, int to)
\r
81 if (from >= s.length())
\r
85 char c = s.charAt(from);
\r
86 return! (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to :
\r
91 /** Matches a Unicode decimal digit. */
\r
93 extends UniValidator
\r
95 public int validate(StringLike s, int from, int to)
\r
97 return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to : -1;
\r
101 /** Matches a character that is not a Unicode digit.*/
\r
102 class NUnicodeDigit
\r
103 extends UniValidator
\r
105 public int validate(StringLike s, int from, int to)
\r
107 return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to : -1;
\r
111 /** Matches a Unicode math character. */
\r
113 extends UniValidator
\r
115 public int validate(StringLike s, int from, int to)
\r
117 return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1;
\r
121 /** Matches a non-math Unicode character. */
\r
123 extends UniValidator
\r
125 public int validate(StringLike s, int from, int to)
\r
127 return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;
\r
131 /** Matches a Unicode currency symbol. */
\r
132 class UnicodeCurrency
\r
133 extends UniValidator
\r
135 public int validate(StringLike s, int from, int to)
\r
137 return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;
\r
141 /** Matches a non-currency symbol Unicode character. */
\r
142 class NUnicodeCurrency
\r
143 extends UniValidator
\r
145 public int validate(StringLike s, int from, int to)
\r
147 return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;
\r
151 /** Matches a Unicode alphabetic character. */
\r
153 extends UniValidator
\r
155 public int validate(StringLike s, int from, int to)
\r
157 return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;
\r
161 /** Matches a non-alphabetic Unicode character. */
\r
162 class NUnicodeAlpha
\r
163 extends UniValidator
\r
165 public int validate(StringLike s, int from, int to)
\r
167 return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to : -1;
\r
171 /** Matches an upper case Unicode character. */
\r
173 extends UniValidator
\r
175 public int validate(StringLike s, int from, int to)
\r
177 return from < s.length() && isUpper(s.charAt(from)) ? to : -1;
\r
180 final boolean isUpper(char c)
\r
182 return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);
\r
186 /** Matches a lower case Unicode character. */
\r
188 extends UniValidator
\r
190 public int validate(StringLike s, int from, int to)
\r
192 return from < s.length() && isLower(s.charAt(from)) ? to : -1;
\r
195 final boolean isLower(char c)
\r
197 return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);
\r
202 Regex provides the parser which constructs the linked list of
\r
203 Pattern classes from a String.
\r
205 For the purpose of this documentation, the fact that java interprets the
\r
206 backslash will be ignored. In practice, however, you will need a
\r
207 double backslash to obtain a string that contains a single backslash
\r
208 character. Thus, the example pattern "\b" should really be typed
\r
209 as "\\b" inside java code.
\r
211 Note that Regex is part of package "com.stevesoft.pat".
\r
212 To use it, simply import
\r
213 com.stevesoft.pat.Regex at the top of your file.
\r
215 Regex is made with a constructor that takes a String that defines
\r
216 the regular expression. Thus, for example
\r
218 Regex r = new Regex("[a-c]*");
\r
220 matches any number of characters so long as they are 'a', 'b', or 'c').
\r
222 To attempt to match the Pattern to a given string, you can use either
\r
223 the search(String) member function, or the matchAt(String,int position)
\r
224 member function. These functions return a boolean which tells you
\r
225 whether or not the thing worked, and sets the methods "charsMatched()"
\r
226 and "matchedFrom()" in the Regex object appropriately.
\r
228 The portion of the string before the match can be obtained by the
\r
229 left() member, and the portion after the match can be obtained
\r
230 by the right() member.
\r
232 Essentially, this package implements a syntax that is very much
\r
233 like the perl 5 regular expression syntax.
\r
237 Regex r = new Regex("x(a|b)y");
\r
238 r.matchAt("xay",0);
\r
239 System.out.println("sub = "+r.stringMatched(1));
\r
241 The above would print "sub = a".
\r
243 r.left() // would return "x"
\r
244 r.right() // would return "y"
\r
247 Differences between this package and perl5:<br>
\r
248 The extended Pattern for setting flags is now supported,
\r
249 but the flags are different. "(?i)" tells the pattern to
\r
250 ignore case, "(?Q)" sets the "dontMatchInQuotes" flag, and
\r
251 "(?iQ)" sets them both. You can change the escape character.
\r
252 The pattern <pre>(?e=#)#d+</pre> is the same as <pre>\d+</pre>,
\r
253 but note that the sequence <pre>(?e=#)</pre> <b>must</b> occur
\r
254 at the very beginning of the pattern. There may be other small
\r
255 differences as well. I will either make my package conform
\r
256 or note them as I become aware of them.
\r
258 This package supports additional patterns not in perl5:
\r
261 <tr><td>(?@())</td><td>Group</td><td>This matches all characters between
\r
262 the '(' character and the balancing ')' character. Thus, it will
\r
263 match "()" as well as "(())". The balancing characters are
\r
264 arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>
\r
265 <tr><td>(?<1)</td><td>Backup</td><td>Moves the pointer backwards within the text.
\r
266 This allows you to make a "look behind." It fails if it
\r
267 attempts to move to a position before the beginning of the string.
\r
268 "x(?<1)" is equivalent to "(?=x)". The number, 1 in this example,
\r
269 is the number of characters to move backwards.</td>
\r
273 @author Steven R. Brandt
\r
274 @version package com.stevesoft.pat, release 1.5.3
\r
278 extends RegRes implements FilenameFilter
\r
280 /** BackRefOffset gives the identity number of the first
\r
281 pattern. Version 1.0 used zero, version 1.1 uses 1 to be
\r
282 more compatible with perl. */
\r
283 static int BackRefOffset = 1;
\r
284 private static Pattern none = new NoPattern();
\r
285 Pattern thePattern = none;
\r
286 patInt minMatch = new patInt(0);
\r
288 static Hashtable validators = new Hashtable();
\r
291 define("p", "(?>1)", new UnicodePunct());
\r
292 define("P", "(?>1)", new NUnicodePunct());
\r
293 define("s", "(?>1)", new UnicodeWhite());
\r
294 define("S", "(?>1)", new NUnicodeWhite());
\r
295 define("w", "(?>1)", new UnicodeW());
\r
296 define("W", "(?>1)", new NUnicodeW());
\r
297 define("d", "(?>1)", new UnicodeDigit());
\r
298 define("D", "(?>1)", new NUnicodeDigit());
\r
299 define("m", "(?>1)", new UnicodeMath());
\r
300 define("M", "(?>1)", new NUnicodeMath());
\r
301 define("c", "(?>1)", new UnicodeCurrency());
\r
302 define("C", "(?>1)", new NUnicodeCurrency());
\r
303 define("a", "(?>1)", new UnicodeAlpha());
\r
304 define("A", "(?>1)", new NUnicodeAlpha());
\r
305 define("uc", "(?>1)", new UnicodeUpper());
\r
306 define("lc", "(?>1)", new UnicodeLower());
\r
309 /** Set the dontMatchInQuotes flag. */
\r
310 public void setDontMatchInQuotes(boolean b)
\r
312 dontMatchInQuotes = b;
\r
315 /** Find out if the dontMatchInQuotes flag is enabled. */
\r
316 public boolean getDontMatchInQuotes()
\r
318 return dontMatchInQuotes;
\r
321 boolean dontMatchInQuotes = false;
\r
323 /** Set the state of the ignoreCase flag. If set to true, then
\r
324 the pattern matcher will ignore case when searching for a
\r
326 public void setIgnoreCase(boolean b)
\r
331 /** Get the state of the ignoreCase flag. Returns true if we
\r
332 are ignoring the case of the pattern, false otherwise. */
\r
333 public boolean getIgnoreCase()
\r
338 boolean ignoreCase = false;
\r
340 static boolean defaultMFlag = false;
\r
341 /** Set the default value of the m flag. If it
\r
342 is set to true, then the MFlag will be on
\r
343 for any regex search executed. */
\r
344 public static void setDefaultMFlag(boolean mFlag)
\r
346 defaultMFlag = mFlag;
\r
349 /** Get the default value of the m flag. If it
\r
350 is set to true, then the MFlag will be on
\r
351 for any regex search executed. */
\r
352 public static boolean getDefaultMFlag()
\r
354 return defaultMFlag;
\r
357 /** Initializes the object without a Pattern. To supply a Pattern
\r
358 use compile(String s).
\r
359 @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
364 /** Create and compile a Regex, but do not throw any exceptions.
\r
365 If you wish to have exceptions thrown for syntax errors,
\r
366 you must use the Regex(void) constructor to create the
\r
367 Regex object, and then call the compile method. Therefore, you
\r
368 should only call this method when you know your pattern is right.
\r
369 I will probably become more like
\r
370 @see com.stevesoft.pat.Regex#search(java.lang.String)
\r
371 @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
373 public Regex(String s)
\r
379 catch (RegSyntax rs)
\r
383 ReplaceRule rep = null;
\r
384 /** Create and compile both a Regex and a ReplaceRule.
\r
385 @see com.stevesoft.pat.ReplaceRule
\r
386 @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
388 public Regex(String s, String rp)
\r
391 rep = ReplaceRule.perlCode(rp);
\r
394 /** Create and compile a Regex, but give it the ReplaceRule
\r
395 specified. This allows the user finer control of the
\r
396 Replacement process, if that is desired.
\r
397 @see com.stevesoft.pat.ReplaceRule
\r
398 @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
400 public Regex(String s, ReplaceRule rp)
\r
406 /** Change the ReplaceRule of this Regex by compiling
\r
407 a new one using String rp. */
\r
408 public void setReplaceRule(String rp)
\r
410 rep = ReplaceRule.perlCode(rp);
\r
411 repr = null; // Clear Replacer history
\r
414 /** Change the ReplaceRule of this Regex to rp. */
\r
415 public void setReplaceRule(ReplaceRule rp)
\r
420 /** Test to see if a custom defined rule exists.
\r
421 @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
\r
423 public static boolean isDefined(String nm)
\r
425 return validators.get(nm) != null;
\r
428 /** Removes a custom defined rule.
\r
429 @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
\r
431 public static void undefine(String nm)
\r
433 validators.remove(nm);
\r
436 /** Defines a method to create a new rule. See test/deriv2.java
\r
437 and test/deriv3.java for examples of how to use it. */
\r
438 public static void define(String nm, String pat, Validator v)
\r
441 validators.put(nm, v);
\r
444 /** Defines a shorthand for a pattern. The pattern will be
\r
445 invoked by a string that has the form "(??"+nm+")".
\r
447 public static void define(String nm, String pat)
\r
449 validators.put(nm, pat);
\r
452 /** Get the current ReplaceRule. */
\r
453 public ReplaceRule getReplaceRule()
\r
458 Replacer repr = null;
\r
459 final Replacer _getReplacer()
\r
461 return repr == null ? repr = new Replacer() : repr;
\r
464 public Replacer getReplacer()
\r
468 repr = new Replacer();
\r
471 repr.rh.prev = null;
\r
475 /** Replace the first occurrence of this pattern in String s
\r
476 according to the ReplaceRule.
\r
477 @see com.stevesoft.pat.ReplaceRule
\r
478 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
480 public String replaceFirst(String s)
\r
482 return _getReplacer().replaceFirstRegion(s, this, 0, s.length()).toString();
\r
485 /** Replace the first occurrence of this pattern in String s
\r
486 beginning with position pos according to the ReplaceRule.
\r
487 @see com.stevesoft.pat.ReplaceRule
\r
488 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
490 public String replaceFirstFrom(String s, int pos)
\r
492 return _getReplacer().replaceFirstRegion(s, this, pos, s.length()).toString();
\r
495 /** Replace the first occurrence of this pattern in String s
\r
496 beginning with position start and ending with end
\r
497 according to the ReplaceRule.
\r
498 @see com.stevesoft.pat.ReplaceRule
\r
499 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
501 public String replaceFirstRegion(String s, int start, int end)
\r
503 return _getReplacer().replaceFirstRegion(s, this, start, end).toString();
\r
506 /** Replace all occurrences of this pattern in String s
\r
507 according to the ReplaceRule.
\r
508 @see com.stevesoft.pat.ReplaceRule
\r
509 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
511 public String replaceAll(String s)
\r
513 return _getReplacer().replaceAllRegion(s, this, 0, s.length()).toString();
\r
516 public StringLike replaceAll(StringLike s)
\r
518 return _getReplacer().replaceAllRegion(s, this, 0, s.length());
\r
521 /** Replace all occurrences of this pattern in String s
\r
522 beginning with position pos according to the ReplaceRule.
\r
523 @see com.stevesoft.pat.ReplaceRule
\r
524 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
526 public String replaceAllFrom(String s, int pos)
\r
528 return _getReplacer().replaceAllRegion(s, this, pos, s.length()).toString();
\r
531 /** Replace all occurrences of this pattern in String s
\r
532 beginning with position start and ending with end
\r
533 according to the ReplaceRule.
\r
534 @see com.stevesoft.pat.ReplaceRule
\r
535 @see com.stevesoft.pat.Regex#getReplaceRule()
\r
537 public String replaceAllRegion(String s, int start, int end)
\r
539 return _getReplacer().replaceAllRegion(s, this, start, end).toString();
\r
542 /** Essentially clones the Regex object */
\r
543 public Regex(Regex r)
\r
545 super( (RegRes) r);
\r
546 dontMatchInQuotes = r.dontMatchInQuotes;
\r
548 ignoreCase = r.ignoreCase;
\r
556 rep = (ReplaceRule) r.rep.clone();
\r
559 compile(r.toString());
\r
560 } catch(RegSyntax r_) {} */
\r
561 thePattern = r.thePattern.clone(new Hashtable());
\r
562 minMatch = r.minMatch;
\r
563 skipper = r.skipper;
\r
567 the escape character is the backslash, but you can
\r
568 make it anything you want by setting this variable. */
\r
569 public char esc = Pattern.ESC;
\r
570 /** This method compiles a regular expression, making it
\r
571 possible to call the search or matchAt methods.
\r
572 @exception com.stevesoft.pat.RegSyntax
\r
573 is thrown if a syntax error is encountered
\r
575 For example, "x{3,1}" or "*a" are not valid
\r
577 @see com.stevesoft.pat.Regex#search
\r
578 @see com.stevesoft.pat.Regex#matchAt
\r
580 public void compile(String prepat)
\r
583 String postpat = parsePerl.codify(prepat, true);
\r
584 String pat = postpat == null ? prepat : postpat;
\r
586 ignoreCase = false;
\r
587 dontMatchInQuotes = false;
\r
588 Rthings mk = new Rthings(this);
\r
589 int offset = mk.val;
\r
590 String newpat = pat;
\r
594 minMatch = new patInt(0);
\r
595 StrPos sp = new StrPos(pat, 0);
\r
596 if (sp.incMatch("(?e="))
\r
598 char newEsc = sp.c;
\r
602 newpat = reEscape(pat.substring(6),
\r
603 newEsc, Pattern.ESC);
\r
606 else if (esc != Pattern.ESC)
\r
608 newpat = reEscape(pat, esc, Pattern.ESC);
\r
610 thePattern = _compile(newpat, mk);
\r
611 numSubs_ = mk.val - offset;
\r
615 /* If a Regex is compared against a Regex, a check is
\r
616 done to see that the patterns are equal as well as
\r
617 the most recent match. If a Regex is compared with
\r
618 a RegRes, only the result of the most recent match
\r
620 public boolean equals(Object o)
\r
622 if (o instanceof Regex)
\r
624 if (toString().equals(o.toString()))
\r
626 return super.equals(o);
\r
635 return super.equals(o);
\r
639 /** A clone by any other name would smell as sweet. */
\r
640 public Object clone()
\r
642 return new Regex(this);
\r
645 /** Return a clone of the underlying RegRes object. */
\r
646 public RegRes result()
\r
648 return (RegRes)super.clone();
\r
651 // prep sets global variables of class
\r
652 // Pattern so that it can access them
\r
653 // during an attempt at a match
\r
654 Pthings pt = new Pthings();
\r
655 final Pthings prep(StringLike s)
\r
658 pt.lastPos = matchedTo();
\r
659 if (pt.lastPos < 0)
\r
663 if ( (s == null ? null : s.unwrap()) != (src == null ? null : s.unwrap()))
\r
668 pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);
\r
669 pt.mFlag = (mFlag | defaultMFlag);
\r
670 pt.ignoreCase = ignoreCase;
\r
671 pt.no_check = false;
\r
672 if (pt.marks != null)
\r
674 for (int i = 0; i < pt.marks.length; i++)
\r
680 pt.nMarks = numSubs_;
\r
682 if (dontMatchInQuotes)
\r
693 /** Attempt to match a Pattern beginning
\r
694 at a specified location within the string.
\r
695 @see com.stevesoft.pat.Regex#search
\r
697 public boolean matchAt(String s, int start_pos)
\r
699 return _search(s, start_pos, start_pos);
\r
702 /** Attempt to match a Pattern beginning
\r
703 at a specified location within the StringLike.
\r
704 @see com.stevesoft.pat.Regex#search
\r
706 public boolean matchAt(StringLike s, int start_pos)
\r
708 return _search(s, start_pos, start_pos);
\r
711 /** Search through a String for the first
\r
712 occurrence of a match.
\r
713 @see com.stevesoft.pat.Regex#searchFrom
\r
714 @see com.stevesoft.pat.Regex#matchAt
\r
716 public boolean search(String s)
\r
720 throw new NullPointerException("Null String Given to Regex.search");
\r
722 return _search(s, 0, s.length());
\r
725 public boolean search(StringLike sl)
\r
729 throw new NullPointerException("Null StringLike Given to Regex.search");
\r
731 return _search(sl, 0, sl.length());
\r
734 public boolean reverseSearch(String s)
\r
738 throw new NullPointerException("Null String Given to Regex.reverseSearch");
\r
740 return _reverseSearch(s, 0, s.length());
\r
743 public boolean reverseSearch(StringLike sl)
\r
747 throw new NullPointerException(
\r
748 "Null StringLike Given to Regex.reverseSearch");
\r
750 return _reverseSearch(sl, 0, sl.length());
\r
753 /** Search through a String for the first
\r
754 occurrence of a match, but start at position <pre>start</pre>*/
\r
755 public boolean searchFrom(String s, int start)
\r
759 throw new NullPointerException("Null String Given to Regex.searchFrom");
\r
761 return _search(s, start, s.length());
\r
764 public boolean searchFrom(StringLike s, int start)
\r
768 throw new NullPointerException("Null String Given to Regex.searchFrom");
\r
770 return _search(s, start, s.length());
\r
773 /** Search through a region of a String
\r
774 for the first occurrence of a match. */
\r
775 public boolean searchRegion(String s, int start, int end)
\r
779 throw new NullPointerException("Null String Given to Regex.searchRegion");
\r
781 return _search(s, start, end);
\r
784 /** Set this to change the default behavior of the "." pattern.
\r
785 By default it now matches perl's behavior and fails to
\r
786 match the '\n' character. */
\r
787 public static boolean dotDoesntMatchCR = true;
\r
790 boolean gFlag = false;
\r
791 /** Set the 'g' flag */
\r
792 public void setGFlag(boolean b)
\r
797 /** Get the state of the 'g' flag. */
\r
798 public boolean getGFlag()
\r
803 boolean sFlag = false;
\r
804 /** Get the state of the sFlag */
\r
805 public boolean getSFlag()
\r
810 boolean mFlag = false;
\r
811 /** Get the state of the mFlag */
\r
812 public boolean getMFlag()
\r
817 final boolean _search(String s, int start, int end)
\r
819 return _search(new StringWrap(s), start, end);
\r
822 final boolean _search(StringLike s, int start, int end)
\r
824 if (gFlag && gFlagto > 0 && gFlags != null && s.unwrap() == gFlags.unwrap())
\r
830 Pthings pt = prep(s);
\r
832 int up = (minMatch == null ? end : end - minMatch.i);
\r
834 if (up < start && end >= start)
\r
839 if (skipper == null)
\r
841 for (int i = start; i <= up; i++)
\r
843 charsMatched_ = thePattern.matchAt(s, i, pt);
\r
844 if (charsMatched_ >= 0)
\r
846 matchFrom_ = thePattern.mfrom;
\r
848 gFlagto = matchFrom_ + charsMatched_;
\r
850 return didMatch_ = true;
\r
856 pt.no_check = true;
\r
857 for (int i = start; i <= up; i++)
\r
859 i = skipper.find(src, i, up);
\r
862 charsMatched_ = matchFrom_ = -1;
\r
863 return didMatch_ = false;
\r
865 charsMatched_ = thePattern.matchAt(s, i, pt);
\r
866 if (charsMatched_ >= 0)
\r
868 matchFrom_ = thePattern.mfrom;
\r
870 gFlagto = matchFrom_ + charsMatched_;
\r
872 return didMatch_ = true;
\r
876 return didMatch_ = false;
\r
879 /*final boolean _search(LongStringLike s,long start,long end) {
\r
880 if(gFlag && gFlagto > 0 && s==gFlags)
\r
884 Pthings pt=prep(s);
\r
886 int up = end;//(minMatch == null ? end : end-minMatch.i);
\r
888 if(up < start && end >= start) up = start;
\r
890 if(skipper == null) {
\r
891 for(long i=start;i<=up;i++) {
\r
892 charsMatched_ = thePattern.matchAt(s,i,pt);
\r
893 if(charsMatched_ >= 0) {
\r
894 matchFrom_ = thePattern.mfrom;
\r
896 gFlagto = matchFrom_+charsMatched_;
\r
897 return didMatch_=true;
\r
901 pt.no_check = true;
\r
902 for(long i=start;i<=up;i++) {
\r
903 i = skipper.find(src,i,up);
\r
905 charsMatched_ = matchFrom_ = -1;
\r
906 return didMatch_ = false;
\r
908 charsMatched_ = thePattern.matchAt(s,i,pt);
\r
909 if(charsMatched_ >= 0) {
\r
910 matchFrom_ = thePattern.mfrom;
\r
912 gFlagto = matchFrom_+charsMatched_;
\r
914 return didMatch_=true;
\r
916 i = s.adjustIndex(i);
\r
917 up = s.adjustEnd(i);
\r
921 return didMatch_=false;
\r
924 boolean _reverseSearch(String s, int start, int end)
\r
926 return _reverseSearch(new StringWrap(s), start, end);
\r
929 boolean _reverseSearch(StringLike s, int start, int end)
\r
931 if (gFlag && gFlagto > 0 && s.unwrap() == gFlags.unwrap())
\r
936 Pthings pt = prep(s);
\r
937 for (int i = end; i >= start; i--)
\r
939 charsMatched_ = thePattern.matchAt(s, i, pt);
\r
940 if (charsMatched_ >= 0)
\r
942 matchFrom_ = thePattern.mfrom;
\r
944 gFlagto = matchFrom_ - 1;
\r
946 return didMatch_ = true;
\r
949 return didMatch_ = false;
\r
952 // This routine sets the cbits variable
\r
953 // of class Pattern. Cbits is true for
\r
954 // the bit corresponding to a character inside
\r
955 // a set of quotes.
\r
956 static StringLike lasts = null;
\r
957 static BitSet lastbs = null;
\r
958 static void setCbits(StringLike s, Pthings pt)
\r
965 BitSet bs = new BitSet(s.length());
\r
967 boolean setBit = false;
\r
968 for (int i = 0; i < s.length(); i++)
\r
974 char c = s.charAt(i);
\r
975 if (!setBit && c == '"')
\r
981 else if (!setBit && c == '\'')
\r
987 else if (setBit && c == qc)
\r
991 else if (setBit && c == '\\' && i + 1 < s.length())
\r
1000 pt.cbits = lastbs = bs;
\r
1004 // Wanted user to over-ride this in alpha version,
\r
1005 // but it wasn't really necessary because of this trick:
\r
1010 return (Regex) getClass().newInstance();
\r
1012 catch (InstantiationException ie)
\r
1016 catch (IllegalAccessException iae)
\r
1022 /** Only needed for creating your own extensions of
\r
1023 Regex. This method adds the next Pattern in the chain
\r
1024 of patterns or sets the Pattern if it is the first call. */
\r
1025 protected void add(Pattern p2)
\r
1038 /** You only need to use this method if you are creating
\r
1039 your own extensions to Regex.
\r
1040 compile1 compiles one Pattern element, it can be
\r
1041 over-ridden to allow the Regex compiler to understand
\r
1042 new syntax. See deriv.java for an example. This routine
\r
1043 is the heart of class Regex. Rthings has one integer
\r
1044 member called val, it is used to keep track of the number
\r
1045 of ()'s in the Pattern.
\r
1046 @exception com.stevesoft.pat.RegSyntax is thrown when a nonsensical
\r
1047 pattern is supplied. For example, a pattern beginning
\r
1049 protected void compile1(StrPos sp, Rthings mk)
\r
1052 if (sp.match('['))
\r
1055 add(matchBracket(sp));
\r
1057 else if (sp.match('|'))
\r
1065 p = new NullPattern();
\r
1070 else if (sp.incMatch("(?<"))
\r
1072 patInt i = sp.getPatInt();
\r
1075 RegSyntaxError.endItAll("No int after (?<");
\r
1077 add(new Backup(i.intValue()));
\r
1078 if (!sp.match(')'))
\r
1080 RegSyntaxError.endItAll("No ) after (?<");
\r
1083 else if (sp.incMatch("(?>"))
\r
1085 patInt i = sp.getPatInt();
\r
1088 RegSyntaxError.endItAll("No int after (?>");
\r
1090 add(new Backup( -i.intValue()));
\r
1091 if (!sp.match(')'))
\r
1093 RegSyntaxError.endItAll("No ) after (?<");
\r
1096 else if (sp.incMatch("(?@"))
\r
1102 if (!sp.match(')'))
\r
1104 RegSyntaxError.endItAll(
\r
1105 "(?@ does not have closing paren");
\r
1107 add(new Group(op, cl));
\r
1109 else if (sp.incMatch("(?#"))
\r
1111 while (!sp.match(')'))
\r
1116 else if (sp.dontMatch && sp.c == 'w')
\r
1118 //Regex r = new Regex();
\r
1119 //r._compile("[a-zA-Z0-9_]",mk);
\r
1120 //add(new Goop("\\w",r.thePattern));
\r
1121 Bracket b = new Bracket(false);
\r
1122 b.addOr(new Range('a', 'z'));
\r
1123 b.addOr(new Range('A', 'Z'));
\r
1124 b.addOr(new Range('0', '9'));
\r
1125 b.addOr(new oneChar('_'));
\r
1128 else if (sp.dontMatch && sp.c == 'G')
\r
1132 else if (sp.dontMatch && sp.c == 's')
\r
1134 //Regex r = new Regex();
\r
1135 //r._compile("[ \t\n\r\b]",mk);
\r
1136 //add(new Goop("\\s",r.thePattern));
\r
1137 Bracket b = new Bracket(false);
\r
1138 b.addOr(new oneChar( (char) 32));
\r
1139 b.addOr(new Range( (char) 8, (char) 10));
\r
1140 b.addOr(new oneChar( (char) 13));
\r
1143 else if (sp.dontMatch && sp.c == 'd')
\r
1145 //Regex r = new Regex();
\r
1146 //r._compile("[0-9]",mk);
\r
1147 //add(new Goop("\\d",r.thePattern));
\r
1148 Range digit = new Range('0', '9');
\r
1149 digit.printBrackets = true;
\r
1152 else if (sp.dontMatch && sp.c == 'W')
\r
1154 //Regex r = new Regex();
\r
1155 //r._compile("[^a-zA-Z0-9_]",mk);
\r
1156 //add(new Goop("\\W",r.thePattern));
\r
1157 Bracket b = new Bracket(true);
\r
1158 b.addOr(new Range('a', 'z'));
\r
1159 b.addOr(new Range('A', 'Z'));
\r
1160 b.addOr(new Range('0', '9'));
\r
1161 b.addOr(new oneChar('_'));
\r
1164 else if (sp.dontMatch && sp.c == 'S')
\r
1166 //Regex r = new Regex();
\r
1167 //r._compile("[^ \t\n\r\b]",mk);
\r
1168 //add(new Goop("\\S",r.thePattern));
\r
1169 Bracket b = new Bracket(true);
\r
1170 b.addOr(new oneChar( (char) 32));
\r
1171 b.addOr(new Range( (char) 8, (char) 10));
\r
1172 b.addOr(new oneChar( (char) 13));
\r
1175 else if (sp.dontMatch && sp.c == 'D')
\r
1177 //Regex r = new Regex();
\r
1178 //r._compile("[^0-9]",mk);
\r
1179 //add(new Goop("\\D",r.thePattern));
\r
1180 Bracket b = new Bracket(true);
\r
1181 b.addOr(new Range('0', '9'));
\r
1184 else if (sp.dontMatch && sp.c == 'B')
\r
1186 Regex r = new Regex();
\r
1187 r._compile("(?!" + back_slash + "b)", mk);
\r
1188 add(r.thePattern);
\r
1190 else if (isOctalString(sp))
\r
1192 int d = sp.c - '0';
\r
1194 d = 8 * d + sp.c - '0';
\r
1195 StrPos sp2 = new StrPos(sp);
\r
1197 if (isOctalDigit(sp2, false))
\r
1200 d = 8 * d + sp.c - '0';
\r
1202 add(new oneChar( (char) d));
\r
1204 else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')
\r
1206 int iv = sp.c - '0';
\r
1207 StrPos s2 = new StrPos(sp);
\r
1209 if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')
\r
1211 iv = 10 * iv + (s2.c - '0');
\r
1214 add(new BackMatch(iv));
\r
1216 else if (sp.dontMatch && sp.c == 'b')
\r
1218 add(new Boundary());
\r
1220 else if (sp.match('\b'))
\r
1222 add(new Boundary());
\r
1224 else if (sp.match('$'))
\r
1226 add(new End(true));
\r
1228 else if (sp.dontMatch && sp.c == 'Z')
\r
1230 add(new End(false));
\r
1232 else if (sp.match('.'))
\r
1236 else if (sp.incMatch("(??"))
\r
1238 StringBuffer sb = new StringBuffer();
\r
1239 StringBuffer sb2 = new StringBuffer();
\r
1240 while (!sp.match(')') && !sp.match(':'))
\r
1245 if (sp.incMatch(":"))
\r
1247 while (!sp.match(')'))
\r
1253 String sbs = sb.toString();
\r
1254 if (validators.get(sbs) instanceof String)
\r
1256 String pat = (String) validators.get(sbs);
\r
1257 Regex r = newRegex();
\r
1258 Rthings rth = new Rthings(this);
\r
1259 rth.noBackRefs = true;
\r
1260 r._compile(pat, rth);
\r
1261 add(r.thePattern);
\r
1265 Custom cm = new Custom(sb.toString());
\r
1268 Validator v2 = cm.v.arg(sb2.toString());
\r
1271 v2.argsave = sb2.toString();
\r
1272 String p = cm.v.pattern;
\r
1276 Regex r = newRegex();
\r
1277 Rthings rth = new Rthings(this);
\r
1278 rth.noBackRefs = true;
\r
1279 r._compile(cm.v.pattern, rth);
\r
1280 cm.sub = r.thePattern;
\r
1281 cm.sub.add(new CustomEndpoint(cm));
\r
1282 cm.sub.setParent(cm);
\r
1287 else if (sp.match('('))
\r
1290 Regex r = newRegex();
\r
1291 // r.or = new Or();
\r
1293 if (sp.incMatch("?:"))
\r
1297 else if (sp.incMatch("?="))
\r
1299 r.or = new lookAhead(false);
\r
1301 else if (sp.incMatch("?!"))
\r
1303 r.or = new lookAhead(true);
\r
1305 else if (sp.match('?'))
\r
1312 mk.ignoreCase = true;
\r
1316 mk.dontMatchInQuotes = true;
\r
1320 mk.optimizeMe = true;
\r
1336 while (!sp.match(')') && !sp.eos);
\r
1339 if (sp.eos) //throw new RegSyntax
\r
1341 RegSyntaxError.endItAll("Unclosed ()");
\r
1345 { // just ordinary parenthesis
\r
1346 r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
\r
1350 add(r._compile(sp, mk));
\r
1353 else if (sp.match('^'))
\r
1355 add(new Start(true));
\r
1357 else if (sp.dontMatch && sp.c == 'A')
\r
1359 add(new Start(false));
\r
1361 else if (sp.match('*'))
\r
1363 addMulti(new patInt(0), new patInf());
\r
1365 else if (sp.match('+'))
\r
1367 addMulti(new patInt(1), new patInf());
\r
1369 else if (sp.match('?'))
\r
1371 addMulti(new patInt(0), new patInt(1));
\r
1373 else if (sp.match('{'))
\r
1375 boolean bad = false;
\r
1376 StrPos sp2 = new StrPos(sp);
\r
1377 //StringBuffer sb = new StringBuffer();
\r
1379 patInt i1 = sp.getPatInt();
\r
1381 if (sp.match('}'))
\r
1387 if (!sp.match(','))
\r
1390 RegSyntaxError.endItAll(
\r
1392 "\" should be followed with , or }");
\r
1397 if (sp.match('}'))
\r
1399 i2 = new patInf();
\r
1403 i2 = sp.getPatInt();
\r
1406 if (i1 == null || i2 == null)
\r
1409 throw new RegSyntax("Badly formatted Multi: "
\r
1410 +"{"+i1+","+i2+"}"); */
\r
1416 add(new oneChar(sp.c));
\r
1423 else if (sp.escMatch('x') && next2Hex(sp))
\r
1426 int d = getHexDigit(sp);
\r
1428 d = 16 * d + getHexDigit(sp);
\r
1429 add(new oneChar( (char) d));
\r
1431 else if (sp.escMatch('c'))
\r
1434 if (sp.c < Ctrl.cmap.length)
\r
1436 add(new oneChar(Ctrl.cmap[sp.c]));
\r
1440 add(new oneChar(sp.c));
\r
1443 else if (sp.escMatch('f'))
\r
1445 add(new oneChar( (char) 12));
\r
1447 else if (sp.escMatch('a'))
\r
1449 add(new oneChar( (char) 7));
\r
1451 else if (sp.escMatch('t'))
\r
1453 add(new oneChar('\t'));
\r
1455 else if (sp.escMatch('n'))
\r
1457 add(new oneChar('\n'));
\r
1459 else if (sp.escMatch('r'))
\r
1461 add(new oneChar('\r'));
\r
1463 else if (sp.escMatch('b'))
\r
1465 add(new oneChar('\b'));
\r
1467 else if (sp.escMatch('e'))
\r
1469 add(new oneChar( (char) 27));
\r
1473 add(new oneChar(sp.c));
\r
1474 if (sp.match(')'))
\r
1476 RegSyntaxError.endItAll("Unmatched right paren in pattern");
\r
1481 // Internal entry point: compiles the pattern text pat into a Pattern chain.
\r
1482 private Pattern _compile(String pat, Rthings mk)
\r
// Reset the s, m, ignoreCase and g flags to false before compiling.
1486 sFlag = mFlag = ignoreCase = gFlag = false;
\r
// Wrap the pattern text in a StrPos (second argument 0, presumably the start offset).
1487 StrPos sp = new StrPos(pat, 0);
\r
// The StrPos overload does the work; its result is stored in thePattern and returned.
1488 thePattern = _compile(sp, mk);
\r
1490 return thePattern;
\r
// Compiles elements from sp until the input ends or, while an alternation
// (or) is open, a ')' is reached.
1495 Pattern _compile(StrPos sp, Rthings mk)
\r
1498 while (! (sp.eos || (or != null && sp.match(')'))))
\r
1503 if (sp.match(')'))
\r
// Input ran out while mk.parenLevel is still non-zero: report the unclosed group.
1507 else if (sp.eos && mk.parenLevel != 0)
\r
1509 RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);
\r
1515 p = new NullPattern();
\r
// Never hand back null: an empty chain is represented by a NullPattern.
1520 return p == null ? new NullPattern() : p;
\r
1523 // add a multi object to the end of the chain
\r
1524 // which applies to the last object
\r
1525 void addMulti(patInt i1, patInt i2)
\r
1528 Pattern last, last2;
\r
// Walk to the final element of the chain headed by p; last stays null when p is null.
1529 for (last = p; last != null && last.next != null; last = last.next)
\r
// An empty or single-element chain has no predecessor to look up.
1533 if (last == null || last == p)
\r
// Otherwise locate last2, the element whose next is last.
1539 for (last2 = p; last2.next != last; last2 = last2.next)
\r
// Bounds 0..1 (what the ? branch of the compiler passes) applied to an existing
// Multi, FastMulti or DotMulti switch that quantifier to matchFewest.
1544 if (last instanceof Multi && i1.intValue() == 0 &&
\r
1545 i2.intValue() == 1)
\r
1547 ( (Multi) last).matchFewest = true;
\r
1549 else if (last instanceof FastMulti && i1.intValue() == 0 &&
\r
1550 i2.intValue() == 1)
\r
1552 ( (FastMulti) last).matchFewest = true;
\r
1554 else if (last instanceof DotMulti && i1.intValue() == 0 &&
\r
1555 i2.intValue() == 1)
\r
1557 ( (DotMulti) last).matchFewest = true;
\r
// Any other bounds on top of an existing quantifier are rejected as a syntax error.
1559 else if (last instanceof Multi
\r
1560 || last instanceof DotMulti
\r
1561 || last instanceof FastMulti)
\r
1563 throw new RegSyntax("Syntax error.");
\r
// No predecessor: the quantifier wraps the chain head p and becomes the new head.
1565 else if (last2 == null)
\r
1567 p = mkMulti(i1, i2, p);
\r
// Otherwise the quantifier wraps last and takes its place after last2.
1571 last2.next = mkMulti(i1, i2, last);
\r
// Chooses the quantifier node type for p with the bounds lo and hi.
1575 final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)
\r
// An Any with nothing chained after it gets the dedicated DotMulti.
1578 if (p instanceof Any && p.next == null)
\r
1580 return (Pattern)new DotMulti(lo, hi);
\r
// FastMulti is used only when RegOpt.safe4fm(p) approves; otherwise the general Multi.
1582 return RegOpt.safe4fm(p) ? (Pattern)new FastMulti(lo, hi, p) :
\r
1583 (Pattern)new Multi(lo, hi, p);
\r
1586 // Process the bracket operator: the members found at sp are added to the Bracket held in ret.
\r
1587 Pattern matchBracket(StrPos sp)
\r
// A leading '^' selects Bracket(true), otherwise Bracket(false) -- presumably the negation flag; confirm in Bracket.
1591 if (sp.match('^'))
\r
1593 ret = new Bracket(true);
\r
1598 ret = new Bracket(false);
\r
// A ']' found at this point is reported as an error.
1600 if (sp.match(']'))
\r
1602 //throw new RegSyntax
\r
1603 RegSyntaxError.endItAll("Unmatched []");
\r
// Consume bracket members until a ']' matches or the input ends.
1606 while (!sp.eos && !sp.match(']'))
\r
// Look-ahead is done on StrPos objects built from sp (assuming StrPos(StrPos) copies the cursor).
1608 StrPos s1 = new StrPos(sp);
\r
1610 StrPos s1_ = new StrPos(s1);
\r
// Range member such as a-z; the s1_ test appears to exclude a '-' that sits directly before ']' -- confirm.
1612 if (s1.match('-') && !s1_.match(']'))
\r
1614 StrPos s2 = new StrPos(s1);
\r
1618 ret.addOr(new Range(sp.c, s2.c));
\r
// \Q ... \E: every character up to \E is added as a literal.
1623 else if (sp.escMatch('Q'))
\r
1626 while (!sp.escMatch('E'))
\r
1628 ret.addOr(new oneChar(sp.c));
\r
// \d: ASCII digits 0-9 only.
1632 else if (sp.escMatch('d'))
\r
1634 ret.addOr(new Range('0', '9'));
\r
// \s: space (32), chars 8-10 and CR (13).
// NOTE(review): 8 is backspace and form feed (12) is left out; the \S branch below mirrors this set -- confirm intended.
1636 else if (sp.escMatch('s'))
\r
1638 ret.addOr(new oneChar( (char) 32));
\r
1639 ret.addOr(new Range( (char) 8, (char) 10));
\r
1640 ret.addOr(new oneChar( (char) 13));
\r
// \w: ASCII letters, digits and underscore.
1642 else if (sp.escMatch('w'))
\r
1644 ret.addOr(new Range('a', 'z'));
\r
1645 ret.addOr(new Range('A', 'Z'));
\r
1646 ret.addOr(new Range('0', '9'));
\r
1647 ret.addOr(new oneChar('_'));
\r
// \D: every char except 48-57 (the digits 0-9).
1649 else if (sp.escMatch('D'))
\r
1651 ret.addOr(new Range( (char) 0, (char) 47));
\r
1652 ret.addOr(new Range( (char) 58, (char) 65535));
\r
// \S: every char except 8-10, 13 and 32, i.e. the complement of the \s set above.
1654 else if (sp.escMatch('S'))
\r
1656 ret.addOr(new Range( (char) 0, (char) 7));
\r
1657 ret.addOr(new Range( (char) 11, (char) 12));
\r
1658 ret.addOr(new Range( (char) 14, (char) 31));
\r
1659 ret.addOr(new Range( (char) 33, (char) 65535));
\r
// \W: excludes A-Z (65-90), underscore (95) and a-z (97-122).
// NOTE(review): the range 0-64 still contains the digits 48-57, which \w accepts, so this is not the exact complement of \w -- verify.
1661 else if (sp.escMatch('W'))
\r
1663 ret.addOr(new Range( (char) 0, (char) 64));
\r
1664 ret.addOr(new Range( (char) 91, (char) 94));
\r
1665 ret.addOr(new oneChar( (char) 96));
\r
1666 ret.addOr(new Range( (char) 123, (char) 65535));
\r
// \xHH: taken only when next2Hex(sp) approves; two hex digits are combined into the char code.
1668 else if (sp.escMatch('x') && next2Hex(sp))
\r
1671 int d = getHexDigit(sp);
\r
1673 d = 16 * d + getHexDigit(sp);
\r
1674 ret.addOr(new oneChar( (char) d));
\r
// Single-character escapes: \a = 7, \f = 12, \e = 27, then newline, tab and carriage return.
1676 else if (sp.escMatch('a'))
\r
1678 ret.addOr(new oneChar( (char) 7));
\r
1680 else if (sp.escMatch('f'))
\r
1682 ret.addOr(new oneChar( (char) 12));
\r
1684 else if (sp.escMatch('e'))
\r
1686 ret.addOr(new oneChar( (char) 27));
\r
1688 else if (sp.escMatch('n'))
\r
1690 ret.addOr(new oneChar('\n'));
\r
1692 else if (sp.escMatch('t'))
\r
1694 ret.addOr(new oneChar('\t'));
\r
1696 else if (sp.escMatch('r'))
\r
1698 ret.addOr(new oneChar('\r'));
\r
// \cX: the character is mapped through Ctrl.cmap when it indexes inside the table;
// the second addOr (apparently the else branch) adds the character itself.
1700 else if (sp.escMatch('c'))
\r
1703 if (sp.c < Ctrl.cmap.length)
\r
1705 ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
\r
1709 ret.addOr(new oneChar(sp.c));
\r
// Octal escape: two octal digits are accumulated, plus a third when isOctalDigit finds one on the copy sp2.
1712 else if (isOctalString(sp))
\r
1714 int d = sp.c - '0';
\r
1716 d = 8 * d + sp.c - '0';
\r
1717 StrPos sp2 = new StrPos(sp);
\r
1719 if (isOctalDigit(sp2, false))
\r
1722 d = 8 * d + sp.c - '0';
\r
1724 ret.addOr(new oneChar( (char) d));
\r
// Remaining case: the current character is added as a literal member.
1728 ret.addOr(new oneChar(sp.c));
\r
1735 /** Converts the stored Pattern to a String -- this is a
\r
1736 decompile. Note that \t and \n will really print out here,
\r
1737 not just the two-character representations.
\r
1738 Also be prepared to see some strange output if your characters
\r
1739 are not printable. */
\r
1740 public String toString()
\r
// NOTE(review): the leading "false &&" makes this condition always false, so whatever it guards never runs.
1742 if (false && thePattern == null)
\r
1748 StringBuffer sb = new StringBuffer();
\r
// A non-default escape character is announced by starting the output with "(?e=".
1749 if (esc != Pattern.ESC)
\r
1751 sb.append("(?e=");
\r
1757 || !dotDoesntMatchCR
\r
1760 || dontMatchInQuotes
\r
1772 if (sFlag || !dotDoesntMatchCR)
\r
1776 if (dontMatchInQuotes)
\r
// Decompile the pattern chain; with a custom escape character the text is passed through reEscape.
1790 String patstr = thePattern.toString();
\r
1791 if (esc != Pattern.ESC)
\r
1793 patstr = reEscape(patstr, Pattern.ESC, esc);
\r
1795 sb.append(patstr);
\r
1796 return sb.toString();
\r
1800 // Re-escape Pattern text: rewrites s so that newEsc takes over from oldEsc as the escape
\r
1802 static String reEscape(String s, char oldEsc, char newEsc)
\r
// Identical escape characters: nothing needs translating (presumably s is returned as is).
1804 if (oldEsc == newEsc)
\r
1809 StringBuffer sb = new StringBuffer();
\r
1810 for (i = 0; i < s.length(); i++)
\r
// oldEsc with a character after it: a doubled oldEsc yields one oldEsc in the output,
// any other escaped character is emitted behind newEsc instead.
1812 if (s.charAt(i) == oldEsc && i + 1 < s.length())
\r
1814 if (s.charAt(i + 1) == oldEsc)
\r
1816 sb.append(oldEsc);
\r
1820 sb.append(newEsc);
\r
1821 sb.append(s.charAt(i + 1));
\r
// A newEsc already present in the input is written twice.
1825 else if (s.charAt(i) == newEsc)
\r
1827 sb.append(newEsc);
\r
1828 sb.append(newEsc);
\r
// Every other character is copied through unchanged.
1832 sb.append(s.charAt(i));
\r
1835 return sb.toString();
\r
1838 /** This method implements FilenameFilter, allowing one
\r
1839 to use a Regex to search through a directory using File.list.
\r
1840 There is now a FileRegex class that does this better.
\r
1841 @see com.stevesoft.pat.FileRegex
\r
1843 public boolean accept(File dir, String s)
\r
1848 /** Returns the version string of this package. */
\r
1849 final static public String version()
\r
1851 return "lgpl release 1.5.3";
\r
1854 /** Once this method is called, the state of variables
\r
1855 ignoreCase and dontMatchInQuotes should not be changed as the
\r
1856 results will be unpredictable. However,
\r
1857 search and matchAt will run more quickly. Note that you
\r
1858 can check to see if the pattern has been optimized by calling
\r
1859 the optimized() method.<p>This method will attempt to rewrite
\r
1860 your pattern in a way that makes it faster (not all patterns
\r
1861 execute at the same speed). In general, "(?: ... )" will be
\r
1862 faster than "( ... )" so if you don't need the backreference,
\r
1863 you should group using the former pattern.<p>It will also
\r
1864 introduce new pattern elements that you can't get to otherwise,
\r
1865 for example if you have a large table of strings, e.g. the
\r
1866 months of the year "(January|February|...)" optimize() will make
\r
1867 a Hashtable that takes it to the next appropriate pattern
\r
1868 element -- eliminating the need for a linear search.
\r
1869 @see com.stevesoft.pat.Regex#optimized
\r
1870 @see com.stevesoft.pat.Regex#ignoreCase
\r
1871 @see com.stevesoft.pat.Regex#dontMatchInQuotes
\r
1872 @see com.stevesoft.pat.Regex#matchAt
\r
1873 @see com.stevesoft.pat.Regex#search
\r
1875 public void optimize()
\r
// Guard for the two cases with nothing to do: already optimized, or no compiled pattern yet.
1877 if (optimized() || thePattern == null)
\r
// A non-null minMatch is what optimized() reports as already optimized.
1881 minMatch = new patInt(0); //thePattern.countMinChars();
\r
// Replace the pattern chain with the RegOpt.opt result, then let Skip.findSkip derive the skipper.
1882 thePattern = RegOpt.opt(thePattern, ignoreCase,
\r
1883 dontMatchInQuotes);
\r
1884 skipper = Skip.findSkip(this);
\r
1885 //RegOpt.setParents(this);
\r
1890 /** This function returns true if the optimize method has
\r
1892 public boolean optimized()
\r
// optimize() assigns minMatch a patInt, so a non-null value serves as the already-optimized marker.
1894 return minMatch != null;
\r
1897 /** A bit of syntactic sugar for those who want to make
\r
1898 their code look more perl-like. To use this initialize
\r
1899 your Regex object by saying:
\r
1901 Regex r1 = Regex.perlCode("s/hello/goodbye/");
\r
1902 Regex r2 = Regex.perlCode("s'fish'frog'i");
\r
1903 Regex r3 = Regex.perlCode("m'hello'");
\r
1905 The i for ignoreCase is supported in
\r
1906 this syntax, as well as m, s, and x. The g flag
\r
1907 is a bit of a special case.<p>
\r
1908 If you wish to replace all occurrences of a pattern, you
\r
1909 do not put a 'g' in the perlCode, but call Regex's
\r
1910 replaceAll method.<p>
\r
1911 If you wish to simply
\r
1912 and only do a search for r2's pattern, you can do this
\r
1913 by calling the searchFrom method repeatedly, or
\r
1914 by calling search repeatedly if the g flag is set.
\r
1916 Note: Currently perlCode does <em>not</em>
\r
1917 support the (?e=#) syntax for
\r
1918 changing the escape character.
\r
1921 public static Regex perlCode(String s)
\r
1923 // this file is big enough, see parsePerl.java
\r
1924 // for this function.
\r
1925 return parsePerl.parse(s);
\r
// Named constant holding the backslash character.
1928 static final char back_slash = '\\';
\r
1930 /** Checks to see if there are only literal and no special
\r
1931 pattern elements in this Regex. */
\r
1932 public boolean isLiteral()
\r
// The inspection starts at the head of the compiled chain.
1934 Pattern x = thePattern;
\r
// oneChar and Skipped nodes get dedicated handling -- presumably the literal-only element types; confirm.
1937 if (x instanceof oneChar)
\r
1941 else if (x instanceof Skipped)
\r
1954 /** Returns the countMinChars() result of the compiled pattern. You only need
\r
1955 to know about this if you are inventing your own pattern elements. */
\r
1956 public patInt countMinChars()
\r
// NOTE(review): thePattern is dereferenced without the null check that optimize() makes -- confirm it is never null here.
1958 return thePattern.countMinChars();
\r
1961 /** Returns the countMaxChars() result of the compiled pattern. You only need
\r
1962 to know about this if you are inventing your own pattern elements. */
\r
1963 public patInt countMaxChars()
\r
// NOTE(review): thePattern is dereferenced without the null check that optimize() makes -- confirm it is never null here.
1965 return thePattern.countMaxChars();
\r
// Tests the character at sp: not at end of input, sp.dontMatch not set, and
// the character is one of 0-9, a-f or A-F.
1968 boolean isHexDigit(StrPos sp)
\r
1971 !sp.eos && !sp.dontMatch
\r
1972 && ( (sp.c >= '0' && sp.c <= '9')
\r
1973 || (sp.c >= 'a' && sp.c <= 'f')
\r
1974 || (sp.c >= 'A' && sp.c <= 'F'));
\r
// Tests for an octal digit 0-7 at sp. The XOR requires sp.dontMatch to equal
// first; isOctalString passes true for the leading digit and false after it.
1978 boolean isOctalDigit(StrPos sp, boolean first)
\r
1981 !sp.eos && ! (first ^ sp.dontMatch)
\r
1982 && sp.c >= '0' && sp.c <= '7';
\r
// Converts the hex digit at sp.c to its numeric value. Nothing is validated:
// any character outside 0-9 and a-f falls through to the A-F formula, so the
// hex-escape branches call next2Hex before using this.
1986 int getHexDigit(StrPos sp)
\r
1988 if (sp.c >= '0' && sp.c <= '9')
\r
1990 return sp.c - '0';
\r
1992 if (sp.c >= 'a' && sp.c <= 'f')
\r
1994 return sp.c - 'a' + 10;
\r
1996 return sp.c - 'A' + 10;
\r
// Look-ahead used by the hex-escape branches: isHexDigit is tested twice on sp2,
// which is built from sp (assuming StrPos(StrPos) copies the cursor, sp is left untouched).
1999 boolean next2Hex(StrPos sp)
\r
2001 StrPos sp2 = new StrPos(sp);
\r
2003 if (!isHexDigit(sp2))
\r
2008 if (!isHexDigit(sp2))
\r
2015 boolean isOctalString(StrPos sp)
\r
2017 if (!isOctalDigit(sp, true))
\r
2021 StrPos sp2 = new StrPos(sp);
\r
2023 if (!isOctalDigit(sp2, false))
\r