2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
10 import jalview.util.MessageManager;
\r
12 import java.util.BitSet;
\r
13 import java.util.Hashtable;
\r
15 import com.stevesoft.pat.wrap.StringWrap;
\r
17 /** Matches a Unicode punctuation character. */
\r
18 class UnicodePunct extends UniValidator
\r
20 public int validate(StringLike s, int from, int to)
\r
22 return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;
\r
26 /** Matches a Unicode white space character. */
\r
27 class UnicodeWhite extends UniValidator
\r
29 public int validate(StringLike s, int from, int to)
\r
31 return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;
\r
36 * Matches a character that is not a Unicode punctuation character.
\r
38 class NUnicodePunct extends UniValidator
\r
40 public int validate(StringLike s, int from, int to)
\r
42 return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;
\r
47 * Matches a character that is not a Unicode white space character.
\r
49 class NUnicodeWhite extends UniValidator
\r
51 public int validate(StringLike s, int from, int to)
\r
53 return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;
\r
57 /** Matches a Unicode word character: an alphanumeric or underscore. */
\r
58 class UnicodeW extends UniValidator
\r
60 public int validate(StringLike s, int from, int to)
\r
62 if (from >= s.length())
\r
66 char c = s.charAt(from);
\r
67 return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
\r
72 /** Matches a character that is not a Unicode alphanumeric or underscore. */
\r
73 class NUnicodeW extends UniValidator
\r
75 public int validate(StringLike s, int from, int to)
\r
77 if (from >= s.length())
\r
81 char c = s.charAt(from);
\r
82 return !(Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
\r
87 /** Matches a Unicode decimal digit. */
\r
88 class UnicodeDigit extends UniValidator
\r
90 public int validate(StringLike s, int from, int to)
\r
92 return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to
\r
97 /** Matches a character that is not a Unicode digit. */
\r
98 class NUnicodeDigit extends UniValidator
\r
100 public int validate(StringLike s, int from, int to)
\r
102 return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to
\r
107 /** Matches a Unicode math character. */
\r
108 class UnicodeMath extends UniValidator
\r
110 public int validate(StringLike s, int from, int to)
\r
112 return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1;
\r
116 /** Matches a non-math Unicode character. */
\r
117 class NUnicodeMath extends UniValidator
\r
119 public int validate(StringLike s, int from, int to)
\r
121 return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;
\r
125 /** Matches a Unicode currency symbol. */
\r
126 class UnicodeCurrency extends UniValidator
\r
128 public int validate(StringLike s, int from, int to)
\r
130 return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;
\r
134 /** Matches a non-currency symbol Unicode character. */
\r
135 class NUnicodeCurrency extends UniValidator
\r
137 public int validate(StringLike s, int from, int to)
\r
139 return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;
\r
143 /** Matches a Unicode alphabetic character. */
\r
144 class UnicodeAlpha extends UniValidator
\r
146 public int validate(StringLike s, int from, int to)
\r
148 return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;
\r
152 /** Matches a non-alphabetic Unicode character. */
\r
153 class NUnicodeAlpha extends UniValidator
\r
155 public int validate(StringLike s, int from, int to)
\r
157 return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to
\r
162 /** Matches an upper case Unicode character. */
\r
163 class UnicodeUpper extends UniValidator
\r
165 public int validate(StringLike s, int from, int to)
\r
167 return from < s.length() && isUpper(s.charAt(from)) ? to : -1;
\r
170 final boolean isUpper(char c)
\r
172 return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);
\r
176 /** Matches an upper case Unicode character. */
\r
177 class UnicodeLower extends UniValidator
\r
179 public int validate(StringLike s, int from, int to)
\r
181 return from < s.length() && isLower(s.charAt(from)) ? to : -1;
\r
184 final boolean isLower(char c)
\r
186 return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);
\r
191 * Regex provides the parser which constructs the linked list of Pattern classes
\r
194 * For the purpose of this documentation, the fact that java interprets the
\r
195 * backslash will be ignored. In practice, however, you will need a double
\r
196 * backslash to obtain a string that contains a single backslash character.
\r
197 * Thus, the example pattern "\b" should really be typed as "\\b" inside java
\r
200 * Note that Regex is part of package "com.stevesoft.pat". To use it, simply
\r
201 * import com.stevesoft.pat.Regex at the top of your file.
\r
203 * Regex is made with a constructor that takes a String that defines the regular
\r
204 * expression. Thus, for example
\r
207 * Regex r = new Regex("[a-c]*");
\r
210 * matches any number of characters so long as they are 'a', 'b', or 'c').
\r
212 * To attempt to match the Pattern to a given string, you can use either the
\r
213 * search(String) member function, or the matchAt(String,int position) member
\r
214 * function. These functions return a boolean which tells you whether or not the
\r
215 * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in
\r
216 * the Regex object appropriately.
\r
218 * The portion of the string before the match can be obtained by the left()
\r
219 * member, and the portion after the match can be obtained by the right()
\r
222 * Essentially, this package implements a syntax that is very much like the perl
\r
223 * 5 regular expression syntax.
\r
228 * Regex r = new Regex("x(a|b)y");
\r
229 * r.matchAt("xay", 0);
\r
230 * System.out.println("sub = " + r.stringMatched(1));
\r
233 * The above would print "sub = a".
\r
236 * r.left() // would return "x"
\r
237 * r.right() // would return "y"
\r
241 * Differences between this package and perl5:<br>
\r
242 * The extended Pattern for setting flags, is now supported, but the flags are
\r
243 * different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the
\r
244 * "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the
\r
245 * escape character. The pattern
\r
255 * </pre>, but note that the sequence
\r
261 * <b>must</b> occur at the very beginning of the pattern. There may be other
\r
262 * small differences as well. I will either make my package conform or note them
\r
263 * as I become aware of them.
\r
265 * This package supports additional patterns not in perl5: <center> <table
\r
270 * <td>This matches all characters between the '(' character and the balancing
\r
271 * ')' character. Thus, it will match "()" as well as "(())". The balancing
\r
272 * characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>
\r
276 * <td>Moves the pointer backwards within the text. This allows you to make a
\r
277 * "look behind." It fails if it attempts to move to a position before the
\r
278 * beginning of the string. "x(?<1)" is equivalent to "(?=x)". The number, 1
\r
279 * in this example, is the number of characters to move backwards.</td>
\r
280 * </table> </center>
\r
283 * @author Steven R. Brandt
\r
284 * @version package com.stevesoft.pat, release 1.5.3
\r
287 public class Regex extends RegRes// implements FilenameFilter
\r
290 * BackRefOffset gives the identity number of the first pattern. Version 1.0
\r
291 * used zero, version 1.1 uses 1 to be more compatible with perl.
\r
293 static int BackRefOffset = 1;
\r
295 private static Pattern none = new NoPattern();
\r
297 Pattern thePattern = none;
\r
299 patInt minMatch = new patInt(0);
\r
301 static Hashtable validators = new Hashtable();
\r
304 define("p", "(?>1)", new UnicodePunct());
\r
305 define("P", "(?>1)", new NUnicodePunct());
\r
306 define("s", "(?>1)", new UnicodeWhite());
\r
307 define("S", "(?>1)", new NUnicodeWhite());
\r
308 define("w", "(?>1)", new UnicodeW());
\r
309 define("W", "(?>1)", new NUnicodeW());
\r
310 define("d", "(?>1)", new UnicodeDigit());
\r
311 define("D", "(?>1)", new NUnicodeDigit());
\r
312 define("m", "(?>1)", new UnicodeMath());
\r
313 define("M", "(?>1)", new NUnicodeMath());
\r
314 define("c", "(?>1)", new UnicodeCurrency());
\r
315 define("C", "(?>1)", new NUnicodeCurrency());
\r
316 define("a", "(?>1)", new UnicodeAlpha());
\r
317 define("A", "(?>1)", new NUnicodeAlpha());
\r
318 define("uc", "(?>1)", new UnicodeUpper());
\r
319 define("lc", "(?>1)", new UnicodeLower());
\r
322 /** Set the dontMatch in quotes flag. */
\r
323 public void setDontMatchInQuotes(boolean b)
\r
325 dontMatchInQuotes = b;
\r
328 /** Find out if the dontMatchInQuotes flag is enabled. */
\r
329 public boolean getDontMatchInQuotes()
\r
331 return dontMatchInQuotes;
\r
334 boolean dontMatchInQuotes = false;
\r
337 * Set the state of the ignoreCase flag. If set to true, then the pattern
\r
338 * matcher will ignore case when searching for a match.
\r
340 public void setIgnoreCase(boolean b)
\r
346 * Get the state of the ignoreCase flag. Returns true if we are ignoring the
\r
347 * case of the pattern, false otherwise.
\r
349 public boolean getIgnoreCase()
\r
354 boolean ignoreCase = false;
\r
356 static boolean defaultMFlag = false;
\r
359 * Set the default value of the m flag. If it is set to true, then the MFlag
\r
360 * will be on for any regex search executed.
\r
362 public static void setDefaultMFlag(boolean mFlag)
\r
364 defaultMFlag = mFlag;
\r
368 * Get the default value of the m flag. If it is set to true, then the MFlag
\r
369 * will be on for any regex search executed.
\r
371 public static boolean getDefaultMFlag()
\r
373 return defaultMFlag;
\r
377 * Initializes the object without a Pattern. To supply a Pattern use
\r
378 * compile(String s).
\r
380 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
387 * Create and compile a Regex, but do not throw any exceptions. If you wish to
\r
388 * have exceptions thrown for syntax errors, you must use the Regex(void)
\r
389 * constructor to create the Regex object, and then call the compile method.
\r
390 * Therefore, you should only call this method when you know your pattern is
\r
391 * right. I will probably become more like
\r
393 * @see com.stevesoft.pat.Regex#search(java.lang.String)
\r
394 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
396 public Regex(String s)
\r
401 } catch (RegSyntax rs)
\r
406 ReplaceRule rep = null;
\r
409 * Create and compile both a Regex and a ReplaceRule.
\r
411 * @see com.stevesoft.pat.ReplaceRule
\r
412 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
414 public Regex(String s, String rp)
\r
417 rep = parsePerl.perlCode(rp);
\r
421 * Create and compile a Regex, but give it the ReplaceRule specified. This
\r
422 * allows the user finer control of the Replacement process, if that is
\r
425 * @see com.stevesoft.pat.ReplaceRule
\r
426 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
\r
428 public Regex(String s, ReplaceRule rp)
\r
435 * Change the ReplaceRule of this Regex by compiling a new one using String
\r
438 public void setReplaceRule(String rp)
\r
440 rep = parsePerl.perlCode(rp);
\r
441 repr = null; // Clear Replacer history
\r
444 /** Change the ReplaceRule of this Regex to rp. */
\r
445 public void setReplaceRule(ReplaceRule rp)
\r
451 * Test to see if a custom defined rule exists.
\r
453 * @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator)
\r
455 public static boolean isDefined(String nm)
\r
457 return validators.get(nm) != null;
\r
461 * Removes a custom defined rule.
\r
463 * @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator)
\r
465 public static void undefine(String nm)
\r
467 validators.remove(nm);
\r
471 * Defines a method to create a new rule. See test/deriv2.java and
\r
472 * test/deriv3.java for examples of how to use it.
\r
474 public static void define(String nm, String pat, Validator v)
\r
477 validators.put(nm, v);
\r
481 * Defines a shorthand for a pattern. The pattern will be invoked by a string
\r
482 * that has the form "(??"+nm+")".
\r
484 public static void define(String nm, String pat)
\r
486 validators.put(nm, pat);
\r
489 /** Get the current ReplaceRule. */
\r
490 public ReplaceRule getReplaceRule()
\r
495 Replacer repr = null;
\r
497 final Replacer _getReplacer()
\r
499 return repr == null ? repr = new Replacer() : repr;
\r
502 public Replacer getReplacer()
\r
506 repr = new Replacer();
\r
509 repr.rh.prev = null;
\r
514 * Replace the first occurence of this pattern in String s according to the
\r
517 * @see com.stevesoft.pat.ReplaceRule
\r
518 * @see com.stevesoft.pat.Regex#getReplaceRule()
\r
520 public String replaceFirst(String s)
\r
522 return _getReplacer().replaceFirstRegion(s, this, 0, s.length())
\r
527 * Replace the first occurence of this pattern in String s beginning with
\r
528 * position pos according to the ReplaceRule.
\r
530 * @see com.stevesoft.pat.ReplaceRule
\r
531 * @see com.stevesoft.pat.Regex#getReplaceRule()
\r
533 public String replaceFirstFrom(String s, int pos)
\r
535 return _getReplacer().replaceFirstRegion(s, this, pos, s.length())
\r
540 * Replace the first occurence of this pattern in String s beginning with
\r
541 * position start and ending with end according to the ReplaceRule.
\r
543 * @see com.stevesoft.pat.ReplaceRule
\r
544 * @see com.stevesoft.pat.Regex#getReplaceRule()
\r
546 public String replaceFirstRegion(String s, int start, int end)
\r
548 return _getReplacer().replaceFirstRegion(s, this, start, end)
\r
553 * Replace all occurences of this pattern in String s according to the
\r
556 * @see com.stevesoft.pat.ReplaceRule
\r
557 * @see com.stevesoft.pat.Regex#getReplaceRule()
\r
559 public String replaceAll(String s)
\r
561 return _getReplacer().replaceAllRegion(s, this, 0, s.length())
\r
565 public StringLike replaceAll(StringLike s)
\r
567 return _getReplacer().replaceAllRegion(s, this, 0, s.length());
\r
571 * Replace all occurences of this pattern in String s beginning with position
\r
572 * pos according to the ReplaceRule.
\r
574 * @see com.stevesoft.pat.ReplaceRule
\r
575 * @see com.stevesoft.pat.Regex#getReplaceRule()
\r
577 public String replaceAllFrom(String s, int pos)
\r
579 return _getReplacer().replaceAllRegion(s, this, pos, s.length())
\r
584 * Replace all occurences of this pattern in String s beginning with position
\r
585 * start and ending with end according to the ReplaceRule.
\r
587 * @see com.stevesoft.pat.ReplaceRule
\r
588 * @see com.stevesoft.pat.Regex#getReplaceRule()
\r
590 public String replaceAllRegion(String s, int start, int end)
\r
592 return _getReplacer().replaceAllRegion(s, this, start, end).toString();
\r
595 /** Essentially clones the Regex object */
\r
596 public Regex(Regex r)
\r
599 dontMatchInQuotes = r.dontMatchInQuotes;
\r
601 ignoreCase = r.ignoreCase;
\r
609 rep = (ReplaceRule) r.rep.clone();
\r
612 * try { compile(r.toString()); } catch(RegSyntax r_) {}
\r
614 thePattern = r.thePattern.clone(new Hashtable());
\r
615 minMatch = r.minMatch;
\r
616 skipper = r.skipper;
\r
620 * By default, the escape character is the backslash, but you can make it
\r
621 * anything you want by setting this variable.
\r
623 public char esc = Pattern.ESC;
\r
626 * This method compiles a regular expression, making it possible to call the
\r
627 * search or matchAt methods.
\r
629 * @exception com.stevesoft.pat.RegSyntax
\r
630 * is thrown if a syntax error is encountered in the
\r
631 * pattern. For example, "x{3,1}" or "*a" are not valid
\r
633 * @see com.stevesoft.pat.Regex#search
\r
634 * @see com.stevesoft.pat.Regex#matchAt
\r
636 public void compile(String prepat) throws RegSyntax
\r
638 String postpat = parsePerl.codify(prepat, true);
\r
639 String pat = postpat == null ? prepat : postpat;
\r
641 ignoreCase = false;
\r
642 dontMatchInQuotes = false;
\r
643 Rthings mk = new Rthings(this);
\r
644 int offset = mk.val;
\r
645 String newpat = pat;
\r
649 minMatch = new patInt(0);
\r
650 StrPos sp = new StrPos(pat, 0);
\r
651 if (sp.incMatch("(?e="))
\r
653 char newEsc = sp.c;
\r
657 newpat = reEscape(pat.substring(6), newEsc, Pattern.ESC);
\r
660 else if (esc != Pattern.ESC)
\r
662 newpat = reEscape(pat, esc, Pattern.ESC);
\r
664 thePattern = _compile2(newpat, mk);
\r
665 numSubs_ = mk.val - offset;
\r
670 * If a Regex is compared against a Regex, a check is done to see that the
\r
671 * patterns are equal as well as the most recent match. If a Regex is compare
\r
672 * with a RegRes, only the result of the most recent match is compared.
\r
674 public boolean equals(Object o)
\r
676 if (o instanceof Regex)
\r
678 if (toString().equals(o.toString()))
\r
680 return super.equals(o);
\r
689 return super.equals(o);
\r
693 /** A clone by any other name would smell as sweet. */
\r
694 public Object clone()
\r
696 return new Regex(this);
\r
699 /** Return a clone of the underlying RegRes object. */
\r
700 public RegRes result()
\r
702 return (RegRes) super.clone();
\r
705 // prep sets global variables of class
\r
706 // Pattern so that it can access them
\r
707 // during an attempt at a match
\r
708 Pthings pt = new Pthings();
\r
710 final Pthings prep(StringLike s)
\r
713 pt.lastPos = matchedTo();
\r
714 if (pt.lastPos < 0)
\r
718 if ((s == null ? null : s.unwrap()) != (src == null ? null : s.unwrap()))
\r
723 pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);
\r
724 pt.mFlag = (mFlag | defaultMFlag);
\r
725 pt.ignoreCase = ignoreCase;
\r
726 pt.no_check = false;
\r
727 if (pt.marks != null)
\r
729 for (int i = 0; i < pt.marks.length; i++)
\r
735 pt.nMarks = numSubs_;
\r
737 if (dontMatchInQuotes)
\r
749 * Attempt to match a Pattern beginning at a specified location within the
\r
752 * @see com.stevesoft.pat.Regex#search
\r
754 public boolean matchAt(String s, int start_pos)
\r
756 return _search(s, start_pos, start_pos);
\r
760 * Attempt to match a Pattern beginning at a specified location within the
\r
763 * @see com.stevesoft.pat.Regex#search
\r
765 public boolean matchAt(StringLike s, int start_pos)
\r
767 return _search(s, start_pos, start_pos);
\r
771 * Search through a String for the first occurrence of a match.
\r
773 * @see com.stevesoft.pat.Regex#searchFrom
\r
774 * @see com.stevesoft.pat.Regex#matchAt
\r
776 public boolean search(String s)
\r
780 throw new NullPointerException(MessageManager.getString("exception.null_string_given_to_regex_search"));
\r
782 return _search(s, 0, s.length());
\r
785 public boolean search(StringLike sl)
\r
789 throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search"));
\r
791 return _search(sl, 0, sl.length());
\r
794 public boolean reverseSearch(String s)
\r
798 throw new NullPointerException(MessageManager.getString("exception.null_string_given_to_regex_reverse_search"));
\r
800 return _reverseSearch(s, 0, s.length());
\r
803 public boolean reverseSearch(StringLike sl)
\r
807 throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_reverse_search"));
\r
809 return _reverseSearch(sl, 0, sl.length());
\r
813 * Search through a String for the first occurence of a match, but start at
\r
820 public boolean searchFrom(String s, int start)
\r
824 throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_from"));
\r
826 return _search(s, start, s.length());
\r
829 public boolean searchFrom(StringLike s, int start)
\r
833 throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_from"));
\r
835 return _search(s, start, s.length());
\r
839 * Search through a region of a String for the first occurence of a match.
\r
841 public boolean searchRegion(String s, int start, int end)
\r
845 throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_region"));
\r
847 return _search(s, start, end);
\r
851 * Set this to change the default behavior of the "." pattern. By default it
\r
852 * now matches perl's behavior and fails to match the '\n' character.
\r
854 public static boolean dotDoesntMatchCR = true;
\r
860 boolean gFlag = false;
\r
862 /** Set the 'g' flag */
\r
863 public void setGFlag(boolean b)
\r
868 /** Get the state of the 'g' flag. */
\r
869 public boolean getGFlag()
\r
874 boolean sFlag = false;
\r
876 /** Get the state of the sFlag */
\r
877 public boolean getSFlag()
\r
882 boolean mFlag = false;
\r
884 /** Get the state of the mFlag */
\r
885 public boolean getMFlag()
\r
890 final boolean _search(String s, int start, int end)
\r
892 return _search(new StringWrap(s), start, end);
\r
895 final boolean _search(StringLike s, int start, int end)
\r
897 if (gFlag && gFlagto > 0 && gFlags != null
\r
898 && s.unwrap() == gFlags.unwrap())
\r
904 Pthings pt = prep(s);
\r
906 int up = (minMatch == null ? end : end - minMatch.i);
\r
908 if (up < start && end >= start)
\r
913 if (skipper == null)
\r
915 for (int i = start; i <= up; i++)
\r
917 charsMatched_ = thePattern.matchAt(s, i, pt);
\r
918 if (charsMatched_ >= 0)
\r
920 matchFrom_ = thePattern.mfrom;
\r
922 gFlagto = matchFrom_ + charsMatched_;
\r
924 return didMatch_ = true;
\r
930 pt.no_check = true;
\r
931 for (int i = start; i <= up; i++)
\r
933 i = skipper.find(src, i, up);
\r
936 charsMatched_ = matchFrom_ = -1;
\r
937 return didMatch_ = false;
\r
939 charsMatched_ = thePattern.matchAt(s, i, pt);
\r
940 if (charsMatched_ >= 0)
\r
942 matchFrom_ = thePattern.mfrom;
\r
944 gFlagto = matchFrom_ + charsMatched_;
\r
946 return didMatch_ = true;
\r
950 return didMatch_ = false;
\r
954 * final boolean _search(LongStringLike s,long start,long end) { if(gFlag &&
\r
955 * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null;
\r
957 * Pthings pt=prep(s);
\r
959 * int up = end;//(minMatch == null ? end : end-minMatch.i);
\r
961 * if(up < start && end >= start) up = start;
\r
963 * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ =
\r
964 * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
\r
965 * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
\r
966 * return didMatch_=true; } } } else { pt.no_check = true; for(long i=start;i<=up;i++) {
\r
967 * i = skipper.find(src,i,up); if(i<0) { charsMatched_ = matchFrom_ = -1;
\r
968 * return didMatch_ = false; } charsMatched_ = thePattern.matchAt(s,i,pt);
\r
969 * if(charsMatched_ >= 0) { matchFrom_ = thePattern.mfrom; marks = pt.marks;
\r
970 * gFlagto = matchFrom_+charsMatched_; gFlags = s; return didMatch_=true; }
\r
971 * else { i = s.adjustIndex(i); up = s.adjustEnd(i); } } } return
\r
972 * didMatch_=false; }
\r
975 boolean _reverseSearch(String s, int start, int end)
\r
977 return _reverseSearch(new StringWrap(s), start, end);
\r
980 boolean _reverseSearch(StringLike s, int start, int end)
\r
982 if (gFlag && gFlagto > 0 && s.unwrap() == gFlags.unwrap())
\r
987 Pthings pt = prep(s);
\r
988 for (int i = end; i >= start; i--)
\r
990 charsMatched_ = thePattern.matchAt(s, i, pt);
\r
991 if (charsMatched_ >= 0)
\r
993 matchFrom_ = thePattern.mfrom;
\r
995 gFlagto = matchFrom_ - 1;
\r
997 return didMatch_ = true;
\r
1000 return didMatch_ = false;
\r
1003 // This routine sets the cbits variable
\r
1004 // of class Pattern. Cbits is true for
\r
1005 // the bit corresponding to a character inside
\r
1006 // a set of quotes.
\r
1007 static StringLike lasts = null;
\r
1009 static BitSet lastbs = null;
\r
1011 static void setCbits(StringLike s, Pthings pt)
\r
1015 pt.cbits = lastbs;
\r
1018 BitSet bs = new BitSet(s.length());
\r
1020 boolean setBit = false;
\r
1021 for (int i = 0; i < s.length(); i++)
\r
1027 char c = s.charAt(i);
\r
1028 if (!setBit && c == '"')
\r
1034 else if (!setBit && c == '\'')
\r
1040 else if (setBit && c == qc)
\r
1044 else if (setBit && c == '\\' && i + 1 < s.length())
\r
1053 pt.cbits = lastbs = bs;
\r
1057 // Wanted user to over-ride this in alpha version,
\r
1058 // but it wasn't really necessary because of this trick:
\r
1063 return (Regex) getClass().newInstance();
\r
1064 } catch (InstantiationException ie)
\r
1067 } catch (IllegalAccessException iae)
\r
1074 * Only needed for creating your own extensions of Regex. This method adds the
\r
1075 * next Pattern in the chain of patterns or sets the Pattern if it is the
\r
1078 protected void add(Pattern p2)
\r
1092 * You only need to use this method if you are creating your own extensions to
\r
1093 * Regex. compile1 compiles one Pattern element, it can be over-ridden to
\r
1094 * allow the Regex compiler to understand new syntax. See deriv.java for an
\r
1095 * example. This routine is the heart of class Regex. Rthings has one integer
\r
1096 * member called intValue, it is used to keep track of the number of ()'s in
\r
1099 * @exception com.stevesoft.pat.RegSyntax
\r
1100 * is thrown when a nonsensical pattern is supplied. For
\r
1101 * example, a pattern beginning with *.
\r
1103 protected void compile1(StrPos sp, Rthings mk) throws RegSyntax
\r
1105 if (sp.match('['))
\r
1108 add(matchBracket(sp));
\r
1110 else if (sp.match('|'))
\r
1118 p = new NullPattern();
\r
1123 else if (sp.incMatch("(?<"))
\r
1125 patInt i = sp.getPatInt();
\r
1128 RegSyntaxError.endItAll("No int after (?<");
\r
1130 add(new Backup(i.intValue()));
\r
1131 if (!sp.match(')'))
\r
1133 RegSyntaxError.endItAll("No ) after (?<");
\r
1136 else if (sp.incMatch("(?>"))
\r
1138 patInt i = sp.getPatInt();
\r
1141 RegSyntaxError.endItAll("No int after (?>");
\r
1143 add(new Backup(-i.intValue()));
\r
1144 if (!sp.match(')'))
\r
1146 RegSyntaxError.endItAll("No ) after (?<");
\r
1149 else if (sp.incMatch("(?@"))
\r
1155 if (!sp.match(')'))
\r
1157 RegSyntaxError.endItAll("(?@ does not have closing paren");
\r
1159 add(new Group(op, cl));
\r
1161 else if (sp.incMatch("(?#"))
\r
1163 while (!sp.match(')'))
\r
1168 else if (sp.dontMatch && sp.c == 'w')
\r
1170 // Regex r = new Regex();
\r
1171 // r._compile("[a-zA-Z0-9_]",mk);
\r
1172 // add(new Goop("\\w",r.thePattern));
\r
1173 Bracket b = new Bracket(false);
\r
1174 b.addOr(new Range('a', 'z'));
\r
1175 b.addOr(new Range('A', 'Z'));
\r
1176 b.addOr(new Range('0', '9'));
\r
1177 b.addOr(new oneChar('_'));
\r
1180 else if (sp.dontMatch && sp.c == 'G')
\r
1184 else if (sp.dontMatch && sp.c == 's')
\r
1186 // Regex r = new Regex();
\r
1187 // r._compile("[ \t\n\r\b]",mk);
\r
1188 // add(new Goop("\\s",r.thePattern));
\r
1189 Bracket b = new Bracket(false);
\r
1190 b.addOr(new oneChar((char) 32));
\r
1191 b.addOr(new Range((char) 8, (char) 10));
\r
1192 b.addOr(new oneChar((char) 13));
\r
1195 else if (sp.dontMatch && sp.c == 'd')
\r
1197 // Regex r = new Regex();
\r
1198 // r._compile("[0-9]",mk);
\r
1199 // add(new Goop("\\d",r.thePattern));
\r
1200 Range digit = new Range('0', '9');
\r
1201 digit.printBrackets = true;
\r
1204 else if (sp.dontMatch && sp.c == 'W')
\r
1206 // Regex r = new Regex();
\r
1207 // r._compile("[^a-zA-Z0-9_]",mk);
\r
1208 // add(new Goop("\\W",r.thePattern));
\r
1209 Bracket b = new Bracket(true);
\r
1210 b.addOr(new Range('a', 'z'));
\r
1211 b.addOr(new Range('A', 'Z'));
\r
1212 b.addOr(new Range('0', '9'));
\r
1213 b.addOr(new oneChar('_'));
\r
1216 else if (sp.dontMatch && sp.c == 'S')
\r
1218 // Regex r = new Regex();
\r
1219 // r._compile("[^ \t\n\r\b]",mk);
\r
1220 // add(new Goop("\\S",r.thePattern));
\r
1221 Bracket b = new Bracket(true);
\r
1222 b.addOr(new oneChar((char) 32));
\r
1223 b.addOr(new Range((char) 8, (char) 10));
\r
1224 b.addOr(new oneChar((char) 13));
\r
1227 else if (sp.dontMatch && sp.c == 'D')
\r
1229 // Regex r = new Regex();
\r
1230 // r._compile("[^0-9]",mk);
\r
1231 // add(new Goop("\\D",r.thePattern));
\r
1232 Bracket b = new Bracket(true);
\r
1233 b.addOr(new Range('0', '9'));
\r
1236 else if (sp.dontMatch && sp.c == 'B')
\r
1238 Regex r = new Regex();
\r
1239 r._compile2("(?!" + back_slash + "b)", mk);
\r
1240 add(r.thePattern);
\r
1242 else if (isOctalString(sp))
\r
1244 int d = sp.c - '0';
\r
1246 d = 8 * d + sp.c - '0';
\r
1247 StrPos sp2 = new StrPos(sp);
\r
1249 if (isOctalDigit(sp2, false))
\r
1252 d = 8 * d + sp.c - '0';
\r
1254 add(new oneChar((char) d));
\r
1256 else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')
\r
1258 int iv = sp.c - '0';
\r
1259 StrPos s2 = new StrPos(sp);
\r
1261 if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')
\r
1263 iv = 10 * iv + (s2.c - '0');
\r
1266 add(new BackMatch(iv));
\r
1268 else if (sp.dontMatch && sp.c == 'b')
\r
1270 add(new Boundary());
\r
1272 else if (sp.match('\b'))
\r
1274 add(new Boundary());
\r
1276 else if (sp.match('$'))
\r
1278 add(new End(true));
\r
1280 else if (sp.dontMatch && sp.c == 'Z')
\r
1282 add(new End(false));
\r
1284 else if (sp.match('.'))
\r
1288 else if (sp.incMatch("(??"))
\r
1290 javajs.util.SB sb = new javajs.util.SB();
\r
1291 javajs.util.SB sb2 = new javajs.util.SB();
\r
1292 while (!sp.match(')') && !sp.match(':'))
\r
1297 if (sp.incMatch(":"))
\r
1299 while (!sp.match(')'))
\r
1301 sb2.appendC(sp.c);
\r
1305 String sbs = sb.toString();
\r
1306 if (validators.get(sbs) instanceof String)
\r
1308 String pat = (String) validators.get(sbs);
\r
1309 Regex r = newRegex();
\r
1310 Rthings rth = new Rthings(this);
\r
1311 rth.noBackRefs = true;
\r
1312 r._compile2(pat, rth);
\r
1313 add(r.thePattern);
\r
1317 Custom cm = new Custom(sb.toString());
\r
1320 Validator v2 = cm.v.arg(sb2.toString());
\r
1323 v2.argsave = sb2.toString();
\r
1324 String p = cm.v.pattern;
\r
1328 Regex r = newRegex();
\r
1329 Rthings rth = new Rthings(this);
\r
1330 rth.noBackRefs = true;
\r
1331 r._compile2(cm.v.pattern, rth);
\r
1332 cm.sub = r.thePattern;
\r
1333 cm.sub.add(new CustomEndpoint(cm));
\r
1334 cm.sub.setParent(cm);
\r
1339 else if (sp.match('('))
\r
1342 Regex r = newRegex();
\r
1343 // r.or = new Or();
\r
1345 if (sp.incMatch("?:"))
\r
1349 else if (sp.incMatch("?="))
\r
1351 r.or = new lookAhead(false);
\r
1353 else if (sp.incMatch("?!"))
\r
1355 r.or = new lookAhead(true);
\r
1357 else if (sp.match('?'))
\r
1364 mk.ignoreCase = true;
\r
1368 mk.dontMatchInQuotes = true;
\r
1372 mk.optimizeMe = true;
\r
1387 } while (!sp.match(')') && !sp.eos);
\r
1390 if (sp.eos) // throw new RegSyntax
\r
1392 RegSyntaxError.endItAll("Unclosed ()");
\r
1396 { // just ordinary parenthesis
\r
1397 r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
\r
1401 add(r._compile1(sp, mk));
\r
1404 else if (sp.match('^'))
\r
1406 add(new Start(true));
\r
1408 else if (sp.dontMatch && sp.c == 'A')
\r
1410 add(new Start(false));
\r
1412 else if (sp.match('*'))
\r
1414 addMulti(new patInt(0), new patInf());
\r
1416 else if (sp.match('+'))
\r
1418 addMulti(new patInt(1), new patInf());
\r
1420 else if (sp.match('?'))
\r
1422 addMulti(new patInt(0), new patInt(1));
\r
1424 else if (sp.match('{'))
\r
1426 boolean bad = false;
\r
1427 StrPos sp2 = new StrPos(sp);
\r
1428 // javajs.util.SB sb = new javajs.util.SB();
\r
1430 patInt i1 = sp.getPatInt();
\r
1432 if (sp.match('}'))
\r
1438 if (!sp.match(','))
\r
1441 * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed
\r
1447 if (sp.match('}'))
\r
1449 i2 = new patInf();
\r
1453 i2 = sp.getPatInt();
\r
1456 if (i1 == null || i2 == null)
\r
1459 * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}");
\r
1466 add(new oneChar(sp.c));
\r
1473 else if (sp.escMatch('x') && next2Hex(sp))
\r
1476 int d = getHexDigit(sp);
\r
1478 d = 16 * d + getHexDigit(sp);
\r
1479 add(new oneChar((char) d));
\r
1481 else if (sp.escMatch('c'))
\r
1484 if (sp.c < Ctrl.cmap.length)
\r
1486 add(new oneChar(Ctrl.cmap[sp.c]));
\r
1490 add(new oneChar(sp.c));
\r
1493 else if (sp.escMatch('f'))
\r
1495 add(new oneChar((char) 12));
\r
1497 else if (sp.escMatch('a'))
\r
1499 add(new oneChar((char) 7));
\r
1501 else if (sp.escMatch('t'))
\r
1503 add(new oneChar('\t'));
\r
1505 else if (sp.escMatch('n'))
\r
1507 add(new oneChar('\n'));
\r
1509 else if (sp.escMatch('r'))
\r
1511 add(new oneChar('\r'));
\r
1513 else if (sp.escMatch('b'))
\r
1515 add(new oneChar('\b'));
\r
1517 else if (sp.escMatch('e'))
\r
1519 add(new oneChar((char) 27));
\r
1523 add(new oneChar(sp.c));
\r
1524 if (sp.match(')'))
\r
1526 RegSyntaxError.endItAll("Unmatched right paren in pattern");
\r
1531 // compiles all Pattern elements, internal method
\r
// Compiles the pattern text in pat and stores the result in thePattern.
// pat - the pattern text; mk - handed unchanged to _compile1.
// Returns thePattern. Declared to throw RegSyntax.
1532 private Pattern _compile2(String pat, Rthings mk) throws RegSyntax
\r
// reset the s, m, ignoreCase and g flags before compiling
1535 sFlag = mFlag = ignoreCase = gFlag = false;
\r
// presumably a read cursor over pat starting at offset 0 -- confirm against StrPos
1536 StrPos sp = new StrPos(pat, 0);
\r
1537 thePattern = _compile1(sp, mk);
\r
1539 return thePattern;
\r
// Compiles pattern elements read from sp and returns the resulting chain p,
// substituting a NullPattern when p is null. Declared to throw RegSyntax.
// sp - the pattern text cursor; mk - compile state (parenLevel is read here).
1546 Pattern _compile1(StrPos sp, Rthings mk) throws RegSyntax
\r
// loop until end of input, or -- only when `or` is set -- until a ')' is matched
1548 while (!(sp.eos || (or != null && sp.match(')'))))
\r
1553 if (sp.match(')'))
\r
// input ended while mk.parenLevel is still non-zero: report it
1557 else if (sp.eos && mk.parenLevel != 0)
\r
1559 RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);
\r
1565 p = new NullPattern();
\r
// never hand back null: fall back to a NullPattern
1570 return p == null ? new NullPattern() : p;
\r
1573 // add a multi object to the end of the chain
\r
1574 // which applies to the last object
\r
// Applies the repetition bounds i1..i2 to the last element of the chain that
// starts at p.
// i1 - lower bound; i2 - upper bound.
// Throws RegSyntax("Syntax error.") when the last element is already a
// Multi/FastMulti/DotMulti and the bounds are anything other than 0..1.
1575 void addMulti(patInt i1, patInt i2) throws RegSyntax
\r
1577 Pattern last, last2;
\r
// walk to the final element of the chain (last stays null when p is null)
1578 for (last = p; last != null && last.next != null; last = last.next)
\r
// empty chain, or a chain with a single element
1582 if (last == null || last == p)
\r
// find last2, the element whose next is last
1588 for (last2 = p; last2.next != last; last2 = last2.next)
\r
// bounds 0..1 applied to an existing repetition only set its matchFewest flag
1593 if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1)
\r
1595 ((Multi) last).matchFewest = true;
\r
1597 else if (last instanceof FastMulti && i1.intValue() == 0
\r
1598 && i2.intValue() == 1)
\r
1600 ((FastMulti) last).matchFewest = true;
\r
1602 else if (last instanceof DotMulti && i1.intValue() == 0
\r
1603 && i2.intValue() == 1)
\r
1605 ((DotMulti) last).matchFewest = true;
\r
// any other bounds on top of an existing repetition are rejected
1607 else if (last instanceof Multi || last instanceof DotMulti
\r
1608 || last instanceof FastMulti)
\r
1610 throw new RegSyntax("Syntax error.");
\r
// no predecessor: the repetition built from p becomes the new head
1612 else if (last2 == null)
\r
1614 p = mkMulti(i1, i2, p);
\r
// otherwise splice the repetition in place of last
1618 last2.next = mkMulti(i1, i2, last);
\r
// Builds the repetition element for p with bounds lo..hi, choosing among three
// implementations:
//   - DotMulti  when p is an Any with nothing chained after it,
//   - FastMulti when RegOpt.safe4fm(p) returns true,
//   - Multi     otherwise.
// lo - lower bound; hi - upper bound; p - the element being repeated.
1622 final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)
\r
1625 if (p instanceof Any && p.next == null)
\r
// DotMulti takes only the bounds; p itself is not passed on
1627 return (Pattern) new DotMulti(lo, hi);
\r
1629 return RegOpt.safe4fm(p) ? (Pattern) new FastMulti(lo, hi, p)
\r
1630 : (Pattern) new Multi(lo, hi, p);
\r
1633 // process the bracket operator
\r
// Parses a bracket expression from sp into a Bracket, adding one alternative
// (oneChar or Range) per item until ']' is matched or the input ends.
// sp - the pattern text cursor. Declared to throw RegSyntax.
1634 Pattern matchBracket(StrPos sp) throws RegSyntax
\r
// a leading '^' selects Bracket(true) -- presumably a negated class; confirm in Bracket
1637 if (sp.match('^'))
\r
1639 ret = new Bracket(true);
\r
1644 ret = new Bracket(false);
\r
// a ']' at this point is reported as an error
1646 if (sp.match(']'))
\r
1648 // throw new RegSyntax
\r
1649 RegSyntaxError.endItAll("Unmatched []");
\r
1652 while (!sp.eos && !sp.match(']'))
\r
// s1 and s1_ are copies of the cursor, so the '-' test does not consume from sp
1654 StrPos s1 = new StrPos(sp);
\r
1656 StrPos s1_ = new StrPos(s1);
\r
// '-' that is not followed by ']' denotes a range
1658 if (s1.match('-') && !s1_.match(']'))
\r
1660 StrPos s2 = new StrPos(s1);
\r
1664 ret.addOr(new Range(sp.c, s2.c));
\r
// \Q ... \E: every character up to \E is added as-is
1669 else if (sp.escMatch('Q'))
\r
// NOTE(review): no end-of-input test is visible in this loop condition --
// confirm an unterminated \Q cannot loop forever
1672 while (!sp.escMatch('E'))
\r
1674 ret.addOr(new oneChar(sp.c));
\r
// \d : '0'..'9'
1678 else if (sp.escMatch('d'))
\r
1680 ret.addOr(new Range('0', '9'));
\r
// \s : space (32), characters 8..10, and carriage return (13)
1682 else if (sp.escMatch('s'))
\r
1684 ret.addOr(new oneChar((char) 32));
\r
1685 ret.addOr(new Range((char) 8, (char) 10));
\r
1686 ret.addOr(new oneChar((char) 13));
\r
// \w : ASCII letters, digits and underscore
1688 else if (sp.escMatch('w'))
\r
1690 ret.addOr(new Range('a', 'z'));
\r
1691 ret.addOr(new Range('A', 'Z'));
\r
1692 ret.addOr(new Range('0', '9'));
\r
1693 ret.addOr(new oneChar('_'));
\r
// \D : everything except 48..57 ('0'..'9')
1695 else if (sp.escMatch('D'))
\r
1697 ret.addOr(new Range((char) 0, (char) 47));
\r
1698 ret.addOr(new Range((char) 58, (char) 65535));
\r
// \S : everything except the characters added for \s above
1700 else if (sp.escMatch('S'))
\r
1702 ret.addOr(new Range((char) 0, (char) 7));
\r
1703 ret.addOr(new Range((char) 11, (char) 12));
\r
1704 ret.addOr(new Range((char) 14, (char) 31));
\r
1705 ret.addOr(new Range((char) 33, (char) 65535));
\r
// \W : 0..64, 91..94, 96 and 123..65535
// NOTE(review): 0..64 also contains '0'..'9' (48..57), which the \w branch
// above accepts as word characters -- looks unintended; confirm
1707 else if (sp.escMatch('W'))
\r
1709 ret.addOr(new Range((char) 0, (char) 64));
\r
1710 ret.addOr(new Range((char) 91, (char) 94));
\r
1711 ret.addOr(new oneChar((char) 96));
\r
1712 ret.addOr(new Range((char) 123, (char) 65535));
\r
// \x followed by two hex digits: a character code
1714 else if (sp.escMatch('x') && next2Hex(sp))
\r
1717 int d = getHexDigit(sp);
\r
// combine the two hex digits, high digit first
1719 d = 16 * d + getHexDigit(sp);
\r
1720 ret.addOr(new oneChar((char) d));
\r
// \a : character 7
1722 else if (sp.escMatch('a'))
\r
1724 ret.addOr(new oneChar((char) 7));
\r
// \f : character 12
1726 else if (sp.escMatch('f'))
\r
1728 ret.addOr(new oneChar((char) 12));
\r
// \e : character 27
1730 else if (sp.escMatch('e'))
\r
1732 ret.addOr(new oneChar((char) 27));
\r
1734 else if (sp.escMatch('n'))
\r
1736 ret.addOr(new oneChar('\n'));
\r
1738 else if (sp.escMatch('t'))
\r
1740 ret.addOr(new oneChar('\t'));
\r
1742 else if (sp.escMatch('r'))
\r
1744 ret.addOr(new oneChar('\r'));
\r
// \c : translate sp.c through Ctrl.cmap when it is inside the table,
// otherwise the character is added unchanged
1746 else if (sp.escMatch('c'))
\r
1749 if (sp.c < Ctrl.cmap.length)
\r
1751 ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
\r
1755 ret.addOr(new oneChar(sp.c));
\r
// octal escape: accumulate digits base 8, with an optional further digit
1758 else if (isOctalString(sp))
\r
1760 int d = sp.c - '0';
\r
1762 d = 8 * d + sp.c - '0';
\r
// probe for another octal digit on a copy of the cursor
1763 StrPos sp2 = new StrPos(sp);
\r
1765 if (isOctalDigit(sp2, false))
\r
1768 d = 8 * d + sp.c - '0';
\r
1770 ret.addOr(new oneChar((char) d));
\r
// anything else is a literal character
1774 ret.addOr(new oneChar(sp.c));
\r
1782 * Converts the stored Pattern to a String -- this is a decompile. Note that
\r
1783 * \t and \n will really print out here, not just the two-character
\r
1784 * representations. Also be prepared to see some strange output if your
\r
1785 * characters are not printable.
\r
// Rebuilds a textual form of the compiled pattern: an optional "(?e=" prefix
// when a non-default escape character is in use, a flag section when any flag
// is set, then thePattern's own text (re-escaped when esc differs from
// Pattern.ESC).
1787 public String toString()
\r
// NOTE(review): `false && ...` is always false, so this guard never fires
1789 if (false && thePattern == null)
\r
1795 javajs.util.SB sb = new javajs.util.SB();
\r
// a non-default escape character is announced first
1796 if (esc != Pattern.ESC)
\r
1798 sb.append("(?e=");
\r
// the flag section is entered when any flag is set or the pattern is optimized
1802 if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase
\r
1803 || dontMatchInQuotes || optimized())
\r
1814 if (sFlag || !dotDoesntMatchCR)
\r
1818 if (dontMatchInQuotes)
\r
1832 String patstr = thePattern.toString();
\r
// thePattern renders with Pattern.ESC; translate to this Regex's escape character
1833 if (esc != Pattern.ESC)
\r
1835 patstr = reEscape(patstr, Pattern.ESC, esc);
\r
1837 sb.append(patstr);
\r
1838 return sb.toString();
\r
1842 // Re-escape Pattern, allows us to use a different escape
\r
// Rewrites s so that escapes written with oldEsc are written with newEsc.
// s - text to convert; oldEsc - escape character used in s; newEsc - escape
// character to use in the result.
// Returns the converted text.
1844 static String reEscape(String s, char oldEsc, char newEsc)
\r
// same escape character on both sides
1846 if (oldEsc == newEsc)
\r
1851 javajs.util.SB sb = new javajs.util.SB();
\r
1852 for (i = 0; i < s.length(); i++)
\r
// oldEsc with at least one character after it
1854 if (s.charAt(i) == oldEsc && i + 1 < s.length())
\r
// a doubled oldEsc emits one oldEsc
1856 if (s.charAt(i + 1) == oldEsc)
\r
1858 sb.appendC(oldEsc);
\r
// oldEsc + X emits newEsc + X
1862 sb.appendC(newEsc);
\r
1863 sb.appendC(s.charAt(i + 1));
\r
// an occurrence of newEsc in the input is doubled in the output
1867 else if (s.charAt(i) == newEsc)
\r
1869 sb.appendC(newEsc);
\r
1870 sb.appendC(newEsc);
\r
// everything else is copied through
1874 sb.appendC(s.charAt(i));
\r
1877 return sb.toString();
\r
1881 // * This method implements FilenameFilter, allowing one to use a Regex to
\r
1882 // * search through a directory using File.list. There is a FileRegex now that
\r
1883 // * does this better.
\r
1885 // * @see com.stevesoft.pat.FileRegex
\r
1887 // public boolean accept(File dir, String s)
\r
1889 // return search(s);
\r
1892 /** The version of this package */
\r
// Returns the fixed version string of this package.
1893 final static public String version()
\r
1895 return "lgpl release 1.5.3";
\r
1899 * Once this method is called, the state of variables ignoreCase and
\r
1900 * dontMatchInQuotes should not be changed as the results will be
\r
1901 * unpredictable. However, search and matchAt will run more quickly. Note that
\r
1902 * you can check to see if the pattern has been optimized by calling the
\r
1903 * optimized() method.
\r
1905 * This method will attempt to rewrite your pattern in a way that makes it
\r
1906 * faster (not all patterns execute at the same speed). In general, "(?: ... )"
\r
1907 * will be faster than "( ... )" so if you don't need the backreference, you
\r
1908 * should group using the former pattern.
\r
1910 * It will also introduce new pattern elements that you can't get to
\r
1911 * otherwise, for example if you have a large table of strings, i.e. the
\r
1912 * months of the year "(January|February|...)" optimize() will make a
\r
1913 * Hashtable that takes it to the next appropriate pattern element --
\r
1914 * eliminating the need for a linear search.
\r
1916 * @see com.stevesoft.pat.Regex#optimized
\r
1917 * @see com.stevesoft.pat.Regex#ignoreCase
\r
1918 * @see com.stevesoft.pat.Regex#dontMatchInQuotes
\r
1919 * @see com.stevesoft.pat.Regex#matchAt
\r
1920 * @see com.stevesoft.pat.Regex#search
\r
// Replaces thePattern with the result of RegOpt.opt and computes a skipper
// for this Regex via Skip.findSkip.
1922 public void optimize()
\r
// guard: already optimized, or no compiled pattern
1924 if (optimized() || thePattern == null)
\r
// a non-null minMatch is what optimized() tests for
1928 minMatch = new patInt(0); // thePattern.countMinChars();
\r
1929 thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes);
\r
1930 skipper = Skip.findSkip(this);
\r
1931 // RegOpt.setParents(this);
\r
1938 * This function returns true if the optimize method has been called.
\r
// Returns true when minMatch is non-null; optimize() is what assigns it.
1940 public boolean optimized()
\r
1942 return minMatch != null;
\r
1946 * A bit of syntactic sugar for those who want to make their code look more
\r
1947 * perl-like. To use this initialize your Regex object by saying:
\r
1950 * Regex r1 = Regex.perlCode("s/hello/goodbye/");
\r
1951 * Regex r2 = Regex.perlCode("s'fish'frog'i");
\r
1952 * Regex r3 = Regex.perlCode("m'hello'");
\r
1955 * The i for ignoreCase is supported in this syntax, as well as m, s, and x.
\r
1956 * The g flag is a bit of a special case.
\r
1958 * If you wish to replace all occurrences of a pattern, you do not put a 'g' in
\r
1959 * the perlCode, but call Regex's replaceAll method.
\r
1961 * If you wish to simply and only do a search for r2's pattern, you can do
\r
1962 * this by calling the searchFrom method repeatedly, or by calling
\r
1963 * search repeatedly if the g flag is set.
\r
1965 * Note: Currently perlCode does <em>not</em> support the (?e=#) syntax for
\r
1966 * changing the escape character.
\r
// Builds a Regex from a Perl-style expression string by delegating to
// parsePerl.parse.
// s - the Perl-style expression text.
// Returns whatever parsePerl.parse(s) returns.
1969 public static Regex perlCode(String s)
\r
1971 // this file is big enough, see parsePerl.java
\r
1972 // for this function.
\r
1973 return parsePerl.parse(s);
\r
// The backslash character as a named constant.
1976 static final char back_slash = '\\';
\r
1979 * Checks to see if there are only literal and no special pattern elements in
\r
// Inspects the compiled pattern starting from thePattern; oneChar and Skipped
// elements are the two kinds tested for explicitly.
// NOTE(review): presumably returns true only when every element is one of
// those kinds -- confirm against the full method body.
1982 public boolean isLiteral()
\r
1984 Pattern x = thePattern;
\r
1987 if (x instanceof oneChar)
\r
1991 else if (x instanceof Skipped)
\r
2005 * You only need to know about this if you are inventing your own pattern
\r
// Delegates to thePattern.countMinChars().
// NOTE(review): thePattern is dereferenced without a null test here, whereas
// optimize() does test it for null -- confirm it cannot be null at this point.
2008 public patInt countMinChars()
\r
2010 return thePattern.countMinChars();
\r
2014 * You only need to know about this if you are inventing your own pattern
\r
// Delegates to thePattern.countMaxChars().
// NOTE(review): thePattern is dereferenced without a null test here, whereas
// optimize() does test it for null -- confirm it cannot be null at this point.
2017 public patInt countMaxChars()
\r
2019 return thePattern.countMaxChars();
\r
// Tests whether the cursor's current character sp.c is a hexadecimal digit
// (0-9, a-f or A-F) and the cursor is not at end of input.
2022 boolean isHexDigit(StrPos sp)
\r
2024 boolean r = !sp.eos
\r
2026 && ((sp.c >= '0' && sp.c <= '9')
\r
2027 || (sp.c >= 'a' && sp.c <= 'f') || (sp.c >= 'A' && sp.c <= 'F'));
\r
// Tests the cursor's current character as an octal digit.
// sp - the cursor; first - must equal sp.dontMatch for the test to pass
// (that is what !(first ^ sp.dontMatch) expresses).
// NOTE(review): presumably sp.dontMatch marks an escaped character, so the
// first digit must be escaped and later ones must not -- confirm in StrPos.
2031 boolean isOctalDigit(StrPos sp, boolean first)
\r
2033 boolean r = !sp.eos && !(first ^ sp.dontMatch) && sp.c >= '0'
\r
// Returns the numeric value of the hexadecimal digit in sp.c.
// No validation is done here: any character that is neither 0-9 nor a-f is
// treated as if it were in A-F.
2038 int getHexDigit(StrPos sp)
\r
2040 if (sp.c >= '0' && sp.c <= '9')
\r
2042 return sp.c - '0';
\r
2044 if (sp.c >= 'a' && sp.c <= 'f')
\r
2046 return sp.c - 'a' + 10;
\r
// fall-through: assumed to be 'A'..'F'
2048 return sp.c - 'A' + 10;
\r
// Applies isHexDigit twice to a copy of sp, so the probing does not go through
// the caller's sp reference.
// NOTE(review): presumably reports whether the next two characters are both
// hex digits -- confirm against the full method body.
2051 boolean next2Hex(StrPos sp)
\r
2053 StrPos sp2 = new StrPos(sp);
\r
2055 if (!isHexDigit(sp2))
\r
2060 if (!isHexDigit(sp2))
\r
2067 boolean isOctalString(StrPos sp)
\r
2069 if (!isOctalDigit(sp, true))
\r
2073 StrPos sp2 = new StrPos(sp);
\r
2075 if (!isOctalDigit(sp2, false))
\r