-//
-// This software is now distributed according to
-// the Lesser Gnu Public License. Please see
-// http://www.gnu.org/copyleft/lesser.txt for
-// the details.
-// -- Happy Computing!
-//
-package com.stevesoft.pat;
-
-import jalview.util.MessageManager;
-
-import java.util.BitSet;
-import java.util.Hashtable;
-
-import com.stevesoft.pat.wrap.StringWrap;
-
-/**
- * Validator that accepts one Unicode punctuation character, as classified by
- * {@code Prop.isPunct}.
- */
-class UnicodePunct extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is punctuation; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isPunct(s.charAt(from)) ? to : -1;
-  }
-}
-
-/**
- * Validator that accepts one Unicode white space character, as classified by
- * {@code Prop.isWhite}.
- */
-class UnicodeWhite extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is white space; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isWhite(s.charAt(from)) ? to : -1;
-  }
-}
-
-/**
- * Validator that accepts one character which {@code Prop.isPunct} does not
- * classify as punctuation.
- */
-class NUnicodePunct extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is not punctuation; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isPunct(s.charAt(from)) ? -1 : to;
-  }
-}
-
-/**
- * Validator that accepts one character which {@code Prop.isWhite} does not
- * classify as white space.
- */
-class NUnicodeWhite extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is not white space; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isWhite(s.charAt(from)) ? -1 : to;
-  }
-}
-
-/**
- * Validator that accepts one Unicode word character: alphabetic, a decimal
- * digit, or the underscore.
- */
-class UnicodeW extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is a word character; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from < s.length())
-    {
-      final char ch = s.charAt(from);
-      final boolean wordChar = Prop.isAlphabetic(ch)
-              || Prop.isDecimalDigit(ch) || ch == '_';
-      if (wordChar)
-      {
-        return to;
-      }
-    }
-    return -1;
-  }
-}
-
-/**
- * Validator that accepts one character which is neither alphabetic, nor a
- * decimal digit, nor the underscore.
- */
-class NUnicodeW extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is not a word character; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from < s.length())
-    {
-      final char ch = s.charAt(from);
-      final boolean wordChar = Prop.isAlphabetic(ch)
-              || Prop.isDecimalDigit(ch) || ch == '_';
-      if (!wordChar)
-      {
-        return to;
-      }
-    }
-    return -1;
-  }
-}
-
-/**
- * Validator that accepts one Unicode decimal digit, as classified by
- * {@code Prop.isDecimalDigit}.
- */
-class UnicodeDigit extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is a decimal digit; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isDecimalDigit(s.charAt(from)) ? to : -1;
-  }
-}
-
-/**
- * Validator that accepts one character which {@code Prop.isDecimalDigit}
- * does not classify as a decimal digit.
- */
-class NUnicodeDigit extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is not a decimal digit; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isDecimalDigit(s.charAt(from)) ? -1 : to;
-  }
-}
-
-/**
- * Validator that accepts one Unicode math character, as classified by
- * {@code Prop.isMath}.
- */
-class UnicodeMath extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is a math character; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isMath(s.charAt(from)) ? to : -1;
-  }
-}
-
-/**
- * Validator that accepts one character which {@code Prop.isMath} does not
- * classify as a math character.
- */
-class NUnicodeMath extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is not a math character; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isMath(s.charAt(from)) ? -1 : to;
-  }
-}
-
-/**
- * Validator that accepts one Unicode currency symbol, as classified by
- * {@code Prop.isCurrency}.
- */
-class UnicodeCurrency extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is a currency symbol; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isCurrency(s.charAt(from)) ? to : -1;
-  }
-}
-
-/**
- * Validator that accepts one character which {@code Prop.isCurrency} does
- * not classify as a currency symbol.
- */
-class NUnicodeCurrency extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is not a currency symbol; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isCurrency(s.charAt(from)) ? -1 : to;
-  }
-}
-
-/**
- * Validator that accepts one Unicode alphabetic character, as classified by
- * {@code Prop.isAlphabetic}.
- */
-class UnicodeAlpha extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is alphabetic; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isAlphabetic(s.charAt(from)) ? to : -1;
-  }
-}
-
-/**
- * Validator that accepts one character which {@code Prop.isAlphabetic} does
- * not classify as alphabetic.
- */
-class NUnicodeAlpha extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} is not alphabetic; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return Prop.isAlphabetic(s.charAt(from)) ? -1 : to;
-  }
-}
-
-/**
- * Validator that accepts one upper case character, judged through the
- * {@code CaseMgr} case conversions.
- */
-class UnicodeUpper extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} passes {@link #isUpper(char)}; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return isUpper(s.charAt(from)) ? to : -1;
-  }
-
-  /**
-   * A character counts as upper case when converting it to upper case leaves
-   * it unchanged while converting it to lower case changes it.
-   */
-  final boolean isUpper(char c)
-  {
-    if (c != CaseMgr.toUpperCase(c))
-    {
-      return false;
-    }
-    return c != CaseMgr.toLowerCase(c);
-  }
-}
-
-/**
- * Validator that accepts one lower case character, judged through the
- * {@code CaseMgr} case conversions.
- */
-class UnicodeLower extends UniValidator
-{
-  /**
-   * Tests the character at index {@code from} of {@code s}.
-   *
-   * @return {@code to} when {@code from < s.length()} and the character at
-   *         {@code from} passes {@link #isLower(char)}; -1 otherwise
-   */
-  public int validate(StringLike s, int from, int to)
-  {
-    if (from >= s.length())
-    {
-      return -1;
-    }
-    return isLower(s.charAt(from)) ? to : -1;
-  }
-
-  /**
-   * A character counts as lower case when converting it to upper case changes
-   * it while converting it to lower case leaves it unchanged.
-   */
-  final boolean isLower(char c)
-  {
-    if (c == CaseMgr.toUpperCase(c))
-    {
-      return false;
-    }
-    return c == CaseMgr.toLowerCase(c);
-  }
-}
-
-/**
- * Regex provides the parser which constructs the linked list of Pattern classes
- * from a String.
- * <p>
- * For the purpose of this documentation, the fact that java interprets the
- * backslash will be ignored. In practice, however, you will need a double
- * backslash to obtain a string that contains a single backslash character.
- * Thus, the example pattern "\b" should really be typed as "\\b" inside java
- * code.
- * <p>
- * Note that Regex is part of package "com.stevesoft.pat". To use it, simply
- * import com.stevesoft.pat.Regex at the top of your file.
- * <p>
- * Regex is made with a constructor that takes a String that defines the regular
- * expression. Thus, for example
- *
- * <pre>
- * Regex r = new Regex("[a-c]*");
- * </pre>
- *
- * matches any number of characters so long as they are 'a', 'b', or 'c'.
- * <p>
- * To attempt to match the Pattern to a given string, you can use either the
- * search(String) member function, or the matchAt(String,int position) member
- * function. These functions return a boolean which tells you whether or not the
- * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in
- * the Regex object appropriately.
- * <p>
- * The portion of the string before the match can be obtained by the left()
- * member, and the portion after the match can be obtained by the right()
- * member.
- * <p>
- * Essentially, this package implements a syntax that is very much like the perl
- * 5 regular expression syntax.
- *
- * Longer example:
- *
- * <pre>
- * Regex r = new Regex("x(a|b)y");
- * r.matchAt("xay", 0);
- * System.out.println("sub = " + r.stringMatched(1));
- * </pre>
- *
- * The above would print "sub = a".
- *
- * <pre>
- * r.left() // would return "x"
- * r.right() // would return "y"
- * </pre>
- *
- * <p>
- * Differences between this package and perl5:<br>
- * The extended Pattern for setting flags is now supported, but the flags are
- * different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the
- * "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the
- * escape character. The pattern
- *
- * <pre>
- * (?e=#)#d+
- * </pre>
- *
- * is the same as
- *
- * <pre>
- * \d+
- * </pre>, but note that the sequence
- *
- * <pre>
- * (?e=#)
- * </pre>
- *
- * <b>must</b> occur at the very beginning of the pattern. There may be other
- * small differences as well. I will either make my package conform or note them
- * as I become aware of them.
- * <p>
- * This package supports additional patterns not in perl5: <center> <table
- * border=1>
- * <tr>
- * <td>(?@())</td>
- * <td>Group</td>
- * <td>This matches all characters between the '(' character and the balancing
- * ')' character. Thus, it will match "()" as well as "(())". The balancing
- * characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>
- * <tr>
- * <td>(?<1)</td>
- * <td>Backup</td>
- * <td>Moves the pointer backwards within the text. This allows you to make a
- * "look behind." It fails if it attempts to move to a position before the
- * beginning of the string. "x(?<1)" is equivalent to "(?=x)". The number, 1
- * in this example, is the number of characters to move backwards.</td>
- * </table> </center>
- *
- * @author Steven R. Brandt
- * @version package com.stevesoft.pat, release 1.5.3
- * @see Pattern
- */
-public class Regex extends RegRes// implements FilenameFilter
-{
- /**
- * BackRefOffset gives the identity number of the first pattern. Version 1.0
- * used zero, version 1.1 uses 1 to be more compatible with perl.
- */
- static int BackRefOffset = 1;
-
- private static Pattern none = new NoPattern(); // shared placeholder assigned to thePattern while nothing is compiled
-
- Pattern thePattern = none; // compiled pattern that _search and _reverseSearch match against; set by compile()
-
- patInt minMatch = new patInt(0); // _search subtracts minMatch.i from the end position to bound the last start index tried
-
- static Hashtable validators = new Hashtable();
- static
- {
- define("p", "(?>1)", new UnicodePunct());
- define("P", "(?>1)", new NUnicodePunct());
- define("s", "(?>1)", new UnicodeWhite());
- define("S", "(?>1)", new NUnicodeWhite());
- define("w", "(?>1)", new UnicodeW());
- define("W", "(?>1)", new NUnicodeW());
- define("d", "(?>1)", new UnicodeDigit());
- define("D", "(?>1)", new NUnicodeDigit());
- define("m", "(?>1)", new UnicodeMath());
- define("M", "(?>1)", new NUnicodeMath());
- define("c", "(?>1)", new UnicodeCurrency());
- define("C", "(?>1)", new NUnicodeCurrency());
- define("a", "(?>1)", new UnicodeAlpha());
- define("A", "(?>1)", new NUnicodeAlpha());
- define("uc", "(?>1)", new UnicodeUpper());
- define("lc", "(?>1)", new UnicodeLower());
- }
-
- /**
-  * Sets the dontMatchInQuotes flag.
-  *
-  * @param b the new value of the flag
-  */
- public void setDontMatchInQuotes(boolean b)
- {
-   this.dontMatchInQuotes = b;
- }
-
- /**
-  * Reports whether the dontMatchInQuotes flag is enabled.
-  *
-  * @return the current value of the flag
-  */
- public boolean getDontMatchInQuotes()
- {
-   return this.dontMatchInQuotes;
- }
-
- /** When true, prep() fills in the quote bits of the text via setCbits. */
- boolean dontMatchInQuotes = false;
-
- /**
-  * Sets the ignoreCase flag. When true, the pattern matcher ignores case
-  * while searching for a match.
-  *
-  * @param b the new value of the flag
-  */
- public void setIgnoreCase(boolean b)
- {
-   this.ignoreCase = b;
- }
-
- /**
-  * Returns the ignoreCase flag.
-  *
-  * @return true if the case of the pattern is being ignored, false otherwise
-  */
- public boolean getIgnoreCase()
- {
-   return this.ignoreCase;
- }
-
- /** Case-insensitivity flag; prep() copies it into the match state. */
- boolean ignoreCase = false;
-
- /** Class-wide default for the m flag; prep() ORs it with the instance mFlag. */
- static boolean defaultMFlag = false;
-
- /**
-  * Sets the default value of the m flag. When true, the m flag is on for any
-  * regex search executed.
-  *
-  * @param mFlag the new default
-  */
- public static void setDefaultMFlag(boolean mFlag)
- {
-   Regex.defaultMFlag = mFlag;
- }
-
- /**
-  * Returns the default value of the m flag. When true, the m flag is on for
-  * any regex search executed.
-  *
-  * @return the current default
-  */
- public static boolean getDefaultMFlag()
- {
-   return Regex.defaultMFlag;
- }
-
- /**
-  * Creates a Regex that has no Pattern yet. Supply one afterwards with
-  * compile(String).
-  *
-  * @see com.stevesoft.pat.Regex#compile(java.lang.String)
-  */
- public Regex()
- {
-   super();
- }
-
- /**
-  * Creates and compiles a Regex without throwing any exceptions. A RegSyntax
-  * raised by the compile is discarded, so only use this constructor when the
-  * pattern is known to be valid. To have syntax errors reported, create the
-  * object with the no-argument constructor and call compile(String).
-  *
-  * @param s the pattern text
-  * @see com.stevesoft.pat.Regex#search(java.lang.String)
-  * @see com.stevesoft.pat.Regex#compile(java.lang.String)
-  */
- public Regex(String s)
- {
-   try
-   {
-     compile(s);
-   } catch (RegSyntax ignored)
-   {
-     // Deliberately swallowed: this constructor never reports syntax errors.
-   }
- }
-
- /** Replacement rule handed out by getReplaceRule(); null until one is set. */
- ReplaceRule rep = null;
-
- /**
-  * Creates and compiles a Regex together with a ReplaceRule built from
-  * {@code rp} by parsePerl.perlCode.
-  *
-  * @param s  the pattern text
-  * @param rp the replacement rule text
-  * @see com.stevesoft.pat.ReplaceRule
-  * @see com.stevesoft.pat.Regex#compile(java.lang.String)
-  */
- public Regex(String s, String rp)
- {
-   this(s);
-   this.rep = parsePerl.perlCode(rp);
- }
-
- /**
-  * Creates and compiles a Regex and attaches the given ReplaceRule, which
-  * gives the caller finer control over the replacement process.
-  *
-  * @param s  the pattern text
-  * @param rp the replacement rule to use
-  * @see com.stevesoft.pat.ReplaceRule
-  * @see com.stevesoft.pat.Regex#compile(java.lang.String)
-  */
- public Regex(String s, ReplaceRule rp)
- {
-   this(s);
-   this.rep = rp;
- }
-
- /**
-  * Replaces the ReplaceRule of this Regex with one compiled from the String
-  * {@code rp}, and drops the cached Replacer.
-  *
-  * @param rp the replacement rule text
-  */
- public void setReplaceRule(String rp)
- {
-   final ReplaceRule compiled = parsePerl.perlCode(rp);
-   this.rep = compiled;
-   // Clear Replacer history
-   this.repr = null;
- }
-
- /**
-  * Replaces the ReplaceRule of this Regex with {@code rp}.
-  *
-  * @param rp the replacement rule to use
-  */
- public void setReplaceRule(ReplaceRule rp)
- {
-   this.rep = rp;
- }
-
- /**
-  * Tests whether a custom defined rule is registered under {@code nm}.
-  *
-  * @param nm the rule name
-  * @return true if the validators table has an entry for {@code nm}
-  * @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
-  */
- public static boolean isDefined(String nm)
- {
-   // A Hashtable never stores null values, so key presence is the same test.
-   return validators.containsKey(nm);
- }
-
- /**
-  * Removes the custom defined rule registered under {@code nm}, if any.
-  *
-  * @param nm the rule name
-  * @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
-  */
- public static void undefine(String nm)
- {
-   Regex.validators.remove(nm);
- }
-
- /**
-  * Registers a validator-backed rule: stores {@code pat} in the validator's
-  * pattern field and files the validator under {@code nm}. See
-  * test/deriv2.java and test/deriv3.java for examples of how to use it.
-  *
-  * @param nm  the rule name
-  * @param pat the pattern text associated with the rule
-  * @param v   the validator implementing the rule
-  */
- public static void define(String nm, String pat, Validator v)
- {
-   v.pattern = pat;
-   Regex.validators.put(nm, v);
- }
-
- /**
-  * Registers a shorthand for a pattern. The shorthand is invoked by a string
-  * of the form "(??"+nm+")".
-  *
-  * @param nm  the shorthand name
-  * @param pat the pattern text it stands for
-  */
- public static void define(String nm, String pat)
- {
-   Regex.validators.put(nm, pat);
- }
-
- /**
-  * Returns the current ReplaceRule.
-  *
-  * @return the rule, or null if none has been set
-  */
- public ReplaceRule getReplaceRule()
- {
-   return this.rep;
- }
-
- /** Lazily created Replacer; set back to null by setReplaceRule(String). */
- Replacer repr = null;
-
- /** Returns the cached Replacer, creating it on first use. */
- final Replacer _getReplacer()
- {
-   if (repr == null)
-   {
-     repr = new Replacer();
-   }
-   return repr;
- }
-
- /**
-  * Returns the Replacer of this Regex, creating it on first use. Before it is
-  * returned, its holder is pointed at this Regex and the holder's prev link
-  * is cleared.
-  */
- public Replacer getReplacer()
- {
-   final Replacer replacer = _getReplacer();
-   replacer.rh.me = this;
-   replacer.rh.prev = null;
-   return replacer;
- }
-
- /**
-  * Replaces the first occurrence of this pattern in {@code s} according to
-  * the ReplaceRule.
-  *
-  * @param s the text to operate on
-  * @return the text after replacement
-  * @see com.stevesoft.pat.ReplaceRule
-  * @see com.stevesoft.pat.Regex#getReplaceRule()
-  */
- public String replaceFirst(String s)
- {
-   final Replacer replacer = _getReplacer();
-   return replacer.replaceFirstRegion(s, this, 0, s.length()).toString();
- }
-
- /**
-  * Replaces the first occurrence of this pattern in {@code s}, starting at
-  * position {@code pos}, according to the ReplaceRule.
-  *
-  * @param s   the text to operate on
-  * @param pos the position at which to begin
-  * @return the text after replacement
-  * @see com.stevesoft.pat.ReplaceRule
-  * @see com.stevesoft.pat.Regex#getReplaceRule()
-  */
- public String replaceFirstFrom(String s, int pos)
- {
-   final Replacer replacer = _getReplacer();
-   return replacer.replaceFirstRegion(s, this, pos, s.length()).toString();
- }
-
- /**
-  * Replaces the first occurrence of this pattern in {@code s}, within the
-  * region from {@code start} to {@code end}, according to the ReplaceRule.
-  *
-  * @param s     the text to operate on
-  * @param start the position at which the region begins
-  * @param end   the position at which the region ends
-  * @return the text after replacement
-  * @see com.stevesoft.pat.ReplaceRule
-  * @see com.stevesoft.pat.Regex#getReplaceRule()
-  */
- public String replaceFirstRegion(String s, int start, int end)
- {
-   final Replacer replacer = _getReplacer();
-   return replacer.replaceFirstRegion(s, this, start, end).toString();
- }
-
- /**
-  * Replaces all occurrences of this pattern in {@code s} according to the
-  * ReplaceRule.
-  *
-  * @param s the text to operate on
-  * @return the text after replacement
-  * @see com.stevesoft.pat.ReplaceRule
-  * @see com.stevesoft.pat.Regex#getReplaceRule()
-  */
- public String replaceAll(String s)
- {
-   final Replacer replacer = _getReplacer();
-   return replacer.replaceAllRegion(s, this, 0, s.length()).toString();
- }
-
- /**
-  * Replaces all occurrences of this pattern in the StringLike {@code s}
-  * according to the ReplaceRule.
-  *
-  * @param s the text to operate on
-  * @return the result produced by the Replacer
-  */
- public StringLike replaceAll(StringLike s)
- {
-   final Replacer replacer = _getReplacer();
-   return replacer.replaceAllRegion(s, this, 0, s.length());
- }
-
- /**
-  * Replaces all occurrences of this pattern in {@code s}, starting at
-  * position {@code pos}, according to the ReplaceRule.
-  *
-  * @param s   the text to operate on
-  * @param pos the position at which to begin
-  * @return the text after replacement
-  * @see com.stevesoft.pat.ReplaceRule
-  * @see com.stevesoft.pat.Regex#getReplaceRule()
-  */
- public String replaceAllFrom(String s, int pos)
- {
-   final Replacer replacer = _getReplacer();
-   return replacer.replaceAllRegion(s, this, pos, s.length()).toString();
- }
-
- /**
-  * Replaces all occurrences of this pattern in {@code s}, within the region
-  * from {@code start} to {@code end}, according to the ReplaceRule.
-  *
-  * @param s     the text to operate on
-  * @param start the position at which the region begins
-  * @param end   the position at which the region ends
-  * @return the text after replacement
-  * @see com.stevesoft.pat.ReplaceRule
-  * @see com.stevesoft.pat.Regex#getReplaceRule()
-  */
- public String replaceAllRegion(String s, int start, int end)
- {
-   final Replacer replacer = _getReplacer();
-   return replacer.replaceAllRegion(s, this, start, end).toString();
- }
-
- /**
-  * Copy constructor. Copies the match result state through the RegRes
-  * constructor, copies the flags and the escape character, clones the
-  * ReplaceRule (when there is one) and the pattern, and shares the minMatch
-  * and skipper references with {@code r}.
-  *
-  * @param r the Regex to copy
-  */
- public Regex(Regex r)
- {
-   super((RegRes) r);
-   this.dontMatchInQuotes = r.dontMatchInQuotes;
-   this.esc = r.esc;
-   this.ignoreCase = r.ignoreCase;
-   this.gFlag = r.gFlag;
-   this.rep = (r.rep == null) ? null : (ReplaceRule) r.rep.clone();
-   // The pattern is cloned rather than recompiled from r.toString().
-   this.thePattern = r.thePattern.clone(new Hashtable());
-   this.minMatch = r.minMatch;
-   this.skipper = r.skipper;
- }
-
- /**
- * By default, the escape character is the backslash, but you can make it
- * anything you want by setting this variable. When it differs from
- * Pattern.ESC, compile(String) rewrites the pattern with reEscape before
- * compiling it, unless the pattern itself begins with "(?e=".
- */
- public char esc = Pattern.ESC;
-
- /**
-  * Compiles a regular expression, making it possible to call the search or
-  * matchAt methods. The ignoreCase and dontMatchInQuotes flags are cleared
-  * before compiling. A pattern that begins with "(?e=", one character and
-  * ')' is re-escaped from that character to Pattern.ESC; otherwise, when the
-  * esc field differs from Pattern.ESC, the whole pattern is re-escaped from
-  * esc.
-  *
-  * @param prepat the pattern text
-  * @exception com.stevesoft.pat.RegSyntax
-  *              is thrown if a syntax error is encountered in the pattern.
-  *              For example, "x{3,1}" or "*a" are not valid patterns.
-  * @see com.stevesoft.pat.Regex#search
-  * @see com.stevesoft.pat.Regex#matchAt
-  */
- public void compile(String prepat) throws RegSyntax
- {
-   final String codified = parsePerl.codify(prepat, true);
-   final String pat = (codified != null) ? codified : prepat;
-
-   // Reset per-pattern state before the Rthings snapshot is taken.
-   minMatch = null;
-   ignoreCase = false;
-   dontMatchInQuotes = false;
-   final Rthings mk = new Rthings(this);
-   final int firstMark = mk.val;
-   thePattern = none;
-   p = null;
-   or = null;
-   minMatch = new patInt(0);
-
-   String escaped = pat;
-   final StrPos cursor = new StrPos(pat, 0);
-   if (cursor.incMatch("(?e="))
-   {
-     final char declaredEsc = cursor.c;
-     cursor.inc();
-     if (cursor.match(')'))
-     {
-       // Skip the six characters of the "(?e=X)" prefix.
-       escaped = reEscape(pat.substring(6), declaredEsc, Pattern.ESC);
-     }
-   }
-   else if (esc != Pattern.ESC)
-   {
-     escaped = reEscape(pat, esc, Pattern.ESC);
-   }
-
-   thePattern = _compile2(escaped, mk);
-   numSubs_ = mk.val - firstMark;
-   mk.set(this);
- }
-
- /*
- * If a Regex is compared against a Regex, a check is done to see that the
- * patterns are equal as well as the most recent match. If a Regex is compare
- * with a RegRes, only the result of the most recent match is compared.
- */
- public boolean equals(Object o)
- {
- if (o instanceof Regex)
- {
- if (toString().equals(o.toString()))
- {
- return super.equals(o);
- }
- else
- {
- return false;
- }
- }
- else
- {
- return super.equals(o);
- }
- }
-
- /**
-  * Returns a copy of this Regex made with the copy constructor.
-  */
- public Object clone()
- {
-   final Regex copy = new Regex(this);
-   return copy;
- }
-
- /**
-  * Returns a clone of the underlying RegRes object, obtained from the
-  * superclass clone().
-  */
- public RegRes result()
- {
-   final Object snapshot = super.clone();
-   return (RegRes) snapshot;
- }
-
- // Match state shared with class Pattern; prep() refreshes it so that the
- // Pattern objects can access it during an attempt at a match.
- Pthings pt = new Pthings();
-
- /**
-  * Prepares the shared match state for a match attempt against {@code s}.
-  * lastPos is taken from matchedTo() (clamped to 0) and reset to 0 when the
-  * text differs from the previously searched one; the flags are copied in,
-  * the marks are cleared, and the quote bits are computed when
-  * dontMatchInQuotes is set.
-  *
-  * @param s the text about to be searched; it becomes the new src
-  * @return the refreshed match state (the pt field)
-  */
- final Pthings prep(StringLike s)
- {
-   pt.lastPos = matchedTo();
-   if (pt.lastPos < 0)
-   {
-     pt.lastPos = 0;
-   }
-   // Compare the new text with the previously searched text (src). The old
-   // code unwrapped s on both sides, so a change of text went unnoticed and
-   // a null s with a non-null src raised a NullPointerException.
-   if ((s == null ? null : s.unwrap()) != (src == null ? null : src.unwrap()))
-   {
-     pt.lastPos = 0;
-   }
-   src = s;
-   pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);
-   pt.mFlag = (mFlag | defaultMFlag);
-   pt.ignoreCase = ignoreCase;
-   pt.no_check = false;
-   if (pt.marks != null)
-   {
-     // Wipe the old array in place before dropping the reference to it.
-     for (int i = 0; i < pt.marks.length; i++)
-     {
-       pt.marks[i] = -1;
-     }
-   }
-   pt.marks = null;
-   pt.nMarks = numSubs_;
-   pt.src = s;
-   if (dontMatchInQuotes)
-   {
-     setCbits(s, pt);
-   }
-   else
-   {
-     pt.cbits = null;
-   }
-   return pt;
- }
-
- /**
-  * Attempts to match the Pattern at exactly position {@code start_pos} of
-  * the String.
-  *
-  * @param s         the text to match against
-  * @param start_pos the only position at which a match is tried
-  * @return true if the pattern matched
-  * @see com.stevesoft.pat.Regex#search
-  */
- public boolean matchAt(String s, int start_pos)
- {
-   final boolean matched = _search(s, start_pos, start_pos);
-   return matched;
- }
-
- /**
-  * Attempts to match the Pattern at exactly position {@code start_pos} of
-  * the StringLike.
-  *
-  * @param s         the text to match against
-  * @param start_pos the only position at which a match is tried
-  * @return true if the pattern matched
-  * @see com.stevesoft.pat.Regex#search
-  */
- public boolean matchAt(StringLike s, int start_pos)
- {
-   final boolean matched = _search(s, start_pos, start_pos);
-   return matched;
- }
-
- /**
-  * Searches through a String for the first occurrence of a match.
-  *
-  * @param s the text to search; must not be null
-  * @return true if a match was found
-  * @throws NullPointerException if {@code s} is null
-  * @see com.stevesoft.pat.Regex#searchFrom
-  * @see com.stevesoft.pat.Regex#matchAt
-  */
- public boolean search(String s)
- {
-   if (s != null)
-   {
-     return _search(s, 0, s.length());
-   }
-   throw new NullPointerException(MessageManager.getString("exception.null_string_given_to_regex_search"));
- }
-
- /**
-  * Searches through a StringLike for the first occurrence of a match.
-  *
-  * @param sl the text to search; must not be null
-  * @return true if a match was found
-  * @throws NullPointerException if {@code sl} is null
-  */
- public boolean search(StringLike sl)
- {
-   if (sl != null)
-   {
-     return _search(sl, 0, sl.length());
-   }
-   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search"));
- }
-
- /**
-  * Searches a String for a match, trying start positions from the end of the
-  * text back towards the beginning.
-  *
-  * @param s the text to search; must not be null
-  * @return true if a match was found
-  * @throws NullPointerException if {@code s} is null
-  */
- public boolean reverseSearch(String s)
- {
-   if (s != null)
-   {
-     return _reverseSearch(s, 0, s.length());
-   }
-   throw new NullPointerException(MessageManager.getString("exception.null_string_given_to_regex_reverse_search"));
- }
-
- /**
-  * Searches a StringLike for a match, trying start positions from the end of
-  * the text back towards the beginning.
-  *
-  * @param sl the text to search; must not be null
-  * @return true if a match was found
-  * @throws NullPointerException if {@code sl} is null
-  */
- public boolean reverseSearch(StringLike sl)
- {
-   if (sl != null)
-   {
-     return _reverseSearch(sl, 0, sl.length());
-   }
-   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_reverse_search"));
- }
-
- /**
-  * Searches through a String for the first occurrence of a match, beginning
-  * at position {@code start}.
-  *
-  * @param s     the text to search; must not be null
-  * @param start the position at which the search begins
-  * @return true if a match was found
-  * @throws NullPointerException if {@code s} is null
-  */
- public boolean searchFrom(String s, int start)
- {
-   if (s != null)
-   {
-     return _search(s, start, s.length());
-   }
-   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_from"));
- }
-
- /**
-  * Searches through a StringLike for the first occurrence of a match,
-  * beginning at position {@code start}.
-  *
-  * @param s     the text to search; must not be null
-  * @param start the position at which the search begins
-  * @return true if a match was found
-  * @throws NullPointerException if {@code s} is null
-  */
- public boolean searchFrom(StringLike s, int start)
- {
-   if (s != null)
-   {
-     return _search(s, start, s.length());
-   }
-   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_from"));
- }
-
- /**
-  * Searches through a region of a String for the first occurrence of a
-  * match.
-  *
-  * @param s     the text to search; must not be null
-  * @param start the position at which the region begins
-  * @param end   the position at which the region ends
-  * @return true if a match was found
-  * @throws NullPointerException if {@code s} is null
-  */
- public boolean searchRegion(String s, int start, int end)
- {
-   if (s != null)
-   {
-     return _search(s, start, end);
-   }
-   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_region"));
- }
-
- /**
- * Set this to change the default behavior of the "." pattern. By default it
- * now matches perl's behavior and fails to match the '\n' character. prep()
- * only passes the setting on to the matcher while the sFlag is false.
- */
- public static boolean dotDoesntMatchCR = true;
-
- /** Text of the most recent successful search; cleared when a search starts. */
- StringLike gFlags;
-
- /** Position recorded by the most recent successful search for 'g' flag resumption. */
- int gFlagto = 0;
-
- /** The 'g' flag: when set, a search of the same text resumes at gFlagto. */
- boolean gFlag = false;
-
- /**
-  * Sets the 'g' flag.
-  *
-  * @param b the new value of the flag
-  */
- public void setGFlag(boolean b)
- {
-   this.gFlag = b;
- }
-
- /**
-  * Returns the state of the 'g' flag.
-  *
-  * @return the current value of the flag
-  */
- public boolean getGFlag()
- {
-   return this.gFlag;
- }
-
- /** The s flag; while set, prep() turns dotDoesntMatchCR off for the match. */
- boolean sFlag = false;
-
- /**
-  * Returns the state of the sFlag.
-  *
-  * @return the current value of the flag
-  */
- public boolean getSFlag()
- {
-   return this.sFlag;
- }
-
- /** The m flag; prep() ORs it with defaultMFlag for each match attempt. */
- boolean mFlag = false;
-
- /**
-  * Returns the state of the mFlag.
-  *
-  * @return the current value of the flag
-  */
- public boolean getMFlag()
- {
-   return this.mFlag;
- }
-
- /** Wraps the String in a StringWrap and delegates to the StringLike search. */
- final boolean _search(String s, int start, int end)
- {
-   final StringWrap wrapped = new StringWrap(s);
-   return _search(wrapped, start, end);
- }
-
- final boolean _search(StringLike s, int start, int end)
- {
- if (gFlag && gFlagto > 0 && gFlags != null
- && s.unwrap() == gFlags.unwrap())
- {
- start = gFlagto;
- }
- gFlags = null;
-
- Pthings pt = prep(s);
-
- int up = (minMatch == null ? end : end - minMatch.i);
-
- if (up < start && end >= start)
- {
- up = start;
- }
-
- if (skipper == null)
- {
- for (int i = start; i <= up; i++)
- {
- charsMatched_ = thePattern.matchAt(s, i, pt);
- if (charsMatched_ >= 0)
- {
- matchFrom_ = thePattern.mfrom;
- marks = pt.marks;
- gFlagto = matchFrom_ + charsMatched_;
- gFlags = s;
- return didMatch_ = true;
- }
- }
- }
- else
- {
- pt.no_check = true;
- for (int i = start; i <= up; i++)
- {
- i = skipper.find(src, i, up);
- if (i < 0)
- {
- charsMatched_ = matchFrom_ = -1;
- return didMatch_ = false;
- }
- charsMatched_ = thePattern.matchAt(s, i, pt);
- if (charsMatched_ >= 0)
- {
- matchFrom_ = thePattern.mfrom;
- marks = pt.marks;
- gFlagto = matchFrom_ + charsMatched_;
- gFlags = s;
- return didMatch_ = true;
- }
- }
- }
- return didMatch_ = false;
- }
-
- /*
- * final boolean _search(LongStringLike s,long start,long end) { if(gFlag &&
- * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null;
- *
- * Pthings pt=prep(s);
- *
- * int up = end;//(minMatch == null ? end : end-minMatch.i);
- *
- * if(up < start && end >= start) up = start;
- *
- * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ =
- * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
- * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
- * return didMatch_=true; } } } else { pt.no_check = true; for(long i=start;i<=up;i++) {
- * i = skipper.find(src,i,up); if(i<0) { charsMatched_ = matchFrom_ = -1;
- * return didMatch_ = false; } charsMatched_ = thePattern.matchAt(s,i,pt);
- * if(charsMatched_ >= 0) { matchFrom_ = thePattern.mfrom; marks = pt.marks;
- * gFlagto = matchFrom_+charsMatched_; gFlags = s; return didMatch_=true; }
- * else { i = s.adjustIndex(i); up = s.adjustEnd(i); } } } return
- * didMatch_=false; }
- */
-
- boolean _reverseSearch(String s, int start, int end)
- {
- return _reverseSearch(new StringWrap(s), start, end);
- }
-
- /**
-  * Core reverse search. Tries start positions from {@code end} down to
-  * {@code start} and records the first match found. When the 'g' flag is set
-  * and {@code s} wraps the same text as the previous successful search, the
-  * scan begins at gFlagto instead of {@code end}.
-  *
-  * @param s     the text to search
-  * @param start the lowest start position to try
-  * @param end   the highest start position to try
-  * @return true if a match was found; the result is also stored in didMatch_
-  */
- boolean _reverseSearch(StringLike s, int start, int end)
- {
-   // gFlags is null after any unsuccessful search, so it must be checked
-   // before being dereferenced (as _search already does).
-   if (gFlag && gFlagto > 0 && gFlags != null
-           && s.unwrap() == gFlags.unwrap())
-   {
-     end = gFlagto;
-   }
-   gFlags = null;
-   Pthings pt = prep(s);
-   for (int i = end; i >= start; i--)
-   {
-     charsMatched_ = thePattern.matchAt(s, i, pt);
-     if (charsMatched_ >= 0)
-     {
-       matchFrom_ = thePattern.mfrom;
-       marks = pt.marks;
-       // The next 'g' reverse search resumes just before this match.
-       gFlagto = matchFrom_ - 1;
-       gFlags = s;
-       return didMatch_ = true;
-     }
-   }
-   return didMatch_ = false;
- }
-
- // This routine sets the cbits variable
- // of class Pattern. Cbits is true for
- // the bit corresponding to a character inside
- // a set of quotes.
- static StringLike lasts = null;
-
- static BitSet lastbs = null;
-
- static void setCbits(StringLike s, Pthings pt)
- {
- if (s == lasts)
- {
- pt.cbits = lastbs;
- return;
- }
- BitSet bs = new BitSet(s.length());
- char qc = ' ';
- boolean setBit = false;
- for (int i = 0; i < s.length(); i++)
- {
- if (setBit)
- {
- bs.set(i);
- }
- char c = s.charAt(i);
- if (!setBit && c == '"')
- {
- qc = c;
- setBit = true;
- bs.set(i);
- }
- else if (!setBit && c == '\'')
- {
- qc = c;
- setBit = true;
- bs.set(i);
- }
- else if (setBit && c == qc)
- {
- setBit = false;
- }
- else if (setBit && c == '\\' && i + 1 < s.length())
- {
- i++;
- if (setBit)
- {
- bs.set(i);
- }
- }
- }
- pt.cbits = lastbs = bs;
- lasts = s;
- }
-
- /**
-  * Creates a new instance of the runtime class of this object through its
-  * no-argument constructor, so subclasses need not override this method.
-  *
-  * @return the new instance, or null if it could not be instantiated or
-  *         accessed
-  */
- Regex newRegex()
- {
-   Regex fresh = null;
-   try
-   {
-     fresh = (Regex) getClass().newInstance();
-   } catch (InstantiationException ie)
-   {
-     // leave fresh as null
-   } catch (IllegalAccessException iae)
-   {
-     // leave fresh as null
-   }
-   return fresh;
- }
-
- /**
-  * Only needed for creating your own extensions of Regex. This method adds
-  * the next Pattern in the chain of patterns, or sets the Pattern if it is
-  * the first call.
-  *
-  * @param p2 the Pattern to append
-  */
- protected void add(Pattern p2)
- {
-   if (p == null)
-   {
-     p = p2;
-   }
-   else
-   {
-     // The former "p2 = p;" only reassigned the parameter and had no effect.
-     p.add(p2);
-   }
- }
-
- /**
- * You only need to use this method if you are creating your own extentions to
- * Regex. compile1 compiles one Pattern element, it can be over-ridden to
- * allow the Regex compiler to understand new syntax. See deriv.java for an
- * example. This routine is the heart of class Regex. Rthings has one integer
- * member called intValue, it is used to keep track of the number of ()'s in
- * the Pattern.
- *
- * @exception com.stevesoft.pat.RegSyntax
- * is thrown when a nonsensensical pattern is supplied. For
- * example, a pattern beginning with *.
- */
- protected void compile1(StrPos sp, Rthings mk) throws RegSyntax
- {
- if (sp.match('['))
- {
- sp.inc();
- add(matchBracket(sp));
- }
- else if (sp.match('|'))
- {
- if (or == null)
- {
- or = new Or();
- }
- if (p == null)
- {
- p = new NullPattern();
- }
- or.addOr(p);
- p = null;
- }
- else if (sp.incMatch("(?<"))
- {
- patInt i = sp.getPatInt();
- if (i == null)
- {
- RegSyntaxError.endItAll("No int after (?<");
- }
- add(new Backup(i.intValue()));
- if (!sp.match(')'))
- {
- RegSyntaxError.endItAll("No ) after (?<");
- }
- }
- else if (sp.incMatch("(?>"))
- {
- patInt i = sp.getPatInt();
- if (i == null)
- {
- RegSyntaxError.endItAll("No int after (?>");
- }
- add(new Backup(-i.intValue()));
- if (!sp.match(')'))
- {
- RegSyntaxError.endItAll("No ) after (?<");
- }
- }
- else if (sp.incMatch("(?@"))
- {
- char op = sp.c;
- sp.inc();
- char cl = sp.c;
- sp.inc();
- if (!sp.match(')'))
- {
- RegSyntaxError.endItAll("(?@ does not have closing paren");
- }
- add(new Group(op, cl));
- }
- else if (sp.incMatch("(?#"))
- {
- while (!sp.match(')'))
- {
- sp.inc();
- }
- }
- else if (sp.dontMatch && sp.c == 'w')
- {
- // Regex r = new Regex();
- // r._compile("[a-zA-Z0-9_]",mk);
- // add(new Goop("\\w",r.thePattern));
- Bracket b = new Bracket(false);
- b.addOr(new Range('a', 'z'));
- b.addOr(new Range('A', 'Z'));
- b.addOr(new Range('0', '9'));
- b.addOr(new oneChar('_'));
- add(b);
- }
- else if (sp.dontMatch && sp.c == 'G')
- {
- add(new BackG());
- }
- else if (sp.dontMatch && sp.c == 's')
- {
- // Regex r = new Regex();
- // r._compile("[ \t\n\r\b]",mk);
- // add(new Goop("\\s",r.thePattern));
- Bracket b = new Bracket(false);
- b.addOr(new oneChar((char) 32));
- b.addOr(new Range((char) 8, (char) 10));
- b.addOr(new oneChar((char) 13));
- add(b);
- }
- else if (sp.dontMatch && sp.c == 'd')
- {
- // Regex r = new Regex();
- // r._compile("[0-9]",mk);
- // add(new Goop("\\d",r.thePattern));
- Range digit = new Range('0', '9');
- digit.printBrackets = true;
- add(digit);
- }
- else if (sp.dontMatch && sp.c == 'W')
- {
- // Regex r = new Regex();
- // r._compile("[^a-zA-Z0-9_]",mk);
- // add(new Goop("\\W",r.thePattern));
- Bracket b = new Bracket(true);
- b.addOr(new Range('a', 'z'));
- b.addOr(new Range('A', 'Z'));
- b.addOr(new Range('0', '9'));
- b.addOr(new oneChar('_'));
- add(b);
- }
- else if (sp.dontMatch && sp.c == 'S')
- {
- // Regex r = new Regex();
- // r._compile("[^ \t\n\r\b]",mk);
- // add(new Goop("\\S",r.thePattern));
- Bracket b = new Bracket(true);
- b.addOr(new oneChar((char) 32));
- b.addOr(new Range((char) 8, (char) 10));
- b.addOr(new oneChar((char) 13));
- add(b);
- }
- else if (sp.dontMatch && sp.c == 'D')
- {
- // Regex r = new Regex();
- // r._compile("[^0-9]",mk);
- // add(new Goop("\\D",r.thePattern));
- Bracket b = new Bracket(true);
- b.addOr(new Range('0', '9'));
- add(b);
- }
- else if (sp.dontMatch && sp.c == 'B')
- {
- Regex r = new Regex();
- r._compile2("(?!" + back_slash + "b)", mk);
- add(r.thePattern);
- }
- else if (isOctalString(sp))
- {
- int d = sp.c - '0';
- sp.inc();
- d = 8 * d + sp.c - '0';
- StrPos sp2 = new StrPos(sp);
- sp2.inc();
- if (isOctalDigit(sp2, false))
- {
- sp.inc();
- d = 8 * d + sp.c - '0';
- }
- add(new oneChar((char) d));
- }
- else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')
- {
- int iv = sp.c - '0';
- StrPos s2 = new StrPos(sp);
- s2.inc();
- if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')
- {
- iv = 10 * iv + (s2.c - '0');
- sp.inc();
- }
- add(new BackMatch(iv));
- }
- else if (sp.dontMatch && sp.c == 'b')
- {
- add(new Boundary());
- }
- else if (sp.match('\b'))
- {
- add(new Boundary());
- }
- else if (sp.match('$'))
- {
- add(new End(true));
- }
- else if (sp.dontMatch && sp.c == 'Z')
- {
- add(new End(false));
- }
- else if (sp.match('.'))
- {
- add(new Any());
- }
- else if (sp.incMatch("(??"))
- {
- javajs.util.SB sb = new javajs.util.SB();
- javajs.util.SB sb2 = new javajs.util.SB();
- while (!sp.match(')') && !sp.match(':'))
- {
- sb.appendC(sp.c);
- sp.inc();
- }
- if (sp.incMatch(":"))
- {
- while (!sp.match(')'))
- {
- sb2.appendC(sp.c);
- sp.inc();
- }
- }
- String sbs = sb.toString();
- if (validators.get(sbs) instanceof String)
- {
- String pat = (String) validators.get(sbs);
- Regex r = newRegex();
- Rthings rth = new Rthings(this);
- rth.noBackRefs = true;
- r._compile2(pat, rth);
- add(r.thePattern);
- }
- else
- {
- Custom cm = new Custom(sb.toString());
- if (cm.v != null)
- {
- Validator v2 = cm.v.arg(sb2.toString());
- if (v2 != null)
- {
- v2.argsave = sb2.toString();
- String p = cm.v.pattern;
- cm.v = v2;
- v2.pattern = p;
- }
- Regex r = newRegex();
- Rthings rth = new Rthings(this);
- rth.noBackRefs = true;
- r._compile2(cm.v.pattern, rth);
- cm.sub = r.thePattern;
- cm.sub.add(new CustomEndpoint(cm));
- cm.sub.setParent(cm);
- add(cm);
- }
- }
- }
- else if (sp.match('('))
- {
- mk.parenLevel++;
- Regex r = newRegex();
- // r.or = new Or();
- sp.inc();
- if (sp.incMatch("?:"))
- {
- r.or = new Or();
- }
- else if (sp.incMatch("?="))
- {
- r.or = new lookAhead(false);
- }
- else if (sp.incMatch("?!"))
- {
- r.or = new lookAhead(true);
- }
- else if (sp.match('?'))
- {
- sp.inc();
- do
- {
- if (sp.c == 'i')
- {
- mk.ignoreCase = true;
- }
- if (sp.c == 'Q')
- {
- mk.dontMatchInQuotes = true;
- }
- if (sp.c == 'o')
- {
- mk.optimizeMe = true;
- }
- if (sp.c == 'g')
- {
- mk.gFlag = true;
- }
- if (sp.c == 's')
- {
- mk.sFlag = true;
- }
- if (sp.c == 'm')
- {
- mk.mFlag = true;
- }
- sp.inc();
- } while (!sp.match(')') && !sp.eos);
- r = null;
- mk.parenLevel--;
- if (sp.eos) // throw new RegSyntax
- {
- RegSyntaxError.endItAll("Unclosed ()");
- }
- }
- else
- { // just ordinary parenthesis
- r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
- }
- if (r != null)
- {
- add(r._compile1(sp, mk));
- }
- }
- else if (sp.match('^'))
- {
- add(new Start(true));
- }
- else if (sp.dontMatch && sp.c == 'A')
- {
- add(new Start(false));
- }
- else if (sp.match('*'))
- {
- addMulti(new patInt(0), new patInf());
- }
- else if (sp.match('+'))
- {
- addMulti(new patInt(1), new patInf());
- }
- else if (sp.match('?'))
- {
- addMulti(new patInt(0), new patInt(1));
- }
- else if (sp.match('{'))
- {
- boolean bad = false;
- StrPos sp2 = new StrPos(sp);
- // javajs.util.SB sb = new javajs.util.SB();
- sp.inc();
- patInt i1 = sp.getPatInt();
- patInt i2 = null;
- if (sp.match('}'))
- {
- i2 = i1;
- }
- else
- {
- if (!sp.match(','))
- {
- /*
- * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed
- * with , or }");
- */
- bad = true;
- }
- sp.inc();
- if (sp.match('}'))
- {
- i2 = new patInf();
- }
- else
- {
- i2 = sp.getPatInt();
- }
- }
- if (i1 == null || i2 == null)
- {
- /*
- * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}");
- */
- bad = true;
- }
- if (bad)
- {
- sp.dup(sp2);
- add(new oneChar(sp.c));
- }
- else
- {
- addMulti(i1, i2);
- }
- }
- else if (sp.escMatch('x') && next2Hex(sp))
- {
- sp.inc();
- int d = getHexDigit(sp);
- sp.inc();
- d = 16 * d + getHexDigit(sp);
- add(new oneChar((char) d));
- }
- else if (sp.escMatch('c'))
- {
- sp.inc();
- if (sp.c < Ctrl.cmap.length)
- {
- add(new oneChar(Ctrl.cmap[sp.c]));
- }
- else
- {
- add(new oneChar(sp.c));
- }
- }
- else if (sp.escMatch('f'))
- {
- add(new oneChar((char) 12));
- }
- else if (sp.escMatch('a'))
- {
- add(new oneChar((char) 7));
- }
- else if (sp.escMatch('t'))
- {
- add(new oneChar('\t'));
- }
- else if (sp.escMatch('n'))
- {
- add(new oneChar('\n'));
- }
- else if (sp.escMatch('r'))
- {
- add(new oneChar('\r'));
- }
- else if (sp.escMatch('b'))
- {
- add(new oneChar('\b'));
- }
- else if (sp.escMatch('e'))
- {
- add(new oneChar((char) 27));
- }
- else
- {
- add(new oneChar(sp.c));
- if (sp.match(')'))
- {
- RegSyntaxError.endItAll("Unmatched right paren in pattern");
- }
- }
- }
-
- // compiles all Pattern elements, internal method
- private Pattern _compile2(String pat, Rthings mk) throws RegSyntax
- {
- minMatch = null;
- sFlag = mFlag = ignoreCase = gFlag = false;
- StrPos sp = new StrPos(pat, 0);
- thePattern = _compile1(sp, mk);
- pt.marks = null;
- return thePattern;
- }
-
- Pattern p = null;
-
- Or or = null;
-
- Pattern _compile1(StrPos sp, Rthings mk) throws RegSyntax
- {
- while (!(sp.eos || (or != null && sp.match(')'))))
- {
- compile1(sp, mk);
- sp.inc();
- }
- if (sp.match(')'))
- {
- mk.parenLevel--;
- }
- else if (sp.eos && mk.parenLevel != 0)
- {
- RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);
- }
- if (or != null)
- {
- if (p == null)
- {
- p = new NullPattern();
- }
- or.addOr(p);
- return or;
- }
- return p == null ? new NullPattern() : p;
- }
-
- // add a multi object to the end of the chain
- // which applies to the last object
- void addMulti(patInt i1, patInt i2) throws RegSyntax
- {
- Pattern last, last2;
- for (last = p; last != null && last.next != null; last = last.next)
- {
- ;
- }
- if (last == null || last == p)
- {
- last2 = null;
- }
- else
- {
- for (last2 = p; last2.next != last; last2 = last2.next)
- {
- ;
- }
- }
- if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1)
- {
- ((Multi) last).matchFewest = true;
- }
- else if (last instanceof FastMulti && i1.intValue() == 0
- && i2.intValue() == 1)
- {
- ((FastMulti) last).matchFewest = true;
- }
- else if (last instanceof DotMulti && i1.intValue() == 0
- && i2.intValue() == 1)
- {
- ((DotMulti) last).matchFewest = true;
- }
- else if (last instanceof Multi || last instanceof DotMulti
- || last instanceof FastMulti)
- {
- throw new RegSyntax("Syntax error.");
- }
- else if (last2 == null)
- {
- p = mkMulti(i1, i2, p);
- }
- else
- {
- last2.next = mkMulti(i1, i2, last);
- }
- }
-
- final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)
- throws RegSyntax
- {
- if (p instanceof Any && p.next == null)
- {
- return (Pattern) new DotMulti(lo, hi);
- }
- return RegOpt.safe4fm(p) ? (Pattern) new FastMulti(lo, hi, p)
- : (Pattern) new Multi(lo, hi, p);
- }
-
- // process the bracket operator
- Pattern matchBracket(StrPos sp) throws RegSyntax
- {
- Bracket ret;
- if (sp.match('^'))
- {
- ret = new Bracket(true);
- sp.inc();
- }
- else
- {
- ret = new Bracket(false);
- }
- if (sp.match(']'))
- {
- // throw new RegSyntax
- RegSyntaxError.endItAll("Unmatched []");
- }
-
- while (!sp.eos && !sp.match(']'))
- {
- StrPos s1 = new StrPos(sp);
- s1.inc();
- StrPos s1_ = new StrPos(s1);
- s1_.inc();
- if (s1.match('-') && !s1_.match(']'))
- {
- StrPos s2 = new StrPos(s1);
- s2.inc();
- if (!s2.eos)
- {
- ret.addOr(new Range(sp.c, s2.c));
- }
- sp.inc();
- sp.inc();
- }
- else if (sp.escMatch('Q'))
- {
- sp.inc();
- while (!sp.escMatch('E'))
- {
- ret.addOr(new oneChar(sp.c));
- sp.inc();
- }
- }
- else if (sp.escMatch('d'))
- {
- ret.addOr(new Range('0', '9'));
- }
- else if (sp.escMatch('s'))
- {
- ret.addOr(new oneChar((char) 32));
- ret.addOr(new Range((char) 8, (char) 10));
- ret.addOr(new oneChar((char) 13));
- }
- else if (sp.escMatch('w'))
- {
- ret.addOr(new Range('a', 'z'));
- ret.addOr(new Range('A', 'Z'));
- ret.addOr(new Range('0', '9'));
- ret.addOr(new oneChar('_'));
- }
- else if (sp.escMatch('D'))
- {
- ret.addOr(new Range((char) 0, (char) 47));
- ret.addOr(new Range((char) 58, (char) 65535));
- }
- else if (sp.escMatch('S'))
- {
- ret.addOr(new Range((char) 0, (char) 7));
- ret.addOr(new Range((char) 11, (char) 12));
- ret.addOr(new Range((char) 14, (char) 31));
- ret.addOr(new Range((char) 33, (char) 65535));
- }
- else if (sp.escMatch('W'))
- {
- ret.addOr(new Range((char) 0, (char) 64));
- ret.addOr(new Range((char) 91, (char) 94));
- ret.addOr(new oneChar((char) 96));
- ret.addOr(new Range((char) 123, (char) 65535));
- }
- else if (sp.escMatch('x') && next2Hex(sp))
- {
- sp.inc();
- int d = getHexDigit(sp);
- sp.inc();
- d = 16 * d + getHexDigit(sp);
- ret.addOr(new oneChar((char) d));
- }
- else if (sp.escMatch('a'))
- {
- ret.addOr(new oneChar((char) 7));
- }
- else if (sp.escMatch('f'))
- {
- ret.addOr(new oneChar((char) 12));
- }
- else if (sp.escMatch('e'))
- {
- ret.addOr(new oneChar((char) 27));
- }
- else if (sp.escMatch('n'))
- {
- ret.addOr(new oneChar('\n'));
- }
- else if (sp.escMatch('t'))
- {
- ret.addOr(new oneChar('\t'));
- }
- else if (sp.escMatch('r'))
- {
- ret.addOr(new oneChar('\r'));
- }
- else if (sp.escMatch('c'))
- {
- sp.inc();
- if (sp.c < Ctrl.cmap.length)
- {
- ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
- }
- else
- {
- ret.addOr(new oneChar(sp.c));
- }
- }
- else if (isOctalString(sp))
- {
- int d = sp.c - '0';
- sp.inc();
- d = 8 * d + sp.c - '0';
- StrPos sp2 = new StrPos(sp);
- sp2.inc();
- if (isOctalDigit(sp2, false))
- {
- sp.inc();
- d = 8 * d + sp.c - '0';
- }
- ret.addOr(new oneChar((char) d));
- }
- else
- {
- ret.addOr(new oneChar(sp.c));
- }
- sp.inc();
- }
- return ret;
- }
-
- /**
- * Converts the stored Pattern to a String -- this is a decompile. Note that
- * \t and \n will really print out here, Not just the two character
- * representations. Also be prepared to see some strange output if your
- * characters are not printable.
- */
- public String toString()
- {
- if (false && thePattern == null)
- {
- return "";
- }
- else
- {
- javajs.util.SB sb = new javajs.util.SB();
- if (esc != Pattern.ESC)
- {
- sb.append("(?e=");
- sb.appendC(esc);
- sb.append(")");
- }
- if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase
- || dontMatchInQuotes || optimized())
- {
- sb.append("(?");
- if (ignoreCase)
- {
- sb.append("i");
- }
- if (mFlag)
- {
- sb.append("m");
- }
- if (sFlag || !dotDoesntMatchCR)
- {
- sb.append("s");
- }
- if (dontMatchInQuotes)
- {
- sb.append("Q");
- }
- if (optimized())
- {
- sb.append("o");
- }
- if (gFlag)
- {
- sb.append("g");
- }
- sb.append(")");
- }
- String patstr = thePattern.toString();
- if (esc != Pattern.ESC)
- {
- patstr = reEscape(patstr, Pattern.ESC, esc);
- }
- sb.append(patstr);
- return sb.toString();
- }
- }
-
- // Re-escape Pattern, allows us to use a different escape
- // character.
- static String reEscape(String s, char oldEsc, char newEsc)
- {
- if (oldEsc == newEsc)
- {
- return s;
- }
- int i;
- javajs.util.SB sb = new javajs.util.SB();
- for (i = 0; i < s.length(); i++)
- {
- if (s.charAt(i) == oldEsc && i + 1 < s.length())
- {
- if (s.charAt(i + 1) == oldEsc)
- {
- sb.appendC(oldEsc);
- }
- else
- {
- sb.appendC(newEsc);
- sb.appendC(s.charAt(i + 1));
- }
- i++;
- }
- else if (s.charAt(i) == newEsc)
- {
- sb.appendC(newEsc);
- sb.appendC(newEsc);
- }
- else
- {
- sb.appendC(s.charAt(i));
- }
- }
- return sb.toString();
- }
-
-// /**
-// * This method implements FilenameFilter, allowing one to use a Regex to
-// * search through a directory using File.list. There is a FileRegex now that
-// * does this better.
-// *
-// * @see com.stevesoft.pat.FileRegex
-// */
-// public boolean accept(File dir, String s)
-// {
-// return search(s);
-// }
-
- /** The version of this package */
- final static public String version()
- {
- return "lgpl release 1.5.3";
- }
-
- /**
- * Once this method is called, the state of variables ignoreCase and
- * dontMatchInQuotes should not be changed as the results will be
- * unpredictable. However, search and matchAt will run more quickly. Note that
- * you can check to see if the pattern has been optimized by calling the
- * optimized() method.
- * <p>
- * This method will attempt to rewrite your pattern in a way that makes it
- * faster (not all patterns execute at the same speed). In general, "(?: ... )"
- * will be faster than "( ... )" so if you don't need the backreference, you
- * should group using the former pattern.
- * <p>
- * It will also introduce new pattern elements that you can't get to
- * otherwise, for example if you have a large table of strings, i.e. the
- * months of the year "(January|February|...)" optimize() will make a
- * Hashtable that takes it to the next appropriate pattern element --
- * eliminating the need for a linear search.
- *
- * @see com.stevesoft.pat.Regex#optimized
- * @see com.stevesoft.pat.Regex#ignoreCase
- * @see com.stevesoft.pat.Regex#dontMatchInQuotes
- * @see com.stevesoft.pat.Regex#matchAt
- * @see com.stevesoft.pat.Regex#search
- */
- public void optimize()
- {
- if (optimized() || thePattern == null)
- {
- return;
- }
- minMatch = new patInt(0); // thePattern.countMinChars();
- thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes);
- skipper = Skip.findSkip(this);
- // RegOpt.setParents(this);
- return;
- }
-
- Skip skipper;
-
- /**
- * This function returns true if the optimize method has been called.
- */
- public boolean optimized()
- {
- return minMatch != null;
- }
-
- /**
- * A bit of syntactic surgar for those who want to make their code look more
- * perl-like. To use this initialize your Regex object by saying:
- *
- * <pre>
- * Regex r1 = Regex.perlCode("s/hello/goodbye/");
- * Regex r2 = Regex.perlCode("s'fish'frog'i");
- * Regex r3 = Regex.perlCode("m'hello');
- * </pre>
- *
- * The i for ignoreCase is supported in this syntax, as well as m, s, and x.
- * The g flat is a bit of a special case.
- * <p>
- * If you wish to replace all occurences of a pattern, you do not put a 'g' in
- * the perlCode, but call Regex's replaceAll method.
- * <p>
- * If you wish to simply and only do a search for r2's pattern, you can do
- * this by calling the searchFrom method method repeatedly, or by calling
- * search repeatedly if the g flag is set.
- * <p>
- * Note: Currently perlCode does <em>not</em> support the (?e=#) syntax for
- * changing the escape character.
- */
-
- public static Regex perlCode(String s)
- {
- // this file is big enough, see parsePerl.java
- // for this function.
- return parsePerl.parse(s);
- }
-
- static final char back_slash = '\\';
-
- /**
- * Checks to see if there are only literal and no special pattern elements in
- * this Regex.
- */
- public boolean isLiteral()
- {
- Pattern x = thePattern;
- while (x != null)
- {
- if (x instanceof oneChar)
- {
- ;
- }
- else if (x instanceof Skipped)
- {
- ;
- }
- else
- {
- return false;
- }
- x = x.next;
- }
- return true;
- }
-
- /**
- * You only need to know about this if you are inventing your own pattern
- * elements.
- */
- public patInt countMinChars()
- {
- return thePattern.countMinChars();
- }
-
- /**
- * You only need to know about this if you are inventing your own pattern
- * elements.
- */
- public patInt countMaxChars()
- {
- return thePattern.countMaxChars();
- }
-
- boolean isHexDigit(StrPos sp)
- {
- boolean r = !sp.eos
- && !sp.dontMatch
- && ((sp.c >= '0' && sp.c <= '9')
- || (sp.c >= 'a' && sp.c <= 'f') || (sp.c >= 'A' && sp.c <= 'F'));
- return r;
- }
-
- boolean isOctalDigit(StrPos sp, boolean first)
- {
- boolean r = !sp.eos && !(first ^ sp.dontMatch) && sp.c >= '0'
- && sp.c <= '7';
- return r;
- }
-
- int getHexDigit(StrPos sp)
- {
- if (sp.c >= '0' && sp.c <= '9')
- {
- return sp.c - '0';
- }
- if (sp.c >= 'a' && sp.c <= 'f')
- {
- return sp.c - 'a' + 10;
- }
- return sp.c - 'A' + 10;
- }
-
- boolean next2Hex(StrPos sp)
- {
- StrPos sp2 = new StrPos(sp);
- sp2.inc();
- if (!isHexDigit(sp2))
- {
- return false;
- }
- sp2.inc();
- if (!isHexDigit(sp2))
- {
- return false;
- }
- return true;
- }
-
- boolean isOctalString(StrPos sp)
- {
- if (!isOctalDigit(sp, true))
- {
- return false;
- }
- StrPos sp2 = new StrPos(sp);
- sp2.inc();
- if (!isOctalDigit(sp2, false))
- {
- return false;
- }
- return true;
- }
-}
+//\r
+// This software is now distributed according to\r
+// the Lesser Gnu Public License. Please see\r
+// http://www.gnu.org/copyleft/lesser.txt for\r
+// the details.\r
+// -- Happy Computing!\r
+//\r
+package com.stevesoft.pat;\r
+\r
+import jalview.util.MessageManager;\r
+\r
+import java.util.BitSet;\r
+import java.util.Hashtable;\r
+\r
+import com.stevesoft.pat.wrap.StringWrap;\r
+\r
+/** Matches a Unicode punctuation character. */\r
+class UnicodePunct extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/** Matches a Unicode white space character. */\r
+class UnicodeWhite extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/**\r
+ * Matches a character that is not a Unicode punctuation character.\r
+ */\r
+class NUnicodePunct extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/**\r
+ * Matches a character that is not a Unicode white space character.\r
+ */\r
+class NUnicodeWhite extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/** Matches a Unicode word character: an alphanumeric or underscore. */\r
+class UnicodeW extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ if (from >= s.length())\r
+ {\r
+ return -1;\r
+ }\r
+ char c = s.charAt(from);\r
+ return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to\r
+ : -1;\r
+ }\r
+}\r
+\r
+/** Matches a character that is not a Unicode alphanumeric or underscore. */\r
+class NUnicodeW extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ if (from >= s.length())\r
+ {\r
+ return -1;\r
+ }\r
+ char c = s.charAt(from);\r
+ return !(Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to\r
+ : -1;\r
+ }\r
+}\r
+\r
+/** Matches a Unicode decimal digit. */\r
+class UnicodeDigit extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to\r
+ : -1;\r
+ }\r
+}\r
+\r
+/** Matches a character that is not a Unicode digit. */\r
+class NUnicodeDigit extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to\r
+ : -1;\r
+ }\r
+}\r
+\r
+/** Matches a Unicode math character. */\r
+class UnicodeMath extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/** Matches a non-math Unicode character. */\r
+class NUnicodeMath extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/** Matches a Unicode currency symbol. */\r
+class UnicodeCurrency extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/** Matches a non-currency symbol Unicode character. */\r
+class NUnicodeCurrency extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/** Matches a Unicode alphabetic character. */\r
+class UnicodeAlpha extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;\r
+ }\r
+}\r
+\r
+/** Matches a non-alphabetic Unicode character. */\r
+class NUnicodeAlpha extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to\r
+ : -1;\r
+ }\r
+}\r
+\r
+/** Matches an upper case Unicode character. */\r
+class UnicodeUpper extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && isUpper(s.charAt(from)) ? to : -1;\r
+ }\r
+\r
+ final boolean isUpper(char c)\r
+ {\r
+ return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);\r
+ }\r
+}\r
+\r
+/** Matches a lower case Unicode character. */\r
+class UnicodeLower extends UniValidator\r
+{\r
+ public int validate(StringLike s, int from, int to)\r
+ {\r
+ return from < s.length() && isLower(s.charAt(from)) ? to : -1;\r
+ }\r
+\r
+ final boolean isLower(char c)\r
+ {\r
+ return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);\r
+ }\r
+}\r
+\r
+/**\r
+ * Regex provides the parser which constructs the linked list of Pattern classes\r
+ * from a String.\r
+ * <p>\r
+ * For the purpose of this documentation, the fact that java interprets the\r
+ * backslash will be ignored. In practice, however, you will need a double\r
+ * backslash to obtain a string that contains a single backslash character.\r
+ * Thus, the example pattern "\b" should really be typed as "\\b" inside java\r
+ * code.\r
+ * <p>\r
+ * Note that Regex is part of package "com.stevesoft.pat". To use it, simply\r
+ * import com.stevesoft.pat.Regex at the top of your file.\r
+ * <p>\r
+ * Regex is made with a constructor that takes a String that defines the regular\r
+ * expression. Thus, for example\r
+ * \r
+ * <pre>\r
+ * Regex r = new Regex("[a-c]*");\r
+ * </pre>\r
+ * \r
+ * matches any number of characters so long as they are 'a', 'b', or 'c'.\r
+ * <p>\r
+ * To attempt to match the Pattern to a given string, you can use either the\r
+ * search(String) member function, or the matchAt(String,int position) member\r
+ * function. These functions return a boolean which tells you whether or not the\r
+ * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in\r
+ * the Regex object appropriately.\r
+ * <p>\r
+ * The portion of the string before the match can be obtained by the left()\r
+ * member, and the portion after the match can be obtained by the right()\r
+ * member.\r
+ * <p>\r
+ * Essentially, this package implements a syntax that is very much like the perl\r
+ * 5 regular expression syntax.\r
+ * \r
+ * Longer example:\r
+ * \r
+ * <pre>\r
+ * Regex r = new Regex("x(a|b)y");\r
+ * r.matchAt("xay", 0);\r
+ * System.out.println("sub = " + r.stringMatched(1));\r
+ * </pre>\r
+ * \r
+ * The above would print "sub = a".\r
+ * \r
+ * <pre>\r
+ * r.left() // would return "x"\r
+ * r.right() // would return "y"\r
+ * </pre>\r
+ * \r
+ * <p>\r
+ * Differences between this package and perl5:<br>\r
+ * The extended Pattern for setting flags, is now supported, but the flags are\r
+ * different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the\r
+ * "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the\r
+ * escape character. The pattern\r
+ * \r
+ * <pre>\r
+ * (?e=#)#d+\r
+ * </pre>\r
+ * \r
+ * is the same as\r
+ * \r
+ * <pre>\r
+ * \d+\r
+ * </pre>, but note that the sequence\r
+ * \r
+ * <pre>\r
+ * (?e=#)\r
+ * </pre>\r
+ * \r
+ * <b>must</b> occur at the very beginning of the pattern. There may be other\r
+ * small differences as well. I will either make my package conform or note them\r
+ * as I become aware of them.\r
+ * <p>\r
+ * This package supports additional patterns not in perl5: <center> <table\r
+ * border=1>\r
+ * <tr>\r
+ * <td>(?@())</td>\r
+ * <td>Group</td>\r
+ * <td>This matches all characters between the '(' character and the balancing\r
+ * ')' character. Thus, it will match "()" as well as "(())". The balancing\r
+ * characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>\r
+ * <tr>\r
+ * <td>(?&lt;1)</td>\r
+ * <td>Backup</td>\r
+ * <td>Moves the pointer backwards within the text. This allows you to make a\r
+ * "look behind." It fails if it attempts to move to a position before the\r
+ * beginning of the string. "x(?&lt;1)" is equivalent to "(?=x)". The number, 1\r
+ * in this example, is the number of characters to move backwards.</td>\r
+ * </table> </center>\r
+ * \r
+ * \r
+ * @author Steven R. Brandt\r
+ * @version package com.stevesoft.pat, release 1.5.3\r
+ * @see Pattern\r
+ */\r
+public class Regex extends RegRes// implements FilenameFilter\r
+{\r
+ /**\r
+ * BackRefOffset gives the identity number of the first pattern. Version 1.0\r
+ * used zero, version 1.1 uses 1 to be more compatible with perl.\r
+ */\r
+ static int BackRefOffset = 1;\r
+\r
+ private static Pattern none = new NoPattern();\r
+\r
+ Pattern thePattern = none;\r
+\r
+ patInt minMatch = new patInt(0);\r
+\r
+ static Hashtable validators = new Hashtable();\r
+ static\r
+ {\r
+ define("p", "(?>1)", new UnicodePunct());\r
+ define("P", "(?>1)", new NUnicodePunct());\r
+ define("s", "(?>1)", new UnicodeWhite());\r
+ define("S", "(?>1)", new NUnicodeWhite());\r
+ define("w", "(?>1)", new UnicodeW());\r
+ define("W", "(?>1)", new NUnicodeW());\r
+ define("d", "(?>1)", new UnicodeDigit());\r
+ define("D", "(?>1)", new NUnicodeDigit());\r
+ define("m", "(?>1)", new UnicodeMath());\r
+ define("M", "(?>1)", new NUnicodeMath());\r
+ define("c", "(?>1)", new UnicodeCurrency());\r
+ define("C", "(?>1)", new NUnicodeCurrency());\r
+ define("a", "(?>1)", new UnicodeAlpha());\r
+ define("A", "(?>1)", new NUnicodeAlpha());\r
+ define("uc", "(?>1)", new UnicodeUpper());\r
+ define("lc", "(?>1)", new UnicodeLower());\r
+ }\r
+\r
+ /** Set the dontMatch in quotes flag. */\r
+ public void setDontMatchInQuotes(boolean b)\r
+ {\r
+ dontMatchInQuotes = b;\r
+ }\r
+\r
+ /** Find out if the dontMatchInQuotes flag is enabled. */\r
+ public boolean getDontMatchInQuotes()\r
+ {\r
+ return dontMatchInQuotes;\r
+ }\r
+\r
+ boolean dontMatchInQuotes = false;\r
+\r
+ /**\r
+ * Set the state of the ignoreCase flag. If set to true, then the pattern\r
+ * matcher will ignore case when searching for a match.\r
+ */\r
+ public void setIgnoreCase(boolean b)\r
+ {\r
+ ignoreCase = b;\r
+ }\r
+\r
+ /**\r
+ * Get the state of the ignoreCase flag. Returns true if we are ignoring the\r
+ * case of the pattern, false otherwise.\r
+ */\r
+ public boolean getIgnoreCase()\r
+ {\r
+ return ignoreCase;\r
+ }\r
+\r
+ boolean ignoreCase = false;\r
+\r
+ static boolean defaultMFlag = false;\r
+\r
+ /**\r
+ * Set the default value of the m flag. If it is set to true, then the MFlag\r
+ * will be on for any regex search executed.\r
+ */\r
+ public static void setDefaultMFlag(boolean mFlag)\r
+ {\r
+ defaultMFlag = mFlag;\r
+ }\r
+\r
+ /**\r
+ * Get the default value of the m flag. If it is set to true, then the MFlag\r
+ * will be on for any regex search executed.\r
+ */\r
+ public static boolean getDefaultMFlag()\r
+ {\r
+ return defaultMFlag;\r
+ }\r
+\r
+ /**\r
+ * Initializes the object without a Pattern. To supply a Pattern use\r
+ * compile(String s).\r
+ * \r
+ * @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
+ */\r
+ public Regex()\r
+ {\r
+ }\r
+\r
+ /**\r
+ * Create and compile a Regex, but do not throw any exceptions. If you wish to\r
+ * have exceptions thrown for syntax errors, you must use the no-argument\r
+ * Regex() constructor to create the Regex object, and then call the compile\r
+ * method. Therefore, you should only call this constructor when you know your\r
+ * pattern is right.\r
+ * \r
+ * @see com.stevesoft.pat.Regex#search(java.lang.String)\r
+ * @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
+ */\r
+ public Regex(String s)\r
+ {\r
+ try\r
+ {\r
+ compile(s);\r
+ } catch (RegSyntax rs)\r
+ {\r
+ }\r
+ }\r
+\r
+ ReplaceRule rep = null;\r
+\r
+ /**\r
+ * Create and compile both a Regex and a ReplaceRule.\r
+ * \r
+ * @see com.stevesoft.pat.ReplaceRule\r
+ * @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
+ */\r
+ public Regex(String s, String rp)\r
+ {\r
+ this(s);\r
+ rep = parsePerl.perlCode(rp);\r
+ }\r
+\r
+ /**\r
+ * Create and compile a Regex, but give it the ReplaceRule specified. This\r
+ * allows the user finer control of the Replacement process, if that is\r
+ * desired.\r
+ * \r
+ * @see com.stevesoft.pat.ReplaceRule\r
+ * @see com.stevesoft.pat.Regex#compile(java.lang.String)\r
+ */\r
+ public Regex(String s, ReplaceRule rp)\r
+ {\r
+ this(s);\r
+ rep = rp;\r
+ }\r
+\r
+ /**\r
+ * Change the ReplaceRule of this Regex by compiling a new one using String\r
+ * rp.\r
+ */\r
+ public void setReplaceRule(String rp)\r
+ {\r
+ rep = parsePerl.perlCode(rp);\r
+ repr = null; // Clear Replacer history\r
+ }\r
+\r
+ /** Change the ReplaceRule of this Regex to rp. */\r
+ public void setReplaceRule(ReplaceRule rp)\r
+ {\r
+ rep = rp;\r
+ }\r
+\r
+ /**\r
+ * Test to see if a custom defined rule exists.\r
+ * \r
+ * @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)\r
+ */\r
+ public static boolean isDefined(String nm)\r
+ {\r
+ return validators.get(nm) != null;\r
+ }\r
+\r
+ /**\r
+ * Removes a custom defined rule.\r
+ * \r
+ * @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)\r
+ */\r
+ public static void undefine(String nm)\r
+ {\r
+ validators.remove(nm);\r
+ }\r
+\r
+ /**\r
+ * Defines a method to create a new rule. See test/deriv2.java and\r
+ * test/deriv3.java for examples of how to use it.\r
+ */\r
+ public static void define(String nm, String pat, Validator v)\r
+ {\r
+ v.pattern = pat;\r
+ validators.put(nm, v);\r
+ }\r
+\r
+ /**\r
+ * Defines a shorthand for a pattern. The pattern will be invoked by a string\r
+ * that has the form "(??"+nm+")".\r
+ */\r
+ public static void define(String nm, String pat)\r
+ {\r
+ validators.put(nm, pat);\r
+ }\r
+\r
+ /** Get the current ReplaceRule. */\r
+ public ReplaceRule getReplaceRule()\r
+ {\r
+ return rep;\r
+ }\r
+\r
+ Replacer repr = null;\r
+\r
+ final Replacer _getReplacer()\r
+ {\r
+ return repr == null ? repr = new Replacer() : repr;\r
+ }\r
+\r
+ public Replacer getReplacer()\r
+ {\r
+ if (repr == null)\r
+ {\r
+ repr = new Replacer();\r
+ }\r
+ repr.rh.me = this;\r
+ repr.rh.prev = null;\r
+ return repr;\r
+ }\r
+\r
+ /**\r
+ * Replace the first occurrence of this pattern in String s according to the\r
+ * ReplaceRule.\r
+ * \r
+ * @see com.stevesoft.pat.ReplaceRule\r
+ * @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceFirst(String s)\r
+ {\r
+ return _getReplacer().replaceFirstRegion(s, this, 0, s.length())\r
+ .toString();\r
+ }\r
+\r
+ /**\r
+ * Replace the first occurrence of this pattern in String s beginning with\r
+ * position pos according to the ReplaceRule.\r
+ * \r
+ * @see com.stevesoft.pat.ReplaceRule\r
+ * @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceFirstFrom(String s, int pos)\r
+ {\r
+ return _getReplacer().replaceFirstRegion(s, this, pos, s.length())\r
+ .toString();\r
+ }\r
+\r
+ /**\r
+ * Replace the first occurrence of this pattern in String s beginning with\r
+ * position start and ending with end according to the ReplaceRule.\r
+ * \r
+ * @see com.stevesoft.pat.ReplaceRule\r
+ * @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceFirstRegion(String s, int start, int end)\r
+ {\r
+ return _getReplacer().replaceFirstRegion(s, this, start, end)\r
+ .toString();\r
+ }\r
+\r
+ /**\r
+ * Replace all occurrences of this pattern in String s according to the\r
+ * ReplaceRule.\r
+ * \r
+ * @see com.stevesoft.pat.ReplaceRule\r
+ * @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceAll(String s)\r
+ {\r
+ return _getReplacer().replaceAllRegion(s, this, 0, s.length())\r
+ .toString();\r
+ }\r
+\r
+ public StringLike replaceAll(StringLike s)\r
+ {\r
+ return _getReplacer().replaceAllRegion(s, this, 0, s.length());\r
+ }\r
+\r
+ /**\r
+ * Replace all occurrences of this pattern in String s beginning with position\r
+ * pos according to the ReplaceRule.\r
+ * \r
+ * @see com.stevesoft.pat.ReplaceRule\r
+ * @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceAllFrom(String s, int pos)\r
+ {\r
+ return _getReplacer().replaceAllRegion(s, this, pos, s.length())\r
+ .toString();\r
+ }\r
+\r
+ /**\r
+ * Replace all occurrences of this pattern in String s beginning with position\r
+ * start and ending with end according to the ReplaceRule.\r
+ * \r
+ * @see com.stevesoft.pat.ReplaceRule\r
+ * @see com.stevesoft.pat.Regex#getReplaceRule()\r
+ */\r
+ public String replaceAllRegion(String s, int start, int end)\r
+ {\r
+ return _getReplacer().replaceAllRegion(s, this, start, end).toString();\r
+ }\r
+\r
+ /** Essentially clones the Regex object */\r
+ public Regex(Regex r)\r
+ {\r
+ super((RegRes) r);\r
+ dontMatchInQuotes = r.dontMatchInQuotes;\r
+ esc = r.esc;\r
+ ignoreCase = r.ignoreCase;\r
+ gFlag = r.gFlag;\r
+ if (r.rep == null)\r
+ {\r
+ rep = null;\r
+ }\r
+ else\r
+ {\r
+ rep = (ReplaceRule) r.rep.clone();\r
+ }\r
+ /*\r
+ * try { compile(r.toString()); } catch(RegSyntax r_) {}\r
+ */\r
+ thePattern = r.thePattern.clone(new Hashtable());\r
+ minMatch = r.minMatch;\r
+ skipper = r.skipper;\r
+ }\r
+\r
+ /**\r
+ * By default, the escape character is the backslash, but you can make it\r
+ * anything you want by setting this variable.\r
+ */\r
+ public char esc = Pattern.ESC;\r
+\r
+ /**\r
+ * This method compiles a regular expression, making it possible to call the\r
+ * search or matchAt methods.\r
+ * <p>\r
+ * A pattern that starts with "(?e=X)" is re-escaped using X as its escape\r
+ * character; otherwise, if the esc field differs from Pattern.ESC, the\r
+ * pattern is re-escaped from esc.\r
+ * \r
+ * @param prepat\r
+ *          the pattern text to compile\r
+ * @exception com.stevesoft.pat.RegSyntax\r
+ * is thrown if a syntax error is encountered in the\r
+ * pattern. For example, "x{3,1}" or "*a" are not valid\r
+ * patterns.\r
+ * @see com.stevesoft.pat.Regex#search\r
+ * @see com.stevesoft.pat.Regex#matchAt\r
+ */\r
+ public void compile(String prepat) throws RegSyntax\r
+ {\r
+ // codify may return null, in which case the pattern is used as given.\r
+ String postpat = parsePerl.codify(prepat, true);\r
+ String pat = postpat == null ? prepat : postpat;\r
+ // Reset per-pattern state before the Rthings snapshot is taken.\r
+ minMatch = null;\r
+ ignoreCase = false;\r
+ dontMatchInQuotes = false;\r
+ Rthings mk = new Rthings(this);\r
+ // mk.val before compiling; the difference afterwards becomes numSubs_.\r
+ int offset = mk.val;\r
+ String newpat = pat;\r
+ thePattern = none;\r
+ p = null;\r
+ or = null;\r
+ minMatch = new patInt(0);\r
+ StrPos sp = new StrPos(pat, 0);\r
+ if (sp.incMatch("(?e="))\r
+ {\r
+ char newEsc = sp.c;\r
+ sp.inc();\r
+ if (sp.match(')'))\r
+ {\r
+ // "(?e=X)" is six characters long; strip it and translate the rest.\r
+ newpat = reEscape(pat.substring(6), newEsc, Pattern.ESC);\r
+ }\r
+ }\r
+ else if (esc != Pattern.ESC)\r
+ {\r
+ newpat = reEscape(pat, esc, Pattern.ESC);\r
+ }\r
+ thePattern = _compile2(newpat, mk);\r
+ numSubs_ = mk.val - offset;\r
+ // NOTE(review): presumably copies the flags collected in mk back onto this\r
+ // Regex - confirm against Rthings.set.\r
+ mk.set(this);\r
+ }\r
+\r
+ /*\r
+ * If a Regex is compared against a Regex, a check is done to see that the\r
+ * patterns are equal as well as the most recent match. If a Regex is compare\r
+ * with a RegRes, only the result of the most recent match is compared.\r
+ */\r
+ public boolean equals(Object o)\r
+ {\r
+ if (o instanceof Regex)\r
+ {\r
+ if (toString().equals(o.toString()))\r
+ {\r
+ return super.equals(o);\r
+ }\r
+ else\r
+ {\r
+ return false;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ return super.equals(o);\r
+ }\r
+ }\r
+\r
+ /**\r
+ * A clone by any other name would smell as sweet: returns a new Regex built\r
+ * with the copy constructor.\r
+ */\r
+ public Object clone()\r
+ {\r
+ return new Regex(this);\r
+ }\r
+\r
+ /**\r
+ * Return a clone of the underlying RegRes object (the superclass clone, not\r
+ * a Regex copy).\r
+ */\r
+ public RegRes result()\r
+ {\r
+ return (RegRes) super.clone();\r
+ }\r
+\r
+ // prep sets global variables of class\r
+ // Pattern so that it can access them\r
+ // during an attempt at a match\r
+ Pthings pt = new Pthings();\r
+\r
+ /**\r
+ * Loads the shared Pthings object with the state needed for one match\r
+ * attempt against s and records s as the current source.\r
+ * \r
+ * @param s\r
+ *          the text about to be searched\r
+ * @return the shared pt field, reinitialised for this search\r
+ */\r
+ final Pthings prep(StringLike s)\r
+ {\r
+   // if(gFlag)\r
+   pt.lastPos = matchedTo();\r
+   if (pt.lastPos < 0)\r
+   {\r
+     pt.lastPos = 0;\r
+   }\r
+   // The end of the previous match only means something for the same\r
+   // underlying text, so compare the new text with the previous source.\r
+   if ((s == null ? null : s.unwrap()) != (src == null ? null : src.unwrap()))\r
+   {\r
+     pt.lastPos = 0;\r
+   }\r
+   src = s;\r
+   pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);\r
+   pt.mFlag = (mFlag | defaultMFlag);\r
+   pt.ignoreCase = ignoreCase;\r
+   pt.no_check = false;\r
+   // _search stores pt.marks in the marks field, so the old array is wiped\r
+   // before the reference is dropped.\r
+   if (pt.marks != null)\r
+   {\r
+     for (int i = 0; i < pt.marks.length; i++)\r
+     {\r
+       pt.marks[i] = -1;\r
+     }\r
+   }\r
+   pt.marks = null;\r
+   pt.nMarks = numSubs_;\r
+   pt.src = s;\r
+   if (dontMatchInQuotes)\r
+   {\r
+     setCbits(s, pt);\r
+   }\r
+   else\r
+   {\r
+     pt.cbits = null;\r
+   }\r
+   return pt;\r
+ }\r
+\r
+ /**\r
+ * Attempt to match a Pattern beginning at a specified location within the\r
+ * string. Implemented as a search whose start and end are both start_pos.\r
+ * \r
+ * @see com.stevesoft.pat.Regex#search\r
+ */\r
+ public boolean matchAt(String s, int start_pos)\r
+ {\r
+ return _search(s, start_pos, start_pos);\r
+ }\r
+\r
+ /**\r
+ * Attempt to match a Pattern beginning at a specified location within the\r
+ * StringLike. Implemented as a search whose start and end are both\r
+ * start_pos.\r
+ * \r
+ * @see com.stevesoft.pat.Regex#search\r
+ */\r
+ public boolean matchAt(StringLike s, int start_pos)\r
+ {\r
+ return _search(s, start_pos, start_pos);\r
+ }\r
+\r
+ /**\r
+ * Looks for the first occurrence of a match anywhere in the String s.\r
+ * \r
+ * @throws NullPointerException\r
+ *           if s is null\r
+ * @see com.stevesoft.pat.Regex#searchFrom\r
+ * @see com.stevesoft.pat.Regex#matchAt\r
+ */\r
+ public boolean search(String s)\r
+ {\r
+   if (s != null)\r
+   {\r
+     return _search(s, 0, s.length());\r
+   }\r
+   throw new NullPointerException(MessageManager.getString("exception.null_string_given_to_regex_search"));\r
+ }\r
+\r
+ /**\r
+ * Looks for the first occurrence of a match anywhere in the StringLike sl.\r
+ * \r
+ * @throws NullPointerException\r
+ *           if sl is null\r
+ */\r
+ public boolean search(StringLike sl)\r
+ {\r
+   if (sl != null)\r
+   {\r
+     return _search(sl, 0, sl.length());\r
+   }\r
+   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search"));\r
+ }\r
+\r
+ /**\r
+ * Searches the String s backwards, trying start positions from s.length()\r
+ * down to 0.\r
+ * \r
+ * @throws NullPointerException\r
+ *           if s is null\r
+ */\r
+ public boolean reverseSearch(String s)\r
+ {\r
+   if (s != null)\r
+   {\r
+     return _reverseSearch(s, 0, s.length());\r
+   }\r
+   throw new NullPointerException(MessageManager.getString("exception.null_string_given_to_regex_reverse_search"));\r
+ }\r
+\r
+ /**\r
+ * Searches the StringLike sl backwards, trying start positions from\r
+ * sl.length() down to 0.\r
+ * \r
+ * @throws NullPointerException\r
+ *           if sl is null\r
+ */\r
+ public boolean reverseSearch(StringLike sl)\r
+ {\r
+   if (sl != null)\r
+   {\r
+     return _reverseSearch(sl, 0, sl.length());\r
+   }\r
+   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_reverse_search"));\r
+ }\r
+\r
+ /**\r
+ * Looks for the first occurrence of a match in the String s, considering\r
+ * only positions from start up to s.length().\r
+ * \r
+ * @throws NullPointerException\r
+ *           if s is null\r
+ */\r
+ public boolean searchFrom(String s, int start)\r
+ {\r
+   if (s != null)\r
+   {\r
+     return _search(s, start, s.length());\r
+   }\r
+   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_from"));\r
+ }\r
+\r
+ /**\r
+ * Looks for the first occurrence of a match in the StringLike s, considering\r
+ * only positions from start up to s.length().\r
+ * \r
+ * @throws NullPointerException\r
+ *           if s is null\r
+ */\r
+ public boolean searchFrom(StringLike s, int start)\r
+ {\r
+   if (s != null)\r
+   {\r
+     return _search(s, start, s.length());\r
+   }\r
+   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_from"));\r
+ }\r
+\r
+ /**\r
+ * Looks for the first occurrence of a match inside the region of String s\r
+ * delimited by start and end.\r
+ * \r
+ * @throws NullPointerException\r
+ *           if s is null\r
+ */\r
+ public boolean searchRegion(String s, int start, int end)\r
+ {\r
+   if (s != null)\r
+   {\r
+     return _search(s, start, end);\r
+   }\r
+   throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_region"));\r
+ }\r
+\r
+ /**\r
+ * Set this to change the default behavior of the "." pattern. By default it\r
+ * now matches perl's behavior and fails to match the '\n' character. The\r
+ * value is combined with the s flag in prep().\r
+ */\r
+ public static boolean dotDoesntMatchCR = true;\r
+\r
+ // Text of the most recent successful search; cleared at the start of every\r
+ // search. Used with gFlagto to resume when the g flag is set.\r
+ StringLike gFlags;\r
+\r
+ // Position at which the next search resumes when the g flag is set.\r
+ int gFlagto = 0;\r
+\r
+ // The 'g' flag itself; see setGFlag/getGFlag.\r
+ boolean gFlag = false;\r
+\r
+ /**\r
+ * Set the 'g' flag. When set, a search on the same text as the previous\r
+ * successful search resumes from where that match left off.\r
+ */\r
+ public void setGFlag(boolean b)\r
+ {\r
+ gFlag = b;\r
+ }\r
+\r
+ /** Get the state of the 'g' flag, as last set by setGFlag or by compiling. */\r
+ public boolean getGFlag()\r
+ {\r
+ return gFlag;\r
+ }\r
+\r
+ // The 's' flag; when set, prep() turns off the dot-doesn't-match rule.\r
+ boolean sFlag = false;\r
+\r
+ /** Get the state of the sFlag */\r
+ public boolean getSFlag()\r
+ {\r
+ return sFlag;\r
+ }\r
+\r
+ // The 'm' flag; prep() ORs it with defaultMFlag for each match attempt.\r
+ boolean mFlag = false;\r
+\r
+ /** Get the state of the mFlag */\r
+ public boolean getMFlag()\r
+ {\r
+ return mFlag;\r
+ }\r
+\r
+ // String convenience overload: wraps s in a StringWrap and delegates to the\r
+ // StringLike search.\r
+ final boolean _search(String s, int start, int end)\r
+ {\r
+ return _search(new StringWrap(s), start, end);\r
+ }\r
+\r
+ final boolean _search(StringLike s, int start, int end)\r
+ {\r
+ if (gFlag && gFlagto > 0 && gFlags != null\r
+ && s.unwrap() == gFlags.unwrap())\r
+ {\r
+ start = gFlagto;\r
+ }\r
+ gFlags = null;\r
+\r
+ Pthings pt = prep(s);\r
+\r
+ int up = (minMatch == null ? end : end - minMatch.i);\r
+\r
+ if (up < start && end >= start)\r
+ {\r
+ up = start;\r
+ }\r
+\r
+ if (skipper == null)\r
+ {\r
+ for (int i = start; i <= up; i++)\r
+ {\r
+ charsMatched_ = thePattern.matchAt(s, i, pt);\r
+ if (charsMatched_ >= 0)\r
+ {\r
+ matchFrom_ = thePattern.mfrom;\r
+ marks = pt.marks;\r
+ gFlagto = matchFrom_ + charsMatched_;\r
+ gFlags = s;\r
+ return didMatch_ = true;\r
+ }\r
+ }\r
+ }\r
+ else\r
+ {\r
+ pt.no_check = true;\r
+ for (int i = start; i <= up; i++)\r
+ {\r
+ i = skipper.find(src, i, up);\r
+ if (i < 0)\r
+ {\r
+ charsMatched_ = matchFrom_ = -1;\r
+ return didMatch_ = false;\r
+ }\r
+ charsMatched_ = thePattern.matchAt(s, i, pt);\r
+ if (charsMatched_ >= 0)\r
+ {\r
+ matchFrom_ = thePattern.mfrom;\r
+ marks = pt.marks;\r
+ gFlagto = matchFrom_ + charsMatched_;\r
+ gFlags = s;\r
+ return didMatch_ = true;\r
+ }\r
+ }\r
+ }\r
+ return didMatch_ = false;\r
+ }\r
+\r
+ /*\r
+ * final boolean _search(LongStringLike s,long start,long end) { if(gFlag &&\r
+ * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null;\r
+ * \r
+ * Pthings pt=prep(s);\r
+ * \r
+ * int up = end;//(minMatch == null ? end : end-minMatch.i);\r
+ * \r
+ * if(up < start && end >= start) up = start;\r
+ * \r
+ * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ =\r
+ * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =\r
+ * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;\r
+ * return didMatch_=true; } } } else { pt.no_check = true; for(long i=start;i<=up;i++) {\r
+ * i = skipper.find(src,i,up); if(i<0) { charsMatched_ = matchFrom_ = -1;\r
+ * return didMatch_ = false; } charsMatched_ = thePattern.matchAt(s,i,pt);\r
+ * if(charsMatched_ >= 0) { matchFrom_ = thePattern.mfrom; marks = pt.marks;\r
+ * gFlagto = matchFrom_+charsMatched_; gFlags = s; return didMatch_=true; }\r
+ * else { i = s.adjustIndex(i); up = s.adjustEnd(i); } } } return\r
+ * didMatch_=false; }\r
+ */\r
+\r
+ // String convenience overload: wraps s in a StringWrap and delegates to the\r
+ // StringLike reverse search.\r
+ boolean _reverseSearch(String s, int start, int end)\r
+ {\r
+ return _reverseSearch(new StringWrap(s), start, end);\r
+ }\r
+\r
+ /**\r
+ * Core backward search: tries start positions from end down to start and\r
+ * stops at the first position where the pattern matches.\r
+ * <p>\r
+ * On success the match offsets, marks and g-flag resume state are recorded;\r
+ * the resume position is the character before the match.\r
+ * \r
+ * @return true if a match was found (also stored in didMatch_)\r
+ */\r
+ boolean _reverseSearch(StringLike s, int start, int end)\r
+ {\r
+   // gFlags is cleared on every search and only set again on a match, so it\r
+   // can be null here; guard it the same way _search does.\r
+   if (gFlag && gFlagto > 0 && gFlags != null\r
+       && s.unwrap() == gFlags.unwrap())\r
+   {\r
+     end = gFlagto;\r
+   }\r
+   gFlags = null;\r
+   Pthings pt = prep(s);\r
+   for (int i = end; i >= start; i--)\r
+   {\r
+     charsMatched_ = thePattern.matchAt(s, i, pt);\r
+     if (charsMatched_ >= 0)\r
+     {\r
+       matchFrom_ = thePattern.mfrom;\r
+       marks = pt.marks;\r
+       gFlagto = matchFrom_ - 1;\r
+       gFlags = s;\r
+       return didMatch_ = true;\r
+     }\r
+   }\r
+   return didMatch_ = false;\r
+ }\r
+\r
+ // This routine sets the cbits variable\r
+ // of class Pattern. Cbits is true for\r
+ // the bit corresponding to a character inside\r
+ // a set of quotes.\r
+ static StringLike lasts = null;\r
+\r
+ static BitSet lastbs = null;\r
+\r
+ static void setCbits(StringLike s, Pthings pt)\r
+ {\r
+ if (s == lasts)\r
+ {\r
+ pt.cbits = lastbs;\r
+ return;\r
+ }\r
+ BitSet bs = new BitSet(s.length());\r
+ char qc = ' ';\r
+ boolean setBit = false;\r
+ for (int i = 0; i < s.length(); i++)\r
+ {\r
+ if (setBit)\r
+ {\r
+ bs.set(i);\r
+ }\r
+ char c = s.charAt(i);\r
+ if (!setBit && c == '"')\r
+ {\r
+ qc = c;\r
+ setBit = true;\r
+ bs.set(i);\r
+ }\r
+ else if (!setBit && c == '\'')\r
+ {\r
+ qc = c;\r
+ setBit = true;\r
+ bs.set(i);\r
+ }\r
+ else if (setBit && c == qc)\r
+ {\r
+ setBit = false;\r
+ }\r
+ else if (setBit && c == '\\' && i + 1 < s.length())\r
+ {\r
+ i++;\r
+ if (setBit)\r
+ {\r
+ bs.set(i);\r
+ }\r
+ }\r
+ }\r
+ pt.cbits = lastbs = bs;\r
+ lasts = s;\r
+ }\r
+\r
+ // Creates a fresh instance of this object's runtime class, so that\r
+ // subclasses get sub-patterns of their own type without overriding\r
+ // anything. Yields null when the class cannot be instantiated.\r
+ Regex newRegex()\r
+ {\r
+   Regex fresh;\r
+   try\r
+   {\r
+     fresh = (Regex) getClass().newInstance();\r
+   } catch (InstantiationException ie)\r
+   {\r
+     fresh = null;\r
+   } catch (IllegalAccessException iae)\r
+   {\r
+     fresh = null;\r
+   }\r
+   return fresh;\r
+ }\r
+\r
+ /**\r
+ * Only needed for creating your own extensions of Regex. Appends p2 to the\r
+ * chain of patterns being built, or makes p2 the head of the chain when this\r
+ * is the first call.\r
+ */\r
+ protected void add(Pattern p2)\r
+ {\r
+   if (p != null)\r
+   {\r
+     p.add(p2);\r
+     return;\r
+   }\r
+   p = p2;\r
+ }\r
+\r
+ /**\r
+ * You only need to use this method if you are creating your own extensions to\r
+ * Regex. compile1 compiles one Pattern element at the current position of sp\r
+ * and appends it to the chain; it can be over-ridden to allow the Regex\r
+ * compiler to understand new syntax. See deriv.java for an example. This\r
+ * routine is the heart of class Regex. The val member of Rthings is used to\r
+ * number the ()'s in the Pattern.\r
+ * <p>\r
+ * On return sp is left on the last character of the element; the caller\r
+ * advances past it.\r
+ * \r
+ * @param sp\r
+ *          cursor into the pattern text\r
+ * @param mk\r
+ *          compile-time state (paren level, group counter, flags)\r
+ * @exception com.stevesoft.pat.RegSyntax\r
+ *              is thrown when a nonsensical pattern is supplied. For\r
+ *              example, a pattern beginning with *.\r
+ */\r
+ protected void compile1(StrPos sp, Rthings mk) throws RegSyntax\r
+ {\r
+   if (sp.match('['))\r
+   {\r
+     sp.inc();\r
+     add(matchBracket(sp));\r
+   }\r
+   else if (sp.match('|'))\r
+   {\r
+     // Close the current alternative and start a new one.\r
+     if (or == null)\r
+     {\r
+       or = new Or();\r
+     }\r
+     if (p == null)\r
+     {\r
+       p = new NullPattern();\r
+     }\r
+     or.addOr(p);\r
+     p = null;\r
+   }\r
+   else if (sp.incMatch("(?<"))\r
+   {\r
+     patInt i = sp.getPatInt();\r
+     if (i == null)\r
+     {\r
+       RegSyntaxError.endItAll("No int after (?<");\r
+     }\r
+     add(new Backup(i.intValue()));\r
+     if (!sp.match(')'))\r
+     {\r
+       RegSyntaxError.endItAll("No ) after (?<");\r
+     }\r
+   }\r
+   else if (sp.incMatch("(?>"))\r
+   {\r
+     patInt i = sp.getPatInt();\r
+     if (i == null)\r
+     {\r
+       RegSyntaxError.endItAll("No int after (?>");\r
+     }\r
+     add(new Backup(-i.intValue()));\r
+     if (!sp.match(')'))\r
+     {\r
+       RegSyntaxError.endItAll("No ) after (?>");\r
+     }\r
+   }\r
+   else if (sp.incMatch("(?@"))\r
+   {\r
+     char op = sp.c;\r
+     sp.inc();\r
+     char cl = sp.c;\r
+     sp.inc();\r
+     if (!sp.match(')'))\r
+     {\r
+       RegSyntaxError.endItAll("(?@ does not have closing paren");\r
+     }\r
+     add(new Group(op, cl));\r
+   }\r
+   else if (sp.incMatch("(?#"))\r
+   {\r
+     // Comment: skip to the closing paren, but never run past the end of\r
+     // the pattern text.\r
+     while (!sp.eos && !sp.match(')'))\r
+     {\r
+       sp.inc();\r
+     }\r
+     if (sp.eos)\r
+     {\r
+       RegSyntaxError.endItAll("Unclosed (?#");\r
+     }\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'w')\r
+   {\r
+     // \w == [a-zA-Z0-9_]\r
+     Bracket b = new Bracket(false);\r
+     b.addOr(new Range('a', 'z'));\r
+     b.addOr(new Range('A', 'Z'));\r
+     b.addOr(new Range('0', '9'));\r
+     b.addOr(new oneChar('_'));\r
+     add(b);\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'G')\r
+   {\r
+     add(new BackG());\r
+   }\r
+   else if (sp.dontMatch && sp.c == 's')\r
+   {\r
+     // \s == space, characters 8..10, and character 13\r
+     Bracket b = new Bracket(false);\r
+     b.addOr(new oneChar((char) 32));\r
+     b.addOr(new Range((char) 8, (char) 10));\r
+     b.addOr(new oneChar((char) 13));\r
+     add(b);\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'd')\r
+   {\r
+     // \d == [0-9]\r
+     Range digit = new Range('0', '9');\r
+     digit.printBrackets = true;\r
+     add(digit);\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'W')\r
+   {\r
+     // \W == [^a-zA-Z0-9_]\r
+     Bracket b = new Bracket(true);\r
+     b.addOr(new Range('a', 'z'));\r
+     b.addOr(new Range('A', 'Z'));\r
+     b.addOr(new Range('0', '9'));\r
+     b.addOr(new oneChar('_'));\r
+     add(b);\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'S')\r
+   {\r
+     // \S == negation of the \s set\r
+     Bracket b = new Bracket(true);\r
+     b.addOr(new oneChar((char) 32));\r
+     b.addOr(new Range((char) 8, (char) 10));\r
+     b.addOr(new oneChar((char) 13));\r
+     add(b);\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'D')\r
+   {\r
+     // \D == [^0-9]\r
+     Bracket b = new Bracket(true);\r
+     b.addOr(new Range('0', '9'));\r
+     add(b);\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'B')\r
+   {\r
+     // \B is compiled as a negative lookahead for \b\r
+     Regex r = new Regex();\r
+     r._compile2("(?!" + back_slash + "b)", mk);\r
+     add(r.thePattern);\r
+   }\r
+   else if (isOctalString(sp))\r
+   {\r
+     // Escaped octal code of two or three digits.\r
+     int d = sp.c - '0';\r
+     sp.inc();\r
+     d = 8 * d + sp.c - '0';\r
+     StrPos sp2 = new StrPos(sp);\r
+     sp2.inc();\r
+     if (isOctalDigit(sp2, false))\r
+     {\r
+       sp.inc();\r
+       d = 8 * d + sp.c - '0';\r
+     }\r
+     add(new oneChar((char) d));\r
+   }\r
+   else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')\r
+   {\r
+     // Back reference of one or two decimal digits.\r
+     int iv = sp.c - '0';\r
+     StrPos s2 = new StrPos(sp);\r
+     s2.inc();\r
+     if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')\r
+     {\r
+       iv = 10 * iv + (s2.c - '0');\r
+       sp.inc();\r
+     }\r
+     add(new BackMatch(iv));\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'b')\r
+   {\r
+     add(new Boundary());\r
+   }\r
+   else if (sp.match('\b'))\r
+   {\r
+     add(new Boundary());\r
+   }\r
+   else if (sp.match('$'))\r
+   {\r
+     add(new End(true));\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'Z')\r
+   {\r
+     add(new End(false));\r
+   }\r
+   else if (sp.match('.'))\r
+   {\r
+     add(new Any());\r
+   }\r
+   else if (sp.incMatch("(??"))\r
+   {\r
+     // Custom validator: (??name) or (??name:argument)\r
+     javajs.util.SB sb = new javajs.util.SB();\r
+     javajs.util.SB sb2 = new javajs.util.SB();\r
+     while (!sp.eos && !sp.match(')') && !sp.match(':'))\r
+     {\r
+       sb.appendC(sp.c);\r
+       sp.inc();\r
+     }\r
+     if (sp.incMatch(":"))\r
+     {\r
+       while (!sp.eos && !sp.match(')'))\r
+       {\r
+         sb2.appendC(sp.c);\r
+         sp.inc();\r
+       }\r
+     }\r
+     if (sp.eos)\r
+     {\r
+       RegSyntaxError.endItAll("Unclosed (??");\r
+     }\r
+     String sbs = sb.toString();\r
+     if (validators.get(sbs) instanceof String)\r
+     {\r
+       // The name maps to pattern text: compile it in place.\r
+       String pat = (String) validators.get(sbs);\r
+       Regex r = newRegex();\r
+       Rthings rth = new Rthings(this);\r
+       rth.noBackRefs = true;\r
+       r._compile2(pat, rth);\r
+       add(r.thePattern);\r
+     }\r
+     else\r
+     {\r
+       Custom cm = new Custom(sb.toString());\r
+       if (cm.v != null)\r
+       {\r
+         Validator v2 = cm.v.arg(sb2.toString());\r
+         if (v2 != null)\r
+         {\r
+           v2.argsave = sb2.toString();\r
+           String savedPattern = cm.v.pattern;\r
+           cm.v = v2;\r
+           v2.pattern = savedPattern;\r
+         }\r
+         Regex r = newRegex();\r
+         Rthings rth = new Rthings(this);\r
+         rth.noBackRefs = true;\r
+         r._compile2(cm.v.pattern, rth);\r
+         cm.sub = r.thePattern;\r
+         cm.sub.add(new CustomEndpoint(cm));\r
+         cm.sub.setParent(cm);\r
+         add(cm);\r
+       }\r
+     }\r
+   }\r
+   else if (sp.match('('))\r
+   {\r
+     mk.parenLevel++;\r
+     Regex r = newRegex();\r
+     // r.or = new Or();\r
+     sp.inc();\r
+     if (sp.incMatch("?:"))\r
+     {\r
+       r.or = new Or();\r
+     }\r
+     else if (sp.incMatch("?="))\r
+     {\r
+       r.or = new lookAhead(false);\r
+     }\r
+     else if (sp.incMatch("?!"))\r
+     {\r
+       r.or = new lookAhead(true);\r
+     }\r
+     else if (sp.match('?'))\r
+     {\r
+       // Inline flags such as (?i): record each flag letter in mk.\r
+       sp.inc();\r
+       do\r
+       {\r
+         if (sp.c == 'i')\r
+         {\r
+           mk.ignoreCase = true;\r
+         }\r
+         if (sp.c == 'Q')\r
+         {\r
+           mk.dontMatchInQuotes = true;\r
+         }\r
+         if (sp.c == 'o')\r
+         {\r
+           mk.optimizeMe = true;\r
+         }\r
+         if (sp.c == 'g')\r
+         {\r
+           mk.gFlag = true;\r
+         }\r
+         if (sp.c == 's')\r
+         {\r
+           mk.sFlag = true;\r
+         }\r
+         if (sp.c == 'm')\r
+         {\r
+           mk.mFlag = true;\r
+         }\r
+         sp.inc();\r
+       } while (!sp.match(')') && !sp.eos);\r
+       // A flag group produces no sub-pattern.\r
+       r = null;\r
+       mk.parenLevel--;\r
+       if (sp.eos) // throw new RegSyntax\r
+       {\r
+         RegSyntaxError.endItAll("Unclosed ()");\r
+       }\r
+     }\r
+     else\r
+     { // just ordinary parenthesis\r
+       r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);\r
+     }\r
+     if (r != null)\r
+     {\r
+       add(r._compile1(sp, mk));\r
+     }\r
+   }\r
+   else if (sp.match('^'))\r
+   {\r
+     add(new Start(true));\r
+   }\r
+   else if (sp.dontMatch && sp.c == 'A')\r
+   {\r
+     add(new Start(false));\r
+   }\r
+   else if (sp.match('*'))\r
+   {\r
+     addMulti(new patInt(0), new patInf());\r
+   }\r
+   else if (sp.match('+'))\r
+   {\r
+     addMulti(new patInt(1), new patInf());\r
+   }\r
+   else if (sp.match('?'))\r
+   {\r
+     addMulti(new patInt(0), new patInt(1));\r
+   }\r
+   else if (sp.match('{'))\r
+   {\r
+     // {n}, {n,} or {n,m}; anything else makes '{' a literal character.\r
+     boolean bad = false;\r
+     StrPos sp2 = new StrPos(sp);\r
+     // javajs.util.SB sb = new javajs.util.SB();\r
+     sp.inc();\r
+     patInt i1 = sp.getPatInt();\r
+     patInt i2 = null;\r
+     if (sp.match('}'))\r
+     {\r
+       i2 = i1;\r
+     }\r
+     else\r
+     {\r
+       if (!sp.match(','))\r
+       {\r
+         /*\r
+          * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed\r
+          * with , or }");\r
+          */\r
+         bad = true;\r
+       }\r
+       sp.inc();\r
+       if (sp.match('}'))\r
+       {\r
+         i2 = new patInf();\r
+       }\r
+       else\r
+       {\r
+         i2 = sp.getPatInt();\r
+       }\r
+     }\r
+     if (i1 == null || i2 == null)\r
+     {\r
+       /*\r
+        * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}");\r
+        */\r
+       bad = true;\r
+     }\r
+     if (bad)\r
+     {\r
+       // Rewind to the '{' and treat it as an ordinary character.\r
+       sp.dup(sp2);\r
+       add(new oneChar(sp.c));\r
+     }\r
+     else\r
+     {\r
+       addMulti(i1, i2);\r
+     }\r
+   }\r
+   else if (sp.escMatch('x') && next2Hex(sp))\r
+   {\r
+     sp.inc();\r
+     int d = getHexDigit(sp);\r
+     sp.inc();\r
+     d = 16 * d + getHexDigit(sp);\r
+     add(new oneChar((char) d));\r
+   }\r
+   else if (sp.escMatch('c'))\r
+   {\r
+     sp.inc();\r
+     if (sp.c < Ctrl.cmap.length)\r
+     {\r
+       add(new oneChar(Ctrl.cmap[sp.c]));\r
+     }\r
+     else\r
+     {\r
+       add(new oneChar(sp.c));\r
+     }\r
+   }\r
+   else if (sp.escMatch('f'))\r
+   {\r
+     add(new oneChar((char) 12));\r
+   }\r
+   else if (sp.escMatch('a'))\r
+   {\r
+     add(new oneChar((char) 7));\r
+   }\r
+   else if (sp.escMatch('t'))\r
+   {\r
+     add(new oneChar('\t'));\r
+   }\r
+   else if (sp.escMatch('n'))\r
+   {\r
+     add(new oneChar('\n'));\r
+   }\r
+   else if (sp.escMatch('r'))\r
+   {\r
+     add(new oneChar('\r'));\r
+   }\r
+   else if (sp.escMatch('b'))\r
+   {\r
+     add(new oneChar('\b'));\r
+   }\r
+   else if (sp.escMatch('e'))\r
+   {\r
+     add(new oneChar((char) 27));\r
+   }\r
+   else\r
+   {\r
+     add(new oneChar(sp.c));\r
+     if (sp.match(')'))\r
+     {\r
+       RegSyntaxError.endItAll("Unmatched right paren in pattern");\r
+     }\r
+   }\r
+ }\r
+\r
+ // compiles all Pattern elements, internal method\r
+ private Pattern _compile2(String pat, Rthings mk) throws RegSyntax\r
+ {\r
+ minMatch = null;\r
+ sFlag = mFlag = ignoreCase = gFlag = false;\r
+ StrPos sp = new StrPos(pat, 0);\r
+ thePattern = _compile1(sp, mk);\r
+ pt.marks = null;\r
+ return thePattern;\r
+ }\r
+\r
+ // Head of the chain of pattern elements currently being built; see add().\r
+ Pattern p = null;\r
+\r
+ // Collects the alternatives of the group being compiled; created on the\r
+ // first '|' or assigned when a parenthesised group is opened.\r
+ Or or = null;\r
+\r
+ // Compiles elements until the end of the pattern text or, inside a group,\r
+ // until the closing paren, and returns the resulting pattern: the Or when\r
+ // alternatives were collected, otherwise the element chain (or a\r
+ // NullPattern when nothing was compiled).\r
+ Pattern _compile1(StrPos sp, Rthings mk) throws RegSyntax\r
+ {\r
+   while (!sp.eos && (or == null || !sp.match(')')))\r
+   {\r
+     compile1(sp, mk);\r
+     sp.inc();\r
+   }\r
+   if (sp.match(')'))\r
+   {\r
+     mk.parenLevel--;\r
+   }\r
+   else if (sp.eos && mk.parenLevel != 0)\r
+   {\r
+     RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);\r
+   }\r
+   if (or == null)\r
+   {\r
+     return p == null ? new NullPattern() : p;\r
+   }\r
+   // Close the final alternative before handing back the Or.\r
+   if (p == null)\r
+   {\r
+     p = new NullPattern();\r
+   }\r
+   or.addOr(p);\r
+   return or;\r
+ }\r
+\r
+ // add a multi object to the end of the chain\r
+ // which applies to the last object\r
+ void addMulti(patInt i1, patInt i2) throws RegSyntax\r
+ {\r
+ Pattern last, last2;\r
+ for (last = p; last != null && last.next != null; last = last.next)\r
+ {\r
+ ;\r
+ }\r
+ if (last == null || last == p)\r
+ {\r
+ last2 = null;\r
+ }\r
+ else\r
+ {\r
+ for (last2 = p; last2.next != last; last2 = last2.next)\r
+ {\r
+ ;\r
+ }\r
+ }\r
+ if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1)\r
+ {\r
+ ((Multi) last).matchFewest = true;\r
+ }\r
+ else if (last instanceof FastMulti && i1.intValue() == 0\r
+ && i2.intValue() == 1)\r
+ {\r
+ ((FastMulti) last).matchFewest = true;\r
+ }\r
+ else if (last instanceof DotMulti && i1.intValue() == 0\r
+ && i2.intValue() == 1)\r
+ {\r
+ ((DotMulti) last).matchFewest = true;\r
+ }\r
+ else if (last instanceof Multi || last instanceof DotMulti\r
+ || last instanceof FastMulti)\r
+ {\r
+ throw new RegSyntax("Syntax error.");\r
+ }\r
+ else if (last2 == null)\r
+ {\r
+ p = mkMulti(i1, i2, p);\r
+ }\r
+ else\r
+ {\r
+ last2.next = mkMulti(i1, i2, last);\r
+ }\r
+ }\r
+\r
+ // Chooses the quantifier implementation for p: DotMulti for a lone Any,\r
+ // FastMulti when RegOpt.safe4fm(p) allows it, Multi otherwise.\r
+ final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)\r
+         throws RegSyntax\r
+ {\r
+   if (p instanceof Any && p.next == null)\r
+   {\r
+     return new DotMulti(lo, hi);\r
+   }\r
+   if (RegOpt.safe4fm(p))\r
+   {\r
+     return new FastMulti(lo, hi, p);\r
+   }\r
+   return new Multi(lo, hi, p);\r
+ }\r
+\r
+ /**\r
+ * Processes the bracket operator. sp must be positioned on the first\r
+ * character after the opening '['; on return it is on the closing ']' or at\r
+ * the end of the pattern text.\r
+ * \r
+ * @param sp\r
+ *          cursor into the pattern text\r
+ * @return a Bracket holding every range and character of the class, negated\r
+ *         when the class starts with '^'\r
+ * @exception com.stevesoft.pat.RegSyntax\r
+ *              for an empty class\r
+ */\r
+ Pattern matchBracket(StrPos sp) throws RegSyntax\r
+ {\r
+   Bracket ret;\r
+   if (sp.match('^'))\r
+   {\r
+     ret = new Bracket(true);\r
+     sp.inc();\r
+   }\r
+   else\r
+   {\r
+     ret = new Bracket(false);\r
+   }\r
+   if (sp.match(']'))\r
+   {\r
+     // throw new RegSyntax\r
+     RegSyntaxError.endItAll("Unmatched []");\r
+   }\r
+\r
+   while (!sp.eos && !sp.match(']'))\r
+   {\r
+     // Look one and two characters ahead to recognise "a-z" ranges; a '-'\r
+     // directly before ']' is an ordinary character.\r
+     StrPos s1 = new StrPos(sp);\r
+     s1.inc();\r
+     StrPos s1_ = new StrPos(s1);\r
+     s1_.inc();\r
+     if (s1.match('-') && !s1_.match(']'))\r
+     {\r
+       StrPos s2 = new StrPos(s1);\r
+       s2.inc();\r
+       if (!s2.eos)\r
+       {\r
+         ret.addOr(new Range(sp.c, s2.c));\r
+       }\r
+       sp.inc();\r
+       sp.inc();\r
+     }\r
+     else if (sp.escMatch('Q'))\r
+     {\r
+       // Quoted run: every character up to \E is taken literally. Stop at\r
+       // the end of the pattern text as well, like the enclosing loop does.\r
+       sp.inc();\r
+       while (!sp.eos && !sp.escMatch('E'))\r
+       {\r
+         ret.addOr(new oneChar(sp.c));\r
+         sp.inc();\r
+       }\r
+     }\r
+     else if (sp.escMatch('d'))\r
+     {\r
+       ret.addOr(new Range('0', '9'));\r
+     }\r
+     else if (sp.escMatch('s'))\r
+     {\r
+       ret.addOr(new oneChar((char) 32));\r
+       ret.addOr(new Range((char) 8, (char) 10));\r
+       ret.addOr(new oneChar((char) 13));\r
+     }\r
+     else if (sp.escMatch('w'))\r
+     {\r
+       ret.addOr(new Range('a', 'z'));\r
+       ret.addOr(new Range('A', 'Z'));\r
+       ret.addOr(new Range('0', '9'));\r
+       ret.addOr(new oneChar('_'));\r
+     }\r
+     else if (sp.escMatch('D'))\r
+     {\r
+       // Everything except '0'..'9' (48..57)\r
+       ret.addOr(new Range((char) 0, (char) 47));\r
+       ret.addOr(new Range((char) 58, (char) 65535));\r
+     }\r
+     else if (sp.escMatch('S'))\r
+     {\r
+       // Everything except 8..10, 13 and 32\r
+       ret.addOr(new Range((char) 0, (char) 7));\r
+       ret.addOr(new Range((char) 11, (char) 12));\r
+       ret.addOr(new Range((char) 14, (char) 31));\r
+       ret.addOr(new Range((char) 33, (char) 65535));\r
+     }\r
+     else if (sp.escMatch('W'))\r
+     {\r
+       ret.addOr(new Range((char) 0, (char) 64));\r
+       ret.addOr(new Range((char) 91, (char) 94));\r
+       ret.addOr(new oneChar((char) 96));\r
+       ret.addOr(new Range((char) 123, (char) 65535));\r
+     }\r
+     else if (sp.escMatch('x') && next2Hex(sp))\r
+     {\r
+       sp.inc();\r
+       int d = getHexDigit(sp);\r
+       sp.inc();\r
+       d = 16 * d + getHexDigit(sp);\r
+       ret.addOr(new oneChar((char) d));\r
+     }\r
+     else if (sp.escMatch('a'))\r
+     {\r
+       ret.addOr(new oneChar((char) 7));\r
+     }\r
+     else if (sp.escMatch('f'))\r
+     {\r
+       ret.addOr(new oneChar((char) 12));\r
+     }\r
+     else if (sp.escMatch('e'))\r
+     {\r
+       ret.addOr(new oneChar((char) 27));\r
+     }\r
+     else if (sp.escMatch('n'))\r
+     {\r
+       ret.addOr(new oneChar('\n'));\r
+     }\r
+     else if (sp.escMatch('t'))\r
+     {\r
+       ret.addOr(new oneChar('\t'));\r
+     }\r
+     else if (sp.escMatch('r'))\r
+     {\r
+       ret.addOr(new oneChar('\r'));\r
+     }\r
+     else if (sp.escMatch('c'))\r
+     {\r
+       sp.inc();\r
+       if (sp.c < Ctrl.cmap.length)\r
+       {\r
+         ret.addOr(new oneChar(Ctrl.cmap[sp.c]));\r
+       }\r
+       else\r
+       {\r
+         ret.addOr(new oneChar(sp.c));\r
+       }\r
+     }\r
+     else if (isOctalString(sp))\r
+     {\r
+       // Escaped octal code of two or three digits.\r
+       int d = sp.c - '0';\r
+       sp.inc();\r
+       d = 8 * d + sp.c - '0';\r
+       StrPos sp2 = new StrPos(sp);\r
+       sp2.inc();\r
+       if (isOctalDigit(sp2, false))\r
+       {\r
+         sp.inc();\r
+         d = 8 * d + sp.c - '0';\r
+       }\r
+       ret.addOr(new oneChar((char) d));\r
+     }\r
+     else\r
+     {\r
+       ret.addOr(new oneChar(sp.c));\r
+     }\r
+     sp.inc();\r
+   }\r
+   return ret;\r
+ }\r
+\r
+ /**\r
+ * Converts the stored Pattern to a String -- this is a decompile. Note that\r
+ * \t and \n will really print out here, Not just the two character\r
+ * representations. Also be prepared to see some strange output if your\r
+ * characters are not printable.\r
+ */\r
+ public String toString()\r
+ {\r
+ if (false && thePattern == null)\r
+ {\r
+ return "";\r
+ }\r
+ else\r
+ {\r
+ javajs.util.SB sb = new javajs.util.SB();\r
+ if (esc != Pattern.ESC)\r
+ {\r
+ sb.append("(?e=");\r
+ sb.appendC(esc);\r
+ sb.append(")");\r
+ }\r
+ if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase\r
+ || dontMatchInQuotes || optimized())\r
+ {\r
+ sb.append("(?");\r
+ if (ignoreCase)\r
+ {\r
+ sb.append("i");\r
+ }\r
+ if (mFlag)\r
+ {\r
+ sb.append("m");\r
+ }\r
+ if (sFlag || !dotDoesntMatchCR)\r
+ {\r
+ sb.append("s");\r
+ }\r
+ if (dontMatchInQuotes)\r
+ {\r
+ sb.append("Q");\r
+ }\r
+ if (optimized())\r
+ {\r
+ sb.append("o");\r
+ }\r
+ if (gFlag)\r
+ {\r
+ sb.append("g");\r
+ }\r
+ sb.append(")");\r
+ }\r
+ String patstr = thePattern.toString();\r
+ if (esc != Pattern.ESC)\r
+ {\r
+ patstr = reEscape(patstr, Pattern.ESC, esc);\r
+ }\r
+ sb.append(patstr);\r
+ return sb.toString();\r
+ }\r
+ }\r
+\r
+ // Re-escape Pattern, allows us to use a different escape\r
+ // character.\r
+ static String reEscape(String s, char oldEsc, char newEsc)\r
+ {\r
+ if (oldEsc == newEsc)\r
+ {\r
+ return s;\r
+ }\r
+ int i;\r
+ javajs.util.SB sb = new javajs.util.SB();\r
+ for (i = 0; i < s.length(); i++)\r
+ {\r
+ if (s.charAt(i) == oldEsc && i + 1 < s.length())\r
+ {\r
+ if (s.charAt(i + 1) == oldEsc)\r
+ {\r
+ sb.appendC(oldEsc);\r
+ }\r
+ else\r
+ {\r
+ sb.appendC(newEsc);\r
+ sb.appendC(s.charAt(i + 1));\r
+ }\r
+ i++;\r
+ }\r
+ else if (s.charAt(i) == newEsc)\r
+ {\r
+ sb.appendC(newEsc);\r
+ sb.appendC(newEsc);\r
+ }\r
+ else\r
+ {\r
+ sb.appendC(s.charAt(i));\r
+ }\r
+ }\r
+ return sb.toString();\r
+ }\r
+\r
+// /**\r
+// * This method implements FilenameFilter, allowing one to use a Regex to\r
+// * search through a directory using File.list. There is a FileRegex now that\r
+// * does this better.\r
+// * \r
+// * @see com.stevesoft.pat.FileRegex\r
+// */\r
+// public boolean accept(File dir, String s)\r
+// {\r
+// return search(s);\r
+// }\r
+\r
+ /**\r
+ * The version of this package.\r
+ * \r
+ * @return the fixed version string of this release\r
+ */\r
+ final static public String version()\r
+ {\r
+ return "lgpl release 1.5.3";\r
+ }\r
+\r
+ /**\r
+ * Rewrites the compiled pattern into a form that should execute faster and\r
+ * looks for a Skip helper to speed up searching. Does nothing when the\r
+ * pattern has already been optimized or there is no pattern.\r
+ * <p>\r
+ * Once this method has been called, the variables ignoreCase and\r
+ * dontMatchInQuotes should not be changed, as the results will be\r
+ * unpredictable; the optimized() method reports whether that is the case.\r
+ * <p>\r
+ * Not all patterns execute at the same speed. In general "(?: ... )" is\r
+ * faster than "( ... )", so group with the former when the backreference is\r
+ * not needed. Optimizing can also introduce pattern elements that cannot be\r
+ * written directly: for a large table of strings such as\r
+ * "(January|February|...)" it builds a Hashtable leading to the next\r
+ * appropriate pattern element, eliminating the need for a linear search.\r
+ * \r
+ * @see com.stevesoft.pat.Regex#optimized\r
+ * @see com.stevesoft.pat.Regex#ignoreCase\r
+ * @see com.stevesoft.pat.Regex#dontMatchInQuotes\r
+ * @see com.stevesoft.pat.Regex#matchAt\r
+ * @see com.stevesoft.pat.Regex#search\r
+ */\r
+ public void optimize()\r
+ {\r
+   if (!optimized() && thePattern != null)\r
+   {\r
+     minMatch = new patInt(0); // thePattern.countMinChars();\r
+     thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes);\r
+     skipper = Skip.findSkip(this);\r
+     // RegOpt.setParents(this);\r
+   }\r
+ }\r
+\r
+ // Search accelerator found by optimize(); null when there is none, in which\r
+ // case _search tries every position in turn.\r
+ Skip skipper;\r
+\r
+ /**\r
+ * This function returns true if the optimize method has been called. The\r
+ * test is simply whether minMatch is non-null.\r
+ */\r
+ public boolean optimized()\r
+ {\r
+ return minMatch != null;\r
+ }\r
+\r
+ /**\r
+ * A bit of syntactic sugar for those who want to make their code look more\r
+ * perl-like. To use this initialize your Regex object by saying:\r
+ * \r
+ * <pre>\r
+ * Regex r1 = Regex.perlCode("s/hello/goodbye/");\r
+ * Regex r2 = Regex.perlCode("s'fish'frog'i");\r
+ * Regex r3 = Regex.perlCode("m'hello'");\r
+ * </pre>\r
+ * \r
+ * The i for ignoreCase is supported in this syntax, as well as m, s, and x.\r
+ * The g flag is a bit of a special case.\r
+ * <p>\r
+ * If you wish to replace all occurrences of a pattern, you do not put a 'g' in\r
+ * the perlCode, but call Regex's replaceAll method.\r
+ * <p>\r
+ * If you wish to simply and only do a search for r2's pattern, you can do\r
+ * this by calling the searchFrom method repeatedly, or by calling\r
+ * search repeatedly if the g flag is set.\r
+ * <p>\r
+ * Note: Currently perlCode does <em>not</em> support the (?e=#) syntax for\r
+ * changing the escape character.\r
+ */\r
+\r
+ public static Regex perlCode(String s)\r
+ {\r
+ // this file is big enough, see parsePerl.java\r
+ // for this function.\r
+ return parsePerl.parse(s);\r
+ }\r
+\r
+ // The backslash character; compile1 uses it to build the pattern text for\r
+ // the \B element.\r
+ static final char back_slash = '\\';\r
+\r
+ /**\r
+ * Checks to see if there are only literal and no special pattern elements in\r
+ * this Regex.\r
+ */\r
+ public boolean isLiteral()\r
+ {\r
+ Pattern x = thePattern;\r
+ while (x != null)\r
+ {\r
+ if (x instanceof oneChar)\r
+ {\r
+ ;\r
+ }\r
+ else if (x instanceof Skipped)\r
+ {\r
+ ;\r
+ }\r
+ else\r
+ {\r
+ return false;\r
+ }\r
+ x = x.next;\r
+ }\r
+ return true;\r
+ }\r
+\r
+ /**\r
+ * You only need to know about this if you are inventing your own pattern\r
+ * elements. Delegates to countMinChars() of the compiled pattern.\r
+ */\r
+ public patInt countMinChars()\r
+ {\r
+ return thePattern.countMinChars();\r
+ }\r
+\r
+ /**\r
+ * You only need to know about this if you are inventing your own pattern\r
+ * elements. Delegates to countMaxChars() of the compiled pattern.\r
+ */\r
+ public patInt countMaxChars()\r
+ {\r
+ return thePattern.countMaxChars();\r
+ }\r
+\r
+ boolean isHexDigit(StrPos sp)\r
+ {\r
+ boolean r = !sp.eos\r
+ && !sp.dontMatch\r
+ && ((sp.c >= '0' && sp.c <= '9')\r
+ || (sp.c >= 'a' && sp.c <= 'f') || (sp.c >= 'A' && sp.c <= 'F'));\r
+ return r;\r
+ }\r
+\r
+ // True when sp is on an octal digit (0-7), is not at the end of the text,\r
+ // and its dontMatch flag equals first.\r
+ boolean isOctalDigit(StrPos sp, boolean first)\r
+ {\r
+   if (sp.eos)\r
+   {\r
+     return false;\r
+   }\r
+   if (first != sp.dontMatch)\r
+   {\r
+     return false;\r
+   }\r
+   return sp.c >= '0' && sp.c <= '7';\r
+ }\r
+\r
+ int getHexDigit(StrPos sp)\r
+ {\r
+ if (sp.c >= '0' && sp.c <= '9')\r
+ {\r
+ return sp.c - '0';\r
+ }\r
+ if (sp.c >= 'a' && sp.c <= 'f')\r
+ {\r
+ return sp.c - 'a' + 10;\r
+ }\r
+ return sp.c - 'A' + 10;\r
+ }\r
+\r
+ boolean next2Hex(StrPos sp)\r
+ {\r
+ StrPos sp2 = new StrPos(sp);\r
+ sp2.inc();\r
+ if (!isHexDigit(sp2))\r
+ {\r
+ return false;\r
+ }\r
+ sp2.inc();\r
+ if (!isHexDigit(sp2))\r
+ {\r
+ return false;\r
+ }\r
+ return true;\r
+ }\r
+\r
+ boolean isOctalString(StrPos sp)\r
+ {\r
+ if (!isOctalDigit(sp, true))\r
+ {\r
+ return false;\r
+ }\r
+ StrPos sp2 = new StrPos(sp);\r
+ sp2.inc();\r
+ if (!isOctalDigit(sp2, false))\r
+ {\r
+ return false;\r
+ }\r
+ return true;\r
+ }\r
+}\r