//
// This software is now distributed according to
// the Lesser Gnu Public License. Please see
// http://www.gnu.org/copyleft/lesser.txt for
// the details.
// -- Happy Computing!
//
package com.stevesoft.pat;
import com.stevesoft.pat.MessageManager;
import jalview.jsdev.api.RegExpInterface;
import java.io.File;
import java.util.BitSet;
import java.util.Hashtable;
import com.stevesoft.pat.wrap.StringWrap;
/** Validator that accepts a Unicode punctuation character. */
class UnicodePunct extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isPunct} accepts the character there, otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isPunct(s.charAt(from)) ? to : -1;
  }
}
/** Validator that accepts a Unicode white space character. */
class UnicodeWhite extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isWhite} accepts the character there, otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isWhite(s.charAt(from)) ? to : -1;
  }
}
/** Validator that accepts any character that is not Unicode punctuation. */
class NUnicodePunct extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isPunct} rejects the character there, otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isPunct(s.charAt(from)) ? -1 : to;
  }
}
/** Validator that accepts any character that is not Unicode white space. */
class NUnicodeWhite extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isWhite} rejects the character there, otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isWhite(s.charAt(from)) ? -1 : to;
  }
}
/** Validator that accepts a Unicode word character: alphabetic, decimal digit or underscore. */
class UnicodeW extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and the
   *         character there is alphabetic, a decimal digit or '_';
   *         otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from < s.length())
    {
      char ch = s.charAt(from);
      if (Prop.isAlphabetic(ch) || Prop.isDecimalDigit(ch) || ch == '_')
      {
        return to;
      }
    }
    return -1;
  }
}
/** Validator that accepts any character that is neither alphabetic, a decimal digit nor underscore. */
class NUnicodeW extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and the
   *         character there is not a word character; otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from < s.length())
    {
      char ch = s.charAt(from);
      boolean wordChar = Prop.isAlphabetic(ch) || Prop.isDecimalDigit(ch)
              || ch == '_';
      return wordChar ? -1 : to;
    }
    return -1;
  }
}
/** Validator that accepts a Unicode decimal digit. */
class UnicodeDigit extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isDecimalDigit} accepts the character there,
   *         otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isDecimalDigit(s.charAt(from)) ? to : -1;
  }
}
/** Validator that accepts any character that is not a Unicode decimal digit. */
class NUnicodeDigit extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isDecimalDigit} rejects the character there,
   *         otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isDecimalDigit(s.charAt(from)) ? -1 : to;
  }
}
/** Validator that accepts a Unicode math character. */
class UnicodeMath extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isMath} accepts the character there, otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isMath(s.charAt(from)) ? to : -1;
  }
}
/** Validator that accepts any character that is not a Unicode math character. */
class NUnicodeMath extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isMath} rejects the character there, otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isMath(s.charAt(from)) ? -1 : to;
  }
}
/** Validator that accepts a Unicode currency symbol. */
class UnicodeCurrency extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isCurrency} accepts the character there,
   *         otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isCurrency(s.charAt(from)) ? to : -1;
  }
}
/** Validator that accepts any character that is not a Unicode currency symbol. */
class NUnicodeCurrency extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isCurrency} rejects the character there,
   *         otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isCurrency(s.charAt(from)) ? -1 : to;
  }
}
/** Validator that accepts a Unicode alphabetic character. */
class UnicodeAlpha extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isAlphabetic} accepts the character there,
   *         otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isAlphabetic(s.charAt(from)) ? to : -1;
  }
}
/** Validator that accepts any character that is not Unicode alphabetic. */
class NUnicodeAlpha extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and
   *         {@code Prop.isAlphabetic} rejects the character there,
   *         otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return Prop.isAlphabetic(s.charAt(from)) ? -1 : to;
  }
}
/** Validator that accepts an upper case character. */
class UnicodeUpper extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and the
   *         character there passes {@link #isUpper(char)}, otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return isUpper(s.charAt(from)) ? to : -1;
  }

  /**
   * A character counts as upper case when upper-casing leaves it unchanged
   * while lower-casing changes it, so caseless characters are rejected.
   */
  final boolean isUpper(char c)
  {
    if (c != CaseMgr.toUpperCaseC(c))
    {
      return false;
    }
    return c != CaseMgr.toLowerCaseC(c);
  }
}
/** Validator that accepts a lower case character. */
class UnicodeLower extends UniValidator
{
  /**
   * Tests the character at index {@code from} of {@code s}.
   *
   * @return {@code to} when {@code from} lies inside {@code s} and the
   *         character there passes {@link #isLower(char)}, otherwise -1
   */
  public int validate(StringLike s, int from, int to)
  {
    if (from >= s.length())
    {
      return -1;
    }
    return isLower(s.charAt(from)) ? to : -1;
  }

  /**
   * A character counts as lower case when lower-casing leaves it unchanged
   * while upper-casing changes it, so caseless characters are rejected.
   */
  final boolean isLower(char c)
  {
    if (c == CaseMgr.toUpperCaseC(c))
    {
      return false;
    }
    return c == CaseMgr.toLowerCaseC(c);
  }
}
/**
* Regex provides the parser which constructs the linked list of Pattern classes
* from a String.
*
* For the purpose of this documentation, the fact that java interprets the
* backslash will be ignored. In practice, however, you will need a double
* backslash to obtain a string that contains a single backslash character.
* Thus, the example pattern "\b" should really be typed as "\\b" inside java
* code.
*
* Note that Regex is part of package "com.stevesoft.pat". To use it, simply
* import com.stevesoft.pat.Regex at the top of your file.
*
* Regex is made with a constructor that takes a String that defines the regular
* expression. Thus, for example
*
*
* Regex r = new Regex("[a-c]*");
*
*
* matches any number of characters so long as they are 'a', 'b', or 'c'.
*
* To attempt to match the Pattern to a given string, you can use either the
* search(String) member function, or the matchAt(String,int position) member
* function. These functions return a boolean which tells you whether or not the
* thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in
* the Regex object appropriately.
*
* The portion of the string before the match can be obtained by the left()
* member, and the portion after the match can be obtained by the right()
* member.
*
* Essentially, this package implements a syntax that is very much like the perl
* 5 regular expression syntax.
*
* Longer example:
*
*
* Regex r = new Regex("x(a|b)y");
* r.matchAt("xay", 0);
* System.out.println("sub = " + r.stringMatched(1));
*
*
* The above would print "sub = a".
*
*
* r.left() // would return "x"
* r.right() // would return "y"
*
*
*
* Differences between this package and perl5:
* The extended Pattern for setting flags is now supported, but the flags are
* different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the
* "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the
* escape character. The pattern
*
*
* (?e=#)#d+
*
*
* is the same as
*
*
* \d+
* , but note that the sequence
*
*
* (?e=#)
*
*
* must occur at the very beginning of the pattern. There may be other
* small differences as well. I will either make my package conform or note them
* as I become aware of them.
*
* This package supports additional patterns not in perl5:
*
* (?@())
* Group
* This matches all characters between the '(' character and the balancing
* ')' character. Thus, it will match "()" as well as "(())". The balancing
* characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".
*
* (?<1)
* Backup
* Moves the pointer backwards within the text. This allows you to make a
* "look behind." It fails if it attempts to move to a position before the
* beginning of the string. "x(?<1)" is equivalent to "(?=x)". The number, 1
* in this example, is the number of characters to move backwards.
*
*
*
* @author Steven R. Brandt
* @version package com.stevesoft.pat, release 1.5.3
* @see Pattern
*/
public class Regex extends RegRes implements RegExpInterface, Cloneable //implements FilenameFilter
{
/**
* BackRefOffset gives the identity number of the first pattern. Version 1.0
* used zero, version 1.1 uses 1 to be more compatible with perl.
*/
static int BackRefOffset = 1;
// Shared placeholder pattern; thePattern refers to it until a pattern is
// compiled, and compile() resets thePattern to it before compiling.
private static Pattern none = new NoPattern();
// Head of the compiled pattern chain used by the search methods.
Pattern thePattern = none;
// When non-null, _searchLike subtracts minMatch.i from the end of the region
// to bound the last start position it tries.
patInt minMatch = new patInt(0);
// Registry of named rules: defineV() stores Validator objects and define()
// stores pattern Strings, both keyed by name.
static Hashtable validators = new Hashtable();
static
{
defineV("p", "(?>1)", new UnicodePunct());
defineV("P", "(?>1)", new NUnicodePunct());
defineV("s", "(?>1)", new UnicodeWhite());
defineV("S", "(?>1)", new NUnicodeWhite());
defineV("w", "(?>1)", new UnicodeW());
defineV("W", "(?>1)", new NUnicodeW());
defineV("d", "(?>1)", new UnicodeDigit());
defineV("D", "(?>1)", new NUnicodeDigit());
defineV("m", "(?>1)", new UnicodeMath());
defineV("M", "(?>1)", new NUnicodeMath());
defineV("c", "(?>1)", new UnicodeCurrency());
defineV("C", "(?>1)", new NUnicodeCurrency());
defineV("a", "(?>1)", new UnicodeAlpha());
defineV("A", "(?>1)", new NUnicodeAlpha());
defineV("uc", "(?>1)", new UnicodeUpper());
defineV("lc", "(?>1)", new UnicodeLower());
}
// Replacement rule attached to this Regex. It is assigned by the
// constructors, setReplaceRule and setReplaceRuleStr, and returned by
// getReplaceRule; null means no rule has been set.
ReplaceRule rep = null;
/**
* Initializes the object without a Pattern. To supply a Pattern use
* compile(String s). Until then thePattern keeps its initial placeholder
* value.
*
* @j2sIgnore
*
* @see com.stevesoft.pat.Regex#compile(java.lang.String)
*/
@Deprecated
public Regex()
{
}
/**
 * Create and compile both a Regex and a ReplaceRule.
 *
 * No exception is reported for a malformed pattern: a RegSyntax raised
 * while compiling is discarded, and in that case the replacement rule is
 * not set either. Use compile(String) directly to see syntax errors.
 *
 * @param s
 *          the pattern to compile; null skips compilation
 * @param strRp
 *          the replacement rule text handed to ReplaceRule.perlCode; null or
 *          empty leaves this Regex without a ReplaceRule
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 */
public Regex(String s, String strRp)
{
  try
  {
    if (s != null)
    {
      compile(s);
    }
    // A null rule string used to fail with NullPointerException here; treat
    // it like the empty string.
    if (strRp != null && strRp.length() > 0)
    {
      rep = ReplaceRule.perlCode(strRp);
    }
  } catch (RegSyntax ignored)
  {
    // Deliberately ignored: this constructor is documented as not throwing
    // for syntax errors.
  }
}
/**
* Create and compile a Regex, but give it the ReplaceRule specified. This
* allows the user finer control of the Replacement process, if that is
* desired.
*
* The pattern is compiled through Regex(String, String) with an empty rule
* string, so syntax errors in s are not reported; rp is then stored as given
* (it may be null).
*
* @j2sIgnore
*
* @see com.stevesoft.pat.ReplaceRule
* @see com.stevesoft.pat.Regex#compile(java.lang.String)
*/
public Regex(String s, ReplaceRule rp)
{
this(s, "");
rep = rp;
}
/**
* Create and compile a Regex, but do not throw any exceptions. If you wish to
* have exceptions thrown for syntax errors, you must use the Regex(void)
* constructor to create the Regex object, and then call the compile method.
* Therefore, you should only call this constructor when you know your pattern
* is right.
*
* Delegates to Regex(String, String) with an empty replacement rule string.
*
* @j2sIgnore
*
* @see com.stevesoft.pat.Regex#search(java.lang.String)
* @see com.stevesoft.pat.Regex#compile(java.lang.String)
*/
@Deprecated
public Regex(String s)
{
this(s, "");
}
/**
 * Returns a copy of this Regex, built by creating an uncompiled Regex and
 * filling it through {@link #cloneFrom(Regex)}.
 */
public Object clone()
{
  Regex copy = new Regex(null, "");
  return copy.cloneFrom(this);
}
// /** Return a clone of the underlying RegRes object. */
// public RegRes result()
// {
// return (RegRes) super.clone();
// }
/**
 * Copies the state of {@code r} into this Regex and returns this Regex.
 *
 * The match result is copied with copyOutOf, the replace rule and the
 * pattern chain are cloned, and the flags, escape character, minMatch and
 * skipper are taken over as they are (minMatch and skipper are shared, not
 * cloned).
 *
 * NOTE(review): sFlag and mFlag are not copied in this method - confirm
 * whether that is intended.
 *
 * @param r
 *          the Regex to copy from
 * @return this Regex
 */
public Regex cloneFrom(Regex r)
{
  copyOutOf(r);
  this.dontMatchInQuotes = r.dontMatchInQuotes;
  this.esc = r.esc;
  this.ignoreCase = r.ignoreCase;
  this.gFlag = r.gFlag;
  this.rep = (r.rep == null) ? null : (ReplaceRule) r.rep.clone();
  this.thePattern = r.thePattern.clone(new Hashtable());
  this.minMatch = r.minMatch;
  this.skipper = r.skipper;
  return this;
}
/**
 * Sets the dontMatchInQuotes flag.
 *
 * @param b
 *          the new value of the flag
 */
public void setDontMatchInQuotes(boolean b)
{
  this.dontMatchInQuotes = b;
}
/**
 * Reports whether the dontMatchInQuotes flag is currently set.
 *
 * @return the current value of the flag
 */
public boolean getDontMatchInQuotes()
{
  return this.dontMatchInQuotes;
}
// Backing field of the dontMatchInQuotes flag; compile() resets it to false
// before compiling.
boolean dontMatchInQuotes = false;
/**
 * Sets the ignoreCase flag. When true, the pattern matcher ignores case
 * while searching for a match.
 *
 * @param b
 *          the new value of the flag
 */
public void setIgnoreCase(boolean b)
{
  this.ignoreCase = b;
}
/**
 * Returns the ignoreCase flag.
 *
 * @return true if the case of the pattern is being ignored, false otherwise
 */
public boolean getIgnoreCase()
{
  return this.ignoreCase;
}
// Backing field of the ignoreCase flag; compile() resets it to false before
// compiling, and prep() copies it into the per-search Pthings.
boolean ignoreCase = false;
// Class-wide default for the m flag; prep() ORs it with the per-instance
// mFlag.
static boolean defaultMFlag = false;
/**
 * Sets the class-wide default of the m flag. While it is true, the m flag is
 * on for every search, whatever the flag of the individual Regex.
 *
 * @param mFlag
 *          the new default
 */
public static void setDefaultMFlag(boolean mFlag)
{
  Regex.defaultMFlag = mFlag;
}
/**
 * Returns the class-wide default of the m flag.
 *
 * @return the current default
 * @see #setDefaultMFlag(boolean)
 */
public static boolean getDefaultMFlag()
{
  return Regex.defaultMFlag;
}
/**
 * Replaces the ReplaceRule of this Regex with one compiled from {@code rp}
 * by ReplaceRule.perlCode, and drops the cached Replacer.
 *
 * @param rp
 *          the replacement rule text
 */
public void setReplaceRuleStr(String rp)
{
  this.rep = ReplaceRule.perlCode(rp);
  this.repr = null; // Clear Replacer history
}
/**
 * Replaces the ReplaceRule of this Regex with {@code rp}. The cached
 * Replacer is left untouched.
 *
 * @param rp
 *          the new rule
 */
public void setReplaceRule(ReplaceRule rp)
{
  this.rep = rp;
}
/**
 * Tests whether a custom rule is registered under the given name.
 *
 * @param nm
 *          the rule name
 * @return true if a rule was registered with define or defineV and has not
 *         been removed
 * @see #defineV(java.lang.String,java.lang.String,Validator)
 */
public static boolean isDefined(String nm)
{
  // Hashtable never stores null values, so key presence is equivalent to a
  // non-null lookup result.
  return validators.containsKey(nm);
}
/**
 * Removes the custom rule registered under the given name, if any.
 *
 * @param nm
 *          the rule name
 * @see #defineV(java.lang.String,java.lang.String,Validator)
 */
public static void undefine(String nm)
{
  Regex.validators.remove(nm);
}
/**
 * Registers a Validator-backed rule. The pattern text is stored on the
 * validator itself, which is then registered under {@code nm}. See
 * test/deriv2.java and test/deriv3.java for examples of how to use it.
 *
 * @param nm
 *          the rule name
 * @param pat
 *          the pattern text assigned to {@code v.pattern}
 * @param v
 *          the validator to register
 */
public static void defineV(String nm, String pat, Validator v)
{
  v.pattern = pat;
  Regex.validators.put(nm, v);
}
/**
 * Registers a shorthand for a pattern. The pattern will be invoked by a
 * string that has the form "(??"+nm+")".
 *
 * @param nm
 *          the shorthand name
 * @param pat
 *          the pattern text the shorthand stands for
 */
public static void define(String nm, String pat)
{
  Regex.validators.put(nm, pat);
}
/**
 * Returns the ReplaceRule currently attached to this Regex.
 *
 * @return the rule, or null when none has been set
 */
public ReplaceRule getReplaceRule()
{
  return this.rep;
}
// Cached Replacer, created on first use and cleared by setReplaceRuleStr.
Replacer repr = null;
/** Returns the cached Replacer, creating it on first use. */
final Replacer _getReplacer()
{
  if (repr == null)
  {
    repr = new Replacer();
  }
  return repr;
}
/**
 * Returns the cached Replacer (creating it on first use) after pointing its
 * {@code rh.me} at this Regex and clearing {@code rh.prev}.
 */
public Replacer getReplacer()
{
  Replacer current = repr;
  if (current == null)
  {
    current = new Replacer();
    repr = current;
  }
  current.rh.me = this;
  current.rh.prev = null;
  return current;
}
/**
 * Replaces the first occurrence of this pattern in String s according to the
 * ReplaceRule. The whole of s is searched.
 *
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#getReplaceRule()
 */
public String replaceFirst(String s)
{
  Replacer replacer = _getReplacer();
  return replacer.replaceFirstRegion(s, this, 0, s.length()).toString();
}
/**
 * Replaces the first occurrence of this pattern in String s, beginning at
 * position pos and running to the end of s, according to the ReplaceRule.
 *
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#getReplaceRule()
 */
public String replaceFirstFrom(String s, int pos)
{
  Replacer replacer = _getReplacer();
  return replacer.replaceFirstRegion(s, this, pos, s.length()).toString();
}
/**
 * Replaces the first occurrence of this pattern in String s within the
 * region given by start and end, according to the ReplaceRule.
 *
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#getReplaceRule()
 */
public String replaceFirstRegion(String s, int start, int end)
{
  Replacer replacer = _getReplacer();
  return replacer.replaceFirstRegion(s, this, start, end).toString();
}
/**
 * Replaces all occurrences of this pattern in String s according to the
 * ReplaceRule. The whole of s is searched.
 *
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#getReplaceRule()
 */
public String replaceAll(String s)
{
  Replacer replacer = _getReplacer();
  return replacer.replaceAllRegion(s, this, 0, s.length()).toString();
}
/**
 * StringLike variant of replaceAll: replaces all occurrences of this pattern
 * in the whole of s and returns the result without converting it to a
 * String.
 */
public StringLike replaceAllLike(StringLike s)
{
  Replacer replacer = _getReplacer();
  return replacer.replaceAllRegion(s, this, 0, s.length());
}
/**
 * Replaces all occurrences of this pattern in String s, beginning at
 * position pos and running to the end of s, according to the ReplaceRule.
 *
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#getReplaceRule()
 */
public String replaceAllFrom(String s, int pos)
{
  Replacer replacer = _getReplacer();
  return replacer.replaceAllRegion(s, this, pos, s.length()).toString();
}
/**
 * Replaces all occurrences of this pattern in String s within the region
 * given by start and end, according to the ReplaceRule.
 *
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#getReplaceRule()
 */
public String replaceAllRegion(String s, int start, int end)
{
  Replacer replacer = _getReplacer();
  return replacer.replaceAllRegion(s, this, start, end).toString();
}
/**
* By default, the escape character is the backslash, but you can make it
* anything you want by setting this variable. compile() rewrites the pattern
* with reEscape when this differs from Pattern.ESC, unless the pattern itself
* starts with a "(?e=" prefix.
*/
public char esc = Pattern.ESC;
/**
* This method compiles a regular expression, making it possible to call the
* search or matchAt methods. Any previously compiled pattern and the
* ignoreCase and dontMatchInQuotes flags are reset first.
*
* @param prepat
*          the pattern text
* @exception com.stevesoft.pat.RegSyntax
*              is thrown if a syntax error is encountered in the
*              pattern. For example, "x{3,1}" or "*a" are not valid
*              patterns.
* @see com.stevesoft.pat.Regex#search
* @see com.stevesoft.pat.Regex#matchAt
*/
public void compile(String prepat) throws RegSyntax
{
// codify() may return null; in that case the text is compiled as given.
String postpat = parsePerl.codify(prepat, true);
String pat = postpat == null ? prepat : postpat;
// Reset the per-pattern state left over from an earlier compile.
minMatch = null;
ignoreCase = false;
dontMatchInQuotes = false;
Rthings mk = new Rthings(this);
// Remember the starting value of mk.val so the number of sub-patterns added
// by this compile can be computed afterwards.
int offset = mk.val;
String newpat = pat;
thePattern = none;
p = null;
or = null;
minMatch = new patInt(0);
StrPos sp = new StrPos(pat, 0);
// A leading "(?e=X)" selects X as the escape character for this pattern: the
// six-character prefix is removed and the rest re-escaped to Pattern.ESC.
if (sp.incMatch("(?e="))
{
char newEsc = sp.c;
sp.inc();
if (sp.match(')'))
{
newpat = reEscape(pat.substring(6), newEsc, Pattern.ESC);
}
}
// Otherwise honour a non-default escape character set through the esc field.
else if (esc != Pattern.ESC)
{
newpat = reEscape(pat, esc, Pattern.ESC);
}
thePattern = _compile(newpat, mk);
numSubs_ = mk.val - offset;
// Hand the state collected in mk during compilation back to this Regex.
mk.set(this);
}
/*
 * Comparing a Regex with another Regex first requires both to have the same
 * toString() text; after that, and for any other kind of object (such as a
 * plain RegRes), the decision is left to the superclass comparison.
 */
public boolean equals(Object o)
{
  if (o instanceof Regex && !toString().equals(o.toString()))
  {
    return false;
  }
  return super.equals(o);
}
// prep sets global variables of class
// Pattern so that it can access them
// during an attempt at a match
Pthings pt = new Pthings();
/**
 * Prepares the shared Pthings for a match attempt against {@code s} and
 * records {@code s} as the current source.
 *
 * pt.lastPos is seeded from matchedTo() (clamped to 0) and reset to 0 when
 * the text underlying {@code s} is not the same object as the text of the
 * previous source. The original code compared {@code s.unwrap()} with itself,
 * so the reset only happened when there was no previous source at all.
 *
 * @param s
 *          the text about to be searched
 * @return the prepared Pthings (always the {@code pt} field)
 */
final Pthings prep(StringLike s)
{
  pt.lastPos = matchedTo();
  if (pt.lastPos < 0)
  {
    pt.lastPos = 0;
  }
  // Identity comparison of the wrapped objects: a different underlying text
  // invalidates the position remembered from the previous match.
  Object incoming = (s == null) ? null : s.unwrap();
  Object previous = (src == null) ? null : src.unwrap();
  if (incoming != previous)
  {
    pt.lastPos = 0;
  }
  src = s;
  pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);
  pt.mFlag = (mFlag | defaultMFlag);
  pt.ignoreCase = ignoreCase;
  pt.no_check = false;
  // Invalidate the entries of the old marks array in place before letting go
  // of it; the array may still be referenced as the previous result's marks.
  if (pt.marks != null)
  {
    for (int i = 0; i < pt.marks.length; i++)
    {
      pt.marks[i] = -1;
    }
  }
  pt.marks = null;
  pt.nMarks = numSubs_;
  pt.src = s;
  if (dontMatchInQuotes)
  {
    setCbits(s, pt);
  }
  else
  {
    pt.cbits = null;
  }
  return pt;
}
/**
 * Attempts to match the Pattern at exactly the given position of the string;
 * no other start position is tried.
 *
 * @see com.stevesoft.pat.Regex#search
 */
public boolean matchAt(String s, int start_pos)
{
  return _searchLike(new StringWrap(s), start_pos, start_pos);
}
/**
 * Attempts to match the Pattern at exactly the given position of the
 * StringLike; no other start position is tried.
 *
 * @see com.stevesoft.pat.Regex#search
 */
public boolean matchAtLike(StringLike s, int start_pos)
{
  final int only = start_pos;
  return _searchLike(s, only, only);
}
/**
 * Searches a String for the first occurrence of a match.
 *
 * @throws NullPointerException
 *           if s is null
 * @see com.stevesoft.pat.Regex#searchFrom
 * @see com.stevesoft.pat.Regex#matchAt
 */
public boolean search(String s)
{
  if (s != null)
  {
    return _searchLike(new StringWrap(s), 0, s.length());
  }
  throw new NullPointerException(MessageManager.getString("exception.null_string_given_to_regex_search"));
}
/**
 * Searches a StringLike for the first occurrence of a match.
 *
 * @throws NullPointerException
 *           if sl is null
 */
public boolean searchLike(StringLike sl)
{
  if (sl != null)
  {
    return _searchLike(sl, 0, sl.length());
  }
  throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search"));
}
/**
 * Searches a String for a match, trying start positions from the end of the
 * string back towards the beginning.
 *
 * @throws NullPointerException
 *           if s is null
 */
public boolean reverseSearch(String s)
{
  if (s != null)
  {
    return _reverseSearch(s, 0, s.length());
  }
  throw new NullPointerException(MessageManager.getString("exception.null_string_given_to_regex_reverse_search"));
}
/**
 * Searches a StringLike for a match, trying start positions from the end of
 * the text back towards the beginning.
 *
 * @throws NullPointerException
 *           if sl is null
 */
public boolean reverseSearchLike(StringLike sl)
{
  if (sl != null)
  {
    return _reverseSearchLike(sl, 0, sl.length());
  }
  throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_reverse_search"));
}
/**
 * Searches a String for the first occurrence of a match, beginning at
 * position start.
 *
 * @throws NullPointerException
 *           if s is null
 */
public boolean searchFrom(String s, int start)
{
  if (s != null)
  {
    return _searchLike(new StringWrap(s), start, s.length());
  }
  throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_from"));
}
/**
 * Searches a StringLike for the first occurrence of a match, beginning at
 * position start.
 *
 * @throws NullPointerException
 *           if s is null
 */
public boolean searchFromLike(StringLike s, int start)
{
  if (s != null)
  {
    return _searchLike(s, start, s.length());
  }
  throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_from"));
}
/**
 * Searches the region of a String between start and end for the first
 * occurrence of a match.
 *
 * @throws NullPointerException
 *           if s is null
 */
public boolean searchRegion(String s, int start, int end)
{
  if (s != null)
  {
    return _searchLike(new StringWrap(s), start, end);
  }
  throw new NullPointerException(MessageManager.getString("exception.null_string_like_given_to_regex_search_region"));
}
/**
* Set this to change the default behavior of the "." pattern. By default it
* now matches perl's behavior and fails to match the '\n' character. prep()
* combines it with the s flag: the restriction only applies while sFlag is
* false.
*/
public static boolean dotDoesntMatchCR = true;
// Text of the most recent successful search; the search methods clear it
// before each attempt and set it again on success.
StringLike gFlags;
// Position recorded by the most recent successful search: the end of the
// match for forward searches, matchFrom_ - 1 for reverse searches.
int gFlagto = 0;
// The 'g' flag: when set, a search over the same underlying text as the last
// successful one continues from gFlagto.
boolean gFlag = false;
/**
 * Sets the 'g' flag.
 *
 * @param b
 *          the new value of the flag
 */
public void setGFlag(boolean b)
{
  this.gFlag = b;
}
/**
 * Returns the state of the 'g' flag.
 *
 * @return the current value of the flag
 */
public boolean getGFlag()
{
  return this.gFlag;
}
// The 's' flag; while true, prep() switches off the dotDoesntMatchCR
// restriction for the search.
boolean sFlag = false;
/**
 * Returns the state of the s flag.
 *
 * @return the current value of the flag
 */
public boolean getSFlag()
{
  return this.sFlag;
}
// The 'm' flag of this Regex; prep() ORs it with defaultMFlag.
boolean mFlag = false;
/**
 * Returns the state of the m flag of this Regex (the class-wide default is
 * not taken into account).
 *
 * @return the current value of the flag
 */
public boolean getMFlag()
{
  return this.mFlag;
}
/** Wraps the String in a StringWrap and delegates to _searchLike. */
final boolean _search(String s, int start, int end)
{
  StringWrap wrapped = new StringWrap(s);
  return _searchLike(wrapped, start, end);
}
/**
 * Core forward search. Tries the pattern at successive start positions from
 * {@code start} up to an upper bound derived from {@code end}, and records
 * the first match in the result fields.
 *
 * With the g flag set and the same underlying text as the last successful
 * search, the search starts at the position that search recorded. When a
 * skipper is present it is asked for the next candidate position before each
 * attempt; a negative answer ends the search immediately.
 *
 * @return true if a match was found
 */
final boolean _searchLike(StringLike s, int start, int end)
{
  if (gFlag && gFlagto > 0 && gFlags != null
          && s.unwrap() == gFlags.unwrap())
  {
    start = gFlagto;
  }
  gFlags = null;
  Pthings pt = prep(s);

  // Last start position worth trying: leave room for the minimum match
  // length when it is known, but never fall below start for a valid region.
  int up = end;
  if (minMatch != null)
  {
    up = end - minMatch.i;
  }
  if (up < start && end >= start)
  {
    up = start;
  }

  final boolean useSkipper = skipper != null;
  if (useSkipper)
  {
    pt.no_check = true;
  }
  for (int i = start; i <= up; i++)
  {
    if (useSkipper)
    {
      i = skipper.find(src, i, up);
      if (i < 0)
      {
        charsMatched_ = matchFrom_ = -1;
        return didMatch_ = false;
      }
    }
    charsMatched_ = thePattern.matchAt(s, i, pt);
    if (charsMatched_ >= 0)
    {
      matchFrom_ = thePattern.mfrom;
      marks = pt.marks;
      gFlagto = matchFrom_ + charsMatched_;
      gFlags = s;
      return didMatch_ = true;
    }
  }
  return didMatch_ = false;
}
/*
* final boolean _search(LongStringLike s,long start,long end) { if(gFlag &&
* gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null;
*
* Pthings pt=prep(s);
*
* int up = end;//(minMatch == null ? end : end-minMatch.i);
*
* if(up < start && end >= start) up = start;
*
* if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ =
* thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
* thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
* return didMatch_=true; } } } else { pt.no_check = true; for(long i=start;i<=up;i++) {
* i = skipper.find(src,i,up); if(i<0) { charsMatched_ = matchFrom_ = -1;
* return didMatch_ = false; } charsMatched_ = thePattern.matchAt(s,i,pt);
* if(charsMatched_ >= 0) { matchFrom_ = thePattern.mfrom; marks = pt.marks;
* gFlagto = matchFrom_+charsMatched_; gFlags = s; return didMatch_=true; }
* else { i = s.adjustIndex(i); up = s.adjustEnd(i); } } } return
* didMatch_=false; }
*/
/** Wraps the String in a StringWrap and delegates to _reverseSearchLike. */
boolean _reverseSearch(String s, int start, int end)
{
  StringWrap wrapped = new StringWrap(s);
  return _reverseSearchLike(wrapped, start, end);
}
/**
 * Core reverse search. Tries the pattern at start positions from {@code end}
 * down to {@code start} and records the first match found.
 *
 * With the g flag set and the same underlying text as the last successful
 * search, the search begins at the position that search recorded. gFlags is
 * null after an unsuccessful search, so it is checked before being
 * dereferenced (as _searchLike does); without the check this method threw a
 * NullPointerException in that situation.
 *
 * @return true if a match was found
 */
boolean _reverseSearchLike(StringLike s, int start, int end)
{
  if (gFlag && gFlagto > 0 && gFlags != null
          && s.unwrap() == gFlags.unwrap())
  {
    end = gFlagto;
  }
  gFlags = null;
  Pthings pt = prep(s);
  for (int i = end; i >= start; i--)
  {
    charsMatched_ = thePattern.matchAt(s, i, pt);
    if (charsMatched_ >= 0)
    {
      matchFrom_ = thePattern.mfrom;
      marks = pt.marks;
      // Continue before this match on the next g-flag reverse search.
      gFlagto = matchFrom_ - 1;
      gFlags = s;
      return didMatch_ = true;
    }
  }
  return didMatch_ = false;
}
// One-entry cache for setCbits: the last StringLike processed (compared by
// identity) and the bit set computed for it.
static StringLike lasts = null;
static BitSet lastbs = null;
/**
 * Fills in {@code pt.cbits} with a bit set that has a bit set for every
 * character position of {@code s} lying inside a quoted section.
 *
 * A quoted section starts at a double or single quote and runs up to and
 * including the next occurrence of the same quote character. Inside a
 * section, a backslash that is not the last character also marks and skips
 * the character following it, so an escaped quote does not end the section.
 * The result for the most recently processed StringLike is cached.
 */
static void setCbits(StringLike s, Pthings pt)
{
  if (s == lasts)
  {
    pt.cbits = lastbs;
    return;
  }
  BitSet bs = new BitSet(s.length());
  char qc = ' ';
  boolean inQuote = false;
  for (int i = 0; i < s.length(); i++)
  {
    if (inQuote)
    {
      bs.set(i);
      char c = s.charAt(i);
      if (c == qc)
      {
        // Closing quote: it is marked itself, what follows is not.
        inQuote = false;
      }
      else if (c == '\\' && i + 1 < s.length())
      {
        // Escaped character: mark it and step over it.
        i++;
        bs.set(i);
      }
    }
    else
    {
      char c = s.charAt(i);
      if (c == '"' || c == '\'')
      {
        qc = c;
        inQuote = true;
        bs.set(i);
      }
    }
  }
  pt.cbits = lastbs = bs;
  lasts = s;
}
// // Wanted user to over-ride this in alpha version,
// // but it wasn't really necessary because of this trick:
// Regex newRegex()
// {
// try
// {
// return (Regex) getClass().newInstance();
// } catch (InstantiationException ie)
// {
// return null;
// } catch (IllegalAccessException iae)
// {
// return null;
// }
// }
/**
 * Only needed for creating your own extensions of Regex. Appends the given
 * Pattern to the chain under construction, or makes it the head of the chain
 * if the chain is still empty.
 *
 * @param p2
 *          the Pattern to append
 */
protected void add(Pattern p2)
{
  if (p != null)
  {
    p.add(p2);
    return;
  }
  p = p2;
}
/**
 * You only need to use this method if you are creating your own extensions
 * to Regex. compileSP compiles the single Pattern element found at the
 * current position of {@code sp} and adds it to the chain under
 * construction; it can be overridden to allow the Regex compiler to
 * understand new syntax. See deriv.java for an example. This routine is the
 * heart of class Regex.
 *
 * {@code mk} carries the compile state shared across nested groups: the
 * group counter {@code val}, the open parenthesis depth {@code parenLevel},
 * the {@code noBackRefs} switch and the flags collected from "(?...)" flag
 * groups.
 *
 * Changes from the original: the "(?&gt;" branch now names "(?&gt;" instead
 * of "(?&lt;" in its missing-parenthesis message, and the loops that scan
 * "(?#...)" and "(??...)" stop at the end of the pattern and report a syntax
 * error, as the flag-group loop already did.
 *
 * @param sp
 *          the position in the pattern text; left on the last character of
 *          the element that was compiled
 * @param mk
 *          the shared compile state
 * @exception com.stevesoft.pat.RegSyntax
 *              is thrown when a nonsensical pattern is supplied. For
 *              example, a pattern beginning with *.
 */
protected void compileSP(StrPos sp, Rthings mk) throws RegSyntax
{
  if (sp.match('['))
  {
    sp.inc();
    add(matchBracket(sp));
  }
  else if (sp.match('|'))
  {
    // Close the current alternative and start a new, empty one.
    if (or == null)
    {
      or = new Or();
    }
    if (p == null)
    {
      p = new NullPattern();
    }
    or.addOr(p);
    p = null;
  }
  else if (sp.incMatch("(?<"))
  {
    patInt i = sp.getPatInt();
    if (i == null)
    {
      RegSyntaxError.endItAll("No int after (?<");
    }
    add(new Backup(i.intValue()));
    if (!sp.match(')'))
    {
      RegSyntaxError.endItAll("No ) after (?<");
    }
  }
  else if (sp.incMatch("(?>"))
  {
    patInt i = sp.getPatInt();
    if (i == null)
    {
      RegSyntaxError.endItAll("No int after (?>");
    }
    // Same element as "(?<" but with the count negated.
    add(new Backup(-i.intValue()));
    if (!sp.match(')'))
    {
      RegSyntaxError.endItAll("No ) after (?>");
    }
  }
  else if (sp.incMatch("(?@"))
  {
    // The two characters after "(?@" are the opening and closing delimiters.
    char op = sp.c;
    sp.inc();
    char cl = sp.c;
    sp.inc();
    if (!sp.match(')'))
    {
      RegSyntaxError.endItAll("(?@ does not have closing paren");
    }
    add(new Group(op, cl));
  }
  else if (sp.incMatch("(?#"))
  {
    // Comment: skip to the closing parenthesis, adding nothing.
    while (!sp.match(')') && !sp.eos)
    {
      sp.inc();
    }
    if (sp.eos)
    {
      RegSyntaxError.endItAll("Unclosed (?# comment");
    }
  }
  else if (sp.dontMatch && sp.c == 'w')
  {
    // Equivalent to [a-zA-Z0-9_]
    Bracket b = new Bracket(false);
    b.addOr(new Range('a', 'z'));
    b.addOr(new Range('A', 'Z'));
    b.addOr(new Range('0', '9'));
    b.addOr(new oneChar('_'));
    add(b);
  }
  else if (sp.dontMatch && sp.c == 'G')
  {
    add(new BackG());
  }
  else if (sp.dontMatch && sp.c == 's')
  {
    // Equivalent to [ \t\n\r\b]: space, characters 8 to 10, and 13
    Bracket b = new Bracket(false);
    b.addOr(new oneChar((char) 32));
    b.addOr(new Range((char) 8, (char) 10));
    b.addOr(new oneChar((char) 13));
    add(b);
  }
  else if (sp.dontMatch && sp.c == 'd')
  {
    // Equivalent to [0-9]
    Range digit = new Range('0', '9');
    digit.printBrackets = true;
    add(digit);
  }
  else if (sp.dontMatch && sp.c == 'W')
  {
    // Equivalent to [^a-zA-Z0-9_]
    Bracket b = new Bracket(true);
    b.addOr(new Range('a', 'z'));
    b.addOr(new Range('A', 'Z'));
    b.addOr(new Range('0', '9'));
    b.addOr(new oneChar('_'));
    add(b);
  }
  else if (sp.dontMatch && sp.c == 'S')
  {
    // Equivalent to [^ \t\n\r\b]
    Bracket b = new Bracket(true);
    b.addOr(new oneChar((char) 32));
    b.addOr(new Range((char) 8, (char) 10));
    b.addOr(new oneChar((char) 13));
    add(b);
  }
  else if (sp.dontMatch && sp.c == 'D')
  {
    // Equivalent to [^0-9]
    Bracket b = new Bracket(true);
    b.addOr(new Range('0', '9'));
    add(b);
  }
  else if (sp.dontMatch && sp.c == 'B')
  {
    // Compiled as a negative look-ahead on the escaped 'b' element.
    Regex r = new Regex(null, "");
    r._compile("(?!" + back_slash + "b)", mk);
    add(r.thePattern);
  }
  else if (isOctalString(sp))
  {
    // Two octal digits, plus a third when one follows.
    int d = sp.c - '0';
    sp.inc();
    d = 8 * d + sp.c - '0';
    StrPos sp2 = new StrPos(sp);
    sp2.inc();
    if (isOctalDigit(sp2, false))
    {
      sp.inc();
      d = 8 * d + sp.c - '0';
    }
    add(new oneChar((char) d));
  }
  else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')
  {
    // Back reference with one digit, or two when a second digit follows.
    int iv = sp.c - '0';
    StrPos s2 = new StrPos(sp);
    s2.inc();
    if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')
    {
      iv = 10 * iv + (s2.c - '0');
      sp.inc();
    }
    add(new BackMatch(iv));
  }
  else if (sp.dontMatch && sp.c == 'b')
  {
    add(new Boundary());
  }
  else if (sp.match('\b'))
  {
    add(new Boundary());
  }
  else if (sp.match('$'))
  {
    add(new End(true));
  }
  else if (sp.dontMatch && sp.c == 'Z')
  {
    add(new End(false));
  }
  else if (sp.match('.'))
  {
    add(new Any());
  }
  else if (sp.incMatch("(??"))
  {
    // "(??name)" or "(??name:argument)": a rule registered with define or
    // defineV.
    StringBuffer sb = new StringBuffer();
    StringBuffer sb2 = new StringBuffer();
    while (!sp.match(')') && !sp.match(':') && !sp.eos)
    {
      sb.append(sp.c);
      sp.inc();
    }
    if (sp.eos)
    {
      RegSyntaxError.endItAll("Unclosed (??");
    }
    if (sp.incMatch(":"))
    {
      while (!sp.match(')') && !sp.eos)
      {
        sb2.append(sp.c);
        sp.inc();
      }
      if (sp.eos)
      {
        RegSyntaxError.endItAll("Unclosed (??");
      }
    }
    String sbs = sb.toString();
    if (validators.get(sbs) instanceof String)
    {
      // Plain shorthand: compile the stored pattern text in place.
      String pat = (String) validators.get(sbs);
      Regex r = new Regex(null, "");
      Rthings rth = new Rthings(this);
      rth.noBackRefs = true;
      r._compile(pat, rth);
      add(r.thePattern);
    }
    else
    {
      Custom cm = new Custom(sb.toString());
      if (cm.v != null)
      {
        // Let the validator specialise itself for the argument; the
        // specialised validator inherits the pattern of the original.
        Validator v2 = cm.v.arg(sb2.toString());
        if (v2 != null)
        {
          v2.argsave = sb2.toString();
          String savedPattern = cm.v.pattern;
          cm.v = v2;
          v2.pattern = savedPattern;
        }
        Regex r = new Regex(null, "");
        Rthings rth = new Rthings(this);
        rth.noBackRefs = true;
        r._compile(cm.v.pattern, rth);
        cm.sub = r.thePattern;
        cm.sub.add(new CustomEndpoint(cm));
        cm.sub.setParent(cm);
        add(cm);
      }
    }
  }
  else if (sp.match('('))
  {
    mk.parenLevel++;
    Regex r = new Regex(null, "");
    sp.inc();
    if (sp.incMatch("?:"))
    {
      r.or = new Or();
    }
    else if (sp.incMatch("?="))
    {
      r.or = new lookAhead(false);
    }
    else if (sp.incMatch("?!"))
    {
      r.or = new lookAhead(true);
    }
    else if (sp.match('?'))
    {
      // Flag group such as "(?i)": record every flag letter up to ')'.
      sp.inc();
      do
      {
        if (sp.c == 'i')
        {
          mk.ignoreCase = true;
        }
        if (sp.c == 'Q')
        {
          mk.dontMatchInQuotes = true;
        }
        if (sp.c == 'o')
        {
          mk.optimizeMe = true;
        }
        if (sp.c == 'g')
        {
          mk.gFlag = true;
        }
        if (sp.c == 's')
        {
          mk.sFlag = true;
        }
        if (sp.c == 'm')
        {
          mk.mFlag = true;
        }
        sp.inc();
      } while (!sp.match(')') && !sp.eos);
      // A flag group contributes no pattern and does not stay open.
      r = null;
      mk.parenLevel--;
      if (sp.eos)
      {
        RegSyntaxError.endItAll("Unclosed ()");
      }
    }
    else
    { // just ordinary parenthesis
      r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
    }
    if (r != null)
    {
      add(r._compileSP(sp, mk));
    }
  }
  else if (sp.match('^'))
  {
    add(new Start(true));
  }
  else if (sp.dontMatch && sp.c == 'A')
  {
    add(new Start(false));
  }
  else if (sp.match('*'))
  {
    addMulti(new patInt(0), new patInf());
  }
  else if (sp.match('+'))
  {
    addMulti(new patInt(1), new patInf());
  }
  else if (sp.match('?'))
  {
    addMulti(new patInt(0), new patInt(1));
  }
  else if (sp.match('{'))
  {
    // "{n}", "{n,}" or "{n,m}". Anything malformed is treated as a literal
    // '{' after rewinding to the saved position.
    boolean bad = false;
    StrPos sp2 = new StrPos(sp);
    sp.inc();
    patInt i1 = sp.getPatInt();
    patInt i2 = null;
    if (sp.match('}'))
    {
      i2 = i1;
    }
    else
    {
      if (!sp.match(','))
      {
        bad = true;
      }
      sp.inc();
      if (sp.match('}'))
      {
        i2 = new patInf();
      }
      else
      {
        i2 = sp.getPatInt();
      }
    }
    if (i1 == null || i2 == null)
    {
      bad = true;
    }
    if (bad)
    {
      sp.dup(sp2);
      add(new oneChar(sp.c));
    }
    else
    {
      addMulti(i1, i2);
    }
  }
  else if (sp.escMatch('x') && next2Hex(sp))
  {
    // Escaped 'x' followed by two hex digits.
    sp.inc();
    int d = getHexDigit(sp);
    sp.inc();
    d = 16 * d + getHexDigit(sp);
    add(new oneChar((char) d));
  }
  else if (sp.escMatch('c'))
  {
    // Escaped 'c': map the next character through Ctrl.cmap when it is
    // within the table, otherwise take it literally.
    sp.inc();
    if (sp.c < Ctrl.cmap.length)
    {
      add(new oneChar(Ctrl.cmap[sp.c]));
    }
    else
    {
      add(new oneChar(sp.c));
    }
  }
  else if (sp.escMatch('f'))
  {
    add(new oneChar((char) 12));
  }
  else if (sp.escMatch('a'))
  {
    add(new oneChar((char) 7));
  }
  else if (sp.escMatch('t'))
  {
    add(new oneChar('\t'));
  }
  else if (sp.escMatch('n'))
  {
    add(new oneChar('\n'));
  }
  else if (sp.escMatch('r'))
  {
    add(new oneChar('\r'));
  }
  else if (sp.escMatch('b'))
  {
    add(new oneChar('\b'));
  }
  else if (sp.escMatch('e'))
  {
    add(new oneChar((char) 27));
  }
  else
  {
    // Any other character stands for itself; an unescaped ')' reaching this
    // point has no matching opening parenthesis.
    add(new oneChar(sp.c));
    if (sp.match(')'))
    {
      RegSyntaxError.endItAll("Unmatched right paren in pattern");
    }
  }
}
// Internal method: compiles the whole pattern text. Resets minMatch and the
// s, m, ignoreCase and g flags, compiles every element from position 0,
// stores the result in thePattern and clears the marks of the shared Pthings.
private Pattern _compile(String pat, Rthings mk) throws RegSyntax
{
  minMatch = null;
  gFlag = false;
  ignoreCase = false;
  mFlag = false;
  sFlag = false;
  thePattern = _compileSP(new StrPos(pat, 0), mk);
  pt.marks = null;
  return thePattern;
}
// First element of the chain of pattern elements compiled so far; stays null
// until an element exists (_compileSP substitutes a NullPattern in that case).
Pattern p = null;
// Alternation container. When it is non-null, _compileSP stops at an
// unescaped ')', adds the chain in p to it and returns it instead of p.
Or or = null;
/**
 * Compiles pattern elements from <code>sp</code> one at a time until the
 * text is exhausted or, when <code>or</code> is set, until a ')' is matched.
 *
 * @param sp
 *          the current position in the pattern text; advanced as elements
 *          are consumed
 * @param mk
 *          bookkeeping object; its parenLevel is decremented when a ')' ends
 *          the loop
 * @return <code>or</code> (with the compiled chain added to it) when
 *         <code>or</code> is set, otherwise the compiled chain, or a
 *         NullPattern if nothing was compiled
 * @throws RegSyntax
 *           propagated from compileSP
 */
Pattern _compileSP(StrPos sp, Rthings mk) throws RegSyntax
{
  // keep going while there is text left and we are not sitting on the ')'
  // that closes the group being compiled
  while (!sp.eos && (or == null || !sp.match(')')))
  {
    compileSP(sp, mk);
    sp.inc();
  }

  if (sp.match(')'))
  {
    mk.parenLevel--;
  }
  else if (sp.eos && mk.parenLevel != 0)
  {
    RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);
  }

  if (or == null)
  {
    return p != null ? p : new NullPattern();
  }

  // an empty alternative is represented by a NullPattern
  if (p == null)
  {
    p = new NullPattern();
  }
  or.addOr(p);
  return or;
}
/**
 * Applies a repetition count to the last element of the chain held in
 * <code>p</code>.
 * <p>
 * If the last element is already a Multi, FastMulti or DotMulti, the only
 * accepted count is {0,1}, which marks that element as matching as few times
 * as possible; any other count is a syntax error. Otherwise the last element
 * is replaced in the chain by the result of mkMulti.
 *
 * @param i1
 *          the minimum number of repetitions
 * @param i2
 *          the maximum number of repetitions
 * @throws RegSyntax
 *           if a count other than {0,1} follows an existing multi, or if
 *           mkMulti rejects the element
 */
void addMulti(patInt i1, patInt i2) throws RegSyntax
{
  // walk to the end of the chain, remembering the element before the tail
  Pattern tail = p;
  Pattern beforeTail = null;
  while (tail != null && tail.next != null)
  {
    beforeTail = tail;
    tail = tail.next;
  }

  boolean alreadyMulti = tail instanceof Multi || tail instanceof DotMulti
          || tail instanceof FastMulti;
  if (!alreadyMulti)
  {
    if (beforeTail == null)
    {
      // the chain is empty or has a single element: replace the head
      p = mkMulti(i1, i2, p);
    }
    else
    {
      beforeTail.next = mkMulti(i1, i2, tail);
    }
    return;
  }

  if (!(i1.intValue() == 0 && i2.intValue() == 1))
  {
    throw new RegSyntax("Syntax error.");
  }

  // {0,1} directly after a multi switches it to fewest-match mode
  if (tail instanceof Multi)
  {
    ((Multi) tail).matchFewest = true;
  }
  else if (tail instanceof FastMulti)
  {
    ((FastMulti) tail).matchFewest = true;
  }
  else
  {
    ((DotMulti) tail).matchFewest = true;
  }
}
/**
 * Wraps <code>p</code> in the repetition element that suits it: a DotMulti
 * for a lone Any, a FastMulti when RegOpt.safe4fm accepts the pattern, and a
 * plain Multi otherwise.
 *
 * @param lo
 *          the minimum number of repetitions
 * @param hi
 *          the maximum number of repetitions
 * @param p
 *          the pattern to repeat
 * @return the new repetition element
 * @throws RegSyntax
 *           declared by this method; where it is raised is not visible here
 */
final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)
        throws RegSyntax
{
  boolean loneAny = p instanceof Any && p.next == null;
  if (loneAny)
  {
    return new DotMulti(lo, hi);
  }
  if (RegOpt.safe4fm(p))
  {
    return new FastMulti(lo, hi, p);
  }
  return new Multi(lo, hi, p);
}
/**
 * Processes the bracket operator: reads the members of a character class
 * from <code>sp</code> and collects them in a Bracket.
 * <p>
 * A leading '^' creates the Bracket with its constructor flag set to true.
 * Members are then read until an unescaped ']' or the end of the pattern
 * text. Recognised members are ranges written as "a-z", quoted runs between
 * \Q and \E, the classes \d \s \w \D \S \W, two-digit hex escapes (\x),
 * the single character escapes \a \f \e \n \t \r, control characters (\c),
 * octal escapes, and plain characters.
 * <p>
 * Two defects of the earlier implementation are corrected here:
 * <ul>
 * <li>\W no longer includes the decimal digits, so it is the exact
 * complement of what \w adds in this method;</li>
 * <li>a \Q that is never closed by \E stops at the end of the pattern text
 * instead of looping without end.</li>
 * </ul>
 *
 * @param sp
 *          the current position in the pattern text; presumably just past
 *          the opening '[' (the caller is not visible here). On return it is
 *          at the closing ']' or at the end of the text.
 * @return the Bracket holding every member that was read
 * @throws RegSyntax
 *           declared by this method; an empty class is reported through
 *           RegSyntaxError.endItAll
 */
Pattern matchBracket(StrPos sp) throws RegSyntax
{
  Bracket ret;
  if (sp.match('^'))
  {
    ret = new Bracket(true);
    sp.inc();
  }
  else
  {
    ret = new Bracket(false);
  }
  if (sp.match(']'))
  {
    // throw new RegSyntax
    RegSyntaxError.endItAll("Unmatched []");
  }
  while (!sp.eos && !sp.match(']'))
  {
    // look one and two positions ahead to recognise a range such as "a-z";
    // a '-' directly before the closing ']' is an ordinary character
    StrPos s1 = new StrPos(sp);
    s1.inc();
    StrPos s1_ = new StrPos(s1);
    s1_.inc();
    if (s1.match('-') && !s1_.match(']'))
    {
      StrPos s2 = new StrPos(s1);
      s2.inc();
      if (!s2.eos)
      {
        ret.addOr(new Range(sp.c, s2.c));
      }
      // step over the range start and the '-'; the shared inc() at the
      // bottom of the loop steps over the range end
      sp.inc();
      sp.inc();
    }
    else if (sp.escMatch('Q'))
    {
      sp.inc();
      // every character up to \E is taken literally; stop at the end of
      // the text as well so that a missing \E cannot loop forever
      while (!sp.eos && !sp.escMatch('E'))
      {
        ret.addOr(new oneChar(sp.c));
        sp.inc();
      }
    }
    else if (sp.escMatch('d'))
    {
      ret.addOr(new Range('0', '9'));
    }
    else if (sp.escMatch('s'))
    {
      ret.addOr(new oneChar((char) 32));
      ret.addOr(new Range((char) 8, (char) 10));
      ret.addOr(new oneChar((char) 13));
    }
    else if (sp.escMatch('w'))
    {
      ret.addOr(new Range('a', 'z'));
      ret.addOr(new Range('A', 'Z'));
      ret.addOr(new Range('0', '9'));
      ret.addOr(new oneChar('_'));
    }
    else if (sp.escMatch('D'))
    {
      // everything except '0'..'9' (48..57)
      ret.addOr(new Range((char) 0, (char) 47));
      ret.addOr(new Range((char) 58, (char) 65535));
    }
    else if (sp.escMatch('S'))
    {
      // everything except the characters added for \s (8..10, 13, 32)
      ret.addOr(new Range((char) 0, (char) 7));
      ret.addOr(new Range((char) 11, (char) 12));
      ret.addOr(new Range((char) 14, (char) 31));
      ret.addOr(new Range((char) 33, (char) 65535));
    }
    else if (sp.escMatch('W'))
    {
      // everything except the characters added for \w:
      // '0'..'9' (48..57), 'A'..'Z' (65..90), '_' (95), 'a'..'z' (97..122)
      ret.addOr(new Range((char) 0, (char) 47));
      ret.addOr(new Range((char) 58, (char) 64));
      ret.addOr(new Range((char) 91, (char) 94));
      ret.addOr(new oneChar((char) 96));
      ret.addOr(new Range((char) 123, (char) 65535));
    }
    else if (sp.escMatch('x') && next2Hex(sp))
    {
      // two hex digits give the character code
      sp.inc();
      int d = getHexDigit(sp);
      sp.inc();
      d = 16 * d + getHexDigit(sp);
      ret.addOr(new oneChar((char) d));
    }
    else if (sp.escMatch('a'))
    {
      ret.addOr(new oneChar((char) 7));
    }
    else if (sp.escMatch('f'))
    {
      ret.addOr(new oneChar((char) 12));
    }
    else if (sp.escMatch('e'))
    {
      ret.addOr(new oneChar((char) 27));
    }
    else if (sp.escMatch('n'))
    {
      ret.addOr(new oneChar('\n'));
    }
    else if (sp.escMatch('t'))
    {
      ret.addOr(new oneChar('\t'));
    }
    else if (sp.escMatch('r'))
    {
      ret.addOr(new oneChar('\r'));
    }
    else if (sp.escMatch('c'))
    {
      // control character: translate through Ctrl.cmap when the table
      // covers the character, otherwise take the character itself
      sp.inc();
      if (sp.c < Ctrl.cmap.length)
      {
        ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
      }
      else
      {
        ret.addOr(new oneChar(sp.c));
      }
    }
    else if (isOctalString(sp))
    {
      // two octal digits, with an optional third
      int d = sp.c - '0';
      sp.inc();
      d = 8 * d + sp.c - '0';
      StrPos sp2 = new StrPos(sp);
      sp2.inc();
      if (isOctalDigit(sp2, false))
      {
        sp.inc();
        d = 8 * d + sp.c - '0';
      }
      ret.addOr(new oneChar((char) d));
    }
    else
    {
      ret.addOr(new oneChar(sp.c));
    }
    sp.inc();
  }
  return ret;
}
/**
 * Decompiles the stored Pattern back into text. Characters such as tab and
 * newline are written out as themselves rather than as two-character escape
 * sequences, so expect odd-looking output when the pattern contains
 * characters that are not printable.
 * <p>
 * A non-default escape character is announced with a leading "(?e=X)" and
 * any active flags with "(?...)", followed by the pattern text itself.
 *
 * @return the textual form of this Regex
 */
public String toString()
{
  StringBuffer text = new StringBuffer();
  boolean customEsc = esc != Pattern.ESC;
  if (customEsc)
  {
    text.append("(?e=").append(esc).append(")");
  }

  // collect the flag letters first; the group is only written if any apply
  StringBuffer flags = new StringBuffer();
  if (ignoreCase)
  {
    flags.append("i");
  }
  if (mFlag)
  {
    flags.append("m");
  }
  if (sFlag || !dotDoesntMatchCR)
  {
    flags.append("s");
  }
  if (dontMatchInQuotes)
  {
    flags.append("Q");
  }
  if (optimized())
  {
    flags.append("o");
  }
  if (gFlag)
  {
    flags.append("g");
  }
  if (flags.length() > 0)
  {
    text.append("(?").append(flags.toString()).append(")");
  }

  String body = thePattern.toString();
  if (customEsc)
  {
    // the elements print themselves with the default escape character
    body = reEscape(body, Pattern.ESC, esc);
  }
  text.append(body);
  return text.toString();
}
/**
 * Rewrites pattern text so that it uses a different escape character.
 * <ul>
 * <li>An escaped old escape character becomes a single, unescaped old escape
 * character.</li>
 * <li>Any other escaped character keeps its escape, now written with the new
 * escape character.</li>
 * <li>A bare occurrence of the new escape character is doubled.</li>
 * <li>Everything else, including an old escape character at the very end of
 * the text, is copied unchanged.</li>
 * </ul>
 *
 * @param s
 *          the pattern text written with <code>oldEsc</code>
 * @param oldEsc
 *          the escape character used in <code>s</code>
 * @param newEsc
 *          the escape character to use in the result
 * @return the rewritten text, or <code>s</code> itself when both escape
 *         characters are the same
 */
static String reEscape(String s, char oldEsc, char newEsc)
{
  if (oldEsc == newEsc)
  {
    return s;
  }
  final int len = s.length();
  StringBuffer out = new StringBuffer();
  int pos = 0;
  while (pos < len)
  {
    char cur = s.charAt(pos);
    boolean startsEscapePair = cur == oldEsc && pos + 1 < len;
    if (startsEscapePair)
    {
      char escaped = s.charAt(pos + 1);
      if (escaped == oldEsc)
      {
        out.append(oldEsc);
      }
      else
      {
        out.append(newEsc).append(escaped);
      }
      pos += 2;
    }
    else
    {
      if (cur == newEsc)
      {
        // a literal occurrence of the new escape character is doubled
        out.append(newEsc);
      }
      out.append(cur);
      pos++;
    }
  }
  return out.toString();
}
/**
 * FilenameFilter-style callback, so that a Regex can be handed to File.list
 * to pick entries out of a directory. FileRegex is the better tool for that
 * job.
 *
 * @param dir
 *          the directory being listed; not used
 * @param s
 *          the name of the entry to test
 * @return the result of searching <code>s</code> with this Regex
 * @see com.stevesoft.pat.FileRegex
 */
public boolean accept(File dir, String s)
{
  boolean nameMatches = search(s);
  return nameMatches;
}
/**
 * Reports the version of this package.
 *
 * @return the release string of the package
 */
final static public String version()
{
  final String release = "lgpl release 1.5.3";
  return release;
}
/**
 * Rewrites the compiled pattern into a form that should execute faster (not
 * all patterns run at the same speed). After this call search and matchAt
 * will run more quickly, but ignoreCase and dontMatchInQuotes must no longer
 * be changed, as the results would be unpredictable. Use optimized() to find
 * out whether a pattern has already been through this method.
 * <p>
 * In general "(?: ... )" is faster than "( ... )", so group with the former
 * when the backreference is not needed.
 * <p>
 * Optimizing can also introduce pattern elements that cannot be obtained any
 * other way. For a large table of strings, for example the months of the
 * year "(January|February|...)", it builds a Hashtable that leads straight
 * to the next appropriate pattern element, which removes the need for a
 * linear search.
 * <p>
 * Calling this method on a Regex that is already optimized, or that has no
 * pattern, does nothing.
 *
 * @see com.stevesoft.pat.Regex#optimized
 * @see com.stevesoft.pat.Regex#ignoreCase
 * @see com.stevesoft.pat.Regex#dontMatchInQuotes
 * @see com.stevesoft.pat.Regex#matchAt
 * @see com.stevesoft.pat.Regex#search
 */
public void optimize()
{
  if (!optimized() && thePattern != null)
  {
    // a non-null minMatch is what marks this Regex as optimized
    minMatch = new patInt(0); // thePattern.countMinChars();
    thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes);
    skipper = Skip.findSkipRegex(this);
    // RegOpt.setParents(this);
  }
}
// Assigned by optimize() from Skip.findSkipRegex(this).
Skip skipper;
/**
 * Tells whether the optimize method has been called on this Regex since it
 * was last compiled.
 *
 * @return true if the pattern has been optimized
 */
public boolean optimized()
{
  // optimize() sets minMatch; compiling resets it to null
  boolean hasMinMatch = (minMatch != null);
  return hasMinMatch;
}
/**
 * A bit of syntactic sugar for those who want their code to look more
 * perl-like. Initialize a Regex object like this:
 *
 * <pre>
 * Regex r1 = Regex.perlCode("s/hello/goodbye/");
 * Regex r2 = Regex.perlCode("s'fish'frog'i");
 * Regex r3 = Regex.perlCode("m'hello'");
 * </pre>
 *
 * The i for ignoreCase is supported in this syntax, as well as m, s, and x.
 * The g flag is a bit of a special case.
 * <p>
 * To replace all occurrences of a pattern, do not put a 'g' in the perlCode;
 * call Regex's replaceAll method instead.
 * <p>
 * To simply search for r2's pattern, call the searchFrom method repeatedly,
 * or call search repeatedly if the g flag is set.
 * <p>
 * Note: Currently perlCode does not support the (?e=#) syntax for changing
 * the escape character.
 *
 * @param s
 *          the perl-style expression
 * @return whatever parsePerl.parse produces for <code>s</code>
 */
public static Regex perlCode(String s)
{
  // the parsing lives in parsePerl.java to keep this file from growing
  Regex parsed = parsePerl.parse(s);
  return parsed;
}
// The backslash character as a named constant.
static final char back_slash = '\\';
/**
 * Checks whether this Regex consists of literal elements only, with no
 * special pattern elements. A chain counts as literal when every element in
 * it is a oneChar or a Skipped; an empty chain is literal too.
 *
 * @return true if no special pattern element was found
 */
public boolean isLiteral()
{
  for (Pattern node = thePattern; node != null; node = node.next)
  {
    boolean literalNode = node instanceof oneChar || node instanceof Skipped;
    if (!literalNode)
    {
      return false;
    }
  }
  return true;
}
/**
 * Only of interest to those who are inventing their own pattern elements.
 *
 * @return the value reported by countMinChars() of the compiled pattern
 */
public patInt countMinChars()
{
  patInt fewest = thePattern.countMinChars();
  return fewest;
}
/**
 * Only of interest to those who are inventing their own pattern elements.
 *
 * @return the value reported by countMaxChars() of the compiled pattern
 */
public patInt countMaxChars()
{
  patInt most = thePattern.countMaxChars();
  return most;
}
/**
 * Tells whether the character at <code>sp</code> is a hexadecimal digit
 * (0-9, a-f or A-F). The answer is always false at the end of the text or
 * when the position has its dontMatch flag set.
 *
 * @param sp
 *          the position to examine; not modified
 * @return true if the current character is a usable hex digit
 */
boolean isHexDigit(StrPos sp)
{
  if (sp.eos || sp.dontMatch)
  {
    return false;
  }
  final int ch = sp.c;
  boolean decimal = ch >= '0' && ch <= '9';
  boolean lowerHex = ch >= 'a' && ch <= 'f';
  boolean upperHex = ch >= 'A' && ch <= 'F';
  return decimal || lowerHex || upperHex;
}
/**
 * Tells whether the character at <code>sp</code> is an octal digit (0-7)
 * whose dontMatch flag equals <code>first</code>: the first digit of an
 * octal escape must have the flag set, later digits must not.
 *
 * @param sp
 *          the position to examine; not modified
 * @param first
 *          true when testing the first digit of an octal escape
 * @return true if the current character qualifies
 */
boolean isOctalDigit(StrPos sp, boolean first)
{
  if (sp.eos)
  {
    return false;
  }
  if (first != sp.dontMatch)
  {
    return false;
  }
  return '0' <= sp.c && sp.c <= '7';
}
/**
 * Converts the hexadecimal digit at <code>sp</code> to its numeric value.
 * Callers are expected to have checked the character with isHexDigit first;
 * any character outside 0-9 and a-f is treated as an upper-case hex letter.
 *
 * @param sp
 *          the position holding the digit; not modified
 * @return the value of the digit
 */
int getHexDigit(StrPos sp)
{
  final int ch = sp.c;
  int value;
  if (ch >= '0' && ch <= '9')
  {
    value = ch - '0';
  }
  else if (ch >= 'a' && ch <= 'f')
  {
    value = ch - 'a' + 10;
  }
  else
  {
    value = ch - 'A' + 10;
  }
  return value;
}
/**
 * Looks ahead to see whether the two characters following <code>sp</code>
 * are both hexadecimal digits. The look-ahead works on a copy, so
 * <code>sp</code> itself is not advanced.
 *
 * @param sp
 *          the position just before the two characters to test
 * @return true if both following characters pass isHexDigit
 */
boolean next2Hex(StrPos sp)
{
  StrPos probe = new StrPos(sp);
  probe.inc();
  if (isHexDigit(probe))
  {
    probe.inc();
    return isHexDigit(probe);
  }
  return false;
}
/**
 * Tells whether an octal escape starts at <code>sp</code>: the current
 * character must qualify as a first octal digit and the one after it as a
 * following octal digit. The look-ahead works on a copy, so <code>sp</code>
 * itself is not advanced.
 *
 * @param sp
 *          the position of the candidate first digit
 * @return true if at least two octal digits start here
 */
boolean isOctalString(StrPos sp)
{
  if (isOctalDigit(sp, true))
  {
    StrPos lookahead = new StrPos(sp);
    lookahead.inc();
    return isOctalDigit(lookahead, false);
  }
  return false;
}
}