//
// This software is now distributed according to
// the Lesser Gnu Public License. Please see
// http://www.gnu.org/copyleft/lesser.txt for
// the details.
// -- Happy Computing!
//
package com.stevesoft.pat;

import jalview.util.MessageManager;

import java.io.File;
import java.io.FilenameFilter;
import java.util.BitSet;
import java.util.Hashtable;

import com.stevesoft.pat.wrap.StringWrap;

/** Accepts the character at {@code from} when it is Unicode punctuation. */
class UnicodePunct extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not punctuation. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isPunct(s.charAt(from)) ? to : -1;
  }
}

/** Accepts the character at {@code from} when it is Unicode white space. */
class UnicodeWhite extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not white space. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isWhite(s.charAt(from)) ? to : -1;
  }
}

/** Accepts the character at {@code from} when it is not Unicode punctuation. */
class NUnicodePunct extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or punctuation. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isPunct(s.charAt(from)) ? -1 : to;
  }
}

/** Accepts the character at {@code from} when it is not Unicode white space. */
class NUnicodeWhite extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or white space. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isWhite(s.charAt(from)) ? -1 : to;
  }
}

/** Accepts a Unicode word character: alphabetic, decimal digit or underscore. */
class UnicodeW extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not a word character. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from < s.length()) {
      char ch = s.charAt(from);
      if (Prop.isAlphabetic(ch) || Prop.isDecimalDigit(ch) || ch == '_') {
        return to;
      }
    }
    return -1;
  }
}

/** Accepts a character that is not alphabetic, a decimal digit or underscore. */
class NUnicodeW extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or a word character. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from < s.length()) {
      char ch = s.charAt(from);
      boolean wordChar = Prop.isAlphabetic(ch) || Prop.isDecimalDigit(ch)
          || ch == '_';
      if (!wordChar) {
        return to;
      }
    }
    return -1;
  }
}

/** Accepts the character at {@code from} when it is a Unicode decimal digit. */
class UnicodeDigit extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not a digit. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isDecimalDigit(s.charAt(from)) ? to : -1;
  }
}

/** Accepts the character at {@code from} when it is not a Unicode decimal digit. */
class NUnicodeDigit extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or a digit. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isDecimalDigit(s.charAt(from)) ? -1 : to;
  }
}

/** Accepts the character at {@code from} when it is a Unicode math character. */
class UnicodeMath extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not a math character. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isMath(s.charAt(from)) ? to : -1;
  }
}

/** Accepts the character at {@code from} when it is not a Unicode math character. */
class NUnicodeMath extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or a math character. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isMath(s.charAt(from)) ? -1 : to;
  }
}

/** Accepts the character at {@code from} when it is a Unicode currency symbol. */
class UnicodeCurrency extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not a currency symbol. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isCurrency(s.charAt(from)) ? to : -1;
  }
}

/** Accepts the character at {@code from} when it is not a Unicode currency symbol. */
class NUnicodeCurrency extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or a currency symbol. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isCurrency(s.charAt(from)) ? -1 : to;
  }
}

/** Accepts the character at {@code from} when it is Unicode alphabetic. */
class UnicodeAlpha extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not alphabetic. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isAlphabetic(s.charAt(from)) ? to : -1;
  }
}

/** Accepts the character at {@code from} when it is not Unicode alphabetic. */
class NUnicodeAlpha extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or alphabetic. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return Prop.isAlphabetic(s.charAt(from)) ? -1 : to;
  }
}

/** Accepts the character at {@code from} when it is an upper case character. */
class UnicodeUpper extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not upper case. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return isUpper(s.charAt(from)) ? to : -1;
  }

  /**
   * A character counts as upper case when CaseMgr maps it to itself for upper
   * case but to a different character for lower case.
   */
  final boolean isUpper(char c) {
    boolean changedByLowering = c != CaseMgr.toLowerCase(c);
    return c == CaseMgr.toUpperCase(c) && changedByLowering;
  }
}

/** Accepts the character at {@code from} when it is a lower case character. */
class UnicodeLower extends UniValidator {
  /** Returns {@code to} on success, -1 when out of range or not lower case. */
  @Override
  public int validate(StringLike s, int from, int to) {
    if (from >= s.length()) {
      return -1;
    }
    return isLower(s.charAt(from)) ? to : -1;
  }

  /**
   * A character counts as lower case when CaseMgr maps it to a different
   * character for upper case but to itself for lower case.
   */
  final boolean isLower(char c) {
    boolean changedByRaising = c != CaseMgr.toUpperCase(c);
    return changedByRaising && c == CaseMgr.toLowerCase(c);
  }
}

/**
 * Regex provides the parser which constructs the linked list of Pattern classes
 * from a String.
 *
* For the purpose of this documentation, the fact that java interprets the * backslash will be ignored. In practice, however, you will need a double * backslash to obtain a string that contains a single backslash character. * Thus, the example pattern "\b" should really be typed as "\\b" inside java * code. *
* Note that Regex is part of package "com.stevesoft.pat". To use it, simply * import com.stevesoft.pat.Regex at the top of your file. *
* Regex is made with a constructor that takes a String that defines the regular * expression. Thus, for example * *
 * <pre>
 * Regex r = new Regex("[a-c]*");
 * </pre>
 *
 * matches any number of characters so long as they are 'a', 'b', or 'c'.
 *
* To attempt to match the Pattern to a given string, you can use either the * search(String) member function, or the matchAt(String,int position) member * function. These functions return a boolean which tells you whether or not the * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in * the Regex object appropriately. *
* The portion of the string before the match can be obtained by the left() * member, and the portion after the match can be obtained by the right() * member. *
* Essentially, this package implements a syntax that is very much like the perl * 5 regular expression syntax. * * Longer example: * *
* Regex r = new Regex("x(a|b)y"); * r.matchAt("xay", 0); * System.out.println("sub = " + r.stringMatched(1)); ** * The above would print "sub = a". * *
* r.left() // would return "x" * r.right() // would return "y" ** *
* Differences between this package and perl5:
* The extended Pattern for setting flags, is now supported, but the flags are
* different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the
* "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the
* escape character. The pattern
*
*
 * <pre>
 * (?e=#)#d+
 * </pre>
 *
 * is the same as
 *
 * <pre>
 * \d+
 * </pre>
 *
 * but note that the sequence
 *
 * <pre>
 * (?e=#)
 * </pre>
 *
 * must occur at the very beginning of the pattern. There may be other small
 * differences as well. I will either make my package conform or note them as
 * I become aware of them.
 *
* This package supports additional patterns not in perl5:
 * <ul>
 * <li><b>(?@())</b> (Group): This matches all characters between the '('
 * character and the balancing ')' character. Thus, it will match "()" as well
 * as "(())". The balancing characters are arbitrary, thus (?@{}) matches on
 * "{}" and "{{}}".</li>
 * <li><b>(?&lt;1)</b> (Backup): Moves the pointer backwards within the text.
 * This allows you to make a "look behind." It fails if it attempts to move to
 * a position before the beginning of the string. "x(?&lt;1)" is equivalent to
 * "(?=x)". The number, 1 in this example, is the number of characters to move
 * backwards.</li>
 * </ul>
 *
* start **/ public boolean searchFrom(String s, int start) { if (s == null) { throw new NullPointerException( MessageManager .getString("exception.null_string_like_given_to_regex_search_from")); } return _search(s, start, s.length()); } public boolean searchFrom(StringLike s, int start) { if (s == null) { throw new NullPointerException( MessageManager .getString("exception.null_string_like_given_to_regex_search_from")); } return _search(s, start, s.length()); } /** * Search through a region of a String for the first occurence of a match. */ public boolean searchRegion(String s, int start, int end) { if (s == null) { throw new NullPointerException( MessageManager .getString("exception.null_string_like_given_to_regex_search_region")); } return _search(s, start, end); } /** * Set this to change the default behavior of the "." pattern. By default it * now matches perl's behavior and fails to match the '\n' character. */ public static boolean dotDoesntMatchCR = true; StringLike gFlags; int gFlagto = 0; boolean gFlag = false; /** Set the 'g' flag */ public void setGFlag(boolean b) { gFlag = b; } /** Get the state of the 'g' flag. */ public boolean getGFlag() { return gFlag; } boolean sFlag = false; /** Get the state of the sFlag */ public boolean getSFlag() { return sFlag; } boolean mFlag = false; /** Get the state of the sFlag */ public boolean getMFlag() { return mFlag; } final boolean _search(String s, int start, int end) { return _search(new StringWrap(s), start, end); } final boolean _search(StringLike s, int start, int end) { if (gFlag && gFlagto > 0 && gFlags != null && s.unwrap() == gFlags.unwrap()) { start = gFlagto; } gFlags = null; Pthings pt = prep(s); int up = (minMatch == null ? 
end : end - minMatch.i); if (up < start && end >= start) { up = start; } if (skipper == null) { for (int i = start; i <= up; i++) { charsMatched_ = thePattern.matchAt(s, i, pt); if (charsMatched_ >= 0) { matchFrom_ = thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_ + charsMatched_; gFlags = s; return didMatch_ = true; } } } else { pt.no_check = true; for (int i = start; i <= up; i++) { i = skipper.find(src, i, up); if (i < 0) { charsMatched_ = matchFrom_ = -1; return didMatch_ = false; } charsMatched_ = thePattern.matchAt(s, i, pt); if (charsMatched_ >= 0) { matchFrom_ = thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_ + charsMatched_; gFlags = s; return didMatch_ = true; } } } return didMatch_ = false; } /* * final boolean _search(LongStringLike s,long start,long end) { if(gFlag && * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null; * * Pthings pt=prep(s); * * int up = end;//(minMatch == null ? end : end-minMatch.i); * * if(up < start && end >= start) up = start; * * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ = * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ = * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_; * return didMatch_=true; } } } else { pt.no_check = true; for(long * i=start;i<=up;i++) { i = skipper.find(src,i,up); if(i<0) { charsMatched_ = * matchFrom_ = -1; return didMatch_ = false; } charsMatched_ = * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ = * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_; * gFlags = s; return didMatch_=true; } else { i = s.adjustIndex(i); up = * s.adjustEnd(i); } } } return didMatch_=false; } */ boolean _reverseSearch(String s, int start, int end) { return _reverseSearch(new StringWrap(s), start, end); } boolean _reverseSearch(StringLike s, int start, int end) { if (gFlag && gFlagto > 0 && s.unwrap() == gFlags.unwrap()) { end = gFlagto; } gFlags = null; Pthings pt = prep(s); for (int i = end; i >= 
start; i--) { charsMatched_ = thePattern.matchAt(s, i, pt); if (charsMatched_ >= 0) { matchFrom_ = thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_ - 1; gFlags = s; return didMatch_ = true; } } return didMatch_ = false; } // This routine sets the cbits variable // of class Pattern. Cbits is true for // the bit corresponding to a character inside // a set of quotes. static StringLike lasts = null; static BitSet lastbs = null; static void setCbits(StringLike s, Pthings pt) { if (s == lasts) { pt.cbits = lastbs; return; } BitSet bs = new BitSet(s.length()); char qc = ' '; boolean setBit = false; for (int i = 0; i < s.length(); i++) { if (setBit) { bs.set(i); } char c = s.charAt(i); if (!setBit && c == '"') { qc = c; setBit = true; bs.set(i); } else if (!setBit && c == '\'') { qc = c; setBit = true; bs.set(i); } else if (setBit && c == qc) { setBit = false; } else if (setBit && c == '\\' && i + 1 < s.length()) { i++; if (setBit) { bs.set(i); } } } pt.cbits = lastbs = bs; lasts = s; } // Wanted user to over-ride this in alpha version, // but it wasn't really necessary because of this trick: Regex newRegex() { try { return getClass().getDeclaredConstructor().newInstance(); } catch (InstantiationException ie) { return null; } catch (IllegalAccessException iae) { return null; } catch (ReflectiveOperationException roe) { return null; } } /** * Only needed for creating your own extensions of Regex. This method adds the * next Pattern in the chain of patterns or sets the Pattern if it is the * first call. */ protected void add(Pattern p2) { if (p == null) { p = p2; } else { p.add(p2); p2 = p; } } /** * You only need to use this method if you are creating your own extentions to * Regex. compile1 compiles one Pattern element, it can be over-ridden to * allow the Regex compiler to understand new syntax. See deriv.java for an * example. This routine is the heart of class Regex. 
Rthings has one integer * member called intValue, it is used to keep track of the number of ()'s in * the Pattern. * * @exception com.stevesoft.pat.RegSyntax * is thrown when a nonsensensical pattern is supplied. For * example, a pattern beginning with *. */ protected void compile1(StrPos sp, Rthings mk) throws RegSyntax { if (sp.match('[')) { sp.inc(); add(matchBracket(sp)); } else if (sp.match('|')) { if (or == null) { or = new Or(); } if (p == null) { p = new NullPattern(); } or.addOr(p); p = null; } else if (sp.incMatch("(?<")) { patInt i = sp.getPatInt(); if (i == null) { RegSyntaxError.endItAll("No int after (?<"); } add(new Backup(i.intValue())); if (!sp.match(')')) { RegSyntaxError.endItAll("No ) after (?<"); } } else if (sp.incMatch("(?>")) { patInt i = sp.getPatInt(); if (i == null) { RegSyntaxError.endItAll("No int after (?>"); } add(new Backup(-i.intValue())); if (!sp.match(')')) { RegSyntaxError.endItAll("No ) after (?<"); } } else if (sp.incMatch("(?@")) { char op = sp.c; sp.inc(); char cl = sp.c; sp.inc(); if (!sp.match(')')) { RegSyntaxError.endItAll("(?@ does not have closing paren"); } add(new Group(op, cl)); } else if (sp.incMatch("(?#")) { while (!sp.match(')')) { sp.inc(); } } else if (sp.dontMatch && sp.c == 'w') { // Regex r = new Regex(); // r._compile("[a-zA-Z0-9_]",mk); // add(new Goop("\\w",r.thePattern)); Bracket b = new Bracket(false); b.addOr(new Range('a', 'z')); b.addOr(new Range('A', 'Z')); b.addOr(new Range('0', '9')); b.addOr(new oneChar('_')); add(b); } else if (sp.dontMatch && sp.c == 'G') { add(new BackG()); } else if (sp.dontMatch && sp.c == 's') { // Regex r = new Regex(); // r._compile("[ \t\n\r\b]",mk); // add(new Goop("\\s",r.thePattern)); Bracket b = new Bracket(false); b.addOr(new oneChar((char) 32)); b.addOr(new Range((char) 8, (char) 10)); b.addOr(new oneChar((char) 13)); add(b); } else if (sp.dontMatch && sp.c == 'd') { // Regex r = new Regex(); // r._compile("[0-9]",mk); // add(new Goop("\\d",r.thePattern)); Range 
digit = new Range('0', '9'); digit.printBrackets = true; add(digit); } else if (sp.dontMatch && sp.c == 'W') { // Regex r = new Regex(); // r._compile("[^a-zA-Z0-9_]",mk); // add(new Goop("\\W",r.thePattern)); Bracket b = new Bracket(true); b.addOr(new Range('a', 'z')); b.addOr(new Range('A', 'Z')); b.addOr(new Range('0', '9')); b.addOr(new oneChar('_')); add(b); } else if (sp.dontMatch && sp.c == 'S') { // Regex r = new Regex(); // r._compile("[^ \t\n\r\b]",mk); // add(new Goop("\\S",r.thePattern)); Bracket b = new Bracket(true); b.addOr(new oneChar((char) 32)); b.addOr(new Range((char) 8, (char) 10)); b.addOr(new oneChar((char) 13)); add(b); } else if (sp.dontMatch && sp.c == 'D') { // Regex r = new Regex(); // r._compile("[^0-9]",mk); // add(new Goop("\\D",r.thePattern)); Bracket b = new Bracket(true); b.addOr(new Range('0', '9')); add(b); } else if (sp.dontMatch && sp.c == 'B') { Regex r = new Regex(); r._compile("(?!" + back_slash + "b)", mk); add(r.thePattern); } else if (isOctalString(sp)) { int d = sp.c - '0'; sp.inc(); d = 8 * d + sp.c - '0'; StrPos sp2 = new StrPos(sp); sp2.inc(); if (isOctalDigit(sp2, false)) { sp.inc(); d = 8 * d + sp.c - '0'; } add(new oneChar((char) d)); } else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9') { int iv = sp.c - '0'; StrPos s2 = new StrPos(sp); s2.inc(); if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9') { iv = 10 * iv + (s2.c - '0'); sp.inc(); } add(new BackMatch(iv)); } else if (sp.dontMatch && sp.c == 'b') { add(new Boundary()); } else if (sp.match('\b')) { add(new Boundary()); } else if (sp.match('$')) { add(new End(true)); } else if (sp.dontMatch && sp.c == 'Z') { add(new End(false)); } else if (sp.match('.')) { add(new Any()); } else if (sp.incMatch("(??")) { StringBuffer sb = new StringBuffer(); StringBuffer sb2 = new StringBuffer(); while (!sp.match(')') && !sp.match(':')) { sb.append(sp.c); sp.inc(); } if (sp.incMatch(":")) { while (!sp.match(')')) { sb2.append(sp.c); sp.inc(); } } String sbs = sb.toString(); if 
(validators.get(sbs) instanceof String) { String pat = (String) validators.get(sbs); Regex r = newRegex(); Rthings rth = new Rthings(this); rth.noBackRefs = true; r._compile(pat, rth); add(r.thePattern); } else { Custom cm = new Custom(sb.toString()); if (cm.v != null) { Validator v2 = cm.v.arg(sb2.toString()); if (v2 != null) { v2.argsave = sb2.toString(); String p = cm.v.pattern; cm.v = v2; v2.pattern = p; } Regex r = newRegex(); Rthings rth = new Rthings(this); rth.noBackRefs = true; r._compile(cm.v.pattern, rth); cm.sub = r.thePattern; cm.sub.add(new CustomEndpoint(cm)); cm.sub.setParent(cm); add(cm); } } } else if (sp.match('(')) { mk.parenLevel++; Regex r = newRegex(); // r.or = new Or(); sp.inc(); if (sp.incMatch("?:")) { r.or = new Or(); } else if (sp.incMatch("?=")) { r.or = new lookAhead(false); } else if (sp.incMatch("?!")) { r.or = new lookAhead(true); } else if (sp.match('?')) { sp.inc(); do { if (sp.c == 'i') { mk.ignoreCase = true; } if (sp.c == 'Q') { mk.dontMatchInQuotes = true; } if (sp.c == 'o') { mk.optimizeMe = true; } if (sp.c == 'g') { mk.gFlag = true; } if (sp.c == 's') { mk.sFlag = true; } if (sp.c == 'm') { mk.mFlag = true; } sp.inc(); } while (!sp.match(')') && !sp.eos); r = null; mk.parenLevel--; if (sp.eos) // throw new RegSyntax { RegSyntaxError.endItAll("Unclosed ()"); } } else { // just ordinary parenthesis r.or = mk.noBackRefs ? 
new Or() : new OrMark(mk.val++); } if (r != null) { add(r._compile(sp, mk)); } } else if (sp.match('^')) { add(new Start(true)); } else if (sp.dontMatch && sp.c == 'A') { add(new Start(false)); } else if (sp.match('*')) { addMulti(new patInt(0), new patInf()); } else if (sp.match('+')) { addMulti(new patInt(1), new patInf()); } else if (sp.match('?')) { addMulti(new patInt(0), new patInt(1)); } else if (sp.match('{')) { boolean bad = false; StrPos sp2 = new StrPos(sp); // StringBuffer sb = new StringBuffer(); sp.inc(); patInt i1 = sp.getPatInt(); patInt i2 = null; if (sp.match('}')) { i2 = i1; } else { if (!sp.match(',')) { /* * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed * with , or }"); */ bad = true; } sp.inc(); if (sp.match('}')) { i2 = new patInf(); } else { i2 = sp.getPatInt(); } } if (i1 == null || i2 == null) { /* * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}"); */ bad = true; } if (bad) { sp.dup(sp2); add(new oneChar(sp.c)); } else { addMulti(i1, i2); } } else if (sp.escMatch('x') && next2Hex(sp)) { sp.inc(); int d = getHexDigit(sp); sp.inc(); d = 16 * d + getHexDigit(sp); add(new oneChar((char) d)); } else if (sp.escMatch('c')) { sp.inc(); if (sp.c < Ctrl.cmap.length) { add(new oneChar(Ctrl.cmap[sp.c])); } else { add(new oneChar(sp.c)); } } else if (sp.escMatch('f')) { add(new oneChar((char) 12)); } else if (sp.escMatch('a')) { add(new oneChar((char) 7)); } else if (sp.escMatch('t')) { add(new oneChar('\t')); } else if (sp.escMatch('n')) { add(new oneChar('\n')); } else if (sp.escMatch('r')) { add(new oneChar('\r')); } else if (sp.escMatch('b')) { add(new oneChar('\b')); } else if (sp.escMatch('e')) { add(new oneChar((char) 27)); } else { add(new oneChar(sp.c)); if (sp.match(')')) { RegSyntaxError.endItAll("Unmatched right paren in pattern"); } } } // compiles all Pattern elements, internal method private Pattern _compile(String pat, Rthings mk) throws RegSyntax { minMatch = null; sFlag = mFlag = ignoreCase = gFlag 
= false; StrPos sp = new StrPos(pat, 0); thePattern = _compile(sp, mk); pt.marks = null; return thePattern; } Pattern p = null; Or or = null; Pattern _compile(StrPos sp, Rthings mk) throws RegSyntax { while (!(sp.eos || (or != null && sp.match(')')))) { compile1(sp, mk); sp.inc(); } if (sp.match(')')) { mk.parenLevel--; } else if (sp.eos && mk.parenLevel != 0) { RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel); } if (or != null) { if (p == null) { p = new NullPattern(); } or.addOr(p); return or; } return p == null ? new NullPattern() : p; } // add a multi object to the end of the chain // which applies to the last object void addMulti(patInt i1, patInt i2) throws RegSyntax { Pattern last, last2; for (last = p; last != null && last.next != null; last = last.next) { ; } if (last == null || last == p) { last2 = null; } else { for (last2 = p; last2.next != last; last2 = last2.next) { ; } } if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1) { ((Multi) last).matchFewest = true; } else if (last instanceof FastMulti && i1.intValue() == 0 && i2.intValue() == 1) { ((FastMulti) last).matchFewest = true; } else if (last instanceof DotMulti && i1.intValue() == 0 && i2.intValue() == 1) { ((DotMulti) last).matchFewest = true; } else if (last instanceof Multi || last instanceof DotMulti || last instanceof FastMulti) { throw new RegSyntax("Syntax error."); } else if (last2 == null) { p = mkMulti(i1, i2, p); } else { last2.next = mkMulti(i1, i2, last); } } final static Pattern mkMulti(patInt lo, patInt hi, Pattern p) throws RegSyntax { if (p instanceof Any && p.next == null) { return new DotMulti(lo, hi); } return RegOpt.safe4fm(p) ? 
(Pattern) new FastMulti(lo, hi, p) : (Pattern) new Multi(lo, hi, p); } // process the bracket operator Pattern matchBracket(StrPos sp) throws RegSyntax { Bracket ret; if (sp.match('^')) { ret = new Bracket(true); sp.inc(); } else { ret = new Bracket(false); } if (sp.match(']')) { // throw new RegSyntax RegSyntaxError.endItAll("Unmatched []"); } while (!sp.eos && !sp.match(']')) { StrPos s1 = new StrPos(sp); s1.inc(); StrPos s1_ = new StrPos(s1); s1_.inc(); if (s1.match('-') && !s1_.match(']')) { StrPos s2 = new StrPos(s1); s2.inc(); if (!s2.eos) { ret.addOr(new Range(sp.c, s2.c)); } sp.inc(); sp.inc(); } else if (sp.escMatch('Q')) { sp.inc(); while (!sp.escMatch('E')) { ret.addOr(new oneChar(sp.c)); sp.inc(); } } else if (sp.escMatch('d')) { ret.addOr(new Range('0', '9')); } else if (sp.escMatch('s')) { ret.addOr(new oneChar((char) 32)); ret.addOr(new Range((char) 8, (char) 10)); ret.addOr(new oneChar((char) 13)); } else if (sp.escMatch('w')) { ret.addOr(new Range('a', 'z')); ret.addOr(new Range('A', 'Z')); ret.addOr(new Range('0', '9')); ret.addOr(new oneChar('_')); } else if (sp.escMatch('D')) { ret.addOr(new Range((char) 0, (char) 47)); ret.addOr(new Range((char) 58, (char) 65535)); } else if (sp.escMatch('S')) { ret.addOr(new Range((char) 0, (char) 7)); ret.addOr(new Range((char) 11, (char) 12)); ret.addOr(new Range((char) 14, (char) 31)); ret.addOr(new Range((char) 33, (char) 65535)); } else if (sp.escMatch('W')) { ret.addOr(new Range((char) 0, (char) 64)); ret.addOr(new Range((char) 91, (char) 94)); ret.addOr(new oneChar((char) 96)); ret.addOr(new Range((char) 123, (char) 65535)); } else if (sp.escMatch('x') && next2Hex(sp)) { sp.inc(); int d = getHexDigit(sp); sp.inc(); d = 16 * d + getHexDigit(sp); ret.addOr(new oneChar((char) d)); } else if (sp.escMatch('a')) { ret.addOr(new oneChar((char) 7)); } else if (sp.escMatch('f')) { ret.addOr(new oneChar((char) 12)); } else if (sp.escMatch('e')) { ret.addOr(new oneChar((char) 27)); } else if (sp.escMatch('n')) { 
ret.addOr(new oneChar('\n')); } else if (sp.escMatch('t')) { ret.addOr(new oneChar('\t')); } else if (sp.escMatch('r')) { ret.addOr(new oneChar('\r')); } else if (sp.escMatch('c')) { sp.inc(); if (sp.c < Ctrl.cmap.length) { ret.addOr(new oneChar(Ctrl.cmap[sp.c])); } else { ret.addOr(new oneChar(sp.c)); } } else if (isOctalString(sp)) { int d = sp.c - '0'; sp.inc(); d = 8 * d + sp.c - '0'; StrPos sp2 = new StrPos(sp); sp2.inc(); if (isOctalDigit(sp2, false)) { sp.inc(); d = 8 * d + sp.c - '0'; } ret.addOr(new oneChar((char) d)); } else { ret.addOr(new oneChar(sp.c)); } sp.inc(); } return ret; } /** * Converts the stored Pattern to a String -- this is a decompile. Note that * \t and \n will really print out here, Not just the two character * representations. Also be prepared to see some strange output if your * characters are not printable. */ @Override public String toString() { if (false && thePattern == null) { return ""; } else { StringBuffer sb = new StringBuffer(); if (esc != Pattern.ESC) { sb.append("(?e="); sb.append(esc); sb.append(")"); } if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase || dontMatchInQuotes || optimized()) { sb.append("(?"); if (ignoreCase) { sb.append("i"); } if (mFlag) { sb.append("m"); } if (sFlag || !dotDoesntMatchCR) { sb.append("s"); } if (dontMatchInQuotes) { sb.append("Q"); } if (optimized()) { sb.append("o"); } if (gFlag) { sb.append("g"); } sb.append(")"); } String patstr = thePattern.toString(); if (esc != Pattern.ESC) { patstr = reEscape(patstr, Pattern.ESC, esc); } sb.append(patstr); return sb.toString(); } } // Re-escape Pattern, allows us to use a different escape // character. 
static String reEscape(String s, char oldEsc, char newEsc) { if (oldEsc == newEsc) { return s; } int i; StringBuffer sb = new StringBuffer(); for (i = 0; i < s.length(); i++) { if (s.charAt(i) == oldEsc && i + 1 < s.length()) { if (s.charAt(i + 1) == oldEsc) { sb.append(oldEsc); } else { sb.append(newEsc); sb.append(s.charAt(i + 1)); } i++; } else if (s.charAt(i) == newEsc) { sb.append(newEsc); sb.append(newEsc); } else { sb.append(s.charAt(i)); } } return sb.toString(); } /** * This method implements FilenameFilter, allowing one to use a Regex to * search through a directory using File.list. There is a FileRegex now that * does this better. * * @see com.stevesoft.pat.FileRegex */ @Override public boolean accept(File dir, String s) { return search(s); } /** The version of this package */ final static public String version() { return "lgpl release 1.5.3"; } /** * Once this method is called, the state of variables ignoreCase and * dontMatchInQuotes should not be changed as the results will be * unpredictable. However, search and matchAt will run more quickly. Note that * you can check to see if the pattern has been optimized by calling the * optimized() method. *
* This method will attempt to rewrite your pattern in a way that makes it * faster (not all patterns execute at the same speed). In general, * "(?: ... )" will be faster than "( ... )" so if you don't need the * backreference, you should group using the former pattern. *
* It will also introduce new pattern elements that you can't get to * otherwise, for example if you have a large table of strings, i.e. the * months of the year "(January|February|...)" optimize() will make a * Hashtable that takes it to the next appropriate pattern element -- * eliminating the need for a linear search. * * @see com.stevesoft.pat.Regex#optimized * @see com.stevesoft.pat.Regex#ignoreCase * @see com.stevesoft.pat.Regex#dontMatchInQuotes * @see com.stevesoft.pat.Regex#matchAt * @see com.stevesoft.pat.Regex#search */ public void optimize() { if (optimized() || thePattern == null) { return; } minMatch = new patInt(0); // thePattern.countMinChars(); thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes); skipper = Skip.findSkip(this); // RegOpt.setParents(this); return; } Skip skipper; /** * This function returns true if the optimize method has been called. */ public boolean optimized() { return minMatch != null; } /** * A bit of syntactic surgar for those who want to make their code look more * perl-like. To use this initialize your Regex object by saying: * *
   * <pre>
   * Regex r1 = Regex.perlCode("s/hello/goodbye/");
   * Regex r2 = Regex.perlCode("s'fish'frog'i");
   * Regex r3 = Regex.perlCode("m'hello'");
   * </pre>
   *
   * The i for ignoreCase is supported in this syntax, as well as m, s, and x.
   * The g flag is a bit of a special case.
   *
   * If you wish to replace all occurrences of a pattern, you do not put a 'g'
   * in the perlCode, but call Regex's replaceAll method.
   *
   * If you wish to simply and only do a search for r2's pattern, you can do
   * this by calling the searchFrom method repeatedly, or by calling search
   * repeatedly if the g flag is set.
   *
* Note: Currently perlCode does not support the (?e=#) syntax for
   * changing the escape character.
   */
  public static Regex perlCode(String s) {
    // The parsing itself lives in parsePerl.java to keep this file smaller.
    Regex parsed = parsePerl.parse(s);
    return parsed;
  }

  static final char back_slash = '\\';

  /**
   * Reports whether every element in this Regex's pattern chain is a plain
   * literal (oneChar) or a Skipped element.
   */
  public boolean isLiteral() {
    for (Pattern node = thePattern; node != null; node = node.next) {
      boolean literalNode = node instanceof oneChar || node instanceof Skipped;
      if (!literalNode) {
        return false;
      }
    }
    return true;
  }

  /**
   * Only of interest when inventing your own pattern elements; delegates to
   * the compiled pattern.
   */
  public patInt countMinChars() {
    return thePattern.countMinChars();
  }

  /**
   * Only of interest when inventing your own pattern elements; delegates to
   * the compiled pattern.
   */
  public patInt countMaxChars() {
    return thePattern.countMaxChars();
  }

  /** True when sp is on a hex digit, is not at end of input and dontMatch is unset. */
  boolean isHexDigit(StrPos sp) {
    if (sp.eos || sp.dontMatch) {
      return false;
    }
    char ch = sp.c;
    boolean decimal = ch >= '0' && ch <= '9';
    boolean lowerHex = ch >= 'a' && ch <= 'f';
    boolean upperHex = ch >= 'A' && ch <= 'F';
    return decimal || lowerHex || upperHex;
  }

  /**
   * True when sp is on an octal digit and its dontMatch state equals
   * {@code first}.
   */
  boolean isOctalDigit(StrPos sp, boolean first) {
    if (sp.eos) {
      return false;
    }
    if (first != sp.dontMatch) {
      return false;
    }
    return sp.c >= '0' && sp.c <= '7';
  }

  /**
   * Numeric value of the hex digit under sp. Anything outside 0-9 and a-f is
   * treated as an upper case hex letter.
   */
  int getHexDigit(StrPos sp) {
    char ch = sp.c;
    if (ch >= '0' && ch <= '9') {
      return ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      return ch - 'a' + 10;
    } else {
      return ch - 'A' + 10;
    }
  }

  /** True when the two characters after sp are both hex digits; sp itself is not moved. */
  boolean next2Hex(StrPos sp) {
    StrPos probe = new StrPos(sp);
    probe.inc();
    if (isHexDigit(probe)) {
      probe.inc();
      return isHexDigit(probe);
    }
    return false;
  }

  /**
   * True when sp is on a leading octal digit that is followed by a second
   * octal digit; sp itself is not moved.
   */
  boolean isOctalString(StrPos sp) {
    if (isOctalDigit(sp, true)) {
      StrPos probe = new StrPos(sp);
      probe.inc();
      return isOctalDigit(probe, false);
    }
    return false;
  }
}