From c40cf903f740a72ab63dd1abc10fa33450ce660d Mon Sep 17 00:00:00 2001 From: amwaterhouse Date: Thu, 9 Jun 2005 15:29:47 +0000 Subject: [PATCH] needed for applet search --- src/com/stevesoft/pat/AmpersandRule.java | 20 + src/com/stevesoft/pat/Any.java | 28 + src/com/stevesoft/pat/BackG.java | 26 + src/com/stevesoft/pat/BackMatch.java | 33 + src/com/stevesoft/pat/BackRefRule.java | 23 + src/com/stevesoft/pat/Backup.java | 27 + src/com/stevesoft/pat/BasicStringBufferLike.java | 18 + src/com/stevesoft/pat/Bits.java | 3072 ++++++++++++++++++++ src/com/stevesoft/pat/Boundary.java | 51 + src/com/stevesoft/pat/Bracket.java | 48 + src/com/stevesoft/pat/CaseMgr.java | 1537 ++++++++++ src/com/stevesoft/pat/ChangeRule.java | 22 + src/com/stevesoft/pat/CodeRule.java | 21 + src/com/stevesoft/pat/Ctrl.java | 269 ++ src/com/stevesoft/pat/Custom.java | 42 + src/com/stevesoft/pat/CustomEndpoint.java | 27 + src/com/stevesoft/pat/DirFileRegex.java | 19 + src/com/stevesoft/pat/DotMulti.java | 125 + src/com/stevesoft/pat/End.java | 42 + src/com/stevesoft/pat/FastBracket.java | 191 ++ src/com/stevesoft/pat/FastMulti.java | 111 + src/com/stevesoft/pat/FileRegex.java | 215 ++ src/com/stevesoft/pat/Group.java | 42 + src/com/stevesoft/pat/LeftRule.java | 21 + src/com/stevesoft/pat/Multi.java | 60 + src/com/stevesoft/pat/MultiMin.java | 18 + src/com/stevesoft/pat/Multi_stage2.java | 103 + src/com/stevesoft/pat/NoPattern.java | 19 + src/com/stevesoft/pat/NonDirFileRegex.java | 19 + src/com/stevesoft/pat/NotImplementedError.java | 14 + src/com/stevesoft/pat/NullPattern.java | 21 + src/com/stevesoft/pat/NullRule.java | 19 + src/com/stevesoft/pat/Or.java | 81 + src/com/stevesoft/pat/OrMark.java | 51 + src/com/stevesoft/pat/PartialBuffer.java | 84 + src/com/stevesoft/pat/Pattern.java | 193 ++ src/com/stevesoft/pat/PatternSub.java | 15 + src/com/stevesoft/pat/PopRule.java | 16 + src/com/stevesoft/pat/Prop.java | 52 + src/com/stevesoft/pat/Pthings.java | 33 + src/com/stevesoft/pat/PushRule.java | 
22 + src/com/stevesoft/pat/RBuffer.java | 36 + src/com/stevesoft/pat/Range.java | 61 + src/com/stevesoft/pat/RegOpt.java | 336 +++ src/com/stevesoft/pat/RegRes.java | 166 ++ src/com/stevesoft/pat/RegSyntax.java | 24 + src/com/stevesoft/pat/RegSyntaxError.java | 29 + src/com/stevesoft/pat/Regex.java | 1429 +++++++++ src/com/stevesoft/pat/RegexReader.java | 248 ++ src/com/stevesoft/pat/RegexTokenizer.java | 110 + src/com/stevesoft/pat/RegexWriter.java | 205 ++ src/com/stevesoft/pat/ReplaceRule.java | 255 ++ src/com/stevesoft/pat/Replacer.java | 261 ++ src/com/stevesoft/pat/RightRule.java | 22 + src/com/stevesoft/pat/Rthings.java | 44 + src/com/stevesoft/pat/RuleHolder.java | 20 + src/com/stevesoft/pat/Skip.java | 127 + src/com/stevesoft/pat/Skip2.java | 38 + src/com/stevesoft/pat/SkipBMH.java | 183 ++ src/com/stevesoft/pat/Skipped.java | 27 + src/com/stevesoft/pat/SpecialRule.java | 13 + src/com/stevesoft/pat/Start.java | 31 + src/com/stevesoft/pat/StrPos.java | 117 + src/com/stevesoft/pat/StringBufferLike.java | 65 + src/com/stevesoft/pat/StringLike.java | 37 + src/com/stevesoft/pat/StringRule.java | 22 + src/com/stevesoft/pat/SubMark.java | 22 + src/com/stevesoft/pat/TransPat.java | 40 + src/com/stevesoft/pat/Transformer.java | 155 + src/com/stevesoft/pat/UniValidator.java | 16 + src/com/stevesoft/pat/Validator.java | 58 + src/com/stevesoft/pat/WantMoreTextReplaceRule.java | 19 + src/com/stevesoft/pat/lookAhead.java | 46 + src/com/stevesoft/pat/oneChar.java | 48 + src/com/stevesoft/pat/parsePerl.java | 266 ++ src/com/stevesoft/pat/patInf.java | 12 + src/com/stevesoft/pat/patInt.java | 88 + .../stevesoft/pat/wrap/CharArrayBufferWrap.java | 38 + src/com/stevesoft/pat/wrap/CharArrayWrap.java | 39 + .../stevesoft/pat/wrap/RandomAccessFileWrap.java | 116 + src/com/stevesoft/pat/wrap/StringBufferWrap.java | 36 + src/com/stevesoft/pat/wrap/StringWrap.java | 33 + src/com/stevesoft/pat/wrap/WriterWrap.java | 45 + 83 files changed, 11833 insertions(+) create mode 100755 
src/com/stevesoft/pat/AmpersandRule.java create mode 100755 src/com/stevesoft/pat/Any.java create mode 100755 src/com/stevesoft/pat/BackG.java create mode 100755 src/com/stevesoft/pat/BackMatch.java create mode 100755 src/com/stevesoft/pat/BackRefRule.java create mode 100755 src/com/stevesoft/pat/Backup.java create mode 100755 src/com/stevesoft/pat/BasicStringBufferLike.java create mode 100755 src/com/stevesoft/pat/Bits.java create mode 100755 src/com/stevesoft/pat/Boundary.java create mode 100755 src/com/stevesoft/pat/Bracket.java create mode 100755 src/com/stevesoft/pat/CaseMgr.java create mode 100755 src/com/stevesoft/pat/ChangeRule.java create mode 100755 src/com/stevesoft/pat/CodeRule.java create mode 100755 src/com/stevesoft/pat/Ctrl.java create mode 100755 src/com/stevesoft/pat/Custom.java create mode 100755 src/com/stevesoft/pat/CustomEndpoint.java create mode 100755 src/com/stevesoft/pat/DirFileRegex.java create mode 100755 src/com/stevesoft/pat/DotMulti.java create mode 100755 src/com/stevesoft/pat/End.java create mode 100755 src/com/stevesoft/pat/FastBracket.java create mode 100755 src/com/stevesoft/pat/FastMulti.java create mode 100755 src/com/stevesoft/pat/FileRegex.java create mode 100755 src/com/stevesoft/pat/Group.java create mode 100755 src/com/stevesoft/pat/LeftRule.java create mode 100755 src/com/stevesoft/pat/Multi.java create mode 100755 src/com/stevesoft/pat/MultiMin.java create mode 100755 src/com/stevesoft/pat/Multi_stage2.java create mode 100755 src/com/stevesoft/pat/NoPattern.java create mode 100755 src/com/stevesoft/pat/NonDirFileRegex.java create mode 100755 src/com/stevesoft/pat/NotImplementedError.java create mode 100755 src/com/stevesoft/pat/NullPattern.java create mode 100755 src/com/stevesoft/pat/NullRule.java create mode 100755 src/com/stevesoft/pat/Or.java create mode 100755 src/com/stevesoft/pat/OrMark.java create mode 100755 src/com/stevesoft/pat/PartialBuffer.java create mode 100755 src/com/stevesoft/pat/Pattern.java create 
mode 100755 src/com/stevesoft/pat/PatternSub.java create mode 100755 src/com/stevesoft/pat/PopRule.java create mode 100755 src/com/stevesoft/pat/Prop.java create mode 100755 src/com/stevesoft/pat/Pthings.java create mode 100755 src/com/stevesoft/pat/PushRule.java create mode 100755 src/com/stevesoft/pat/RBuffer.java create mode 100755 src/com/stevesoft/pat/Range.java create mode 100755 src/com/stevesoft/pat/RegOpt.java create mode 100755 src/com/stevesoft/pat/RegRes.java create mode 100755 src/com/stevesoft/pat/RegSyntax.java create mode 100755 src/com/stevesoft/pat/RegSyntaxError.java create mode 100755 src/com/stevesoft/pat/Regex.java create mode 100755 src/com/stevesoft/pat/RegexReader.java create mode 100755 src/com/stevesoft/pat/RegexTokenizer.java create mode 100755 src/com/stevesoft/pat/RegexWriter.java create mode 100755 src/com/stevesoft/pat/ReplaceRule.java create mode 100755 src/com/stevesoft/pat/Replacer.java create mode 100755 src/com/stevesoft/pat/RightRule.java create mode 100755 src/com/stevesoft/pat/Rthings.java create mode 100755 src/com/stevesoft/pat/RuleHolder.java create mode 100755 src/com/stevesoft/pat/Skip.java create mode 100755 src/com/stevesoft/pat/Skip2.java create mode 100755 src/com/stevesoft/pat/SkipBMH.java create mode 100755 src/com/stevesoft/pat/Skipped.java create mode 100755 src/com/stevesoft/pat/SpecialRule.java create mode 100755 src/com/stevesoft/pat/Start.java create mode 100755 src/com/stevesoft/pat/StrPos.java create mode 100755 src/com/stevesoft/pat/StringBufferLike.java create mode 100755 src/com/stevesoft/pat/StringLike.java create mode 100755 src/com/stevesoft/pat/StringRule.java create mode 100755 src/com/stevesoft/pat/SubMark.java create mode 100755 src/com/stevesoft/pat/TransPat.java create mode 100755 src/com/stevesoft/pat/Transformer.java create mode 100755 src/com/stevesoft/pat/UniValidator.java create mode 100755 src/com/stevesoft/pat/Validator.java create mode 100755 
src/com/stevesoft/pat/WantMoreTextReplaceRule.java create mode 100755 src/com/stevesoft/pat/lookAhead.java create mode 100755 src/com/stevesoft/pat/oneChar.java create mode 100755 src/com/stevesoft/pat/parsePerl.java create mode 100755 src/com/stevesoft/pat/patInf.java create mode 100755 src/com/stevesoft/pat/patInt.java create mode 100755 src/com/stevesoft/pat/wrap/CharArrayBufferWrap.java create mode 100755 src/com/stevesoft/pat/wrap/CharArrayWrap.java create mode 100755 src/com/stevesoft/pat/wrap/RandomAccessFileWrap.java create mode 100755 src/com/stevesoft/pat/wrap/StringBufferWrap.java create mode 100755 src/com/stevesoft/pat/wrap/StringWrap.java create mode 100755 src/com/stevesoft/pat/wrap/WriterWrap.java diff --git a/src/com/stevesoft/pat/AmpersandRule.java b/src/com/stevesoft/pat/AmpersandRule.java new file mode 100755 index 0000000..934d9c0 --- /dev/null +++ b/src/com/stevesoft/pat/AmpersandRule.java @@ -0,0 +1,20 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** This implements the $& element of the second argument to + * Regex. + * @see com.stevesoft.pat.ReplaceRule + */ +public final class AmpersandRule extends ReplaceRule { + public AmpersandRule() {} + public void apply(StringBufferLike sb,RegRes res) { + sb.append(res.stringMatched()); + } + public String toString1() { return "$&"; } +} diff --git a/src/com/stevesoft/pat/Any.java b/src/com/stevesoft/pat/Any.java new file mode 100755 index 0000000..234c343 --- /dev/null +++ b/src/com/stevesoft/pat/Any.java @@ -0,0 +1,28 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** This is the '.' character in a Pattern. 
It + matches any character. */ +class Any extends Pattern { + public int matchInternal(int pos,Pthings pt) { + if(pos < pt.src.length()) + if(pt.dotDoesntMatchCR) { + if(pt.src.charAt(pos) != '\n') + return nextMatch(pos+1,pt); + } else return nextMatch(pos+1,pt); + return -1; + } + public String toString() { + return "."+nextString(); + } + public patInt minChars() { return new patInt(1); } + public patInt maxChars() { return new patInt(1); } + public Pattern clone1(Hashtable h) { return new Any(); } +}; diff --git a/src/com/stevesoft/pat/BackG.java b/src/com/stevesoft/pat/BackG.java new file mode 100755 index 0000000..78115f8 --- /dev/null +++ b/src/com/stevesoft/pat/BackG.java @@ -0,0 +1,26 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** This class represents the \G pattern element. */ +class BackG extends Pattern { + char c,altc,altc2; + int mask; + public BackG() { + } + public int matchInternal(int pos,Pthings pt) { + return pos==pt.lastPos ? nextMatch(pos,pt) : -1; + } + public String toString() { + return "\\G"+nextString(); + } + public patInt minChars() { return new patInt(1); } + public patInt maxChars() { return new patInt(1); } + Pattern clone1(Hashtable h) { return new BackG(); } +} diff --git a/src/com/stevesoft/pat/BackMatch.java b/src/com/stevesoft/pat/BackMatch.java new file mode 100755 index 0000000..168faec --- /dev/null +++ b/src/com/stevesoft/pat/BackMatch.java @@ -0,0 +1,33 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** Provides the ability to match a backreference from within + * a Pattern. 
+ */ +class BackMatch extends Pattern { + int id; + BackMatch(int id) { this.id = id; } + public String toString() { return "\\"+(id)+nextString(); } + public int matchInternal(int pos,Pthings p) { + int i1 = p.marks[id]; + int i2 = p.marks[id+p.nMarks]; + int imax = i2-i1; + if(i1<0||imax < 0||pos+imax>p.src.length()) return -1; + int ns = p.src.length()-pos; + if(imax < ns) ns = imax; + for(int i=0;i" + (-bk) : "<" + bk) + ")" + nextString(); + } + public int matchInternal(int pos,Pthings pt) { + if(pos < bk) return -1; + return nextMatch(pos-bk,pt); + } + public patInt minChars() { return new patInt(-bk); } + public patInt maxChars() { return new patInt(-bk); } + public Pattern clone1(Hashtable h) { return new Backup(bk); } +}; diff --git a/src/com/stevesoft/pat/BasicStringBufferLike.java b/src/com/stevesoft/pat/BasicStringBufferLike.java new file mode 100755 index 0000000..6e7b26c --- /dev/null +++ b/src/com/stevesoft/pat/BasicStringBufferLike.java @@ -0,0 +1,18 @@ +package// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +com.stevesoft.pat; + +/** An abstraction of the StringBuffer which only + implements a subset of StringBuffer's methods. + */ +public interface BasicStringBufferLike { + public void append(char c); + public void append(String s); + public StringLike toStringLike(); + public Object unwrap(); +} diff --git a/src/com/stevesoft/pat/Bits.java b/src/com/stevesoft/pat/Bits.java new file mode 100755 index 0000000..244889f --- /dev/null +++ b/src/com/stevesoft/pat/Bits.java @@ -0,0 +1,3072 @@ +package// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! 
+// +com.stevesoft.pat; + +import java.io.*; + +public class Bits { + char[] carray; + Bits(char[] carray) { + this.carray = carray; + } + public boolean get(int i) { + return ((carray[i>>4]) & (1<<(i&15))) != 0; + } + public void set(int i,boolean b) { + if(b) { + carray[i>>4] |= (char) 1<<(i&15); + } else { + carray[i>>4] &= (char)~(1<<(i&15)); + } + } + + /* + public static String n4(char c) { + String s = Integer.toHexString(c); + while(s.length()<4) + s = "0"+s; + return s; + } + static abstract class Tester { + abstract boolean test(char c); + } + public static void main(String[] args) throws Exception { + //pw_s.println(" static {"); + FileWriter fw = new FileWriter("x.out"); + fw.close(); + test("upper",new Tester() { + boolean test(char c) { + return Character.isUpperCase(c); + } + }); + test("lower",new Tester() { + boolean test(char c) { + return Character.isLowerCase(c); + } + }); + test("title",new Tester() { + boolean test(char c) { + return Character.isLowerCase(c); + } + }); + test("currency",new Tester() { + boolean test(char c) { + return Character.getType(c)==Character.CURRENCY_SYMBOL; + } + }); + test("decimal_digit",new Tester() { + boolean test(char c) { + return Character.getType(c)==Character.DECIMAL_DIGIT_NUMBER; + } + }); + test("math",new Tester() { + boolean test(char c) { + return Character.getType(c)==Character.MATH_SYMBOL; + } + }); + test("letter",new Tester() { + boolean test(char c) { + return Character.isLetter(c); + } + }); + test("white",new Tester() { + boolean test(char c) { + return Character.isWhitespace(c); + } + }); + test("punct",new Tester() { + boolean test(char c) { + boolean r = false; + switch(Character.getType(c)) { + case Character.DASH_PUNCTUATION: + case Character.START_PUNCTUATION: + case Character.END_PUNCTUATION: + case Character.CONNECTOR_PUNCTUATION: + case Character.OTHER_PUNCTUATION: + r = true; + break; + default: + r = false; + break; + } + return r; + } + }); + //pw_s.println(" }"); + fw = new 
FileWriter("x.out",true); + fw.write(sw.toString()); + fw.close(); + } + static StringWriter sw = new StringWriter(); + static PrintWriter pw_s = new PrintWriter(sw,true); + public static void test(String var,Tester t) throws Exception { + + char[] ca = new char[(66536 >> 4)+1]; + Bits b = new Bits(ca); + FileWriter fw = new FileWriter("x.out",true); + PrintWriter pw = new PrintWriter(fw); + + //pw_s.println(" "+var+"_f();"); + pw.println(" public static Bits "+var+";"); + pw.println(" static void "+var+"_f() {"); + pw.println(" char[] data = new char["+ca.length+"];"); + pw.println(" "+var+" = new Bits(data);"); + for(int i=0;i<66536;i++) { + char c = (char)i; + //b.set(i,Character.getType(c)==Character.CURRENCY_SYMBOL); + b.set(i,t.test(c)); + } + for(int i=0;i= 'a' && c <= 'z') + return true; + if(c >= 'A' && c <= 'Z') + return true; + if(c >= '0' && c <= '9') + return true; + if(c == '_') + return true; + return false; + } + boolean matchLeft(int pos,Pthings pt) { + if(pos <= 0) + return true; + if(isAChar(pt.src.charAt(pos)) + && isAChar(pt.src.charAt(pos-1))) + return false; + return true; + } + boolean matchRight(int pos,Pthings pt) { + if(pos < 0) return false; + if(pos+1 >= pt.src.length()) + return true; + if(isAChar(pt.src.charAt(pos)) + && isAChar(pt.src.charAt(pos+1))) + return false; + return true; + } + public int matchInternal(int pos,Pthings pt) { + if(matchRight(pos-1,pt) || matchLeft(pos,pt)) + return nextMatch(pos,pt); + return -1; + } + public patInt maxChars() { return new patInt(0); } + public Pattern clone1(Hashtable h) { return new Boundary(); } +}; diff --git a/src/com/stevesoft/pat/Bracket.java b/src/com/stevesoft/pat/Bracket.java new file mode 100755 index 0000000..9f60179 --- /dev/null +++ b/src/com/stevesoft/pat/Bracket.java @@ -0,0 +1,48 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! 
+// +package com.stevesoft.pat; +import java.util.Vector; +import java.util.Hashtable; + +/** The Bracket is a form of the Or class, + implements the pattern element [ ]. */ +class Bracket extends Or { + boolean neg; + Bracket(boolean n) { neg = n; } + String leftForm() { + if(neg) + return "[^"; + else + return "["; + } + String rightForm() { return "]"; } + String sepForm() { return ""; } + public int matchInternal(int pos,Pthings pt) { + if(pos >= pt.src.length()) return -1; + int r = super.matchInternal(pos,pt); + if((neg && r<0)||(!neg && r>=0)) + return nextMatch(pos+1,pt); + return -1; + } + public patInt minChars() { return new patInt(1); } + public patInt maxChars() { return new patInt(1); } + + public Or addOr(Pattern p) { + pv = null; + v.addElement(p); + p.setParent(null); + return this; + } + public Pattern clone1(Hashtable h) { + Bracket b = new Bracket(neg); + b.v = new Vector(); + for(int i=0;i + unicode.org. +

+ The performance of the methods found in String + and Character is better, but these methods work + both in Java 1.0 and 1.1. If it is desirable, + either to gain a slight performance increase or + to accommodate application-specific modifications + to the definitions of upper, lower, and title case + then it should be a straightforward task to substitute + your own methods for these. + */ +final public class CaseMgr { + final static boolean java_1_0 = false; + /** Convert a character to upper case. */ + public static char toUpperCase(char c) { + if(java_1_0) { + int ret = (int)c; + switch(c) { + case 97: ret=65; break; + case 98: ret=66; break; + case 99: ret=67; break; + case 100: ret=68; break; + case 101: ret=69; break; + case 102: ret=70; break; + case 103: ret=71; break; + case 104: ret=72; break; + case 105: ret=73; break; + case 106: ret=74; break; + case 107: ret=75; break; + case 108: ret=76; break; + case 109: ret=77; break; + case 110: ret=78; break; + case 111: ret=79; break; + case 112: ret=80; break; + case 113: ret=81; break; + case 114: ret=82; break; + case 115: ret=83; break; + case 116: ret=84; break; + case 117: ret=85; break; + case 118: ret=86; break; + case 119: ret=87; break; + case 120: ret=88; break; + case 121: ret=89; break; + case 122: ret=90; break; + case 224: ret=192; break; + case 225: ret=193; break; + case 226: ret=194; break; + case 227: ret=195; break; + case 228: ret=196; break; + case 229: ret=197; break; + case 230: ret=198; break; + case 231: ret=199; break; + case 232: ret=200; break; + case 233: ret=201; break; + case 234: ret=202; break; + case 235: ret=203; break; + case 236: ret=204; break; + case 237: ret=205; break; + case 238: ret=206; break; + case 239: ret=207; break; + case 240: ret=208; break; + case 241: ret=209; break; + case 242: ret=210; break; + case 243: ret=211; break; + case 244: ret=212; break; + case 245: ret=213; break; + case 246: ret=214; break; + case 248: ret=216; break; + case 249: ret=217; 
break; + case 250: ret=218; break; + case 251: ret=219; break; + case 252: ret=220; break; + case 253: ret=221; break; + case 254: ret=222; break; + case 255: ret=376; break; + case 257: ret=256; break; + case 259: ret=258; break; + case 261: ret=260; break; + case 263: ret=262; break; + case 265: ret=264; break; + case 267: ret=266; break; + case 269: ret=268; break; + case 271: ret=270; break; + case 273: ret=272; break; + case 275: ret=274; break; + case 277: ret=276; break; + case 279: ret=278; break; + case 281: ret=280; break; + case 283: ret=282; break; + case 285: ret=284; break; + case 287: ret=286; break; + case 289: ret=288; break; + case 291: ret=290; break; + case 293: ret=292; break; + case 295: ret=294; break; + case 297: ret=296; break; + case 299: ret=298; break; + case 301: ret=300; break; + case 303: ret=302; break; + case 305: ret=73; break; + case 307: ret=306; break; + case 309: ret=308; break; + case 311: ret=310; break; + case 314: ret=313; break; + case 316: ret=315; break; + case 318: ret=317; break; + case 320: ret=319; break; + case 322: ret=321; break; + case 324: ret=323; break; + case 326: ret=325; break; + case 328: ret=327; break; + case 331: ret=330; break; + case 333: ret=332; break; + case 335: ret=334; break; + case 337: ret=336; break; + case 339: ret=338; break; + case 341: ret=340; break; + case 343: ret=342; break; + case 345: ret=344; break; + case 347: ret=346; break; + case 349: ret=348; break; + case 351: ret=350; break; + case 353: ret=352; break; + case 355: ret=354; break; + case 357: ret=356; break; + case 359: ret=358; break; + case 361: ret=360; break; + case 363: ret=362; break; + case 365: ret=364; break; + case 367: ret=366; break; + case 369: ret=368; break; + case 371: ret=370; break; + case 373: ret=372; break; + case 375: ret=374; break; + case 378: ret=377; break; + case 380: ret=379; break; + case 382: ret=381; break; + case 383: ret=83; break; + case 387: ret=386; break; + case 389: ret=388; break; + case 
392: ret=391; break; + case 396: ret=395; break; + case 402: ret=401; break; + case 409: ret=408; break; + case 417: ret=416; break; + case 419: ret=418; break; + case 421: ret=420; break; + case 424: ret=423; break; + case 429: ret=428; break; + case 432: ret=431; break; + case 436: ret=435; break; + case 438: ret=437; break; + case 441: ret=440; break; + case 445: ret=444; break; + case 453: ret=452; break; + case 454: ret=452; break; + case 456: ret=455; break; + case 457: ret=455; break; + case 459: ret=458; break; + case 460: ret=458; break; + case 462: ret=461; break; + case 464: ret=463; break; + case 466: ret=465; break; + case 468: ret=467; break; + case 470: ret=469; break; + case 472: ret=471; break; + case 474: ret=473; break; + case 476: ret=475; break; + case 479: ret=478; break; + case 481: ret=480; break; + case 483: ret=482; break; + case 485: ret=484; break; + case 487: ret=486; break; + case 489: ret=488; break; + case 491: ret=490; break; + case 493: ret=492; break; + case 495: ret=494; break; + case 498: ret=497; break; + case 499: ret=497; break; + case 501: ret=500; break; + case 507: ret=506; break; + case 509: ret=508; break; + case 511: ret=510; break; + case 513: ret=512; break; + case 515: ret=514; break; + case 517: ret=516; break; + case 519: ret=518; break; + case 521: ret=520; break; + case 523: ret=522; break; + case 525: ret=524; break; + case 527: ret=526; break; + case 529: ret=528; break; + case 531: ret=530; break; + case 533: ret=532; break; + case 535: ret=534; break; + case 595: ret=385; break; + case 596: ret=390; break; + case 598: ret=393; break; + case 599: ret=394; break; + case 600: ret=398; break; + case 601: ret=399; break; + case 603: ret=400; break; + case 608: ret=403; break; + case 611: ret=404; break; + case 616: ret=407; break; + case 617: ret=406; break; + case 623: ret=412; break; + case 626: ret=413; break; + case 643: ret=425; break; + case 648: ret=430; break; + case 650: ret=433; break; + case 651: 
ret=434; break; + case 658: ret=439; break; + case 940: ret=902; break; + case 941: ret=904; break; + case 942: ret=905; break; + case 943: ret=906; break; + case 945: ret=913; break; + case 946: ret=914; break; + case 947: ret=915; break; + case 948: ret=916; break; + case 949: ret=917; break; + case 950: ret=918; break; + case 951: ret=919; break; + case 952: ret=920; break; + case 953: ret=921; break; + case 954: ret=922; break; + case 955: ret=923; break; + case 956: ret=924; break; + case 957: ret=925; break; + case 958: ret=926; break; + case 959: ret=927; break; + case 960: ret=928; break; + case 961: ret=929; break; + case 963: ret=931; break; + case 964: ret=932; break; + case 965: ret=933; break; + case 966: ret=934; break; + case 967: ret=935; break; + case 968: ret=936; break; + case 969: ret=937; break; + case 970: ret=938; break; + case 971: ret=939; break; + case 972: ret=908; break; + case 973: ret=910; break; + case 974: ret=911; break; + case 976: ret=914; break; + case 977: ret=920; break; + case 981: ret=934; break; + case 982: ret=928; break; + case 995: ret=994; break; + case 997: ret=996; break; + case 999: ret=998; break; + case 1001: ret=1000; break; + case 1003: ret=1002; break; + case 1005: ret=1004; break; + case 1007: ret=1006; break; + case 1008: ret=922; break; + case 1009: ret=929; break; + case 1072: ret=1040; break; + case 1073: ret=1041; break; + case 1074: ret=1042; break; + case 1075: ret=1043; break; + case 1076: ret=1044; break; + case 1077: ret=1045; break; + case 1078: ret=1046; break; + case 1079: ret=1047; break; + case 1080: ret=1048; break; + case 1081: ret=1049; break; + case 1082: ret=1050; break; + case 1083: ret=1051; break; + case 1084: ret=1052; break; + case 1085: ret=1053; break; + case 1086: ret=1054; break; + case 1087: ret=1055; break; + case 1088: ret=1056; break; + case 1089: ret=1057; break; + case 1090: ret=1058; break; + case 1091: ret=1059; break; + case 1092: ret=1060; break; + case 1093: ret=1061; 
break; + case 1094: ret=1062; break; + case 1095: ret=1063; break; + case 1096: ret=1064; break; + case 1097: ret=1065; break; + case 1098: ret=1066; break; + case 1099: ret=1067; break; + case 1100: ret=1068; break; + case 1101: ret=1069; break; + case 1102: ret=1070; break; + case 1103: ret=1071; break; + case 1105: ret=1025; break; + case 1106: ret=1026; break; + case 1107: ret=1027; break; + case 1108: ret=1028; break; + case 1109: ret=1029; break; + case 1110: ret=1030; break; + case 1111: ret=1031; break; + case 1112: ret=1032; break; + case 1113: ret=1033; break; + case 1114: ret=1034; break; + case 1115: ret=1035; break; + case 1116: ret=1036; break; + case 1118: ret=1038; break; + case 1119: ret=1039; break; + case 1121: ret=1120; break; + case 1123: ret=1122; break; + case 1125: ret=1124; break; + case 1127: ret=1126; break; + case 1129: ret=1128; break; + case 1131: ret=1130; break; + case 1133: ret=1132; break; + case 1135: ret=1134; break; + case 1137: ret=1136; break; + case 1139: ret=1138; break; + case 1141: ret=1140; break; + case 1143: ret=1142; break; + case 1145: ret=1144; break; + case 1147: ret=1146; break; + case 1149: ret=1148; break; + case 1151: ret=1150; break; + case 1153: ret=1152; break; + case 1169: ret=1168; break; + case 1171: ret=1170; break; + case 1173: ret=1172; break; + case 1175: ret=1174; break; + case 1177: ret=1176; break; + case 1179: ret=1178; break; + case 1181: ret=1180; break; + case 1183: ret=1182; break; + case 1185: ret=1184; break; + case 1187: ret=1186; break; + case 1189: ret=1188; break; + case 1191: ret=1190; break; + case 1193: ret=1192; break; + case 1195: ret=1194; break; + case 1197: ret=1196; break; + case 1199: ret=1198; break; + case 1201: ret=1200; break; + case 1203: ret=1202; break; + case 1205: ret=1204; break; + case 1207: ret=1206; break; + case 1209: ret=1208; break; + case 1211: ret=1210; break; + case 1213: ret=1212; break; + case 1215: ret=1214; break; + case 1218: ret=1217; break; + case 1220: 
ret=1219; break; + case 1224: ret=1223; break; + case 1228: ret=1227; break; + case 1233: ret=1232; break; + case 1235: ret=1234; break; + case 1237: ret=1236; break; + case 1239: ret=1238; break; + case 1241: ret=1240; break; + case 1243: ret=1242; break; + case 1245: ret=1244; break; + case 1247: ret=1246; break; + case 1249: ret=1248; break; + case 1251: ret=1250; break; + case 1253: ret=1252; break; + case 1255: ret=1254; break; + case 1257: ret=1256; break; + case 1259: ret=1258; break; + case 1263: ret=1262; break; + case 1265: ret=1264; break; + case 1267: ret=1266; break; + case 1269: ret=1268; break; + case 1273: ret=1272; break; + case 1377: ret=1329; break; + case 1378: ret=1330; break; + case 1379: ret=1331; break; + case 1380: ret=1332; break; + case 1381: ret=1333; break; + case 1382: ret=1334; break; + case 1383: ret=1335; break; + case 1384: ret=1336; break; + case 1385: ret=1337; break; + case 1386: ret=1338; break; + case 1387: ret=1339; break; + case 1388: ret=1340; break; + case 1389: ret=1341; break; + case 1390: ret=1342; break; + case 1391: ret=1343; break; + case 1392: ret=1344; break; + case 1393: ret=1345; break; + case 1394: ret=1346; break; + case 1395: ret=1347; break; + case 1396: ret=1348; break; + case 1397: ret=1349; break; + case 1398: ret=1350; break; + case 1399: ret=1351; break; + case 1400: ret=1352; break; + case 1401: ret=1353; break; + case 1402: ret=1354; break; + case 1403: ret=1355; break; + case 1404: ret=1356; break; + case 1405: ret=1357; break; + case 1406: ret=1358; break; + case 1407: ret=1359; break; + case 1408: ret=1360; break; + case 1409: ret=1361; break; + case 1410: ret=1362; break; + case 1411: ret=1363; break; + case 1412: ret=1364; break; + case 1413: ret=1365; break; + case 1414: ret=1366; break; + case 7681: ret=7680; break; + case 7683: ret=7682; break; + case 7685: ret=7684; break; + case 7687: ret=7686; break; + case 7689: ret=7688; break; + case 7691: ret=7690; break; + case 7693: ret=7692; break; + 
case 7695: ret=7694; break; + case 7697: ret=7696; break; + case 7699: ret=7698; break; + case 7701: ret=7700; break; + case 7703: ret=7702; break; + case 7705: ret=7704; break; + case 7707: ret=7706; break; + case 7709: ret=7708; break; + case 7711: ret=7710; break; + case 7713: ret=7712; break; + case 7715: ret=7714; break; + case 7717: ret=7716; break; + case 7719: ret=7718; break; + case 7721: ret=7720; break; + case 7723: ret=7722; break; + case 7725: ret=7724; break; + case 7727: ret=7726; break; + case 7729: ret=7728; break; + case 7731: ret=7730; break; + case 7733: ret=7732; break; + case 7735: ret=7734; break; + case 7737: ret=7736; break; + case 7739: ret=7738; break; + case 7741: ret=7740; break; + case 7743: ret=7742; break; + case 7745: ret=7744; break; + case 7747: ret=7746; break; + case 7749: ret=7748; break; + case 7751: ret=7750; break; + case 7753: ret=7752; break; + case 7755: ret=7754; break; + case 7757: ret=7756; break; + case 7759: ret=7758; break; + case 7761: ret=7760; break; + case 7763: ret=7762; break; + case 7765: ret=7764; break; + case 7767: ret=7766; break; + case 7769: ret=7768; break; + case 7771: ret=7770; break; + case 7773: ret=7772; break; + case 7775: ret=7774; break; + case 7777: ret=7776; break; + case 7779: ret=7778; break; + case 7781: ret=7780; break; + case 7783: ret=7782; break; + case 7785: ret=7784; break; + case 7787: ret=7786; break; + case 7789: ret=7788; break; + case 7791: ret=7790; break; + case 7793: ret=7792; break; + case 7795: ret=7794; break; + case 7797: ret=7796; break; + case 7799: ret=7798; break; + case 7801: ret=7800; break; + case 7803: ret=7802; break; + case 7805: ret=7804; break; + case 7807: ret=7806; break; + case 7809: ret=7808; break; + case 7811: ret=7810; break; + case 7813: ret=7812; break; + case 7815: ret=7814; break; + case 7817: ret=7816; break; + case 7819: ret=7818; break; + case 7821: ret=7820; break; + case 7823: ret=7822; break; + case 7825: ret=7824; break; + case 7827: 
ret=7826; break; + case 7829: ret=7828; break; + case 7841: ret=7840; break; + case 7843: ret=7842; break; + case 7845: ret=7844; break; + case 7847: ret=7846; break; + case 7849: ret=7848; break; + case 7851: ret=7850; break; + case 7853: ret=7852; break; + case 7855: ret=7854; break; + case 7857: ret=7856; break; + case 7859: ret=7858; break; + case 7861: ret=7860; break; + case 7863: ret=7862; break; + case 7865: ret=7864; break; + case 7867: ret=7866; break; + case 7869: ret=7868; break; + case 7871: ret=7870; break; + case 7873: ret=7872; break; + case 7875: ret=7874; break; + case 7877: ret=7876; break; + case 7879: ret=7878; break; + case 7881: ret=7880; break; + case 7883: ret=7882; break; + case 7885: ret=7884; break; + case 7887: ret=7886; break; + case 7889: ret=7888; break; + case 7891: ret=7890; break; + case 7893: ret=7892; break; + case 7895: ret=7894; break; + case 7897: ret=7896; break; + case 7899: ret=7898; break; + case 7901: ret=7900; break; + case 7903: ret=7902; break; + case 7905: ret=7904; break; + case 7907: ret=7906; break; + case 7909: ret=7908; break; + case 7911: ret=7910; break; + case 7913: ret=7912; break; + case 7915: ret=7914; break; + case 7917: ret=7916; break; + case 7919: ret=7918; break; + case 7921: ret=7920; break; + case 7923: ret=7922; break; + case 7925: ret=7924; break; + case 7927: ret=7926; break; + case 7929: ret=7928; break; + case 7936: ret=7944; break; + case 7937: ret=7945; break; + case 7938: ret=7946; break; + case 7939: ret=7947; break; + case 7940: ret=7948; break; + case 7941: ret=7949; break; + case 7942: ret=7950; break; + case 7943: ret=7951; break; + case 7952: ret=7960; break; + case 7953: ret=7961; break; + case 7954: ret=7962; break; + case 7955: ret=7963; break; + case 7956: ret=7964; break; + case 7957: ret=7965; break; + case 7968: ret=7976; break; + case 7969: ret=7977; break; + case 7970: ret=7978; break; + case 7971: ret=7979; break; + case 7972: ret=7980; break; + case 7973: ret=7981; break; + 
case 7974: ret=7982; break; + case 7975: ret=7983; break; + case 7984: ret=7992; break; + case 7985: ret=7993; break; + case 7986: ret=7994; break; + case 7987: ret=7995; break; + case 7988: ret=7996; break; + case 7989: ret=7997; break; + case 7990: ret=7998; break; + case 7991: ret=7999; break; + case 8000: ret=8008; break; + case 8001: ret=8009; break; + case 8002: ret=8010; break; + case 8003: ret=8011; break; + case 8004: ret=8012; break; + case 8005: ret=8013; break; + case 8017: ret=8025; break; + case 8019: ret=8027; break; + case 8021: ret=8029; break; + case 8023: ret=8031; break; + case 8032: ret=8040; break; + case 8033: ret=8041; break; + case 8034: ret=8042; break; + case 8035: ret=8043; break; + case 8036: ret=8044; break; + case 8037: ret=8045; break; + case 8038: ret=8046; break; + case 8039: ret=8047; break; + case 8048: ret=8122; break; + case 8049: ret=8123; break; + case 8050: ret=8136; break; + case 8051: ret=8137; break; + case 8052: ret=8138; break; + case 8053: ret=8139; break; + case 8054: ret=8154; break; + case 8055: ret=8155; break; + case 8056: ret=8184; break; + case 8057: ret=8185; break; + case 8058: ret=8170; break; + case 8059: ret=8171; break; + case 8060: ret=8186; break; + case 8061: ret=8187; break; + case 8064: ret=8072; break; + case 8065: ret=8073; break; + case 8066: ret=8074; break; + case 8067: ret=8075; break; + case 8068: ret=8076; break; + case 8069: ret=8077; break; + case 8070: ret=8078; break; + case 8071: ret=8079; break; + case 8080: ret=8088; break; + case 8081: ret=8089; break; + case 8082: ret=8090; break; + case 8083: ret=8091; break; + case 8084: ret=8092; break; + case 8085: ret=8093; break; + case 8086: ret=8094; break; + case 8087: ret=8095; break; + case 8096: ret=8104; break; + case 8097: ret=8105; break; + case 8098: ret=8106; break; + case 8099: ret=8107; break; + case 8100: ret=8108; break; + case 8101: ret=8109; break; + case 8102: ret=8110; break; + case 8103: ret=8111; break; + case 8112: 
ret=8120; break; + case 8113: ret=8121; break; + case 8115: ret=8124; break; + case 8131: ret=8140; break; + case 8144: ret=8152; break; + case 8145: ret=8153; break; + case 8160: ret=8168; break; + case 8161: ret=8169; break; + case 8165: ret=8172; break; + case 8179: ret=8188; break; + case 8560: ret=8544; break; + case 8561: ret=8545; break; + case 8562: ret=8546; break; + case 8563: ret=8547; break; + case 8564: ret=8548; break; + case 8565: ret=8549; break; + case 8566: ret=8550; break; + case 8567: ret=8551; break; + case 8568: ret=8552; break; + case 8569: ret=8553; break; + case 8570: ret=8554; break; + case 8571: ret=8555; break; + case 8572: ret=8556; break; + case 8573: ret=8557; break; + case 8574: ret=8558; break; + case 8575: ret=8559; break; + case 9424: ret=9398; break; + case 9425: ret=9399; break; + case 9426: ret=9400; break; + case 9427: ret=9401; break; + case 9428: ret=9402; break; + case 9429: ret=9403; break; + case 9430: ret=9404; break; + case 9431: ret=9405; break; + case 9432: ret=9406; break; + case 9433: ret=9407; break; + case 9434: ret=9408; break; + case 9435: ret=9409; break; + case 9436: ret=9410; break; + case 9437: ret=9411; break; + case 9438: ret=9412; break; + case 9439: ret=9413; break; + case 9440: ret=9414; break; + case 9441: ret=9415; break; + case 9442: ret=9416; break; + case 9443: ret=9417; break; + case 9444: ret=9418; break; + case 9445: ret=9419; break; + case 9446: ret=9420; break; + case 9447: ret=9421; break; + case 9448: ret=9422; break; + case 9449: ret=9423; break; + case 65345: ret=65313; break; + case 65346: ret=65314; break; + case 65347: ret=65315; break; + case 65348: ret=65316; break; + case 65349: ret=65317; break; + case 65350: ret=65318; break; + case 65351: ret=65319; break; + case 65352: ret=65320; break; + case 65353: ret=65321; break; + case 65354: ret=65322; break; + case 65355: ret=65323; break; + case 65356: ret=65324; break; + case 65357: ret=65325; break; + case 65358: ret=65326; break; + 
case 65359: ret=65327; break; + case 65360: ret=65328; break; + case 65361: ret=65329; break; + case 65362: ret=65330; break; + case 65363: ret=65331; break; + case 65364: ret=65332; break; + case 65365: ret=65333; break; + case 65366: ret=65334; break; + case 65367: ret=65335; break; + case 65368: ret=65336; break; + case 65369: ret=65337; break; + case 65370: ret=65338; break; + } + return (char)ret; + } + return Character.toUpperCase(c); + } + /** Convert a character to lower case. */ + public static char toLowerCase(char c) { + if(java_1_0) { + int ret = (int)c; + switch(c) { + case 65: ret=97; break; + case 66: ret=98; break; + case 67: ret=99; break; + case 68: ret=100; break; + case 69: ret=101; break; + case 70: ret=102; break; + case 71: ret=103; break; + case 72: ret=104; break; + case 73: ret=105; break; + case 74: ret=106; break; + case 75: ret=107; break; + case 76: ret=108; break; + case 77: ret=109; break; + case 78: ret=110; break; + case 79: ret=111; break; + case 80: ret=112; break; + case 81: ret=113; break; + case 82: ret=114; break; + case 83: ret=115; break; + case 84: ret=116; break; + case 85: ret=117; break; + case 86: ret=118; break; + case 87: ret=119; break; + case 88: ret=120; break; + case 89: ret=121; break; + case 90: ret=122; break; + case 192: ret=224; break; + case 193: ret=225; break; + case 194: ret=226; break; + case 195: ret=227; break; + case 196: ret=228; break; + case 197: ret=229; break; + case 198: ret=230; break; + case 199: ret=231; break; + case 200: ret=232; break; + case 201: ret=233; break; + case 202: ret=234; break; + case 203: ret=235; break; + case 204: ret=236; break; + case 205: ret=237; break; + case 206: ret=238; break; + case 207: ret=239; break; + case 208: ret=240; break; + case 209: ret=241; break; + case 210: ret=242; break; + case 211: ret=243; break; + case 212: ret=244; break; + case 213: ret=245; break; + case 214: ret=246; break; + case 216: ret=248; break; + case 217: ret=249; break; + case 218: 
ret=250; break; + case 219: ret=251; break; + case 220: ret=252; break; + case 221: ret=253; break; + case 222: ret=254; break; + case 256: ret=257; break; + case 258: ret=259; break; + case 260: ret=261; break; + case 262: ret=263; break; + case 264: ret=265; break; + case 266: ret=267; break; + case 268: ret=269; break; + case 270: ret=271; break; + case 272: ret=273; break; + case 274: ret=275; break; + case 276: ret=277; break; + case 278: ret=279; break; + case 280: ret=281; break; + case 282: ret=283; break; + case 284: ret=285; break; + case 286: ret=287; break; + case 288: ret=289; break; + case 290: ret=291; break; + case 292: ret=293; break; + case 294: ret=295; break; + case 296: ret=297; break; + case 298: ret=299; break; + case 300: ret=301; break; + case 302: ret=303; break; + case 304: ret=105; break; + case 306: ret=307; break; + case 308: ret=309; break; + case 310: ret=311; break; + case 313: ret=314; break; + case 315: ret=316; break; + case 317: ret=318; break; + case 319: ret=320; break; + case 321: ret=322; break; + case 323: ret=324; break; + case 325: ret=326; break; + case 327: ret=328; break; + case 330: ret=331; break; + case 332: ret=333; break; + case 334: ret=335; break; + case 336: ret=337; break; + case 338: ret=339; break; + case 340: ret=341; break; + case 342: ret=343; break; + case 344: ret=345; break; + case 346: ret=347; break; + case 348: ret=349; break; + case 350: ret=351; break; + case 352: ret=353; break; + case 354: ret=355; break; + case 356: ret=357; break; + case 358: ret=359; break; + case 360: ret=361; break; + case 362: ret=363; break; + case 364: ret=365; break; + case 366: ret=367; break; + case 368: ret=369; break; + case 370: ret=371; break; + case 372: ret=373; break; + case 374: ret=375; break; + case 376: ret=255; break; + case 377: ret=378; break; + case 379: ret=380; break; + case 381: ret=382; break; + case 385: ret=595; break; + case 386: ret=387; break; + case 388: ret=389; break; + case 390: ret=596; 
break; + case 391: ret=392; break; + case 393: ret=598; break; + case 394: ret=599; break; + case 395: ret=396; break; + case 398: ret=600; break; + case 399: ret=601; break; + case 400: ret=603; break; + case 401: ret=402; break; + case 403: ret=608; break; + case 404: ret=611; break; + case 406: ret=617; break; + case 407: ret=616; break; + case 408: ret=409; break; + case 412: ret=623; break; + case 413: ret=626; break; + case 416: ret=417; break; + case 418: ret=419; break; + case 420: ret=421; break; + case 423: ret=424; break; + case 425: ret=643; break; + case 428: ret=429; break; + case 430: ret=648; break; + case 431: ret=432; break; + case 433: ret=650; break; + case 434: ret=651; break; + case 435: ret=436; break; + case 437: ret=438; break; + case 439: ret=658; break; + case 440: ret=441; break; + case 444: ret=445; break; + case 452: ret=454; break; + case 453: ret=454; break; + case 455: ret=457; break; + case 456: ret=457; break; + case 458: ret=460; break; + case 459: ret=460; break; + case 461: ret=462; break; + case 463: ret=464; break; + case 465: ret=466; break; + case 467: ret=468; break; + case 469: ret=470; break; + case 471: ret=472; break; + case 473: ret=474; break; + case 475: ret=476; break; + case 478: ret=479; break; + case 480: ret=481; break; + case 482: ret=483; break; + case 484: ret=485; break; + case 486: ret=487; break; + case 488: ret=489; break; + case 490: ret=491; break; + case 492: ret=493; break; + case 494: ret=495; break; + case 497: ret=499; break; + case 498: ret=499; break; + case 500: ret=501; break; + case 506: ret=507; break; + case 508: ret=509; break; + case 510: ret=511; break; + case 512: ret=513; break; + case 514: ret=515; break; + case 516: ret=517; break; + case 518: ret=519; break; + case 520: ret=521; break; + case 522: ret=523; break; + case 524: ret=525; break; + case 526: ret=527; break; + case 528: ret=529; break; + case 530: ret=531; break; + case 532: ret=533; break; + case 534: ret=535; break; + 
case 902: ret=940; break; + case 904: ret=941; break; + case 905: ret=942; break; + case 906: ret=943; break; + case 908: ret=972; break; + case 910: ret=973; break; + case 911: ret=974; break; + case 913: ret=945; break; + case 914: ret=946; break; + case 915: ret=947; break; + case 916: ret=948; break; + case 917: ret=949; break; + case 918: ret=950; break; + case 919: ret=951; break; + case 920: ret=952; break; + case 921: ret=953; break; + case 922: ret=954; break; + case 923: ret=955; break; + case 924: ret=956; break; + case 925: ret=957; break; + case 926: ret=958; break; + case 927: ret=959; break; + case 928: ret=960; break; + case 929: ret=961; break; + case 931: ret=963; break; + case 932: ret=964; break; + case 933: ret=965; break; + case 934: ret=966; break; + case 935: ret=967; break; + case 936: ret=968; break; + case 937: ret=969; break; + case 938: ret=970; break; + case 939: ret=971; break; + case 994: ret=995; break; + case 996: ret=997; break; + case 998: ret=999; break; + case 1000: ret=1001; break; + case 1002: ret=1003; break; + case 1004: ret=1005; break; + case 1006: ret=1007; break; + case 1025: ret=1105; break; + case 1026: ret=1106; break; + case 1027: ret=1107; break; + case 1028: ret=1108; break; + case 1029: ret=1109; break; + case 1030: ret=1110; break; + case 1031: ret=1111; break; + case 1032: ret=1112; break; + case 1033: ret=1113; break; + case 1034: ret=1114; break; + case 1035: ret=1115; break; + case 1036: ret=1116; break; + case 1038: ret=1118; break; + case 1039: ret=1119; break; + case 1040: ret=1072; break; + case 1041: ret=1073; break; + case 1042: ret=1074; break; + case 1043: ret=1075; break; + case 1044: ret=1076; break; + case 1045: ret=1077; break; + case 1046: ret=1078; break; + case 1047: ret=1079; break; + case 1048: ret=1080; break; + case 1049: ret=1081; break; + case 1050: ret=1082; break; + case 1051: ret=1083; break; + case 1052: ret=1084; break; + case 1053: ret=1085; break; + case 1054: ret=1086; break; + 
case 1055: ret=1087; break; + case 1056: ret=1088; break; + case 1057: ret=1089; break; + case 1058: ret=1090; break; + case 1059: ret=1091; break; + case 1060: ret=1092; break; + case 1061: ret=1093; break; + case 1062: ret=1094; break; + case 1063: ret=1095; break; + case 1064: ret=1096; break; + case 1065: ret=1097; break; + case 1066: ret=1098; break; + case 1067: ret=1099; break; + case 1068: ret=1100; break; + case 1069: ret=1101; break; + case 1070: ret=1102; break; + case 1071: ret=1103; break; + case 1120: ret=1121; break; + case 1122: ret=1123; break; + case 1124: ret=1125; break; + case 1126: ret=1127; break; + case 1128: ret=1129; break; + case 1130: ret=1131; break; + case 1132: ret=1133; break; + case 1134: ret=1135; break; + case 1136: ret=1137; break; + case 1138: ret=1139; break; + case 1140: ret=1141; break; + case 1142: ret=1143; break; + case 1144: ret=1145; break; + case 1146: ret=1147; break; + case 1148: ret=1149; break; + case 1150: ret=1151; break; + case 1152: ret=1153; break; + case 1168: ret=1169; break; + case 1170: ret=1171; break; + case 1172: ret=1173; break; + case 1174: ret=1175; break; + case 1176: ret=1177; break; + case 1178: ret=1179; break; + case 1180: ret=1181; break; + case 1182: ret=1183; break; + case 1184: ret=1185; break; + case 1186: ret=1187; break; + case 1188: ret=1189; break; + case 1190: ret=1191; break; + case 1192: ret=1193; break; + case 1194: ret=1195; break; + case 1196: ret=1197; break; + case 1198: ret=1199; break; + case 1200: ret=1201; break; + case 1202: ret=1203; break; + case 1204: ret=1205; break; + case 1206: ret=1207; break; + case 1208: ret=1209; break; + case 1210: ret=1211; break; + case 1212: ret=1213; break; + case 1214: ret=1215; break; + case 1217: ret=1218; break; + case 1219: ret=1220; break; + case 1223: ret=1224; break; + case 1227: ret=1228; break; + case 1232: ret=1233; break; + case 1234: ret=1235; break; + case 1236: ret=1237; break; + case 1238: ret=1239; break; + case 1240: 
ret=1241; break; + case 1242: ret=1243; break; + case 1244: ret=1245; break; + case 1246: ret=1247; break; + case 1248: ret=1249; break; + case 1250: ret=1251; break; + case 1252: ret=1253; break; + case 1254: ret=1255; break; + case 1256: ret=1257; break; + case 1258: ret=1259; break; + case 1262: ret=1263; break; + case 1264: ret=1265; break; + case 1266: ret=1267; break; + case 1268: ret=1269; break; + case 1272: ret=1273; break; + case 1329: ret=1377; break; + case 1330: ret=1378; break; + case 1331: ret=1379; break; + case 1332: ret=1380; break; + case 1333: ret=1381; break; + case 1334: ret=1382; break; + case 1335: ret=1383; break; + case 1336: ret=1384; break; + case 1337: ret=1385; break; + case 1338: ret=1386; break; + case 1339: ret=1387; break; + case 1340: ret=1388; break; + case 1341: ret=1389; break; + case 1342: ret=1390; break; + case 1343: ret=1391; break; + case 1344: ret=1392; break; + case 1345: ret=1393; break; + case 1346: ret=1394; break; + case 1347: ret=1395; break; + case 1348: ret=1396; break; + case 1349: ret=1397; break; + case 1350: ret=1398; break; + case 1351: ret=1399; break; + case 1352: ret=1400; break; + case 1353: ret=1401; break; + case 1354: ret=1402; break; + case 1355: ret=1403; break; + case 1356: ret=1404; break; + case 1357: ret=1405; break; + case 1358: ret=1406; break; + case 1359: ret=1407; break; + case 1360: ret=1408; break; + case 1361: ret=1409; break; + case 1362: ret=1410; break; + case 1363: ret=1411; break; + case 1364: ret=1412; break; + case 1365: ret=1413; break; + case 1366: ret=1414; break; + case 4256: ret=4304; break; + case 4257: ret=4305; break; + case 4258: ret=4306; break; + case 4259: ret=4307; break; + case 4260: ret=4308; break; + case 4261: ret=4309; break; + case 4262: ret=4310; break; + case 4263: ret=4311; break; + case 4264: ret=4312; break; + case 4265: ret=4313; break; + case 4266: ret=4314; break; + case 4267: ret=4315; break; + case 4268: ret=4316; break; + case 4269: ret=4317; break; + 
case 4270: ret=4318; break; + case 4271: ret=4319; break; + case 4272: ret=4320; break; + case 4273: ret=4321; break; + case 4274: ret=4322; break; + case 4275: ret=4323; break; + case 4276: ret=4324; break; + case 4277: ret=4325; break; + case 4278: ret=4326; break; + case 4279: ret=4327; break; + case 4280: ret=4328; break; + case 4281: ret=4329; break; + case 4282: ret=4330; break; + case 4283: ret=4331; break; + case 4284: ret=4332; break; + case 4285: ret=4333; break; + case 4286: ret=4334; break; + case 4287: ret=4335; break; + case 4288: ret=4336; break; + case 4289: ret=4337; break; + case 4290: ret=4338; break; + case 4291: ret=4339; break; + case 4292: ret=4340; break; + case 4293: ret=4341; break; + case 7680: ret=7681; break; + case 7682: ret=7683; break; + case 7684: ret=7685; break; + case 7686: ret=7687; break; + case 7688: ret=7689; break; + case 7690: ret=7691; break; + case 7692: ret=7693; break; + case 7694: ret=7695; break; + case 7696: ret=7697; break; + case 7698: ret=7699; break; + case 7700: ret=7701; break; + case 7702: ret=7703; break; + case 7704: ret=7705; break; + case 7706: ret=7707; break; + case 7708: ret=7709; break; + case 7710: ret=7711; break; + case 7712: ret=7713; break; + case 7714: ret=7715; break; + case 7716: ret=7717; break; + case 7718: ret=7719; break; + case 7720: ret=7721; break; + case 7722: ret=7723; break; + case 7724: ret=7725; break; + case 7726: ret=7727; break; + case 7728: ret=7729; break; + case 7730: ret=7731; break; + case 7732: ret=7733; break; + case 7734: ret=7735; break; + case 7736: ret=7737; break; + case 7738: ret=7739; break; + case 7740: ret=7741; break; + case 7742: ret=7743; break; + case 7744: ret=7745; break; + case 7746: ret=7747; break; + case 7748: ret=7749; break; + case 7750: ret=7751; break; + case 7752: ret=7753; break; + case 7754: ret=7755; break; + case 7756: ret=7757; break; + case 7758: ret=7759; break; + case 7760: ret=7761; break; + case 7762: ret=7763; break; + case 7764: 
ret=7765; break; + case 7766: ret=7767; break; + case 7768: ret=7769; break; + case 7770: ret=7771; break; + case 7772: ret=7773; break; + case 7774: ret=7775; break; + case 7776: ret=7777; break; + case 7778: ret=7779; break; + case 7780: ret=7781; break; + case 7782: ret=7783; break; + case 7784: ret=7785; break; + case 7786: ret=7787; break; + case 7788: ret=7789; break; + case 7790: ret=7791; break; + case 7792: ret=7793; break; + case 7794: ret=7795; break; + case 7796: ret=7797; break; + case 7798: ret=7799; break; + case 7800: ret=7801; break; + case 7802: ret=7803; break; + case 7804: ret=7805; break; + case 7806: ret=7807; break; + case 7808: ret=7809; break; + case 7810: ret=7811; break; + case 7812: ret=7813; break; + case 7814: ret=7815; break; + case 7816: ret=7817; break; + case 7818: ret=7819; break; + case 7820: ret=7821; break; + case 7822: ret=7823; break; + case 7824: ret=7825; break; + case 7826: ret=7827; break; + case 7828: ret=7829; break; + case 7840: ret=7841; break; + case 7842: ret=7843; break; + case 7844: ret=7845; break; + case 7846: ret=7847; break; + case 7848: ret=7849; break; + case 7850: ret=7851; break; + case 7852: ret=7853; break; + case 7854: ret=7855; break; + case 7856: ret=7857; break; + case 7858: ret=7859; break; + case 7860: ret=7861; break; + case 7862: ret=7863; break; + case 7864: ret=7865; break; + case 7866: ret=7867; break; + case 7868: ret=7869; break; + case 7870: ret=7871; break; + case 7872: ret=7873; break; + case 7874: ret=7875; break; + case 7876: ret=7877; break; + case 7878: ret=7879; break; + case 7880: ret=7881; break; + case 7882: ret=7883; break; + case 7884: ret=7885; break; + case 7886: ret=7887; break; + case 7888: ret=7889; break; + case 7890: ret=7891; break; + case 7892: ret=7893; break; + case 7894: ret=7895; break; + case 7896: ret=7897; break; + case 7898: ret=7899; break; + case 7900: ret=7901; break; + case 7902: ret=7903; break; + case 7904: ret=7905; break; + case 7906: ret=7907; break; + 
case 7908: ret=7909; break; + case 7910: ret=7911; break; + case 7912: ret=7913; break; + case 7914: ret=7915; break; + case 7916: ret=7917; break; + case 7918: ret=7919; break; + case 7920: ret=7921; break; + case 7922: ret=7923; break; + case 7924: ret=7925; break; + case 7926: ret=7927; break; + case 7928: ret=7929; break; + case 7944: ret=7936; break; + case 7945: ret=7937; break; + case 7946: ret=7938; break; + case 7947: ret=7939; break; + case 7948: ret=7940; break; + case 7949: ret=7941; break; + case 7950: ret=7942; break; + case 7951: ret=7943; break; + case 7960: ret=7952; break; + case 7961: ret=7953; break; + case 7962: ret=7954; break; + case 7963: ret=7955; break; + case 7964: ret=7956; break; + case 7965: ret=7957; break; + case 7976: ret=7968; break; + case 7977: ret=7969; break; + case 7978: ret=7970; break; + case 7979: ret=7971; break; + case 7980: ret=7972; break; + case 7981: ret=7973; break; + case 7982: ret=7974; break; + case 7983: ret=7975; break; + case 7992: ret=7984; break; + case 7993: ret=7985; break; + case 7994: ret=7986; break; + case 7995: ret=7987; break; + case 7996: ret=7988; break; + case 7997: ret=7989; break; + case 7998: ret=7990; break; + case 7999: ret=7991; break; + case 8008: ret=8000; break; + case 8009: ret=8001; break; + case 8010: ret=8002; break; + case 8011: ret=8003; break; + case 8012: ret=8004; break; + case 8013: ret=8005; break; + case 8025: ret=8017; break; + case 8027: ret=8019; break; + case 8029: ret=8021; break; + case 8031: ret=8023; break; + case 8040: ret=8032; break; + case 8041: ret=8033; break; + case 8042: ret=8034; break; + case 8043: ret=8035; break; + case 8044: ret=8036; break; + case 8045: ret=8037; break; + case 8046: ret=8038; break; + case 8047: ret=8039; break; + case 8072: ret=8064; break; + case 8073: ret=8065; break; + case 8074: ret=8066; break; + case 8075: ret=8067; break; + case 8076: ret=8068; break; + case 8077: ret=8069; break; + case 8078: ret=8070; break; + case 8079: 
ret=8071; break; + case 8088: ret=8080; break; + case 8089: ret=8081; break; + case 8090: ret=8082; break; + case 8091: ret=8083; break; + case 8092: ret=8084; break; + case 8093: ret=8085; break; + case 8094: ret=8086; break; + case 8095: ret=8087; break; + case 8104: ret=8096; break; + case 8105: ret=8097; break; + case 8106: ret=8098; break; + case 8107: ret=8099; break; + case 8108: ret=8100; break; + case 8109: ret=8101; break; + case 8110: ret=8102; break; + case 8111: ret=8103; break; + case 8120: ret=8112; break; + case 8121: ret=8113; break; + case 8122: ret=8048; break; + case 8123: ret=8049; break; + case 8124: ret=8115; break; + case 8136: ret=8050; break; + case 8137: ret=8051; break; + case 8138: ret=8052; break; + case 8139: ret=8053; break; + case 8140: ret=8131; break; + case 8152: ret=8144; break; + case 8153: ret=8145; break; + case 8154: ret=8054; break; + case 8155: ret=8055; break; + case 8168: ret=8160; break; + case 8169: ret=8161; break; + case 8170: ret=8058; break; + case 8171: ret=8059; break; + case 8172: ret=8165; break; + case 8184: ret=8056; break; + case 8185: ret=8057; break; + case 8186: ret=8060; break; + case 8187: ret=8061; break; + case 8188: ret=8179; break; + case 8544: ret=8560; break; + case 8545: ret=8561; break; + case 8546: ret=8562; break; + case 8547: ret=8563; break; + case 8548: ret=8564; break; + case 8549: ret=8565; break; + case 8550: ret=8566; break; + case 8551: ret=8567; break; + case 8552: ret=8568; break; + case 8553: ret=8569; break; + case 8554: ret=8570; break; + case 8555: ret=8571; break; + case 8556: ret=8572; break; + case 8557: ret=8573; break; + case 8558: ret=8574; break; + case 8559: ret=8575; break; + case 9398: ret=9424; break; + case 9399: ret=9425; break; + case 9400: ret=9426; break; + case 9401: ret=9427; break; + case 9402: ret=9428; break; + case 9403: ret=9429; break; + case 9404: ret=9430; break; + case 9405: ret=9431; break; + case 9406: ret=9432; break; + case 9407: ret=9433; break; + 
case 9408: ret=9434; break; + case 9409: ret=9435; break; + case 9410: ret=9436; break; + case 9411: ret=9437; break; + case 9412: ret=9438; break; + case 9413: ret=9439; break; + case 9414: ret=9440; break; + case 9415: ret=9441; break; + case 9416: ret=9442; break; + case 9417: ret=9443; break; + case 9418: ret=9444; break; + case 9419: ret=9445; break; + case 9420: ret=9446; break; + case 9421: ret=9447; break; + case 9422: ret=9448; break; + case 9423: ret=9449; break; + case 65313: ret=65345; break; + case 65314: ret=65346; break; + case 65315: ret=65347; break; + case 65316: ret=65348; break; + case 65317: ret=65349; break; + case 65318: ret=65350; break; + case 65319: ret=65351; break; + case 65320: ret=65352; break; + case 65321: ret=65353; break; + case 65322: ret=65354; break; + case 65323: ret=65355; break; + case 65324: ret=65356; break; + case 65325: ret=65357; break; + case 65326: ret=65358; break; + case 65327: ret=65359; break; + case 65328: ret=65360; break; + case 65329: ret=65361; break; + case 65330: ret=65362; break; + case 65331: ret=65363; break; + case 65332: ret=65364; break; + case 65333: ret=65365; break; + case 65334: ret=65366; break; + case 65335: ret=65367; break; + case 65336: ret=65368; break; + case 65337: ret=65369; break; + case 65338: ret=65370; break; + } + return (char)ret; + } + return Character.toLowerCase(c); + } + /** Convert a String to title case. */ + public static String toTitleCase(String s) { + StringBuffer sb = new StringBuffer(); + for(int i=0;is2.length()||i1+i3>s1.length()) return false; + if(!ign) { + for(int i=i2;is2.length()||i1+i3>s1.length()) return false; + if(!ign) { + for(int i=i2;is2.length()||i1+i3>s1.length()) return false; + if(!ign) { + for(int i=i2;is2.length()||i1+i3>s1.length()) return false; + if(!ign) { + for(int i=i2;itrans2.java + * and trans2a.java. 
+ */ +public class ChangeRule extends SpecialRule { + Regex NewRule; + public ChangeRule(ChangeRule c) { NewRule=c.NewRule; } + public ChangeRule(String nm,Regex rr) { name=nm; NewRule = rr; } + public ChangeRule(String nm,Transformer tr) { name=nm; NewRule = tr.rp; } + public Object clone1() { return new ChangeRule(this); } + public String toString1() { return "${="+name+"}"; } + public void apply(StringBufferLike sb,RegRes rr) {} +} diff --git a/src/com/stevesoft/pat/CodeRule.java b/src/com/stevesoft/pat/CodeRule.java new file mode 100755 index 0000000..31c779f --- /dev/null +++ b/src/com/stevesoft/pat/CodeRule.java @@ -0,0 +1,21 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** Implements the rules for \U, \L, \E, \Q in substitutions. */ +public final class CodeRule extends SpecialRule { + char c = 'E'; + public CodeRule() {} + public CodeRule(char c) { + this.c = c; + } + public void apply(StringBufferLike sb,RegRes res) { + sb.setMode(c); + } + public String toString1() { return "\\"+c; } +} diff --git a/src/com/stevesoft/pat/Ctrl.java b/src/com/stevesoft/pat/Ctrl.java new file mode 100755 index 0000000..ba7fc20 --- /dev/null +++ b/src/com/stevesoft/pat/Ctrl.java @@ -0,0 +1,269 @@ +package// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! 
+// +com.stevesoft.pat; + +public class Ctrl { + public final static char[] cmap = { + (char)64, + (char)65, + (char)66, + (char)67, + (char)68, + (char)69, + (char)70, + (char)71, + (char)72, + (char)73, + (char)74, + (char)75, + (char)76, + (char)77, + (char)78, + (char)79, + (char)80, + (char)81, + (char)82, + (char)83, + (char)84, + (char)85, + (char)86, + (char)87, + (char)88, + (char)89, + (char)90, + (char)91, + (char)92, + (char)93, + (char)94, + (char)95, + (char)96, + (char)97, + (char)97, + (char)99, + (char)100, + (char)101, + (char)102, + (char)103, + (char)104, + (char)105, + (char)106, + (char)107, + (char)108, + (char)109, + (char)110, + (char)111, + (char)112, + (char)113, + (char)114, + (char)115, + (char)116, + (char)117, + (char)118, + (char)119, + (char)120, + (char)121, + (char)122, + (char)123, + (char)124, + (char)125, + (char)126, + (char)127, + (char)0, + (char)1, + (char)2, + (char)3, + (char)4, + (char)5, + (char)6, + (char)7, + (char)8, + (char)9, + (char)10, + (char)11, + (char)12, + (char)13, + (char)14, + (char)15, + (char)16, + (char)17, + (char)18, + (char)19, + (char)20, + (char)21, + (char)22, + (char)23, + (char)24, + (char)25, + (char)26, + (char)27, + (char)27, + (char)29, + (char)30, + (char)31, + (char)32, + (char)1, + (char)2, + (char)3, + (char)4, + (char)5, + (char)6, + (char)7, + (char)8, + (char)9, + (char)10, + (char)11, + (char)12, + (char)13, + (char)14, + (char)15, + (char)16, + (char)17, + (char)18, + (char)19, + (char)20, + (char)21, + (char)22, + (char)23, + (char)24, + (char)25, + (char)26, + (char)59, + (char)60, + (char)61, + (char)62, + (char)63, + (char)192, + (char)193, + (char)194, + (char)195, + (char)196, + (char)197, + (char)198, + (char)199, + (char)200, + (char)201, + (char)202, + (char)203, + (char)204, + (char)205, + (char)206, + (char)207, + (char)208, + (char)209, + (char)210, + (char)211, + (char)212, + (char)213, + (char)214, + (char)215, + (char)216, + (char)217, + (char)218, + (char)219, + 
(char)220, + (char)221, + (char)222, + (char)223, + (char)224, + (char)225, + (char)226, + (char)227, + (char)228, + (char)229, + (char)230, + (char)231, + (char)232, + (char)233, + (char)234, + (char)235, + (char)236, + (char)237, + (char)238, + (char)239, + (char)240, + (char)241, + (char)242, + (char)243, + (char)244, + (char)245, + (char)246, + (char)247, + (char)248, + (char)249, + (char)250, + (char)251, + (char)252, + (char)253, + (char)254, + (char)255, + (char)128, + (char)129, + (char)130, + (char)131, + (char)132, + (char)133, + (char)134, + (char)135, + (char)136, + (char)137, + (char)138, + (char)139, + (char)140, + (char)141, + (char)142, + (char)143, + (char)144, + (char)145, + (char)146, + (char)147, + (char)148, + (char)149, + (char)150, + (char)151, + (char)152, + (char)153, + (char)154, + (char)155, + (char)156, + (char)157, + (char)158, + (char)159, + (char)160, + (char)161, + (char)162, + (char)163, + (char)164, + (char)165, + (char)166, + (char)167, + (char)168, + (char)169, + (char)170, + (char)171, + (char)172, + (char)173, + (char)174, + (char)175, + (char)176, + (char)177, + (char)178, + (char)179, + (char)180, + (char)181, + (char)182, + (char)183, + (char)184, + (char)185, + (char)186, + (char)187, + (char)188, + (char)189, + (char)190, + (char)191 + }; +} diff --git a/src/com/stevesoft/pat/Custom.java b/src/com/stevesoft/pat/Custom.java new file mode 100755 index 0000000..e7621a1 --- /dev/null +++ b/src/com/stevesoft/pat/Custom.java @@ -0,0 +1,42 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** Simple custom patterns. See + deriv2.java + and deriv3.java + in the test directory. 
+    @see com.stevesoft.pat.CustomEndpoint
+    */
+class Custom extends PatternSub {
+  String select; // key used to look up the Validator in Regex.validators
+  Validator v;
+  int start; // position recorded when matchInternal was entered; read back by CustomEndpoint
+  Custom(String s) {
+    select = s;
+    v = (Validator)Regex.validators.get(s);
+  }
+  public int matchInternal(int pos,Pthings pt) {
+    start = pos;
+    return sub.matchInternal(pos,pt);
+  }
+  public String toString() {
+    String a = v.argsave == null ? "" : ":"+v.argsave;
+    return "(??"+select+a+")"+nextString();
+  }
+  public patInt minChars() { return v.minChars(); }
+  public patInt maxChars() { return v.maxChars(); }
+  Pattern clone1(Hashtable h) {
+    Custom c = new Custom(select);
+    h.put(c,c); // register the copy under both itself and the original
+    h.put(this,c);
+    c.sub = sub.clone(h);
+    return c;
+  }
+}
diff --git a/src/com/stevesoft/pat/CustomEndpoint.java b/src/com/stevesoft/pat/CustomEndpoint.java
new file mode 100755
index 0000000..b8fc704
--- /dev/null
+++ b/src/com/stevesoft/pat/CustomEndpoint.java
@@ -0,0 +1,27 @@
+//
+// This software is now distributed according to
+// the Lesser Gnu Public License.  Please see
+// http://www.gnu.org/copyleft/lesser.txt for
+// the details.
+//    -- Happy Computing!
+//
+package com.stevesoft.pat;
+import java.util.Hashtable;
+
+/** This class is needed internally to make backtracking work
+  * correctly in user-defined patterns.
+  */
+class CustomEndpoint extends Pattern {
+  Custom c;
+  CustomEndpoint(Custom cm) { c = cm; }
+  public int matchInternal(int pos,Pthings pt) {
+    int npos = c.v.validate(pt.src,c.start,pos); // validator gets the source, the Custom's recorded start, and the current position
+    if(npos >= 0)
+      return nextMatch(npos,pt); // continue from the position the validator returned
+    return -1;
+  }
+  public String toString() { return ""; }
+  Pattern clone1(Hashtable h) {
+    return new CustomEndpoint((Custom)c.clone(h));
+  }
+}
diff --git a/src/com/stevesoft/pat/DirFileRegex.java b/src/com/stevesoft/pat/DirFileRegex.java
new file mode 100755
index 0000000..731457c
--- /dev/null
+++ b/src/com/stevesoft/pat/DirFileRegex.java
@@ -0,0 +1,19 @@
+//
+// This software is now distributed according to
+// the Lesser Gnu Public License.  Please see
+// http://www.gnu.org/copyleft/lesser.txt for
+// the details.
+//    -- Happy Computing!
+//
+package com.stevesoft.pat;
+import java.io.File;
+
+/** This class is just like FileRegex, except that its accept method
+ only returns true if the file matching the pattern is a directory.*/
+public class DirFileRegex extends FileRegex {
+  public DirFileRegex() { dirflag = DIR; }
+  public DirFileRegex(String fp) { super(fp); dirflag = DIR; }
+  public static String[] list(String f) {
+    return list(f,DIR);
+  }
+}
diff --git a/src/com/stevesoft/pat/DotMulti.java b/src/com/stevesoft/pat/DotMulti.java
new file mode 100755
index 0000000..514c161
--- /dev/null
+++ b/src/com/stevesoft/pat/DotMulti.java
@@ -0,0 +1,125 @@
+//
+// This software is now distributed according to
+// the Lesser Gnu Public License.  Please see
+// http://www.gnu.org/copyleft/lesser.txt for
+// the details.
+//    -- Happy Computing!
+//
+package com.stevesoft.pat;
+import java.util.Hashtable;
+
+/** A special optimization of multi that is used when the
+  * common subpattern ".*" is encountered.
+  */
+class DotMulti extends PatternSub {
+  patInt fewestMatches,mostMatches;
+  public patInt minChars() {
+    return fewestMatches;
+  }
+  public patInt maxChars() {
+    return mostMatches;
+  }
+  public boolean matchFewest = false;
+
+  StringLike src=null; // per-match state below is refreshed at the top of matchInternal
+  int srclength=0;
+  boolean dotDoesntMatchCR=true;
+  DotMulti(patInt a,patInt b) {
+    fewestMatches = a;
+    mostMatches = b;
+  }
+  public String toString() {
+    return ".{"
+      +fewestMatches+","+mostMatches+"}"+
+      (matchFewest ? "?" : "")+"(?# <= dot multi)"+
+      nextString();
+  }
+  final int submatchInternal(int pos,Pthings pt) { // consume one char: returns pos+1, or -1 at end of text or on a rejected '\n'
+    if(pos < srclength) {
+      if(dotDoesntMatchCR) {
+        if(src.charAt(pos) != '\n')
+          return 1+pos;
+      } else return 1+pos;
+    }
+    return -1;
+  }
+  final static int step = 1;
+  static int idcount = 1;
+  public int matchInternal(int pos,Pthings pt) {
+    int m=-1;
+    int i=pos;
+    src = pt.src;
+    srclength = src.length();
+    dotDoesntMatchCR = pt.dotDoesntMatchCR;
+    if(matchFewest) { // lazy form: take the minimum, then grow one char at a time
+      int nMatches = 0;
+      while(fewestMatches.intValue() > nMatches) {
+        i=submatchInternal(i,pt);
+        if(i<0) return -1;
+        nMatches++;
+      }
+      if(i<0) return -1;
+      int ii = nextMatch(i,pt);
+      if(ii >= 0) return ii;
+      if(!mostMatches.finite()) {
+        while(i >= 0) {
+          i = submatchInternal(i,pt);
+          if(i < 0) return -1;
+          ii = nextMatch(i,pt);
+          if(ii >= 0) return ii;
+        }
+      } else {
+        while(i >= 0) { // was i > 0, which skipped this loop when a lazy bounded match started at position 0
+          i = submatchInternal(i,pt);
+          if(i < 0) return -1;
+          nMatches++;
+          if(nMatches > mostMatches.intValue())
+            return -1;
+          ii = nextMatch(i,pt);
+          if(ii >= 0) return ii;
+        }
+      }
+      return -1;
+    }
+    int nMatches = 0; // greedy form from here on
+    while(fewestMatches.intValue() > nMatches) {
+      i=submatchInternal(i,pt);
+      if(i >= 0)
+        nMatches++;
+      else
+        return -1;
+    }
+    m=i; // m tracks the furthest position reached so far
+    if(mostMatches.finite()) {
+      while(nMatches < mostMatches.intValue()) {
+        i = submatchInternal(i,pt);
+        if(i>=0) {
+          m=i;
+          nMatches++;
+        } else break;
+      }
+    } else {
+      while(true) {
+        i = submatchInternal(i,pt);
+        if(i>=0) {
+          m=i;
+          nMatches++;
+        } else break;
+      }
+    }
+    while(m >= pos) { // back off one char at a time until the rest of the pattern matches
+      int r=nextMatch(m,pt);
+      if(r >= 0) return r;
+      m -= step;
+      nMatches--;
+      if(nMatches < fewestMatches.intValue())
+        return -1;
+    }
+    return -1;
+  }
+  Pattern clone1(Hashtable h) {
+    DotMulti dm = new DotMulti(fewestMatches,mostMatches);
+    dm.matchFewest = matchFewest;
+    return dm;
+  }
+}
diff --git a/src/com/stevesoft/pat/End.java b/src/com/stevesoft/pat/End.java
new file mode 100755
index 0000000..75e5acf
--- /dev/null
+++ b/src/com/stevesoft/pat/End.java
@@ -0,0 +1,42 @@
+//
+// This
software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +//class AddToEnd extends RegSyntax {}; + +/** Compiles the '$' or the '\Z' Pattern. It is + an error to have further Pattern elements after + '\Z'. It is the end of the String. */ +class End extends Pattern { + boolean retIsEnd; + End(boolean b) { retIsEnd = b; } + public int matchInternal(int pos,Pthings pt) { + if(retIsEnd && pt.mFlag && pos < pt.src.length()) { + if(pt.src.charAt(pos)=='\n') { + return nextMatch(pos,pt); + } + } + if(pt.src.length() == pos) + return nextMatch(pos,pt); + else if(pos= getl(v.elementAt(i))) { + Pattern p2 = (Pattern)v.elementAt(i); + char lo = min(getl(p),getl(p2)); + char hi = max(geth(p),geth(p2)); + nv.setElementAt(p=mkelem(lo,hi),nv.size()-1); + } else { + p = (Pattern)v.elementAt(i); + nv.addElement(p); + } + } + + b.v = v = nv; + } catch(RegSyntax e) { + e.printStackTrace(); + } + + // We don't want these things to be empty. + Vector negv = neg(v); + if(v.size()==1) return b; + if(negv.size()==1) { + b.v = negv; + b.neg = !b.neg; + return b; + } + + // Now consider if we can make a FastBracket. + // Uses a BitSet to do a lookup. + FastBracket fb = newbrack(v,b.neg); + if(fb == null) + fb = newbrack(negv,!b.neg); + if(fb != null) { + fb.parent = b.parent; + fb.next = b.next; + return fb; + } + + // return the normal Bracket. + return b; + } + + // Build a FastBracket and set bits. If this can't + // be done, return null. + final static FastBracket newbrack(Vector v,boolean neg) { + FastBracket fb = new FastBracket(neg); + fb.v = v; + if(v.size()==0) return null; + fb.min = getl(v.elementAt(0)); + fb.max = geth(v.elementAt(v.size()-1)); + if(fb.max-fb.min <= 256) { + fb.bs = new BitSet(fb.max-fb.min+1); + for(int i=0;ib ? 
a : b; + } + + // getl -- get lower value of Range object, + // or get value of oneChar object. + final static char getl(Object o) { + Pattern p = (Pattern)o; + if(p instanceof Range) + return ((Range)p).lo; + return ((oneChar)p).c; + } + // geth -- get higher value of Range object, + // or get value of oneChar object. + final static char geth(Object o) { + Pattern p = (Pattern)o; + if(p instanceof Range) + return ((Range)p).hi; + return ((oneChar)p).c; + } + + // This is the easy part! + public int matchInternal(int pos,Pthings pt) { + if(pos >= pt.src.length() || Masked(pos,pt)) return -1; + char c = pt.src.charAt(pos); + return (neg ^ (c >= min && c <= max && bs.get(c-min)) ) ? + nextMatch(pos+1,pt) : -1; + } +} diff --git a/src/com/stevesoft/pat/FastMulti.java b/src/com/stevesoft/pat/FastMulti.java new file mode 100755 index 0000000..2a38281 --- /dev/null +++ b/src/com/stevesoft/pat/FastMulti.java @@ -0,0 +1,111 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** A special case of Multi, implemented when minChars().equals(maxChars()), + * and some other conditions spelled out in RegOpt.safe4fm "Safe for + * FastMulti." It avoids stack growth problems as well as being slightly + * faster. 
+ */ +class FastMulti extends PatternSub { + patInt fewestMatches,mostMatches; + public patInt minChars() { + return sub.countMinChars().mul(fewestMatches); + } + public patInt maxChars() { + return sub.countMaxChars().mul(mostMatches); + } + public boolean matchFewest = false; + + FastMulti(patInt a,patInt b,Pattern p) throws RegSyntax { + if(p == null) RegSyntaxError.endItAll("Null length pattern "+ + "followed by *, +, or other Multi."); + fewestMatches = a; + mostMatches = b; + sub = p; + step = p.countMinChars().intValue(); + sub.setParent(null); + } + public String toString() { + return sub.toString()+"{" + +fewestMatches+","+mostMatches+"}"+ + (matchFewest ? "?" : "")+"(?# <= fast multi)"+ + nextString(); + } + int step = -1; + public int matchInternal(int pos,Pthings pt) { + int m=-1; + int i=pos; + int endstr = pt.src.length()-step; + patInt matches = new patInt(0); + if(matchFewest) { + if(fewestMatches.lessEq(matches)) { + int ii = nextMatch(i,pt); + if(ii >= 0) return ii; + } + while(i >= 0 && i <= endstr) { + i=sub.matchInternal(i,pt); + if(i >= 0) { + matches.inc(); + if(fewestMatches.lessEq(matches)) { + int ii = nextMatch(i,pt); + if(ii >= 0) return ii; + } + if(matches.equals(mostMatches)) + return -1; + } + } + return -1; + } + int nMatches = 0; + while(fewestMatches.intValue() > nMatches) { + i=sub.matchInternal(i,pt); + if(i >= 0) + nMatches++; + else + return -1; + } + m=i; + if(mostMatches.finite()) { + while(nMatches < mostMatches.intValue()) { + i = sub.matchInternal(i,pt); + if(i>=0) { + m=i; + nMatches++; + } else break; + } + } else { + while(true) { + i = sub.matchInternal(i,pt); + if(i>=0) { + m=i; + nMatches++; + } else break; + } + } + while(m >= pos) { + int r=nextMatch(m,pt); + if(r >= 0) return r; + m -= step; + nMatches--; + if(nMatches < fewestMatches.intValue()) + return -1; + } + return -1; + } + public Pattern clone1(Hashtable h) { + try { + FastMulti fm = new FastMulti(fewestMatches,mostMatches,sub.clone(h)); + fm.matchFewest 
= matchFewest; + return fm; + } catch(RegSyntax rs) { + return null; + } + } +} diff --git a/src/com/stevesoft/pat/FileRegex.java b/src/com/stevesoft/pat/FileRegex.java new file mode 100755 index 0000000..f5ac448 --- /dev/null +++ b/src/com/stevesoft/pat/FileRegex.java @@ -0,0 +1,215 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.io.*; +import java.util.*; + +/** This class is a different form of Regex designed to work more + like the file matching utility of a Unix shell. It is implemented + by some simple string transformations: +

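+ <table>
+ <tr> <td> FileRegex </td> <td> Regex </td> </tr>
+ <tr> <td> * </td> <td> .* </td> </tr>
+ <tr> <td> . </td> <td> \. </td> </tr>
+ <tr> <td> { </td> <td> (?: </td> </tr>
+ <tr> <td> {?! </td> <td> (?! </td> </tr>
+ <tr> <td> {?= </td> <td> (?= </td> </tr>
+ <tr> <td> {?? </td> <td> (?? </td> </tr>
+ <tr> <td> } </td> <td> ) </td> </tr>
+ <tr> <td> ? </td> <td> . </td> </tr>
+ <tr> <td> {,} </td> <td> (|) </td> </tr>
+ </table>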
+
+ Note that a FileRegex pattern always ends with the Regex + pattern element "$". If you like to experiment, try making + FileRegex's and then printing them out. The toString() method + does a decompile of the pattern to a standard Regex. Here are + some more complete examples: +
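+ <table>
+ <tr> <td> FileRegex </td> <td> Regex </td> </tr>
+ <tr> <td> *.java </td> <td> .*\.java$ </td> </tr>
+ <tr> <td> *.{java,html} </td> <td> .*\.(java|html)$ </td> </tr>
+ <tr> <td> foo.[chC] </td> <td> foo.[chC]$ </td> </tr>
+ </table>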
+
+ */ +public class FileRegex extends Regex { + /** Build an unitialized FileRegex. */ + public FileRegex() { dirflag=EITHER; } + /** Build a FileRegex form String s. */ + public FileRegex(String s) { + super(s); + dirflag = EITHER; + } + /** Compile a new pattern. + Throws @exception com.stevesoft.pat.RegSyntax for + nonsensical patterns like "[9-0]+" just as Regex does. + @see com.stevesoft.pat#compile(java.lang.String) + */ + public void compile(String s) throws RegSyntax { + String npat = toFileRegex(s); + super.compile(npat); + if(File.separatorChar == '\\') // MS-DOS + ignoreCase = true; + } + /** This is the method required by FileNameFilter. + To get a listing of files in the current directory + ending in .java, do this: +
+        File dot = new File(".");
+        FileRegex java_files = new FileRegex("*.java");
+        String[] file_list = dot.list(java_files);
+        
+ */ + public boolean accept(File dir,String s) { + if(dirflag != EITHER) { + File f = new File(s); + if(f.isDirectory() && dirflag == NONDIR) + return false; + if(!f.isDirectory() && dirflag == DIR) + return false; + } + return matchAt(s,0); + } + int dirflag = 0; + final static int EITHER=0,DIR=1,NONDIR=2; + + /** Provides an alternative to File.list -- this + separates its argument according to File.pathSeparator. + To each path, it splits off a directory -- all characters + up to and including the first instance of File.separator -- + and a file pattern -- the part that comes after the directory. + It then produces a list of all the pattern matches on all + the paths. Thus "*.java:../*.java" would produce a list of + all the java files in this directory and in the ".." directory + on a Unix machine. "*.java;..\\*.java" would do the same thing + on a Dos machine. */ + public static String[] list(String f) { + return list(f,EITHER); + } + static String[] list(String f,int df) { + //return list_(f,new FileRegex()); + StringTokenizer st = new StringTokenizer(f,File.pathSeparator); + Vector v = new Vector(); + while(st.hasMoreTokens()) { + String path = st.nextToken(); + list1(path,v,df,true); + } + String[] sa = new String[v.size()]; + v.copyInto(sa); + return sa; + } + final static Regex root=new Regex(File.separatorChar=='/' ? + "/$" : "(?:.:|)\\\\$"); + static void list1(String path,Vector v,int df,boolean rec) { + // if path looks like a/b/c/ or d:\ then add . + if(root.matchAt(path,0)) { + v.addElement(path+"."); + return; + } + File f = new File(path); + if(f.getParent() != null && rec) { + Vector v2 = new Vector(); + list1(f.getParent(),v2,DIR,true); + for(int i=0;i) syntax that matches + a balanced parenthesis. Not in perl 5. 
*/ +class Group extends Pattern { + char op,cl; + Group(char opi,char cli) { + op = opi; + cl = cli; + } + public int matchInternal(int pos,Pthings pt) { + int i,count=1; + if(pos < pt.src.length()) + if(!Masked(pos,pt) && pt.src.charAt(pos) != op) + return -1; + for(i=pos+1;i a > -1 for this + // to make sense. + if(!a.lessEq(b)) + //throw new BadMultiArgs(); + RegSyntaxError.endItAll("Bad Multi Args: "+a+">"+b); + patInt i = new patInt(-1); + if(a.lessEq(i)) + //throw new BadMultiArgs(); + RegSyntaxError.endItAll("Bad Multi Args: "+a+"< 0"); + } + public Pattern getNext() { + return nextRet; + } + int pos_old = -1; + public int matchInternal(int pos,Pthings pt) { + sub.setParent(this); + + int canUse = -1; + + // check for some forms of infinite recursion... + if(pos_old >= 0 && pos == pos_old) { + return -1; + } + pos_old = pos; + + if(matchMin.lessEq(count)) + canUse = pos; + if(!count.lessEq(matchMax) || pos > pt.src.length()) + return -1; + + if((matchFewest||count.equals(matchMax)) && canUse >= 0) { + Pattern n = super.getNext(); + if(n == null) + return canUse; + int ret = testMatch(n,pos,pt); + if(ret >= 0) { + return ret; + } + else canUse = -1; + } + + count.inc(); + try { + if(count.lessEq(matchMax)) { + int r = testMatch(sub,pos,pt); + if(r >= 0) + return r; + } + } finally { count.dec(); } + + if(!matchFewest && canUse >= 0) { + Pattern n = super.getNext(); + if(n == null) + return canUse; + int ret = testMatch(n,pos,pt); + return ret; + } else return canUse; + } + public Pattern clone1(Hashtable h) { + try { + Multi_stage2 m = new Multi_stage2(matchMin,matchMax,sub.clone(h)); + m.matchFewest = matchFewest; + return m; + } catch(RegSyntax rs) { + return null; + } + } +}; diff --git a/src/com/stevesoft/pat/NoPattern.java b/src/com/stevesoft/pat/NoPattern.java new file mode 100755 index 0000000..3bac329 --- /dev/null +++ b/src/com/stevesoft/pat/NoPattern.java @@ -0,0 +1,19 @@ +package// +// This software is now distributed according to +// the Lesser 
Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +com.stevesoft.pat; +import java.util.*; + +/** The idea behind this class is simply to eliminate the need for + * testing to see if Regex.thePattern is null. Every instruction + * we can eliminate from _search will help. + */ +public class NoPattern extends Pattern { + public String toString() { return "(?e=#)[^#d#D]"; } + public int matchInternal(int i,Pthings p) { return -1; } + Pattern clone1(Hashtable h) { return new NoPattern(); } +} diff --git a/src/com/stevesoft/pat/NonDirFileRegex.java b/src/com/stevesoft/pat/NonDirFileRegex.java new file mode 100755 index 0000000..92f54a3 --- /dev/null +++ b/src/com/stevesoft/pat/NonDirFileRegex.java @@ -0,0 +1,19 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.io.File; + +/** This class is just like FileRegex, except that its accept method + only returns true if the file matching the pattern is not a directory.*/ +public class NonDirFileRegex extends FileRegex { + public NonDirFileRegex() { dirflag = NONDIR; } + public NonDirFileRegex(String fp) { super(fp); dirflag = NONDIR; } + public static String[] list(String f) { + return list(f,NONDIR); + } +} diff --git a/src/com/stevesoft/pat/NotImplementedError.java b/src/com/stevesoft/pat/NotImplementedError.java new file mode 100755 index 0000000..f8835f2 --- /dev/null +++ b/src/com/stevesoft/pat/NotImplementedError.java @@ -0,0 +1,14 @@ +package// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! 
+// +com.stevesoft.pat; + +public class NotImplementedError extends Error { + public NotImplementedError(String s) { + super(s); + } +} diff --git a/src/com/stevesoft/pat/NullPattern.java b/src/com/stevesoft/pat/NullPattern.java new file mode 100755 index 0000000..082b0f3 --- /dev/null +++ b/src/com/stevesoft/pat/NullPattern.java @@ -0,0 +1,21 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** This pattern matches nothing -- it is found in patterns + * like (hello|world|) where a zero-length subelement occurs. + */ +class NullPattern extends Pattern { + public String toString() { return nextString(); } + public int matchInternal(int p,Pthings pt) { + return nextMatch(p,pt); + } + public patInt maxChars() { return new patInt(0); } + Pattern clone1(Hashtable h) { return new NullPattern(); } +} diff --git a/src/com/stevesoft/pat/NullRule.java b/src/com/stevesoft/pat/NullRule.java new file mode 100755 index 0000000..454c81e --- /dev/null +++ b/src/com/stevesoft/pat/NullRule.java @@ -0,0 +1,19 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** The apply(StringBufferLike sb,RegRes res) method of this derivation + of ReplaceRule appends nothing to the contents of the StringBuffer sb. 
+ @see com.stevesoft.pat.ReplaceRule + */ +public class NullRule extends ReplaceRule { + public NullRule() {} + public void apply(StringBufferLike sb,RegRes res) { + } + public String toString1() { return ""; } +} diff --git a/src/com/stevesoft/pat/Or.java b/src/com/stevesoft/pat/Or.java new file mode 100755 index 0000000..9791c74 --- /dev/null +++ b/src/com/stevesoft/pat/Or.java @@ -0,0 +1,81 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.util.*; + +/** This class implements the (?: ... ) extended Pattern. + It provides a base class from which we derive the + [ ... ], ( ... ), (?! ... ), and (?= ... ) patterns. */ +class Or extends Pattern { + Vector v; + Pattern[] pv = null; + Or() { v = new Vector(); } + String leftForm() { return "(?:"; } + String rightForm() { return ")"; } + String sepForm() { return "|"; } + public Or addOr(Pattern p) { + pv = null; + v.addElement(p); + p.setParent(this); + return this; + } + public String toString() { + int i; + StringBuffer sb = new StringBuffer(); + sb.append(leftForm()); + if(v.size()>0) + sb.append( ((Pattern)v.elementAt(0)).toString() ); + for(i=1;i= 0) + return r; + } + return -1; + } + public patInt minChars() { + if(v.size()==0) return new patInt(0); + patInt m = ((Pattern)v.elementAt(0)).countMinChars(); + for(int i=1;i pt.marks[id+pt.nMarks]) { + int swap = pt.marks[id]; + pt.marks[id] = pt.marks[id+pt.nMarks]+1; + pt.marks[id+pt.nMarks] = swap+1; + } + return ret; + } + public Pattern clone1(Hashtable h) { + OrMark om = new OrMark(id); + h.put(om,om); + h.put(this,om); + for(int i=0;i + If one attempts to access the last character as + follows: +
+    StringBuffer sb = ...;
+    ...
+    PartialBuffer pb = new PartialBuffer(sb);
+    char c = pb.charAt(pb.length()-1);
+    
+ then two things happen. First, a zero is returned + into the variable c. Second, the overRun flag is + set to "true." Accessing data beyond the end of + the buffer is considered an "overRun" of the data. +

+ This can be helpful in determining whether more + characters are required for a match to occur, as + the pseudo-code below illustrates. +

+    int i = ...;
+    Regex r = new Regex("some pattern");
+    pb.allowOverRun = true;
+    pb.overRun = false;
+    boolean result = r.matchAt(pb,i);
+    if(pb.overRun) {
+      // The result of the match is not relevant, regardless
+      // of whether result is true or false.  We need to
+      // append more data to the buffer and try again.
+      ....
+      sb.append(more data);
+    }
+    
+ */ +class PartialBuffer implements StringLike { + int off; + public boolean allowOverRun = true; + public boolean overRun = false; + StringBuffer sb; + PartialBuffer(StringBuffer sb) { + this.sb = sb; + } + public char charAt(int n) { + n += off; + if(n == sb.length()) { + overRun = true; + return 0; + } + return sb.charAt(n); + } + public int length() { + return allowOverRun ? sb.length()+1 : sb.length(); + } + public int indexOf(char c) { + for(int i=0;iCopyright 2001, Steven R. Brandt +*/ /** +Class Pattern is the base class on which all the other pattern +elements are built. */ + +public abstract class Pattern { + /** The ESC character, the user can provide his own value + for the escape character through regex.esc */ + public final static char ESC = '\\'; + final static String PROTECT_THESE = "[]{}(),$,-\"^."; + + /** The interal match function, it must be provided by any + class which wishes to extend Pattern. */ + public abstract int matchInternal(int i,Pthings p); + public abstract String toString(); + + // Class Pattern is a singly linked list + // chained together by member next. The member + // parent is used so that sub patterns can access + // the chain they are branching from. + Pattern next=null,parent=null; + + /** This gets the next element of a Pattern that + we wish to match. If we are at the end of a + subchain of patterns, it will return us to the + parent chain. */ + public Pattern getNext() { + return next != null ? next : + (parent == null ? null : parent.getNext()); + } + /** Call this method if you have a pattern element that + takes a sub pattern (such as Or), and + after you have added a sub pattern to the current + pattern element. */ + public void setParent(Pattern p) { + if(next != null) next.setParent(p); + else parent = p; + } + /** This determines if the remainder of a Pattern + matches. Type "return nextMatch" from within + matchInternal if the current + Pattern matches. 
Otherwise, return a -1.*/ + public int nextMatch(int i,Pthings pt) { + Pattern p = getNext(); + /*if(p == null) return i; + return p.matchInternal(i,pt);*/ + return p==null ? i : p.matchInternal(i,pt); + } + /** This is a toString() for the remainder + of the Pattern elements after this one. + use this when overriding toString(). Called from + within toString(). */ + public String nextString() { + if(next == null) return ""; + return next.toString(); + } + + /** a method to detect whether char c is in String s */ + final static boolean inString(char c,String s) { + int i; + for(i=0;itrans3.html. + */ +public class PopRule extends SpecialRule { + public PopRule() {} + public String toString1() { return "${POP}"; } +} diff --git a/src/com/stevesoft/pat/Prop.java b/src/com/stevesoft/pat/Prop.java new file mode 100755 index 0000000..9319db5 --- /dev/null +++ b/src/com/stevesoft/pat/Prop.java @@ -0,0 +1,52 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** Get Unicode properties for a character. See +http://unicode.org. */ +public class Prop { + /** Is this a "Decimal Digit" according to Unicode? */ + public final static boolean isDecimalDigit(char c) { + if(Bits.decimal_digit == null) + Bits.decimal_digit_f(); + return Bits.decimal_digit.get(c); + } + /** Is this a "Alphabetic" according to Unicode? */ + public final static boolean isAlphabetic(char c) { + if(Bits.letter == null) + Bits.letter_f(); + return Bits.letter.get(c); + } + /** Is this a "Math" according to Unicode? */ + public final static boolean isMath(char c) { + if(Bits.math == null) + Bits.math_f(); + return Bits.math.get(c); + } + + /** Is this a "Currency" according to Unicode? 
*/ + public final static boolean isCurrency(char c) { + if(Bits.currency == null) + Bits.currency_f(); + return Bits.currency.get(c); + } + + /** Is c a white space character according to Unicode? */ + public final static boolean isWhite(char c) { + if(Bits.white == null) + Bits.white_f(); + return Bits.white.get(c); + } + + /** Is c a punctuation character according to Unicode? */ + public final static boolean isPunct(char c) { + if(Bits.punct == null) + Bits.punct_f(); + return Bits.punct.get(c); + } +} diff --git a/src/com/stevesoft/pat/Pthings.java b/src/com/stevesoft/pat/Pthings.java new file mode 100755 index 0000000..0a465ce --- /dev/null +++ b/src/com/stevesoft/pat/Pthings.java @@ -0,0 +1,33 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.*; + +/** +Things that need to be kept track of during a + match. Passed along with Pattern.matchInternal. */ +public class Pthings { + /** The current text we are attempting to match. */ + public StringLike src; + /** Whether we should ignore the case of letters in + this match. */ + public boolean ignoreCase; + public boolean mFlag; + /** The mask to use when dontMatchInQuotes is set. */ + public BitSet cbits; + /** Used to keep track of backreferences. */ + //public Hashtable marks; + public int[] marks; + public int nMarks; + /** Used to set the behavior of "." By default, it + now fails to match the '\n' character. */ + public boolean dotDoesntMatchCR; + /** Determine if Skipped strings need to be checked. 
*/ + public boolean no_check; + int lastPos; +} diff --git a/src/com/stevesoft/pat/PushRule.java b/src/com/stevesoft/pat/PushRule.java new file mode 100755 index 0000000..cdc33f9 --- /dev/null +++ b/src/com/stevesoft/pat/PushRule.java @@ -0,0 +1,22 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** See the example file + trans3.java for + further examples of how this is used. You will probably not + want to call it directly. */ +public class PushRule extends SpecialRule { + Regex NewRule; + public PushRule(PushRule p) { NewRule = p.NewRule; } + public PushRule(String nm,Regex rr) { name=nm; NewRule = rr; } + public PushRule(String nm,Transformer tr) { name = nm; NewRule = tr.rp; } + public Object clone1() { return new PushRule(this); } + public String String1() { return "${+"+name+"}"; } + public void apply(StringBufferLike sbl,RegRes rr) {} +} diff --git a/src/com/stevesoft/pat/RBuffer.java b/src/com/stevesoft/pat/RBuffer.java new file mode 100755 index 0000000..08fb920 --- /dev/null +++ b/src/com/stevesoft/pat/RBuffer.java @@ -0,0 +1,36 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.io.*; +import com.stevesoft.pat.wrap.*; + +/** This class is used internally by RegexReader to + store blocks of data. 
*/ +class RBuffer { + boolean done = false; + StringBuffer sb; + int pos,epos; + RBuffer next; + RBuffer() {} + RBuffer(StringBuffer sb) { this.sb = sb; } + public String toString() { + return "sb="+sb.toString().replace('\n',' ')+ + " pos="+pos+" epos="+epos+ + " sb.length()="+sb.length()+ + "\n"+sp(pos+3)+"^"+sp(epos-pos-1)+"^"; + } + String sp(int n) { + if(n<=0) + return ""; + StringBuffer sb = new StringBuffer(n); + for(int i=0;i= hi) + //throw new BadRangeArgs(); + RegSyntaxError.endItAll("Badly formed []'s : "+lo+" >= "+hi); + o = new oneChar(lo); + altlo = o.altc; + o = new oneChar(hi); + althi = o.altc; + } + public int matchInternal(int pos,Pthings pt) { + if(pos >= pt.src.length()) return -1; + if(Masked(pos,pt)) return -1; + char c = pt.src.charAt(pos); + if(lo <= c && c <= hi || + (pt.ignoreCase && (altlo <= c && c <= althi))) + return nextMatch(pos+1,pt); + return -1; + } + public patInt minChars() { return new patInt(1); } + public patInt maxChars() { return new patInt(1); } + public Pattern clone1(Hashtable h) { + try { + Range r = new Range(lo,hi); + r.printBrackets = printBrackets; + return r; + } catch(RegSyntax rs) { + return null; + } + } +}; diff --git a/src/com/stevesoft/pat/RegOpt.java b/src/com/stevesoft/pat/RegOpt.java new file mode 100755 index 0000000..1033500 --- /dev/null +++ b/src/com/stevesoft/pat/RegOpt.java @@ -0,0 +1,336 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.*; +import java.io.*; + +/** This class is just like oneChar, but doesn't worry about case. */ +class FastChar extends oneChar { + FastChar(char c) { super(c); } + public int matchInternal(int p,Pthings pt) { + return (p < pt.src.length() + && pt.src.charAt(p)==c) ? 
+ nextMatch(p+1,pt) : -1; + } + Pattern clone1(Hashtable h) { + return new FastChar(c); + } +} + +/** This class is a hashtable keyed by Character + * Objects. It is used to match things of the + * form (?:a..|b..|c..|..) match with greater efficiency -- + * by using a Hashtable that indexes into the group + * of patterns. + */ +class Branch extends Pattern { + Hashtable h = new Hashtable(); + // We need to keep track of the order + // of the keys -- if we don't then + // recompiling the output of toString + // may produce errors by re-ordering + // ()'s and changing the id number of + // the backreference associated with + // a subpattern. + Vector keys = new Vector(); + Branch() {} + Pattern clone1(Hashtable x) { + Branch b = new Branch(); + b.keys = (Vector)keys.clone(); + x.put(this,b); + x.put(b,b); + + for(int i=0;i (b(?:oo|ug)) + // during this process. However, we + // want (b(?:oo|ell)|bug) + NullPattern np = new NullPattern(); + np.add(n); + h.put(c,np); + } else { + h.put(c,n); + } + // Make sure we remember the order things were + // added into the Branch object so that we can + // properly convert it to a String. + keys.addElement(c); + } else if(p instanceof Or) { + ((Or)p).addOr(n); + } else if(p instanceof oneChar && n instanceof oneChar + && ((oneChar)p).c != ((oneChar)n).c) { + Branch b = new Branch(); + b.addc((oneChar)p,igc,dontMinQ); + b.addc((oneChar)n,igc,dontMinQ); + h.put(c,b); + b.setParent(this); + } else if(p instanceof Branch && n instanceof oneChar) { + ((Branch)p).addc((oneChar)n,igc,dontMinQ); + n.setParent(p); + } else { + // Create an Or object to receive the variety + // of branches in the pattern if the current letter + // is matched. We do not attempt to make these + // sub-branches into a Branch object yet. + Or o = new Or(); + o.setParent(this); + + // Remove NullPattern from p -- it's no longer needed. 
+ if(p instanceof NullPattern + && p.parent == null && p.next != null) { + o.addOr(p.next); + } else { + o.addOr(p); + } + o.addOr(n); + + Pattern optpat = RegOpt.opt(o,igc,dontMinQ); + h.put(c,optpat); + optpat.setParent(this); + } + } + public String toString() { + StringBuffer sb = new StringBuffer(); + // should protect this... + sb.append("(?:(?#branch)");// Hashtable)"); + for(int i=0;i= pt.src.length()) return -1; + Pattern n = (Pattern)h.get(new Character(pt.src.charAt(pos))); + if(n == null) return -1; + if(pt.cbits != null && pt.cbits.get(pos)) return -1; + return n.matchInternal(pos+1,pt); + } +} + +/** This is just a place to put the optimizing function. + It is never instantiated as an Object. It just sorts + through the RegOpt looking for things it can change + and make faster. */ +public class RegOpt { + static Pattern opt(Pattern p,boolean ignoreCase, + boolean dontMinQ) { + if(p == null) return p; + if(p instanceof Bracket) { + Bracket b = (Bracket)p; + // FastBracket is the only special + // optimized class to have its own + // source file. + p = FastBracket.process(b,ignoreCase); + //if(!(p instanceof FastBracket) + //p = Switch.process(b,ignoreCase); + p.next = b.next; + p.parent = b.parent; + } else if(p instanceof oneChar && !ignoreCase + && !dontMinQ) { + oneChar o = (oneChar)p; + p = new FastChar(o.c); + p.next = o.next; + p.parent = o.parent; + } else if(p instanceof Or + && ((Or)p).leftForm().equals("(?:") + && ((Or)p).v.size()==1) { // Eliminate this Or Object. 
+ Or o = (Or)p; + p = (Pattern)o.v.elementAt(0); + p.setParent(null); + p = RegOpt.opt(p,ignoreCase,dontMinQ); + p.add(o.next); + } else if(p instanceof Or) { + Or o = (Or)p; + o.pv = null; + Vector v = o.v; + o.v = new Vector(); + Branch b = new Branch(); + b.parent = o.parent; + for(int i=0;i=1 || + (i+1 0) { + Pattern p2 = (Pattern)b.reduce(ignoreCase,dontMinQ); + if(p2 != null) { + o.addOr(p2); + b = new Branch(); + b.parent = o.parent; + } + } + o.addOr(opt(pp,ignoreCase,dontMinQ)); + } + } + if(b.keys.size()>0) { + Pattern p2=(Pattern)b.reduce(ignoreCase,dontMinQ); + if(p2 != null) + o.addOr(p2); + } + if(o.v.size()==1 + && o.leftForm().equals("(?:")) { // Eliminate Or Object + p = (Pattern)o.v.elementAt(0); + p.setParent(null); + p = RegOpt.opt(p,ignoreCase,dontMinQ); + p.add(o.next); + } + } else if(p instanceof FastMulti) { + PatternSub ps = (PatternSub)p; + ps.sub = RegOpt.opt(ps.sub,ignoreCase,dontMinQ); + } else if(p instanceof Multi && safe4fm( ((PatternSub)p).sub )) { + Multi m = (Multi)p; + FastMulti fm = null; + try { + fm = new FastMulti(m.a,m.b, + opt(m.sub,ignoreCase,dontMinQ)); + } catch(RegSyntax rs) {} + fm.parent = m.parent; + fm.matchFewest = m.matchFewest; + fm.next = m.next; + p = fm; + } + if(p.next != null) + p.next = opt(p.next,ignoreCase,dontMinQ); + return p; + } + final static boolean safe4fm(Pattern x) { + while(x != null) { + if(x instanceof Bracket) + ; + else if(x instanceof Range) + ; + else if(x instanceof oneChar) + ; + else if(x instanceof Any) + ; + else if(x instanceof Custom + && ((Custom)x).v instanceof UniValidator) + ; + else if(x instanceof Or) { + Or o = (Or)x; + if(!o.leftForm().equals("(?:")) + return false; + patInt lo = o.countMinChars(); + patInt hi = o.countMaxChars(); + if(!lo.equals(hi)) + return false; + for(int i=0;iCopyright 2001, Steven R. 
Brandt +*/ /** +This class is used to store a result from Regex */ +public class RegRes implements Cloneable { + protected int[] marks = null; + protected boolean didMatch_ = false; + protected StringLike src=null; + + /** Obtain the text String that was matched against. */ + public String getString() { return src.toString(); } + /** Obtain the source StringLike object. */ + public StringLike getStringLike() { return src; } + protected int charsMatched_=0,matchFrom_=0,numSubs_=0; + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("match="+matchedFrom()+":"+charsMatched()); + if(!didMatch()) return sb.toString(); + for(int i=0;inumSubs_) return -1; + //Integer in=(Integer)marks.get("left"+i); + //return in == null ? -1 : in.intValue(); + return marks[i]; + } + /** Obtains the number of characters matched by backreference i, or + -1 if backreference i was not matched. */ + public int charsMatched(int i) { + if(marks==null||i>numSubs_||!didMatch_) return -1; + //Integer in = (Integer)marks.get("right"+i); + //int i2 = in==null ? -1 : in.intValue(); + int mf = matchedFrom(i); + return mf < 0 ? -1 : marks[i+numSubs_]-matchedFrom(i); + } + /** This is either equal to matchedFrom(i)+charsMatched(i) if the match + was successful, or -1 if it was not. */ + public int matchedTo(int i) { + if(marks==null||i>numSubs_||!didMatch_) return -1; + return marks[i+numSubs_]; + } + /** Obtains a substring matching the nth set + of parenthesis from the pattern. See + numSubs(void), or null if the nth backrefence did + not match. */ + public String stringMatched(int i) { + int mf = matchedFrom(i), cm = charsMatched(i); + return !didMatch_ || mf<0 || cm<0 ? null : + src.substring(mf,mf+cm); + } + /** This returns the part of the string that preceeds the match, + or null if the match failed.*/ + public String left() { + int mf = matchedFrom(); + return !didMatch_ || (mf<0) ? 
null : src.substring(0,mf); + } + /** This returns the part of the string that follows the ith + backreference, or null if the backreference did not match. */ + public String left(int i) { + int mf = matchedFrom(i); + return !didMatch_ || (mf<0) ? null : src.substring(0,mf); + } + /** This returns the part of the string that follows the match, + or null if the backreference did not match.*/ + public String right() { + int mf = matchedFrom(), cm = charsMatched(); + return !didMatch_ || mf<0 || cm<0 ? null : src.substring(mf+ + cm,src.length()); + } + /** This returns the string to the right of the ith backreference, + or null if the backreference did not match. */ + public String right(int i) { + int mf = matchedFrom(i), cm = charsMatched(i); + return !didMatch_ || mf<0 || cm<0 ? null : + src.substring(mf+cm,src.length()); + } + /** After a successful match, this returns the location of + the first matching character, or -1 if the match failed.*/ + public int matchedFrom() { return !didMatch_ ? -1 : matchFrom_; } + /** After a successful match, this returns the number of + characters in the match, or -1 if the match failed. */ + public int charsMatched() { return !didMatch_||matchFrom_<0 ? -1 : charsMatched_; } + /** This is matchedFrom()+charsMatched() after a successful match, + or -1 otherwise. */ + public int matchedTo() { return !didMatch_ ? -1 : matchFrom_+charsMatched_;} + /** This returns the number of + backreferences (parenthesis) in the pattern, + i.e. the pattern "(ab)" has + one, the pattern "(a)(b)" has two, etc. */ + public int numSubs() { return numSubs_; } + /** Contains true if the last match was successful. */ + public boolean didMatch() { return didMatch_; } + + /** An older name for matchedFrom. */ + public int matchFrom() { return matchedFrom(); } + /** An older name for stringMatched(). */ + public String substring() { return stringMatched(); } + /** An older name for matchedFrom. 
*/ + public int matchFrom(int i) { return matchedFrom(i); } + /** An older name for stringMatched. */ + public String substring(int i) { return stringMatched(i); } +} diff --git a/src/com/stevesoft/pat/RegSyntax.java b/src/com/stevesoft/pat/RegSyntax.java new file mode 100755 index 0000000..1a51d9e --- /dev/null +++ b/src/com/stevesoft/pat/RegSyntax.java @@ -0,0 +1,24 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/* + Shareware: package pat + Copyright 2001, Steven R. Brandt +*/ +/** +This type of syntax error is thrown whenever a syntax error + is encountered in the pattern. It may not be caught directly, as + it is not in the throws clause of any method. To detect it, catch + Throwable, and use instanceof to see if it is a RegSyntax. */ +public class RegSyntax extends Exception { + RegSyntax() {} + RegSyntax(String msg) { + super(msg); + } +}; diff --git a/src/com/stevesoft/pat/RegSyntaxError.java b/src/com/stevesoft/pat/RegSyntaxError.java new file mode 100755 index 0000000..c403412 --- /dev/null +++ b/src/com/stevesoft/pat/RegSyntaxError.java @@ -0,0 +1,29 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** When enabled, this class is thrown instead of + the normal RegSyntax. Thus, enabling of this + class will make your debugging easier -- but + if you leave it on and forget to catch RegSyntaxError + a user-supplied pattern could generate a + RegSyntaxError that will kill your application. + + I strongly recommend turning this flag on, however, + as I think it is more likely to help than to hurt + your programming efforts. 
+ */ +public class RegSyntaxError extends Error { + public static boolean RegSyntaxErrorEnabled = false; + public RegSyntaxError() {} + public RegSyntaxError(String s) { super(s); } + final static void endItAll(String s) throws RegSyntax { + if(RegSyntaxErrorEnabled) throw new RegSyntaxError(s); + throw new RegSyntax(s); + } +} diff --git a/src/com/stevesoft/pat/Regex.java b/src/com/stevesoft/pat/Regex.java new file mode 100755 index 0000000..c7379de --- /dev/null +++ b/src/com/stevesoft/pat/Regex.java @@ -0,0 +1,1429 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.*; +import java.io.*; +import com.stevesoft.pat.wrap.StringWrap; + + +/** Matches a Unicode punctuation character. */ +class UnicodePunct extends UniValidator { + public int validate(StringLike s,int from,int to) { + return from= s.length()) return -1; + char c = s.charAt(from); + return (Prop.isAlphabetic(c)||Prop.isDecimalDigit(c)||c=='_') ? to : -1; + } +} + +/** Matches a character that is not a Unicode alphanumeric or underscore. */ +class NUnicodeW extends UniValidator { + public int validate(StringLike s,int from,int to) { + if(from >= s.length()) return -1; + char c = s.charAt(from); + return !(Prop.isAlphabetic(c)||Prop.isDecimalDigit(c)||c=='_') ? to : -1; + } +} + +/** Matches a Unicode decimal digit. */ +class UnicodeDigit extends UniValidator { + public int validate(StringLike s,int from,int to) { + return from +For the purpose of this documentation, the fact that java interprets the +backslash will be ignored. In practice, however, you will need a +double backslash to obtain a string that contains a single backslash +character. Thus, the example pattern "\b" should really be typed +as "\\b" inside java code. +

+Note that Regex is part of package "com.stevesoft.pat". +To use it, simply import +com.stevesoft.pat.Regex at the top of your file. +

+Regex is made with a constructor that takes a String that defines +the regular expression. Thus, for example +

+      Regex r = new Regex("[a-c]*");
+
+matches any number of characters so long as they are 'a', 'b', or 'c'. +

+To attempt to match the Pattern to a given string, you can use either +the search(String) member function, or the matchAt(String,int position) +member function. These functions return a boolean which tells you +whether or not the match succeeded, and set the values returned by "charsMatched()" +and "matchedFrom()" in the Regex object appropriately. +

+The portion of the string before the match can be obtained by the +left() member, and the portion after the match can be obtained +by the right() member. +

+Essentially, this package implements a syntax that is very much +like the perl 5 regular expression syntax. + +Longer example: +

+        Regex r = new Regex("x(a|b)y");
+        r.matchAt("xay",0);
+        System.out.println("sub = "+r.stringMatched(1));
+
+The above would print "sub = a". +
+        r.left() // would return "x"
+        r.right() // would return "y"
+
+

+Differences between this package and perl5:
+The extended Pattern for setting flags is now supported, +but the flags are different. "(?i)" tells the pattern to +ignore case, "(?Q)" sets the "dontMatchInQuotes" flag, and +"(?iQ)" sets them both. You can change the escape character. +The pattern

(?e=#)#d+
is the same as
\d+
, +but note that the sequence
(?e=#)
must occur +at the very beginning of the pattern. There may be other small +differences as well. I will either make my package conform +or note them as I become aware of them. +

+This package supports additional patterns not in perl5: +

+ + + +
(?@()) -- Group -- This matches all characters between +the '(' character and the balancing ')' character. Thus, it will +match "()" as well as "(())". The balancing characters are +arbitrary, thus (?@{}) matches on "{}" and "{{}}".
(?<1) -- Backup -- Moves the pointer backwards within the text. +This allows you to make a "look behind." It fails if it +attempts to move to a position before the beginning of the string. +"x(?<1)" is equivalent to "(?=x)". The number, 1 in this example, +is the number of characters to move backwards.
+
+ +@author Steven R. Brandt +@version package com.stevesoft.pat, release 1.5.3 +@see Pattern +*/ +public class Regex extends RegRes implements FilenameFilter { + /** BackRefOffset gives the identity number of the first + pattern. Version 1.0 used zero, version 1.1 uses 1 to be + more compatible with perl. */ + static int BackRefOffset = 1; + /** Shared NoPattern instance used as the initial value of thePattern. */ private static Pattern none = new NoPattern(); + /** The compiled pattern; starts out as none. */ Pattern thePattern = none; + patInt minMatch = new patInt(0); + + /** Registry filled by the define methods: maps a rule name to a Validator or to a pattern String. */ static Hashtable validators = new Hashtable(); + static { /* Register the built-in validators under their short names. */ + define("p","(?>1)",new UnicodePunct()); + define("P","(?>1)",new NUnicodePunct()); + define("s","(?>1)",new UnicodeWhite()); + define("S","(?>1)",new NUnicodeWhite()); + define("w","(?>1)",new UnicodeW()); + define("W","(?>1)",new NUnicodeW()); + define("d","(?>1)",new UnicodeDigit()); + define("D","(?>1)",new NUnicodeDigit()); + define("m","(?>1)",new UnicodeMath()); + define("M","(?>1)",new NUnicodeMath()); + define("c","(?>1)",new UnicodeCurrency()); + define("C","(?>1)",new NUnicodeCurrency()); + define("a","(?>1)",new UnicodeAlpha()); + define("A","(?>1)",new NUnicodeAlpha()); + define("uc","(?>1)",new UnicodeUpper()); + define("lc","(?>1)",new UnicodeLower()); + } + + /** Set the dontMatchInQuotes flag. */ + public void setDontMatchInQuotes(boolean b) { + dontMatchInQuotes = b; + } + /** Find out if the dontMatchInQuotes flag is enabled. */ + public boolean getDontMatchInQuotes() { + return dontMatchInQuotes; + } + boolean dontMatchInQuotes = false; + + /** Set the state of the ignoreCase flag. If set to true, then + the pattern matcher will ignore case when searching for a + match. */ + public void setIgnoreCase(boolean b) { + ignoreCase = b; + } + /** Get the state of the ignoreCase flag. Returns true if we + are ignoring the case of the pattern, false otherwise. */ + public boolean getIgnoreCase() { + return ignoreCase; + } + boolean ignoreCase = false; + + static boolean defaultMFlag = false; + /** Set the default value of the m flag. 
If it + is set to true, then the MFlag will be on + for any regex search executed. */ + public static void setDefaultMFlag(boolean mFlag) { + defaultMFlag = mFlag; + } + /** Get the default value of the m flag. If it + is set to true, then the MFlag will be on + for any regex search executed (prep ORs it with the + per-instance mFlag). */ + public static boolean getDefaultMFlag() { + return defaultMFlag; + } + + /** Initializes the object without a Pattern. To supply a Pattern + use compile(String s). + @see com.stevesoft.pat.Regex#compile(java.lang.String) + */ + public Regex() {} + /** Create and compile a Regex, but do not throw any exceptions. + If you wish to have exceptions thrown for syntax errors, + you must use the Regex(void) constructor to create the + Regex object, and then call the compile method. Therefore, you + should only call this constructor when you know your pattern is right. + A RegSyntax thrown by compile is caught here and discarded. + @see com.stevesoft.pat.Regex#search(java.lang.String) + @see com.stevesoft.pat.Regex#compile(java.lang.String) + */ + public Regex(String s) { + try { + compile(s); + } catch(RegSyntax rs) { /* ignored on purpose: this constructor never reports syntax errors */ } + } + + /** The current ReplaceRule (see getReplaceRule); null until one is set. */ ReplaceRule rep = null; + /** Create and compile both a Regex and a ReplaceRule. + The rule is built from rp with ReplaceRule.perlCode. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#compile(java.lang.String) + */ + public Regex(String s,String rp) { + this(s); + rep = ReplaceRule.perlCode(rp); + } + /** Create and compile a Regex, but give it the ReplaceRule + specified. This allows the user finer control of the + Replacement process, if that is desired. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#compile(java.lang.String) + */ + public Regex(String s,ReplaceRule rp) { + this(s); + rep = rp; + } + + /** Change the ReplaceRule of this Regex by compiling + a new one using String rp. Also discards the cached Replacer. */ + public void setReplaceRule(String rp) { + rep = ReplaceRule.perlCode(rp); + repr = null; // Clear Replacer history + } + + /** Change the ReplaceRule of this Regex to rp. 
*/ + public void setReplaceRule(ReplaceRule rp) { + rep = rp; /* NOTE(review): unlike setReplaceRule(String), repr is not reset here -- confirm this is intended */ + } + /** Test to see if a custom defined rule exists. + @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator) + */ + public static boolean isDefined(String nm) { + return validators.get(nm) != null; + } + /** Removes a custom defined rule. + @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator) + */ + public static void undefine(String nm) { + validators.remove(nm); + } + /** Defines a method to create a new rule: sets v.pattern to pat + and registers v in validators under the name nm. See test/deriv2.java + and test/deriv3.java for examples of how to use it. */ + public static void define(String nm,String pat,Validator v) { + v.pattern = pat; + validators.put(nm,v); + } + /** Defines a shorthand for a pattern by storing the String pat + in validators under the name nm. The pattern will be + invoked by a string that has the form "(??"+nm+")". + */ + public static void define(String nm,String pat) { + validators.put(nm,pat); + } + + /** Get the current ReplaceRule. */ + public ReplaceRule getReplaceRule() { return rep; } + + /** Cached Replacer, created on first use by _getReplacer or getReplacer. */ Replacer repr = null; + /** Returns repr, creating it first if it is still null. */ final Replacer _getReplacer() { + return repr==null ? repr=new Replacer() : repr; + } + /** Returns repr (creating it if it is still null) after setting + its rh.me to this Regex and its rh.prev to null. */ public Replacer getReplacer() { + if(repr == null) + repr = new Replacer(); + repr.rh.me = this; + repr.rh.prev = null; + return repr; + } + /** Replace the first occurrence of this pattern in String s + according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceFirst(String s) { + return _getReplacer().replaceFirstRegion(s,this,0,s.length()).toString(); + } + /** Replace the first occurence of this pattern in String s + beginning with position pos according to the ReplaceRule. 
+ @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceFirstFrom(String s,int pos) { + return _getReplacer().replaceFirstRegion(s,this,pos,s.length()).toString(); + } + /** Replace the first occurrence of this pattern in String s + beginning with position start and ending with end + according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceFirstRegion(String s,int start,int end) { + return _getReplacer().replaceFirstRegion(s,this,start,end).toString(); + } + + /** Replace all occurrences of this pattern in String s + according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceAll(String s) { + return _getReplacer().replaceAllRegion(s,this,0,s.length()).toString(); + } + /** Same call as replaceAll(String), but takes a StringLike and returns + the StringLike produced by the Replacer without converting it with toString(). */ public StringLike replaceAll(StringLike s) { + return _getReplacer().replaceAllRegion(s,this,0,s.length()); + } + /** Replace all occurrences of this pattern in String s + beginning with position pos according to the ReplaceRule. + @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceAllFrom(String s,int pos) { + return _getReplacer().replaceAllRegion(s,this,pos,s.length()).toString(); + } + /** Replace all occurences of this pattern in String s + beginning with position start and ending with end + according to the ReplaceRule. 
+ @see com.stevesoft.pat.ReplaceRule + @see com.stevesoft.pat.Regex#getReplaceRule() + */ + public String replaceAllRegion(String s,int start,int end) { + return _getReplacer().replaceAllRegion(s,this,start,end).toString(); + } + + + /** Essentially clones the Regex object */ + public Regex(Regex r) { + super((RegRes)r); + dontMatchInQuotes = r.dontMatchInQuotes; + esc = r.esc; + ignoreCase = r.ignoreCase; + gFlag = r.gFlag; + if(r.rep==null) + rep = null; + else + rep = (ReplaceRule)r.rep.clone(); + /* try { + compile(r.toString()); + } catch(RegSyntax r_) {} */ + thePattern = r.thePattern.clone(new Hashtable()); + minMatch = r.minMatch; + skipper = r.skipper; + } + + /** By default, + the escape character is the backslash, but you can + make it anything you want by setting this variable. */ + public char esc = Pattern.ESC; + /** This method compiles a regular expression, making it + possible to call the search or matchAt methods. + @exception com.stevesoft.pat.RegSyntax + is thrown if a syntax error is encountered + in the pattern. + For example, "x{3,1}" or "*a" are not valid + patterns. + @see com.stevesoft.pat.Regex#search + @see com.stevesoft.pat.Regex#matchAt + */ + public void compile(String prepat) throws RegSyntax { + String postpat = parsePerl.codify(prepat,true); + String pat = postpat==null ? 
prepat : postpat; + minMatch = null; + ignoreCase = false; + dontMatchInQuotes = false; + Rthings mk = new Rthings(this); + int offset = mk.val; + String newpat = pat; + thePattern = none; + p = null; + or = null; + minMatch = new patInt(0); + StrPos sp = new StrPos(pat,0); + if(sp.incMatch("(?e=")) { + char newEsc = sp.c; + sp.inc(); + if(sp.match(')')) + newpat = reEscape(pat.substring(6), + newEsc,Pattern.ESC); + } else if(esc != Pattern.ESC) + newpat = reEscape(pat,esc,Pattern.ESC); + thePattern = _compile(newpat,mk); + numSubs_ = mk.val-offset; + mk.set(this); + } + + /* If a Regex is compared against a Regex, a check is + done to see that the patterns are equal as well as + the most recent match. If a Regex is compare with + a RegRes, only the result of the most recent match + is compared. */ + public boolean equals(Object o) { + if(o instanceof Regex) { + if(toString().equals(o.toString())) + return super.equals(o); + else + return false; + } else return super.equals(o); + } + + /** A clone by any other name would smell as sweet. */ + public Object clone() { + return new Regex(this); + } + /** Return a clone of the underlying RegRes object. */ + public RegRes result() { + return (RegRes)super.clone(); + } + + // prep sets global variables of class + // Pattern so that it can access them + // during an attempt at a match + Pthings pt = new Pthings(); + final Pthings prep(StringLike s) { + //if(gFlag) + pt.lastPos = matchedTo(); + if(pt.lastPos < 0) pt.lastPos = 0; + if( (s==null ? null : s.unwrap()) != (src==null ? 
null : s.unwrap()) ) /* NOTE(review): both operands of this comparison unwrap s; the right-hand side probably meant src.unwrap() -- confirm */ + pt.lastPos = 0; + src = s; + pt.dotDoesntMatchCR=dotDoesntMatchCR && (!sFlag); + pt.mFlag = (mFlag | defaultMFlag); + pt.ignoreCase = ignoreCase; + pt.no_check = false; + if(pt.marks != null) + for(int i=0;istart*/ + public boolean searchFrom(String s,int start) { + if(s==null) + throw new NullPointerException("Null String Given to Regex.searchFrom"); + return _search(s,start,s.length()); + } + /** StringLike variant of searchFrom: searches from start to s.length(). */ public boolean searchFrom(StringLike s,int start) { + if(s==null) + throw new NullPointerException("Null String Given to Regex.searchFrom"); + return _search(s,start,s.length()); + } + /** Search through a region of a String + for the first occurrence of a match. */ + public boolean searchRegion(String s,int start,int end) { + if(s==null) + throw new NullPointerException("Null String Given to Regex.searchRegion"); + return _search(s,start,end); + } + /** Set this to change the default behavior of the "." pattern. + By default it now matches perl's behavior and fails to + match the '\n' character. */ + public static boolean dotDoesntMatchCR = true; + /** Text remembered for the 'g' flag; _search compares it with the text it is given. */ StringLike gFlags; + /** Position that _search starts from instead of start when gFlag is set and the text matches gFlags. */ int gFlagto = 0; + boolean gFlag = false; + /** Set the 'g' flag */ + public void setGFlag(boolean b) { + gFlag = b; + } + /** Get the state of the 'g' flag. */ + public boolean getGFlag() { + return gFlag; + } + boolean sFlag = false; + /** Get the state of the sFlag */ + public boolean getSFlag() { + return sFlag; + } + boolean mFlag = false; + /** Get the state of the mFlag */ + public boolean getMFlag() { + return mFlag; + } + + /** Wraps s in a StringWrap and delegates to the StringLike overload. */ final boolean _search(String s,int start,int end) { + return _search(new StringWrap(s),start,end); + } + final boolean _search(StringLike s,int start,int end) { + if(gFlag && gFlagto > 0 && gFlags!=null && s.unwrap()==gFlags.unwrap()) + start = gFlagto; + gFlags = null; + + Pthings pt=prep(s); + + int up = (minMatch == null ? 
end : end-minMatch.i); + + if(up < start && end >= start) up = start; + + if(skipper == null) { + for(int i=start;i<=up;i++) { + charsMatched_ = thePattern.matchAt(s,i,pt); + if(charsMatched_ >= 0) { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_+charsMatched_; + gFlags = s; + return didMatch_=true; + } + } + } else { + pt.no_check = true; + for(int i=start;i<=up;i++) { + i = skipper.find(src,i,up); + if(i<0) { + charsMatched_ = matchFrom_ = -1; + return didMatch_ = false; + } + charsMatched_ = thePattern.matchAt(s,i,pt); + if(charsMatched_ >= 0) { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_+charsMatched_; + gFlags = s; + return didMatch_=true; + } + } + } + return didMatch_=false; + } + /*final boolean _search(LongStringLike s,long start,long end) { + if(gFlag && gFlagto > 0 && s==gFlags) + start = gFlagto; + gFlags = null; + + Pthings pt=prep(s); + + int up = end;//(minMatch == null ? end : end-minMatch.i); + + if(up < start && end >= start) up = start; + + if(skipper == null) { + for(long i=start;i<=up;i++) { + charsMatched_ = thePattern.matchAt(s,i,pt); + if(charsMatched_ >= 0) { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_+charsMatched_; + return didMatch_=true; + } + } + } else { + pt.no_check = true; + for(long i=start;i<=up;i++) { + i = skipper.find(src,i,up); + if(i<0) { + charsMatched_ = matchFrom_ = -1; + return didMatch_ = false; + } + charsMatched_ = thePattern.matchAt(s,i,pt); + if(charsMatched_ >= 0) { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_+charsMatched_; + gFlags = s; + return didMatch_=true; + } else { + i = s.adjustIndex(i); + up = s.adjustEnd(i); + } + } + } + return didMatch_=false; + }*/ + + boolean _reverseSearch(String s,int start,int end) { + return _reverseSearch(new StringWrap(s),start,end); + } + boolean _reverseSearch(StringLike s,int start,int end) { + if(gFlag && gFlagto > 0 && s.unwrap()==gFlags.unwrap()) + 
end = gFlagto; + gFlags = null; + Pthings pt=prep(s); + for(int i=end;i>=start;i--) { + charsMatched_ = thePattern.matchAt(s,i,pt); + if(charsMatched_ >= 0) { + matchFrom_ = thePattern.mfrom; + marks = pt.marks; + gFlagto = matchFrom_-1; + gFlags = s; + return didMatch_=true; + } + } + return didMatch_=false; + } + + // This routine sets the cbits variable + // of class Pattern. Cbits is true for + // the bit corresponding to a character inside + // a set of quotes. + static StringLike lasts=null; + static BitSet lastbs=null; + static void setCbits(StringLike s,Pthings pt) { + if(s == lasts) { + pt.cbits = lastbs; + return; + } + BitSet bs = new BitSet(s.length()); + char qc = ' '; + boolean setBit = false; + for(int i=0;i")) { + patInt i = sp.getPatInt(); + if(i==null) RegSyntaxError.endItAll("No int after (?>"); + add(new Backup(-i.intValue())); + if(!sp.match(')')) RegSyntaxError.endItAll("No ) after (?<"); + } else if(sp.incMatch("(?@")) { + char op = sp.c; + sp.inc(); + char cl = sp.c; + sp.inc(); + if(!sp.match(')')) + RegSyntaxError.endItAll( + "(?@ does not have closing paren"); + add(new Group(op,cl)); + } else if(sp.incMatch("(?#")) { + while(!sp.match(')')) + sp.inc(); + } else if(sp.dontMatch && sp.c == 'w') { + Regex r = new Regex(); + //r._compile("[a-zA-Z0-9_]",mk); + //add(new Goop("\\w",r.thePattern)); + Bracket b = new Bracket(false); + b.addOr(new Range('a','z')); + b.addOr(new Range('A','Z')); + b.addOr(new Range('0','9')); + b.addOr(new oneChar('_')); + add(b); + } else if(sp.dontMatch && sp.c == 'G') { + add(new BackG()); + } else if(sp.dontMatch && sp.c == 's') { + //Regex r = new Regex(); + //r._compile("[ \t\n\r\b]",mk); + //add(new Goop("\\s",r.thePattern)); + Bracket b = new Bracket(false); + b.addOr(new oneChar((char)32)); + b.addOr(new Range((char)8,(char)10)); + b.addOr(new oneChar((char)13)); + add(b); + } else if(sp.dontMatch && sp.c == 'd') { + Regex r = new Regex(); + //r._compile("[0-9]",mk); + //add(new Goop("\\d",r.thePattern)); 
+ Range digit = new Range('0','9'); + digit.printBrackets = true; + add(digit); + } else if(sp.dontMatch && sp.c == 'W') { + Regex r = new Regex(); + //r._compile("[^a-zA-Z0-9_]",mk); + //add(new Goop("\\W",r.thePattern)); + Bracket b = new Bracket(true); + b.addOr(new Range('a','z')); + b.addOr(new Range('A','Z')); + b.addOr(new Range('0','9')); + b.addOr(new oneChar('_')); + add(b); + } else if(sp.dontMatch && sp.c == 'S') { + //Regex r = new Regex(); + //r._compile("[^ \t\n\r\b]",mk); + //add(new Goop("\\S",r.thePattern)); + Bracket b = new Bracket(true); + b.addOr(new oneChar((char)32)); + b.addOr(new Range((char)8,(char)10)); + b.addOr(new oneChar((char)13)); + add(b); + } else if(sp.dontMatch && sp.c == 'D') { + //Regex r = new Regex(); + //r._compile("[^0-9]",mk); + //add(new Goop("\\D",r.thePattern)); + Bracket b = new Bracket(true); + b.addOr(new Range('0','9')); + add(b); + } else if(sp.dontMatch && sp.c == 'B') { + Regex r = new Regex(); + r._compile("(?!"+back_slash+"b)",mk); + add(r.thePattern); + } else if(isOctalString(sp)) { + int d = sp.c - '0'; + sp.inc(); + d = 8*d + sp.c - '0'; + StrPos sp2 = new StrPos(sp); + sp2.inc(); + if(isOctalDigit(sp2,false)) { + sp.inc(); + d = 8*d + sp.c - '0'; + } + add(new oneChar((char)d)); + } else if(sp.dontMatch && sp.c >= '1' && sp.c <= '9') { + int iv = sp.c-'0'; + StrPos s2 = new StrPos(sp); + s2.inc(); + if(!s2.dontMatch && s2.c >= '0' && s2.c <= '9') { + iv = 10*iv+(s2.c-'0'); + sp.inc(); + } + add(new BackMatch(iv)); + } else if(sp.dontMatch && sp.c == 'b') { + add(new Boundary()); + } else if(sp.match('\b')) { + add(new Boundary()); + } else if(sp.match('$')) { + add(new End(true)); + } else if(sp.dontMatch && sp.c == 'Z') { + add(new End(false)); + } else if(sp.match('.')) { + add(new Any()); + } else if(sp.incMatch("(??")) { + StringBuffer sb = new StringBuffer(); + StringBuffer sb2 = new StringBuffer(); + while(!sp.match(')') && !sp.match(':')) { + sb.append(sp.c); + sp.inc(); + } + if(sp.incMatch(":")) 
{ + while(!sp.match(')')) { + sb2.append(sp.c); + sp.inc(); + } + } + String sbs = sb.toString(); + if(validators.get(sbs) instanceof String) { + String pat = (String)validators.get(sbs); + Regex r = newRegex(); + Rthings rth = new Rthings(this); + rth.noBackRefs = true; + r._compile(pat,rth); + add(r.thePattern); + } else { + Custom cm = new Custom(sb.toString()); + if(cm.v != null) { + Validator v2 = cm.v.arg(sb2.toString()); + if(v2 != null) { + v2.argsave = sb2.toString(); + String p = cm.v.pattern; + cm.v = v2; + v2.pattern = p; + } + Regex r = newRegex(); + Rthings rth = new Rthings(this); + rth.noBackRefs = true; + r._compile(cm.v.pattern,rth); + cm.sub = r.thePattern; + cm.sub.add(new CustomEndpoint(cm)); + cm.sub.setParent(cm); + add(cm); + } + } + } else if(sp.match('(')) { + mk.parenLevel++; + Regex r = newRegex(); + // r.or = new Or(); + sp.inc(); + if(sp.incMatch("?:")) { + r.or = new Or(); + } else if(sp.incMatch("?=")) { + r.or = new lookAhead(false); + } else if(sp.incMatch("?!")) { + r.or = new lookAhead(true); + } else if(sp.match('?')) { + sp.inc(); + do { + if(sp.c=='i')mk.ignoreCase = true; + if(sp.c=='Q')mk.dontMatchInQuotes = true; + if(sp.c=='o')mk.optimizeMe = true; + if(sp.c=='g')mk.gFlag = true; + if(sp.c=='s')mk.sFlag = true; + if(sp.c=='m')mk.mFlag = true; + sp.inc(); + } while(!sp.match(')') && !sp.eos); + r = null; + mk.parenLevel--; + if(sp.eos) //throw new RegSyntax + RegSyntaxError.endItAll("Unclosed ()"); + } else { // just ordinary parenthesis + r.or = mk.noBackRefs ? 
new Or() : new OrMark(mk.val++); + } + if(r != null) add(r._compile(sp,mk)); + } else if(sp.match('^')) { + add(new Start(true)); + } else if(sp.dontMatch && sp.c=='A') { + add(new Start(false)); + } else if(sp.match('*')) { + addMulti(new patInt(0),new patInf()); + } else if(sp.match('+')) { + addMulti(new patInt(1),new patInf()); + } else if(sp.match('?')) { + addMulti(new patInt(0),new patInt(1)); + } else if(sp.match('{')) { + boolean bad = false; + StrPos sp2 = new StrPos(sp); + StringBuffer sb = new StringBuffer(); + sp.inc(); + patInt i1 = sp.getPatInt(); + patInt i2 = null; + if(sp.match('}')) { + i2 = i1; + } else { + if(!sp.match(','))/* + RegSyntaxError.endItAll( + "String \"{"+i2+ + "\" should be followed with , or }");*/ + bad = true; + sp.inc(); + if(sp.match('}')) + i2 = new patInf(); + else + i2 = sp.getPatInt(); + } + if(i1 == null || i2 == null) /* + throw new RegSyntax("Badly formatted Multi: " + +"{"+i1+","+i2+"}"); */ bad = true; + if(bad) { + sp.dup(sp2); + add(new oneChar(sp.c)); + } else + addMulti(i1,i2); + } else if(sp.escMatch('x') && next2Hex(sp)) { + sp.inc(); + int d = getHexDigit(sp); + sp.inc(); + d = 16*d + getHexDigit(sp); + add(new oneChar((char)d)); + } else if(sp.escMatch('c')) { + sp.inc(); + if(sp.c < Ctrl.cmap.length) + add(new oneChar(Ctrl.cmap[sp.c])); + else + add(new oneChar(sp.c)); + } else if(sp.escMatch('f')) { + add(new oneChar((char)12)); + } else if(sp.escMatch('a')) { + add(new oneChar((char)7)); + } else if(sp.escMatch('t')) { + add(new oneChar('\t')); + } else if(sp.escMatch('n')) { + add(new oneChar('\n')); + } else if(sp.escMatch('r')) { + add(new oneChar('\r')); + } else if(sp.escMatch('b')) { + add(new oneChar('\b')); + } else if(sp.escMatch('e')) { + add(new oneChar((char)27)); + } else { + add(new oneChar(sp.c)); + if(sp.match(')')) + RegSyntaxError.endItAll("Unmatched right paren in pattern"); + } + } + + // compiles all Pattern elements, internal method + private Pattern _compile(String pat,Rthings mk) 
throws RegSyntax { + minMatch = null; + sFlag = mFlag = ignoreCase = gFlag = false; + StrPos sp = new StrPos(pat,0); + thePattern = _compile(sp,mk); + pt.marks = null; + return thePattern; + } + + Pattern p = null; + Or or = null; + Pattern _compile(StrPos sp,Rthings mk) throws RegSyntax { + while(!(sp.eos || (or != null && sp.match(')')) )) { + compile1(sp,mk); + sp.inc(); + } + if(sp.match(')')) mk.parenLevel--; + else if(sp.eos && mk.parenLevel != 0) { + RegSyntaxError.endItAll("Unclosed Parenthesis! lvl="+mk.parenLevel); + } if(or != null) { + if(p == null) p = new NullPattern(); + or.addOr(p); + return or; + } + return p==null ? new NullPattern() : p; + } + + // add a multi object to the end of the chain + // which applies to the last object + void addMulti(patInt i1,patInt i2) throws RegSyntax { + Pattern last,last2; + for(last = p;last != null && last.next != null;last=last.next) + ; + if(last == null || last == p) + last2 = null; + else + for(last2 = p;last2.next != last;last2=last2.next) + ; + if(last instanceof Multi && i1.intValue()==0 && + i2.intValue()==1) + ((Multi)last).matchFewest = true; + else if(last instanceof FastMulti && i1.intValue()==0 && + i2.intValue()==1) + ((FastMulti)last).matchFewest = true; + else if(last instanceof DotMulti && i1.intValue()==0 && + i2.intValue()==1) + ((DotMulti)last).matchFewest = true; + else if(last instanceof Multi + || last instanceof DotMulti + || last instanceof FastMulti) + throw new RegSyntax("Syntax error."); + else if(last2 == null) + p = mkMulti(i1,i2,p); + else + last2.next = mkMulti(i1,i2,last); + } + final static Pattern mkMulti(patInt lo,patInt hi,Pattern p) throws RegSyntax { + if(p instanceof Any && p.next == null) + return (Pattern)new DotMulti(lo,hi); + return RegOpt.safe4fm(p) ? 
(Pattern)new FastMulti(lo,hi,p) : + (Pattern)new Multi(lo,hi,p); + } + // process the bracket operator + Pattern matchBracket(StrPos sp) throws RegSyntax { + Bracket ret; + if(sp.match('^')) { + ret = new Bracket(true); + sp.inc(); + } else + ret = new Bracket(false); + if(sp.match(']')) + //throw new RegSyntax + RegSyntaxError.endItAll("Unmatched []"); + + while(!sp.eos && !sp.match(']')) { + StrPos s1 = new StrPos(sp); + s1.inc(); + StrPos s1_ = new StrPos(s1); + s1_.inc(); + if(s1.match('-') && !s1_.match(']')) { + StrPos s2 = new StrPos(s1); + s2.inc(); + if(!s2.eos) + ret.addOr(new Range(sp.c,s2.c)); + sp.inc(); + sp.inc(); + } else if(sp.escMatch('Q')) { + sp.inc(); + while(!sp.escMatch('E')) { + ret.addOr(new oneChar(sp.c)); + sp.inc(); + } + } else if(sp.escMatch('d')) { + ret.addOr(new Range('0','9')); + } else if(sp.escMatch('s')) { + ret.addOr(new oneChar((char)32)); + ret.addOr(new Range((char)8,(char)10)); + ret.addOr(new oneChar((char)13)); + } else if(sp.escMatch('w')) { + ret.addOr(new Range('a','z')); + ret.addOr(new Range('A','Z')); + ret.addOr(new Range('0','9')); + ret.addOr(new oneChar('_')); + } else if(sp.escMatch('D')) { + ret.addOr(new Range((char)0,(char)47)); + ret.addOr(new Range((char)58,(char)65535)); + } else if(sp.escMatch('S')) { + ret.addOr(new Range((char)0,(char)7)); + ret.addOr(new Range((char)11,(char)12)); + ret.addOr(new Range((char)14,(char)31)); + ret.addOr(new Range((char)33,(char)65535)); + } else if(sp.escMatch('W')) { + ret.addOr(new Range((char)0,(char)64)); + ret.addOr(new Range((char)91,(char)94)); + ret.addOr(new oneChar((char)96)); + ret.addOr(new Range((char)123,(char)65535)); + } else if(sp.escMatch('x') && next2Hex(sp)) { + sp.inc(); + int d = getHexDigit(sp); + sp.inc(); + d = 16*d + getHexDigit(sp); + ret.addOr(new oneChar((char)d)); + } else if(sp.escMatch('a')) { + ret.addOr(new oneChar((char)7)); + } else if(sp.escMatch('f')) { + ret.addOr(new oneChar((char)12)); + } else if(sp.escMatch('e')) { + 
ret.addOr(new oneChar((char)27)); + } else if(sp.escMatch('n')) { + ret.addOr(new oneChar('\n')); + } else if(sp.escMatch('t')) { + ret.addOr(new oneChar('\t')); + } else if(sp.escMatch('r')) { + ret.addOr(new oneChar('\r')); + } else if(sp.escMatch('c')) { + sp.inc(); + if(sp.c < Ctrl.cmap.length) + ret.addOr(new oneChar(Ctrl.cmap[sp.c])); + else + ret.addOr(new oneChar(sp.c)); + } else if(isOctalString(sp)) { + int d = sp.c - '0'; + sp.inc(); + d = 8*d + sp.c - '0'; + StrPos sp2 = new StrPos(sp); + sp2.inc(); + if(isOctalDigit(sp2,false)) { + sp.inc(); + d = 8*d + sp.c - '0'; + } + ret.addOr(new oneChar((char)d)); + } else + ret.addOr(new oneChar(sp.c)); + sp.inc(); + } + return ret; + } + + /** Converts the stored Pattern to a String -- this is a + decompile. Note that \t and \n will really print out here, + Not just the two character representations. + Also be prepared to see some strange output if your characters + are not printable. */ + public String toString() { + if( false && thePattern == null ) + return ""; + else { + StringBuffer sb = new StringBuffer(); + if(esc != Pattern.ESC) { + sb.append("(?e="); + sb.append(esc); + sb.append(")"); + } + if(gFlag + ||mFlag + ||!dotDoesntMatchCR + ||sFlag + ||ignoreCase + ||dontMatchInQuotes + ||optimized()) { + sb.append("(?"); + if(ignoreCase)sb.append("i"); + if(mFlag)sb.append("m"); + if(sFlag||!dotDoesntMatchCR)sb.append("s"); + if(dontMatchInQuotes)sb.append("Q"); + if(optimized())sb.append("o"); + if(gFlag)sb.append("g"); + sb.append(")"); + } + String patstr = thePattern.toString(); + if(esc != Pattern.ESC) + patstr = reEscape(patstr,Pattern.ESC,esc); + sb.append(patstr); + return sb.toString(); + } + } + // Re-escape Pattern, allows us to use a different escape + // character. 
+ static String reEscape(String s,char oldEsc,char newEsc) { + if(oldEsc == newEsc) return s; + int i; + StringBuffer sb = new StringBuffer(); + for(i=0;iThis method will attempt to rewrite + your pattern in a way that makes it faster (not all patterns + execute at the same speed). In general, "(?: ... )" will be + faster than "( ... )" so if you don't need the backreference, + you should group using the former pattern.

It will also + introduce new pattern elements that you can't get to otherwise, + for example if you have a large table of strings, i.e. the + months of the year "(January|February|...)" optimize() will make + a Hashtable that takes it to the next appropriate pattern + element -- eliminating the need for a linear search. + @see com.stevesoft.pat.Regex#optimized + @see com.stevesoft.pat.Regex#ignoreCase + @see com.stevesoft.pat.Regex#dontMatchInQuotes + @see com.stevesoft.pat.Regex#matchAt + @see com.stevesoft.pat.Regex#search + */ + public void optimize() { + if(optimized()||thePattern==null) return; + minMatch = new patInt(0);//thePattern.countMinChars(); + thePattern = RegOpt.opt(thePattern,ignoreCase, + dontMatchInQuotes); + skipper = Skip.findSkip(this); + //RegOpt.setParents(this); + return; + } + Skip skipper; + /** This function returns true if the optimize method has + been called. */ + public boolean optimized() { + return minMatch != null; + } + + /** A bit of syntactic surgar for those who want to make + their code look more perl-like. To use this initialize + your Regex object by saying: +

+        Regex r1 = Regex.perlCode("s/hello/goodbye/");
+        Regex r2 = Regex.perlCode("s'fish'frog'i");
+        Regex r3 = Regex.perlCode("m'hello'");
+        
+ The i for ignoreCase is supported in + this syntax, as well as m, s, and x. The g flag + is a bit of a special case.

+ If you wish to replace all occurrences of a pattern, you + do not put a 'g' in the perlCode, but call Regex's + replaceAll method.

+ If you wish to simply + and only do a search for r2's pattern, you can do this + by calling the searchFrom method repeatedly, or + by calling search repeatedly if the g flag is set. +

+ Note: Currently perlCode does not + support the (?e=#) syntax for + changing the escape character. + */ + + public static Regex perlCode(String s) { + // this file is big enough, see parsePerl.java + // for this function. + return parsePerl.parse(s); + } + static final char back_slash = '\\'; + + /** Checks to see if there are only literal and no special + pattern elements in this Regex. */ + public boolean isLiteral() { + Pattern x = thePattern; + while(x != null) { + if(x instanceof oneChar) + ; + else if(x instanceof Skipped) + ; + else + return false; + x = x.next; + } + return true; + } + + /** You only need to know about this if you are inventing + your own pattern elements. */ + public patInt countMinChars() { return thePattern.countMinChars(); } + /** You only need to know about this if you are inventing + your own pattern elements. */ + public patInt countMaxChars() { return thePattern.countMaxChars(); } + + boolean isHexDigit(StrPos sp) { + boolean r = + !sp.eos && !sp.dontMatch + && ((sp.c>='0'&&sp.c<='9') + ||(sp.c>='a'&&sp.c<='f') + ||(sp.c>='A'&&sp.c<='F')); + return r; + } + boolean isOctalDigit(StrPos sp,boolean first) { + boolean r = + !sp.eos && !(first^sp.dontMatch) + && sp.c>='0'&&sp.c<='7'; + return r; + } + int getHexDigit(StrPos sp) { + if(sp.c >= '0' && sp.c <= '9') + return sp.c - '0'; + if(sp.c >= 'a' && sp.c <= 'f') + return sp.c - 'a' + 10; + return sp.c - 'A' + 10; + } + boolean next2Hex(StrPos sp) { + StrPos sp2 = new StrPos(sp); + sp2.inc(); + if(!isHexDigit(sp2)) + return false; + sp2.inc(); + if(!isHexDigit(sp2)) + return false; + return true; + } + boolean isOctalString(StrPos sp) { + if(!isOctalDigit(sp,true)) + return false; + StrPos sp2 = new StrPos(sp); + sp2.inc(); + if(!isOctalDigit(sp2,false)) + return false; + return true; + } +} diff --git a/src/com/stevesoft/pat/RegexReader.java b/src/com/stevesoft/pat/RegexReader.java new file mode 100755 index 0000000..0ff33e3 --- /dev/null +++ b/src/com/stevesoft/pat/RegexReader.java 
@@ -0,0 +1,248 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.io.*; +import com.stevesoft.pat.wrap.*; + +/** This class allows you to replace the text in strings + as you read them in. Be careful what you do with + this freedom... using Regex.perlCode("s{.*}{x}s") + as your pattern will result in loading the entire + contents of the Reader into memory. + */ +public class RegexReader extends Reader { + RBuffer rb = new RBuffer(new StringBuffer()); + PartialBuffer wrap = new PartialBuffer(rb.sb); + boolean moreToRead = true; + Reader r; + Replacer rp; + + // the buffer size + int nmax = 2*1024; + + public RegexReader(Regex rex,Reader r) { + this.r = r; + rp = rex.getReplacer(); + } + public RegexReader(Transformer tex,Reader r) { + this.r = r; + rp = tex.getReplacer(); + } + public void reset() throws IOException { + r.reset(); + rb = new RBuffer(new StringBuffer()); + wrap = new PartialBuffer(rb.sb); + moreToRead = true; + } + void readData() throws IOException { + int c; + int n = 0; + while( (c = r.read()) != -1) { + rb.sb.append((char)c); + if(n++ > nmax) + break; + } + if(c == -1 && n == 0) { + moreToRead = false; + wrap.allowOverRun = false; + } + } + void getMoreData() throws IOException { + while(rb.pos >= rb.epos) { + wrap.overRun = false; + if(rb.next != null) { + rb = rb.next; + } else if(rb.done) { + break; + } else if(rb.epos >= rb.sb.length() + && rb.epos > nmax) { + rb.pos = 1; + rb.epos = 1; + rb.sb.setLength(1); + readData(); + } else if(rb.epos >= rb.sb.length() + && moreToRead) { + readData(); + } else if(rp.getRegex().matchAt(wrap,rb.epos)) { + if(wrap.overRun) { + readData(); + } else { + StringBufferWrap sbw = new StringBufferWrap(); + StringBufferLike sbl = new StringBufferLike(sbw); + /* + ReplaceRule rr = rex.getReplaceRule(); + while(rr != null) { + 
rr.apply(sbl,rex); + rr = rr.next; + } + */ + Regex rex = rp.getRegex(); + int npos = rex.matchedTo(); + rp.setBuffer(sbl); + rp.setSource(wrap); + rp.setPos(npos); + rp.apply(rex,rex.getReplaceRule()); + int opos = rb.epos; + RBuffer rb2 = new RBuffer((StringBuffer)sbw.unwrap()); + rb2.epos = rb2.sb.length(); + RBuffer rb3 = new RBuffer(rb.sb); + + rb.next = rb2; + rb2.next = rb3; + + if(npos == opos) { + rb3.epos = npos+1; + if(rb3.epos > rb3.sb.length()) { + if(rb.pos >= rb.epos) + rb = rb.next; + rb3.pos = rb3.epos = 0; + rb3.done = true; + //break; + } + rb3.pos = npos; + } else { + rb3.pos = rb3.epos = npos; + } + + } + } else { + if(wrap.overRun) { + readData(); + } else if(rb.epos= rb.epos) { + getMoreData(); + if(rb.pos >= rb.epos) + return -1; + } + //System.out.println(rb); + return rb.sb.charAt(rb.pos++); + } + public int read(char[] buf,int off,int len) + throws IOException + { + int c = -1; + int end = off+len; + for(int i=off;iCopyright 2001, Steven R. Brandt +*/ /** +The RegexTokenizer is similar to the StringTokenizer class +provided with java, but allows one to tokenize using +regular expressions, rather than a simple list of characters. +Tokens are any strings between the supplied regular expression, +as well as any backreferences (things in parenthesis) +contained within the regular expression. */ +public class RegexTokenizer implements Enumeration { + String toParse; + Regex r; + int count = 0; + Vector v = new Vector(); + Vector vi = new Vector(); + int pos=0; + + int offset = 1; + void getMore() { + String s = r.right(); + if(r.searchFrom(toParse,pos)) { + v.addElement(r.left().substring(pos)); + vi.addElement(new Integer(r.matchFrom()+ + r.charsMatched())); + for(int i=0;i= v.size()) getMore(); + return v.elementAt(count++); + } + /** This is the equivalent (String)nextElement(). */ + public String nextToken() { return (String)nextElement(); } + /** This asks for the next token, and changes the pattern + being used at the same time. 
*/ + public String nextToken(String newpat) { + try { r.compile(newpat); } catch (RegSyntax r_) {} + return nextToken(r); + } + /** This asks for the next token, and changes the pattern + being used at the same time. */ + public String nextToken(Regex nr) { + r = nr; + if(vi.size() > count) { + pos = ((Integer)vi.elementAt(count)).intValue(); + v.setSize(count); + vi.setSize(count); + } + getMore(); + return nextToken(); + } + /** Tells whether there are more tokens in the pattern. */ + public boolean hasMoreElements() { + if(count >= v.size()) getMore(); + return count < v.size(); + } + /** Tells whether there are more tokens in the pattern, but + in the fashion of StringTokenizer. */ + public boolean hasMoreTokens() { return hasMoreElements(); } + /** Determines the # of remaining tokens */ + public int countTokens() { + int old_pos=pos,_count=count; + while(hasMoreTokens()) + nextToken(); + count=_count; + return v.size()-count; + } + /** Returns all tokens in the String */ + public String[] allTokens() { + countTokens(); + String[] ret = new String[v.size()]; + v.copyInto(ret); + return ret; + } +}; diff --git a/src/com/stevesoft/pat/RegexWriter.java b/src/com/stevesoft/pat/RegexWriter.java new file mode 100755 index 0000000..008f6ac --- /dev/null +++ b/src/com/stevesoft/pat/RegexWriter.java @@ -0,0 +1,205 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.io.*; +import com.stevesoft.pat.wrap.*; + +/** A basic extension of FilterWriter that uses Transformer + to make replacements in data as it is written out. It attempts + to transform a string whenever the End-of-Line (EOL) character + is written (which is, by default, the carriage return '\n'). 
+ Only the transformed portion of the line is written out, allowing + the RegexWriter to wait until a complete pattern is present before + attempting to write out info. Until a pattern completes, data is + stored in a StringBuffer -- which can be accessed through the + length() and charAt() methods of this class. +

+ Note a subtlety here -- while a Transformer normally matches + at higher priority against the pattern added to it first, this + will not necessarily be true when a multi-line match is in progress + because one of the complete multi-line patterns may not be completely + loaded in RegexWriter's buffer. For this reason, the Transformer + class is equipped with a way to add a pattern and replacement rule + in three pieces -- a beginning (once this matches, nothing else in + the Transformer can match until the whole pattern matches), an + ending (the whole pattern is a String formed by adding the beginning + and ending), and a ReplaceRule. +

+ An illustration of this is given in the this + example. + */ +public class RegexWriter extends Writer { + Replacer repr; + Writer w; + WriterWrap ww; + StringBuffer sb = new StringBuffer(); + PartialBuffer wrap = new PartialBuffer(sb); + int pos, epos; + int interval = 128; + int bufferSize = 2*1024; + + public RegexWriter(Transformer t,Writer w) { + this.w = w; + ww = new WriterWrap(w); + repr = t.getReplacer(); + repr.setBuffer(new StringBufferLike(ww)); + repr.setSource(wrap); + } + public RegexWriter(Regex r,Writer w) { + this.w = w; + ww = new WriterWrap(w); + repr = r.getReplacer(); + repr.setBuffer(new StringBufferLike(ww)); + repr.setSource(wrap); + } + + char EOLchar = '\n'; + /** This method no longer serves any purpose. + @deprecated + */ + public char getEOLchar() { + return EOLchar; + } + /** This method no longer serves any purpose. + @deprecated + */ + public void setEOLchar(char c) { + EOLchar = c; + } + + int max_lines=2; + /** This method no longer serves any purpose. + @deprecated + */ + public int getMaxLines() { return max_lines; } + /** This method no longer serves any purpose. 
+ @deprecated + */ + public void setMaxLines(int ml) { max_lines = ml; } + + void write() throws IOException { + Regex rex = repr.getRegex(); + int eposOld = epos; + if(rex.matchAt(wrap,epos) && !wrap.overRun) { + while(pos < epos) + w.write(sb.charAt(pos++)); + int to = rex.matchedTo(); + repr.setPos(to); + repr.apply(rex,rex.getReplaceRule()); + epos = pos = to; + if(epos == eposOld && epos < sb.length()) + epos++; + } else if(!wrap.overRun && epos < sb.length()) { + epos++; + } + while(pos < epos) + w.write(sb.charAt(pos++)); + if(epos == sb.length()) { + sb.setLength(1); + pos = epos = 1; + } else if(pos > bufferSize) { + for(int i=bufferSize;iexample + + @see com.stevesoft.pat.NullRule + @see com.stevesoft.pat.AmpersandRule + @see com.stevesoft.pat.BackRefRule + @see com.stevesoft.pat.LeftRule + @see com.stevesoft.pat.RightRule + @see com.stevesoft.pat.StringRule + */ +public abstract class ReplaceRule { + /** points to the next ReplaceRule in the linked list. */ + protected ReplaceRule next = null; + /** This function appends to the StringBufferLike the text you want + to replaced the portion of the String last matched. */ + public abstract void apply(StringBufferLike sb,RegRes r); + + /** A rule describing how to clone only the current ReplaceRule, + and none of the others in this linked list. It is called by + clone() for each item in the list. */ + public Object clone1() { + return new RuleHolder(this); + } + public final Object clone() { + ReplaceRule x = (ReplaceRule)clone1(); + ReplaceRule xsav = x; + ReplaceRule y = this; + while(y.next != null) { + x.next = (ReplaceRule)y.next.clone1(); + x.name = y.name; + x = x.next; + y = y.next; + } + return xsav; + } + static ReplaceRule add(ReplaceRule head,ReplaceRule adding) { + if(head == null) + return head = adding; + head.addRule(adding); + return head; + } + public ReplaceRule add(ReplaceRule adding) { + return add(this,adding); + } + /** Add another ReplaceRule to the linked list. 
*/ + public void addRule(ReplaceRule r) { + if(next == null) next = r; + else next.addRule(r); + } + static Regex getvar = null; + final static Regex getv() { + // Thanks to Michael Jimenez for pointing out the need + // to clone getvar rather than simply returning it. + // Previously this was not thread safe. + //if(getvar != null) return getvar; + if(getvar != null) return (Regex)getvar.clone(); + getvar= + new Regex( + "(?:\\\\(\\d+)|"+ // ref 1 + "\\$(?:"+ + "(\\d+)|"+ // ref 2 + "(\\w+)|"+ // ref 3 + "([&'`])|"+ // ref 4 + "\\{(?:(\\d+)|"+ // ref 5 + "([^\n}\\\\]+))}"+ // ref 6 + ")|"+ + "\\\\([nrbtaef])|"+ // ref 7 + "\\\\c([\u0000-\uFFFF])|"+ // ref 8 + "\\\\x([A-Fa-f0-9]{2})|"+ // ref 9 + "\\\\([\u0000-\uFFFF])"+ // ref 10 + ")"); + getvar.optimize(); + return getvar; + } + /** Compile a ReplaceRule using the text that would go between + the second and third /'s in a typical substitution pattern + in Perl: s/ ... / The argument to ReplaceRule.perlCode /. + */ + public static ReplaceRule perlCode(String s) { + //String sav_backGs = Regex.backGs; + //int sav_backGto = Regex.backGto; + try { + int mf = 0, mt = 0; + Regex gv = getv(); + ReplaceRule head = null; + Object tmp = null; + while(gv.searchFrom(s,mt)) { + int off=Regex.BackRefOffset-1; + mf = gv.matchedFrom(); + if(mf > mt) + head=add(head, + new StringRule(s.substring(mt,mf))); + String var = null; + if((var=gv.stringMatched(1+off)) != null + || (var=gv.stringMatched(2+off)) != null + || (var=gv.stringMatched(5+off)) != null) { + int d=0; + for(int i=0;i= 0) + head=add(head,new CodeRule(var.charAt(0)) ); + else + head=add(head,new StringRule(var) ); + } else if( + (var=gv.stringMatched(3+off)) != null + || (var=gv.stringMatched(4+off)) != null + || (var=gv.stringMatched(6+off)) != null) { + String arg = ""; + int pc; + if((pc=var.indexOf(':')) > 0) { + arg = var.substring(pc+1); + var = var.substring(0,pc); + } + if(var.equals("&")||var.equals("MATCH")) { + head=add(head,new AmpersandRule()); + } else 
if(var.equals("`")||var.equals("PREMATCH")) { + head=add(head,new LeftRule()); + } else if(var.equals("'")||var.equals("POSTMATCH")) { + head=add(head,new RightRule()); + } else if(var.equals("WANT_MORE_TEXT")) { + head=add(head,new WantMoreTextReplaceRule()); + } else if(var.equals("POP")) { + head=add(head,new PopRule()); + } else if(var.startsWith("+") && (tmp=defs.get(var.substring(1))) != null) { + if(tmp instanceof Regex) + head=add(head,new PushRule(var.substring(1),(Regex)tmp)); + else if(tmp instanceof Transformer) + head=add(head,new PushRule(var.substring(1),(Transformer)tmp)); + else head=add(head,new StringRule("${"+var+"}")); + } else if(var.startsWith("=") && (tmp=defs.get(var.substring(1))) != null) { + if(tmp instanceof Regex) + head=add(head,new ChangeRule(var.substring(1),(Regex)tmp)); + else if(tmp instanceof Transformer) + head=add(head,new ChangeRule(var.substring(1),(Transformer)tmp)); + else head=add(head,new StringRule("${"+var+"}")); + } else if( (tmp=defs.get(var)) != null) { + if(tmp instanceof ReplaceRule) { + ReplaceRule alt = ((ReplaceRule)tmp).arg(arg); + if(alt == null) alt = ((ReplaceRule)tmp); + head=add(head,(ReplaceRule)(alt.clone())); + } + } else // can't figure out how to transform this thing... 
+ head=add(head,new StringRule("${"+var+"}")); + } else if( + (var = gv.stringMatched(7+off)) != null) { + char c = var.charAt(0); + if(c == 'n') + head=add(head,new StringRule("\n")); + else if(c == 't') + head=add(head,new StringRule("\t")); + else if(c == 'r') + head=add(head,new StringRule("\r")); + else if(c == 'b') + head=add(head,new StringRule("\r")); + else if(c == 'a') + head=add(head,new StringRule(""+(char)7)); + else if(c == 'e') + head=add(head,new StringRule(""+(char)27)); + else if(c == 'f') + head=add(head,new StringRule(""+(char)12)); + } else if( + (var = gv.stringMatched(8+off)) != null) { + char c = var.charAt(0); + if(c < Ctrl.cmap.length) + c = Ctrl.cmap[c]; + head=add(head,new StringRule(""+c)); + } else if( + (var = gv.stringMatched(9+off)) != null) { + int d = + 16*getHexDigit(var.charAt(0))+ + getHexDigit(var.charAt(1)); + head=add(head,new StringRule(""+(char)d)); + } + mt = gv.matchedTo(); + } + if(mt <= s.length()) + head=add(head,new StringRule(s.substring(mt))); + return head; + } finally { + //Regex.backGs = sav_backGs; + //Regex.backGto = sav_backGto; + } + } + static Hashtable defs = new Hashtable(); + public static boolean isDefined(String s) { return defs.get(s) != null; } + public static void define(String s,Regex r) { defs.put(s,r); } + public static void define(String s,ReplaceRule r) { + defs.put(s,r); + r.name = s; + } + String name = getClass().getName(); + public static void define(String s,Transformer t) { defs.put(s,t); } + public static void undefine(String s) { defs.remove(s); } + /** This tells how to convert just the current element (and none + of the other items in the linked list) to a String. This + method is called by toString() for each item in the linked + list. */ + public String toString1() { + return "${"+name+"}"; + } + /** Convert to a String. 
*/ + public final String toString() { + StringBuffer sb = new StringBuffer(); + sb.append(toString1()); + ReplaceRule rr = this.next; + while(rr != null) { + sb.append(rr.toString1()); + rr = rr.next; + } + return sb.toString(); + } + /** Modified the behavior of a ReplaceRule by supplying + an argument. If a ReplaceRule named "foo" is defined + and the pattern "s/x/${foo:5}/" is given to Regex.perlCode, + then the "foo" the definition of "foo" will be retrieved + and arg("5") will be called. If the result is non-null, + that is the ReplaceRule that will be used. If the result + is null, then the pattern works just as if it were + "s/x/${foo}/". + @see com.stevesoft.pat.Validator#arg(java.lang.String) + */ + public ReplaceRule arg(String s) { return null; } + static int getHexDigit(char c) { + if(c >= '0' && c <= '9') + return c - '0'; + if(c >= 'a' && c <= 'f') + return c - 'a'+10; + return c - 'A'+10; + } +} diff --git a/src/com/stevesoft/pat/Replacer.java b/src/com/stevesoft/pat/Replacer.java new file mode 100755 index 0000000..c78a11d --- /dev/null +++ b/src/com/stevesoft/pat/Replacer.java @@ -0,0 +1,261 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.util.*; +import com.stevesoft.pat.wrap.StringWrap; + +/** Internally used class. */ +class RegHolder { + Regex me = null; + RegHolder prev = null; +} + +/** Internally used class. + * @see CodeRule + */ +class CodeVal { + int pos; + char code; + CodeVal(int p,char c) { + pos = p; + code = c; + } + public String toString() { + return "("+pos+","+code+")"; + } +} + +/** + To use this class, first use either the getReplacer() method from + Transformer or Regex. You can then use replaceAll, replaceFirst, + etc. methods on the Replacer in the same way that you can from + either of those two classes. +

+ The only potential difference between using the methods of + Replacer to do the replacing is that Replacer remembers changes + to the replacing object between calls to replaceAll, replaceFirst + etc. For details, see the example file + trans3.java. + @see com.stevesoft.pat.Transformer + @see com.stevesoft.pat.Regex +*/ +public class Replacer { + boolean first; + + /** Instantiate a new Replacer. */ + public Replacer() {} + + public StringLike replaceFirstRegion(String s,Regex r, + int start,int end) { + return replaceFirstRegion(new StringWrap(s),r,start,end); + } + /** This method replaces the first occurence of the Regex in the + String starting with position pos + according to the Replacer rule of this object. */ + public StringLike replaceFirstRegion(StringLike s,Regex r, + int start,int end) { + first = true; + rh.me = r; + rh.prev = null; + return dorep(s,start,end); + } + public StringLike replaceFirst(StringLike s) { + return replaceFirstRegion(s,0,s.length()); + } + public StringLike replaceFirstFrom(StringLike s,int start) { + return replaceFirstRegion(s,start,s.length()); + } + public StringLike replaceFirstRegion(StringLike s,int start,int end) { + first = true; + return dorep(s,start,end); + } + + RegHolder rh = new RegHolder(); + + public StringLike replaceAllRegion(String s,Regex r, + int start, int end) { + return replaceAllRegion(new StringWrap(s),r,start,end); + } + /** This method replaces all occurences of the Regex in the + String starting with postition pos + according to the Replacer rule of this object. 
*/ + public StringLike replaceAllRegion(StringLike s,Regex r, + int start,int end) { + first = false; + // reset + rh.me = r; + rh.prev = null; + return dorep(s,start,end); + } + public StringLike replaceAll(StringLike s) { + return replaceAllRegion(s,0,s.length()); + } + public StringLike replaceAllFrom(StringLike s,int start) { + return replaceAllRegion(s,start,s.length()); + } + public StringLike replaceAllRegion(StringLike s,int start,int end) { + first = false; + return dorep(s,start,end); + } + + public String replaceAll(String s) { + return replaceAllRegion(new StringWrap(s),0,s.length()).toString(); + } + public String replaceAllFrom(String s,int start) { + return replaceAllRegion(new StringWrap(s),start,s.length()).toString(); + } + public String replaceAllRegion(String s,int start,int end) { + first = false; + return dorep(new StringWrap(s),start,end).toString(); + } + + final public boolean isSpecial(ReplaceRule x) { + while(x != null) { + if(x instanceof SpecialRule + || (x instanceof RuleHolder && ((RuleHolder)x).held instanceof SpecialRule)) + return true; + x = x.next; + } + return false; + } + final public void apply1(RegRes rr) { + rr.charsMatched_++; + apply(rr,null); + rr.charsMatched_--; + } + + final StringLike dorep(StringLike s,int start,int end) { + StringLike ret = s; + want_more_text = false; + lastMatchedTo = 0; + if(rh.me == null) + throw new NullPointerException("Replacer has null Regex pointer"); + if(rh.me._search(s,start,end)) { + int rmn = rh.me.matchedTo(); + if(rh.me.charsMatched()==0 && !isSpecial(rh.me.getReplaceRule())) { + apply1(rh.me); + rmn++; + } + apply(rh.me); + if(!first) + for(int i=rmn; + !want_more_text && rh.me._search(s,i,end);i=rmn) { + rmn = rh.me.matchedTo(); + if(rh.me.charsMatched()==0) { + if(!isSpecial(rh.me.getReplaceRule())) + apply1(rh.me); + rmn++; + } + apply(rh.me); + } + ret = finish(); + ret = ret == null ? 
s : ret; + } + return ret; + } + + StringBufferLike sb = null; + StringLike src = null; + int pos = 0; + /** This method allows you to apply the results of several + matches in a sequence to modify a String of text. Each + call in the sequence must operate on the same piece of + text and the matchedFrom() of each RegRes given to this + method must be greater in value than the preceeding + RegRes's matchedTo() value. + */ + public void apply(RegRes r,ReplaceRule rp) { + if(rp==null ||(rp.next == null && rp instanceof AmpersandRule)) + return; + if(r.didMatch()) { + if(src == null) + src = r.getStringLike(); + if(sb == null) + sb = new StringBufferLike(src.newStringBufferLike()); + int rmf = r.matchedFrom(); + for(int ii=pos;iiCopyright 2001, Steven R. Brandt +*/ /** +This class only exists to store data needed during the +compilation of a regular expression. */ +public class Rthings { + /** The numeric identity of the next () to be encountered + while compiling the pattern. */ + public int val=Regex.BackRefOffset; + /** Needed in case (?i) is encountered, to pass back the + message that ignoreCase should be set. */ + public boolean ignoreCase; + /** Needed in case (?Q) is encountered, to pass back the + message that dontMatchInQuotes should be set. 
*/ + public boolean dontMatchInQuotes; + public boolean optimizeMe = false; + public boolean noBackRefs = false; + public int parenLevel = 0; + boolean gFlag = false, mFlag = false, sFlag = false; + Pattern p; + Or o; + Rthings(Regex r) { + ignoreCase = r.ignoreCase; + dontMatchInQuotes = r.dontMatchInQuotes; + } + void set(Regex r) { + r.gFlag = gFlag; + r.mFlag = mFlag; + r.sFlag = sFlag; + r.ignoreCase = ignoreCase; + r.dontMatchInQuotes = dontMatchInQuotes; + if(optimizeMe) r.optimize(); + } +}; diff --git a/src/com/stevesoft/pat/RuleHolder.java b/src/com/stevesoft/pat/RuleHolder.java new file mode 100755 index 0000000..e732d58 --- /dev/null +++ b/src/com/stevesoft/pat/RuleHolder.java @@ -0,0 +1,20 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +/** This class is used internally. */ +class RuleHolder extends ReplaceRule { + ReplaceRule held = null; + RuleHolder() {} + RuleHolder(ReplaceRule h) { held = h; } + public Object clone1() { return new RuleHolder(held); } + public String toString1() { return held.toString1(); } + public void apply(StringBufferLike sb,RegRes rr) { + held.apply(sb,rr); + } + public ReplaceRule arg(String s) { return new RuleHolder(held.arg(s)); } +} diff --git a/src/com/stevesoft/pat/Skip.java b/src/com/stevesoft/pat/Skip.java new file mode 100755 index 0000000..664c712 --- /dev/null +++ b/src/com/stevesoft/pat/Skip.java @@ -0,0 +1,127 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** This class is used internally to search ahead for some + optimized Regex objects. It searches within a String + for occrences of a given String -- like a more flexible + version of String.indexOf. 
+ @see com.stevesoft.pat.Skip2 + @see com.stevesoft.pat.SkipBMH + */ +public class Skip { + static int mkmask(int c) { + char x = (char)c; + return ~( CaseMgr.toUpperCase(x) | + CaseMgr.toLowerCase(x) | + CaseMgr.toTitleCase(x)); + } + static { int x = Regex.BackRefOffset; } + String src; + int c,mask; + int offset; + boolean ign,m1; + /** Examine a Regex to determine what String it will + attempt to skip to when searching for patterns. + Return -1 if we aren't doing this. */ + public static String string(Regex r) { + return r.skipper == null ? null : r.skipper.src; + } + /** Determine the offset of the String within the pattern + that we are skipping to. Return -1 if we aren't doing + this. */ + public static int offset(Regex r) { + return r.skipper == null ? -1 : r.skipper.offset; + } + /** Initialize, give it a String to search for, tell it + whether or not to ignoreCase, and what the offset is + of the String within the String to be searched. */ + public Skip(String s,boolean ign,int o) { + src = s; + c = s.charAt(0); + if(ign) { + mask = mkmask(c); + } else mask = 0; + offset = o; + this.ign = ign; + m1 = (s.length()==1); + } + /** The same as find(s,0,s.length()) */ + public final int find(StringLike s) { + return find(s,0,s.length()); + } + static final int min(int a,int b) { return a end) return -1; + start += offset; + int vend = min(s.length()-1,end+offset); + if(mask != c) { + for(int i=start;i<=vend;i++) + if(0 == (s.charAt(i) & mask)) + //if(m1||s.regionMatches(ign,i,src,0,src.length()) ) + if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) ) + return i-offset; + } else { + for(int i=start;i<=vend;i++) + if(c == s.charAt(i)) + //if(m1||s.regionMatches(ign,i,src,0,src.length()) ) + if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) ) + return i-offset; + } + return -1; + } + static Skip findSkip(Regex r) { + return findSkip(r.thePattern,r.ignoreCase,!r.dontMatchInQuotes); + } + // look for things that can be skipped + static Skip 
findSkip(Pattern p,boolean ignoreCase,boolean trnc) { + StringBuffer sb = new StringBuffer(); + Skip subsk = null; + int offset = 0; + int skipc = -1,skipoff=0; + for(;p != null;p = p.next) { + if(p instanceof oneChar) { + skipc = ((oneChar)p).c; + skipoff = offset; + } + if(p instanceof oneChar && p.next instanceof oneChar) { + Pattern psav = p; + sb.append(((oneChar)p).c); + while(p.next instanceof oneChar) { + sb.append(((oneChar)p.next).c); + p = p.next; + } + String st = sb.toString(); + char c0 = st.charAt(0), c1 = st.charAt(1); + Skip sk=null; + if(st.length()>2) + sk = new SkipBMH(st,ignoreCase,offset); + else + sk = new Skip2(st,ignoreCase,offset); + if(trnc && st.length()>2) { // chop out a whole string... + psav.next = new Skipped(st.substring(1)); + psav.next.next = p.next; + psav.next.parent = p.parent; + } + return sk; + } else if(p instanceof Or && ((Or)p).v.size()==1 + && !((Or)p).leftForm().equals("(?!") + && null != (subsk= + findSkip( (Pattern)((Or)p).v.elementAt(0),ignoreCase,trnc) )) { + subsk.offset += offset; + return subsk; + } else if(p.minChars().equals(p.maxChars())) { + offset += p.minChars().intValue(); + } else return skipc < 0 ? null : + new Skip(""+(char)skipc,ignoreCase,skipoff); + } + return null; + } +} diff --git a/src/com/stevesoft/pat/Skip2.java b/src/com/stevesoft/pat/Skip2.java new file mode 100755 index 0000000..f2b03ca --- /dev/null +++ b/src/com/stevesoft/pat/Skip2.java @@ -0,0 +1,38 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** This is the same as Skip, except it needs a minimum + of two characters in the initializing String. 
+ @see com.stevesoft.pat.Skip + @see com.stevesoft.pat.SkipBMH + */ +public class Skip2 extends Skip { + int c1,mask1; + public Skip2(String s,boolean ign,int offset) { + super(s,ign,offset); + c1 = s.charAt(1); + m1 = 2==s.length(); + if(ign) { + mask1=mkmask(c1); + } else + mask1 = 0; + } + public int find(StringLike s,int start,int end) { + if(start > end) return -1; + start += offset; + int vend = min(s.length()-2,end+offset); + for(int i=start;i<=vend;i++) + if(0 == (s.charAt(i)&mask) && 0 == (s.charAt(i+1)&mask1)) { + //if(m1||s.regionMatches(ign,i,src,0,src.length()) ) + if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) ) + return i-offset; + } + return -1; + } +} diff --git a/src/com/stevesoft/pat/SkipBMH.java b/src/com/stevesoft/pat/SkipBMH.java new file mode 100755 index 0000000..4fbe381 --- /dev/null +++ b/src/com/stevesoft/pat/SkipBMH.java @@ -0,0 +1,183 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import com.stevesoft.pat.wrap.StringWrap; + +/** Like Skip, but implements a + + Boyer-Moore-Horspool type search + method that has been modified to be more like a "T-search" (see + the Michael Tamm''s article in C'T, magazin fuer computer und technic, August 97 + p 292). Yet another important source of information for me was + the + Deep Magic article on string searching. As of this writing, I can + beat String's indexOf method in many cases. + @see com.stevesoft.pat.Skip + @see com.stevesoft.pat.Skip2 + */ +public class SkipBMH extends Skip { + // This number could be 256, but I think it's + // big enough. Note, it must be a power of 2. 
+ final int MAX_CHAR = 64; + final char[] skip = new char[MAX_CHAR]; + int sm1; + int jump_ahead = 0; + char uc,lc,tc,x; + final boolean exact(char c) { + return (ign && anyc(c))||c==x; + } + final boolean anyc(char c) { + return c==uc||c==lc||c==tc; + } + public SkipBMH(String pt,boolean ign) { this(pt,ign,0); } + public SkipBMH(String pt) { this(pt,false,0); } + public SkipBMH(String pt,boolean ign,int offset) { + super(pt,ign,offset); + for(int k=0;k vend) return -1; + } + } + } else { + for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { + // table look-up is expensive, avoid it if possible + if( x==s.charAt(k) ) { + //if(src.regionMatches(0,s,k-sm1,sm1)) + if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) + return k-sm1-offset; + k += jump_ahead; + } + } + for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { + // table look-up is expensive, avoid it if possible + if( x==s.charAt(k) ) { + //if(src.regionMatches(0,s,k-sm1,sm1)) + if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) + return k-sm1-offset; + k += jump_ahead; + if(k > vend) return -1; + } + } + } + + return -1; + } + public int find(StringLike s,int start,int end) { + if(s instanceof StringWrap) + return find(s.toString(),start,end); + start += offset+sm1; + int vend = min(s.length()-1,end+sm1+offset),k; + int vend1 = vend-jump_ahead; + if(ign) { + for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { + // table look-up is expensive, avoid it if possible + if( anyc(s.charAt(k)) ) { + if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1)) + return k-sm1-offset; + k += jump_ahead; + } + } + for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { + // table look-up is expensive, avoid it if possible + if( anyc(s.charAt(k)) ) { + if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1)) + return k-sm1-offset; + k += jump_ahead; + if(k > vend) return -1; + } + } + } else { + for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { + // table look-up is expensive, avoid it if 
possible + if( x==s.charAt(k) ) { + //if(src.regionMatches(0,s,k-sm1,sm1)) + if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) + return k-sm1-offset; + k += jump_ahead; + } + } + for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { + // table look-up is expensive, avoid it if possible + if( x==s.charAt(k) ) { + //if(src.regionMatches(0,s,k-sm1,sm1)) + if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) + return k-sm1-offset; + k += jump_ahead; + if(k > vend) return -1; + } + } + } + + return -1; + } +} diff --git a/src/com/stevesoft/pat/Skipped.java b/src/com/stevesoft/pat/Skipped.java new file mode 100755 index 0000000..9d963ae --- /dev/null +++ b/src/com/stevesoft/pat/Skipped.java @@ -0,0 +1,27 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** Implements the (?<number) Pattern, where number is + an integer telling us how far to back up in the Pattern. + Not in perl 5. */ +class Skipped extends Pattern { + String s; + Skipped(String s) { this.s = s; } + public String toString() { return s+nextString(); } + public int matchInternal(int pos,Pthings pt) { + //if(pt.no_check || s.regionMatches(pt.ignoreCase,0,pt.src,pos,s.length())) + if(pt.no_check || CaseMgr.regionMatches(s,pt.ignoreCase,0,pt.src,pos,s.length())) + return nextMatch(pos+s.length(),pt); + return -1; + } + public patInt minChars() { return new patInt(s.length()); } + public patInt maxChars() { return new patInt(s.length()); } + Pattern clone1(Hashtable h) { return new Skipped(s); } +}; diff --git a/src/com/stevesoft/pat/SpecialRule.java b/src/com/stevesoft/pat/SpecialRule.java new file mode 100755 index 0000000..72df8d2 --- /dev/null +++ b/src/com/stevesoft/pat/SpecialRule.java @@ -0,0 +1,13 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. 
Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +public class SpecialRule extends ReplaceRule { + public SpecialRule() {} + public void apply(StringBufferLike sb,RegRes rr) {} +} diff --git a/src/com/stevesoft/pat/Start.java b/src/com/stevesoft/pat/Start.java new file mode 100755 index 0000000..9784358 --- /dev/null +++ b/src/com/stevesoft/pat/Start.java @@ -0,0 +1,31 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** The '^' or the '\A' Pattern, matches the start of a string. */ +class Start extends Pattern { + boolean retIsStart; + Start(boolean b) { retIsStart = b; } + public int matchInternal(int pos,Pthings pt) { + if(retIsStart + && pt.mFlag + && pos > 0 && pt.src.charAt(pos-1)=='\n') + return nextMatch(pos,pt); + if(pos == 0) return nextMatch(pos,pt); + return -1; + } + public String toString() { + if(retIsStart) + return "^"+nextString(); + else + return "\\A"+nextString(); + } + public patInt maxChars() { return new patInt(0); } + Pattern clone1(Hashtable h) { return new Start(retIsStart); } +}; diff --git a/src/com/stevesoft/pat/StrPos.java b/src/com/stevesoft/pat/StrPos.java new file mode 100755 index 0000000..4a9f287 --- /dev/null +++ b/src/com/stevesoft/pat/StrPos.java @@ -0,0 +1,117 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +/** + Shareware: package pat + Copyright 2001, Steven R. Brandt +*/ /** +StrPos is used internally by regex to parse the regular expression. 
*/ +public class StrPos { + String s; + int pos; + /** Return the position in the string pointed to */ + public int pos() { return pos; } + + /** This contains the escape character, which is \ by default. */ + public char esc=Pattern.ESC; + char c; + /** Returns the current, possibly escaped, character. */ + public char thisChar() { return c; } + + boolean dontMatch,eos; + + /** tell whether we are at end of string */ + public boolean eos() { return eos; } + /** initialize a StrPos from another StrPos. */ + public StrPos(StrPos sp) { + dup(sp); + } + /** copy a StrPos from sp to this. */ + public void dup(StrPos sp) { + s = sp.s; + pos = sp.pos; + c = sp.c; + dontMatch = sp.dontMatch; + eos = sp.eos; + } + /** Initialize a StrPos by giving it a String, and a + position within the String. */ + public StrPos(String s,int pos) { + this.s=s; + this.pos=pos-1; + inc(); + } + /** Advance the place where StrPos points within the String. + Counts a backslash as part of the next character. */ + public StrPos inc() { + pos++; + if(pos >= s.length()) { + eos = true; + return this; + } + eos = false; + c = s.charAt(pos); + if(c == esc && pos+1st that matches a non-escaped + character. */ + public boolean incMatch(String st) { + StrPos sp = new StrPos(this); + int i; + for(i=0;i= '0' && sp.c <= '9';i++) { + cnt = 10*cnt+sp.c-'0'; + sp.inc(); + } + if(i==0) return null; + dup(sp); + return new patInt(cnt); + } + /** get the string that we are processing. */ + public String getString() { return s; } +}; diff --git a/src/com/stevesoft/pat/StringBufferLike.java b/src/com/stevesoft/pat/StringBufferLike.java new file mode 100755 index 0000000..6987eec --- /dev/null +++ b/src/com/stevesoft/pat/StringBufferLike.java @@ -0,0 +1,65 @@ +package// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! 
+// +com.stevesoft.pat; + +import com.stevesoft.pat.*; + +/** A tool that is used to make the \E, \U, \L, and \Q + elements of a substitution. */ +public class StringBufferLike implements BasicStringBufferLike { + BasicStringBufferLike sbl; + public StringBufferLike(BasicStringBufferLike sbl) { + this.sbl = sbl; + } + char mode = 'E', altMode = ' '; + public StringLike toStringLike() { + return sbl.toStringLike(); + } + public String toString() { + return sbl.toString(); + } + public void append(char c) { + + switch(mode) { + case 'u': + mode = altMode; + altMode = ' '; + case 'U': + sbl.append(CaseMgr.toUpperCase(c)); + break; + case 'l': + mode = altMode; + altMode = ' '; + case 'L': + sbl.append(CaseMgr.toLowerCase(c)); + break; + case 'Q': + if((c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9')) + ; + else + sbl.append('\\'); + default: + sbl.append(c); + break; + } + } + public void append(String s) { + for(int i=0;i +

  • The StringLike object will not change. Calls to + charAt(int) will not vary with time. +
  • The length of the object being searched is known + before the search begins and does not vary with time. + + Note that searching String is probably faster than searching + other objects, so searching String is still preferred if + possible. +*/ +public interface StringLike { + public char charAt(int i); + public String toString(); + public int length(); + public String substring(int i1,int i2); + /** Obtain the underlying object, be it a String, char[], + RandomAccessFile, whatever. */ + public Object unwrap(); + /** By default, the result is put in a String or char[] + when a replace is done. If you wish to save the result + in some other StringBufferLike then you can do this + by implementing this method, or over-riding it's behavior + from an existing class. */ + public BasicStringBufferLike newStringBufferLike(); + public int indexOf(char c); +} diff --git a/src/com/stevesoft/pat/StringRule.java b/src/com/stevesoft/pat/StringRule.java new file mode 100755 index 0000000..c37b6bb --- /dev/null +++ b/src/com/stevesoft/pat/StringRule.java @@ -0,0 +1,22 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** The apply method of this ReplaceRule simply appends the text + it was initialized with to the StringBufferLike. 
+ @see com.stevesoft.pat.ReplaceRule + */ +public class StringRule extends ReplaceRule { + String s; + public StringRule(String s) { this.s = s; } + public void apply(StringBufferLike sb,RegRes res) { + sb.append(s); + } + public String toString1() { return s; } + public Object clone1() { return new StringRule(s); } +} diff --git a/src/com/stevesoft/pat/SubMark.java b/src/com/stevesoft/pat/SubMark.java new file mode 100755 index 0000000..ab5e431 --- /dev/null +++ b/src/com/stevesoft/pat/SubMark.java @@ -0,0 +1,22 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** OrMark and SubMark together implement ( ... ) */ +class SubMark extends Pattern { + int end_pos,start_pos; + OrMark om; + public String toString() { return ""; } + public int matchInternal(int i,Pthings pt) { + pt.marks[om.id+pt.nMarks] = i; + int ret=nextMatch(i,pt); + if(ret < 0) + pt.marks[om.id+pt.nMarks] = -1; + return ret; + } +} diff --git a/src/com/stevesoft/pat/TransPat.java b/src/com/stevesoft/pat/TransPat.java new file mode 100755 index 0000000..27c1fb2 --- /dev/null +++ b/src/com/stevesoft/pat/TransPat.java @@ -0,0 +1,40 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! 
+// +package com.stevesoft.pat; + +import java.util.*; + +/** This class is used to implement the Transformer + @see com.stevesoft.pat.Transform + */ +class TransPat extends Pattern { + Regex[] ra = new Regex[10]; + int ra_len = 0; + int pn = -1; + public String toString() { + return "(?#TransPat)"; + } + + TransPat() {} + + int lastMatchedTo = -1; + public int matchInternal(int pos,Pthings pt) { + for(int i=0;i= 0) { + pn = i; + return r; + } + } + pn = -1; + return -1; + } +} diff --git a/src/com/stevesoft/pat/Transformer.java b/src/com/stevesoft/pat/Transformer.java new file mode 100755 index 0000000..f65afb4 --- /dev/null +++ b/src/com/stevesoft/pat/Transformer.java @@ -0,0 +1,155 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.util.Vector; +import com.stevesoft.pat.wrap.StringWrap; + +/** Replacement rule used by the Transformer. + @see com.stevesoft.pat.Transformer + */ +class TransRepRule extends ReplaceRule { + Transformer t; + TransRepRule(Transformer t) { this.t = t; } + public String toString1() { return ""; } + public Object clone1() { return new TransRepRule(t); } + public void apply(StringBufferLike sb,RegRes rr) { + // get the ReplaceRule of the Regex that matched. + next = t.tp.ra[t.tp.pn].getReplaceRule(); + } +} + +/** Sometimes you want to replace a whole bunch of things + that might occur within a single line of text. One efficient + way to do this, both in terms of performance and programming + ease, is with Transformer. The Transformer contains an array + of Regex's and uses the Regex that matches earliest within the + text to do the replacing, if two Regex's match at the same + time it uses the one put in the Transformer first. +

    + This feature can be used to prevent transformations from + occurring in certain regions. For example, if I add the rule + s'//.*'$&' and then add the + rule s/hello/goodbye/ the Transformer will replace "hello" + with "goodbye" except when it occurs inside a double-slash + style of comment. The transformation on the comment goes first, + does nothing, and precludes transformation on the same region + of text as the s/hello/goodbye/ rule. +

    + So far, at least, this class does not have the capability of + turning into a giant robot :-) + */ +public class Transformer { + TransPat tp; + Regex rp = new Regex(); + boolean auto_optimize; + + /** Get a replacer to that works with the current Regex. + @see com.stevesoft.pat.Replacer + */ + public Replacer getReplacer() { return rp.getReplacer(); } + + /** Instantiate a new Transformer object. */ + public Transformer(boolean auto) { + auto_optimize = auto; + tp = new TransPat(); + rp.setReplaceRule(new TransRepRule(this)); + rp.thePattern = tp; + } + + /** Add a new Regex to the set of Regex's. */ + public void add(Regex r) { + if(auto_optimize) r.optimize(); + tp.ra[tp.ra_len++] = r; + if(tp.ra.length==tp.ra_len) { + Regex[] ra2 = new Regex[tp.ra_len+10]; + for(int i=0;i rp.numSubs_ ? r.numSubs_ : rp.numSubs_; + } + + /** Returns the number of Regex's in this Transformer. */ + public int patterns() { return tp.ra_len; } + + /** Get the Regex at position i in this Transformer. */ + public Regex getRegexAt(int i) { + if(i >= tp.ra_len) + throw new ArrayIndexOutOfBoundsException("i="+i+">="+patterns()); + if(i < 0) + throw new ArrayIndexOutOfBoundsException("i="+i+"< 0"); + return tp.ra[i]; + } + /** Set the Regex at position i in this Transformer. */ + public void setRegexAt(Regex rx,int i) { + if(i >= tp.ra_len) + throw new ArrayIndexOutOfBoundsException("i="+i+">="+patterns()); + if(i < 0) + throw new ArrayIndexOutOfBoundsException("i="+i+"< 0"); + tp.ra[i] = rx; + } + + /** Add a new Regex by calling Regex.perlCode + @see com.stevesoft.pat.Regex#perlCode(java.lang.String) + */ + public void add(String rs) { + Regex r = Regex.perlCode(rs); + if(r == null) throw new NullPointerException("bad pattern to Regex.perlCode: "+rs); + add(r); + } + /** Add an array of Strings (which will be converted to + Regex's via the Regex.perlCode method. 
+ @see com.stevesoft.pat.Regex#perlCode(java.lang.String) + */ + public void add(String[] array) { + for(int i=0;ideriv2.java or +deriv3.java. */ + +public class Validator { + String argsave = null; + String pattern = "."; + /** + This method does extra checking on a matched section of + a String beginning at position start and ending at end. + The idea is that you can do extra checking with this + that you don't know how to do with a standard Regex. + + If this method is successful, it returns the location + of the end of this pattern element -- that may be the + value end provided or some other value. A negative + value signifies that a match failure. + + By default, this method just returns end and thus + does nothing. + @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,com.stevesoft.pat.Validator) + */ + public int validate(StringLike src,int start,int end) { + return end; + } + /* This method allows you to modify the behavior of this + validator by making a new Validator object. If a Validator + named "foo" is defined, then the pattern "{??foo:bar}" will + cause Regex to first get the Validator given to Regex.define + and then to call its arg method with the string "bar". + If this method returns a null (the default) you get the same + behavior as the pattern "{??foo}" would supply. */ + public Validator arg(String s) { return null; } + + /** For optimization it is helpful, but not necessary, that + you define the minimum number of characters this validator + will allow to match. To do this + return new patInt(number) where number is the smallest + number of characters that can match. */ + public patInt minChars() { return new patInt(0); } + + /** For optimization it is helpful, but not necessary, that + you define the maximum number of characters this validator + will allow to match. To do this either + return new patInt(number), or new patInf() if an infinite + number of characters may match. 
*/ + public patInt maxChars() { return new patInf(); } +} diff --git a/src/com/stevesoft/pat/WantMoreTextReplaceRule.java b/src/com/stevesoft/pat/WantMoreTextReplaceRule.java new file mode 100755 index 0000000..5cc6722 --- /dev/null +++ b/src/com/stevesoft/pat/WantMoreTextReplaceRule.java @@ -0,0 +1,19 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +/** Triggers the reading of another line of text, allowing a longer + pattern to match -- for details see + WantMore.java. + */ +public class WantMoreTextReplaceRule extends SpecialRule { + public WantMoreTextReplaceRule() {} + public void apply(StringBufferLike sb,RegRes res) { + } + public String toString1() { return "${WANT_MORE_TEXT}"; } +} diff --git a/src/com/stevesoft/pat/lookAhead.java b/src/com/stevesoft/pat/lookAhead.java new file mode 100755 index 0000000..112db7c --- /dev/null +++ b/src/com/stevesoft/pat/lookAhead.java @@ -0,0 +1,46 @@ +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; +import java.util.Hashtable; + +/** Implements "(?= )" and "(?! 
)" */ +class lookAhead extends Or { + boolean reverse; + lookAhead(boolean b) { reverse = b; } + public Pattern getNext() { return null; } + public int nextMatch(int pos,Pthings pt) { + Pattern p = super.getNext(); + if(p != null) return p.matchInternal(pos,pt); + else return pos; + } + public int matchInternal(int pos,Pthings pt) { + if(super.matchInternal(pos,pt) >= 0) { + if(reverse) return -1; + else return nextMatch(pos,pt); + } else { + if(reverse) return nextMatch(pos,pt); + else return -1; + } + } + String leftForm() { + if(reverse) + return "(?!"; + else + return "(?="; + } + public patInt minChars() { return new patInt(0); } + public patInt maxChars() { return new patInt(0); } + Pattern clone1(Hashtable h) { + lookAhead la=new lookAhead(reverse); + h.put(this,la); + h.put(la,la); + for(int i=0;i'; + if(c == '[') + return ']'; + if(c == '(') + return ')'; + if(c == '{') + return '}'; + return c; + } + + final public static String codify(String s,boolean keepbs) { + return codify(s,0,s.length(),keepbs); + } + final public static String codify(String s,int i0,int iN,boolean keepbs) { + StringBuffer sb = new StringBuffer(); + boolean ucmode = false, lcmode = false, litmode = false; + boolean uc1 = false, lc1 = false; + boolean modified = false; + for(int i=i0;i= 'a' && c <= 'z') + return false; + if(c >= 'A' && c <= 'Z') + return false; + if(c >= '0' && c <= '9') + return false; + if(c == '_') + return false; + return true; + } + final static Regex parse(String s) { + boolean igncase = false, optim = false, gFlag = false; + boolean sFlag = false, mFlag = false, xFlag = false; + + StringBuffer s1 = new StringBuffer(); + StringBuffer s2 = new StringBuffer(); + int i=0,count=0; + char mode,delim='/',cdelim='/'; + if(s.length() >= 3 && s.charAt(0)=='s') { + mode = 's'; + delim = s.charAt(1); + cdelim = close(delim); + i=2; + } else if(s.length() >= 2 && s.charAt(0)=='m') { + mode = 'm'; + delim = s.charAt(1); + cdelim = close(delim); + i=2; + } else if(s.length() 
>= 1 && s.charAt(0)=='/') { + mode = 'm'; + i=1; + } else { + try { + RegSyntaxError.endItAll( + "Regex.perlCode should be of the "+ + "form s/// or m// or //"); + } catch(RegSyntax rs) {} + return null; + } + for(;i=s.length()) { + try { + RegSyntaxError.endItAll(""+mode+delim+" needs "+cdelim); + } catch(RegSyntax rs) {} + return null; + } + cdelim = close(delim = s.charAt(i)); + i++; + } + count=0; + if(mode=='s') { + for(;i i) i = p.i; + return this; + } + /** Tests to see if this represents an infinite quantity. */ + public boolean finite() { return !inf; } + /** Converts to a patInt to an int. Infinity is + mapped Integer.MAX_VALUE; + */ + public int intValue() { return inf ? Integer.MAX_VALUE : i; } +}; diff --git a/src/com/stevesoft/pat/wrap/CharArrayBufferWrap.java b/src/com/stevesoft/pat/wrap/CharArrayBufferWrap.java new file mode 100755 index 0000000..4e11067 --- /dev/null +++ b/src/com/stevesoft/pat/wrap/CharArrayBufferWrap.java @@ -0,0 +1,38 @@ +package// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +com.stevesoft.pat.wrap; + +import com.stevesoft.pat.*; + +/** Allows the outcome of a replaceAll() or replaceFirst() + to be an array of characters rather than a String. 
+ */ +public class CharArrayBufferWrap + implements BasicStringBufferLike + { + StringBuffer sb = new StringBuffer(); + public void append(char c) { + sb.append(c); + } + public void append(String s) { + sb.append(s); + } + public StringLike toStringLike() { + char[] ca = new char[sb.length()]; + for(int i=0;i= i0 && i < iend) + return (char)buf[i-i0]; + + try { + i0 = i-5; + //if(i0+offset<0) i0=(int)(-offset); + if(i0<0) i0=0; + raf.seek(i0+offset); + iend = i0+raf.read(buf,0,buf.length); + + if(i >= i0 && i < iend) + return (char)buf[i-i0]; + } catch(Throwable t) {} + + throw new ArrayIndexOutOfBoundsException("Out of bounds for file:"+ + " i="+i+ + ", Final Buffer: i0="+i0+ + " iend="+iend); + } + + public String toString() { throw new Error("Not implemented"); } + public int length() { + try { + long len = raf.length()-offset; + if(len > Integer.MAX_VALUE) + return Integer.MAX_VALUE; + return (int)len; + } catch(IOException ioe) { + return 0; + } + } + public String substring(int i1,int i2) { + StringBuffer sb = new StringBuffer(); + for(int i=i1;i + The method toStringLike() cannot work, however. + This means that the return value of replaceAll() will + be null if this Object is used as the StringBufferLike.*/ +public class WriterWrap + implements BasicStringBufferLike + { + Writer w; + public WriterWrap(Writer w) { + this.w = w; + } + public void append(char c) { + try { + w.write((int)c); + } catch(IOException ioe) {} + } + public void append(String s) { + try { + w.write(s); + } catch(IOException ioe) {} + } + + /** This operation can't really be done. */ + public StringLike toStringLike() { + return null; + } + + public Object unwrap() { + return w; + } +} -- 1.7.10.2