// // This software is now distributed according to // the Lesser Gnu Public License. Please see // http://www.gnu.org/copyleft/lesser.txt for // the details. // -- Happy Computing! // package com.stevesoft.pat; /** * This class provides a method for parsing the "s/.../.../" constructs of * Regex.perlCode. * * @see Regex#perlCode */ class parsePerl { final static char close(char c) { // This switch statement does not behave // properly when compiled with jdk1.1.5 // and the -O flag. /* * switch(c) { case '[': return ']'; case '(': return ')'; case '{': return * '}'; } return c; */ if (c == '<') { return '>'; } if (c == '[') { return ']'; } if (c == '(') { return ')'; } if (c == '{') { return '}'; } return c; } final public static String codify(String s, boolean keepbs) { return codify(s, 0, s.length(), keepbs); } final public static String codify(String s, int i0, int iN, boolean keepbs) { javajs.util.SB sb = new javajs.util.SB(); boolean ucmode = false, lcmode = false, litmode = false; boolean uc1 = false, lc1 = false; boolean modified = false; for (int i = i0; i < iN; i++) { char c = s.charAt(i); boolean mf = true, app = true; if (c == '\\') { app = false; i++; if (i < s.length()) { char c2 = s.charAt(i); switch (c2) { case 'Q': litmode = true; break; case 'U': ucmode = true; break; case 'L': lcmode = true; break; case 'u': uc1 = true; break; case 'l': lc1 = true; break; case 'E': uc1 = lc1 = ucmode = lcmode = litmode = false; break; default: if (keepbs) { sb.appendC('\\'); } c = c2; if (keepbs) { mf = false; } app = true; break; } modified |= mf; } } if (app) { if (lc1) { c = lc(c); lc1 = false; } else if (uc1) { c = uc(c); uc1 = false; } else if (ucmode) { c = uc(c); } else if (lcmode) { c = lc(c); } if (litmode && needbs(c)) { sb.appendC('\\'); } sb.appendC(c); } } return modified ? sb.toString() : s; } final static char uc(char c) { return CaseMgr.toUpperCase(c); } final static char lc(char c) { return CaseMgr.toLowerCase(c); } final static boolean needbs(char c) { if (c >= 'a' && c <= 'z') { return false; } if (c >= 'A' && c <= 'Z') { return false; } if (c >= '0' && c <= '9') { return false; } if (c == '_') { return false; } return true; } final static Regex parse(String s) { boolean igncase = false, optim = false, gFlag = false; boolean sFlag = false, mFlag = false, xFlag = false; javajs.util.SB s1 = new javajs.util.SB(); javajs.util.SB s2 = new javajs.util.SB(); int i = 0, count = 0; char mode, delim = '/', cdelim = '/'; if (s.length() >= 3 && s.charAt(0) == 's') { mode = 's'; delim = s.charAt(1); cdelim = close(delim); i = 2; } else if (s.length() >= 2 && s.charAt(0) == 'm') { mode = 'm'; delim = s.charAt(1); cdelim = close(delim); i = 2; } else if (s.length() >= 1 && s.charAt(0) == '/') { mode = 'm'; i = 1; } else { try { RegSyntaxError.endItAll("Regex.perlCode should be of the " + "form s/// or m// or //"); } catch (RegSyntax rs) { } return null; } for (; i < s.length(); i++) { if (s.charAt(i) == '\\') { s1.appendC('\\'); i++; } else if (s.charAt(i) == cdelim && count == 0) { i++; break; } else if (s.charAt(i) == delim && cdelim != delim) { count++; } else if (s.charAt(i) == cdelim && cdelim != delim) { count--; } s1.appendC(s.charAt(i)); } if (mode == 's' && cdelim != delim) { while (i < s.length() && Prop.isWhite(s.charAt(i))) { i++; } if (i >= s.length()) { try { RegSyntaxError.endItAll("" + mode + delim + " needs " + cdelim); } catch (RegSyntax rs) { } return null; } cdelim = close(delim = s.charAt(i)); i++; } count = 0; if (mode == 's') { for (; i < s.length(); i++) { if (s.charAt(i) == '\\') { s2.appendC('\\'); i++; } else if (s.charAt(i) == cdelim && count == 0) { i++; break; } else if (s.charAt(i) == delim && cdelim != delim) { count++; } else if (s.charAt(i) == cdelim && cdelim != delim) { count--; } s2.appendC(s.charAt(i)); } } for (; i < s.length(); i++) { char c = s.charAt(i); switch (c) { case 'x': xFlag = true; break; case 'i': igncase = true; break; case 'o': optim = true; break; case 's': sFlag = true; break; case 'm': mFlag = true; break; case 'g': gFlag = true; break; default: // syntax error! try { RegSyntaxError.endItAll("Illegal flag to pattern: " + c); } catch (RegSyntax rs) { } return null; } } Regex r = new Regex(); try { String pat = s1.toString(), reprul = s2.toString(); if (xFlag) { pat = strip(pat); reprul = strip(reprul); } r.compile(pat); r.ignoreCase |= igncase; r.gFlag |= gFlag; r.sFlag |= sFlag; r.mFlag |= mFlag; if (optim) { r.optimize(); } if (delim == '\'') { r.setReplaceRule(new StringRule(reprul)); } else { r.setReplaceRule(parsePerl.perlCode(reprul)); } } catch (RegSyntax rs) { r = null; } return r; } static String strip(String s) { javajs.util.SB sb = new javajs.util.SB(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (Prop.isWhite(c)) { ; } else if (c == '#') { i++; while (i < s.length()) { if (s.charAt(i) == '\n') { break; } i++; } } else if (c == '\\') { sb.appendC(c); sb.appendC(s.charAt(++i)); } else { sb.appendC(c); } } return sb.toString(); } /** * Compile a ReplaceRule using the text that would go between the second and * third /'s in a typical substitution pattern in Perl: s/ ... / The * argument to ReplaceRule.perlCode /. */ public static ReplaceRule perlCode(String s) { // String sav_backGs = Regex.backGs; // int sav_backGto = Regex.backGto; try { int mf = 0, mt = 0; Regex gv = ReplaceRule.getv(); ReplaceRule head = null; Object tmp = null; while (gv.searchFrom(s, mt)) { int off = Regex.BackRefOffset - 1; mf = gv.matchedFrom(); if (mf > mt) { head = ReplaceRule.add(head, new StringRule(s.substring(mt, mf))); } String var = null; if ((var = gv.stringMatched(1 + off)) != null || (var = gv.stringMatched(2 + off)) != null || (var = gv.stringMatched(5 + off)) != null) { int d = 0; for (int i = 0; i < var.length(); i++) { d = 8 * d + (var.charAt(i) - '0'); } if (var.length() == 1) { head = ReplaceRule.add(head, new BackRefRule(d)); } else { head = new StringRule("" + (char) d); } } else if ((var = gv.stringMatched(10 + off)) != null) { if ("QELlUu".indexOf(var) >= 0) { head = ReplaceRule.add(head, new CodeRule(var.charAt(0))); } else { head = ReplaceRule.add(head, new StringRule(var)); } } else if ((var = gv.stringMatched(3 + off)) != null || (var = gv.stringMatched(4 + off)) != null || (var = gv.stringMatched(6 + off)) != null) { String arg = ""; int pc; if ((pc = var.indexOf(':')) > 0) { arg = var.substring(pc + 1); var = var.substring(0, pc); } if (var.equals("&") || var.equals("MATCH")) { head = ReplaceRule.add(head, new AmpersandRule()); } else if (var.equals("`") || var.equals("PREMATCH")) { head = ReplaceRule.add(head, new LeftRule()); } else if (var.equals("'") || var.equals("POSTMATCH")) { head = ReplaceRule.add(head, new RightRule()); } else if (var.equals("WANT_MORE_TEXT")) { head = ReplaceRule.add(head, new WantMoreTextReplaceRule()); } else if (var.equals("POP")) { head = ReplaceRule.add(head, new PopRule()); } else if (var.startsWith("+") && (tmp = ReplaceRule.defs.get(var.substring(1))) != null) { if (tmp instanceof Regex) { head = ReplaceRule.add(head, new PushRule(var.substring(1), (Regex) tmp)); } else if (tmp instanceof Transformer) { head = ReplaceRule.add(head, new PushRule(var.substring(1), (Transformer) tmp)); } else { head = ReplaceRule.add(head, new StringRule("${" + var + "}")); } } else if (var.startsWith("=") && (tmp = ReplaceRule.defs.get(var.substring(1))) != null) { if (tmp instanceof Regex) { head = ReplaceRule.add(head, new ChangeRule(var.substring(1), (Regex) tmp)); } else if (tmp instanceof Transformer) { head = ReplaceRule.add(head, new ChangeRule(var.substring(1), (Transformer) tmp)); } else { head = ReplaceRule.add(head, new StringRule("${" + var + "}")); } } else if ((tmp = ReplaceRule.defs.get(var)) != null) { if (tmp instanceof ReplaceRule) { ReplaceRule alt = ((ReplaceRule) tmp).arg(arg); if (alt == null) { alt = ((ReplaceRule) tmp); } head = ReplaceRule.add(head, (ReplaceRule) (alt.clone())); } } else // can't figure out how to transform this thing... { head = ReplaceRule.add(head, new StringRule("${" + var + "}")); } } else if ((var = gv.stringMatched(7 + off)) != null) { char c = var.charAt(0); if (c == 'n') { head = ReplaceRule.add(head, new StringRule("\n")); } else if (c == 't') { head = ReplaceRule.add(head, new StringRule("\t")); } else if (c == 'r') { head = ReplaceRule.add(head, new StringRule("\r")); } else if (c == 'b') { head = ReplaceRule.add(head, new StringRule("\r")); } else if (c == 'a') { head = ReplaceRule.add(head, new StringRule("" + (char) 7)); } else if (c == 'e') { head = ReplaceRule.add(head, new StringRule("" + (char) 27)); } else if (c == 'f') { head = ReplaceRule.add(head, new StringRule("" + (char) 12)); } } else if ((var = gv.stringMatched(8 + off)) != null) { char c = var.charAt(0); if (c < Ctrl.cmap.length) { c = Ctrl.cmap[c]; } head = ReplaceRule.add(head, new StringRule("" + c)); } else if ((var = gv.stringMatched(9 + off)) != null) { int d = 16 * ReplaceRule.getHexDigit(var.charAt(0)) + ReplaceRule.getHexDigit(var.charAt(1)); head = ReplaceRule.add(head, new StringRule("" + (char) d)); } mt = gv.matchedTo(); } if (mt <= s.length()) { head = ReplaceRule.add(head, new StringRule(s.substring(mt))); } return head; } finally { // Regex.backGs = sav_backGs; // Regex.backGto = sav_backGto; } } }