src/com/stevesoft/pat/Regex.java

   1 //
   2 // This software is now distributed according to
   3 // the Lesser Gnu Public License.  Please see
   4 // http://www.gnu.org/copyleft/lesser.txt for
   5 // the details.
   6 //    -- Happy Computing!
   7 //
   8 package com.stevesoft.pat;
   9
  10 import java.io.*;
  11 import java.util.*;
  12
  13 import com.stevesoft.pat.wrap.*;
  14
  15 /** Matches a Unicode punctuation character. */
  16 class UnicodePunct extends UniValidator
  17 {
  18   public int validate(StringLike s, int from, int to)
  19   {
  20     return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;
  21   }
  22 }
  23
  24 /** Matches a Unicode white space character. */
  25 class UnicodeWhite extends UniValidator
  26 {
  27   public int validate(StringLike s, int from, int to)
  28   {
  29     return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;
  30   }
  31 }
  32
  33 /**
  34  * Matches a character that is not a Unicode punctuation character.
  35  */
  36 class NUnicodePunct extends UniValidator
  37 {
  38   public int validate(StringLike s, int from, int to)
  39   {
  40     return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;
  41   }
  42 }
  43
  44 /**
  45  * Matches a character that is not a Unicode white space character.
  46  */
  47 class NUnicodeWhite extends UniValidator
  48 {
  49   public int validate(StringLike s, int from, int to)
  50   {
  51     return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;
  52   }
  53 }
  54
  55 /** Matches a Unicode word character: an alphanumeric or underscore. */
  56 class UnicodeW extends UniValidator
  57 {
  58   public int validate(StringLike s, int from, int to)
  59   {
  60     if (from >= s.length())
  61     {
  62       return -1;
  63     }
  64     char c = s.charAt(from);
  65     return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
  66             : -1;
  67   }
  68 }
  69
  70 /** Matches a character that is not a Unicode alphanumeric or underscore. */
  71 class NUnicodeW extends UniValidator
  72 {
  73   public int validate(StringLike s, int from, int to)
  74   {
  75     if (from >= s.length())
  76     {
  77       return -1;
  78     }
  79     char c = s.charAt(from);
  80     return !(Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
  81             : -1;
  82   }
  83 }
  84
  85 /** Matches a Unicode decimal digit. */
  86 class UnicodeDigit extends UniValidator
  87 {
  88   public int validate(StringLike s, int from, int to)
  89   {
  90     return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to
  91             : -1;
  92   }
  93 }
  94
  95 /** Matches a character that is not a Unicode digit. */
  96 class NUnicodeDigit extends UniValidator
  97 {
  98   public int validate(StringLike s, int from, int to)
  99   {
 100     return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to
 101             : -1;
 102   }
 103 }
 104
 105 /** Matches a Unicode math character. */
 106 class UnicodeMath extends UniValidator
 107 {
 108   public int validate(StringLike s, int from, int to)
 109   {
 110     return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1;
 111   }
 112 }
 113
 114 /** Matches a non-math Unicode character. */
 115 class NUnicodeMath extends UniValidator
 116 {
 117   public int validate(StringLike s, int from, int to)
 118   {
 119     return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;
 120   }
 121 }
 122
 123 /** Matches a Unicode currency symbol. */
 124 class UnicodeCurrency extends UniValidator
 125 {
 126   public int validate(StringLike s, int from, int to)
 127   {
 128     return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;
 129   }
 130 }
 131
 132 /** Matches a non-currency symbol Unicode character. */
 133 class NUnicodeCurrency extends UniValidator
 134 {
 135   public int validate(StringLike s, int from, int to)
 136   {
 137     return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;
 138   }
 139 }
 140
 141 /** Matches a Unicode alphabetic character. */
 142 class UnicodeAlpha extends UniValidator
 143 {
 144   public int validate(StringLike s, int from, int to)
 145   {
 146     return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;
 147   }
 148 }
 149
 150 /** Matches a non-alphabetic Unicode character. */
 151 class NUnicodeAlpha extends UniValidator
 152 {
 153   public int validate(StringLike s, int from, int to)
 154   {
 155     return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to
 156             : -1;
 157   }
 158 }
 159
 160 /** Matches an upper case Unicode character. */
 161 class UnicodeUpper extends UniValidator
 162 {
 163   public int validate(StringLike s, int from, int to)
 164   {
 165     return from < s.length() && isUpper(s.charAt(from)) ? to : -1;
 166   }
 167
 168   final boolean isUpper(char c)
 169   {
 170     return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);
 171   }
 172 }
 173
 174 /** Matches an upper case Unicode character. */
 175 class UnicodeLower extends UniValidator
 176 {
 177   public int validate(StringLike s, int from, int to)
 178   {
 179     return from < s.length() && isLower(s.charAt(from)) ? to : -1;
 180   }
 181
 182   final boolean isLower(char c)
 183   {
 184     return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);
 185   }
 186 }
 187
 188 /**
 189  * Regex provides the parser which constructs the linked list of Pattern classes
 190  * from a String.
 191  * <p>
 192  * For the purpose of this documentation, the fact that java interprets the
 193  * backslash will be ignored. In practice, however, you will need a double
 194  * backslash to obtain a string that contains a single backslash character.
 195  * Thus, the example pattern "\b" should really be typed as "\\b" inside java
 196  * code.
 197  * <p>
 198  * Note that Regex is part of package "com.stevesoft.pat". To use it, simply
 199  * import com.stevesoft.pat.Regex at the top of your file.
 200  * <p>
 201  * Regex is made with a constructor that takes a String that defines the regular
 202  * expression. Thus, for example
 203  *
 204  * <pre>
 205  * Regex r = new Regex(&quot;[a-c]*&quot;);
 206  * </pre>
 207  *
 * matches any number of characters so long as they are 'a', 'b', or 'c'.
 209  * <p>
 210  * To attempt to match the Pattern to a given string, you can use either the
 211  * search(String) member function, or the matchAt(String,int position) member
 212  * function. These functions return a boolean which tells you whether or not the
 213  * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in
 214  * the Regex object appropriately.
 215  * <p>
 216  * The portion of the string before the match can be obtained by the left()
 217  * member, and the portion after the match can be obtained by the right()
 218  * member.
 219  * <p>
 220  * Essentially, this package implements a syntax that is very much like the perl
 221  * 5 regular expression syntax.
 222  *
 223  * Longer example:
 224  *
 225  * <pre>
 226  * Regex r = new Regex(&quot;x(a|b)y&quot;);
 227  * r.matchAt(&quot;xay&quot;, 0);
 228  * System.out.println(&quot;sub = &quot; + r.stringMatched(1));
 229  * </pre>
 230  *
 231  * The above would print "sub = a".
 232  *
 233  * <pre>
 234  *  r.left() // would return &quot;x&quot;
 235  *  r.right() // would return &quot;y&quot;
 236  * </pre>
 237  *
 238  * <p>
 239  * Differences between this package and perl5:<br>
 240  * The extended Pattern for setting flags, is now supported, but the flags are
 241  * different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the
 242  * "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the
 243  * escape character. The pattern
 244  *
 245  * <pre>
 246  * (?e=#)#d+
 247  * </pre>
 248  *
 249  * is the same as
 250  *
 251  * <pre>
 252  * \d+
 253  * </pre>, but note that the sequence
 254  *
 255  * <pre>
 256  * (?e=#)
 257  * </pre>
 258  *
 259  * <b>must</b> occur at the very beginning of the pattern. There may be other
 260  * small differences as well. I will either make my package conform or note them
 261  * as I become aware of them.
 262  * <p>
 263  * This package supports additional patterns not in perl5: <center> <table
 264  * border=1>
 265  * <tr>
 266  * <td>(?@())</td>
 267  * <td>Group</td>
 268  * <td>This matches all characters between the '(' character and the balancing
 269  * ')' character. Thus, it will match "()" as well as "(())". The balancing
 270  * characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>
 271  * <tr>
 * <td>(?&lt;1)</td>
 273  * <td>Backup</td>
 274  * <td>Moves the pointer backwards within the text. This allows you to make a
 275  * "look behind." It fails if it attempts to move to a position before the
 * beginning of the string. "x(?&lt;1)" is equivalent to "(?=x)". The number, 1
 277  * in this example, is the number of characters to move backwards.</td>
 * </table> </center>
 280  *
 281  * @author Steven R. Brandt
 282  * @version package com.stevesoft.pat, release 1.5.3
 283  * @see Pattern
 284  */
 285 public class Regex extends RegRes implements FilenameFilter
 286 {
 287   /**
 288    * BackRefOffset gives the identity number of the first pattern. Version 1.0
 289    * used zero, version 1.1 uses 1 to be more compatible with perl.
 290    */
 291   static int BackRefOffset = 1;
 292
 293   private static Pattern none = new NoPattern();
 294
 295   Pattern thePattern = none;
 296
 297   patInt minMatch = new patInt(0);
 298
 299   static Hashtable validators = new Hashtable();
 300   static
 301   {
 302     define("p", "(?>1)", new UnicodePunct());
 303     define("P", "(?>1)", new NUnicodePunct());
 304     define("s", "(?>1)", new UnicodeWhite());
 305     define("S", "(?>1)", new NUnicodeWhite());
 306     define("w", "(?>1)", new UnicodeW());
 307     define("W", "(?>1)", new NUnicodeW());
 308     define("d", "(?>1)", new UnicodeDigit());
 309     define("D", "(?>1)", new NUnicodeDigit());
 310     define("m", "(?>1)", new UnicodeMath());
 311     define("M", "(?>1)", new NUnicodeMath());
 312     define("c", "(?>1)", new UnicodeCurrency());
 313     define("C", "(?>1)", new NUnicodeCurrency());
 314     define("a", "(?>1)", new UnicodeAlpha());
 315     define("A", "(?>1)", new NUnicodeAlpha());
 316     define("uc", "(?>1)", new UnicodeUpper());
 317     define("lc", "(?>1)", new UnicodeLower());
 318   }
 319
 320   /** Set the dontMatch in quotes flag. */
 321   public void setDontMatchInQuotes(boolean b)
 322   {
 323     dontMatchInQuotes = b;
 324   }
 325
 326   /** Find out if the dontMatchInQuotes flag is enabled. */
 327   public boolean getDontMatchInQuotes()
 328   {
 329     return dontMatchInQuotes;
 330   }
 331
 332   boolean dontMatchInQuotes = false;
 333
 334   /**
 335    * Set the state of the ignoreCase flag. If set to true, then the pattern
 336    * matcher will ignore case when searching for a match.
 337    */
 338   public void setIgnoreCase(boolean b)
 339   {
 340     ignoreCase = b;
 341   }
 342
 343   /**
 344    * Get the state of the ignoreCase flag. Returns true if we are ignoring the
 345    * case of the pattern, false otherwise.
 346    */
 347   public boolean getIgnoreCase()
 348   {
 349     return ignoreCase;
 350   }
 351
 352   boolean ignoreCase = false;
 353
 354   static boolean defaultMFlag = false;
 355
 356   /**
 357    * Set the default value of the m flag. If it is set to true, then the MFlag
 358    * will be on for any regex search executed.
 359    */
 360   public static void setDefaultMFlag(boolean mFlag)
 361   {
 362     defaultMFlag = mFlag;
 363   }
 364
 365   /**
 366    * Get the default value of the m flag. If it is set to true, then the MFlag
 367    * will be on for any regex search executed.
 368    */
 369   public static boolean getDefaultMFlag()
 370   {
 371     return defaultMFlag;
 372   }
 373
 374   /**
 375    * Initializes the object without a Pattern. To supply a Pattern use
 376    * compile(String s).
 377    *
 378    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 379    */
 380   public Regex()
 381   {
 382   }
 383
 384   /**
 385    * Create and compile a Regex, but do not throw any exceptions. If you wish to
 386    * have exceptions thrown for syntax errors, you must use the Regex(void)
 387    * constructor to create the Regex object, and then call the compile method.
 388    * Therefore, you should only call this method when you know your pattern is
 389    * right. I will probably become more like
 390    *
 391    * @see com.stevesoft.pat.Regex#search(java.lang.String)
 392    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 393    */
 394   public Regex(String s)
 395   {
 396     try
 397     {
 398       compile(s);
 399     } catch (RegSyntax rs)
 400     {
 401     }
 402   }
 403
 404   ReplaceRule rep = null;
 405
 406   /**
 407    * Create and compile both a Regex and a ReplaceRule.
 408    *
 409    * @see com.stevesoft.pat.ReplaceRule
 410    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 411    */
 412   public Regex(String s, String rp)
 413   {
 414     this(s);
 415     rep = ReplaceRule.perlCode(rp);
 416   }
 417
 418   /**
 419    * Create and compile a Regex, but give it the ReplaceRule specified. This
 420    * allows the user finer control of the Replacement process, if that is
 421    * desired.
 422    *
 423    * @see com.stevesoft.pat.ReplaceRule
 424    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 425    */
 426   public Regex(String s, ReplaceRule rp)
 427   {
 428     this(s);
 429     rep = rp;
 430   }
 431
 432   /**
 433    * Change the ReplaceRule of this Regex by compiling a new one using String
 434    * rp.
 435    */
 436   public void setReplaceRule(String rp)
 437   {
 438     rep = ReplaceRule.perlCode(rp);
 439     repr = null; // Clear Replacer history
 440   }
 441
 442   /** Change the ReplaceRule of this Regex to rp. */
 443   public void setReplaceRule(ReplaceRule rp)
 444   {
 445     rep = rp;
 446   }
 447
 448   /**
 449    * Test to see if a custom defined rule exists.
 450    *
 451    * @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator)
 452    */
 453   public static boolean isDefined(String nm)
 454   {
 455     return validators.get(nm) != null;
 456   }
 457
 458   /**
 459    * Removes a custom defined rule.
 460    *
 461    * @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator)
 462    */
 463   public static void undefine(String nm)
 464   {
 465     validators.remove(nm);
 466   }
 467
 468   /**
 469    * Defines a method to create a new rule. See test/deriv2.java and
 470    * test/deriv3.java for examples of how to use it.
 471    */
 472   public static void define(String nm, String pat, Validator v)
 473   {
 474     v.pattern = pat;
 475     validators.put(nm, v);
 476   }
 477
 478   /**
 479    * Defines a shorthand for a pattern. The pattern will be invoked by a string
 480    * that has the form "(??"+nm+")".
 481    */
 482   public static void define(String nm, String pat)
 483   {
 484     validators.put(nm, pat);
 485   }
 486
 487   /** Get the current ReplaceRule. */
 488   public ReplaceRule getReplaceRule()
 489   {
 490     return rep;
 491   }
 492
 493   Replacer repr = null;
 494
 495   final Replacer _getReplacer()
 496   {
 497     return repr == null ? repr = new Replacer() : repr;
 498   }
 499
 500   public Replacer getReplacer()
 501   {
 502     if (repr == null)
 503     {
 504       repr = new Replacer();
 505     }
 506     repr.rh.me = this;
 507     repr.rh.prev = null;
 508     return repr;
 509   }
 510
 511   /**
 512    * Replace the first occurence of this pattern in String s according to the
 513    * ReplaceRule.
 514    *
 515    * @see com.stevesoft.pat.ReplaceRule
 516    * @see com.stevesoft.pat.Regex#getReplaceRule()
 517    */
 518   public String replaceFirst(String s)
 519   {
 520     return _getReplacer().replaceFirstRegion(s, this, 0, s.length())
 521             .toString();
 522   }
 523
 524   /**
 525    * Replace the first occurence of this pattern in String s beginning with
 526    * position pos according to the ReplaceRule.
 527    *
 528    * @see com.stevesoft.pat.ReplaceRule
 529    * @see com.stevesoft.pat.Regex#getReplaceRule()
 530    */
 531   public String replaceFirstFrom(String s, int pos)
 532   {
 533     return _getReplacer().replaceFirstRegion(s, this, pos, s.length())
 534             .toString();
 535   }
 536
 537   /**
 538    * Replace the first occurence of this pattern in String s beginning with
 539    * position start and ending with end according to the ReplaceRule.
 540    *
 541    * @see com.stevesoft.pat.ReplaceRule
 542    * @see com.stevesoft.pat.Regex#getReplaceRule()
 543    */
 544   public String replaceFirstRegion(String s, int start, int end)
 545   {
 546     return _getReplacer().replaceFirstRegion(s, this, start, end)
 547             .toString();
 548   }
 549
 550   /**
 551    * Replace all occurences of this pattern in String s according to the
 552    * ReplaceRule.
 553    *
 554    * @see com.stevesoft.pat.ReplaceRule
 555    * @see com.stevesoft.pat.Regex#getReplaceRule()
 556    */
 557   public String replaceAll(String s)
 558   {
 559     return _getReplacer().replaceAllRegion(s, this, 0, s.length())
 560             .toString();
 561   }
 562
 563   public StringLike replaceAll(StringLike s)
 564   {
 565     return _getReplacer().replaceAllRegion(s, this, 0, s.length());
 566   }
 567
 568   /**
 569    * Replace all occurences of this pattern in String s beginning with position
 570    * pos according to the ReplaceRule.
 571    *
 572    * @see com.stevesoft.pat.ReplaceRule
 573    * @see com.stevesoft.pat.Regex#getReplaceRule()
 574    */
 575   public String replaceAllFrom(String s, int pos)
 576   {
 577     return _getReplacer().replaceAllRegion(s, this, pos, s.length())
 578             .toString();
 579   }
 580
 581   /**
 582    * Replace all occurences of this pattern in String s beginning with position
 583    * start and ending with end according to the ReplaceRule.
 584    *
 585    * @see com.stevesoft.pat.ReplaceRule
 586    * @see com.stevesoft.pat.Regex#getReplaceRule()
 587    */
 588   public String replaceAllRegion(String s, int start, int end)
 589   {
 590     return _getReplacer().replaceAllRegion(s, this, start, end).toString();
 591   }
 592
 593   /** Essentially clones the Regex object */
 594   public Regex(Regex r)
 595   {
 596     super((RegRes) r);
 597     dontMatchInQuotes = r.dontMatchInQuotes;
 598     esc = r.esc;
 599     ignoreCase = r.ignoreCase;
 600     gFlag = r.gFlag;
 601     if (r.rep == null)
 602     {
 603       rep = null;
 604     }
 605     else
 606     {
 607       rep = (ReplaceRule) r.rep.clone();
 608     }
 609     /*
 610      * try { compile(r.toString()); } catch(RegSyntax r_) {}
 611      */
 612     thePattern = r.thePattern.clone(new Hashtable());
 613     minMatch = r.minMatch;
 614     skipper = r.skipper;
 615   }
 616
 617   /**
 618    * By default, the escape character is the backslash, but you can make it
 619    * anything you want by setting this variable.
 620    */
 621   public char esc = Pattern.ESC;
 622
 623   /**
 624    * This method compiles a regular expression, making it possible to call the
 625    * search or matchAt methods.
 626    *
 627    * @exception com.stevesoft.pat.RegSyntax
 628    *                    is thrown if a syntax error is encountered in the
 629    *                    pattern. For example, "x{3,1}" or "*a" are not valid
 630    *                    patterns.
 631    * @see com.stevesoft.pat.Regex#search
 632    * @see com.stevesoft.pat.Regex#matchAt
 633    */
 634   public void compile(String prepat) throws RegSyntax
 635   {
 636     String postpat = parsePerl.codify(prepat, true);
 637     String pat = postpat == null ? prepat : postpat;
 638     minMatch = null;
 639     ignoreCase = false;
 640     dontMatchInQuotes = false;
 641     Rthings mk = new Rthings(this);
 642     int offset = mk.val;
 643     String newpat = pat;
 644     thePattern = none;
 645     p = null;
 646     or = null;
 647     minMatch = new patInt(0);
 648     StrPos sp = new StrPos(pat, 0);
 649     if (sp.incMatch("(?e="))
 650     {
 651       char newEsc = sp.c;
 652       sp.inc();
 653       if (sp.match(')'))
 654       {
 655         newpat = reEscape(pat.substring(6), newEsc, Pattern.ESC);
 656       }
 657     }
 658     else if (esc != Pattern.ESC)
 659     {
 660       newpat = reEscape(pat, esc, Pattern.ESC);
 661     }
 662     thePattern = _compile(newpat, mk);
 663     numSubs_ = mk.val - offset;
 664     mk.set(this);
 665   }
 666
 667   /*
 668    * If a Regex is compared against a Regex, a check is done to see that the
 669    * patterns are equal as well as the most recent match. If a Regex is compare
 670    * with a RegRes, only the result of the most recent match is compared.
 671    */
 672   public boolean equals(Object o)
 673   {
 674     if (o instanceof Regex)
 675     {
 676       if (toString().equals(o.toString()))
 677       {
 678         return super.equals(o);
 679       }
 680       else
 681       {
 682         return false;
 683       }
 684     }
 685     else
 686     {
 687       return super.equals(o);
 688     }
 689   }
 690
 691   /** A clone by any other name would smell as sweet. */
 692   public Object clone()
 693   {
 694     return new Regex(this);
 695   }
 696
 697   /** Return a clone of the underlying RegRes object. */
 698   public RegRes result()
 699   {
 700     return (RegRes) super.clone();
 701   }
 702
 703   // prep sets global variables of class
 704   // Pattern so that it can access them
 705   // during an attempt at a match
 706   Pthings pt = new Pthings();
 707
 708   final Pthings prep(StringLike s)
 709   {
 710     // if(gFlag)
 711     pt.lastPos = matchedTo();
 712     if (pt.lastPos < 0)
 713     {
 714       pt.lastPos = 0;
 715     }
 716     if ((s == null ? null : s.unwrap()) != (src == null ? null : s.unwrap()))
 717     {
 718       pt.lastPos = 0;
 719     }
 720     src = s;
 721     pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);
 722     pt.mFlag = (mFlag | defaultMFlag);
 723     pt.ignoreCase = ignoreCase;
 724     pt.no_check = false;
 725     if (pt.marks != null)
 726     {
 727       for (int i = 0; i < pt.marks.length; i++)
 728       {
 729         pt.marks[i] = -1;
 730       }
 731     }
 732     pt.marks = null;
 733     pt.nMarks = numSubs_;
 734     pt.src = s;
 735     if (dontMatchInQuotes)
 736     {
 737       setCbits(s, pt);
 738     }
 739     else
 740     {
 741       pt.cbits = null;
 742     }
 743     return pt;
 744   }
 745
 746   /**
 747    * Attempt to match a Pattern beginning at a specified location within the
 748    * string.
 749    *
 750    * @see com.stevesoft.pat.Regex#search
 751    */
 752   public boolean matchAt(String s, int start_pos)
 753   {
 754     return _search(s, start_pos, start_pos);
 755   }
 756
 757   /**
 758    * Attempt to match a Pattern beginning at a specified location within the
 759    * StringLike.
 760    *
 761    * @see com.stevesoft.pat.Regex#search
 762    */
 763   public boolean matchAt(StringLike s, int start_pos)
 764   {
 765     return _search(s, start_pos, start_pos);
 766   }
 767
 768   /**
 769    * Search through a String for the first occurrence of a match.
 770    *
 771    * @see com.stevesoft.pat.Regex#searchFrom
 772    * @see com.stevesoft.pat.Regex#matchAt
 773    */
 774   public boolean search(String s)
 775   {
 776     if (s == null)
 777     {
 778       throw new NullPointerException("Null String Given to Regex.search");
 779     }
 780     return _search(s, 0, s.length());
 781   }
 782
 783   public boolean search(StringLike sl)
 784   {
 785     if (sl == null)
 786     {
 787       throw new NullPointerException(
 788               "Null StringLike Given to Regex.search");
 789     }
 790     return _search(sl, 0, sl.length());
 791   }
 792
 793   public boolean reverseSearch(String s)
 794   {
 795     if (s == null)
 796     {
 797       throw new NullPointerException(
 798               "Null String Given to Regex.reverseSearch");
 799     }
 800     return _reverseSearch(s, 0, s.length());
 801   }
 802
 803   public boolean reverseSearch(StringLike sl)
 804   {
 805     if (sl == null)
 806     {
 807       throw new NullPointerException(
 808               "Null StringLike Given to Regex.reverseSearch");
 809     }
 810     return _reverseSearch(sl, 0, sl.length());
 811   }
 812
 813   /**
 814    * Search through a String for the first occurence of a match, but start at
 815    * position
 816    *
 817    * <pre>
 818    * start
 819    * </pre>
 820    */
 821   public boolean searchFrom(String s, int start)
 822   {
 823     if (s == null)
 824     {
 825       throw new NullPointerException(
 826               "Null String Given to Regex.searchFrom");
 827     }
 828     return _search(s, start, s.length());
 829   }
 830
 831   public boolean searchFrom(StringLike s, int start)
 832   {
 833     if (s == null)
 834     {
 835       throw new NullPointerException(
 836               "Null String Given to Regex.searchFrom");
 837     }
 838     return _search(s, start, s.length());
 839   }
 840
 841   /**
 842    * Search through a region of a String for the first occurence of a match.
 843    */
 844   public boolean searchRegion(String s, int start, int end)
 845   {
 846     if (s == null)
 847     {
 848       throw new NullPointerException(
 849               "Null String Given to Regex.searchRegion");
 850     }
 851     return _search(s, start, end);
 852   }
 853
 854   /**
 855    * Set this to change the default behavior of the "." pattern. By default it
 856    * now matches perl's behavior and fails to match the '\n' character.
 857    */
 858   public static boolean dotDoesntMatchCR = true;
 859
 860   StringLike gFlags;
 861
 862   int gFlagto = 0;
 863
 864   boolean gFlag = false;
 865
 866   /** Set the 'g' flag */
 867   public void setGFlag(boolean b)
 868   {
 869     gFlag = b;
 870   }
 871
 872   /** Get the state of the 'g' flag. */
 873   public boolean getGFlag()
 874   {
 875     return gFlag;
 876   }
 877
 878   boolean sFlag = false;
 879
 880   /** Get the state of the sFlag */
 881   public boolean getSFlag()
 882   {
 883     return sFlag;
 884   }
 885
 886   boolean mFlag = false;
 887
 888   /** Get the state of the sFlag */
 889   public boolean getMFlag()
 890   {
 891     return mFlag;
 892   }
 893
 894   final boolean _search(String s, int start, int end)
 895   {
 896     return _search(new StringWrap(s), start, end);
 897   }
 898
 899   final boolean _search(StringLike s, int start, int end)
 900   {
 901     if (gFlag && gFlagto > 0 && gFlags != null
 902             && s.unwrap() == gFlags.unwrap())
 903     {
 904       start = gFlagto;
 905     }
 906     gFlags = null;
 907
 908     Pthings pt = prep(s);
 909
 910     int up = (minMatch == null ? end : end - minMatch.i);
 911
 912     if (up < start && end >= start)
 913     {
 914       up = start;
 915     }
 916
 917     if (skipper == null)
 918     {
 919       for (int i = start; i <= up; i++)
 920       {
 921         charsMatched_ = thePattern.matchAt(s, i, pt);
 922         if (charsMatched_ >= 0)
 923         {
 924           matchFrom_ = thePattern.mfrom;
 925           marks = pt.marks;
 926           gFlagto = matchFrom_ + charsMatched_;
 927           gFlags = s;
 928           return didMatch_ = true;
 929         }
 930       }
 931     }
 932     else
 933     {
 934       pt.no_check = true;
 935       for (int i = start; i <= up; i++)
 936       {
 937         i = skipper.find(src, i, up);
 938         if (i < 0)
 939         {
 940           charsMatched_ = matchFrom_ = -1;
 941           return didMatch_ = false;
 942         }
 943         charsMatched_ = thePattern.matchAt(s, i, pt);
 944         if (charsMatched_ >= 0)
 945         {
 946           matchFrom_ = thePattern.mfrom;
 947           marks = pt.marks;
 948           gFlagto = matchFrom_ + charsMatched_;
 949           gFlags = s;
 950           return didMatch_ = true;
 951         }
 952       }
 953     }
 954     return didMatch_ = false;
 955   }
 956
 957   /*
 958    * final boolean _search(LongStringLike s,long start,long end) { if(gFlag &&
 959    * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null;
 960    *
 961    * Pthings pt=prep(s);
 962    *
 963    * int up = end;//(minMatch == null ? end : end-minMatch.i);
 964    *
 965    * if(up < start && end >= start) up = start;
 966    *
 967    * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ =
 968    * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
 969    * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
 970    * return didMatch_=true; } } } else { pt.no_check = true; for(long i=start;i<=up;i++) {
 971    * i = skipper.find(src,i,up); if(i<0) { charsMatched_ = matchFrom_ = -1;
 972    * return didMatch_ = false; } charsMatched_ = thePattern.matchAt(s,i,pt);
 973    * if(charsMatched_ >= 0) { matchFrom_ = thePattern.mfrom; marks = pt.marks;
 974    * gFlagto = matchFrom_+charsMatched_; gFlags = s; return didMatch_=true; }
 975    * else { i = s.adjustIndex(i); up = s.adjustEnd(i); } } } return
 976    * didMatch_=false; }
 977    */
 978
 979   boolean _reverseSearch(String s, int start, int end)
 980   {
 981     return _reverseSearch(new StringWrap(s), start, end);
 982   }
 983
 984   boolean _reverseSearch(StringLike s, int start, int end)
 985   {
 986     if (gFlag && gFlagto > 0 && s.unwrap() == gFlags.unwrap())
 987     {
 988       end = gFlagto;
 989     }
 990     gFlags = null;
 991     Pthings pt = prep(s);
 992     for (int i = end; i >= start; i--)
 993     {
 994       charsMatched_ = thePattern.matchAt(s, i, pt);
 995       if (charsMatched_ >= 0)
 996       {
 997         matchFrom_ = thePattern.mfrom;
 998         marks = pt.marks;
 999         gFlagto = matchFrom_ - 1;
1000         gFlags = s;
1001         return didMatch_ = true;
1002       }
1003     }
1004     return didMatch_ = false;
1005   }
1006
1007   // This routine sets the cbits variable
1008   // of class Pattern. Cbits is true for
1009   // the bit corresponding to a character inside
1010   // a set of quotes.
1011   static StringLike lasts = null;
1012
1013   static BitSet lastbs = null;
1014
1015   static void setCbits(StringLike s, Pthings pt)
1016   {
1017     if (s == lasts)
1018     {
1019       pt.cbits = lastbs;
1020       return;
1021     }
1022     BitSet bs = new BitSet(s.length());
1023     char qc = ' ';
1024     boolean setBit = false;
1025     for (int i = 0; i < s.length(); i++)
1026     {
1027       if (setBit)
1028       {
1029         bs.set(i);
1030       }
1031       char c = s.charAt(i);
1032       if (!setBit && c == '"')
1033       {
1034         qc = c;
1035         setBit = true;
1036         bs.set(i);
1037       }
1038       else if (!setBit && c == '\'')
1039       {
1040         qc = c;
1041         setBit = true;
1042         bs.set(i);
1043       }
1044       else if (setBit && c == qc)
1045       {
1046         setBit = false;
1047       }
1048       else if (setBit && c == '\\' && i + 1 < s.length())
1049       {
1050         i++;
1051         if (setBit)
1052         {
1053           bs.set(i);
1054         }
1055       }
1056     }
1057     pt.cbits = lastbs = bs;
1058     lasts = s;
1059   }
1060
1061   // Wanted user to over-ride this in alpha version,
1062   // but it wasn't really necessary because of this trick:
1063   Regex newRegex()
1064   {
1065     try
1066     {
1067       return (Regex) getClass().newInstance();
1068     } catch (InstantiationException ie)
1069     {
1070       return null;
1071     } catch (IllegalAccessException iae)
1072     {
1073       return null;
1074     }
1075   }
1076
1077   /**
1078    * Only needed for creating your own extensions of Regex. This method adds the
1079    * next Pattern in the chain of patterns or sets the Pattern if it is the
1080    * first call.
1081    */
1082   protected void add(Pattern p2)
1083   {
1084     if (p == null)
1085     {
1086       p = p2;
1087     }
1088     else
1089     {
1090       p.add(p2);
1091       p2 = p;
1092     }
1093   }
1094
1095   /**
1096    * You only need to use this method if you are creating your own extentions to
1097    * Regex. compile1 compiles one Pattern element, it can be over-ridden to
1098    * allow the Regex compiler to understand new syntax. See deriv.java for an
1099    * example. This routine is the heart of class Regex. Rthings has one integer
1100    * member called intValue, it is used to keep track of the number of ()'s in
1101    * the Pattern.
1102    *
1103    * @exception com.stevesoft.pat.RegSyntax
1104    *                    is thrown when a nonsensensical pattern is supplied. For
1105    *                    example, a pattern beginning with *.
1106    */
1107   protected void compile1(StrPos sp, Rthings mk) throws RegSyntax
1108   {
1109     if (sp.match('['))
1110     {
1111       sp.inc();
1112       add(matchBracket(sp));
1113     }
1114     else if (sp.match('|'))
1115     {
1116       if (or == null)
1117       {
1118         or = new Or();
1119       }
1120       if (p == null)
1121       {
1122         p = new NullPattern();
1123       }
1124       or.addOr(p);
1125       p = null;
1126     }
1127     else if (sp.incMatch("(?<"))
1128     {
1129       patInt i = sp.getPatInt();
1130       if (i == null)
1131       {
1132         RegSyntaxError.endItAll("No int after (?<");
1133       }
1134       add(new Backup(i.intValue()));
1135       if (!sp.match(')'))
1136       {
1137         RegSyntaxError.endItAll("No ) after (?<");
1138       }
1139     }
1140     else if (sp.incMatch("(?>"))
1141     {
1142       patInt i = sp.getPatInt();
1143       if (i == null)
1144       {
1145         RegSyntaxError.endItAll("No int after (?>");
1146       }
1147       add(new Backup(-i.intValue()));
1148       if (!sp.match(')'))
1149       {
1150         RegSyntaxError.endItAll("No ) after (?<");
1151       }
1152     }
1153     else if (sp.incMatch("(?@"))
1154     {
1155       char op = sp.c;
1156       sp.inc();
1157       char cl = sp.c;
1158       sp.inc();
1159       if (!sp.match(')'))
1160       {
1161         RegSyntaxError.endItAll("(?@ does not have closing paren");
1162       }
1163       add(new Group(op, cl));
1164     }
1165     else if (sp.incMatch("(?#"))
1166     {
1167       while (!sp.match(')'))
1168       {
1169         sp.inc();
1170       }
1171     }
1172     else if (sp.dontMatch && sp.c == 'w')
1173     {
1174       // Regex r = new Regex();
1175       // r._compile("[a-zA-Z0-9_]",mk);
1176       // add(new Goop("\\w",r.thePattern));
1177       Bracket b = new Bracket(false);
1178       b.addOr(new Range('a', 'z'));
1179       b.addOr(new Range('A', 'Z'));
1180       b.addOr(new Range('0', '9'));
1181       b.addOr(new oneChar('_'));
1182       add(b);
1183     }
1184     else if (sp.dontMatch && sp.c == 'G')
1185     {
1186       add(new BackG());
1187     }
1188     else if (sp.dontMatch && sp.c == 's')
1189     {
1190       // Regex r = new Regex();
1191       // r._compile("[ \t\n\r\b]",mk);
1192       // add(new Goop("\\s",r.thePattern));
1193       Bracket b = new Bracket(false);
1194       b.addOr(new oneChar((char) 32));
1195       b.addOr(new Range((char) 8, (char) 10));
1196       b.addOr(new oneChar((char) 13));
1197       add(b);
1198     }
1199     else if (sp.dontMatch && sp.c == 'd')
1200     {
1201       // Regex r = new Regex();
1202       // r._compile("[0-9]",mk);
1203       // add(new Goop("\\d",r.thePattern));
1204       Range digit = new Range('0', '9');
1205       digit.printBrackets = true;
1206       add(digit);
1207     }
1208     else if (sp.dontMatch && sp.c == 'W')
1209     {
1210       // Regex r = new Regex();
1211       // r._compile("[^a-zA-Z0-9_]",mk);
1212       // add(new Goop("\\W",r.thePattern));
1213       Bracket b = new Bracket(true);
1214       b.addOr(new Range('a', 'z'));
1215       b.addOr(new Range('A', 'Z'));
1216       b.addOr(new Range('0', '9'));
1217       b.addOr(new oneChar('_'));
1218       add(b);
1219     }
1220     else if (sp.dontMatch && sp.c == 'S')
1221     {
1222       // Regex r = new Regex();
1223       // r._compile("[^ \t\n\r\b]",mk);
1224       // add(new Goop("\\S",r.thePattern));
1225       Bracket b = new Bracket(true);
1226       b.addOr(new oneChar((char) 32));
1227       b.addOr(new Range((char) 8, (char) 10));
1228       b.addOr(new oneChar((char) 13));
1229       add(b);
1230     }
1231     else if (sp.dontMatch && sp.c == 'D')
1232     {
1233       // Regex r = new Regex();
1234       // r._compile("[^0-9]",mk);
1235       // add(new Goop("\\D",r.thePattern));
1236       Bracket b = new Bracket(true);
1237       b.addOr(new Range('0', '9'));
1238       add(b);
1239     }
1240     else if (sp.dontMatch && sp.c == 'B')
1241     {
1242       Regex r = new Regex();
1243       r._compile("(?!" + back_slash + "b)", mk);
1244       add(r.thePattern);
1245     }
1246     else if (isOctalString(sp))
1247     {
1248       int d = sp.c - '0';
1249       sp.inc();
1250       d = 8 * d + sp.c - '0';
1251       StrPos sp2 = new StrPos(sp);
1252       sp2.inc();
1253       if (isOctalDigit(sp2, false))
1254       {
1255         sp.inc();
1256         d = 8 * d + sp.c - '0';
1257       }
1258       add(new oneChar((char) d));
1259     }
1260     else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')
1261     {
1262       int iv = sp.c - '0';
1263       StrPos s2 = new StrPos(sp);
1264       s2.inc();
1265       if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')
1266       {
1267         iv = 10 * iv + (s2.c - '0');
1268         sp.inc();
1269       }
1270       add(new BackMatch(iv));
1271     }
1272     else if (sp.dontMatch && sp.c == 'b')
1273     {
1274       add(new Boundary());
1275     }
1276     else if (sp.match('\b'))
1277     {
1278       add(new Boundary());
1279     }
1280     else if (sp.match('$'))
1281     {
1282       add(new End(true));
1283     }
1284     else if (sp.dontMatch && sp.c == 'Z')
1285     {
1286       add(new End(false));
1287     }
1288     else if (sp.match('.'))
1289     {
1290       add(new Any());
1291     }
1292     else if (sp.incMatch("(??"))
1293     {
1294       StringBuffer sb = new StringBuffer();
1295       StringBuffer sb2 = new StringBuffer();
1296       while (!sp.match(')') && !sp.match(':'))
1297       {
1298         sb.append(sp.c);
1299         sp.inc();
1300       }
1301       if (sp.incMatch(":"))
1302       {
1303         while (!sp.match(')'))
1304         {
1305           sb2.append(sp.c);
1306           sp.inc();
1307         }
1308       }
1309       String sbs = sb.toString();
1310       if (validators.get(sbs) instanceof String)
1311       {
1312         String pat = (String) validators.get(sbs);
1313         Regex r = newRegex();
1314         Rthings rth = new Rthings(this);
1315         rth.noBackRefs = true;
1316         r._compile(pat, rth);
1317         add(r.thePattern);
1318       }
1319       else
1320       {
1321         Custom cm = new Custom(sb.toString());
1322         if (cm.v != null)
1323         {
1324           Validator v2 = cm.v.arg(sb2.toString());
1325           if (v2 != null)
1326           {
1327             v2.argsave = sb2.toString();
1328             String p = cm.v.pattern;
1329             cm.v = v2;
1330             v2.pattern = p;
1331           }
1332           Regex r = newRegex();
1333           Rthings rth = new Rthings(this);
1334           rth.noBackRefs = true;
1335           r._compile(cm.v.pattern, rth);
1336           cm.sub = r.thePattern;
1337           cm.sub.add(new CustomEndpoint(cm));
1338           cm.sub.setParent(cm);
1339           add(cm);
1340         }
1341       }
1342     }
1343     else if (sp.match('('))
1344     {
1345       mk.parenLevel++;
1346       Regex r = newRegex();
1347       // r.or = new Or();
1348       sp.inc();
1349       if (sp.incMatch("?:"))
1350       {
1351         r.or = new Or();
1352       }
1353       else if (sp.incMatch("?="))
1354       {
1355         r.or = new lookAhead(false);
1356       }
1357       else if (sp.incMatch("?!"))
1358       {
1359         r.or = new lookAhead(true);
1360       }
1361       else if (sp.match('?'))
1362       {
1363         sp.inc();
1364         do
1365         {
1366           if (sp.c == 'i')
1367           {
1368             mk.ignoreCase = true;
1369           }
1370           if (sp.c == 'Q')
1371           {
1372             mk.dontMatchInQuotes = true;
1373           }
1374           if (sp.c == 'o')
1375           {
1376             mk.optimizeMe = true;
1377           }
1378           if (sp.c == 'g')
1379           {
1380             mk.gFlag = true;
1381           }
1382           if (sp.c == 's')
1383           {
1384             mk.sFlag = true;
1385           }
1386           if (sp.c == 'm')
1387           {
1388             mk.mFlag = true;
1389           }
1390           sp.inc();
1391         } while (!sp.match(')') && !sp.eos);
1392         r = null;
1393         mk.parenLevel--;
1394         if (sp.eos) // throw new RegSyntax
1395         {
1396           RegSyntaxError.endItAll("Unclosed ()");
1397         }
1398       }
1399       else
1400       { // just ordinary parenthesis
1401         r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
1402       }
1403       if (r != null)
1404       {
1405         add(r._compile(sp, mk));
1406       }
1407     }
1408     else if (sp.match('^'))
1409     {
1410       add(new Start(true));
1411     }
1412     else if (sp.dontMatch && sp.c == 'A')
1413     {
1414       add(new Start(false));
1415     }
1416     else if (sp.match('*'))
1417     {
1418       addMulti(new patInt(0), new patInf());
1419     }
1420     else if (sp.match('+'))
1421     {
1422       addMulti(new patInt(1), new patInf());
1423     }
1424     else if (sp.match('?'))
1425     {
1426       addMulti(new patInt(0), new patInt(1));
1427     }
1428     else if (sp.match('{'))
1429     {
1430       boolean bad = false;
1431       StrPos sp2 = new StrPos(sp);
1432       // StringBuffer sb = new StringBuffer();
1433       sp.inc();
1434       patInt i1 = sp.getPatInt();
1435       patInt i2 = null;
1436       if (sp.match('}'))
1437       {
1438         i2 = i1;
1439       }
1440       else
1441       {
1442         if (!sp.match(','))
1443         {
1444           /*
1445            * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed
1446            * with , or }");
1447            */
1448           bad = true;
1449         }
1450         sp.inc();
1451         if (sp.match('}'))
1452         {
1453           i2 = new patInf();
1454         }
1455         else
1456         {
1457           i2 = sp.getPatInt();
1458         }
1459       }
1460       if (i1 == null || i2 == null)
1461       {
1462         /*
1463          * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}");
1464          */
1465         bad = true;
1466       }
1467       if (bad)
1468       {
1469         sp.dup(sp2);
1470         add(new oneChar(sp.c));
1471       }
1472       else
1473       {
1474         addMulti(i1, i2);
1475       }
1476     }
1477     else if (sp.escMatch('x') && next2Hex(sp))
1478     {
1479       sp.inc();
1480       int d = getHexDigit(sp);
1481       sp.inc();
1482       d = 16 * d + getHexDigit(sp);
1483       add(new oneChar((char) d));
1484     }
1485     else if (sp.escMatch('c'))
1486     {
1487       sp.inc();
1488       if (sp.c < Ctrl.cmap.length)
1489       {
1490         add(new oneChar(Ctrl.cmap[sp.c]));
1491       }
1492       else
1493       {
1494         add(new oneChar(sp.c));
1495       }
1496     }
1497     else if (sp.escMatch('f'))
1498     {
1499       add(new oneChar((char) 12));
1500     }
1501     else if (sp.escMatch('a'))
1502     {
1503       add(new oneChar((char) 7));
1504     }
1505     else if (sp.escMatch('t'))
1506     {
1507       add(new oneChar('\t'));
1508     }
1509     else if (sp.escMatch('n'))
1510     {
1511       add(new oneChar('\n'));
1512     }
1513     else if (sp.escMatch('r'))
1514     {
1515       add(new oneChar('\r'));
1516     }
1517     else if (sp.escMatch('b'))
1518     {
1519       add(new oneChar('\b'));
1520     }
1521     else if (sp.escMatch('e'))
1522     {
1523       add(new oneChar((char) 27));
1524     }
1525     else
1526     {
1527       add(new oneChar(sp.c));
1528       if (sp.match(')'))
1529       {
1530         RegSyntaxError.endItAll("Unmatched right paren in pattern");
1531       }
1532     }
1533   }
1534
1535   // compiles all Pattern elements, internal method
1536   private Pattern _compile(String pat, Rthings mk) throws RegSyntax
1537   {
1538     minMatch = null;
1539     sFlag = mFlag = ignoreCase = gFlag = false;
1540     StrPos sp = new StrPos(pat, 0);
1541     thePattern = _compile(sp, mk);
1542     pt.marks = null;
1543     return thePattern;
1544   }
1545
1546   Pattern p = null;
1547
1548   Or or = null;
1549
1550   Pattern _compile(StrPos sp, Rthings mk) throws RegSyntax
1551   {
1552     while (!(sp.eos || (or != null && sp.match(')'))))
1553     {
1554       compile1(sp, mk);
1555       sp.inc();
1556     }
1557     if (sp.match(')'))
1558     {
1559       mk.parenLevel--;
1560     }
1561     else if (sp.eos && mk.parenLevel != 0)
1562     {
1563       RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);
1564     }
1565     if (or != null)
1566     {
1567       if (p == null)
1568       {
1569         p = new NullPattern();
1570       }
1571       or.addOr(p);
1572       return or;
1573     }
1574     return p == null ? new NullPattern() : p;
1575   }
1576
1577   // add a multi object to the end of the chain
1578   // which applies to the last object
1579   void addMulti(patInt i1, patInt i2) throws RegSyntax
1580   {
1581     Pattern last, last2;
1582     for (last = p; last != null && last.next != null; last = last.next)
1583     {
1584       ;
1585     }
1586     if (last == null || last == p)
1587     {
1588       last2 = null;
1589     }
1590     else
1591     {
1592       for (last2 = p; last2.next != last; last2 = last2.next)
1593       {
1594         ;
1595       }
1596     }
1597     if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1)
1598     {
1599       ((Multi) last).matchFewest = true;
1600     }
1601     else if (last instanceof FastMulti && i1.intValue() == 0
1602             && i2.intValue() == 1)
1603     {
1604       ((FastMulti) last).matchFewest = true;
1605     }
1606     else if (last instanceof DotMulti && i1.intValue() == 0
1607             && i2.intValue() == 1)
1608     {
1609       ((DotMulti) last).matchFewest = true;
1610     }
1611     else if (last instanceof Multi || last instanceof DotMulti
1612             || last instanceof FastMulti)
1613     {
1614       throw new RegSyntax("Syntax error.");
1615     }
1616     else if (last2 == null)
1617     {
1618       p = mkMulti(i1, i2, p);
1619     }
1620     else
1621     {
1622       last2.next = mkMulti(i1, i2, last);
1623     }
1624   }
1625
1626   final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)
1627           throws RegSyntax
1628   {
1629     if (p instanceof Any && p.next == null)
1630     {
1631       return (Pattern) new DotMulti(lo, hi);
1632     }
1633     return RegOpt.safe4fm(p) ? (Pattern) new FastMulti(lo, hi, p)
1634             : (Pattern) new Multi(lo, hi, p);
1635   }
1636
1637   // process the bracket operator
1638   Pattern matchBracket(StrPos sp) throws RegSyntax
1639   {
1640     Bracket ret;
1641     if (sp.match('^'))
1642     {
1643       ret = new Bracket(true);
1644       sp.inc();
1645     }
1646     else
1647     {
1648       ret = new Bracket(false);
1649     }
1650     if (sp.match(']'))
1651     {
1652       // throw new RegSyntax
1653       RegSyntaxError.endItAll("Unmatched []");
1654     }
1655
1656     while (!sp.eos && !sp.match(']'))
1657     {
1658       StrPos s1 = new StrPos(sp);
1659       s1.inc();
1660       StrPos s1_ = new StrPos(s1);
1661       s1_.inc();
1662       if (s1.match('-') && !s1_.match(']'))
1663       {
1664         StrPos s2 = new StrPos(s1);
1665         s2.inc();
1666         if (!s2.eos)
1667         {
1668           ret.addOr(new Range(sp.c, s2.c));
1669         }
1670         sp.inc();
1671         sp.inc();
1672       }
1673       else if (sp.escMatch('Q'))
1674       {
1675         sp.inc();
1676         while (!sp.escMatch('E'))
1677         {
1678           ret.addOr(new oneChar(sp.c));
1679           sp.inc();
1680         }
1681       }
1682       else if (sp.escMatch('d'))
1683       {
1684         ret.addOr(new Range('0', '9'));
1685       }
1686       else if (sp.escMatch('s'))
1687       {
1688         ret.addOr(new oneChar((char) 32));
1689         ret.addOr(new Range((char) 8, (char) 10));
1690         ret.addOr(new oneChar((char) 13));
1691       }
1692       else if (sp.escMatch('w'))
1693       {
1694         ret.addOr(new Range('a', 'z'));
1695         ret.addOr(new Range('A', 'Z'));
1696         ret.addOr(new Range('0', '9'));
1697         ret.addOr(new oneChar('_'));
1698       }
1699       else if (sp.escMatch('D'))
1700       {
1701         ret.addOr(new Range((char) 0, (char) 47));
1702         ret.addOr(new Range((char) 58, (char) 65535));
1703       }
1704       else if (sp.escMatch('S'))
1705       {
1706         ret.addOr(new Range((char) 0, (char) 7));
1707         ret.addOr(new Range((char) 11, (char) 12));
1708         ret.addOr(new Range((char) 14, (char) 31));
1709         ret.addOr(new Range((char) 33, (char) 65535));
1710       }
1711       else if (sp.escMatch('W'))
1712       {
1713         ret.addOr(new Range((char) 0, (char) 64));
1714         ret.addOr(new Range((char) 91, (char) 94));
1715         ret.addOr(new oneChar((char) 96));
1716         ret.addOr(new Range((char) 123, (char) 65535));
1717       }
1718       else if (sp.escMatch('x') && next2Hex(sp))
1719       {
1720         sp.inc();
1721         int d = getHexDigit(sp);
1722         sp.inc();
1723         d = 16 * d + getHexDigit(sp);
1724         ret.addOr(new oneChar((char) d));
1725       }
1726       else if (sp.escMatch('a'))
1727       {
1728         ret.addOr(new oneChar((char) 7));
1729       }
1730       else if (sp.escMatch('f'))
1731       {
1732         ret.addOr(new oneChar((char) 12));
1733       }
1734       else if (sp.escMatch('e'))
1735       {
1736         ret.addOr(new oneChar((char) 27));
1737       }
1738       else if (sp.escMatch('n'))
1739       {
1740         ret.addOr(new oneChar('\n'));
1741       }
1742       else if (sp.escMatch('t'))
1743       {
1744         ret.addOr(new oneChar('\t'));
1745       }
1746       else if (sp.escMatch('r'))
1747       {
1748         ret.addOr(new oneChar('\r'));
1749       }
1750       else if (sp.escMatch('c'))
1751       {
1752         sp.inc();
1753         if (sp.c < Ctrl.cmap.length)
1754         {
1755           ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
1756         }
1757         else
1758         {
1759           ret.addOr(new oneChar(sp.c));
1760         }
1761       }
1762       else if (isOctalString(sp))
1763       {
1764         int d = sp.c - '0';
1765         sp.inc();
1766         d = 8 * d + sp.c - '0';
1767         StrPos sp2 = new StrPos(sp);
1768         sp2.inc();
1769         if (isOctalDigit(sp2, false))
1770         {
1771           sp.inc();
1772           d = 8 * d + sp.c - '0';
1773         }
1774         ret.addOr(new oneChar((char) d));
1775       }
1776       else
1777       {
1778         ret.addOr(new oneChar(sp.c));
1779       }
1780       sp.inc();
1781     }
1782     return ret;
1783   }
1784
1785   /**
1786    * Converts the stored Pattern to a String -- this is a decompile. Note that
1787    * \t and \n will really print out here, Not just the two character
1788    * representations. Also be prepared to see some strange output if your
1789    * characters are not printable.
1790    */
1791   public String toString()
1792   {
1793     if (false && thePattern == null)
1794     {
1795       return "";
1796     }
1797     else
1798     {
1799       StringBuffer sb = new StringBuffer();
1800       if (esc != Pattern.ESC)
1801       {
1802         sb.append("(?e=");
1803         sb.append(esc);
1804         sb.append(")");
1805       }
1806       if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase
1807               || dontMatchInQuotes || optimized())
1808       {
1809         sb.append("(?");
1810         if (ignoreCase)
1811         {
1812           sb.append("i");
1813         }
1814         if (mFlag)
1815         {
1816           sb.append("m");
1817         }
1818         if (sFlag || !dotDoesntMatchCR)
1819         {
1820           sb.append("s");
1821         }
1822         if (dontMatchInQuotes)
1823         {
1824           sb.append("Q");
1825         }
1826         if (optimized())
1827         {
1828           sb.append("o");
1829         }
1830         if (gFlag)
1831         {
1832           sb.append("g");
1833         }
1834         sb.append(")");
1835       }
1836       String patstr = thePattern.toString();
1837       if (esc != Pattern.ESC)
1838       {
1839         patstr = reEscape(patstr, Pattern.ESC, esc);
1840       }
1841       sb.append(patstr);
1842       return sb.toString();
1843     }
1844   }
1845
1846   // Re-escape Pattern, allows us to use a different escape
1847   // character.
1848   static String reEscape(String s, char oldEsc, char newEsc)
1849   {
1850     if (oldEsc == newEsc)
1851     {
1852       return s;
1853     }
1854     int i;
1855     StringBuffer sb = new StringBuffer();
1856     for (i = 0; i < s.length(); i++)
1857     {
1858       if (s.charAt(i) == oldEsc && i + 1 < s.length())
1859       {
1860         if (s.charAt(i + 1) == oldEsc)
1861         {
1862           sb.append(oldEsc);
1863         }
1864         else
1865         {
1866           sb.append(newEsc);
1867           sb.append(s.charAt(i + 1));
1868         }
1869         i++;
1870       }
1871       else if (s.charAt(i) == newEsc)
1872       {
1873         sb.append(newEsc);
1874         sb.append(newEsc);
1875       }
1876       else
1877       {
1878         sb.append(s.charAt(i));
1879       }
1880     }
1881     return sb.toString();
1882   }
1883
1884   /**
1885    * This method implements FilenameFilter, allowing one to use a Regex to
1886    * search through a directory using File.list. There is a FileRegex now that
1887    * does this better.
1888    *
1889    * @see com.stevesoft.pat.FileRegex
1890    */
1891   public boolean accept(File dir, String s)
1892   {
1893     return search(s);
1894   }
1895
1896   /** The version of this package */
1897   final static public String version()
1898   {
1899     return "lgpl release 1.5.3";
1900   }
1901
1902   /**
1903    * Once this method is called, the state of variables ignoreCase and
1904    * dontMatchInQuotes should not be changed as the results will be
1905    * unpredictable. However, search and matchAt will run more quickly. Note that
1906    * you can check to see if the pattern has been optimized by calling the
1907    * optimized() method.
1908    * <p>
1909    * This method will attempt to rewrite your pattern in a way that makes it
1910    * faster (not all patterns execute at the same speed). In general, "(?: ... )"
1911    * will be faster than "( ... )" so if you don't need the backreference, you
1912    * should group using the former pattern.
1913    * <p>
1914    * It will also introduce new pattern elements that you can't get to
1915    * otherwise, for example if you have a large table of strings, i.e. the
1916    * months of the year "(January|February|...)" optimize() will make a
1917    * Hashtable that takes it to the next appropriate pattern element --
1918    * eliminating the need for a linear search.
1919    *
1920    * @see com.stevesoft.pat.Regex#optimized
1921    * @see com.stevesoft.pat.Regex#ignoreCase
1922    * @see com.stevesoft.pat.Regex#dontMatchInQuotes
1923    * @see com.stevesoft.pat.Regex#matchAt
1924    * @see com.stevesoft.pat.Regex#search
1925    */
1926   public void optimize()
1927   {
1928     if (optimized() || thePattern == null)
1929     {
1930       return;
1931     }
1932     minMatch = new patInt(0); // thePattern.countMinChars();
1933     thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes);
1934     skipper = Skip.findSkip(this);
1935     // RegOpt.setParents(this);
1936     return;
1937   }
1938
1939   Skip skipper;
1940
1941   /**
1942    * This function returns true if the optimize method has been called.
1943    */
1944   public boolean optimized()
1945   {
1946     return minMatch != null;
1947   }
1948
1949   /**
1950    * A bit of syntactic surgar for those who want to make their code look more
1951    * perl-like. To use this initialize your Regex object by saying:
1952    *
1953    * <pre>
1954    *       Regex r1 = Regex.perlCode(&quot;s/hello/goodbye/&quot;);
1955    *       Regex r2 = Regex.perlCode(&quot;s'fish'frog'i&quot;);
1956    *       Regex r3 = Regex.perlCode(&quot;m'hello');
1957    * </pre>
1958    *
1959    * The i for ignoreCase is supported in this syntax, as well as m, s, and x.
1960    * The g flat is a bit of a special case.
1961    * <p>
1962    * If you wish to replace all occurences of a pattern, you do not put a 'g' in
1963    * the perlCode, but call Regex's replaceAll method.
1964    * <p>
1965    * If you wish to simply and only do a search for r2's pattern, you can do
1966    * this by calling the searchFrom method method repeatedly, or by calling
1967    * search repeatedly if the g flag is set.
1968    * <p>
1969    * Note: Currently perlCode does <em>not</em> support the (?e=#) syntax for
1970    * changing the escape character.
1971    */
1972
1973   public static Regex perlCode(String s)
1974   {
1975     // this file is big enough, see parsePerl.java
1976     // for this function.
1977     return parsePerl.parse(s);
1978   }
1979
  // A literal backslash character; compile1 uses it to assemble the
  // pattern text it compiles for \B.
  static final char back_slash = '\\';
1981
1982   /**
1983    * Checks to see if there are only literal and no special pattern elements in
1984    * this Regex.
1985    */
1986   public boolean isLiteral()
1987   {
1988     Pattern x = thePattern;
1989     while (x != null)
1990     {
1991       if (x instanceof oneChar)
1992       {
1993         ;
1994       }
1995       else if (x instanceof Skipped)
1996       {
1997         ;
1998       }
1999       else
2000       {
2001         return false;
2002       }
2003       x = x.next;
2004     }
2005     return true;
2006   }
2007
2008   /**
2009    * You only need to know about this if you are inventing your own pattern
2010    * elements.
2011    */
2012   public patInt countMinChars()
2013   {
2014     return thePattern.countMinChars();
2015   }
2016
2017   /**
2018    * You only need to know about this if you are inventing your own pattern
2019    * elements.
2020    */
2021   public patInt countMaxChars()
2022   {
2023     return thePattern.countMaxChars();
2024   }
2025
2026   boolean isHexDigit(StrPos sp)
2027   {
2028     boolean r = !sp.eos
2029             && !sp.dontMatch
2030             && ((sp.c >= '0' && sp.c <= '9')
2031                     || (sp.c >= 'a' && sp.c <= 'f') || (sp.c >= 'A' && sp.c <= 'F'));
2032     return r;
2033   }
2034
2035   boolean isOctalDigit(StrPos sp, boolean first)
2036   {
2037     boolean r = !sp.eos && !(first ^ sp.dontMatch) && sp.c >= '0'
2038             && sp.c <= '7';
2039     return r;
2040   }
2041
2042   int getHexDigit(StrPos sp)
2043   {
2044     if (sp.c >= '0' && sp.c <= '9')
2045     {
2046       return sp.c - '0';
2047     }
2048     if (sp.c >= 'a' && sp.c <= 'f')
2049     {
2050       return sp.c - 'a' + 10;
2051     }
2052     return sp.c - 'A' + 10;
2053   }
2054
2055   boolean next2Hex(StrPos sp)
2056   {
2057     StrPos sp2 = new StrPos(sp);
2058     sp2.inc();
2059     if (!isHexDigit(sp2))
2060     {
2061       return false;
2062     }
2063     sp2.inc();
2064     if (!isHexDigit(sp2))
2065     {
2066       return false;
2067     }
2068     return true;
2069   }
2070
2071   boolean isOctalString(StrPos sp)
2072   {
2073     if (!isOctalDigit(sp, true))
2074     {
2075       return false;
2076     }
2077     StrPos sp2 = new StrPos(sp);
2078     sp2.inc();
2079     if (!isOctalDigit(sp2, false))
2080     {
2081       return false;
2082     }
2083     return true;
2084   }
2085 }