src/com/stevesoft/pat/Regex.java

   1 /*******************************************************************************
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $(date) The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  *******************************************************************************/
  21 //
  22 // This software is now distributed according to
  23 // the Lesser Gnu Public License.  Please see
  24 // http://www.gnu.org/copyleft/lesser.txt for
  25 // the details.
  26 //    -- Happy Computing!
  27 //
  28 package com.stevesoft.pat;
  29
  30 import jalview.util.MessageManager;
  31
  32 import java.io.File;
  33 import java.io.FilenameFilter;
  34 import java.util.BitSet;
  35 import java.util.Hashtable;
  36
  37 import com.stevesoft.pat.wrap.StringWrap;
  38
  39 /** Matches a Unicode punctuation character. */
  40 class UnicodePunct extends UniValidator
  41 {
  42   public int validate(StringLike s, int from, int to)
  43   {
  44     return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;
  45   }
  46 }
  47
  48 /** Matches a Unicode white space character. */
  49 class UnicodeWhite extends UniValidator
  50 {
  51   public int validate(StringLike s, int from, int to)
  52   {
  53     return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;
  54   }
  55 }
  56
  57 /**
  58  * Matches a character that is not a Unicode punctuation character.
  59  */
  60 class NUnicodePunct extends UniValidator
  61 {
  62   public int validate(StringLike s, int from, int to)
  63   {
  64     return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;
  65   }
  66 }
  67
  68 /**
  69  * Matches a character that is not a Unicode white space character.
  70  */
  71 class NUnicodeWhite extends UniValidator
  72 {
  73   public int validate(StringLike s, int from, int to)
  74   {
  75     return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;
  76   }
  77 }
  78
  79 /** Matches a Unicode word character: an alphanumeric or underscore. */
  80 class UnicodeW extends UniValidator
  81 {
  82   public int validate(StringLike s, int from, int to)
  83   {
  84     if (from >= s.length())
  85     {
  86       return -1;
  87     }
  88     char c = s.charAt(from);
  89     return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
  90             : -1;
  91   }
  92 }
  93
  94 /** Matches a character that is not a Unicode alphanumeric or underscore. */
  95 class NUnicodeW extends UniValidator
  96 {
  97   public int validate(StringLike s, int from, int to)
  98   {
  99     if (from >= s.length())
 100     {
 101       return -1;
 102     }
 103     char c = s.charAt(from);
 104     return !(Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
 105             : -1;
 106   }
 107 }
 108
 109 /** Matches a Unicode decimal digit. */
 110 class UnicodeDigit extends UniValidator
 111 {
 112   public int validate(StringLike s, int from, int to)
 113   {
 114     return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to
 115             : -1;
 116   }
 117 }
 118
 119 /** Matches a character that is not a Unicode digit. */
 120 class NUnicodeDigit extends UniValidator
 121 {
 122   public int validate(StringLike s, int from, int to)
 123   {
 124     return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to
 125             : -1;
 126   }
 127 }
 128
 129 /** Matches a Unicode math character. */
 130 class UnicodeMath extends UniValidator
 131 {
 132   public int validate(StringLike s, int from, int to)
 133   {
 134     return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1;
 135   }
 136 }
 137
 138 /** Matches a non-math Unicode character. */
 139 class NUnicodeMath extends UniValidator
 140 {
 141   public int validate(StringLike s, int from, int to)
 142   {
 143     return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;
 144   }
 145 }
 146
 147 /** Matches a Unicode currency symbol. */
 148 class UnicodeCurrency extends UniValidator
 149 {
 150   public int validate(StringLike s, int from, int to)
 151   {
 152     return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;
 153   }
 154 }
 155
 156 /** Matches a non-currency symbol Unicode character. */
 157 class NUnicodeCurrency extends UniValidator
 158 {
 159   public int validate(StringLike s, int from, int to)
 160   {
 161     return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;
 162   }
 163 }
 164
 165 /** Matches a Unicode alphabetic character. */
 166 class UnicodeAlpha extends UniValidator
 167 {
 168   public int validate(StringLike s, int from, int to)
 169   {
 170     return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;
 171   }
 172 }
 173
 174 /** Matches a non-alphabetic Unicode character. */
 175 class NUnicodeAlpha extends UniValidator
 176 {
 177   public int validate(StringLike s, int from, int to)
 178   {
 179     return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to
 180             : -1;
 181   }
 182 }
 183
 184 /** Matches an upper case Unicode character. */
 185 class UnicodeUpper extends UniValidator
 186 {
 187   public int validate(StringLike s, int from, int to)
 188   {
 189     return from < s.length() && isUpper(s.charAt(from)) ? to : -1;
 190   }
 191
 192   final boolean isUpper(char c)
 193   {
 194     return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);
 195   }
 196 }
 197
 198 /** Matches an upper case Unicode character. */
 199 class UnicodeLower extends UniValidator
 200 {
 201   public int validate(StringLike s, int from, int to)
 202   {
 203     return from < s.length() && isLower(s.charAt(from)) ? to : -1;
 204   }
 205
 206   final boolean isLower(char c)
 207   {
 208     return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);
 209   }
 210 }
 211
 212 /**
 213  * Regex provides the parser which constructs the linked list of Pattern classes
 214  * from a String.
 215  * <p>
 216  * For the purpose of this documentation, the fact that java interprets the
 217  * backslash will be ignored. In practice, however, you will need a double
 218  * backslash to obtain a string that contains a single backslash character.
 219  * Thus, the example pattern "\b" should really be typed as "\\b" inside java
 220  * code.
 221  * <p>
 222  * Note that Regex is part of package "com.stevesoft.pat". To use it, simply
 223  * import com.stevesoft.pat.Regex at the top of your file.
 224  * <p>
 225  * Regex is made with a constructor that takes a String that defines the regular
 226  * expression. Thus, for example
 227  *
 228  * <pre>
 229  * Regex r = new Regex(&quot;[a-c]*&quot;);
 230  * </pre>
 231  *
 * matches any number of characters so long as they are 'a', 'b', or 'c'.
 233  * <p>
 234  * To attempt to match the Pattern to a given string, you can use either the
 235  * search(String) member function, or the matchAt(String,int position) member
 236  * function. These functions return a boolean which tells you whether or not the
 237  * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in
 238  * the Regex object appropriately.
 239  * <p>
 240  * The portion of the string before the match can be obtained by the left()
 241  * member, and the portion after the match can be obtained by the right()
 242  * member.
 243  * <p>
 244  * Essentially, this package implements a syntax that is very much like the perl
 245  * 5 regular expression syntax.
 246  *
 247  * Longer example:
 248  *
 249  * <pre>
 250  * Regex r = new Regex(&quot;x(a|b)y&quot;);
 251  * r.matchAt(&quot;xay&quot;, 0);
 252  * System.out.println(&quot;sub = &quot; + r.stringMatched(1));
 253  * </pre>
 254  *
 255  * The above would print "sub = a".
 256  *
 257  * <pre>
 258  *  r.left() // would return &quot;x&quot;
 259  *  r.right() // would return &quot;y&quot;
 260  * </pre>
 261  *
 262  * <p>
 263  * Differences between this package and perl5:<br>
 264  * The extended Pattern for setting flags, is now supported, but the flags are
 265  * different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the
 266  * "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the
 267  * escape character. The pattern
 268  *
 269  * <pre>
 270  * (?e=#)#d+
 271  * </pre>
 272  *
 273  * is the same as
 274  *
 275  * <pre>
 276  * \d+
 277  * </pre>
 278  *
 279  * , but note that the sequence
 280  *
 281  * <pre>
 282  * (?e=#)
 283  * </pre>
 284  *
 285  * <b>must</b> occur at the very beginning of the pattern. There may be other
 286  * small differences as well. I will either make my package conform or note them
 287  * as I become aware of them.
 288  * <p>
 289  * This package supports additional patterns not in perl5: <center>
 * <table border=1>
 291  * <tr>
 292  * <td>(?@())</td>
 293  * <td>Group</td>
 294  * <td>This matches all characters between the '(' character and the balancing
 295  * ')' character. Thus, it will match "()" as well as "(())". The balancing
 296  * characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>
 297  * <tr>
 * <td>(?&lt;1)</td>
 299  * <td>Backup</td>
 300  * <td>Moves the pointer backwards within the text. This allows you to make a
 301  * "look behind." It fails if it attempts to move to a position before the
 * beginning of the string. "x(?&lt;1)" is equivalent to "(?=x)". The number, 1
 303  * in this example, is the number of characters to move backwards.</td>
 304  * </table>
 * </center>
 306  *
 307  * @author Steven R. Brandt
 308  * @version package com.stevesoft.pat, release 1.5.3
 309  * @see Pattern
 310  */
 311 public class Regex extends RegRes implements FilenameFilter
 312 {
  /**
   * BackRefOffset gives the identity number of the first pattern. Version 1.0
   * used zero, version 1.1 uses 1 to be more compatible with perl.
   */
  static int BackRefOffset = 1;

  /** Shared placeholder installed as the pattern until one is compiled. */
  private static Pattern none = new NoPattern();

  /**
   * The pattern this Regex matches with. Starts as the placeholder and is
   * replaced by compile(String).
   */
  Pattern thePattern = none;

  /**
   * Subtracted from the end bound by _search when choosing the last start
   * position to try; compile(String) resets it to a fresh patInt(0).
   */
  patInt minMatch = new patInt(0);
 324
  /**
   * Registry of named rules, keyed by rule name. A value is either a Validator
   * or a pattern String, depending on which define overload stored it.
   */
  static Hashtable validators = new Hashtable();
  // Register the built-in character-class validators under their short names.
  static
  {
    define("p", "(?>1)", new UnicodePunct());
    define("P", "(?>1)", new NUnicodePunct());
    define("s", "(?>1)", new UnicodeWhite());
    define("S", "(?>1)", new NUnicodeWhite());
    define("w", "(?>1)", new UnicodeW());
    define("W", "(?>1)", new NUnicodeW());
    define("d", "(?>1)", new UnicodeDigit());
    define("D", "(?>1)", new NUnicodeDigit());
    define("m", "(?>1)", new UnicodeMath());
    define("M", "(?>1)", new NUnicodeMath());
    define("c", "(?>1)", new UnicodeCurrency());
    define("C", "(?>1)", new NUnicodeCurrency());
    define("a", "(?>1)", new UnicodeAlpha());
    define("A", "(?>1)", new NUnicodeAlpha());
    define("uc", "(?>1)", new UnicodeUpper());
    define("lc", "(?>1)", new UnicodeLower());
  }
 345
 346   /** Set the dontMatch in quotes flag. */
 347   public void setDontMatchInQuotes(boolean b)
 348   {
 349     dontMatchInQuotes = b;
 350   }
 351
 352   /** Find out if the dontMatchInQuotes flag is enabled. */
 353   public boolean getDontMatchInQuotes()
 354   {
 355     return dontMatchInQuotes;
 356   }
 357
 358   boolean dontMatchInQuotes = false;
 359
 360   /**
 361    * Set the state of the ignoreCase flag. If set to true, then the pattern
 362    * matcher will ignore case when searching for a match.
 363    */
 364   public void setIgnoreCase(boolean b)
 365   {
 366     ignoreCase = b;
 367   }
 368
 369   /**
 370    * Get the state of the ignoreCase flag. Returns true if we are ignoring the
 371    * case of the pattern, false otherwise.
 372    */
 373   public boolean getIgnoreCase()
 374   {
 375     return ignoreCase;
 376   }
 377
 378   boolean ignoreCase = false;
 379
 380   static boolean defaultMFlag = false;
 381
 382   /**
 383    * Set the default value of the m flag. If it is set to true, then the MFlag
 384    * will be on for any regex search executed.
 385    */
 386   public static void setDefaultMFlag(boolean mFlag)
 387   {
 388     defaultMFlag = mFlag;
 389   }
 390
 391   /**
 392    * Get the default value of the m flag. If it is set to true, then the MFlag
 393    * will be on for any regex search executed.
 394    */
 395   public static boolean getDefaultMFlag()
 396   {
 397     return defaultMFlag;
 398   }
 399
 400   /**
 401    * Initializes the object without a Pattern. To supply a Pattern use
 402    * compile(String s).
 403    *
 404    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 405    */
 406   public Regex()
 407   {
 408   }
 409
 410   /**
 411    * Create and compile a Regex, but do not throw any exceptions. If you wish to
 412    * have exceptions thrown for syntax errors, you must use the Regex(void)
 413    * constructor to create the Regex object, and then call the compile method.
 414    * Therefore, you should only call this method when you know your pattern is
 415    * right. I will probably become more like
 416    *
 417    * @see com.stevesoft.pat.Regex#search(java.lang.String)
 418    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 419    */
 420   public Regex(String s)
 421   {
 422     try
 423     {
 424       compile(s);
 425     } catch (RegSyntax rs)
 426     {
 427     }
 428   }
 429
 430   ReplaceRule rep = null;
 431
 432   /**
 433    * Create and compile both a Regex and a ReplaceRule.
 434    *
 435    * @see com.stevesoft.pat.ReplaceRule
 436    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 437    */
 438   public Regex(String s, String rp)
 439   {
 440     this(s);
 441     rep = ReplaceRule.perlCode(rp);
 442   }
 443
 444   /**
 445    * Create and compile a Regex, but give it the ReplaceRule specified. This
 446    * allows the user finer control of the Replacement process, if that is
 447    * desired.
 448    *
 449    * @see com.stevesoft.pat.ReplaceRule
 450    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 451    */
 452   public Regex(String s, ReplaceRule rp)
 453   {
 454     this(s);
 455     rep = rp;
 456   }
 457
 458   /**
 459    * Change the ReplaceRule of this Regex by compiling a new one using String
 460    * rp.
 461    */
 462   public void setReplaceRule(String rp)
 463   {
 464     rep = ReplaceRule.perlCode(rp);
 465     repr = null; // Clear Replacer history
 466   }
 467
 468   /** Change the ReplaceRule of this Regex to rp. */
 469   public void setReplaceRule(ReplaceRule rp)
 470   {
 471     rep = rp;
 472   }
 473
 474   /**
 475    * Test to see if a custom defined rule exists.
 476    *
 477    * @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator)
 478    */
 479   public static boolean isDefined(String nm)
 480   {
 481     return validators.get(nm) != null;
 482   }
 483
 484   /**
 485    * Removes a custom defined rule.
 486    *
 487    * @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator)
 488    */
 489   public static void undefine(String nm)
 490   {
 491     validators.remove(nm);
 492   }
 493
 494   /**
 495    * Defines a method to create a new rule. See test/deriv2.java and
 496    * test/deriv3.java for examples of how to use it.
 497    */
 498   public static void define(String nm, String pat, Validator v)
 499   {
 500     v.pattern = pat;
 501     validators.put(nm, v);
 502   }
 503
 504   /**
 505    * Defines a shorthand for a pattern. The pattern will be invoked by a string
 506    * that has the form "(??"+nm+")".
 507    */
 508   public static void define(String nm, String pat)
 509   {
 510     validators.put(nm, pat);
 511   }
 512
 513   /** Get the current ReplaceRule. */
 514   public ReplaceRule getReplaceRule()
 515   {
 516     return rep;
 517   }
 518
 519   Replacer repr = null;
 520
 521   final Replacer _getReplacer()
 522   {
 523     return repr == null ? repr = new Replacer() : repr;
 524   }
 525
 526   public Replacer getReplacer()
 527   {
 528     if (repr == null)
 529     {
 530       repr = new Replacer();
 531     }
 532     repr.rh.me = this;
 533     repr.rh.prev = null;
 534     return repr;
 535   }
 536
 537   /**
 538    * Replace the first occurence of this pattern in String s according to the
 539    * ReplaceRule.
 540    *
 541    * @see com.stevesoft.pat.ReplaceRule
 542    * @see com.stevesoft.pat.Regex#getReplaceRule()
 543    */
 544   public String replaceFirst(String s)
 545   {
 546     return _getReplacer().replaceFirstRegion(s, this, 0, s.length())
 547             .toString();
 548   }
 549
 550   /**
 551    * Replace the first occurence of this pattern in String s beginning with
 552    * position pos according to the ReplaceRule.
 553    *
 554    * @see com.stevesoft.pat.ReplaceRule
 555    * @see com.stevesoft.pat.Regex#getReplaceRule()
 556    */
 557   public String replaceFirstFrom(String s, int pos)
 558   {
 559     return _getReplacer().replaceFirstRegion(s, this, pos, s.length())
 560             .toString();
 561   }
 562
 563   /**
 564    * Replace the first occurence of this pattern in String s beginning with
 565    * position start and ending with end according to the ReplaceRule.
 566    *
 567    * @see com.stevesoft.pat.ReplaceRule
 568    * @see com.stevesoft.pat.Regex#getReplaceRule()
 569    */
 570   public String replaceFirstRegion(String s, int start, int end)
 571   {
 572     return _getReplacer().replaceFirstRegion(s, this, start, end)
 573             .toString();
 574   }
 575
 576   /**
 577    * Replace all occurences of this pattern in String s according to the
 578    * ReplaceRule.
 579    *
 580    * @see com.stevesoft.pat.ReplaceRule
 581    * @see com.stevesoft.pat.Regex#getReplaceRule()
 582    */
 583   public String replaceAll(String s)
 584   {
 585     return _getReplacer().replaceAllRegion(s, this, 0, s.length())
 586             .toString();
 587   }
 588
 589   public StringLike replaceAll(StringLike s)
 590   {
 591     return _getReplacer().replaceAllRegion(s, this, 0, s.length());
 592   }
 593
 594   /**
 595    * Replace all occurences of this pattern in String s beginning with position
 596    * pos according to the ReplaceRule.
 597    *
 598    * @see com.stevesoft.pat.ReplaceRule
 599    * @see com.stevesoft.pat.Regex#getReplaceRule()
 600    */
 601   public String replaceAllFrom(String s, int pos)
 602   {
 603     return _getReplacer().replaceAllRegion(s, this, pos, s.length())
 604             .toString();
 605   }
 606
 607   /**
 608    * Replace all occurences of this pattern in String s beginning with position
 609    * start and ending with end according to the ReplaceRule.
 610    *
 611    * @see com.stevesoft.pat.ReplaceRule
 612    * @see com.stevesoft.pat.Regex#getReplaceRule()
 613    */
 614   public String replaceAllRegion(String s, int start, int end)
 615   {
 616     return _getReplacer().replaceAllRegion(s, this, start, end).toString();
 617   }
 618
 619   /** Essentially clones the Regex object */
 620   public Regex(Regex r)
 621   {
 622     super((RegRes) r);
 623     dontMatchInQuotes = r.dontMatchInQuotes;
 624     esc = r.esc;
 625     ignoreCase = r.ignoreCase;
 626     gFlag = r.gFlag;
 627     if (r.rep == null)
 628     {
 629       rep = null;
 630     }
 631     else
 632     {
 633       rep = (ReplaceRule) r.rep.clone();
 634     }
 635     /*
 636      * try { compile(r.toString()); } catch(RegSyntax r_) {}
 637      */
 638     thePattern = r.thePattern.clone(new Hashtable());
 639     minMatch = r.minMatch;
 640     skipper = r.skipper;
 641   }
 642
  /**
   * By default, the escape character is the backslash, but you can make it
   * anything you want by setting this variable. compile(String) re-escapes the
   * pattern when this differs from Pattern.ESC and the pattern does not itself
   * begin with "(?e=".
   */
  public char esc = Pattern.ESC;
 648
 649   /**
 650    * This method compiles a regular expression, making it possible to call the
 651    * search or matchAt methods.
 652    *
 653    * @exception com.stevesoft.pat.RegSyntax
 654    *              is thrown if a syntax error is encountered in the pattern. For
 655    *              example, "x{3,1}" or "*a" are not valid patterns.
 656    * @see com.stevesoft.pat.Regex#search
 657    * @see com.stevesoft.pat.Regex#matchAt
 658    */
 659   public void compile(String prepat) throws RegSyntax
 660   {
 661     String postpat = parsePerl.codify(prepat, true);
 662     String pat = postpat == null ? prepat : postpat;
 663     minMatch = null;
 664     ignoreCase = false;
 665     dontMatchInQuotes = false;
 666     Rthings mk = new Rthings(this);
 667     int offset = mk.val;
 668     String newpat = pat;
 669     thePattern = none;
 670     p = null;
 671     or = null;
 672     minMatch = new patInt(0);
 673     StrPos sp = new StrPos(pat, 0);
 674     if (sp.incMatch("(?e="))
 675     {
 676       char newEsc = sp.c;
 677       sp.inc();
 678       if (sp.match(')'))
 679       {
 680         newpat = reEscape(pat.substring(6), newEsc, Pattern.ESC);
 681       }
 682     }
 683     else if (esc != Pattern.ESC)
 684     {
 685       newpat = reEscape(pat, esc, Pattern.ESC);
 686     }
 687     thePattern = _compile(newpat, mk);
 688     numSubs_ = mk.val - offset;
 689     mk.set(this);
 690   }
 691
 692   /*
 693    * If a Regex is compared against a Regex, a check is done to see that the
 694    * patterns are equal as well as the most recent match. If a Regex is compare
 695    * with a RegRes, only the result of the most recent match is compared.
 696    */
 697   public boolean equals(Object o)
 698   {
 699     if (o instanceof Regex)
 700     {
 701       if (toString().equals(o.toString()))
 702       {
 703         return super.equals(o);
 704       }
 705       else
 706       {
 707         return false;
 708       }
 709     }
 710     else
 711     {
 712       return super.equals(o);
 713     }
 714   }
 715
 716   /** A clone by any other name would smell as sweet. */
 717   public Object clone()
 718   {
 719     return new Regex(this);
 720   }
 721
 722   /** Return a clone of the underlying RegRes object. */
 723   public RegRes result()
 724   {
 725     return (RegRes) super.clone();
 726   }
 727
 728   // prep sets global variables of class
 729   // Pattern so that it can access them
 730   // during an attempt at a match
 731   Pthings pt = new Pthings();
 732
 733   final Pthings prep(StringLike s)
 734   {
 735     // if(gFlag)
 736     pt.lastPos = matchedTo();
 737     if (pt.lastPos < 0)
 738     {
 739       pt.lastPos = 0;
 740     }
 741     if ((s == null ? null : s.unwrap()) != (src == null ? null : s.unwrap()))
 742     {
 743       pt.lastPos = 0;
 744     }
 745     src = s;
 746     pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);
 747     pt.mFlag = (mFlag | defaultMFlag);
 748     pt.ignoreCase = ignoreCase;
 749     pt.no_check = false;
 750     if (pt.marks != null)
 751     {
 752       for (int i = 0; i < pt.marks.length; i++)
 753       {
 754         pt.marks[i] = -1;
 755       }
 756     }
 757     pt.marks = null;
 758     pt.nMarks = numSubs_;
 759     pt.src = s;
 760     if (dontMatchInQuotes)
 761     {
 762       setCbits(s, pt);
 763     }
 764     else
 765     {
 766       pt.cbits = null;
 767     }
 768     return pt;
 769   }
 770
 771   /**
 772    * Attempt to match a Pattern beginning at a specified location within the
 773    * string.
 774    *
 775    * @see com.stevesoft.pat.Regex#search
 776    */
 777   public boolean matchAt(String s, int start_pos)
 778   {
 779     return _search(s, start_pos, start_pos);
 780   }
 781
 782   /**
 783    * Attempt to match a Pattern beginning at a specified location within the
 784    * StringLike.
 785    *
 786    * @see com.stevesoft.pat.Regex#search
 787    */
 788   public boolean matchAt(StringLike s, int start_pos)
 789   {
 790     return _search(s, start_pos, start_pos);
 791   }
 792
 793   /**
 794    * Search through a String for the first occurrence of a match.
 795    *
 796    * @see com.stevesoft.pat.Regex#searchFrom
 797    * @see com.stevesoft.pat.Regex#matchAt
 798    */
 799   public boolean search(String s)
 800   {
 801     if (s == null)
 802     {
 803       throw new NullPointerException(
 804               MessageManager
 805                       .getString("exception.null_string_given_to_regex_search"));
 806     }
 807     return _search(s, 0, s.length());
 808   }
 809
 810   public boolean search(StringLike sl)
 811   {
 812     if (sl == null)
 813     {
 814       throw new NullPointerException(
 815               MessageManager
 816                       .getString("exception.null_string_like_given_to_regex_search"));
 817     }
 818     return _search(sl, 0, sl.length());
 819   }
 820
 821   public boolean reverseSearch(String s)
 822   {
 823     if (s == null)
 824     {
 825       throw new NullPointerException(
 826               MessageManager
 827                       .getString("exception.null_string_given_to_regex_reverse_search"));
 828     }
 829     return _reverseSearch(s, 0, s.length());
 830   }
 831
 832   public boolean reverseSearch(StringLike sl)
 833   {
 834     if (sl == null)
 835     {
 836       throw new NullPointerException(
 837               MessageManager
 838                       .getString("exception.null_string_like_given_to_regex_reverse_search"));
 839     }
 840     return _reverseSearch(sl, 0, sl.length());
 841   }
 842
 843   /**
 844    * Search through a String for the first occurence of a match, but start at
 845    * position
 846    *
 847    * <pre>
 848    * start
 849    * </pre>
 850    */
 851   public boolean searchFrom(String s, int start)
 852   {
 853     if (s == null)
 854     {
 855       throw new NullPointerException(
 856               MessageManager
 857                       .getString("exception.null_string_like_given_to_regex_search_from"));
 858     }
 859     return _search(s, start, s.length());
 860   }
 861
 862   public boolean searchFrom(StringLike s, int start)
 863   {
 864     if (s == null)
 865     {
 866       throw new NullPointerException(
 867               MessageManager
 868                       .getString("exception.null_string_like_given_to_regex_search_from"));
 869     }
 870     return _search(s, start, s.length());
 871   }
 872
 873   /**
 874    * Search through a region of a String for the first occurence of a match.
 875    */
 876   public boolean searchRegion(String s, int start, int end)
 877   {
 878     if (s == null)
 879     {
 880       throw new NullPointerException(
 881               MessageManager
 882                       .getString("exception.null_string_like_given_to_regex_search_region"));
 883     }
 884     return _search(s, start, end);
 885   }
 886
  /**
   * Set this to change the default behavior of the "." pattern. By default it
   * now matches perl's behavior and fails to match the '\n' character. The
   * per-instance sFlag overrides this when set.
   */
  public static boolean dotDoesntMatchCR = true;

  /**
   * The text of the most recent successful search, or null after a failed
   * one; compared by identity of the unwrapped object when the g flag is on.
   */
  StringLike gFlags;

  /**
   * Position at which the next g-flag search resumes (set by the last
   * successful search).
   */
  int gFlagto = 0;

  /**
   * The 'g' flag: when true, repeated searches of the same text continue from
   * where the previous match left off.
   */
  boolean gFlag = false;
 898
 899   /** Set the 'g' flag */
 900   public void setGFlag(boolean b)
 901   {
 902     gFlag = b;
 903   }
 904
 905   /** Get the state of the 'g' flag. */
 906   public boolean getGFlag()
 907   {
 908     return gFlag;
 909   }
 910
 911   boolean sFlag = false;
 912
 913   /** Get the state of the sFlag */
 914   public boolean getSFlag()
 915   {
 916     return sFlag;
 917   }
 918
 919   boolean mFlag = false;
 920
 921   /** Get the state of the sFlag */
 922   public boolean getMFlag()
 923   {
 924     return mFlag;
 925   }
 926
 927   final boolean _search(String s, int start, int end)
 928   {
 929     return _search(new StringWrap(s), start, end);
 930   }
 931
 932   final boolean _search(StringLike s, int start, int end)
 933   {
 934     if (gFlag && gFlagto > 0 && gFlags != null
 935             && s.unwrap() == gFlags.unwrap())
 936     {
 937       start = gFlagto;
 938     }
 939     gFlags = null;
 940
 941     Pthings pt = prep(s);
 942
 943     int up = (minMatch == null ? end : end - minMatch.i);
 944
 945     if (up < start && end >= start)
 946     {
 947       up = start;
 948     }
 949
 950     if (skipper == null)
 951     {
 952       for (int i = start; i <= up; i++)
 953       {
 954         charsMatched_ = thePattern.matchAt(s, i, pt);
 955         if (charsMatched_ >= 0)
 956         {
 957           matchFrom_ = thePattern.mfrom;
 958           marks = pt.marks;
 959           gFlagto = matchFrom_ + charsMatched_;
 960           gFlags = s;
 961           return didMatch_ = true;
 962         }
 963       }
 964     }
 965     else
 966     {
 967       pt.no_check = true;
 968       for (int i = start; i <= up; i++)
 969       {
 970         i = skipper.find(src, i, up);
 971         if (i < 0)
 972         {
 973           charsMatched_ = matchFrom_ = -1;
 974           return didMatch_ = false;
 975         }
 976         charsMatched_ = thePattern.matchAt(s, i, pt);
 977         if (charsMatched_ >= 0)
 978         {
 979           matchFrom_ = thePattern.mfrom;
 980           marks = pt.marks;
 981           gFlagto = matchFrom_ + charsMatched_;
 982           gFlags = s;
 983           return didMatch_ = true;
 984         }
 985       }
 986     }
 987     return didMatch_ = false;
 988   }
 989
 990   /*
 991    * final boolean _search(LongStringLike s,long start,long end) { if(gFlag &&
 992    * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null;
 993    *
 994    * Pthings pt=prep(s);
 995    *
 996    * int up = end;//(minMatch == null ? end : end-minMatch.i);
 997    *
 998    * if(up < start && end >= start) up = start;
 999    *
1000    * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ =
1001    * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
1002    * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
1003    * return didMatch_=true; } } } else { pt.no_check = true; for(long
1004    * i=start;i<=up;i++) { i = skipper.find(src,i,up); if(i<0) { charsMatched_ =
1005    * matchFrom_ = -1; return didMatch_ = false; } charsMatched_ =
1006    * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
1007    * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
1008    * gFlags = s; return didMatch_=true; } else { i = s.adjustIndex(i); up =
1009    * s.adjustEnd(i); } } } return didMatch_=false; }
1010    */
1011
1012   boolean _reverseSearch(String s, int start, int end)
1013   {
1014     return _reverseSearch(new StringWrap(s), start, end);
1015   }
1016
1017   boolean _reverseSearch(StringLike s, int start, int end)
1018   {
1019     if (gFlag && gFlagto > 0 && s.unwrap() == gFlags.unwrap())
1020     {
1021       end = gFlagto;
1022     }
1023     gFlags = null;
1024     Pthings pt = prep(s);
1025     for (int i = end; i >= start; i--)
1026     {
1027       charsMatched_ = thePattern.matchAt(s, i, pt);
1028       if (charsMatched_ >= 0)
1029       {
1030         matchFrom_ = thePattern.mfrom;
1031         marks = pt.marks;
1032         gFlagto = matchFrom_ - 1;
1033         gFlags = s;
1034         return didMatch_ = true;
1035       }
1036     }
1037     return didMatch_ = false;
1038   }
1039
  // One-entry cache used by setCbits: the most recently processed text
  // (compared by reference) and the bit set computed for it. In that bit
  // set a bit is on for every character position that lies inside a pair
  // of quotes.
  static StringLike lasts = null;

  // Quote bit set that belongs to lasts; handed out again when setCbits is
  // called with the very same text object.
  static BitSet lastbs = null;
1047
1048   static void setCbits(StringLike s, Pthings pt)
1049   {
1050     if (s == lasts)
1051     {
1052       pt.cbits = lastbs;
1053       return;
1054     }
1055     BitSet bs = new BitSet(s.length());
1056     char qc = ' ';
1057     boolean setBit = false;
1058     for (int i = 0; i < s.length(); i++)
1059     {
1060       if (setBit)
1061       {
1062         bs.set(i);
1063       }
1064       char c = s.charAt(i);
1065       if (!setBit && c == '"')
1066       {
1067         qc = c;
1068         setBit = true;
1069         bs.set(i);
1070       }
1071       else if (!setBit && c == '\'')
1072       {
1073         qc = c;
1074         setBit = true;
1075         bs.set(i);
1076       }
1077       else if (setBit && c == qc)
1078       {
1079         setBit = false;
1080       }
1081       else if (setBit && c == '\\' && i + 1 < s.length())
1082       {
1083         i++;
1084         if (setBit)
1085         {
1086           bs.set(i);
1087         }
1088       }
1089     }
1090     pt.cbits = lastbs = bs;
1091     lasts = s;
1092   }
1093
1094   // Wanted user to over-ride this in alpha version,
1095   // but it wasn't really necessary because of this trick:
1096   Regex newRegex()
1097   {
1098     try
1099     {
1100       return (Regex) getClass().newInstance();
1101     } catch (InstantiationException ie)
1102     {
1103       return null;
1104     } catch (IllegalAccessException iae)
1105     {
1106       return null;
1107     }
1108   }
1109
1110   /**
1111    * Only needed for creating your own extensions of Regex. This method adds the
1112    * next Pattern in the chain of patterns or sets the Pattern if it is the
1113    * first call.
1114    */
1115   protected void add(Pattern p2)
1116   {
1117     if (p == null)
1118     {
1119       p = p2;
1120     }
1121     else
1122     {
1123       p.add(p2);
1124       p2 = p;
1125     }
1126   }
1127
1128   /**
1129    * You only need to use this method if you are creating your own extentions to
1130    * Regex. compile1 compiles one Pattern element, it can be over-ridden to
1131    * allow the Regex compiler to understand new syntax. See deriv.java for an
1132    * example. This routine is the heart of class Regex. Rthings has one integer
1133    * member called intValue, it is used to keep track of the number of ()'s in
1134    * the Pattern.
1135    *
1136    * @exception com.stevesoft.pat.RegSyntax
1137    *              is thrown when a nonsensensical pattern is supplied. For
1138    *              example, a pattern beginning with *.
1139    */
1140   protected void compile1(StrPos sp, Rthings mk) throws RegSyntax
1141   {
1142     if (sp.match('['))
1143     {
1144       sp.inc();
1145       add(matchBracket(sp));
1146     }
1147     else if (sp.match('|'))
1148     {
1149       if (or == null)
1150       {
1151         or = new Or();
1152       }
1153       if (p == null)
1154       {
1155         p = new NullPattern();
1156       }
1157       or.addOr(p);
1158       p = null;
1159     }
1160     else if (sp.incMatch("(?<"))
1161     {
1162       patInt i = sp.getPatInt();
1163       if (i == null)
1164       {
1165         RegSyntaxError.endItAll("No int after (?<");
1166       }
1167       add(new Backup(i.intValue()));
1168       if (!sp.match(')'))
1169       {
1170         RegSyntaxError.endItAll("No ) after (?<");
1171       }
1172     }
1173     else if (sp.incMatch("(?>"))
1174     {
1175       patInt i = sp.getPatInt();
1176       if (i == null)
1177       {
1178         RegSyntaxError.endItAll("No int after (?>");
1179       }
1180       add(new Backup(-i.intValue()));
1181       if (!sp.match(')'))
1182       {
1183         RegSyntaxError.endItAll("No ) after (?<");
1184       }
1185     }
1186     else if (sp.incMatch("(?@"))
1187     {
1188       char op = sp.c;
1189       sp.inc();
1190       char cl = sp.c;
1191       sp.inc();
1192       if (!sp.match(')'))
1193       {
1194         RegSyntaxError.endItAll("(?@ does not have closing paren");
1195       }
1196       add(new Group(op, cl));
1197     }
1198     else if (sp.incMatch("(?#"))
1199     {
1200       while (!sp.match(')'))
1201       {
1202         sp.inc();
1203       }
1204     }
1205     else if (sp.dontMatch && sp.c == 'w')
1206     {
1207       // Regex r = new Regex();
1208       // r._compile("[a-zA-Z0-9_]",mk);
1209       // add(new Goop("\\w",r.thePattern));
1210       Bracket b = new Bracket(false);
1211       b.addOr(new Range('a', 'z'));
1212       b.addOr(new Range('A', 'Z'));
1213       b.addOr(new Range('0', '9'));
1214       b.addOr(new oneChar('_'));
1215       add(b);
1216     }
1217     else if (sp.dontMatch && sp.c == 'G')
1218     {
1219       add(new BackG());
1220     }
1221     else if (sp.dontMatch && sp.c == 's')
1222     {
1223       // Regex r = new Regex();
1224       // r._compile("[ \t\n\r\b]",mk);
1225       // add(new Goop("\\s",r.thePattern));
1226       Bracket b = new Bracket(false);
1227       b.addOr(new oneChar((char) 32));
1228       b.addOr(new Range((char) 8, (char) 10));
1229       b.addOr(new oneChar((char) 13));
1230       add(b);
1231     }
1232     else if (sp.dontMatch && sp.c == 'd')
1233     {
1234       // Regex r = new Regex();
1235       // r._compile("[0-9]",mk);
1236       // add(new Goop("\\d",r.thePattern));
1237       Range digit = new Range('0', '9');
1238       digit.printBrackets = true;
1239       add(digit);
1240     }
1241     else if (sp.dontMatch && sp.c == 'W')
1242     {
1243       // Regex r = new Regex();
1244       // r._compile("[^a-zA-Z0-9_]",mk);
1245       // add(new Goop("\\W",r.thePattern));
1246       Bracket b = new Bracket(true);
1247       b.addOr(new Range('a', 'z'));
1248       b.addOr(new Range('A', 'Z'));
1249       b.addOr(new Range('0', '9'));
1250       b.addOr(new oneChar('_'));
1251       add(b);
1252     }
1253     else if (sp.dontMatch && sp.c == 'S')
1254     {
1255       // Regex r = new Regex();
1256       // r._compile("[^ \t\n\r\b]",mk);
1257       // add(new Goop("\\S",r.thePattern));
1258       Bracket b = new Bracket(true);
1259       b.addOr(new oneChar((char) 32));
1260       b.addOr(new Range((char) 8, (char) 10));
1261       b.addOr(new oneChar((char) 13));
1262       add(b);
1263     }
1264     else if (sp.dontMatch && sp.c == 'D')
1265     {
1266       // Regex r = new Regex();
1267       // r._compile("[^0-9]",mk);
1268       // add(new Goop("\\D",r.thePattern));
1269       Bracket b = new Bracket(true);
1270       b.addOr(new Range('0', '9'));
1271       add(b);
1272     }
1273     else if (sp.dontMatch && sp.c == 'B')
1274     {
1275       Regex r = new Regex();
1276       r._compile("(?!" + back_slash + "b)", mk);
1277       add(r.thePattern);
1278     }
1279     else if (isOctalString(sp))
1280     {
1281       int d = sp.c - '0';
1282       sp.inc();
1283       d = 8 * d + sp.c - '0';
1284       StrPos sp2 = new StrPos(sp);
1285       sp2.inc();
1286       if (isOctalDigit(sp2, false))
1287       {
1288         sp.inc();
1289         d = 8 * d + sp.c - '0';
1290       }
1291       add(new oneChar((char) d));
1292     }
1293     else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')
1294     {
1295       int iv = sp.c - '0';
1296       StrPos s2 = new StrPos(sp);
1297       s2.inc();
1298       if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')
1299       {
1300         iv = 10 * iv + (s2.c - '0');
1301         sp.inc();
1302       }
1303       add(new BackMatch(iv));
1304     }
1305     else if (sp.dontMatch && sp.c == 'b')
1306     {
1307       add(new Boundary());
1308     }
1309     else if (sp.match('\b'))
1310     {
1311       add(new Boundary());
1312     }
1313     else if (sp.match('$'))
1314     {
1315       add(new End(true));
1316     }
1317     else if (sp.dontMatch && sp.c == 'Z')
1318     {
1319       add(new End(false));
1320     }
1321     else if (sp.match('.'))
1322     {
1323       add(new Any());
1324     }
1325     else if (sp.incMatch("(??"))
1326     {
1327       StringBuffer sb = new StringBuffer();
1328       StringBuffer sb2 = new StringBuffer();
1329       while (!sp.match(')') && !sp.match(':'))
1330       {
1331         sb.append(sp.c);
1332         sp.inc();
1333       }
1334       if (sp.incMatch(":"))
1335       {
1336         while (!sp.match(')'))
1337         {
1338           sb2.append(sp.c);
1339           sp.inc();
1340         }
1341       }
1342       String sbs = sb.toString();
1343       if (validators.get(sbs) instanceof String)
1344       {
1345         String pat = (String) validators.get(sbs);
1346         Regex r = newRegex();
1347         Rthings rth = new Rthings(this);
1348         rth.noBackRefs = true;
1349         r._compile(pat, rth);
1350         add(r.thePattern);
1351       }
1352       else
1353       {
1354         Custom cm = new Custom(sb.toString());
1355         if (cm.v != null)
1356         {
1357           Validator v2 = cm.v.arg(sb2.toString());
1358           if (v2 != null)
1359           {
1360             v2.argsave = sb2.toString();
1361             String p = cm.v.pattern;
1362             cm.v = v2;
1363             v2.pattern = p;
1364           }
1365           Regex r = newRegex();
1366           Rthings rth = new Rthings(this);
1367           rth.noBackRefs = true;
1368           r._compile(cm.v.pattern, rth);
1369           cm.sub = r.thePattern;
1370           cm.sub.add(new CustomEndpoint(cm));
1371           cm.sub.setParent(cm);
1372           add(cm);
1373         }
1374       }
1375     }
1376     else if (sp.match('('))
1377     {
1378       mk.parenLevel++;
1379       Regex r = newRegex();
1380       // r.or = new Or();
1381       sp.inc();
1382       if (sp.incMatch("?:"))
1383       {
1384         r.or = new Or();
1385       }
1386       else if (sp.incMatch("?="))
1387       {
1388         r.or = new lookAhead(false);
1389       }
1390       else if (sp.incMatch("?!"))
1391       {
1392         r.or = new lookAhead(true);
1393       }
1394       else if (sp.match('?'))
1395       {
1396         sp.inc();
1397         do
1398         {
1399           if (sp.c == 'i')
1400           {
1401             mk.ignoreCase = true;
1402           }
1403           if (sp.c == 'Q')
1404           {
1405             mk.dontMatchInQuotes = true;
1406           }
1407           if (sp.c == 'o')
1408           {
1409             mk.optimizeMe = true;
1410           }
1411           if (sp.c == 'g')
1412           {
1413             mk.gFlag = true;
1414           }
1415           if (sp.c == 's')
1416           {
1417             mk.sFlag = true;
1418           }
1419           if (sp.c == 'm')
1420           {
1421             mk.mFlag = true;
1422           }
1423           sp.inc();
1424         } while (!sp.match(')') && !sp.eos);
1425         r = null;
1426         mk.parenLevel--;
1427         if (sp.eos) // throw new RegSyntax
1428         {
1429           RegSyntaxError.endItAll("Unclosed ()");
1430         }
1431       }
1432       else
1433       { // just ordinary parenthesis
1434         r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
1435       }
1436       if (r != null)
1437       {
1438         add(r._compile(sp, mk));
1439       }
1440     }
1441     else if (sp.match('^'))
1442     {
1443       add(new Start(true));
1444     }
1445     else if (sp.dontMatch && sp.c == 'A')
1446     {
1447       add(new Start(false));
1448     }
1449     else if (sp.match('*'))
1450     {
1451       addMulti(new patInt(0), new patInf());
1452     }
1453     else if (sp.match('+'))
1454     {
1455       addMulti(new patInt(1), new patInf());
1456     }
1457     else if (sp.match('?'))
1458     {
1459       addMulti(new patInt(0), new patInt(1));
1460     }
1461     else if (sp.match('{'))
1462     {
1463       boolean bad = false;
1464       StrPos sp2 = new StrPos(sp);
1465       // StringBuffer sb = new StringBuffer();
1466       sp.inc();
1467       patInt i1 = sp.getPatInt();
1468       patInt i2 = null;
1469       if (sp.match('}'))
1470       {
1471         i2 = i1;
1472       }
1473       else
1474       {
1475         if (!sp.match(','))
1476         {
1477           /*
1478            * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed
1479            * with , or }");
1480            */
1481           bad = true;
1482         }
1483         sp.inc();
1484         if (sp.match('}'))
1485         {
1486           i2 = new patInf();
1487         }
1488         else
1489         {
1490           i2 = sp.getPatInt();
1491         }
1492       }
1493       if (i1 == null || i2 == null)
1494       {
1495         /*
1496          * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}");
1497          */
1498         bad = true;
1499       }
1500       if (bad)
1501       {
1502         sp.dup(sp2);
1503         add(new oneChar(sp.c));
1504       }
1505       else
1506       {
1507         addMulti(i1, i2);
1508       }
1509     }
1510     else if (sp.escMatch('x') && next2Hex(sp))
1511     {
1512       sp.inc();
1513       int d = getHexDigit(sp);
1514       sp.inc();
1515       d = 16 * d + getHexDigit(sp);
1516       add(new oneChar((char) d));
1517     }
1518     else if (sp.escMatch('c'))
1519     {
1520       sp.inc();
1521       if (sp.c < Ctrl.cmap.length)
1522       {
1523         add(new oneChar(Ctrl.cmap[sp.c]));
1524       }
1525       else
1526       {
1527         add(new oneChar(sp.c));
1528       }
1529     }
1530     else if (sp.escMatch('f'))
1531     {
1532       add(new oneChar((char) 12));
1533     }
1534     else if (sp.escMatch('a'))
1535     {
1536       add(new oneChar((char) 7));
1537     }
1538     else if (sp.escMatch('t'))
1539     {
1540       add(new oneChar('\t'));
1541     }
1542     else if (sp.escMatch('n'))
1543     {
1544       add(new oneChar('\n'));
1545     }
1546     else if (sp.escMatch('r'))
1547     {
1548       add(new oneChar('\r'));
1549     }
1550     else if (sp.escMatch('b'))
1551     {
1552       add(new oneChar('\b'));
1553     }
1554     else if (sp.escMatch('e'))
1555     {
1556       add(new oneChar((char) 27));
1557     }
1558     else
1559     {
1560       add(new oneChar(sp.c));
1561       if (sp.match(')'))
1562       {
1563         RegSyntaxError.endItAll("Unmatched right paren in pattern");
1564       }
1565     }
1566   }
1567
1568   // compiles all Pattern elements, internal method
1569   private Pattern _compile(String pat, Rthings mk) throws RegSyntax
1570   {
1571     minMatch = null;
1572     sFlag = mFlag = ignoreCase = gFlag = false;
1573     StrPos sp = new StrPos(pat, 0);
1574     thePattern = _compile(sp, mk);
1575     pt.marks = null;
1576     return thePattern;
1577   }
1578
  // Head of the chain of pattern elements built so far for the current
  // alternative; add() appends to it and compile1 resets it to null on '|'.
  Pattern p = null;

  // Collects the alternatives of this (sub)expression; stays null until a
  // '|' is seen or an enclosing group assigns it.
  Or or = null;
1582
1583   Pattern _compile(StrPos sp, Rthings mk) throws RegSyntax
1584   {
1585     while (!(sp.eos || (or != null && sp.match(')'))))
1586     {
1587       compile1(sp, mk);
1588       sp.inc();
1589     }
1590     if (sp.match(')'))
1591     {
1592       mk.parenLevel--;
1593     }
1594     else if (sp.eos && mk.parenLevel != 0)
1595     {
1596       RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);
1597     }
1598     if (or != null)
1599     {
1600       if (p == null)
1601       {
1602         p = new NullPattern();
1603       }
1604       or.addOr(p);
1605       return or;
1606     }
1607     return p == null ? new NullPattern() : p;
1608   }
1609
1610   // add a multi object to the end of the chain
1611   // which applies to the last object
1612   void addMulti(patInt i1, patInt i2) throws RegSyntax
1613   {
1614     Pattern last, last2;
1615     for (last = p; last != null && last.next != null; last = last.next)
1616     {
1617       ;
1618     }
1619     if (last == null || last == p)
1620     {
1621       last2 = null;
1622     }
1623     else
1624     {
1625       for (last2 = p; last2.next != last; last2 = last2.next)
1626       {
1627         ;
1628       }
1629     }
1630     if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1)
1631     {
1632       ((Multi) last).matchFewest = true;
1633     }
1634     else if (last instanceof FastMulti && i1.intValue() == 0
1635             && i2.intValue() == 1)
1636     {
1637       ((FastMulti) last).matchFewest = true;
1638     }
1639     else if (last instanceof DotMulti && i1.intValue() == 0
1640             && i2.intValue() == 1)
1641     {
1642       ((DotMulti) last).matchFewest = true;
1643     }
1644     else if (last instanceof Multi || last instanceof DotMulti
1645             || last instanceof FastMulti)
1646     {
1647       throw new RegSyntax("Syntax error.");
1648     }
1649     else if (last2 == null)
1650     {
1651       p = mkMulti(i1, i2, p);
1652     }
1653     else
1654     {
1655       last2.next = mkMulti(i1, i2, last);
1656     }
1657   }
1658
1659   final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)
1660           throws RegSyntax
1661   {
1662     if (p instanceof Any && p.next == null)
1663     {
1664       return (Pattern) new DotMulti(lo, hi);
1665     }
1666     return RegOpt.safe4fm(p) ? (Pattern) new FastMulti(lo, hi, p)
1667             : (Pattern) new Multi(lo, hi, p);
1668   }
1669
1670   // process the bracket operator
1671   Pattern matchBracket(StrPos sp) throws RegSyntax
1672   {
1673     Bracket ret;
1674     if (sp.match('^'))
1675     {
1676       ret = new Bracket(true);
1677       sp.inc();
1678     }
1679     else
1680     {
1681       ret = new Bracket(false);
1682     }
1683     if (sp.match(']'))
1684     {
1685       // throw new RegSyntax
1686       RegSyntaxError.endItAll("Unmatched []");
1687     }
1688
1689     while (!sp.eos && !sp.match(']'))
1690     {
1691       StrPos s1 = new StrPos(sp);
1692       s1.inc();
1693       StrPos s1_ = new StrPos(s1);
1694       s1_.inc();
1695       if (s1.match('-') && !s1_.match(']'))
1696       {
1697         StrPos s2 = new StrPos(s1);
1698         s2.inc();
1699         if (!s2.eos)
1700         {
1701           ret.addOr(new Range(sp.c, s2.c));
1702         }
1703         sp.inc();
1704         sp.inc();
1705       }
1706       else if (sp.escMatch('Q'))
1707       {
1708         sp.inc();
1709         while (!sp.escMatch('E'))
1710         {
1711           ret.addOr(new oneChar(sp.c));
1712           sp.inc();
1713         }
1714       }
1715       else if (sp.escMatch('d'))
1716       {
1717         ret.addOr(new Range('0', '9'));
1718       }
1719       else if (sp.escMatch('s'))
1720       {
1721         ret.addOr(new oneChar((char) 32));
1722         ret.addOr(new Range((char) 8, (char) 10));
1723         ret.addOr(new oneChar((char) 13));
1724       }
1725       else if (sp.escMatch('w'))
1726       {
1727         ret.addOr(new Range('a', 'z'));
1728         ret.addOr(new Range('A', 'Z'));
1729         ret.addOr(new Range('0', '9'));
1730         ret.addOr(new oneChar('_'));
1731       }
1732       else if (sp.escMatch('D'))
1733       {
1734         ret.addOr(new Range((char) 0, (char) 47));
1735         ret.addOr(new Range((char) 58, (char) 65535));
1736       }
1737       else if (sp.escMatch('S'))
1738       {
1739         ret.addOr(new Range((char) 0, (char) 7));
1740         ret.addOr(new Range((char) 11, (char) 12));
1741         ret.addOr(new Range((char) 14, (char) 31));
1742         ret.addOr(new Range((char) 33, (char) 65535));
1743       }
1744       else if (sp.escMatch('W'))
1745       {
1746         ret.addOr(new Range((char) 0, (char) 64));
1747         ret.addOr(new Range((char) 91, (char) 94));
1748         ret.addOr(new oneChar((char) 96));
1749         ret.addOr(new Range((char) 123, (char) 65535));
1750       }
1751       else if (sp.escMatch('x') && next2Hex(sp))
1752       {
1753         sp.inc();
1754         int d = getHexDigit(sp);
1755         sp.inc();
1756         d = 16 * d + getHexDigit(sp);
1757         ret.addOr(new oneChar((char) d));
1758       }
1759       else if (sp.escMatch('a'))
1760       {
1761         ret.addOr(new oneChar((char) 7));
1762       }
1763       else if (sp.escMatch('f'))
1764       {
1765         ret.addOr(new oneChar((char) 12));
1766       }
1767       else if (sp.escMatch('e'))
1768       {
1769         ret.addOr(new oneChar((char) 27));
1770       }
1771       else if (sp.escMatch('n'))
1772       {
1773         ret.addOr(new oneChar('\n'));
1774       }
1775       else if (sp.escMatch('t'))
1776       {
1777         ret.addOr(new oneChar('\t'));
1778       }
1779       else if (sp.escMatch('r'))
1780       {
1781         ret.addOr(new oneChar('\r'));
1782       }
1783       else if (sp.escMatch('c'))
1784       {
1785         sp.inc();
1786         if (sp.c < Ctrl.cmap.length)
1787         {
1788           ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
1789         }
1790         else
1791         {
1792           ret.addOr(new oneChar(sp.c));
1793         }
1794       }
1795       else if (isOctalString(sp))
1796       {
1797         int d = sp.c - '0';
1798         sp.inc();
1799         d = 8 * d + sp.c - '0';
1800         StrPos sp2 = new StrPos(sp);
1801         sp2.inc();
1802         if (isOctalDigit(sp2, false))
1803         {
1804           sp.inc();
1805           d = 8 * d + sp.c - '0';
1806         }
1807         ret.addOr(new oneChar((char) d));
1808       }
1809       else
1810       {
1811         ret.addOr(new oneChar(sp.c));
1812       }
1813       sp.inc();
1814     }
1815     return ret;
1816   }
1817
1818   /**
1819    * Converts the stored Pattern to a String -- this is a decompile. Note that
1820    * \t and \n will really print out here, Not just the two character
1821    * representations. Also be prepared to see some strange output if your
1822    * characters are not printable.
1823    */
1824   public String toString()
1825   {
1826     if (false && thePattern == null)
1827     {
1828       return "";
1829     }
1830     else
1831     {
1832       StringBuffer sb = new StringBuffer();
1833       if (esc != Pattern.ESC)
1834       {
1835         sb.append("(?e=");
1836         sb.append(esc);
1837         sb.append(")");
1838       }
1839       if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase
1840               || dontMatchInQuotes || optimized())
1841       {
1842         sb.append("(?");
1843         if (ignoreCase)
1844         {
1845           sb.append("i");
1846         }
1847         if (mFlag)
1848         {
1849           sb.append("m");
1850         }
1851         if (sFlag || !dotDoesntMatchCR)
1852         {
1853           sb.append("s");
1854         }
1855         if (dontMatchInQuotes)
1856         {
1857           sb.append("Q");
1858         }
1859         if (optimized())
1860         {
1861           sb.append("o");
1862         }
1863         if (gFlag)
1864         {
1865           sb.append("g");
1866         }
1867         sb.append(")");
1868       }
1869       String patstr = thePattern.toString();
1870       if (esc != Pattern.ESC)
1871       {
1872         patstr = reEscape(patstr, Pattern.ESC, esc);
1873       }
1874       sb.append(patstr);
1875       return sb.toString();
1876     }
1877   }
1878
1879   // Re-escape Pattern, allows us to use a different escape
1880   // character.
1881   static String reEscape(String s, char oldEsc, char newEsc)
1882   {
1883     if (oldEsc == newEsc)
1884     {
1885       return s;
1886     }
1887     int i;
1888     StringBuffer sb = new StringBuffer();
1889     for (i = 0; i < s.length(); i++)
1890     {
1891       if (s.charAt(i) == oldEsc && i + 1 < s.length())
1892       {
1893         if (s.charAt(i + 1) == oldEsc)
1894         {
1895           sb.append(oldEsc);
1896         }
1897         else
1898         {
1899           sb.append(newEsc);
1900           sb.append(s.charAt(i + 1));
1901         }
1902         i++;
1903       }
1904       else if (s.charAt(i) == newEsc)
1905       {
1906         sb.append(newEsc);
1907         sb.append(newEsc);
1908       }
1909       else
1910       {
1911         sb.append(s.charAt(i));
1912       }
1913     }
1914     return sb.toString();
1915   }
1916
1917   /**
1918    * This method implements FilenameFilter, allowing one to use a Regex to
1919    * search through a directory using File.list. There is a FileRegex now that
1920    * does this better.
1921    *
1922    * @see com.stevesoft.pat.FileRegex
1923    */
1924   public boolean accept(File dir, String s)
1925   {
1926     return search(s);
1927   }
1928
1929   /** The version of this package */
1930   final static public String version()
1931   {
1932     return "lgpl release 1.5.3";
1933   }
1934
1935   /**
1936    * Once this method is called, the state of variables ignoreCase and
1937    * dontMatchInQuotes should not be changed as the results will be
1938    * unpredictable. However, search and matchAt will run more quickly. Note that
1939    * you can check to see if the pattern has been optimized by calling the
1940    * optimized() method.
1941    * <p>
1942    * This method will attempt to rewrite your pattern in a way that makes it
1943    * faster (not all patterns execute at the same speed). In general,
1944    * "(?: ... )" will be faster than "( ... )" so if you don't need the
1945    * backreference, you should group using the former pattern.
1946    * <p>
1947    * It will also introduce new pattern elements that you can't get to
1948    * otherwise, for example if you have a large table of strings, i.e. the
1949    * months of the year "(January|February|...)" optimize() will make a
1950    * Hashtable that takes it to the next appropriate pattern element --
1951    * eliminating the need for a linear search.
1952    *
1953    * @see com.stevesoft.pat.Regex#optimized
1954    * @see com.stevesoft.pat.Regex#ignoreCase
1955    * @see com.stevesoft.pat.Regex#dontMatchInQuotes
1956    * @see com.stevesoft.pat.Regex#matchAt
1957    * @see com.stevesoft.pat.Regex#search
1958    */
1959   public void optimize()
1960   {
1961     if (optimized() || thePattern == null)
1962     {
1963       return;
1964     }
1965     minMatch = new patInt(0); // thePattern.countMinChars();
1966     thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes);
1967     skipper = Skip.findSkip(this);
1968     // RegOpt.setParents(this);
1969     return;
1970   }
1971
  // Assigned by optimize() from Skip.findSkip(this); not set before then.
  Skip skipper;
1973
1974   /**
1975    * This function returns true if the optimize method has been called.
1976    */
1977   public boolean optimized()
1978   {
1979     return minMatch != null;
1980   }
1981
1982   /**
1983    * A bit of syntactic surgar for those who want to make their code look more
1984    * perl-like. To use this initialize your Regex object by saying:
1985    *
1986    * <pre>
1987    *       Regex r1 = Regex.perlCode(&quot;s/hello/goodbye/&quot;);
1988    *       Regex r2 = Regex.perlCode(&quot;s'fish'frog'i&quot;);
1989    *       Regex r3 = Regex.perlCode(&quot;m'hello');
1990    * </pre>
1991    *
1992    * The i for ignoreCase is supported in this syntax, as well as m, s, and x.
1993    * The g flat is a bit of a special case.
1994    * <p>
1995    * If you wish to replace all occurences of a pattern, you do not put a 'g' in
1996    * the perlCode, but call Regex's replaceAll method.
1997    * <p>
1998    * If you wish to simply and only do a search for r2's pattern, you can do
1999    * this by calling the searchFrom method method repeatedly, or by calling
2000    * search repeatedly if the g flag is set.
2001    * <p>
2002    * Note: Currently perlCode does <em>not</em> support the (?e=#) syntax for
2003    * changing the escape character.
2004    */
2005
2006   public static Regex perlCode(String s)
2007   {
2008     // this file is big enough, see parsePerl.java
2009     // for this function.
2010     return parsePerl.parse(s);
2011   }
2012
  // A single backslash character; compile1 uses it to build the "(?!\b)"
  // pattern text for \B.
  static final char back_slash = '\\';
2014
2015   /**
2016    * Checks to see if there are only literal and no special pattern elements in
2017    * this Regex.
2018    */
2019   public boolean isLiteral()
2020   {
2021     Pattern x = thePattern;
2022     while (x != null)
2023     {
2024       if (x instanceof oneChar)
2025       {
2026         ;
2027       }
2028       else if (x instanceof Skipped)
2029       {
2030         ;
2031       }
2032       else
2033       {
2034         return false;
2035       }
2036       x = x.next;
2037     }
2038     return true;
2039   }
2040
2041   /**
2042    * You only need to know about this if you are inventing your own pattern
2043    * elements.
2044    */
2045   public patInt countMinChars()
2046   {
2047     return thePattern.countMinChars();
2048   }
2049
2050   /**
2051    * You only need to know about this if you are inventing your own pattern
2052    * elements.
2053    */
2054   public patInt countMaxChars()
2055   {
2056     return thePattern.countMaxChars();
2057   }
2058
2059   boolean isHexDigit(StrPos sp)
2060   {
2061     boolean r = !sp.eos
2062             && !sp.dontMatch
2063             && ((sp.c >= '0' && sp.c <= '9')
2064                     || (sp.c >= 'a' && sp.c <= 'f') || (sp.c >= 'A' && sp.c <= 'F'));
2065     return r;
2066   }
2067
2068   boolean isOctalDigit(StrPos sp, boolean first)
2069   {
2070     boolean r = !sp.eos && !(first ^ sp.dontMatch) && sp.c >= '0'
2071             && sp.c <= '7';
2072     return r;
2073   }
2074
2075   int getHexDigit(StrPos sp)
2076   {
2077     if (sp.c >= '0' && sp.c <= '9')
2078     {
2079       return sp.c - '0';
2080     }
2081     if (sp.c >= 'a' && sp.c <= 'f')
2082     {
2083       return sp.c - 'a' + 10;
2084     }
2085     return sp.c - 'A' + 10;
2086   }
2087
2088   boolean next2Hex(StrPos sp)
2089   {
2090     StrPos sp2 = new StrPos(sp);
2091     sp2.inc();
2092     if (!isHexDigit(sp2))
2093     {
2094       return false;
2095     }
2096     sp2.inc();
2097     if (!isHexDigit(sp2))
2098     {
2099       return false;
2100     }
2101     return true;
2102   }
2103
2104   boolean isOctalString(StrPos sp)
2105   {
2106     if (!isOctalDigit(sp, true))
2107     {
2108       return false;
2109     }
2110     StrPos sp2 = new StrPos(sp);
2111     sp2.inc();
2112     if (!isOctalDigit(sp2, false))
2113     {
2114       return false;
2115     }
2116     return true;
2117   }
2118 }