2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
13 Shareware: package pat
\r
14 <a href="copyright.html">Copyright 2001, Steven R. Brandt</a>
\r
17 * The RegexTokenizer is similar to the StringTokenizer class provided with
\r
18 * java, but allows one to tokenize using regular expressions, rather than a
\r
19 * simple list of characters. Tokens are any strings between the supplied
\r
20 * regular expression, as well as any backreferences (things in parentheses)
\r
21 * contained within the regular expression.
\r
23 public class RegexTokenizer implements Enumeration
\r
31 Vector v = new Vector();
\r
33 Vector vi = new Vector();
\r
41 String s = r.right();
\r
42 if (r.searchFrom(toParse, pos))
\r
44 v.addElement(r.left().substring(pos));
\r
45 vi.addElement(new Integer(r.matchFrom() + r.charsMatched()));
\r
46 for (int i = 0; i < r.numSubs(); i++)
\r
48 if (r.substring() != null)
\r
50 v.addElement(r.substring(i + offset));
\r
51 vi.addElement(new Integer(r.matchFrom(i + offset)
\r
52 + r.charsMatched(i + offset)));
\r
55 pos = r.matchFrom() + r.charsMatched();
\r
63 /** Initialize the tokenizer with a string of text and a pattern */
\r
64 public RegexTokenizer(String txt, String ptrn)
\r
67 r = new Regex(ptrn);
\r
68 offset = Regex.BackRefOffset;
\r
72 /** Initialize the tokenizer with a Regex object. */
\r
73 public RegexTokenizer(String txt, Regex r)
\r
77 offset = Regex.BackRefOffset;
\r
82 * This should always be cast to a String, as in StringTokenizer, and as in
\r
83 * StringTokenizer one can do this by calling nextToken().
\r
85 public Object nextElement()
\r
87 if (count >= v.size())
\r
91 return v.elementAt(count++);
\r
94 /** This is the equivalent of (String) nextElement(). */
\r
95 public String nextToken()
\r
97 return (String) nextElement();
\r
101 * This asks for the next token, and changes the pattern being used at the
\r
104 public String nextToken(String newpat)
\r
109 } catch (RegSyntax r_)
\r
112 return nextToken(r);
\r
116 * This asks for the next token, and changes the pattern being used at the
\r
119 public String nextToken(Regex nr)
\r
122 if (vi.size() > count)
\r
124 pos = ((Integer) vi.elementAt(count)).intValue();
\r
129 return nextToken();
\r
132 /** Tells whether there are more tokens in the text being parsed. */
\r
133 public boolean hasMoreElements()
\r
135 if (count >= v.size())
\r
139 return count < v.size();
\r
143 * Tells whether there are more tokens in the pattern, but in the fashion of
\r
146 public boolean hasMoreTokens()
\r
148 return hasMoreElements();
\r
151 /** Determines the # of remaining tokens */
\r
152 public int countTokens()
\r
154 int _count = count;
\r
155 while (hasMoreTokens())
\r
160 return v.size() - count;
\r
163 /** Returns all tokens in the String */
\r
164 public String[] allTokens()
\r
167 String[] ret = new String[v.size()];
\r