2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
13 Shareware: package pat
\r
14 <a href="copyright.html">Copyright 2001, Steven R. Brandt</a>
\r
17 The RegexTokenizer is similar to the StringTokenizer class
\r
18 provided with java, but allows one to tokenize using
\r
19 regular expressions, rather than a simple list of characters.
\r
20 Tokens are any strings between the supplied regular expression,
\r
21 as well as any backreferences (things in parentheses)
\r
22 contained within the regular expression. */
\r
23 public class RegexTokenizer
\r
24 implements Enumeration
\r
29 Vector v = new Vector();
\r
30 Vector vi = new Vector();
\r
36 String s = r.right();
\r
37 if (r.searchFrom(toParse, pos))
\r
39 v.addElement(r.left().substring(pos));
\r
40 vi.addElement(new Integer(r.matchFrom() +
\r
42 for (int i = 0; i < r.numSubs(); i++)
\r
44 if (r.substring() != null)
\r
46 v.addElement(r.substring(i + offset));
\r
48 new Integer(r.matchFrom(i + offset) +
\r
49 r.charsMatched(i + offset)));
\r
52 pos = r.matchFrom() + r.charsMatched();
\r
60 /** Initialize the tokenizer with a string of text and a pattern */
\r
61 public RegexTokenizer(String txt, String ptrn)
\r
64 r = new Regex(ptrn);
\r
65 offset = Regex.BackRefOffset;
\r
69 /** Initialize the tokenizer with a Regex object. */
\r
70 public RegexTokenizer(String txt, Regex r)
\r
74 offset = Regex.BackRefOffset;
\r
78 /** This should always be cast to a String, as in StringTokenizer,
\r
79 and as in StringTokenizer one can do this by calling
\r
81 public Object nextElement()
\r
83 if (count >= v.size())
\r
87 return v.elementAt(count++);
\r
90 /** This is the equivalent of (String)nextElement(). */
\r
91 public String nextToken()
\r
93 return (String) nextElement();
\r
96 /** This asks for the next token, and changes the pattern
\r
97 being used at the same time. */
\r
98 public String nextToken(String newpat)
\r
104 catch (RegSyntax r_)
\r
106 return nextToken(r);
\r
109 /** This asks for the next token, and changes the pattern
\r
110 being used at the same time. */
\r
111 public String nextToken(Regex nr)
\r
114 if (vi.size() > count)
\r
116 pos = ( (Integer) vi.elementAt(count)).intValue();
\r
121 return nextToken();
\r
124 /** Tells whether there are more tokens in the text. */
\r
125 public boolean hasMoreElements()
\r
127 if (count >= v.size())
\r
131 return count < v.size();
\r
134 /** Tells whether there are more tokens in the text, but
\r
135 in the fashion of StringTokenizer. */
\r
136 public boolean hasMoreTokens()
\r
138 return hasMoreElements();
\r
141 /** Determines the number of remaining tokens. */
\r
142 public int countTokens()
\r
144 int _count = count;
\r
145 while (hasMoreTokens())
\r
150 return v.size() - count;
\r
153 /** Returns all tokens in the String */
\r
154 public String[] allTokens()
\r
157 String[] ret = new String[v.size()];
\r