2 // This software is now distributed according to
\r
3 // the Lesser Gnu Public License. Please see
\r
4 // http://www.gnu.org/copyleft/lesser.txt for
\r
6 // -- Happy Computing!
\r
8 package com.stevesoft.pat;
\r
11 Shareware: package pat
\r
12 <a href="copyright.html">Copyright 2001, Steven R. Brandt</a>
\r
14 The RegexTokenizer is similar to the StringTokenizer class
\r
15 provided with Java, but allows one to tokenize using
\r
16 regular expressions, rather than a simple list of characters.
\r
17 Tokens are any strings between the supplied regular expression,
\r
18 as well as any backreferences (things in parentheses)
\r
19 contained within the regular expression. */
\r
20 public class RegexTokenizer implements Enumeration {
\r
24 Vector v = new Vector();
\r
25 Vector vi = new Vector();
\r
30 String s = r.right();
\r
31 if(r.searchFrom(toParse,pos)) {
\r
32 v.addElement(r.left().substring(pos));
\r
33 vi.addElement(new Integer(r.matchFrom()+
\r
35 for(int i=0;i<r.numSubs();i++)
\r
36 if(r.substring() != null) {
\r
37 v.addElement(r.substring(i+offset));
\r
39 new Integer(r.matchFrom(i+offset)+
\r
40 r.charsMatched(i+offset)));
\r
42 pos = r.matchFrom()+r.charsMatched();
\r
43 } else if(s != null) v.addElement(s);
\r
46 /** Initialize the tokenizer with a string of text and a pattern */
\r
47 public RegexTokenizer(String txt,String ptrn) {
\r
49 r = new Regex(ptrn);
\r
50 offset = Regex.BackRefOffset;
\r
53 /** Initialize the tokenizer with a Regex object. */
\r
54 public RegexTokenizer(String txt,Regex r) {
\r
57 offset = Regex.BackRefOffset;
\r
60 /** This should always be cast to a String, as in StringTokenizer,
\r
61 and as in StringTokenizer one can do this by calling
\r
63 public Object nextElement() {
\r
64 if(count >= v.size()) getMore();
\r
65 return v.elementAt(count++);
\r
67 /** This is the equivalent (String)nextElement(). */
\r
68 public String nextToken() { return (String)nextElement(); }
\r
69 /** This asks for the next token, and changes the pattern
\r
70 being used at the same time. */
\r
71 public String nextToken(String newpat) {
\r
72 try { r.compile(newpat); } catch (RegSyntax r_) {}
\r
73 return nextToken(r);
\r
75 /** This asks for the next token, and changes the pattern
\r
76 being used at the same time. */
\r
77 public String nextToken(Regex nr) {
\r
79 if(vi.size() > count) {
\r
80 pos = ((Integer)vi.elementAt(count)).intValue();
\r
87 /** Tells whether there are more tokens in the text being tokenized. */
\r
88 public boolean hasMoreElements() {
\r
89 if(count >= v.size()) getMore();
\r
90 return count < v.size();
\r
92 /** Tells whether there are more tokens in the pattern, but
\r
93 in the fashion of StringTokenizer. */
\r
94 public boolean hasMoreTokens() { return hasMoreElements(); }
\r
95 /** Determines the number of remaining tokens. */
\r
96 public int countTokens() {
\r
98 while(hasMoreTokens())
\r
101 return v.size()-count;
\r
103 /** Returns all tokens in the String */
\r
104 public String[] allTokens() {
\r
106 String[] ret = new String[v.size()];
\r