// -- Happy Computing!\r
//\r
package com.stevesoft.pat;\r
+\r
import java.util.*;\r
-/** \r
+\r
+/**\r
Shareware: package pat\r
<a href="copyright.html">Copyright 2001, Steven R. Brandt</a>\r
-*/ /**\r
-The RegexTokenizer is similar to the StringTokenizer class\r
-provided with java, but allows one to tokenize using\r
-regular expressions, rather than a simple list of characters.\r
-Tokens are any strings between the supplied regular expression,\r
-as well as any backreferences (things in parenthesis)\r
-contained within the regular expression. */\r
-public class RegexTokenizer implements Enumeration {\r
- String toParse;\r
- Regex r;\r
- int count = 0;\r
- Vector v = new Vector();\r
- Vector vi = new Vector();\r
- int pos=0;\r
+ */\r
+/**\r
+ The RegexTokenizer is similar to the StringTokenizer class\r
+ provided with Java, but allows one to tokenize using\r
+ regular expressions, rather than a simple list of characters.\r
+ Tokens are any strings between matches of the supplied regular expression,\r
+ as well as any backreferences (things in parentheses)\r
+ contained within the regular expression. */\r
+public class RegexTokenizer\r
+ implements Enumeration\r
+{\r
+ String toParse;\r
+ Regex r;\r
+ int count = 0;\r
+ Vector v = new Vector();\r
+ Vector vi = new Vector();\r
+ int pos = 0;\r
\r
- int offset = 1;\r
- void getMore() {\r
- String s = r.right();\r
- if(r.searchFrom(toParse,pos)) {\r
- v.addElement(r.left().substring(pos));\r
- vi.addElement(new Integer(r.matchFrom()+\r
- r.charsMatched()));\r
- for(int i=0;i<r.numSubs();i++)\r
- if(r.substring() != null) {\r
- v.addElement(r.substring(i+offset));\r
- vi.addElement(\r
- new Integer(r.matchFrom(i+offset)+\r
- r.charsMatched(i+offset)));\r
- }\r
- pos = r.matchFrom()+r.charsMatched();\r
- } else if(s != null) v.addElement(s);\r
- }\r
-\r
- /** Initialize the tokenizer with a string of text and a pattern */\r
- public RegexTokenizer(String txt,String ptrn) {\r
- toParse = txt;\r
- r = new Regex(ptrn);\r
- offset = Regex.BackRefOffset;\r
- getMore();\r
- }\r
- /** Initialize the tokenizer with a Regex object. */\r
- public RegexTokenizer(String txt,Regex r) {\r
- toParse = txt;\r
- this.r = r;\r
- offset = Regex.BackRefOffset;\r
- getMore();\r
+ int offset = 1;\r
+ void getMore() /* Runs one more search of toParse, starting at pos, and\r
+ appends what it finds to v (tokens) and vi (one offset per token).\r
+ On a match: r.left().substring(pos) is appended as a token, then\r
+ r.substring(i + offset) for each i below r.numSubs(), and pos is moved\r
+ to r.matchFrom() + r.charsMatched().\r
+ On no match: the value r.right() held before the search, if non-null,\r
+ is appended to v only (no entry is added to vi for it). */\r
+ {\r
+ String s = r.right(); /* read before searchFrom runs; presumably the text after the previous match - TODO confirm */\r
+ if (r.searchFrom(toParse, pos))\r
+ {\r
+ v.addElement(r.left().substring(pos)); /* presumably the text between pos and the new match - TODO confirm r.left() semantics */\r
+ vi.addElement(new Integer(r.matchFrom() +\r
+ r.charsMatched())); /* same value that pos is set to below */\r
+ for (int i = 0; i < r.numSubs(); i++)\r
+ {\r
+ if (r.substring() != null) /* NOTE(review): this tests r.substring() with no index, while the\r
+ lines below use index i + offset - confirm that r.substring(i + offset) was not intended */\r
+ {\r
+ v.addElement(r.substring(i + offset));\r
+ vi.addElement(\r
+ new Integer(r.matchFrom(i + offset) +\r
+ r.charsMatched(i + offset)));\r
+ }\r
+ }\r
+ pos = r.matchFrom() + r.charsMatched(); /* the next searchFrom call starts from here */\r
}\r
- /** This should always be cast to a String, as in StringTokenizer,\r
- and as in StringTokenizer one can do this by calling\r
- nextString(). */\r
- public Object nextElement() {\r
- if(count >= v.size()) getMore();\r
- return v.elementAt(count++);\r
+ else if (s != null) /* search failed: fall back to the value captured before it */\r
+ {\r
+ v.addElement(s);\r
}\r
- /** This is the equivalent (String)nextElement(). */\r
- public String nextToken() { return (String)nextElement(); }\r
- /** This asks for the next token, and changes the pattern\r
- being used at the same time. */\r
- public String nextToken(String newpat) {\r
- try { r.compile(newpat); } catch (RegSyntax r_) {}\r
- return nextToken(r);\r
+ }\r
+\r
+ /** Initialize the tokenizer with a string of text and a pattern.\r
+ A new Regex is built from ptrn, offset is taken from\r
+ Regex.BackRefOffset, and getMore() is called once so the first\r
+ search has already run when the constructor returns.\r
+ @param txt the text to tokenize\r
+ @param ptrn the pattern source passed to the Regex constructor */\r
+ public RegexTokenizer(String txt, String ptrn)\r
+ {\r
+ toParse = txt;\r
+ r = new Regex(ptrn);\r
+ offset = Regex.BackRefOffset;\r
+ getMore();\r
+ }\r
+\r
+ /** Initialize the tokenizer with a Regex object.\r
+ The Regex is stored by reference (it is not copied), offset is taken\r
+ from Regex.BackRefOffset, and getMore() is called once so the first\r
+ search has already run when the constructor returns.\r
+ @param txt the text to tokenize\r
+ @param r the Regex used for searching */\r
+ public RegexTokenizer(String txt, Regex r)\r
+ {\r
+ toParse = txt;\r
+ this.r = r;\r
+ offset = Regex.BackRefOffset;\r
+ getMore();\r
+ }\r
+\r
+ /** Returns the next token as an Object. The value should always be cast\r
+ to a String, as in StringTokenizer; nextToken() performs that cast.\r
+ When every buffered token has already been handed out, getMore() is\r
+ called first to search for more. If that adds nothing,\r
+ v.elementAt(count) throws ArrayIndexOutOfBoundsException.\r
+ @return the element of v at the current position, which is then advanced */\r
+ public Object nextElement()\r
+ {\r
+ if (count >= v.size())\r
+ {\r
+ getMore();\r
}\r
- /** This asks for the next token, and changes the pattern\r
- being used at the same time. */\r
- public String nextToken(Regex nr) {\r
- r = nr;\r
- if(vi.size() > count) {\r
- pos = ((Integer)vi.elementAt(count)).intValue();\r
- v.setSize(count);\r
- vi.setSize(count);\r
- }\r
- getMore();\r
- return nextToken();\r
+ return v.elementAt(count++);\r
+ }\r
+\r
+ /** This is the equivalent of (String) nextElement().\r
+ @return the result of nextElement(), cast to String */\r
+ public String nextToken()\r
+ {\r
+ return (String) nextElement();\r
+ }\r
+\r
+ /** This asks for the next token, and changes the pattern\r
+ being used at the same time. The current Regex r is recompiled in\r
+ place with newpat and then handed to nextToken(Regex).\r
+ NOTE: a RegSyntax thrown by r.compile is caught and ignored, so a\r
+ malformed newpat is not reported to the caller; tokenizing continues\r
+ with whatever state r is left in.\r
+ @param newpat the new pattern source\r
+ @return the result of nextToken(r) */\r
+ public String nextToken(String newpat)\r
+ {\r
+ try\r
+ {\r
+ r.compile(newpat);\r
}\r
- /** Tells whether there are more tokens in the pattern. */\r
- public boolean hasMoreElements() {\r
- if(count >= v.size()) getMore();\r
- return count < v.size();\r
+ catch (RegSyntax r_)\r
+ {} /* NOTE(review): RegSyntax is silently dropped here - confirm this is intended */\r
+ return nextToken(r);\r
+ }\r
+\r
+ /** This asks for the next token, and changes the pattern\r
+ being used at the same time. If vi holds an offset at index count,\r
+ pos is reset to that offset and both v and vi are truncated to count\r
+ entries, discarding everything buffered at index count and beyond.\r
+ getMore() then searches with nr, and the token is fetched through\r
+ nextToken().\r
+ @param nr the Regex to use from now on\r
+ @return the result of nextToken() after the new search */\r
+ public String nextToken(Regex nr)\r
+ {\r
+ r = nr;\r
+ if (vi.size() > count)\r
+ {\r
+ pos = ( (Integer) vi.elementAt(count)).intValue();\r
+ v.setSize(count);\r
+ vi.setSize(count);\r
}\r
- /** Tells whether there are more tokens in the pattern, but\r
- in the fashion of StringTokenizer. */\r
- public boolean hasMoreTokens() { return hasMoreElements(); }\r
- /** Determines the # of remaining tokens */\r
- public int countTokens() {\r
- int _count=count;\r
- while(hasMoreTokens())\r
- nextToken();\r
- count=_count;\r
- return v.size()-count;\r
+ getMore();\r
+ return nextToken();\r
+ }\r
+\r
+ /** Tells whether there are more tokens available.\r
+ If every buffered token has already been consumed, getMore() is\r
+ called once to look for more before answering.\r
+ @return true if v holds an element at index count */\r
+ public boolean hasMoreElements()\r
+ {\r
+ if (count >= v.size())\r
+ {\r
+ getMore();\r
}\r
- /** Returns all tokens in the String */\r
- public String[] allTokens() {\r
- countTokens();\r
- String[] ret = new String[v.size()];\r
- v.copyInto(ret);\r
- return ret;\r
+ return count < v.size();\r
+ }\r
+\r
+ /** Tells whether there are more tokens available, but\r
+ in the fashion of StringTokenizer. Delegates to hasMoreElements().\r
+ @return the result of hasMoreElements() */\r
+ public boolean hasMoreTokens()\r
+ {\r
+ return hasMoreElements();\r
+ }\r
+\r
+ /** Determines the number of remaining tokens. The current position is\r
+ saved, every remaining token is pulled through nextToken() so that it\r
+ ends up buffered in v, and the position is then restored.\r
+ @return v.size() minus the restored position */\r
+ public int countTokens()\r
+ {\r
+ int _count = count; /* remember the caller's position */\r
+ while (hasMoreTokens())\r
+ {\r
+ nextToken();\r
}\r
+ count = _count; /* undo the advance made by the loop above */\r
+ return v.size() - count;\r
+ }\r
+\r
+ /** Returns all tokens in the String. countTokens() is called first so\r
+ that the remaining tokens are buffered in v; the array is sized from\r
+ v.size(), so it covers everything held in v, not only the tokens after\r
+ the current position.\r
+ @return a new array holding a copy of the contents of v */\r
+ public String[] allTokens()\r
+ {\r
+ countTokens();\r
+ String[] ret = new String[v.size()];\r
+ v.copyInto(ret);\r
+ return ret;\r
+ }\r
};\r