src/com/stevesoft/pat/RegexTokenizer.java

   1 //\r
   2 // This software is now distributed according to\r
   3 // the Lesser Gnu Public License.  Please see\r
   4 // http://www.gnu.org/copyleft/lesser.txt for\r
   5 // the details.\r
   6 //    -- Happy Computing!\r
   7 //\r
   8 package com.stevesoft.pat;\r
   9 import java.util.*;\r
  10 /** \r
  11         Shareware: package pat\r
  12    <a href="copyright.html">Copyright 2001, Steven R. Brandt</a>\r
  13 */ /**\r
  14 The RegexTokenizer is similar to the StringTokenizer class\r
  15 provided with java, but allows one to tokenize using\r
  16 regular expressions, rather than a simple list of characters.\r
  17 Tokens are any strings between the supplied regular expression,\r
  18 as well as any backreferences (things in parentheses)\r
  19 contained within the regular expression. */\r
  20 public class RegexTokenizer implements Enumeration {\r
  21     String toParse;\r
  22     Regex r;\r
  23     int count = 0;\r
  24     Vector v = new Vector();\r
  25     Vector vi = new Vector();\r
  26     int pos=0;\r
  27 \r
  28     int offset = 1;\r
  29     void getMore() {\r
  30         String s = r.right();\r
  31         if(r.searchFrom(toParse,pos)) {\r
  32             v.addElement(r.left().substring(pos));\r
  33             vi.addElement(new Integer(r.matchFrom()+\r
  34                 r.charsMatched()));\r
  35             for(int i=0;i<r.numSubs();i++)\r
  36                 if(r.substring() != null) {\r
  37                     v.addElement(r.substring(i+offset));\r
  38                     vi.addElement(\r
  39                         new Integer(r.matchFrom(i+offset)+\r
  40                         r.charsMatched(i+offset)));\r
  41                 }\r
  42             pos = r.matchFrom()+r.charsMatched();\r
  43         } else if(s != null) v.addElement(s);\r
  44     }\r
  45 \r
  46     /** Initialize the tokenizer with a string of text and a pattern */\r
  47     public RegexTokenizer(String txt,String ptrn) {\r
  48         toParse = txt;\r
  49         r = new Regex(ptrn);\r
  50         offset = Regex.BackRefOffset;\r
  51         getMore();\r
  52     }\r
  53     /** Initialize the tokenizer with a Regex object. */\r
  54     public RegexTokenizer(String txt,Regex r) {\r
  55         toParse = txt;\r
  56         this.r = r;\r
  57         offset = Regex.BackRefOffset;\r
  58         getMore();\r
  59     }\r
  60     /** This should always be cast to a String, as in StringTokenizer,\r
  61          and as in StringTokenizer one can do this by calling\r
  62          nextString(). */\r
  63     public Object nextElement() {\r
  64         if(count >= v.size()) getMore();\r
  65         return v.elementAt(count++);\r
  66     }\r
  67     /** This is the equivalent (String)nextElement(). */\r
  68     public String nextToken() { return (String)nextElement(); }\r
  69     /** This asks for the next token, and changes the pattern\r
  70          being used at the same time. */\r
  71     public String nextToken(String newpat) {\r
  72         try { r.compile(newpat); } catch (RegSyntax r_) {}\r
  73         return nextToken(r);\r
  74     }\r
  75     /** This asks for the next token, and changes the pattern\r
  76          being used at the same time. */\r
  77     public String nextToken(Regex nr) {\r
  78         r = nr;\r
  79         if(vi.size() > count) {\r
  80             pos = ((Integer)vi.elementAt(count)).intValue();\r
  81             v.setSize(count);\r
  82             vi.setSize(count);\r
  83         }\r
  84         getMore();\r
  85         return nextToken();\r
  86     }\r
  87     /** Tells whether there are more tokens in the pattern. */\r
  88     public boolean hasMoreElements() {\r
  89         if(count >= v.size()) getMore();\r
  90         return count < v.size();\r
  91     }\r
  92     /** Tells whether there are more tokens in the pattern, but\r
  93          in the fashion of StringTokenizer. */\r
  94     public boolean hasMoreTokens() { return hasMoreElements(); }\r
  95     /** Determines the # of remaining tokens */\r
  96     public int countTokens() {\r
  97         int _count=count;\r
  98         while(hasMoreTokens())\r
  99             nextToken();\r
 100         count=_count;\r
 101         return v.size()-count;\r
 102     }\r
 103     /** Returns all tokens in the String */\r
 104     public String[] allTokens() {\r
 105         countTokens();\r
 106         String[] ret = new String[v.size()];\r
 107         v.copyInto(ret);\r
 108         return ret;\r
 109     }\r
 110 };\r