X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fcom%2Fstevesoft%2Fpat%2FRegexTokenizer.java;fp=src%2Fcom%2Fstevesoft%2Fpat%2FRegexTokenizer.java;h=76fa005aa753874e9516010d665da201e2f25967;hb=7bc226b58110fa26d9dbd3f0c78095d06909ffc3;hp=bc6fe515428811c40bb4c79379ab9ee49bba5354;hpb=dd74fc4938723fe5ec48d4e5fdcfbe58ac42a48d;p=jalview.git diff --git a/src/com/stevesoft/pat/RegexTokenizer.java b/src/com/stevesoft/pat/RegexTokenizer.java index bc6fe51..76fa005 100755 --- a/src/com/stevesoft/pat/RegexTokenizer.java +++ b/src/com/stevesoft/pat/RegexTokenizer.java @@ -6,105 +6,156 @@ // -- Happy Computing! // package com.stevesoft.pat; + import java.util.*; -/** + +/** Shareware: package pat Copyright 2001, Steven R. Brandt -*/ /** -The RegexTokenizer is similar to the StringTokenizer class -provided with java, but allows one to tokenize using -regular expressions, rather than a simple list of characters. -Tokens are any strings between the supplied regular expression, -as well as any backreferences (things in parenthesis) -contained within the regular expression. */ -public class RegexTokenizer implements Enumeration { - String toParse; - Regex r; - int count = 0; - Vector v = new Vector(); - Vector vi = new Vector(); - int pos=0; + */ +/** + The RegexTokenizer is similar to the StringTokenizer class + provided with java, but allows one to tokenize using + regular expressions, rather than a simple list of characters. + Tokens are any strings between the supplied regular expression, + as well as any backreferences (things in parenthesis) + contained within the regular expression. */ +public class RegexTokenizer + implements Enumeration +{ + String toParse; + Regex r; + int count = 0; + Vector v = new Vector(); + Vector vi = new Vector(); + int pos = 0; - int offset = 1; - void getMore() { - String s = r.right(); - if(r.searchFrom(toParse,pos)) { - v.addElement(r.left().substring(pos)); - vi.addElement(new Integer(r.matchFrom()+ - r.charsMatched())); - for(int i=0;i= v.size()) getMore(); - return v.elementAt(count++); + else if (s != null) + { + v.addElement(s); } - /** This is the equivalent (String)nextElement(). */ - public String nextToken() { return (String)nextElement(); } - /** This asks for the next token, and changes the pattern - being used at the same time. */ - public String nextToken(String newpat) { - try { r.compile(newpat); } catch (RegSyntax r_) {} - return nextToken(r); + } + + /** Initialize the tokenizer with a string of text and a pattern */ + public RegexTokenizer(String txt, String ptrn) + { + toParse = txt; + r = new Regex(ptrn); + offset = Regex.BackRefOffset; + getMore(); + } + + /** Initialize the tokenizer with a Regex object. */ + public RegexTokenizer(String txt, Regex r) + { + toParse = txt; + this.r = r; + offset = Regex.BackRefOffset; + getMore(); + } + + /** This should always be cast to a String, as in StringTokenizer, + and as in StringTokenizer one can do this by calling + nextString(). */ + public Object nextElement() + { + if (count >= v.size()) + { + getMore(); } - /** This asks for the next token, and changes the pattern - being used at the same time. */ - public String nextToken(Regex nr) { - r = nr; - if(vi.size() > count) { - pos = ((Integer)vi.elementAt(count)).intValue(); - v.setSize(count); - vi.setSize(count); - } - getMore(); - return nextToken(); + return v.elementAt(count++); + } + + /** This is the equivalent (String)nextElement(). */ + public String nextToken() + { + return (String) nextElement(); + } + + /** This asks for the next token, and changes the pattern + being used at the same time. */ + public String nextToken(String newpat) + { + try + { + r.compile(newpat); } - /** Tells whether there are more tokens in the pattern. */ - public boolean hasMoreElements() { - if(count >= v.size()) getMore(); - return count < v.size(); + catch (RegSyntax r_) + {} + return nextToken(r); + } + + /** This asks for the next token, and changes the pattern + being used at the same time. */ + public String nextToken(Regex nr) + { + r = nr; + if (vi.size() > count) + { + pos = ( (Integer) vi.elementAt(count)).intValue(); + v.setSize(count); + vi.setSize(count); } - /** Tells whether there are more tokens in the pattern, but - in the fashion of StringTokenizer. */ - public boolean hasMoreTokens() { return hasMoreElements(); } - /** Determines the # of remaining tokens */ - public int countTokens() { - int _count=count; - while(hasMoreTokens()) - nextToken(); - count=_count; - return v.size()-count; + getMore(); + return nextToken(); + } + + /** Tells whether there are more tokens in the pattern. */ + public boolean hasMoreElements() + { + if (count >= v.size()) + { + getMore(); } - /** Returns all tokens in the String */ - public String[] allTokens() { - countTokens(); - String[] ret = new String[v.size()]; - v.copyInto(ret); - return ret; + return count < v.size(); + } + + /** Tells whether there are more tokens in the pattern, but + in the fashion of StringTokenizer. */ + public boolean hasMoreTokens() + { + return hasMoreElements(); + } + + /** Determines the # of remaining tokens */ + public int countTokens() + { + int _count = count; + while (hasMoreTokens()) + { + nextToken(); } + count = _count; + return v.size() - count; + } + + /** Returns all tokens in the String */ + public String[] allTokens() + { + countTokens(); + String[] ret = new String[v.size()]; + v.copyInto(ret); + return ret; + } };