X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fcom%2Fstevesoft%2Fpat%2FRegexTokenizer.java;h=c99bfeaed372052bb3a64fd7bc7d575c1dc1e94f;hb=d7e106a685363a77d7548051014deab532911ac3;hp=40302a4927b6ff07b6fb673c6ab0988e7d4f5e9a;hpb=c40cf903f740a72ab63dd1abc10fa33450ce660d;p=jalview.git diff --git a/src/com/stevesoft/pat/RegexTokenizer.java b/src/com/stevesoft/pat/RegexTokenizer.java index 40302a4..c99bfea 100755 --- a/src/com/stevesoft/pat/RegexTokenizer.java +++ b/src/com/stevesoft/pat/RegexTokenizer.java @@ -1,110 +1,172 @@ -// -// This software is now distributed according to -// the Lesser Gnu Public License. Please see -// http://www.gnu.org/copyleft/lesser.txt for -// the details. -// -- Happy Computing! -// -package com.stevesoft.pat; -import java.util.*; -/** - Shareware: package pat - Copyright 2001, Steven R. Brandt -*/ /** -The RegexTokenizer is similar to the StringTokenizer class -provided with java, but allows one to tokenize using -regular expressions, rather than a simple list of characters. -Tokens are any strings between the supplied regular expression, -as well as any backreferences (things in parenthesis) -contained within the regular expression. */ -public class RegexTokenizer implements Enumeration { - String toParse; - Regex r; - int count = 0; - Vector v = new Vector(); - Vector vi = new Vector(); - int pos=0; - - int offset = 1; - void getMore() { - String s = r.right(); - if(r.searchFrom(toParse,pos)) { - v.addElement(r.left().substring(pos)); - vi.addElement(new Integer(r.matchFrom()+ - r.charsMatched())); - for(int i=0;i= v.size()) getMore(); - return v.elementAt(count++); - } - /** This is the equivalent (String)nextElement(). */ - public String nextToken() { return (String)nextElement(); } - /** This asks for the next token, and changes the pattern - being used at the same time. */ - public String nextToken(String newpat) { - try { r.compile(newpat); } catch (RegSyntax r_) {} - return nextToken(r); - } - /** This asks for the next token, and changes the pattern - being used at the same time. */ - public String nextToken(Regex nr) { - r = nr; - if(vi.size() > count) { - pos = ((Integer)vi.elementAt(count)).intValue(); - v.setSize(count); - vi.setSize(count); - } - getMore(); - return nextToken(); - } - /** Tells whether there are more tokens in the pattern. */ - public boolean hasMoreElements() { - if(count >= v.size()) getMore(); - return count < v.size(); - } - /** Tells whether there are more tokens in the pattern, but - in the fashion of StringTokenizer. */ - public boolean hasMoreTokens() { return hasMoreElements(); } - /** Determines the # of remaining tokens */ - public int countTokens() { - int old_pos=pos,_count=count; - while(hasMoreTokens()) - nextToken(); - count=_count; - return v.size()-count; - } - /** Returns all tokens in the String */ - public String[] allTokens() { - countTokens(); - String[] ret = new String[v.size()]; - v.copyInto(ret); - return ret; - } -}; +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.util.Enumeration; +import java.util.Vector; + +/** + Shareware: package pat + Copyright 2001, Steven R. Brandt + */ +/** + * The RegexTokenizer is similar to the StringTokenizer class provided with + * java, but allows one to tokenize using regular expressions, rather than a + * simple list of characters. Tokens are any strings between the supplied + * regular expression, as well as any backreferences (things in parenthesis) + * contained within the regular expression. + */ +public class RegexTokenizer implements Enumeration +{ + String toParse; + + Regex r; + + int count = 0; + + Vector v = new Vector(); + + Vector vi = new Vector(); + + int pos = 0; + + int offset = 1; + + void getMore() + { + String s = r.right(); + if (r.searchFrom(toParse, pos)) + { + v.addElement(r.left().substring(pos)); + vi.addElement(Integer.valueOf(r.matchFrom() + r.charsMatched())); + for (int i = 0; i < r.numSubs(); i++) + { + if (r.substring() != null) + { + v.addElement(r.substring(i + offset)); + vi.addElement(Integer.valueOf(r.matchFrom(i + offset) + + r.charsMatched(i + offset))); + } + } + pos = r.matchFrom() + r.charsMatched(); + } + else if (s != null) + { + v.addElement(s); + } + } + + /** Initialize the tokenizer with a string of text and a pattern */ + public RegexTokenizer(String txt, String ptrn) + { + toParse = txt; + r = new Regex(ptrn); + offset = Regex.BackRefOffset; + getMore(); + } + + /** Initialize the tokenizer with a Regex object. */ + public RegexTokenizer(String txt, Regex r) + { + toParse = txt; + this.r = r; + offset = Regex.BackRefOffset; + getMore(); + } + + /** + * This should always be cast to a String, as in StringTokenizer, and as in + * StringTokenizer one can do this by calling nextString(). + */ + public Object nextElement() + { + if (count >= v.size()) + { + getMore(); + } + return v.elementAt(count++); + } + + /** This is the equivalent (String)nextElement(). */ + public String nextToken() + { + return (String) nextElement(); + } + + /** + * This asks for the next token, and changes the pattern being used at the + * same time. + */ + public String nextToken(String newpat) + { + try + { + r.compile(newpat); + } catch (RegSyntax r_) + { + } + return nextToken(r); + } + + /** + * This asks for the next token, and changes the pattern being used at the + * same time. + */ + public String nextToken(Regex nr) + { + r = nr; + if (vi.size() > count) + { + pos = ((Integer) vi.elementAt(count)).intValue(); + v.setSize(count); + vi.setSize(count); + } + getMore(); + return nextToken(); + } + + /** Tells whether there are more tokens in the pattern. */ + public boolean hasMoreElements() + { + if (count >= v.size()) + { + getMore(); + } + return count < v.size(); + } + + /** + * Tells whether there are more tokens in the pattern, but in the fashion of + * StringTokenizer. + */ + public boolean hasMoreTokens() + { + return hasMoreElements(); + } + + /** Determines the # of remaining tokens */ + public int countTokens() + { + int _count = count; + while (hasMoreTokens()) + { + nextToken(); + } + count = _count; + return v.size() - count; + } + + /** Returns all tokens in the String */ + public String[] allTokens() + { + countTokens(); + String[] ret = new String[v.size()]; + v.copyInto(ret); + return ret; + } +};