X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fcom%2Fstevesoft%2Fpat%2FRegexTokenizer.java;h=31fa2ba8f4e5fb4c0232848087eceb63a9ee691d;hb=a83adb45bdf9554e270921b4baad94defd314b36;hp=76fa005aa753874e9516010d665da201e2f25967;hpb=7bc226b58110fa26d9dbd3f0c78095d06909ffc3;p=jalview.git diff --git a/src/com/stevesoft/pat/RegexTokenizer.java b/src/com/stevesoft/pat/RegexTokenizer.java index 76fa005..31fa2ba 100755 --- a/src/com/stevesoft/pat/RegexTokenizer.java +++ b/src/com/stevesoft/pat/RegexTokenizer.java @@ -1,161 +1,172 @@ -// -// This software is now distributed according to -// the Lesser Gnu Public License. Please see -// http://www.gnu.org/copyleft/lesser.txt for -// the details. -// -- Happy Computing! -// -package com.stevesoft.pat; - -import java.util.*; - -/** - Shareware: package pat - Copyright 2001, Steven R. Brandt - */ -/** - The RegexTokenizer is similar to the StringTokenizer class - provided with java, but allows one to tokenize using - regular expressions, rather than a simple list of characters. - Tokens are any strings between the supplied regular expression, - as well as any backreferences (things in parenthesis) - contained within the regular expression. */ -public class RegexTokenizer - implements Enumeration -{ - String toParse; - Regex r; - int count = 0; - Vector v = new Vector(); - Vector vi = new Vector(); - int pos = 0; - - int offset = 1; - void getMore() - { - String s = r.right(); - if (r.searchFrom(toParse, pos)) - { - v.addElement(r.left().substring(pos)); - vi.addElement(new Integer(r.matchFrom() + - r.charsMatched())); - for (int i = 0; i < r.numSubs(); i++) - { - if (r.substring() != null) - { - v.addElement(r.substring(i + offset)); - vi.addElement( - new Integer(r.matchFrom(i + offset) + - r.charsMatched(i + offset))); - } - } - pos = r.matchFrom() + r.charsMatched(); - } - else if (s != null) - { - v.addElement(s); - } - } - - /** Initialize the tokenizer with a string of text and a pattern */ - public RegexTokenizer(String txt, String ptrn) - { - toParse = txt; - r = new Regex(ptrn); - offset = Regex.BackRefOffset; - getMore(); - } - - /** Initialize the tokenizer with a Regex object. */ - public RegexTokenizer(String txt, Regex r) - { - toParse = txt; - this.r = r; - offset = Regex.BackRefOffset; - getMore(); - } - - /** This should always be cast to a String, as in StringTokenizer, - and as in StringTokenizer one can do this by calling - nextString(). */ - public Object nextElement() - { - if (count >= v.size()) - { - getMore(); - } - return v.elementAt(count++); - } - - /** This is the equivalent (String)nextElement(). */ - public String nextToken() - { - return (String) nextElement(); - } - - /** This asks for the next token, and changes the pattern - being used at the same time. */ - public String nextToken(String newpat) - { - try - { - r.compile(newpat); - } - catch (RegSyntax r_) - {} - return nextToken(r); - } - - /** This asks for the next token, and changes the pattern - being used at the same time. */ - public String nextToken(Regex nr) - { - r = nr; - if (vi.size() > count) - { - pos = ( (Integer) vi.elementAt(count)).intValue(); - v.setSize(count); - vi.setSize(count); - } - getMore(); - return nextToken(); - } - - /** Tells whether there are more tokens in the pattern. */ - public boolean hasMoreElements() - { - if (count >= v.size()) - { - getMore(); - } - return count < v.size(); - } - - /** Tells whether there are more tokens in the pattern, but - in the fashion of StringTokenizer. */ - public boolean hasMoreTokens() - { - return hasMoreElements(); - } - - /** Determines the # of remaining tokens */ - public int countTokens() - { - int _count = count; - while (hasMoreTokens()) - { - nextToken(); - } - count = _count; - return v.size() - count; - } - - /** Returns all tokens in the String */ - public String[] allTokens() - { - countTokens(); - String[] ret = new String[v.size()]; - v.copyInto(ret); - return ret; - } -}; +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.util.Enumeration; +import java.util.Vector; + +/** + Shareware: package pat + Copyright 2001, Steven R. Brandt + */ +/** + * The RegexTokenizer is similar to the StringTokenizer class provided with + * java, but allows one to tokenize using regular expressions, rather than a + * simple list of characters. Tokens are any strings between the supplied + * regular expression, as well as any backreferences (things in parenthesis) + * contained within the regular expression. + */ +public class RegexTokenizer implements Enumeration +{ + String toParse; + + Regex r; + + int count = 0; + + Vector v = new Vector(); + + Vector vi = new Vector(); + + int pos = 0; + + int offset = 1; + + void getMore() + { + String s = r.right(); + if (r.searchFrom(toParse, pos)) + { + v.addElement(r.left().substring(pos)); + vi.addElement(new Integer(r.matchFrom() + r.charsMatched())); + for (int i = 0; i < r.numSubs(); i++) + { + if (r.substring() != null) + { + v.addElement(r.substring(i + offset)); + vi.addElement(new Integer(r.matchFrom(i + offset) + + r.charsMatched(i + offset))); + } + } + pos = r.matchFrom() + r.charsMatched(); + } + else if (s != null) + { + v.addElement(s); + } + } + + /** Initialize the tokenizer with a string of text and a pattern */ + public RegexTokenizer(String txt, String ptrn) + { + toParse = txt; + r = new Regex(ptrn); + offset = Regex.BackRefOffset; + getMore(); + } + + /** Initialize the tokenizer with a Regex object. */ + public RegexTokenizer(String txt, Regex r) + { + toParse = txt; + this.r = r; + offset = Regex.BackRefOffset; + getMore(); + } + + /** + * This should always be cast to a String, as in StringTokenizer, and as in + * StringTokenizer one can do this by calling nextString(). + */ + public Object nextElement() + { + if (count >= v.size()) + { + getMore(); + } + return v.elementAt(count++); + } + + /** This is the equivalent (String)nextElement(). */ + public String nextToken() + { + return (String) nextElement(); + } + + /** + * This asks for the next token, and changes the pattern being used at the + * same time. + */ + public String nextToken(String newpat) + { + try + { + r.compile(newpat); + } catch (RegSyntax r_) + { + } + return nextToken(r); + } + + /** + * This asks for the next token, and changes the pattern being used at the + * same time. + */ + public String nextToken(Regex nr) + { + r = nr; + if (vi.size() > count) + { + pos = ((Integer) vi.elementAt(count)).intValue(); + v.setSize(count); + vi.setSize(count); + } + getMore(); + return nextToken(); + } + + /** Tells whether there are more tokens in the pattern. */ + public boolean hasMoreElements() + { + if (count >= v.size()) + { + getMore(); + } + return count < v.size(); + } + + /** + * Tells whether there are more tokens in the pattern, but in the fashion of + * StringTokenizer. + */ + public boolean hasMoreTokens() + { + return hasMoreElements(); + } + + /** Determines the # of remaining tokens */ + public int countTokens() + { + int _count = count; + while (hasMoreTokens()) + { + nextToken(); + } + count = _count; + return v.size() - count; + } + + /** Returns all tokens in the String */ + public String[] allTokens() + { + countTokens(); + String[] ret = new String[v.size()]; + v.copyInto(ret); + return ret; + } +};