X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=unused%2Fcom%2Fstevesoft%2Fpat%2FRegexTokenizer.java;fp=unused%2Fcom%2Fstevesoft%2Fpat%2FRegexTokenizer.java;h=0e5385730d7a77c521b54083053a671af715b198;hb=6319110ce33faa76ee6cf9832e78faa224510fed;hp=0378f3a54018c60964eaacb65c70643e1bfa1527;hpb=7301a2415adab88038b291fc54caeeb3a5a47a44;p=jalviewjs.git diff --git a/unused/com/stevesoft/pat/RegexTokenizer.java b/unused/com/stevesoft/pat/RegexTokenizer.java index 0378f3a..0e53857 100644 --- a/unused/com/stevesoft/pat/RegexTokenizer.java +++ b/unused/com/stevesoft/pat/RegexTokenizer.java @@ -1,171 +1,171 @@ -// -// This software is now distributed according to -// the Lesser Gnu Public License. Please see -// http://www.gnu.org/copyleft/lesser.txt for -// the details. -// -- Happy Computing! -// -package com.stevesoft.pat; - -import java.util.*; - -/** - Shareware: package pat - Copyright 2001, Steven R. Brandt - */ -/** - * The RegexTokenizer is similar to the StringTokenizer class provided with - * java, but allows one to tokenize using regular expressions, rather than a - * simple list of characters. Tokens are any strings between the supplied - * regular expression, as well as any backreferences (things in parenthesis) - * contained within the regular expression. - */ -public class RegexTokenizer implements Enumeration -{ - String toParse; - - Regex r; - - int count = 0; - - Vector v = new Vector(); - - Vector vi = new Vector(); - - int pos = 0; - - int offset = 1; - - void getMore() - { - String s = r.right(); - if (r.searchFrom(toParse, pos)) - { - v.addElement(r.left().substring(pos)); - vi.addElement(new Integer(r.matchFrom() + r.charsMatched())); - for (int i = 0; i < r.numSubs(); i++) - { - if (r.substring() != null) - { - v.addElement(r.substring(i + offset)); - vi.addElement(new Integer(r.matchFrom(i + offset) - + r.charsMatched(i + offset))); - } - } - pos = r.matchFrom() + r.charsMatched(); - } - else if (s != null) - { - v.addElement(s); - } - } - - /** Initialize the tokenizer with a string of text and a pattern */ - public RegexTokenizer(String txt, String ptrn) - { - toParse = txt; - r = new Regex(ptrn); - offset = Regex.BackRefOffset; - getMore(); - } - - /** Initialize the tokenizer with a Regex object. */ - public RegexTokenizer(String txt, Regex r) - { - toParse = txt; - this.r = r; - offset = Regex.BackRefOffset; - getMore(); - } - - /** - * This should always be cast to a String, as in StringTokenizer, and as in - * StringTokenizer one can do this by calling nextString(). - */ - public Object nextElement() - { - if (count >= v.size()) - { - getMore(); - } - return v.elementAt(count++); - } - - /** This is the equivalent (String)nextElement(). */ - public String nextToken() - { - return (String) nextElement(); - } - - /** - * This asks for the next token, and changes the pattern being used at the - * same time. - */ - public String nextToken(String newpat) - { - try - { - r.compile(newpat); - } catch (RegSyntax r_) - { - } - return nextToken(r); - } - - /** - * This asks for the next token, and changes the pattern being used at the - * same time. - */ - public String nextToken(Regex nr) - { - r = nr; - if (vi.size() > count) - { - pos = ((Integer) vi.elementAt(count)).intValue(); - v.setSize(count); - vi.setSize(count); - } - getMore(); - return nextToken(); - } - - /** Tells whether there are more tokens in the pattern. */ - public boolean hasMoreElements() - { - if (count >= v.size()) - { - getMore(); - } - return count < v.size(); - } - - /** - * Tells whether there are more tokens in the pattern, but in the fashion of - * StringTokenizer. - */ - public boolean hasMoreTokens() - { - return hasMoreElements(); - } - - /** Determines the # of remaining tokens */ - public int countTokens() - { - int _count = count; - while (hasMoreTokens()) - { - nextToken(); - } - count = _count; - return v.size() - count; - } - - /** Returns all tokens in the String */ - public String[] allTokens() - { - countTokens(); - String[] ret = new String[v.size()]; - v.copyInto(ret); - return ret; - } -}; +// +// This software is now distributed according to +// the Lesser Gnu Public License. Please see +// http://www.gnu.org/copyleft/lesser.txt for +// the details. +// -- Happy Computing! +// +package com.stevesoft.pat; + +import java.util.*; + +/** + Shareware: package pat + Copyright 2001, Steven R. Brandt + */ +/** + * The RegexTokenizer is similar to the StringTokenizer class provided with + * java, but allows one to tokenize using regular expressions, rather than a + * simple list of characters. Tokens are any strings between the supplied + * regular expression, as well as any backreferences (things in parenthesis) + * contained within the regular expression. + */ +public class RegexTokenizer implements Enumeration +{ + String toParse; + + Regex r; + + int count = 0; + + Vector v = new Vector(); + + Vector vi = new Vector(); + + int pos = 0; + + int offset = 1; + + void getMore() + { + String s = r.right(); + if (r.searchFrom(toParse, pos)) + { + v.addElement(r.left().substring(pos)); + vi.addElement(new Integer(r.matchFrom() + r.charsMatched())); + for (int i = 0; i < r.numSubs(); i++) + { + if (r.substring() != null) + { + v.addElement(r.substring(i + offset)); + vi.addElement(new Integer(r.matchFrom(i + offset) + + r.charsMatched(i + offset))); + } + } + pos = r.matchFrom() + r.charsMatched(); + } + else if (s != null) + { + v.addElement(s); + } + } + + /** Initialize the tokenizer with a string of text and a pattern */ + public RegexTokenizer(String txt, String ptrn) + { + toParse = txt; + r = new Regex(ptrn); + offset = Regex.BackRefOffset; + getMore(); + } + + /** Initialize the tokenizer with a Regex object. */ + public RegexTokenizer(String txt, Regex r) + { + toParse = txt; + this.r = r; + offset = Regex.BackRefOffset; + getMore(); + } + + /** + * This should always be cast to a String, as in StringTokenizer, and as in + * StringTokenizer one can do this by calling nextString(). + */ + public Object nextElement() + { + if (count >= v.size()) + { + getMore(); + } + return v.elementAt(count++); + } + + /** This is the equivalent (String)nextElement(). */ + public String nextToken() + { + return (String) nextElement(); + } + + /** + * This asks for the next token, and changes the pattern being used at the + * same time. + */ + public String nextToken(String newpat) + { + try + { + r.compile(newpat); + } catch (RegSyntax r_) + { + } + return nextToken(r); + } + + /** + * This asks for the next token, and changes the pattern being used at the + * same time. + */ + public String nextToken(Regex nr) + { + r = nr; + if (vi.size() > count) + { + pos = ((Integer) vi.elementAt(count)).intValue(); + v.setSize(count); + vi.setSize(count); + } + getMore(); + return nextToken(); + } + + /** Tells whether there are more tokens in the pattern. */ + public boolean hasMoreElements() + { + if (count >= v.size()) + { + getMore(); + } + return count < v.size(); + } + + /** + * Tells whether there are more tokens in the pattern, but in the fashion of + * StringTokenizer. + */ + public boolean hasMoreTokens() + { + return hasMoreElements(); + } + + /** Determines the # of remaining tokens */ + public int countTokens() + { + int _count = count; + while (hasMoreTokens()) + { + nextToken(); + } + count = _count; + return v.size() - count; + } + + /** Returns all tokens in the String */ + public String[] allTokens() + { + countTokens(); + String[] ret = new String[v.size()]; + v.copyInto(ret); + return ret; + } +};