needed for applet search

[jalview.git] / src / com / stevesoft / pat / RegexTokenizer.java
diff --git a/src/com/stevesoft/pat/RegexTokenizer.java b/src/com/stevesoft/pat/RegexTokenizer.java

new file mode 100755 (executable)

index 0000000..40302a4
--- /dev/null
+++ b/src/com/stevesoft/pat/RegexTokenizer.java
@@ -0,0 +1,110 @@
+//\r
+// This software is now distributed according to\r
+// the Lesser Gnu Public License.  Please see\r
+// http://www.gnu.org/copyleft/lesser.txt for\r
+// the details.\r
+//    -- Happy Computing!\r
+//\r
+package com.stevesoft.pat;\r
+import java.util.*;\r
+/** \r
+        Shareware: package pat\r
+   <a href="copyright.html">Copyright 2001, Steven R. Brandt</a>\r
+*/ /**\r
+The RegexTokenizer is similar to the StringTokenizer class\r
+provided with Java, but allows one to tokenize using\r
+regular expressions, rather than a simple list of characters.\r
+Tokens are any strings between matches of the supplied regular\r
+expression, as well as any backreferences (things in parentheses)\r
+contained within the regular expression. */\r
+public class RegexTokenizer implements Enumeration {\r
+    /** The text being tokenized. */\r
+    String toParse;\r
+    /** The pattern currently used to locate delimiters. */\r
+    Regex r;\r
+    /** Index into v of the next token to hand out. */\r
+    int count = 0;\r
+    /** Every token found so far, including those already returned. */\r
+    Vector v = new Vector();\r
+    /** For each token produced by a match, the position just past the\r
+         match (or backreference) it came from, stored as an Integer.\r
+         nextToken(Regex) reads these to reposition the search. A token\r
+         added when no match was found has no entry here. */\r
+    Vector vi = new Vector();\r
+    /** Position in toParse at which the next search starts. */\r
+    int pos=0;\r
+\r
+    /** Index of the first backreference, copied from r.BackRefOffset\r
+         by the constructors. */\r
+    int offset = 1;\r
+\r
+    /** Runs one search from pos and appends the tokens it yields to v.\r
+         On a match, the text from pos up to the match is added (assuming\r
+         r.left() is everything before the match -- confirm against Regex),\r
+         then one entry per backreference, and pos moves past the match.\r
+         Without a match, the value r.right() had before the search is\r
+         added if it is not null. */\r
+    void getMore() {\r
+        // Read before searching; presumably a failed search would\r
+        // discard the remainder left by the previous match.\r
+        String s = r.right();\r
+        if(!r.searchFrom(toParse,pos)) {\r
+            if(s != null) v.addElement(s);\r
+            return;\r
+        }\r
+        v.addElement(r.left().substring(pos));\r
+        int matchEnd = r.matchFrom()+r.charsMatched();\r
+        vi.addElement(new Integer(matchEnd));\r
+        for(int i=0;i<r.numSubs();i++) {\r
+            // NOTE(review): this tests the whole match, not group\r
+            // i+offset, so it looks always true after a successful\r
+            // search and unmatched groups would be added as null\r
+            // tokens -- confirm whether substring(i+offset) was meant.\r
+            if(r.substring() != null) {\r
+                v.addElement(r.substring(i+offset));\r
+                vi.addElement(\r
+                    new Integer(r.matchFrom(i+offset)+\r
+                    r.charsMatched(i+offset)));\r
+            }\r
+        }\r
+        pos = matchEnd;\r
+    }\r
+\r
+    /** Initialize the tokenizer with a string of text and a pattern.\r
+         The pattern is compiled into a new Regex and the first search\r
+         is run immediately. */\r
+    public RegexTokenizer(String txt,String ptrn) {\r
+        toParse = txt;\r
+        r = new Regex(ptrn);\r
+        offset = r.BackRefOffset;\r
+        getMore();\r
+    }\r
+    /** Initialize the tokenizer with a Regex object. The object is used\r
+         directly, not copied, so every search performed by this\r
+         tokenizer runs on the caller's Regex. The first search is run\r
+         immediately. */\r
+    public RegexTokenizer(String txt,Regex r) {\r
+        toParse = txt;\r
+        this.r = r;\r
+        offset = r.BackRefOffset;\r
+        getMore();\r
+    }\r
+    /** Returns the next token, searching further if every token found\r
+         so far has already been returned. This should always be cast\r
+         to a String, as in StringTokenizer; nextToken() does the cast.\r
+         When no token is left, Vector.elementAt throws\r
+         ArrayIndexOutOfBoundsException and the tokenizer's position is\r
+         left unchanged. */\r
+    public Object nextElement() {\r
+        if(count >= v.size()) getMore();\r
+        // Fetch before advancing: if elementAt throws because nothing\r
+        // is left, count must not have moved past the end of v.\r
+        Object token = v.elementAt(count);\r
+        count++;\r
+        return token;\r
+    }\r
+    /** This is the equivalent of (String)nextElement(). */\r
+    public String nextToken() { return (String)nextElement(); }\r
+    /** This asks for the next token, and changes the pattern\r
+         being used at the same time. The current Regex object is\r
+         recompiled in place, so a Regex handed to the constructor is\r
+         modified too. */\r
+    public String nextToken(String newpat) {\r
+        // Best effort: a pattern that fails to compile is ignored and\r
+        // tokenizing continues with r in whatever state compile left it.\r
+        // NOTE(review): consider reporting the syntax error to the caller.\r
+        try { r.compile(newpat); } catch (RegSyntax ignored) {}\r
+        return nextToken(r);\r
+    }\r
+    /** This asks for the next token, and changes the pattern\r
+         being used at the same time. Tokens already found but not yet\r
+         returned are discarded and the search position is reset from\r
+         the recorded positions before searching with the new pattern. */\r
+    public String nextToken(Regex nr) {\r
+        r = nr;\r
+        if(vi.size() > count) {\r
+            // NOTE(review): vi[count] is the position just past the match\r
+            // that ended token 'count', so the pending token itself seems\r
+            // to be skipped rather than re-read -- confirm intended.\r
+            pos = ((Integer)vi.elementAt(count)).intValue();\r
+            v.setSize(count);\r
+            vi.setSize(count);\r
+        }\r
+        getMore();\r
+        return nextToken();\r
+    }\r
+    /** Tells whether there are more tokens, running another search\r
+         first if every token found so far has been returned. */\r
+    public boolean hasMoreElements() {\r
+        if(count >= v.size()) getMore();\r
+        return count < v.size();\r
+    }\r
+    /** Tells whether there are more tokens, but\r
+         in the fashion of StringTokenizer. */\r
+    public boolean hasMoreTokens() { return hasMoreElements(); }\r
+    /** Determines the # of remaining tokens. The rest of the text is\r
+         tokenized to find out, but no token is consumed: the found\r
+         tokens stay cached in v and count is put back afterwards. */\r
+    public int countTokens() {\r
+        int savedCount = count;\r
+        while(hasMoreTokens())\r
+            nextToken();\r
+        count = savedCount;\r
+        return v.size()-count;\r
+    }\r
+    /** Returns all tokens in the String, including any that were\r
+         already returned by nextElement() or nextToken(). */\r
+    public String[] allTokens() {\r
+        // countTokens() forces the remainder of the text to be tokenized.\r
+        countTokens();\r
+        String[] ret = new String[v.size()];\r
+        v.copyInto(ret);\r
+        return ret;\r
+    }\r
+}\r