X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=binaries%2Fsrc%2Fglobplot%2Fbiopython-1.50%2FBio%2Ftriefind.py;fp=binaries%2Fsrc%2Fglobplot%2Fbiopython-1.50%2FBio%2Ftriefind.py;h=da862e527d0be4aedd52a9ee8e4e9a8c8d645e04;hb=119df1cedad3d4760e6fd458713da2488eff79cc;hp=0000000000000000000000000000000000000000;hpb=d3806a66f002b93f6dc03447b6628f943a3ba90c;p=jabaws.git diff --git a/binaries/src/globplot/biopython-1.50/Bio/triefind.py b/binaries/src/globplot/biopython-1.50/Bio/triefind.py new file mode 100644 index 0000000..da862e5 --- /dev/null +++ b/binaries/src/globplot/biopython-1.50/Bio/triefind.py @@ -0,0 +1,92 @@ +""" +Given a trie, find all occurrences of a word in the trie in a string. + +Like searching a string for a substring, except that the substring is +any word in a trie. + +Functions: +match Find longest key in a trie matching the beginning of the string. +match_all Find all keys in a trie matching the beginning of the string. +find Find keys in a trie matching anywhere in a string. +find_words Find keys in a trie matching whole words in a string. + +""" +import string +import re + +def match(string, trie): + """match(string, trie) -> longest key or None + + Find the longest key in the trie that matches the beginning of the + string. + + """ + longest = None + for i in range(len(string)): + substr = string[:i+1] + if not trie.has_prefix(substr): + break + if trie.has_key(substr): + longest = substr + return longest + +def match_all(string, trie): + """match_all(string, trie) -> list of keys + + Find all the keys in the trie that matches the beginning of the + string. + + """ + matches = [] + for i in range(len(string)): + substr = string[:i+1] + if not trie.has_prefix(substr): + break + if trie.has_key(substr): + matches.append(substr) + return matches + +def find(string, trie): + """find(string, trie) -> list of tuples (key, start, end) + + Find all the keys in the trie that match anywhere in the string. + + """ + results = [] + start = 0 # index to start the search + while start < len(string): + # Look for a match. + keys = match_all(string[start:], trie) + for key in keys: + results.append((key, start, start+len(key))) + start += 1 + return results + +DEFAULT_BOUNDARY_CHARS = string.punctuation + string.whitespace + +def find_words(string, trie): + """find_words(string, trie) -> list of tuples (key, start, end) + + Find all the keys in the trie that match full words in the string. + Word boundaries are defined as any punctuation or whitespace. + + """ + _boundary_re = re.compile(r"[%s]+" % re.escape(DEFAULT_BOUNDARY_CHARS)) + + results = [] + start = 0 # index of word boundary + while start < len(string): + # Look for a match. + keys = match_all(string[start:], trie) + for key in keys: + l = len(key) + # Make sure it ends at a boundary. + if start+l == len(string) or \ + _boundary_re.match(string[start+l]): + results.append((key, start, start+l)) + # Move forward to the next boundary. + m = _boundary_re.search(string, start) + if m is None: + break + start = m.end() + return results