2 Given a trie, find all occurrences of a word in the trie in a string.
4 Like searching a string for a substring, except that the substring is
8 match Find longest key in a trie matching the beginning of the string.
9 match_all Find all keys in a trie matching the beginning of the string.
10 find Find keys in a trie matching anywhere in a string.
11 find_words Find keys in a trie matching whole words in a string.
17 def match(string, trie):
18 """match(string, trie) -> longest key or None
20 Find the longest key in the trie that matches the beginning of the
25 for i in range(len(string)):
27 if not trie.has_prefix(substr):
29 if trie.has_key(substr):
33 def match_all(string, trie):
34 """match_all(string, trie) -> list of keys
36 Find all the keys in the trie that matches the beginning of the
41 for i in range(len(string)):
43 if not trie.has_prefix(substr):
45 if trie.has_key(substr):
46 matches.append(substr)
49 def find(string, trie):
50 """find(string, trie) -> list of tuples (key, start, end)
52 Find all the keys in the trie that match anywhere in the string.
56 start = 0 # index to start the search
57 while start < len(string):
59 keys = match_all(string[start:], trie)
61 results.append((key, start, start+len(key)))
65 DEFAULT_BOUNDARY_CHARS = string.punctuation + string.whitespace
67 def find_words(string, trie):
68 """find_words(string, trie) -> list of tuples (key, start, end)
70 Find all the keys in the trie that match full words in the string.
71 Word boundaries are defined as any punctuation or whitespace.
74 _boundary_re = re.compile(r"[%s]+" % re.escape(DEFAULT_BOUNDARY_CHARS))
77 start = 0 # index of word boundary
78 while start < len(string):
80 keys = match_all(string[start:], trie)
83 # Make sure it ends at a boundary.
84 if start+l == len(string) or \
85 _boundary_re.match(string[start+l]):
86 results.append((key, start, start+l))
87 # Move forward to the next boundary.
88 m = _boundary_re.search(string, start)