Trees | Indices | Help |
---|
|
1 """ 2 Given a trie, find all occurrences of a word in the trie in a string. 3 4 Like searching a string for a substring, except that the substring is 5 any word in a trie. 6 7 Functions: 8 match Find longest key in a trie matching the beginning of the string. 9 match_all Find all keys in a trie matching the beginning of the string. 10 find Find keys in a trie matching anywhere in a string. 11 find_words Find keys in a trie matching whole words in a string. 12 13 """ 14 import string 15 import re 1618 """match(string, trie) -> longest key or None 19 20 Find the longest key in the trie that matches the beginning of the 21 string. 22 23 """ 24 longest = None 25 for i in range(len(string)): 26 substr = string[:i+1] 27 if not trie.has_prefix(substr): 28 break 29 if trie.has_key(substr): 30 longest = substr 31 return longest3234 """match_all(string, trie) -> list of keys 35 36 Find all the keys in the trie that matches the beginning of the 37 string. 38 39 """ 40 matches = [] 41 for i in range(len(string)): 42 substr = string[:i+1] 43 if not trie.has_prefix(substr): 44 break 45 if trie.has_key(substr): 46 matches.append(substr) 47 return matches4850 """find(string, trie) -> list of tuples (key, start, end) 51 52 Find all the keys in the trie that match anywhere in the string. 53 54 """ 55 results = [] 56 start = 0 # index to start the search 57 while start < len(string): 58 # Look for a match. 59 keys = match_all(string[start:], trie) 60 for key in keys: 61 results.append((key, start, start+len(key))) 62 start += 1 63 return results64 65 DEFAULT_BOUNDARY_CHARS = string.punctuation + string.whitespace 6668 """find_words(string, trie) -> list of tuples (key, start, end) 69 70 Find all the keys in the trie that match full words in the string. 71 Word boundaries are defined as any punctuation or whitespace. 72 73 """ 74 _boundary_re = re.compile(r"[%s]+" % re.escape(DEFAULT_BOUNDARY_CHARS)) 75 76 results = [] 77 start = 0 # index of word boundary 78 while start < len(string): 79 # Look for a match. 80 keys = match_all(string[start:], trie) 81 for key in keys: 82 l = len(key) 83 # Make sure it ends at a boundary. 84 if start+l == len(string) or \ 85 _boundary_re.match(string[start+l]): 86 results.append((key, start, start+l)) 87 # Move forward to the next boundary. 88 m = _boundary_re.search(string, start) 89 if m is None: 90 break 91 start = m.end() 92 return results93
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Fri Nov 26 15:47:40 2010 | http://epydoc.sourceforge.net |