1
2
3
4
5
6
7
8
9
# Canonical part-of-speech identifiers used throughout this module.
NOUN = 'noun'
VERB = 'verb'
ADJECTIVE = 'adj'
ADVERB = 'adv'

# Printable abbreviation for each canonical part of speech.
pos_abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'}

# Maps every accepted spelling of a part of speech to its canonical
# identifier.  Filled in by the loop below.
_POSNormalizationTable = {}

# Each entry pairs a canonical identifier with a whitespace-separated list
# of the aliases that should normalize to it.
for pos, abbreviations in (
        (NOUN, "noun n n."),
        (VERB, "verb v v."),
        (ADJECTIVE, "adjective adj adj. a s"),
        (ADVERB, "adverb adv adv. r")):
    tokens = abbreviations.split()

    for token in tokens:
        # Register the alias exactly as written and in upper case; other
        # capitalizations (e.g. "Noun") are deliberately not registered.
        _POSNormalizationTable[token] = pos
        _POSNormalizationTable[token.upper()] = pos
29
31 """
32 Return the standard form of the supplied part of speech.
33
34 @type pos: C{string}
35 @param pos: A (non-standard) part of speech string.
36 @return: A standard form part of speech string.
37 """
38 try:
39 norm = _POSNormalizationTable[pos]
40 except KeyError:
41 raise TypeError, `pos` + " is not a part of speech type"
42 return norm
43