1
2
3
4
5
6
7 from nltk_lite.contrib.classifier_tests import *
8 from nltk_lite.contrib.classifier import *
9 from nltk_lite.probability import FreqDist
10
# Splitting a comma-separated string is expected to yield the three items
# with the whitespace around each one removed.
expected_items = ['foo', 'bar', 'foobar']
items = split_ignore_space('foo , bar, foobar')
self.assertEqual(len(expected_items), len(items))
self.assertEqual(expected_items, items)
16
# Entropy of a label sequence holding four 'yes' and two 'no' values,
# checked against the base-2 entropy formula written out by hand.
labels = ['yes', 'no', 'yes', 'yes', 'yes', 'no']
expected = -1 * (4.0/6 * math.log(4.0/6, 2) + 2.0/6 * math.log(2.0/6, 2))
# Both sides are computed floats, so compare approximately: exact equality
# would break on any change in evaluation order inside entropy().
self.assertAlmostEqual(expected, entropy(labels))
20
# min_entropy_breakpoint returns a (position, entropy) pair for the label
# sequence; for this input the expected position is 4.
labels = ['yes', 'no', 'yes', 'yes', 'yes', 'no']
position, min_ent = min_entropy_breakpoint(labels)
self.assertEqual(4, position)
# The expected entropy is that of a 4-to-1 split out of five values.
# It is a computed float, so compare approximately rather than exactly.
expected_entropy = -1 * (4.0/5 * math.log(4.0/5, 2) + 1.0/5 * math.log(1.0/5, 2))
self.assertAlmostEqual(expected_entropy, min_ent)
25
# One dictionary of per-class counts is reused for every case; only the
# 'yes' and 'no' counts change between assertions.
klass_counts = {'yes': 2, 'no': 0}
# Every instance in a single class: the expected entropy is exactly 0.
self.assertEqual(0, entropy_of_key_counts(klass_counts))

# An even split between the two classes: the expected entropy is 1.
klass_counts['yes'], klass_counts['no'] = 3, 3
self.assertAlmostEqual(1, entropy_of_key_counts(klass_counts))

# A 9-to-5 split, checked to two decimal places against 0.94.
klass_counts['yes'], klass_counts['no'] = 9, 5
self.assertAlmostEqual(0.94, entropy_of_key_counts(klass_counts), 2)

# The remaining splits are checked to six decimal places against the
# base-2 entropy formula evaluated directly from the counts.
for yes_count, no_count in ((1, 3), (2, 1)):
    klass_counts['yes'], klass_counts['no'] = yes_count, no_count
    total = float(yes_count + no_count)
    expected = sum(-(count / total * math.log(count / total, 2))
                   for count in (yes_count, no_count))
    self.assertAlmostEqual(expected, entropy_of_key_counts(klass_counts), 6)
49
56