6 """
7 Maximum Entropy code.
8
9 Uses Improved Iterative Scaling:
10 XXX ref
11
12 # XXX need to define terminology
13
14 """
15
16 import numpy
17
19 """Holds information for a Maximum Entropy classifier.
20
21 Members:
22 classes List of the possible classes of data.
23 alphas List of the weights for each feature.
24 feature_fns List of the feature functions.
25
26 """
28 self.classes = []
29 self.alphas = []
30 self.feature_fns = []
31
33 """calculate(me, observation) -> list of log probs
34
35 Calculate the log of the probability for each class. me is a
36 MaxEntropy object that has been trained. observation is a vector
37 representing the observed data. The return value is a list of
38 unnormalized log probabilities for each class.
39
40 """
41 scores = []
42 assert len(me.feature_fns) == len(me.alphas)
43 for klass in me.classes:
44 lprob = 0.0
45 for fn, alpha in zip(me.feature_fns, me.alphas):
46 lprob += fn(observation, klass) * alpha
47 scores.append(lprob)
48 return scores
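
# Usage sketch (added; not part of the original module): the scores from
# calculate() are unnormalized log probabilities, so a softmax turns them
# into normalized class probabilities.  Here `me` and `observation` are
# assumed to come from a trained classifier, e.g. the car example at the
# bottom of this file.
#
#     scores = numpy.array(calculate(me, observation))
#     probs = numpy.exp(scores - scores.max())  # subtract max for stability
#     probs /= probs.sum()                      # probs now sums to 1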


def classify(me, observation):
    """classify(me, observation) -> class

    Classify an observation into a class.  me is a trained MaxEntropy
    object; return the class whose unnormalized log probability from
    calculate() is highest.

    """
    scores = calculate(me, observation)
    max_score, klass = scores[0], me.classes[0]
    for i in range(1, len(scores)):
        if scores[i] > max_score:
            max_score, klass = scores[i], me.classes[i]
    return klass
64 """_eval_feature_fn(fn, xs, classes) -> dict of values
65
66 Evaluate a feature function on every instance of the training set
67 and class. fn is a callback function that takes two parameters: a
68 training instance and a class. Return a dictionary of (training
69 set index, class index) -> non-zero value. Values of 0 are not
70 stored in the dictionary.
71
72 """
73 values = {}
74 for i in range(len(xs)):
75 for j in range(len(classes)):
76 f = fn(xs[i], classes[j])
77 if f != 0:
78 values[(i, j)] = f
79 return values
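
# A minimal sketch (added) of the sparse mapping _eval_feature_fn builds;
# the toy data below is hypothetical.  Only non-zero values are stored:
#
#     >>> xs = [['Red'], ['Yellow']]
#     >>> classes = ['No', 'Yes']
#     >>> fn = lambda x, cl: int(x[0] == 'Red' and cl == 'Yes')
#     >>> _eval_feature_fn(fn, xs, classes)
#     {(0, 1): 1}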


def _calc_empirical_expects(xs, ys, classes, features):
    """_calc_empirical_expects(xs, ys, classes, features) -> list of expectations

    Calculate the expectation of each function from the data.  This is
    the constraint for the maximum entropy distribution.  Return a
    list of expectations, parallel to the list of features.

    """
    # Map each class to its index, then turn the list of class labels
    # into a parallel list of class indices.
    class2index = {}
    for index, key in enumerate(classes):
        class2index[key] = index
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(float(s) / N)
    return expect
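
# In formula terms (comment added for clarity): the empirical expectation
# of feature f_i is its average over the observed (instance, class) pairs,
#
#     E_empirical[f_i] = (1/N) * SUM[n] f_i(x_n, y_n)
#
# e.g. a binary feature that fires on the true class of 3 of 10 training
# instances has empirical expectation 0.3.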


def _calc_model_expects(xs, classes, features, alphas):
    """_calc_model_expects(xs, classes, features, alphas) -> list of expectations.

    Calculate the expectation of each feature from the model.  This is
    not used in maximum entropy training, but provides a good function
    for debugging.

    """
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    expects = []
    for feature in features:
        total = 0.0
        for (i, j), f in feature.items():
            total += p_yx[i][j] * f
        expects.append(total / len(xs))
    return expects


def _calc_p_class_given_x(xs, classes, features, alphas):
    """_calc_p_class_given_x(xs, classes, features, alphas) -> matrix

    Calculate P(y|x), where y is the class and x is an instance from
    the training set.  Return a len(xs) x len(classes) matrix of
    probabilities.

    """
    prob_yx = numpy.zeros((len(xs), len(classes)))

    # Accumulate the unnormalized log score for each (instance, class) pair.
    assert len(features) == len(alphas)
    for feature, alpha in zip(features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f

    # Exponentiate to get unnormalized probabilities.
    prob_yx = numpy.exp(prob_yx)

    # Normalize each row so it sums to 1, giving P(y|x).
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z
    return prob_yx
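
# The distribution computed above (comment added for clarity) is the
# exponential-family form of the maximum entropy model; each row is a
# softmax over classes:
#
#     P(y|x) = exp(SUM[i] alpha_i * f_i(x, y)) / Z(x)
#     Z(x)   = SUM[y'] exp(SUM[i] alpha_i * f_i(x, y'))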


def _calc_f_sharp(N, nclasses, features):
    """_calc_f_sharp(N, nclasses, features) -> matrix of f sharp values."""
    f_sharp = numpy.zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp
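
# Definition (comment added for clarity):
#
#     f#(x, y) = SUM[i] f_i(x, y)
#
# i.e. for 0/1 features, the number of features active on the pair (x, y).
# IIS uses f# as the exponent scale when solving for each step size delta.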


def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx,
                     max_newton_iterations, newton_converge):
    """Solve for the delta of a single feature using Newton's method (PRIVATE)."""
    delta = 0.0
    iters = 0
    while iters < max_newton_iterations:
        # Evaluate the function and its derivative at the current delta.
        f_newton = df_newton = 0.0
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * numpy.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        ratio = f_newton / df_newton
        delta -= ratio
        if numpy.fabs(ratio) < newton_converge:  # converged
            break
        iters += 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta
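
# The root found above (comment added for clarity): Newton's method solves
# the IIS update equation for delta_i,
#
#     (1/N) * SUM[n,y] P(y|x_n) * f_i(x_n, y) * exp(delta_i * f#(x_n, y))
#         = E_empirical[f_i]
#
# so that alpha_i + delta_i moves the model expectation of feature i toward
# its empirical value.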


def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical,
               max_newton_iterations, newton_converge):
    """Do one iteration of hill climbing to find better alphas (PRIVATE)."""
    # Pre-calculate P(y|x) under the current alphas.
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    # Solve for a new delta for each feature independently, then update.
    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(N, features[i], f_sharp, e_empirical[i], p_yx,
                                 max_newton_iterations, newton_converge)
        newalphas[i] += delta
    return newalphas


def train(training_set, results, feature_fns, update_fn=None,
          max_iis_iterations=10000, iis_converge=1.0e-5,
          max_newton_iterations=100, newton_converge=1.0e-10):
    """Train a maximum entropy classifier, returns MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that is called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    The maximum number of iterations and the convergence criterion for IIS
    are given by max_iis_iterations and iis_converge, respectively, while
    max_newton_iterations and newton_converge are the maximum number
    of iterations and the convergence criterion for Newton's method.
    """
    if not training_set:
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    # Rename variables for convenience.
    xs, ys = training_set, results

    # Get a sorted list of all the classes that need to be trained.
    classes = sorted(set(results))

    # Cache values for all features.
    features = [_eval_feature_fn(fn, training_set, classes)
                for fn in feature_fns]
    # Cache values for f#.
    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

    # Now train the alpha parameters to weigh each feature.
    alphas = [0.0] * len(features)
    iters = 0
    while iters < max_iis_iterations:
        nalphas = _train_iis(xs, classes, features, f_sharp,
                             alphas, e_empirical,
                             max_newton_iterations, newton_converge)
        # Use the sum of absolute changes in the alphas as the
        # convergence measure.
        diff = sum(numpy.fabs(x - y) for x, y in zip(alphas, nalphas))
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < iis_converge:  # converged
            break
        iters += 1
    else:
        raise RuntimeError("IIS did not converge")

    return me


if __name__ == "__main__":
    # Small demonstration on a toy car data set.
    xcar = [
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Domestic'],
        ['Red', 'SUV', 'Imported'],
        ['Red', 'Sports', 'Imported'],
    ]

    ycar = [
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'No',
        'Yes',
    ]

    # Feature functions take (observation, class); these ignore the class.
    def udf1(ts, cl):
        if ts[0] == 'Red':
            return 0
        else:
            return 1

    def udf2(ts, cl):
        if ts[1] == 'Sports':
            return 0
        else:
            return 1

    def udf3(ts, cl):
        if ts[2] == 'Domestic':
            return 0
        else:
            return 1

    user_functions = [udf1, udf2, udf3]

    xe = train(xcar, ycar, user_functions)
    for xv, yv in zip(xcar, ycar):
        xc = classify(xe, xv)
        print('Pred:', xv, 'gives', xc, 'y is', yv)
328