1
2
3
4
5
6
7
8
9
10 """
11 A graphical tool for exploring the recursive descent parser.
12
13 The recursive descent parser maintains a tree, which records the
14 structure of the portion of the text that has been parsed. It uses
15 CFG productions to expand the fringe of the tree, and matches its
16 leaves against the text. Initially, the tree contains the start
17 symbol ("S"). It is shown in the main canvas, to the right of the
18 list of available expansions.
19
20 The parser builds up a tree structure for the text using three
21 operations:
22
23 - "expand" uses a CFG production to add children to a node on the
24 fringe of the tree.
25 - "match" compares a leaf in the tree to a text token.
26 - "backtrack" returns the tree to its state before the most recent
27 expand or match operation.
28
29 The parser maintains a list of tree locations called a "frontier" to
30 remember which nodes have not yet been expanded and which leaves have
31 not yet been matched against the text. The leftmost frontier node is
32 shown in green, and the other frontier nodes are shown in blue. The
33 parser always performs expand and match operations on the leftmost
34 element of the frontier.
35
36 You can control the parser's operation by using the "expand," "match,"
37 and "backtrack" buttons; or you can use the "step" button to let the
38 parser automatically decide which operation to apply. The parser uses
39 the following rules to decide which operation to apply:
40
41 - If the leftmost frontier element is a token, try matching it.
42 - If the leftmost frontier element is a node, try expanding it with
43 the first untried expansion.
44 - Otherwise, backtrack.
45
46 The "expand" button applies the untried expansion whose CFG production
47 is listed earliest in the grammar. To manually choose which expansion
48 to apply, click on a CFG production from the list of available
49 expansions, on the left side of the main window.
50
51 The "autostep" button will let the parser continue applying
52 applications to the tree until it reaches a complete parse. You can
53 cancel an autostep in progress at any time by clicking on the
54 "autostep" button again.
55
56 Keyboard Shortcuts::
57 [Space]\t Perform the next expand, match, or backtrack operation
58 [a]\t Step through operations until the next complete parse
59 [e]\t Perform an expand operation
60 [m]\t Perform a match operation
61 [b]\t Perform a backtrack operation
62 [Delete]\t Reset the parser
63 [g]\t Show/hide available expansions list
64 [h]\t Help
65 [Ctrl-p]\t Print
66 [q]\t Quit
67 """
68
69 from nltk_lite.draw.tree import *
70 from nltk_lite.draw import *
71 from nltk_lite import parse
72 from nltk_lite import tokenize
73 from nltk_lite.draw.cfg import *
74 from nltk_lite.draw.cfg import CFGEditor
75 import string
76 import tkFont
77 from Tkinter import *
78
79
80
82 """
83 A graphical tool for exploring the recursive descent parser. The tool
84 displays the parser's tree and the remaining text, and allows the
85 user to control the parser's operation. In particular, the user
86 can expand subtrees on the frontier, match tokens on the frontier
87 against the text, and backtrack. A "step" button simply steps
88 through the parsing process, performing the operations that
89 C{RecursiveDescentParser} would use.
90 """
91 - def __init__(self, grammar, sent, trace=0):
128
129
130
131
132
134
135 self._sysfont = tkFont.Font(font=Button()["font"])
136 root.option_add("*Font", self._sysfont)
137
138
139 self._size = IntVar(root)
140 self._size.set(self._sysfont.cget('size'))
141
142 self._boldfont = tkFont.Font(family='helvetica', weight='bold',
143 size=self._size.get())
144 self._font = tkFont.Font(family='helvetica',
145 size=self._size.get())
146 if self._size.get() < 0: big = self._size.get()-2
147 else: big = self._size.get()+2
148 self._bigfont = tkFont.Font(family='helvetica', weight='bold',
149 size=big)
150
152
153 self._prodframe = listframe = Frame(parent)
154 self._prodframe.pack(fill='both', side='left', padx=2)
155 self._prodlist_label = Label(self._prodframe, font=self._boldfont,
156 text='Available Expansions')
157 self._prodlist_label.pack()
158 self._prodlist = Listbox(self._prodframe, selectmode='single',
159 relief='groove', background='white',
160 foreground='#909090', font=self._font,
161 selectforeground='#004040',
162 selectbackground='#c0f0c0')
163
164 self._prodlist.pack(side='right', fill='both', expand=1)
165
166 self._productions = list(self._parser.grammar().productions())
167 for production in self._productions:
168 self._prodlist.insert('end', (' %s' % production))
169 self._prodlist.config(height=min(len(self._productions), 25))
170
171
172 if len(self._productions) > 25:
173 listscroll = Scrollbar(self._prodframe,
174 orient='vertical')
175 self._prodlist.config(yscrollcommand = listscroll.set)
176 listscroll.config(command=self._prodlist.yview)
177 listscroll.pack(side='left', fill='y')
178
179
180 self._prodlist.bind('<<ListboxSelect>>', self._prodlist_select)
181
183
184 self._top.bind('<Control-q>', self.destroy)
185 self._top.bind('<Control-x>', self.destroy)
186 self._top.bind('<Escape>', self.destroy)
187 self._top.bind('e', self.expand)
188
189
190 self._top.bind('m', self.match)
191 self._top.bind('<Alt-m>', self.match)
192 self._top.bind('<Control-m>', self.match)
193 self._top.bind('b', self.backtrack)
194 self._top.bind('<Alt-b>', self.backtrack)
195 self._top.bind('<Control-b>', self.backtrack)
196 self._top.bind('<Control-z>', self.backtrack)
197 self._top.bind('<BackSpace>', self.backtrack)
198 self._top.bind('a', self.autostep)
199
200 self._top.bind('<Control-space>', self.autostep)
201 self._top.bind('<Control-c>', self.cancel_autostep)
202 self._top.bind('<space>', self.step)
203 self._top.bind('<Delete>', self.reset)
204 self._top.bind('<Control-p>', self.postscript)
205
206
207 self._top.bind('<Control-h>', self.help)
208 self._top.bind('<F1>', self.help)
209
210
211
212 self._top.bind('<Control-g>', self.edit_grammar)
213 self._top.bind('<Control-t>', self.edit_sentence)
214
234
235
236
237
238
245
247 self._feedbackframe = feedbackframe = Frame(parent)
248 feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3)
249 self._lastoper_label = Label(feedbackframe, text='Last Operation:',
250 font=self._font)
251 self._lastoper_label.pack(side='left')
252 lastoperframe = Frame(feedbackframe, relief='sunken', border=1)
253 lastoperframe.pack(fill='x', side='right', expand=1, padx=5)
254 self._lastoper1 = Label(lastoperframe, foreground='#007070',
255 background='#f0f0f0', font=self._font)
256 self._lastoper2 = Label(lastoperframe, anchor='w', width=30,
257 foreground='#004040', background='#f0f0f0',
258 font=self._font)
259 self._lastoper1.pack(side='left')
260 self._lastoper2.pack(side='left', fill='x', expand=1)
261
263 self._cframe = CanvasFrame(parent, background='white',
264
265 closeenough=10,
266 border=2, relief='sunken')
267 self._cframe.pack(expand=1, fill='both', side='top', pady=2)
268 canvas = self._canvas = self._cframe.canvas()
269
270
271 self._tree = None
272 self._textwidgets = []
273 self._textline = None
274
276 menubar = Menu(parent)
277
278 filemenu = Menu(menubar, tearoff=0)
279 filemenu.add_command(label='Reset Parser', underline=0,
280 command=self.reset, accelerator='Del')
281 filemenu.add_command(label='Print to Postscript', underline=0,
282 command=self.postscript, accelerator='Ctrl-p')
283 filemenu.add_command(label='Exit', underline=1,
284 command=self.destroy, accelerator='Ctrl-x')
285 menubar.add_cascade(label='File', underline=0, menu=filemenu)
286
287 editmenu = Menu(menubar, tearoff=0)
288 editmenu.add_command(label='Edit Grammar', underline=5,
289 command=self.edit_grammar,
290 accelerator='Ctrl-g')
291 editmenu.add_command(label='Edit Text', underline=5,
292 command=self.edit_sentence,
293 accelerator='Ctrl-t')
294 menubar.add_cascade(label='Edit', underline=0, menu=editmenu)
295
296 rulemenu = Menu(menubar, tearoff=0)
297 rulemenu.add_command(label='Step', underline=1,
298 command=self.step, accelerator='Space')
299 rulemenu.add_separator()
300 rulemenu.add_command(label='Match', underline=0,
301 command=self.match, accelerator='Ctrl-m')
302 rulemenu.add_command(label='Expand', underline=0,
303 command=self.expand, accelerator='Ctrl-e')
304 rulemenu.add_separator()
305 rulemenu.add_command(label='Backtrack', underline=0,
306 command=self.backtrack, accelerator='Ctrl-b')
307 menubar.add_cascade(label='Apply', underline=0, menu=rulemenu)
308
309 viewmenu = Menu(menubar, tearoff=0)
310 viewmenu.add_checkbutton(label="Show Grammar", underline=0,
311 variable=self._show_grammar,
312 command=self._toggle_grammar)
313 viewmenu.add_separator()
314 viewmenu.add_radiobutton(label='Tiny', variable=self._size,
315 underline=0, value=10, command=self.resize)
316 viewmenu.add_radiobutton(label='Small', variable=self._size,
317 underline=0, value=12, command=self.resize)
318 viewmenu.add_radiobutton(label='Medium', variable=self._size,
319 underline=0, value=14, command=self.resize)
320 viewmenu.add_radiobutton(label='Large', variable=self._size,
321 underline=0, value=18, command=self.resize)
322 viewmenu.add_radiobutton(label='Huge', variable=self._size,
323 underline=0, value=24, command=self.resize)
324 menubar.add_cascade(label='View', underline=0, menu=viewmenu)
325
326 animatemenu = Menu(menubar, tearoff=0)
327 animatemenu.add_radiobutton(label="No Animation", underline=0,
328 variable=self._animation_frames,
329 value=0)
330 animatemenu.add_radiobutton(label="Slow Animation", underline=0,
331 variable=self._animation_frames,
332 value=10, accelerator='-')
333 animatemenu.add_radiobutton(label="Normal Animation", underline=0,
334 variable=self._animation_frames,
335 value=5, accelerator='=')
336 animatemenu.add_radiobutton(label="Fast Animation", underline=0,
337 variable=self._animation_frames,
338 value=2, accelerator='+')
339 menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)
340
341
342 helpmenu = Menu(menubar, tearoff=0)
343 helpmenu.add_command(label='About', underline=0,
344 command=self.about)
345 helpmenu.add_command(label='Instructions', underline=0,
346 command=self.help, accelerator='F1')
347 menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
348
349 parent.config(menu=menubar)
350
351
352
353
354
355 - def _get(self, widget, treeloc):
360
361
362
363
364
366 canvas = self._canvas
367
368
369 if self._tree is not None:
370 self._cframe.destroy_widget(self._tree)
371 for twidget in self._textwidgets:
372 self._cframe.destroy_widget(twidget)
373 if self._textline is not None:
374 self._canvas.delete(self._textline)
375
376
377 helv = ('helvetica', -self._size.get())
378 bold = ('helvetica', -self._size.get(), 'bold')
379 attribs = {'tree_color': '#000000', 'tree_width': 2,
380 'node_font': bold, 'leaf_font': helv,}
381 tree = self._parser.tree()
382 self._tree = tree_to_treesegment(canvas, tree, **attribs)
383 self._cframe.add_widget(self._tree, 30, 5)
384
385
386 helv = ('helvetica', -self._size.get())
387 bottom = y = self._cframe.scrollregion()[3]
388 self._textwidgets = [TextWidget(canvas, word, font=self._font)
389 for word in self._sent]
390 for twidget in self._textwidgets:
391 self._cframe.add_widget(twidget, 0, 0)
392 twidget.move(0, bottom-twidget.bbox()[3]-5)
393 y = min(y, twidget.bbox()[1])
394
395
396 self._textline = canvas.create_line(-5000, y-5, 5000, y-5, dash='.')
397
398
399 self._highlight_nodes()
400 self._highlight_prodlist()
401
402
403 self._position_text()
404
405
411
413
414 bold = ('helvetica', -self._size.get(), 'bold')
415 for treeloc in self._parser.frontier()[:1]:
416 self._get(self._tree, treeloc)['color'] = '#20a050'
417 self._get(self._tree, treeloc)['font'] = bold
418 for treeloc in self._parser.frontier()[1:]:
419 self._get(self._tree, treeloc)['color'] = '#008080'
420
439
440 - def _position_text(self):
441
442 numwords = len(self._sent)
443 num_matched = numwords - len(self._parser.remaining_text())
444 leaves = self._tree_leaves()[:num_matched]
445 xmax = self._tree.bbox()[0]
446 for i in range(0, len(leaves)):
447 widget = self._textwidgets[i]
448 leaf = leaves[i]
449 widget['color'] = '#006040'
450 leaf['color'] = '#006040'
451 widget.move(leaf.bbox()[0] - widget.bbox()[0], 0)
452 xmax = widget.bbox()[2] + 10
453
454
455 for i in range(len(leaves), numwords):
456 widget = self._textwidgets[i]
457 widget['color'] = '#a0a0a0'
458 widget.move(xmax - widget.bbox()[0], 0)
459 xmax = widget.bbox()[2] + 10
460
461
462 if self._parser.currently_complete():
463 for twidget in self._textwidgets:
464 twidget['color'] = '#00a000'
465
466
467 for i in range(0, len(leaves)):
468 widget = self._textwidgets[i]
469 leaf = leaves[i]
470 dy = widget.bbox()[1] - leaf.bbox()[3] - 10.0
471 dy = max(dy, leaf.parent().node().bbox()[3] - leaf.bbox()[3] + 10)
472 leaf.move(0, dy)
473
482
483
484
485
486
488 self._autostep = 0
489 if self._top is None: return
490 self._top.destroy()
491 self._top = None
492
494 self._autostep = 0
495 self._parser.initialize(self._sent)
496 self._lastoper1['text'] = 'Reset Demo'
497 self._lastoper2['text'] = ''
498 self._redraw()
499
501 if self._animation_frames.get() == 0:
502 self._animation_frames.set(2)
503 if self._autostep:
504 self._autostep = 0
505 else:
506 self._autostep = 1
507 self._step()
508
510
511 self._autostep = 0
512
513
514 - def step(self, *e): self._autostep = 0; self._step()
515 - def match(self, *e): self._autostep = 0; self._match()
518
520 if self._animating_lock: return
521
522
523 if self._expand(): pass
524 elif self._parser.untried_match() and self._match(): pass
525 elif self._backtrack(): pass
526 else:
527 self._lastoper1['text'] = 'Finished'
528 self._lastoper2['text'] = ''
529 self._autostep = 0
530
531
532 if self._parser.currently_complete():
533 self._autostep = 0
534 self._lastoper2['text'] += ' [COMPLETE PARSE]'
535
537 if self._animating_lock: return
538 old_frontier = self._parser.frontier()
539 rv = self._parser.expand()
540 if rv is not None:
541 self._lastoper1['text'] = 'Expand:'
542 self._lastoper2['text'] = rv
543 self._prodlist.selection_clear(0, 'end')
544 index = self._productions.index(rv)
545 self._prodlist.selection_set(index)
546 self._animate_expand(old_frontier[0])
547 return 1
548 else:
549 self._lastoper1['text'] = 'Expand:'
550 self._lastoper2['text'] = '(all expansions tried)'
551 return 0
552
554 if self._animating_lock: return
555 old_frontier = self._parser.frontier()
556 rv = self._parser.match()
557 if rv is not None:
558 self._lastoper1['text'] = 'Match:'
559 self._lastoper2['text'] = rv
560 self._animate_match(old_frontier[0])
561 return 1
562 else:
563 self._lastoper1['text'] = 'Match:'
564 self._lastoper2['text'] = '(failed)'
565 return 0
566
585
587 ABOUT = ("NLTK Recursive Descent Parser Demo\n"+
588 "Written by Edward Loper")
589 TITLE = 'About: Recursive Descent Parser Demo'
590 try:
591 from tkMessageBox import Message
592 Message(message=ABOUT, title=TITLE).show()
593 except:
594 ShowText(self._top, TITLE, ABOUT)
595
596 - def help(self, *e):
597 self._autostep = 0
598
599 try:
600 ShowText(self._top, 'Help: Recursive Descent Parser Demo',
601 (__doc__).strip(), width=75, font='fixed')
602 except:
603 ShowText(self._top, 'Help: Recursive Descent Parser Demo',
604 (__doc__).strip(), width=75)
605
606 - def postscript(self, *e):
607 self._autostep = 0
608 self._cframe.print_to_file()
609
610 - def mainloop(self, *args, **kwargs):
611 """
612 Enter the Tkinter mainloop. This function must be called if
613 this demo is created from a non-interactive program (e.g.
614 from a secript); otherwise, the demo will close as soon as
615 the script completes.
616 """
617 if in_idle(): return
618 self._top.mainloop(*args, **kwargs)
619
628
629
630
631
632
634 if self._show_grammar.get():
635 self._prodframe.pack(fill='both', side='left', padx=2,
636 after=self._feedbackframe)
637 self._lastoper1['text'] = 'Show Grammar'
638 else:
639 self._prodframe.pack_forget()
640 self._lastoper1['text'] = 'Hide Grammar'
641 self._lastoper2['text'] = ''
642
643
644
645
646
647
648
649
650
651
652
653
673
674
675
676
677
679 oldwidget = self._get(self._tree, treeloc)
680 oldtree = oldwidget.parent()
681 top = not isinstance(oldtree.parent(), TreeSegmentWidget)
682
683 tree = self._parser.tree()
684 for i in treeloc:
685 tree = tree[i]
686
687 widget = tree_to_treesegment(self._canvas, tree,
688 node_font=self._boldfont,
689 leaf_color='white',
690 tree_width=2, tree_color='white',
691 node_color='white',
692 leaf_font=self._font)
693 widget.node()['color'] = '#20a050'
694
695 (oldx, oldy) = oldtree.node().bbox()[:2]
696 (newx, newy) = widget.node().bbox()[:2]
697 widget.move(oldx-newx, oldy-newy)
698
699 if top:
700 self._cframe.add_widget(widget, 0, 5)
701 widget.move(30-widget.node().bbox()[0], 0)
702 self._tree = widget
703 else:
704 oldtree.parent().replace_child(oldtree, widget)
705
706
707
708 if widget.subtrees():
709 dx = (oldx + widget.node().width()/2 -
710 widget.subtrees()[0].bbox()[0]/2 -
711 widget.subtrees()[0].bbox()[2]/2)
712 for subtree in widget.subtrees(): subtree.move(dx, 0)
713
714 self._makeroom(widget)
715
716 if top:
717 self._cframe.destroy_widget(oldtree)
718 else:
719 oldtree.destroy()
720
721 colors = ['gray%d' % (10*int(10*x/self._animation_frames.get()))
722 for x in range(self._animation_frames.get(),0,-1)]
723
724
725 dy = widget.bbox()[3] + 30 - self._canvas.coords(self._textline)[1]
726 if dy > 0:
727 for twidget in self._textwidgets: twidget.move(0, dy)
728 self._canvas.move(self._textline, 0, dy)
729
730 self._animate_expand_frame(widget, colors)
731
755
757 if len(colors) > 0:
758 self._animating_lock = 1
759 widget['color'] = colors[0]
760 for subtree in widget.subtrees():
761 if isinstance(subtree, TreeSegmentWidget):
762 subtree.node()['color'] = colors[0]
763 else:
764 subtree['color'] = colors[0]
765 self._top.after(50, self._animate_expand_frame,
766 widget, colors[1:])
767 else:
768 widget['color'] = 'black'
769 for subtree in widget.subtrees():
770 if isinstance(subtree, TreeSegmentWidget):
771 subtree.node()['color'] = 'black'
772 else:
773 subtree['color'] = 'black'
774 self._redraw_quick()
775 widget.node()['color'] = 'black'
776 self._animating_lock = 0
777 if self._autostep: self._step()
778
794
808
816
818 widget = self._get(self._tree, treeloc)
819
820 dy = ((self._textwidgets[0].bbox()[1] - widget.bbox()[3] - 10.0) /
821 max(1, self._animation_frames.get()))
822 self._animate_match_frame(self._animation_frames.get(), widget, dy)
823
825 if frame > 0:
826 self._animating_lock = 1
827 widget.move(0, dy)
828 self._top.after(10, self._animate_match_frame,
829 frame-1, widget, dy)
830 else:
831 widget['color'] = '#006040'
832 self._redraw_quick()
833 self._animating_lock = 0
834 if self._autostep: self._step()
835
847
850
857
859 sentence = string.join(self._sent)
860 title = 'Edit Text'
861 instr = 'Enter a new sentence to parse.'
862 EntryDialog(self._top, sentence, instr, self.set_sentence, title)
863
867
869 """
870 Create a recursive descent parser demo, using a simple grammar and
871 text.
872 """
873 from nltk_lite.parse import cfg
874 grammar = cfg.parse_cfg("""
875 # Grammatical productions.
876 S -> NP VP
877 NP -> Det N PP | Det N
878 VP -> V NP PP | V NP | V
879 PP -> P NP
880 # Lexical productions.
881 NP -> 'I'
882 Det -> 'the' | 'a'
883 N -> 'man' | 'park' | 'dog' | 'telescope'
884 V -> 'ate' | 'saw'
885 P -> 'in' | 'under' | 'with'
886 """)
887
888 sent = list(tokenize.whitespace('the dog saw a man in the park'))
889
890 RecursiveDescentDemo(grammar, sent).mainloop()
891
892 if __name__ == '__main__': demo()
893