regex_syntax/ast/
visitor.rs

1use std::fmt;
2
3use ast::{self, Ast};
4
5/// A trait for visiting an abstract syntax tree (AST) in depth first order.
6///
7/// The principle aim of this trait is to enable callers to perform case
8/// analysis on an abstract syntax tree without necessarily using recursion.
9/// In particular, this permits callers to do case analysis with constant stack
10/// usage, which can be important since the size of an abstract syntax tree
11/// may be proportional to end user input.
12///
13/// Typical usage of this trait involves providing an implementation and then
14/// running it using the [`visit`](fn.visit.html) function.
15///
16/// Note that the abstract syntax tree for a regular expression is quite
17/// complex. Unless you specifically need it, you might be able to use the
18/// much simpler
19/// [high-level intermediate representation](../hir/struct.Hir.html)
20/// and its
21/// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
22/// instead.
23pub trait Visitor {
24    /// The result of visiting an AST.
25    type Output;
26    /// An error that visiting an AST might return.
27    type Err;
28
29    /// All implementors of `Visitor` must provide a `finish` method, which
30    /// yields the result of visiting the AST or an error.
31    fn finish(self) -> Result<Self::Output, Self::Err>;
32
33    /// This method is called before beginning traversal of the AST.
34    fn start(&mut self) {}
35
36    /// This method is called on an `Ast` before descending into child `Ast`
37    /// nodes.
38    fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
39        Ok(())
40    }
41
42    /// This method is called on an `Ast` after descending all of its child
43    /// `Ast` nodes.
44    fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
45        Ok(())
46    }
47
48    /// This method is called between child nodes of an
49    /// [`Alternation`](struct.Alternation.html).
50    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
51        Ok(())
52    }
53
54    /// This method is called on every
55    /// [`ClassSetItem`](enum.ClassSetItem.html)
56    /// before descending into child nodes.
57    fn visit_class_set_item_pre(
58        &mut self,
59        _ast: &ast::ClassSetItem,
60    ) -> Result<(), Self::Err> {
61        Ok(())
62    }
63
64    /// This method is called on every
65    /// [`ClassSetItem`](enum.ClassSetItem.html)
66    /// after descending into child nodes.
67    fn visit_class_set_item_post(
68        &mut self,
69        _ast: &ast::ClassSetItem,
70    ) -> Result<(), Self::Err> {
71        Ok(())
72    }
73
74    /// This method is called on every
75    /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
76    /// before descending into child nodes.
77    fn visit_class_set_binary_op_pre(
78        &mut self,
79        _ast: &ast::ClassSetBinaryOp,
80    ) -> Result<(), Self::Err> {
81        Ok(())
82    }
83
84    /// This method is called on every
85    /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
86    /// after descending into child nodes.
87    fn visit_class_set_binary_op_post(
88        &mut self,
89        _ast: &ast::ClassSetBinaryOp,
90    ) -> Result<(), Self::Err> {
91        Ok(())
92    }
93
94    /// This method is called between the left hand and right hand child nodes
95    /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
96    fn visit_class_set_binary_op_in(
97        &mut self,
98        _ast: &ast::ClassSetBinaryOp,
99    ) -> Result<(), Self::Err> {
100        Ok(())
101    }
102}
103
104/// Executes an implementation of `Visitor` in constant stack space.
105///
106/// This function will visit every node in the given `Ast` while calling the
107/// appropriate methods provided by the
108/// [`Visitor`](trait.Visitor.html) trait.
109///
110/// The primary use case for this method is when one wants to perform case
111/// analysis over an `Ast` without using a stack size proportional to the depth
112/// of the `Ast`. Namely, this method will instead use constant stack size, but
113/// will use heap space proportional to the size of the `Ast`. This may be
114/// desirable in cases where the size of `Ast` is proportional to end user
115/// input.
116///
117/// If the visitor returns an error at any point, then visiting is stopped and
118/// the error is returned.
119pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
120    HeapVisitor::new().visit(ast, visitor)
121}
122
123/// HeapVisitor visits every item in an `Ast` recursively using constant stack
124/// size and a heap size proportional to the size of the `Ast`.
125struct HeapVisitor<'a> {
126    /// A stack of `Ast` nodes. This is roughly analogous to the call stack
127    /// used in a typical recursive visitor.
128    stack: Vec<(&'a Ast, Frame<'a>)>,
129    /// Similar to the `Ast` stack above, but is used only for character
130    /// classes. In particular, character classes embed their own mini
131    /// recursive syntax.
132    stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
133}
134
135/// Represents a single stack frame while performing structural induction over
136/// an `Ast`.
137enum Frame<'a> {
138    /// A stack frame allocated just before descending into a repetition
139    /// operator's child node.
140    Repetition(&'a ast::Repetition),
141    /// A stack frame allocated just before descending into a group's child
142    /// node.
143    Group(&'a ast::Group),
144    /// The stack frame used while visiting every child node of a concatenation
145    /// of expressions.
146    Concat {
147        /// The child node we are currently visiting.
148        head: &'a Ast,
149        /// The remaining child nodes to visit (which may be empty).
150        tail: &'a [Ast],
151    },
152    /// The stack frame used while visiting every child node of an alternation
153    /// of expressions.
154    Alternation {
155        /// The child node we are currently visiting.
156        head: &'a Ast,
157        /// The remaining child nodes to visit (which may be empty).
158        tail: &'a [Ast],
159    },
160}
161
162/// Represents a single stack frame while performing structural induction over
163/// a character class.
164enum ClassFrame<'a> {
165    /// The stack frame used while visiting every child node of a union of
166    /// character class items.
167    Union {
168        /// The child node we are currently visiting.
169        head: &'a ast::ClassSetItem,
170        /// The remaining child nodes to visit (which may be empty).
171        tail: &'a [ast::ClassSetItem],
172    },
173    /// The stack frame used while a binary class operation.
174    Binary { op: &'a ast::ClassSetBinaryOp },
175    /// A stack frame allocated just before descending into a binary operator's
176    /// left hand child node.
177    BinaryLHS {
178        op: &'a ast::ClassSetBinaryOp,
179        lhs: &'a ast::ClassSet,
180        rhs: &'a ast::ClassSet,
181    },
182    /// A stack frame allocated just before descending into a binary operator's
183    /// right hand child node.
184    BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
185}
186
187/// A representation of the inductive step when performing structural induction
188/// over a character class.
189///
190/// Note that there is no analogous explicit type for the inductive step for
191/// `Ast` nodes because the inductive step is just an `Ast`. For character
192/// classes, the inductive step can produce one of two possible child nodes:
193/// an item or a binary operation. (An item cannot be a binary operation
194/// because that would imply binary operations can be unioned in the concrete
195/// syntax, which is not possible.)
196enum ClassInduct<'a> {
197    Item(&'a ast::ClassSetItem),
198    BinaryOp(&'a ast::ClassSetBinaryOp),
199}
200
201impl<'a> HeapVisitor<'a> {
202    fn new() -> HeapVisitor<'a> {
203        HeapVisitor { stack: vec![], stack_class: vec![] }
204    }
205
206    fn visit<V: Visitor>(
207        &mut self,
208        mut ast: &'a Ast,
209        mut visitor: V,
210    ) -> Result<V::Output, V::Err> {
211        self.stack.clear();
212        self.stack_class.clear();
213
214        visitor.start();
215        loop {
216            visitor.visit_pre(ast)?;
217            if let Some(x) = self.induct(ast, &mut visitor)? {
218                let child = x.child();
219                self.stack.push((ast, x));
220                ast = child;
221                continue;
222            }
223            // No induction means we have a base case, so we can post visit
224            // it now.
225            visitor.visit_post(ast)?;
226
227            // At this point, we now try to pop our call stack until it is
228            // either empty or we hit another inductive case.
229            loop {
230                let (post_ast, frame) = match self.stack.pop() {
231                    None => return visitor.finish(),
232                    Some((post_ast, frame)) => (post_ast, frame),
233                };
234                // If this is a concat/alternate, then we might have additional
235                // inductive steps to process.
236                if let Some(x) = self.pop(frame) {
237                    if let Frame::Alternation { .. } = x {
238                        visitor.visit_alternation_in()?;
239                    }
240                    ast = x.child();
241                    self.stack.push((post_ast, x));
242                    break;
243                }
244                // Otherwise, we've finished visiting all the child nodes for
245                // this AST, so we can post visit it now.
246                visitor.visit_post(post_ast)?;
247            }
248        }
249    }
250
251    /// Build a stack frame for the given AST if one is needed (which occurs if
252    /// and only if there are child nodes in the AST). Otherwise, return None.
253    ///
254    /// If this visits a class, then the underlying visitor implementation may
255    /// return an error which will be passed on here.
256    fn induct<V: Visitor>(
257        &mut self,
258        ast: &'a Ast,
259        visitor: &mut V,
260    ) -> Result<Option<Frame<'a>>, V::Err> {
261        Ok(match *ast {
262            Ast::Class(ast::Class::Bracketed(ref x)) => {
263                self.visit_class(x, visitor)?;
264                None
265            }
266            Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
267            Ast::Group(ref x) => Some(Frame::Group(x)),
268            Ast::Concat(ref x) if x.asts.is_empty() => None,
269            Ast::Concat(ref x) => {
270                Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
271            }
272            Ast::Alternation(ref x) if x.asts.is_empty() => None,
273            Ast::Alternation(ref x) => Some(Frame::Alternation {
274                head: &x.asts[0],
275                tail: &x.asts[1..],
276            }),
277            _ => None,
278        })
279    }
280
281    /// Pops the given frame. If the frame has an additional inductive step,
282    /// then return it, otherwise return `None`.
283    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
284        match induct {
285            Frame::Repetition(_) => None,
286            Frame::Group(_) => None,
287            Frame::Concat { tail, .. } => {
288                if tail.is_empty() {
289                    None
290                } else {
291                    Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
292                }
293            }
294            Frame::Alternation { tail, .. } => {
295                if tail.is_empty() {
296                    None
297                } else {
298                    Some(Frame::Alternation {
299                        head: &tail[0],
300                        tail: &tail[1..],
301                    })
302                }
303            }
304        }
305    }
306
307    fn visit_class<V: Visitor>(
308        &mut self,
309        ast: &'a ast::ClassBracketed,
310        visitor: &mut V,
311    ) -> Result<(), V::Err> {
312        let mut ast = ClassInduct::from_bracketed(ast);
313        loop {
314            self.visit_class_pre(&ast, visitor)?;
315            if let Some(x) = self.induct_class(&ast) {
316                let child = x.child();
317                self.stack_class.push((ast, x));
318                ast = child;
319                continue;
320            }
321            self.visit_class_post(&ast, visitor)?;
322
323            // At this point, we now try to pop our call stack until it is
324            // either empty or we hit another inductive case.
325            loop {
326                let (post_ast, frame) = match self.stack_class.pop() {
327                    None => return Ok(()),
328                    Some((post_ast, frame)) => (post_ast, frame),
329                };
330                // If this is a union or a binary op, then we might have
331                // additional inductive steps to process.
332                if let Some(x) = self.pop_class(frame) {
333                    if let ClassFrame::BinaryRHS { ref op, .. } = x {
334                        visitor.visit_class_set_binary_op_in(op)?;
335                    }
336                    ast = x.child();
337                    self.stack_class.push((post_ast, x));
338                    break;
339                }
340                // Otherwise, we've finished visiting all the child nodes for
341                // this class node, so we can post visit it now.
342                self.visit_class_post(&post_ast, visitor)?;
343            }
344        }
345    }
346
347    /// Call the appropriate `Visitor` methods given an inductive step.
348    fn visit_class_pre<V: Visitor>(
349        &self,
350        ast: &ClassInduct<'a>,
351        visitor: &mut V,
352    ) -> Result<(), V::Err> {
353        match *ast {
354            ClassInduct::Item(item) => {
355                visitor.visit_class_set_item_pre(item)?;
356            }
357            ClassInduct::BinaryOp(op) => {
358                visitor.visit_class_set_binary_op_pre(op)?;
359            }
360        }
361        Ok(())
362    }
363
364    /// Call the appropriate `Visitor` methods given an inductive step.
365    fn visit_class_post<V: Visitor>(
366        &self,
367        ast: &ClassInduct<'a>,
368        visitor: &mut V,
369    ) -> Result<(), V::Err> {
370        match *ast {
371            ClassInduct::Item(item) => {
372                visitor.visit_class_set_item_post(item)?;
373            }
374            ClassInduct::BinaryOp(op) => {
375                visitor.visit_class_set_binary_op_post(op)?;
376            }
377        }
378        Ok(())
379    }
380
381    /// Build a stack frame for the given class node if one is needed (which
382    /// occurs if and only if there are child nodes). Otherwise, return None.
383    fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
384        match *ast {
385            ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
386                match x.kind {
387                    ast::ClassSet::Item(ref item) => {
388                        Some(ClassFrame::Union { head: item, tail: &[] })
389                    }
390                    ast::ClassSet::BinaryOp(ref op) => {
391                        Some(ClassFrame::Binary { op: op })
392                    }
393                }
394            }
395            ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
396                if x.items.is_empty() {
397                    None
398                } else {
399                    Some(ClassFrame::Union {
400                        head: &x.items[0],
401                        tail: &x.items[1..],
402                    })
403                }
404            }
405            ClassInduct::BinaryOp(op) => Some(ClassFrame::BinaryLHS {
406                op: op,
407                lhs: &op.lhs,
408                rhs: &op.rhs,
409            }),
410            _ => None,
411        }
412    }
413
414    /// Pops the given frame. If the frame has an additional inductive step,
415    /// then return it, otherwise return `None`.
416    fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
417        match induct {
418            ClassFrame::Union { tail, .. } => {
419                if tail.is_empty() {
420                    None
421                } else {
422                    Some(ClassFrame::Union {
423                        head: &tail[0],
424                        tail: &tail[1..],
425                    })
426                }
427            }
428            ClassFrame::Binary { .. } => None,
429            ClassFrame::BinaryLHS { op, rhs, .. } => {
430                Some(ClassFrame::BinaryRHS { op: op, rhs: rhs })
431            }
432            ClassFrame::BinaryRHS { .. } => None,
433        }
434    }
435}
436
437impl<'a> Frame<'a> {
438    /// Perform the next inductive step on this frame and return the next
439    /// child AST node to visit.
440    fn child(&self) -> &'a Ast {
441        match *self {
442            Frame::Repetition(rep) => &rep.ast,
443            Frame::Group(group) => &group.ast,
444            Frame::Concat { head, .. } => head,
445            Frame::Alternation { head, .. } => head,
446        }
447    }
448}
449
450impl<'a> ClassFrame<'a> {
451    /// Perform the next inductive step on this frame and return the next
452    /// child class node to visit.
453    fn child(&self) -> ClassInduct<'a> {
454        match *self {
455            ClassFrame::Union { head, .. } => ClassInduct::Item(head),
456            ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
457            ClassFrame::BinaryLHS { ref lhs, .. } => {
458                ClassInduct::from_set(lhs)
459            }
460            ClassFrame::BinaryRHS { ref rhs, .. } => {
461                ClassInduct::from_set(rhs)
462            }
463        }
464    }
465}
466
467impl<'a> ClassInduct<'a> {
468    fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
469        ClassInduct::from_set(&ast.kind)
470    }
471
472    fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
473        match *ast {
474            ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
475            ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
476        }
477    }
478}
479
480impl<'a> fmt::Debug for ClassFrame<'a> {
481    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
482        let x = match *self {
483            ClassFrame::Union { .. } => "Union",
484            ClassFrame::Binary { .. } => "Binary",
485            ClassFrame::BinaryLHS { .. } => "BinaryLHS",
486            ClassFrame::BinaryRHS { .. } => "BinaryRHS",
487        };
488        write!(f, "{}", x)
489    }
490}
491
492impl<'a> fmt::Debug for ClassInduct<'a> {
493    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
494        let x = match *self {
495            ClassInduct::Item(it) => match *it {
496                ast::ClassSetItem::Empty(_) => "Item(Empty)",
497                ast::ClassSetItem::Literal(_) => "Item(Literal)",
498                ast::ClassSetItem::Range(_) => "Item(Range)",
499                ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
500                ast::ClassSetItem::Perl(_) => "Item(Perl)",
501                ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
502                ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
503                ast::ClassSetItem::Union(_) => "Item(Union)",
504            },
505            ClassInduct::BinaryOp(it) => match it.kind {
506                ast::ClassSetBinaryOpKind::Intersection => {
507                    "BinaryOp(Intersection)"
508                }
509                ast::ClassSetBinaryOpKind::Difference => {
510                    "BinaryOp(Difference)"
511                }
512                ast::ClassSetBinaryOpKind::SymmetricDifference => {
513                    "BinaryOp(SymmetricDifference)"
514                }
515            },
516        };
517        write!(f, "{}", x)
518    }
519}