regex_syntax/hir/
visitor.rs

1use hir::{self, Hir, HirKind};
2
3/// A trait for visiting the high-level IR (HIR) in depth first order.
4///
5/// The principle aim of this trait is to enable callers to perform case
6/// analysis on a high-level intermediate representation of a regular
7/// expression without necessarily using recursion. In particular, this permits
8/// callers to do case analysis with constant stack usage, which can be
9/// important since the size of an HIR may be proportional to end user input.
10///
11/// Typical usage of this trait involves providing an implementation and then
12/// running it using the [`visit`](fn.visit.html) function.
13pub trait Visitor {
14    /// The result of visiting an HIR.
15    type Output;
16    /// An error that visiting an HIR might return.
17    type Err;
18
19    /// All implementors of `Visitor` must provide a `finish` method, which
20    /// yields the result of visiting the HIR or an error.
21    fn finish(self) -> Result<Self::Output, Self::Err>;
22
23    /// This method is called before beginning traversal of the HIR.
24    fn start(&mut self) {}
25
26    /// This method is called on an `Hir` before descending into child `Hir`
27    /// nodes.
28    fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
29        Ok(())
30    }
31
32    /// This method is called on an `Hir` after descending all of its child
33    /// `Hir` nodes.
34    fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
35        Ok(())
36    }
37
38    /// This method is called between child nodes of an alternation.
39    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
40        Ok(())
41    }
42}
43
44/// Executes an implementation of `Visitor` in constant stack space.
45///
46/// This function will visit every node in the given `Hir` while calling
47/// appropriate methods provided by the
48/// [`Visitor`](trait.Visitor.html) trait.
49///
50/// The primary use case for this method is when one wants to perform case
51/// analysis over an `Hir` without using a stack size proportional to the depth
52/// of the `Hir`. Namely, this method will instead use constant stack space,
53/// but will use heap space proportional to the size of the `Hir`. This may be
54/// desirable in cases where the size of `Hir` is proportional to end user
55/// input.
56///
57/// If the visitor returns an error at any point, then visiting is stopped and
58/// the error is returned.
59pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
60    HeapVisitor::new().visit(hir, visitor)
61}
62
63/// HeapVisitor visits every item in an `Hir` recursively using constant stack
64/// size and a heap size proportional to the size of the `Hir`.
65struct HeapVisitor<'a> {
66    /// A stack of `Hir` nodes. This is roughly analogous to the call stack
67    /// used in a typical recursive visitor.
68    stack: Vec<(&'a Hir, Frame<'a>)>,
69}
70
71/// Represents a single stack frame while performing structural induction over
72/// an `Hir`.
73enum Frame<'a> {
74    /// A stack frame allocated just before descending into a repetition
75    /// operator's child node.
76    Repetition(&'a hir::Repetition),
77    /// A stack frame allocated just before descending into a group's child
78    /// node.
79    Group(&'a hir::Group),
80    /// The stack frame used while visiting every child node of a concatenation
81    /// of expressions.
82    Concat {
83        /// The child node we are currently visiting.
84        head: &'a Hir,
85        /// The remaining child nodes to visit (which may be empty).
86        tail: &'a [Hir],
87    },
88    /// The stack frame used while visiting every child node of an alternation
89    /// of expressions.
90    Alternation {
91        /// The child node we are currently visiting.
92        head: &'a Hir,
93        /// The remaining child nodes to visit (which may be empty).
94        tail: &'a [Hir],
95    },
96}
97
98impl<'a> HeapVisitor<'a> {
99    fn new() -> HeapVisitor<'a> {
100        HeapVisitor { stack: vec![] }
101    }
102
103    fn visit<V: Visitor>(
104        &mut self,
105        mut hir: &'a Hir,
106        mut visitor: V,
107    ) -> Result<V::Output, V::Err> {
108        self.stack.clear();
109
110        visitor.start();
111        loop {
112            visitor.visit_pre(hir)?;
113            if let Some(x) = self.induct(hir) {
114                let child = x.child();
115                self.stack.push((hir, x));
116                hir = child;
117                continue;
118            }
119            // No induction means we have a base case, so we can post visit
120            // it now.
121            visitor.visit_post(hir)?;
122
123            // At this point, we now try to pop our call stack until it is
124            // either empty or we hit another inductive case.
125            loop {
126                let (post_hir, frame) = match self.stack.pop() {
127                    None => return visitor.finish(),
128                    Some((post_hir, frame)) => (post_hir, frame),
129                };
130                // If this is a concat/alternate, then we might have additional
131                // inductive steps to process.
132                if let Some(x) = self.pop(frame) {
133                    if let Frame::Alternation { .. } = x {
134                        visitor.visit_alternation_in()?;
135                    }
136                    hir = x.child();
137                    self.stack.push((post_hir, x));
138                    break;
139                }
140                // Otherwise, we've finished visiting all the child nodes for
141                // this HIR, so we can post visit it now.
142                visitor.visit_post(post_hir)?;
143            }
144        }
145    }
146
147    /// Build a stack frame for the given HIR if one is needed (which occurs if
148    /// and only if there are child nodes in the HIR). Otherwise, return None.
149    fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
150        match *hir.kind() {
151            HirKind::Repetition(ref x) => Some(Frame::Repetition(x)),
152            HirKind::Group(ref x) => Some(Frame::Group(x)),
153            HirKind::Concat(ref x) if x.is_empty() => None,
154            HirKind::Concat(ref x) => {
155                Some(Frame::Concat { head: &x[0], tail: &x[1..] })
156            }
157            HirKind::Alternation(ref x) if x.is_empty() => None,
158            HirKind::Alternation(ref x) => {
159                Some(Frame::Alternation { head: &x[0], tail: &x[1..] })
160            }
161            _ => None,
162        }
163    }
164
165    /// Pops the given frame. If the frame has an additional inductive step,
166    /// then return it, otherwise return `None`.
167    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
168        match induct {
169            Frame::Repetition(_) => None,
170            Frame::Group(_) => None,
171            Frame::Concat { tail, .. } => {
172                if tail.is_empty() {
173                    None
174                } else {
175                    Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
176                }
177            }
178            Frame::Alternation { tail, .. } => {
179                if tail.is_empty() {
180                    None
181                } else {
182                    Some(Frame::Alternation {
183                        head: &tail[0],
184                        tail: &tail[1..],
185                    })
186                }
187            }
188        }
189    }
190}
191
192impl<'a> Frame<'a> {
193    /// Perform the next inductive step on this frame and return the next
194    /// child HIR node to visit.
195    fn child(&self) -> &'a Hir {
196        match *self {
197            Frame::Repetition(rep) => &rep.hir,
198            Frame::Group(group) => &group.hir,
199            Frame::Concat { head, .. } => head,
200            Frame::Alternation { head, .. } => head,
201        }
202    }
203}