regex_syntax/ast/
print.rs

1/*!
2This module provides a regular expression printer for `Ast`.
3*/
4
5use std::fmt;
6
7use ast::visitor::{self, Visitor};
8use ast::{self, Ast};
9
/// Builder that produces a `Printer`.
///
/// A printer currently has nothing to configure, so this builder carries no
/// state of its own and is deliberately kept private to this module.
#[derive(Clone, Debug)]
struct PrinterBuilder {
    // Zero-sized placeholder; the builder has no options to store.
    _priv: (),
}
18
19impl Default for PrinterBuilder {
20    fn default() -> PrinterBuilder {
21        PrinterBuilder::new()
22    }
23}
24
25impl PrinterBuilder {
26    fn new() -> PrinterBuilder {
27        PrinterBuilder { _priv: () }
28    }
29
30    fn build(&self) -> Printer {
31        Printer { _priv: () }
32    }
33}
34
/// Turns a regular expression abstract syntax tree (AST) back into a
/// pattern string.
///
/// Printing uses constant stack space, and heap space proportional to the
/// size of the AST.
///
/// The output is not guaranteed to match the formatting of the pattern the
/// AST was originally parsed from. For example, all whitespace and comments
/// are ignored.
#[derive(Debug)]
pub struct Printer {
    // Zero-sized placeholder; keeps the struct from being constructed
    // outside of this module.
    _priv: (),
}
48
49impl Printer {
50    /// Create a new printer.
51    pub fn new() -> Printer {
52        PrinterBuilder::new().build()
53    }
54
55    /// Print the given `Ast` to the given writer. The writer must implement
56    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
57    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
58    /// implementations) or a `&mut String`.
59    pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
60        visitor::visit(ast, Writer { printer: self, wtr: wtr })
61    }
62}
63
64#[derive(Debug)]
65struct Writer<'p, W> {
66    printer: &'p mut Printer,
67    wtr: W,
68}
69
70impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
71    type Output = ();
72    type Err = fmt::Error;
73
74    fn finish(self) -> fmt::Result {
75        Ok(())
76    }
77
78    fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
79        match *ast {
80            Ast::Group(ref x) => self.fmt_group_pre(x),
81            Ast::Class(ast::Class::Bracketed(ref x)) => {
82                self.fmt_class_bracketed_pre(x)
83            }
84            _ => Ok(()),
85        }
86    }
87
88    fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
89        use ast::Class;
90
91        match *ast {
92            Ast::Empty(_) => Ok(()),
93            Ast::Flags(ref x) => self.fmt_set_flags(x),
94            Ast::Literal(ref x) => self.fmt_literal(x),
95            Ast::Dot(_) => self.wtr.write_str("."),
96            Ast::Assertion(ref x) => self.fmt_assertion(x),
97            Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
98            Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
99            Ast::Class(Class::Bracketed(ref x)) => {
100                self.fmt_class_bracketed_post(x)
101            }
102            Ast::Repetition(ref x) => self.fmt_repetition(x),
103            Ast::Group(ref x) => self.fmt_group_post(x),
104            Ast::Alternation(_) => Ok(()),
105            Ast::Concat(_) => Ok(()),
106        }
107    }
108
109    fn visit_alternation_in(&mut self) -> fmt::Result {
110        self.wtr.write_str("|")
111    }
112
113    fn visit_class_set_item_pre(
114        &mut self,
115        ast: &ast::ClassSetItem,
116    ) -> Result<(), Self::Err> {
117        match *ast {
118            ast::ClassSetItem::Bracketed(ref x) => {
119                self.fmt_class_bracketed_pre(x)
120            }
121            _ => Ok(()),
122        }
123    }
124
125    fn visit_class_set_item_post(
126        &mut self,
127        ast: &ast::ClassSetItem,
128    ) -> Result<(), Self::Err> {
129        use ast::ClassSetItem::*;
130
131        match *ast {
132            Empty(_) => Ok(()),
133            Literal(ref x) => self.fmt_literal(x),
134            Range(ref x) => {
135                self.fmt_literal(&x.start)?;
136                self.wtr.write_str("-")?;
137                self.fmt_literal(&x.end)?;
138                Ok(())
139            }
140            Ascii(ref x) => self.fmt_class_ascii(x),
141            Unicode(ref x) => self.fmt_class_unicode(x),
142            Perl(ref x) => self.fmt_class_perl(x),
143            Bracketed(ref x) => self.fmt_class_bracketed_post(x),
144            Union(_) => Ok(()),
145        }
146    }
147
148    fn visit_class_set_binary_op_in(
149        &mut self,
150        ast: &ast::ClassSetBinaryOp,
151    ) -> Result<(), Self::Err> {
152        self.fmt_class_set_binary_op_kind(&ast.kind)
153    }
154}
155
156impl<'p, W: fmt::Write> Writer<'p, W> {
157    fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
158        use ast::GroupKind::*;
159        match ast.kind {
160            CaptureIndex(_) => self.wtr.write_str("("),
161            CaptureName(ref x) => {
162                self.wtr.write_str("(?P<")?;
163                self.wtr.write_str(&x.name)?;
164                self.wtr.write_str(">")?;
165                Ok(())
166            }
167            NonCapturing(ref flags) => {
168                self.wtr.write_str("(?")?;
169                self.fmt_flags(flags)?;
170                self.wtr.write_str(":")?;
171                Ok(())
172            }
173        }
174    }
175
176    fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
177        self.wtr.write_str(")")
178    }
179
180    fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
181        use ast::RepetitionKind::*;
182        match ast.op.kind {
183            ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
184            ZeroOrOne => self.wtr.write_str("??"),
185            ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
186            ZeroOrMore => self.wtr.write_str("*?"),
187            OneOrMore if ast.greedy => self.wtr.write_str("+"),
188            OneOrMore => self.wtr.write_str("+?"),
189            Range(ref x) => {
190                self.fmt_repetition_range(x)?;
191                if !ast.greedy {
192                    self.wtr.write_str("?")?;
193                }
194                Ok(())
195            }
196        }
197    }
198
199    fn fmt_repetition_range(
200        &mut self,
201        ast: &ast::RepetitionRange,
202    ) -> fmt::Result {
203        use ast::RepetitionRange::*;
204        match *ast {
205            Exactly(x) => write!(self.wtr, "{{{}}}", x),
206            AtLeast(x) => write!(self.wtr, "{{{},}}", x),
207            Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
208        }
209    }
210
211    fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
212        use ast::LiteralKind::*;
213
214        match ast.kind {
215            Verbatim => self.wtr.write_char(ast.c),
216            Punctuation => write!(self.wtr, r"\{}", ast.c),
217            Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
218            HexFixed(ast::HexLiteralKind::X) => {
219                write!(self.wtr, r"\x{:02X}", ast.c as u32)
220            }
221            HexFixed(ast::HexLiteralKind::UnicodeShort) => {
222                write!(self.wtr, r"\u{:04X}", ast.c as u32)
223            }
224            HexFixed(ast::HexLiteralKind::UnicodeLong) => {
225                write!(self.wtr, r"\U{:08X}", ast.c as u32)
226            }
227            HexBrace(ast::HexLiteralKind::X) => {
228                write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
229            }
230            HexBrace(ast::HexLiteralKind::UnicodeShort) => {
231                write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
232            }
233            HexBrace(ast::HexLiteralKind::UnicodeLong) => {
234                write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
235            }
236            Special(ast::SpecialLiteralKind::Bell) => {
237                self.wtr.write_str(r"\a")
238            }
239            Special(ast::SpecialLiteralKind::FormFeed) => {
240                self.wtr.write_str(r"\f")
241            }
242            Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
243            Special(ast::SpecialLiteralKind::LineFeed) => {
244                self.wtr.write_str(r"\n")
245            }
246            Special(ast::SpecialLiteralKind::CarriageReturn) => {
247                self.wtr.write_str(r"\r")
248            }
249            Special(ast::SpecialLiteralKind::VerticalTab) => {
250                self.wtr.write_str(r"\v")
251            }
252            Special(ast::SpecialLiteralKind::Space) => {
253                self.wtr.write_str(r"\ ")
254            }
255        }
256    }
257
258    fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
259        use ast::AssertionKind::*;
260        match ast.kind {
261            StartLine => self.wtr.write_str("^"),
262            EndLine => self.wtr.write_str("$"),
263            StartText => self.wtr.write_str(r"\A"),
264            EndText => self.wtr.write_str(r"\z"),
265            WordBoundary => self.wtr.write_str(r"\b"),
266            NotWordBoundary => self.wtr.write_str(r"\B"),
267        }
268    }
269
270    fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
271        self.wtr.write_str("(?")?;
272        self.fmt_flags(&ast.flags)?;
273        self.wtr.write_str(")")?;
274        Ok(())
275    }
276
277    fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
278        use ast::{Flag, FlagsItemKind};
279
280        for item in &ast.items {
281            match item.kind {
282                FlagsItemKind::Negation => self.wtr.write_str("-"),
283                FlagsItemKind::Flag(ref flag) => match *flag {
284                    Flag::CaseInsensitive => self.wtr.write_str("i"),
285                    Flag::MultiLine => self.wtr.write_str("m"),
286                    Flag::DotMatchesNewLine => self.wtr.write_str("s"),
287                    Flag::SwapGreed => self.wtr.write_str("U"),
288                    Flag::Unicode => self.wtr.write_str("u"),
289                    Flag::IgnoreWhitespace => self.wtr.write_str("x"),
290                },
291            }?;
292        }
293        Ok(())
294    }
295
296    fn fmt_class_bracketed_pre(
297        &mut self,
298        ast: &ast::ClassBracketed,
299    ) -> fmt::Result {
300        if ast.negated {
301            self.wtr.write_str("[^")
302        } else {
303            self.wtr.write_str("[")
304        }
305    }
306
307    fn fmt_class_bracketed_post(
308        &mut self,
309        _ast: &ast::ClassBracketed,
310    ) -> fmt::Result {
311        self.wtr.write_str("]")
312    }
313
314    fn fmt_class_set_binary_op_kind(
315        &mut self,
316        ast: &ast::ClassSetBinaryOpKind,
317    ) -> fmt::Result {
318        use ast::ClassSetBinaryOpKind::*;
319        match *ast {
320            Intersection => self.wtr.write_str("&&"),
321            Difference => self.wtr.write_str("--"),
322            SymmetricDifference => self.wtr.write_str("~~"),
323        }
324    }
325
326    fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
327        use ast::ClassPerlKind::*;
328        match ast.kind {
329            Digit if ast.negated => self.wtr.write_str(r"\D"),
330            Digit => self.wtr.write_str(r"\d"),
331            Space if ast.negated => self.wtr.write_str(r"\S"),
332            Space => self.wtr.write_str(r"\s"),
333            Word if ast.negated => self.wtr.write_str(r"\W"),
334            Word => self.wtr.write_str(r"\w"),
335        }
336    }
337
338    fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
339        use ast::ClassAsciiKind::*;
340        match ast.kind {
341            Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
342            Alnum => self.wtr.write_str("[:alnum:]"),
343            Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
344            Alpha => self.wtr.write_str("[:alpha:]"),
345            Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
346            Ascii => self.wtr.write_str("[:ascii:]"),
347            Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
348            Blank => self.wtr.write_str("[:blank:]"),
349            Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
350            Cntrl => self.wtr.write_str("[:cntrl:]"),
351            Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
352            Digit => self.wtr.write_str("[:digit:]"),
353            Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
354            Graph => self.wtr.write_str("[:graph:]"),
355            Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
356            Lower => self.wtr.write_str("[:lower:]"),
357            Print if ast.negated => self.wtr.write_str("[:^print:]"),
358            Print => self.wtr.write_str("[:print:]"),
359            Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
360            Punct => self.wtr.write_str("[:punct:]"),
361            Space if ast.negated => self.wtr.write_str("[:^space:]"),
362            Space => self.wtr.write_str("[:space:]"),
363            Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
364            Upper => self.wtr.write_str("[:upper:]"),
365            Word if ast.negated => self.wtr.write_str("[:^word:]"),
366            Word => self.wtr.write_str("[:word:]"),
367            Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
368            Xdigit => self.wtr.write_str("[:xdigit:]"),
369        }
370    }
371
372    fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
373        use ast::ClassUnicodeKind::*;
374        use ast::ClassUnicodeOpKind::*;
375
376        if ast.negated {
377            self.wtr.write_str(r"\P")?;
378        } else {
379            self.wtr.write_str(r"\p")?;
380        }
381        match ast.kind {
382            OneLetter(c) => self.wtr.write_char(c),
383            Named(ref x) => write!(self.wtr, "{{{}}}", x),
384            NamedValue { op: Equal, ref name, ref value } => {
385                write!(self.wtr, "{{{}={}}}", name, value)
386            }
387            NamedValue { op: Colon, ref name, ref value } => {
388                write!(self.wtr, "{{{}:{}}}", name, value)
389            }
390            NamedValue { op: NotEqual, ref name, ref value } => {
391                write!(self.wtr, "{{{}!={}}}", name, value)
392            }
393        }
394    }
395}
396
#[cfg(test)]
mod tests {
    use super::Printer;
    use ast::parse::ParserBuilder;

    /// Asserts that parsing `given` with a default parser and printing the
    /// resulting AST reproduces `given` exactly.
    fn roundtrip(given: &str) {
        roundtrip_with(|b| b, given);
    }

    /// Like `roundtrip`, but lets `f` configure the parser builder before
    /// the pattern is parsed.
    ///
    /// Panics if the pattern fails to parse, if printing fails, or if the
    /// printed pattern differs from `given`.
    fn roundtrip_with<F>(mut f: F, given: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let ast = builder.build().parse(given).unwrap();

        let mut printer = Printer::new();
        let mut dst = String::new();
        printer.print(&ast, &mut dst).unwrap();
        assert_eq!(given, dst);
    }

    #[test]
    fn print_literal() {
        roundtrip("a");
        roundtrip(r"\[");
        roundtrip_with(|b| b.octal(true), r"\141");
        roundtrip(r"\x61");
        roundtrip(r"\x7F");
        roundtrip(r"\u0061");
        roundtrip(r"\U00000061");
        roundtrip(r"\x{61}");
        roundtrip(r"\x{7F}");
        roundtrip(r"\u{61}");
        roundtrip(r"\U{61}");

        roundtrip(r"\a");
        roundtrip(r"\f");
        roundtrip(r"\t");
        roundtrip(r"\n");
        roundtrip(r"\r");
        roundtrip(r"\v");
        roundtrip(r"(?x)\ ");
    }

    #[test]
    fn print_dot() {
        roundtrip(".");
    }

    #[test]
    fn print_concat() {
        roundtrip("ab");
        roundtrip("abcde");
        roundtrip("a(bcd)ef");
    }

    #[test]
    fn print_alternation() {
        roundtrip("a|b");
        roundtrip("a|b|c|d|e");
        roundtrip("|a|b|c|d|e");
        roundtrip("|a|b|c|d|e|");
        roundtrip("a(b|c|d)|e|f");
    }

    #[test]
    fn print_assertion() {
        roundtrip(r"^");
        roundtrip(r"$");
        roundtrip(r"\A");
        roundtrip(r"\z");
        roundtrip(r"\b");
        roundtrip(r"\B");
    }

    #[test]
    fn print_repetition() {
        roundtrip("a?");
        roundtrip("a??");
        roundtrip("a*");
        roundtrip("a*?");
        roundtrip("a+");
        roundtrip("a+?");
        roundtrip("a{5}");
        roundtrip("a{5}?");
        roundtrip("a{5,}");
        roundtrip("a{5,}?");
        roundtrip("a{5,10}");
        roundtrip("a{5,10}?");
    }

    #[test]
    fn print_flags() {
        roundtrip("(?i)");
        roundtrip("(?-i)");
        roundtrip("(?s-i)");
        roundtrip("(?-si)");
        roundtrip("(?siUmux)");
    }

    #[test]
    fn print_group() {
        roundtrip("(?i:a)");
        roundtrip("(?P<foo>a)");
        roundtrip("(a)");
    }

    #[test]
    fn print_class() {
        roundtrip(r"[abc]");
        roundtrip(r"[a-z]");
        roundtrip(r"[^a-z]");
        roundtrip(r"[a-z0-9]");
        // A `-` is a literal both at the start and at the end of a class.
        roundtrip(r"[-a-z0-9]");
        roundtrip(r"[a-z0-9-]");
        roundtrip(r"[a-z0-9---]");
        roundtrip(r"[a-z&&m-n]");
        roundtrip(r"[[a-z&&m-n]]");
        roundtrip(r"[a-z--m-n]");
        roundtrip(r"[a-z~~m-n]");
        roundtrip(r"[a-z[0-9]]");
        roundtrip(r"[a-z[^0-9]]");

        roundtrip(r"\d");
        roundtrip(r"\D");
        roundtrip(r"\s");
        roundtrip(r"\S");
        roundtrip(r"\w");
        roundtrip(r"\W");

        roundtrip(r"[[:alnum:]]");
        roundtrip(r"[[:^alnum:]]");
        roundtrip(r"[[:alpha:]]");
        roundtrip(r"[[:^alpha:]]");
        roundtrip(r"[[:ascii:]]");
        roundtrip(r"[[:^ascii:]]");
        roundtrip(r"[[:blank:]]");
        roundtrip(r"[[:^blank:]]");
        roundtrip(r"[[:cntrl:]]");
        roundtrip(r"[[:^cntrl:]]");
        roundtrip(r"[[:digit:]]");
        roundtrip(r"[[:^digit:]]");
        roundtrip(r"[[:graph:]]");
        roundtrip(r"[[:^graph:]]");
        roundtrip(r"[[:lower:]]");
        roundtrip(r"[[:^lower:]]");
        roundtrip(r"[[:print:]]");
        roundtrip(r"[[:^print:]]");
        roundtrip(r"[[:punct:]]");
        roundtrip(r"[[:^punct:]]");
        roundtrip(r"[[:space:]]");
        roundtrip(r"[[:^space:]]");
        roundtrip(r"[[:upper:]]");
        roundtrip(r"[[:^upper:]]");
        roundtrip(r"[[:word:]]");
        roundtrip(r"[[:^word:]]");
        roundtrip(r"[[:xdigit:]]");
        roundtrip(r"[[:^xdigit:]]");

        roundtrip(r"\pL");
        roundtrip(r"\PL");
        roundtrip(r"\p{L}");
        roundtrip(r"\P{L}");
        roundtrip(r"\p{X=Y}");
        roundtrip(r"\P{X=Y}");
        roundtrip(r"\p{X:Y}");
        roundtrip(r"\P{X:Y}");
        roundtrip(r"\p{X!=Y}");
        roundtrip(r"\P{X!=Y}");
    }
}