regex_syntax/hir/
translate.rs

/*!
Defines a translator that converts an `Ast` to an `Hir`.
*/
4
5use std::cell::{Cell, RefCell};
6use std::result;
7
8use ast::{self, Ast, Span, Visitor};
9use hir::{self, Error, ErrorKind, Hir};
10use unicode::{self, ClassQuery};
11
12type Result<T> = result::Result<T, Error>;
13
/// A builder for constructing an AST->HIR translator.
///
/// The builder only records configuration; `build` copies that
/// configuration into a fresh `Translator`.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Copied verbatim into every translator produced by `build`.
    allow_invalid_utf8: bool,
    /// The flag settings each built translator starts out with.
    flags: Flags,
}
20
21impl Default for TranslatorBuilder {
22    fn default() -> TranslatorBuilder {
23        TranslatorBuilder::new()
24    }
25}
26
27impl TranslatorBuilder {
28    /// Create a new translator builder with a default c onfiguration.
29    pub fn new() -> TranslatorBuilder {
30        TranslatorBuilder {
31            allow_invalid_utf8: false,
32            flags: Flags::default(),
33        }
34    }
35
36    /// Build a translator using the current configuration.
37    pub fn build(&self) -> Translator {
38        Translator {
39            stack: RefCell::new(vec![]),
40            flags: Cell::new(self.flags),
41            allow_invalid_utf8: self.allow_invalid_utf8,
42        }
43    }
44
45    /// When enabled, translation will permit the construction of a regular
46    /// expression that may match invalid UTF-8.
47    ///
48    /// When disabled (the default), the translator is guaranteed to produce
49    /// an expression that will only ever match valid UTF-8 (otherwise, the
50    /// translator will return an error).
51    ///
52    /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53    /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54    /// the parser to return an error. Namely, a negated ASCII word boundary
55    /// can result in matching positions that aren't valid UTF-8 boundaries.
56    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57        self.allow_invalid_utf8 = yes;
58        self
59    }
60
61    /// Enable or disable the case insensitive flag (`i`) by default.
62    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63        self.flags.case_insensitive = if yes { Some(true) } else { None };
64        self
65    }
66
67    /// Enable or disable the multi-line matching flag (`m`) by default.
68    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69        self.flags.multi_line = if yes { Some(true) } else { None };
70        self
71    }
72
73    /// Enable or disable the "dot matches any character" flag (`s`) by
74    /// default.
75    pub fn dot_matches_new_line(
76        &mut self,
77        yes: bool,
78    ) -> &mut TranslatorBuilder {
79        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80        self
81    }
82
83    /// Enable or disable the "swap greed" flag (`U`) by default.
84    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85        self.flags.swap_greed = if yes { Some(true) } else { None };
86        self
87    }
88
89    /// Enable or disable the Unicode flag (`u`) by default.
90    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91        self.flags.unicode = if yes { None } else { Some(false) };
92        self
93    }
94}
95
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    ///
    /// Wrapped in a `RefCell` so that the visitor, which only holds a shared
    /// reference to the translator, can push and pop frames.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    ///
    /// Stored in a `Cell` so that flag directives met during translation can
    /// replace them through a shared reference.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
113
114impl Translator {
115    /// Create a new translator using the default configuration.
116    pub fn new() -> Translator {
117        TranslatorBuilder::new().build()
118    }
119
120    /// Translate the given abstract syntax tree (AST) into a high level
121    /// intermediate representation (HIR).
122    ///
123    /// If there was a problem doing the translation, then an HIR-specific
124    /// error is returned.
125    ///
126    /// The original pattern string used to produce the `Ast` *must* also be
127    /// provided. The translator does not use the pattern string during any
128    /// correct translation, but is used for error reporting.
129    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130        ast::visit(ast, TranslatorI::new(self, pattern))
131    }
132}
133
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
///
/// `Expr`, `ClassUnicode` and `ClassBytes` carry partial results, while
/// `Group`, `Concat` and `Alternation` act as markers that delimit the
/// frames belonging to one compound expression.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags, if any, when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        ///
        /// This is `None` when the group carries no flags of its own, in
        /// which case there is nothing to restore.
        old_flags: Option<Flags>,
    },
    /// This is pushed whenever a concatenation is observed. After visiting
    /// every sub-expression in the concatenation, the translator's stack is
    /// popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever an alternation is observed. After visiting
    /// every sub-expression in the alternation, the translator's stack is
    /// popped until it sees an Alternation frame.
    Alternation,
}
184
185impl HirFrame {
186    /// Assert that the current stack frame is an Hir expression and return it.
187    fn unwrap_expr(self) -> Hir {
188        match self {
189            HirFrame::Expr(expr) => expr,
190            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
191        }
192    }
193
194    /// Assert that the current stack frame is a Unicode class expression and
195    /// return it.
196    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
197        match self {
198            HirFrame::ClassUnicode(cls) => cls,
199            _ => panic!(
200                "tried to unwrap Unicode class \
201                 from HirFrame, got: {:?}",
202                self
203            ),
204        }
205    }
206
207    /// Assert that the current stack frame is a byte class expression and
208    /// return it.
209    fn unwrap_class_bytes(self) -> hir::ClassBytes {
210        match self {
211            HirFrame::ClassBytes(cls) => cls,
212            _ => panic!(
213                "tried to unwrap byte class \
214                 from HirFrame, got: {:?}",
215                self
216            ),
217        }
218    }
219
220    /// Assert that the current stack frame is a group indicator and return
221    /// its corresponding flags (the flags that were active at the time the
222    /// group was entered) if they exist.
223    fn unwrap_group(self) -> Option<Flags> {
224        match self {
225            HirFrame::Group { old_flags } => old_flags,
226            _ => {
227                panic!("tried to unwrap group from HirFrame, got: {:?}", self)
228            }
229        }
230    }
231}
232
/// The translation itself, expressed as an `ast::Visitor`.
///
/// Every callback communicates with the others exclusively through the
/// translator's explicit frame stack: the `*_pre`/`*_in` callbacks push
/// markers or empty accumulators, and the `*_post` callbacks pop what their
/// children produced and push the combined result.
impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
    type Output = Hir;
    type Err = Error;

    /// Return the finished translation.
    ///
    /// Panics if the stack does not hold exactly one frame, or if that frame
    /// is not an expression.
    fn finish(self) -> Result<Hir> {
        // Once the whole Ast has been visited, we should have exactly one
        // HIR on the stack.
        assert_eq!(self.trans().stack.borrow().len(), 1);
        Ok(self.pop().unwrap().unwrap_expr())
    }

    /// Push the frame (if any) that the matching `visit_post` arm expects to
    /// find beneath the results of the node's children.
    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
        match *ast {
            // A bracketed class starts out as an empty accumulator whose
            // kind is fixed by the Unicode flag active at this point.
            Ast::Class(ast::Class::Bracketed(_)) => {
                if self.flags().unicode() {
                    let cls = hir::ClassUnicode::empty();
                    self.push(HirFrame::ClassUnicode(cls));
                } else {
                    let cls = hir::ClassBytes::empty();
                    self.push(HirFrame::ClassBytes(cls));
                }
            }
            // If the group carries flags, apply them now and remember the
            // previous ones so that visit_post can restore them.
            Ast::Group(ref x) => {
                let old_flags = x.flags().map(|ast| self.set_flags(ast));
                self.push(HirFrame::Group { old_flags: old_flags });
            }
            // No marker is pushed for an empty concatenation/alternation.
            Ast::Concat(ref x) if x.asts.is_empty() => {}
            Ast::Concat(_) => {
                self.push(HirFrame::Concat);
            }
            Ast::Alternation(ref x) if x.asts.is_empty() => {}
            Ast::Alternation(_) => {
                self.push(HirFrame::Alternation);
            }
            _ => {}
        }
        Ok(())
    }

    /// Translate `ast` itself, consuming the frames produced by its children
    /// and pushing a single `HirFrame::Expr` in their place.
    ///
    /// Errors raised by the `hir_*` helpers are returned as is; a bracketed
    /// class that ends up empty is rejected with `EmptyClassNotAllowed`.
    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
        match *ast {
            Ast::Empty(_) => {
                self.push(HirFrame::Expr(Hir::empty()));
            }
            Ast::Flags(ref x) => {
                // The previous flags returned by set_flags are dropped on
                // purpose: a bare flag directive is not undone here.
                self.set_flags(&x.flags);
                // Flags in the AST are generally considered directives and
                // not actual sub-expressions. However, they can be used in
                // the concrete syntax like `((?i))`, and we need some kind of
                // indication of an expression there, and Empty is the correct
                // choice.
                //
                // There can also be things like `(?i)+`, but we rule those out
                // in the parser. In the future, we might allow them for
                // consistency sake.
                self.push(HirFrame::Expr(Hir::empty()));
            }
            Ast::Literal(ref x) => {
                self.push(HirFrame::Expr(self.hir_literal(x)?));
            }
            Ast::Dot(span) => {
                self.push(HirFrame::Expr(self.hir_dot(span)?));
            }
            Ast::Assertion(ref x) => {
                self.push(HirFrame::Expr(self.hir_assertion(x)?));
            }
            Ast::Class(ast::Class::Perl(ref x)) => {
                if self.flags().unicode() {
                    let cls = self.hir_perl_unicode_class(x)?;
                    let hcls = hir::Class::Unicode(cls);
                    self.push(HirFrame::Expr(Hir::class(hcls)));
                } else {
                    let cls = self.hir_perl_byte_class(x);
                    let hcls = hir::Class::Bytes(cls);
                    self.push(HirFrame::Expr(Hir::class(hcls)));
                }
            }
            Ast::Class(ast::Class::Unicode(ref x)) => {
                let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
                self.push(HirFrame::Expr(Hir::class(cls)));
            }
            Ast::Class(ast::Class::Bracketed(ref ast)) => {
                // Pop the accumulator pushed in visit_pre, apply case
                // folding/negation, and reject a class that matches nothing.
                if self.flags().unicode() {
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
                    self.unicode_fold_and_negate(
                        &ast.span,
                        ast.negated,
                        &mut cls,
                    )?;
                    if cls.iter().next().is_none() {
                        return Err(self.error(
                            ast.span,
                            ErrorKind::EmptyClassNotAllowed,
                        ));
                    }
                    let expr = Hir::class(hir::Class::Unicode(cls));
                    self.push(HirFrame::Expr(expr));
                } else {
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
                    self.bytes_fold_and_negate(
                        &ast.span,
                        ast.negated,
                        &mut cls,
                    )?;
                    if cls.iter().next().is_none() {
                        return Err(self.error(
                            ast.span,
                            ErrorKind::EmptyClassNotAllowed,
                        ));
                    }

                    let expr = Hir::class(hir::Class::Bytes(cls));
                    self.push(HirFrame::Expr(expr));
                }
            }
            Ast::Repetition(ref x) => {
                let expr = self.pop().unwrap().unwrap_expr();
                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
            }
            Ast::Group(ref x) => {
                // The sub-expression sits on top of the Group marker.
                let expr = self.pop().unwrap().unwrap_expr();
                if let Some(flags) = self.pop().unwrap().unwrap_group() {
                    self.trans().flags.set(flags);
                }
                self.push(HirFrame::Expr(self.hir_group(x, expr)));
            }
            Ast::Concat(_) => {
                // Collect expressions until the first non-Expr frame. That
                // frame is consumed by the failed pattern match as well,
                // which is how the Concat marker gets removed.
                //
                // NOTE(review): an empty Concat pushes no marker in
                // visit_pre, yet this loop still pops; presumably an empty
                // Concat never reaches the translator -- confirm.
                let mut exprs = vec![];
                while let Some(HirFrame::Expr(expr)) = self.pop() {
                    // Empty sub-expressions contribute nothing to a
                    // concatenation, so they are dropped.
                    if !expr.kind().is_empty() {
                        exprs.push(expr);
                    }
                }
                // Frames come off the stack last-to-first.
                exprs.reverse();
                self.push(HirFrame::Expr(Hir::concat(exprs)));
            }
            Ast::Alternation(_) => {
                // Same scheme as Concat, except that empty branches are
                // kept.
                let mut exprs = vec![];
                while let Some(HirFrame::Expr(expr)) = self.pop() {
                    exprs.push(expr);
                }
                exprs.reverse();
                self.push(HirFrame::Expr(Hir::alternation(exprs)));
            }
        }
        Ok(())
    }

    /// Push a fresh, empty accumulator for a nested bracketed class. Every
    /// other kind of class set item needs no preparation.
    fn visit_class_set_item_pre(
        &mut self,
        ast: &ast::ClassSetItem,
    ) -> Result<()> {
        match *ast {
            ast::ClassSetItem::Bracketed(_) => {
                if self.flags().unicode() {
                    let cls = hir::ClassUnicode::empty();
                    self.push(HirFrame::ClassUnicode(cls));
                } else {
                    let cls = hir::ClassBytes::empty();
                    self.push(HirFrame::ClassBytes(cls));
                }
            }
            // We needn't handle the Union case here since the visitor will
            // do it for us.
            _ => {}
        }
        Ok(())
    }

    /// Fold a single class set item into the class accumulator on top of the
    /// stack.
    ///
    /// Each arm pops the accumulator, adds the item to it and pushes it back.
    /// Note that when an arm returns an error after popping, the accumulator
    /// is not pushed back.
    fn visit_class_set_item_post(
        &mut self,
        ast: &ast::ClassSetItem,
    ) -> Result<()> {
        match *ast {
            ast::ClassSetItem::Empty(_) => {}
            ast::ClassSetItem::Literal(ref x) => {
                if self.flags().unicode() {
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
                    self.push(HirFrame::ClassUnicode(cls));
                } else {
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
                    let byte = self.class_literal_byte(x)?;
                    cls.push(hir::ClassBytesRange::new(byte, byte));
                    self.push(HirFrame::ClassBytes(cls));
                }
            }
            ast::ClassSetItem::Range(ref x) => {
                if self.flags().unicode() {
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
                    self.push(HirFrame::ClassUnicode(cls));
                } else {
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
                    let start = self.class_literal_byte(&x.start)?;
                    let end = self.class_literal_byte(&x.end)?;
                    cls.push(hir::ClassBytesRange::new(start, end));
                    self.push(HirFrame::ClassBytes(cls));
                }
            }
            ast::ClassSetItem::Ascii(ref x) => {
                // NOTE(review): fold/negate is applied to the whole
                // accumulator after the ASCII ranges are added, not only to
                // the ranges of this item -- confirm this is intended.
                if self.flags().unicode() {
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
                    for &(s, e) in ascii_class(&x.kind) {
                        cls.push(hir::ClassUnicodeRange::new(s, e));
                    }
                    self.unicode_fold_and_negate(
                        &x.span, x.negated, &mut cls,
                    )?;
                    self.push(HirFrame::ClassUnicode(cls));
                } else {
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
                    for &(s, e) in ascii_class(&x.kind) {
                        cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
                    }
                    self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?;
                    self.push(HirFrame::ClassBytes(cls));
                }
            }
            ast::ClassSetItem::Unicode(ref x) => {
                // hir_unicode_class is called before the pop, so on error
                // the accumulator is left in place.
                let xcls = self.hir_unicode_class(x)?;
                let mut cls = self.pop().unwrap().unwrap_class_unicode();
                cls.union(&xcls);
                self.push(HirFrame::ClassUnicode(cls));
            }
            ast::ClassSetItem::Perl(ref x) => {
                if self.flags().unicode() {
                    let xcls = self.hir_perl_unicode_class(x)?;
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
                    cls.union(&xcls);
                    self.push(HirFrame::ClassUnicode(cls));
                } else {
                    let xcls = self.hir_perl_byte_class(x);
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
                    cls.union(&xcls);
                    self.push(HirFrame::ClassBytes(cls));
                }
            }
            ast::ClassSetItem::Bracketed(ref ast) => {
                // cls1 is the nested class (pushed in
                // visit_class_set_item_pre); cls2 is the enclosing
                // accumulator beneath it.
                if self.flags().unicode() {
                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
                    self.unicode_fold_and_negate(
                        &ast.span,
                        ast.negated,
                        &mut cls1,
                    )?;

                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
                    cls2.union(&cls1);
                    self.push(HirFrame::ClassUnicode(cls2));
                } else {
                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
                    self.bytes_fold_and_negate(
                        &ast.span,
                        ast.negated,
                        &mut cls1,
                    )?;

                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
                    cls2.union(&cls1);
                    self.push(HirFrame::ClassBytes(cls2));
                }
            }
            // This is handled automatically by the visitor.
            ast::ClassSetItem::Union(_) => {}
        }
        Ok(())
    }

    /// Push an empty accumulator for the first operand of a binary class set
    /// operation. `visit_class_set_binary_op_post` pops it as `lhs`.
    fn visit_class_set_binary_op_pre(
        &mut self,
        _op: &ast::ClassSetBinaryOp,
    ) -> Result<()> {
        if self.flags().unicode() {
            let cls = hir::ClassUnicode::empty();
            self.push(HirFrame::ClassUnicode(cls));
        } else {
            let cls = hir::ClassBytes::empty();
            self.push(HirFrame::ClassBytes(cls));
        }
        Ok(())
    }

    /// Push an empty accumulator for the second operand of a binary class
    /// set operation. `visit_class_set_binary_op_post` pops it as `rhs`.
    fn visit_class_set_binary_op_in(
        &mut self,
        _op: &ast::ClassSetBinaryOp,
    ) -> Result<()> {
        if self.flags().unicode() {
            let cls = hir::ClassUnicode::empty();
            self.push(HirFrame::ClassUnicode(cls));
        } else {
            let cls = hir::ClassBytes::empty();
            self.push(HirFrame::ClassBytes(cls));
        }
        Ok(())
    }

    /// Combine the two operand classes according to `op.kind` and union the
    /// result into the enclosing class accumulator.
    ///
    /// In Unicode mode with case insensitivity enabled, a failure to case
    /// fold an operand is reported as `UnicodeCaseUnavailable` at that
    /// operand's span.
    fn visit_class_set_binary_op_post(
        &mut self,
        op: &ast::ClassSetBinaryOp,
    ) -> Result<()> {
        use ast::ClassSetBinaryOpKind::*;

        // Frames come off in reverse push order: the right operand, the left
        // operand, and then the accumulator of the enclosing class.
        if self.flags().unicode() {
            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
            let mut cls = self.pop().unwrap().unwrap_class_unicode();
            // Both operands are folded before the set operation is applied.
            if self.flags().case_insensitive() {
                rhs.try_case_fold_simple().map_err(|_| {
                    self.error(
                        op.rhs.span().clone(),
                        ErrorKind::UnicodeCaseUnavailable,
                    )
                })?;
                lhs.try_case_fold_simple().map_err(|_| {
                    self.error(
                        op.lhs.span().clone(),
                        ErrorKind::UnicodeCaseUnavailable,
                    )
                })?;
            }
            // The result of the operation is written into `lhs`.
            match op.kind {
                Intersection => lhs.intersect(&rhs),
                Difference => lhs.difference(&rhs),
                SymmetricDifference => lhs.symmetric_difference(&rhs),
            }
            cls.union(&lhs);
            self.push(HirFrame::ClassUnicode(cls));
        } else {
            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
            let mut cls = self.pop().unwrap().unwrap_class_bytes();
            if self.flags().case_insensitive() {
                rhs.case_fold_simple();
                lhs.case_fold_simple();
            }
            match op.kind {
                Intersection => lhs.intersect(&rhs),
                Difference => lhs.difference(&rhs),
                SymmetricDifference => lhs.symmetric_difference(&rhs),
            }
            cls.union(&lhs);
            self.push(HirFrame::ClassBytes(cls));
        }
        Ok(())
    }
}
579
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose frame stack and active flags are read and
    /// updated while visiting.
    trans: &'t Translator,
    /// The pattern text, copied into every error that gets reported.
    pattern: &'p str,
}
591
592impl<'t, 'p> TranslatorI<'t, 'p> {
593    /// Build a new internal translator.
594    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
595        TranslatorI { trans: trans, pattern: pattern }
596    }
597
598    /// Return a reference to the underlying translator.
599    fn trans(&self) -> &Translator {
600        &self.trans
601    }
602
603    /// Push the given frame on to the call stack.
604    fn push(&self, frame: HirFrame) {
605        self.trans().stack.borrow_mut().push(frame);
606    }
607
608    /// Pop the top of the call stack. If the call stack is empty, return None.
609    fn pop(&self) -> Option<HirFrame> {
610        self.trans().stack.borrow_mut().pop()
611    }
612
613    /// Create a new error with the given span and error type.
614    fn error(&self, span: Span, kind: ErrorKind) -> Error {
615        Error { kind: kind, pattern: self.pattern.to_string(), span: span }
616    }
617
618    /// Return a copy of the active flags.
619    fn flags(&self) -> Flags {
620        self.trans().flags.get()
621    }
622
623    /// Set the flags of this translator from the flags set in the given AST.
624    /// Then, return the old flags.
625    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
626        let old_flags = self.flags();
627        let mut new_flags = Flags::from_ast(ast_flags);
628        new_flags.merge(&old_flags);
629        self.trans().flags.set(new_flags);
630        old_flags
631    }
632
633    fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
634        let ch = match self.literal_to_char(lit)? {
635            byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
636            hir::Literal::Unicode(ch) => ch,
637        };
638        if self.flags().case_insensitive() {
639            self.hir_from_char_case_insensitive(lit.span, ch)
640        } else {
641            self.hir_from_char(lit.span, ch)
642        }
643    }
644
645    /// Convert an Ast literal to its scalar representation.
646    ///
647    /// When Unicode mode is enabled, then this always succeeds and returns a
648    /// `char` (Unicode scalar value).
649    ///
650    /// When Unicode mode is disabled, then a raw byte is returned. If that
651    /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
652    /// an error.
653    fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
654        if self.flags().unicode() {
655            return Ok(hir::Literal::Unicode(lit.c));
656        }
657        let byte = match lit.byte() {
658            None => return Ok(hir::Literal::Unicode(lit.c)),
659            Some(byte) => byte,
660        };
661        if byte <= 0x7F {
662            return Ok(hir::Literal::Unicode(byte as char));
663        }
664        if !self.trans().allow_invalid_utf8 {
665            return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
666        }
667        Ok(hir::Literal::Byte(byte))
668    }
669
670    fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
671        if !self.flags().unicode() && c.len_utf8() > 1 {
672            return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
673        }
674        Ok(Hir::literal(hir::Literal::Unicode(c)))
675    }
676
677    fn hir_from_char_case_insensitive(
678        &self,
679        span: Span,
680        c: char,
681    ) -> Result<Hir> {
682        if self.flags().unicode() {
683            // If case folding won't do anything, then don't bother trying.
684            let map =
685                unicode::contains_simple_case_mapping(c, c).map_err(|_| {
686                    self.error(span, ErrorKind::UnicodeCaseUnavailable)
687                })?;
688            if !map {
689                return self.hir_from_char(span, c);
690            }
691            let mut cls =
692                hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
693                    c, c,
694                )]);
695            cls.try_case_fold_simple().map_err(|_| {
696                self.error(span, ErrorKind::UnicodeCaseUnavailable)
697            })?;
698            Ok(Hir::class(hir::Class::Unicode(cls)))
699        } else {
700            if c.len_utf8() > 1 {
701                return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
702            }
703            // If case folding won't do anything, then don't bother trying.
704            match c {
705                'A'..='Z' | 'a'..='z' => {}
706                _ => return self.hir_from_char(span, c),
707            }
708            let mut cls =
709                hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
710                    c as u8, c as u8,
711                )]);
712            cls.case_fold_simple();
713            Ok(Hir::class(hir::Class::Bytes(cls)))
714        }
715    }
716
717    fn hir_dot(&self, span: Span) -> Result<Hir> {
718        let unicode = self.flags().unicode();
719        if !unicode && !self.trans().allow_invalid_utf8 {
720            return Err(self.error(span, ErrorKind::InvalidUtf8));
721        }
722        Ok(if self.flags().dot_matches_new_line() {
723            Hir::any(!unicode)
724        } else {
725            Hir::dot(!unicode)
726        })
727    }
728
729    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
730        let unicode = self.flags().unicode();
731        let multi_line = self.flags().multi_line();
732        Ok(match asst.kind {
733            ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
734                hir::Anchor::StartLine
735            } else {
736                hir::Anchor::StartText
737            }),
738            ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
739                hir::Anchor::EndLine
740            } else {
741                hir::Anchor::EndText
742            }),
743            ast::AssertionKind::StartText => {
744                Hir::anchor(hir::Anchor::StartText)
745            }
746            ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
747            ast::AssertionKind::WordBoundary => {
748                Hir::word_boundary(if unicode {
749                    hir::WordBoundary::Unicode
750                } else {
751                    hir::WordBoundary::Ascii
752                })
753            }
754            ast::AssertionKind::NotWordBoundary => {
755                Hir::word_boundary(if unicode {
756                    hir::WordBoundary::UnicodeNegate
757                } else {
758                    // It is possible for negated ASCII word boundaries to
759                    // match at invalid UTF-8 boundaries, even when searching
760                    // valid UTF-8.
761                    if !self.trans().allow_invalid_utf8 {
762                        return Err(
763                            self.error(asst.span, ErrorKind::InvalidUtf8)
764                        );
765                    }
766                    hir::WordBoundary::AsciiNegate
767                })
768            }
769        })
770    }
771
772    fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
773        let kind = match group.kind {
774            ast::GroupKind::CaptureIndex(idx) => {
775                hir::GroupKind::CaptureIndex(idx)
776            }
777            ast::GroupKind::CaptureName(ref capname) => {
778                hir::GroupKind::CaptureName {
779                    name: capname.name.clone(),
780                    index: capname.index,
781                }
782            }
783            ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
784        };
785        Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
786    }
787
788    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
789        let kind = match rep.op.kind {
790            ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
791            ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
792            ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
793            ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
794                hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
795            }
796            ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
797                hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
798            }
799            ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
800                m,
801                n,
802            )) => {
803                hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
804            }
805        };
806        let greedy =
807            if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
808        Hir::repetition(hir::Repetition {
809            kind: kind,
810            greedy: greedy,
811            hir: Box::new(expr),
812        })
813    }
814
815    fn hir_unicode_class(
816        &self,
817        ast_class: &ast::ClassUnicode,
818    ) -> Result<hir::ClassUnicode> {
819        use ast::ClassUnicodeKind::*;
820
821        if !self.flags().unicode() {
822            return Err(
823                self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
824            );
825        }
826        let query = match ast_class.kind {
827            OneLetter(name) => ClassQuery::OneLetter(name),
828            Named(ref name) => ClassQuery::Binary(name),
829            NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
830                property_name: name,
831                property_value: value,
832            },
833        };
834        let mut result = self.convert_unicode_class_error(
835            &ast_class.span,
836            unicode::class(query),
837        );
838        if let Ok(ref mut class) = result {
839            self.unicode_fold_and_negate(
840                &ast_class.span,
841                ast_class.negated,
842                class,
843            )?;
844        }
845        result
846    }
847
848    fn hir_perl_unicode_class(
849        &self,
850        ast_class: &ast::ClassPerl,
851    ) -> Result<hir::ClassUnicode> {
852        use ast::ClassPerlKind::*;
853
854        assert!(self.flags().unicode());
855        let result = match ast_class.kind {
856            Digit => unicode::perl_digit(),
857            Space => unicode::perl_space(),
858            Word => unicode::perl_word(),
859        };
860        let mut class =
861            self.convert_unicode_class_error(&ast_class.span, result)?;
862        // We needn't apply case folding here because the Perl Unicode classes
863        // are already closed under Unicode simple case folding.
864        if ast_class.negated {
865            class.negate();
866        }
867        Ok(class)
868    }
869
870    fn hir_perl_byte_class(
871        &self,
872        ast_class: &ast::ClassPerl,
873    ) -> hir::ClassBytes {
874        use ast::ClassPerlKind::*;
875
876        assert!(!self.flags().unicode());
877        let mut class = match ast_class.kind {
878            Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
879            Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
880            Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
881        };
882        // We needn't apply case folding here because the Perl ASCII classes
883        // are already closed (under ASCII case folding).
884        if ast_class.negated {
885            class.negate();
886        }
887        class
888    }
889
890    /// Converts the given Unicode specific error to an HIR translation error.
891    ///
892    /// The span given should approximate the position at which an error would
893    /// occur.
894    fn convert_unicode_class_error(
895        &self,
896        span: &Span,
897        result: unicode::Result<hir::ClassUnicode>,
898    ) -> Result<hir::ClassUnicode> {
899        result.map_err(|err| {
900            let sp = span.clone();
901            match err {
902                unicode::Error::PropertyNotFound => {
903                    self.error(sp, ErrorKind::UnicodePropertyNotFound)
904                }
905                unicode::Error::PropertyValueNotFound => {
906                    self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
907                }
908                unicode::Error::PerlClassNotFound => {
909                    self.error(sp, ErrorKind::UnicodePerlClassNotFound)
910                }
911            }
912        })
913    }
914
915    fn unicode_fold_and_negate(
916        &self,
917        span: &Span,
918        negated: bool,
919        class: &mut hir::ClassUnicode,
920    ) -> Result<()> {
921        // Note that we must apply case folding before negation!
922        // Consider `(?i)[^x]`. If we applied negation field, then
923        // the result would be the character class that matched any
924        // Unicode scalar value.
925        if self.flags().case_insensitive() {
926            class.try_case_fold_simple().map_err(|_| {
927                self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
928            })?;
929        }
930        if negated {
931            class.negate();
932        }
933        Ok(())
934    }
935
936    fn bytes_fold_and_negate(
937        &self,
938        span: &Span,
939        negated: bool,
940        class: &mut hir::ClassBytes,
941    ) -> Result<()> {
942        // Note that we must apply case folding before negation!
943        // Consider `(?i)[^x]`. If we applied negation field, then
944        // the result would be the character class that matched any
945        // Unicode scalar value.
946        if self.flags().case_insensitive() {
947            class.case_fold_simple();
948        }
949        if negated {
950            class.negate();
951        }
952        if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
953            return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
954        }
955        Ok(())
956    }
957
958    /// Return a scalar byte value suitable for use as a literal in a byte
959    /// character class.
960    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
961        match self.literal_to_char(ast)? {
962            hir::Literal::Byte(byte) => Ok(byte),
963            hir::Literal::Unicode(ch) => {
964                if ch <= 0x7F as char {
965                    Ok(ch as u8)
966                } else {
967                    // We can't feasibly support Unicode in
968                    // byte oriented classes. Byte classes don't
969                    // do Unicode case folding.
970                    Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
971                }
972            }
973        }
974    }
975}
976
977/// A translator's representation of a regular expression's flags at any given
978/// moment in time.
979///
980/// Each flag can be in one of three states: absent, present but disabled or
981/// present but enabled.
982#[derive(Clone, Copy, Debug, Default)]
983struct Flags {
984    case_insensitive: Option<bool>,
985    multi_line: Option<bool>,
986    dot_matches_new_line: Option<bool>,
987    swap_greed: Option<bool>,
988    unicode: Option<bool>,
989    // Note that `ignore_whitespace` is omitted here because it is handled
990    // entirely in the parser.
991}
992
993impl Flags {
994    fn from_ast(ast: &ast::Flags) -> Flags {
995        let mut flags = Flags::default();
996        let mut enable = true;
997        for item in &ast.items {
998            match item.kind {
999                ast::FlagsItemKind::Negation => {
1000                    enable = false;
1001                }
1002                ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1003                    flags.case_insensitive = Some(enable);
1004                }
1005                ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1006                    flags.multi_line = Some(enable);
1007                }
1008                ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1009                    flags.dot_matches_new_line = Some(enable);
1010                }
1011                ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1012                    flags.swap_greed = Some(enable);
1013                }
1014                ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1015                    flags.unicode = Some(enable);
1016                }
1017                ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1018            }
1019        }
1020        flags
1021    }
1022
1023    fn merge(&mut self, previous: &Flags) {
1024        if self.case_insensitive.is_none() {
1025            self.case_insensitive = previous.case_insensitive;
1026        }
1027        if self.multi_line.is_none() {
1028            self.multi_line = previous.multi_line;
1029        }
1030        if self.dot_matches_new_line.is_none() {
1031            self.dot_matches_new_line = previous.dot_matches_new_line;
1032        }
1033        if self.swap_greed.is_none() {
1034            self.swap_greed = previous.swap_greed;
1035        }
1036        if self.unicode.is_none() {
1037            self.unicode = previous.unicode;
1038        }
1039    }
1040
1041    fn case_insensitive(&self) -> bool {
1042        self.case_insensitive.unwrap_or(false)
1043    }
1044
1045    fn multi_line(&self) -> bool {
1046        self.multi_line.unwrap_or(false)
1047    }
1048
1049    fn dot_matches_new_line(&self) -> bool {
1050        self.dot_matches_new_line.unwrap_or(false)
1051    }
1052
1053    fn swap_greed(&self) -> bool {
1054        self.swap_greed.unwrap_or(false)
1055    }
1056
1057    fn unicode(&self) -> bool {
1058        self.unicode.unwrap_or(true)
1059    }
1060}
1061
1062fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1063    let ranges: Vec<_> = ascii_class(kind)
1064        .iter()
1065        .cloned()
1066        .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1067        .collect();
1068    hir::ClassBytes::new(ranges)
1069}
1070
1071fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1072    use ast::ClassAsciiKind::*;
1073    match *kind {
1074        Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1075        Alpha => &[('A', 'Z'), ('a', 'z')],
1076        Ascii => &[('\x00', '\x7F')],
1077        Blank => &[('\t', '\t'), (' ', ' ')],
1078        Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
1079        Digit => &[('0', '9')],
1080        Graph => &[('!', '~')],
1081        Lower => &[('a', 'z')],
1082        Print => &[(' ', '~')],
1083        Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1084        Space => &[
1085            ('\t', '\t'),
1086            ('\n', '\n'),
1087            ('\x0B', '\x0B'),
1088            ('\x0C', '\x0C'),
1089            ('\r', '\r'),
1090            (' ', ' '),
1091        ],
1092        Upper => &[('A', 'Z')],
1093        Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1094        Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1095    }
1096}
1097
1098#[cfg(test)]
1099mod tests {
1100    use ast::parse::ParserBuilder;
1101    use ast::{self, Ast, Position, Span};
1102    use hir::{self, Hir, HirKind};
1103    use unicode::{self, ClassQuery};
1104
1105    use super::{ascii_class, TranslatorBuilder};
1106
1107    // We create these errors to compare with real hir::Errors in the tests.
1108    // We define equality between TestError and hir::Error to disregard the
1109    // pattern string in hir::Error, which is annoying to provide in tests.
1110    #[derive(Clone, Debug)]
1111    struct TestError {
1112        span: Span,
1113        kind: hir::ErrorKind,
1114    }
1115
1116    impl PartialEq<hir::Error> for TestError {
1117        fn eq(&self, other: &hir::Error) -> bool {
1118            self.span == other.span && self.kind == other.kind
1119        }
1120    }
1121
1122    impl PartialEq<TestError> for hir::Error {
1123        fn eq(&self, other: &TestError) -> bool {
1124            self.span == other.span && self.kind == other.kind
1125        }
1126    }
1127
1128    fn parse(pattern: &str) -> Ast {
1129        ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1130    }
1131
1132    fn t(pattern: &str) -> Hir {
1133        TranslatorBuilder::new()
1134            .allow_invalid_utf8(false)
1135            .build()
1136            .translate(pattern, &parse(pattern))
1137            .unwrap()
1138    }
1139
1140    fn t_err(pattern: &str) -> hir::Error {
1141        TranslatorBuilder::new()
1142            .allow_invalid_utf8(false)
1143            .build()
1144            .translate(pattern, &parse(pattern))
1145            .unwrap_err()
1146    }
1147
1148    fn t_bytes(pattern: &str) -> Hir {
1149        TranslatorBuilder::new()
1150            .allow_invalid_utf8(true)
1151            .build()
1152            .translate(pattern, &parse(pattern))
1153            .unwrap()
1154    }
1155
1156    fn hir_lit(s: &str) -> Hir {
1157        match s.len() {
1158            0 => Hir::empty(),
1159            _ => {
1160                let lits = s
1161                    .chars()
1162                    .map(hir::Literal::Unicode)
1163                    .map(Hir::literal)
1164                    .collect();
1165                Hir::concat(lits)
1166            }
1167        }
1168    }
1169
1170    fn hir_blit(s: &[u8]) -> Hir {
1171        match s.len() {
1172            0 => Hir::empty(),
1173            1 => Hir::literal(hir::Literal::Byte(s[0])),
1174            _ => {
1175                let lits = s
1176                    .iter()
1177                    .cloned()
1178                    .map(hir::Literal::Byte)
1179                    .map(Hir::literal)
1180                    .collect();
1181                Hir::concat(lits)
1182            }
1183        }
1184    }
1185
1186    fn hir_group(i: u32, expr: Hir) -> Hir {
1187        Hir::group(hir::Group {
1188            kind: hir::GroupKind::CaptureIndex(i),
1189            hir: Box::new(expr),
1190        })
1191    }
1192
1193    fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1194        Hir::group(hir::Group {
1195            kind: hir::GroupKind::CaptureName {
1196                name: name.to_string(),
1197                index: i,
1198            },
1199            hir: Box::new(expr),
1200        })
1201    }
1202
1203    fn hir_group_nocap(expr: Hir) -> Hir {
1204        Hir::group(hir::Group {
1205            kind: hir::GroupKind::NonCapturing,
1206            hir: Box::new(expr),
1207        })
1208    }
1209
1210    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1211        Hir::repetition(hir::Repetition {
1212            kind: hir::RepetitionKind::ZeroOrOne,
1213            greedy: greedy,
1214            hir: Box::new(expr),
1215        })
1216    }
1217
1218    fn hir_star(greedy: bool, expr: Hir) -> Hir {
1219        Hir::repetition(hir::Repetition {
1220            kind: hir::RepetitionKind::ZeroOrMore,
1221            greedy: greedy,
1222            hir: Box::new(expr),
1223        })
1224    }
1225
1226    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1227        Hir::repetition(hir::Repetition {
1228            kind: hir::RepetitionKind::OneOrMore,
1229            greedy: greedy,
1230            hir: Box::new(expr),
1231        })
1232    }
1233
1234    fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1235        Hir::repetition(hir::Repetition {
1236            kind: hir::RepetitionKind::Range(range),
1237            greedy: greedy,
1238            hir: Box::new(expr),
1239        })
1240    }
1241
1242    fn hir_alt(alts: Vec<Hir>) -> Hir {
1243        Hir::alternation(alts)
1244    }
1245
1246    fn hir_cat(exprs: Vec<Hir>) -> Hir {
1247        Hir::concat(exprs)
1248    }
1249
1250    #[allow(dead_code)]
1251    fn hir_uclass_query(query: ClassQuery) -> Hir {
1252        Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1253    }
1254
1255    #[allow(dead_code)]
1256    fn hir_uclass_perl_word() -> Hir {
1257        Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1258    }
1259
1260    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1261        let ranges: Vec<hir::ClassUnicodeRange> = ranges
1262            .iter()
1263            .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1264            .collect();
1265        Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1266    }
1267
1268    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1269        let ranges: Vec<hir::ClassBytesRange> = ranges
1270            .iter()
1271            .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1272            .collect();
1273        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1274    }
1275
1276    fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1277        let ranges: Vec<hir::ClassBytesRange> = ranges
1278            .iter()
1279            .map(|&(s, e)| {
1280                assert!(s as u32 <= 0x7F);
1281                assert!(e as u32 <= 0x7F);
1282                hir::ClassBytesRange::new(s as u8, e as u8)
1283            })
1284            .collect();
1285        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1286    }
1287
1288    fn hir_case_fold(expr: Hir) -> Hir {
1289        match expr.into_kind() {
1290            HirKind::Class(mut cls) => {
1291                cls.case_fold_simple();
1292                Hir::class(cls)
1293            }
1294            _ => panic!("cannot case fold non-class Hir expr"),
1295        }
1296    }
1297
1298    fn hir_negate(expr: Hir) -> Hir {
1299        match expr.into_kind() {
1300            HirKind::Class(mut cls) => {
1301                cls.negate();
1302                Hir::class(cls)
1303            }
1304            _ => panic!("cannot negate non-class Hir expr"),
1305        }
1306    }
1307
1308    #[allow(dead_code)]
1309    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1310        use hir::Class::{Bytes, Unicode};
1311
1312        match (expr1.into_kind(), expr2.into_kind()) {
1313            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1314                c1.union(&c2);
1315                Hir::class(hir::Class::Unicode(c1))
1316            }
1317            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1318                c1.union(&c2);
1319                Hir::class(hir::Class::Bytes(c1))
1320            }
1321            _ => panic!("cannot union non-class Hir exprs"),
1322        }
1323    }
1324
1325    #[allow(dead_code)]
1326    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1327        use hir::Class::{Bytes, Unicode};
1328
1329        match (expr1.into_kind(), expr2.into_kind()) {
1330            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1331                c1.difference(&c2);
1332                Hir::class(hir::Class::Unicode(c1))
1333            }
1334            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1335                c1.difference(&c2);
1336                Hir::class(hir::Class::Bytes(c1))
1337            }
1338            _ => panic!("cannot difference non-class Hir exprs"),
1339        }
1340    }
1341
1342    fn hir_anchor(anchor: hir::Anchor) -> Hir {
1343        Hir::anchor(anchor)
1344    }
1345
1346    fn hir_word(wb: hir::WordBoundary) -> Hir {
1347        Hir::word_boundary(wb)
1348    }
1349
/// Patterns whose pieces are empty translate to the empty expression in the
/// corresponding positions.
#[test]
fn empty() {
    let e = Hir::empty;

    // Nothing at all, or nothing but a flag directive.
    assert_eq!(t(""), e());
    assert_eq!(t("(?i)"), e());

    // Empty groups of every flavour.
    assert_eq!(t("()"), hir_group(1, e()));
    assert_eq!(t("(?:)"), hir_group_nocap(e()));
    assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", e()));

    // Alternations with empty branches.
    assert_eq!(t("|"), hir_alt(vec![e(), e()]));
    assert_eq!(
        t("()|()"),
        hir_alt(vec![hir_group(1, e()), hir_group(2, e())])
    );
    assert_eq!(t("(|b)"), hir_group(1, hir_alt(vec![e(), hir_lit("b")])));
    assert_eq!(t("(a|)"), hir_group(1, hir_alt(vec![hir_lit("a"), e()])));
    assert_eq!(
        t("(a||c)"),
        hir_group(1, hir_alt(vec![hir_lit("a"), e(), hir_lit("c")]))
    );
    assert_eq!(t("(||)"), hir_group(1, hir_alt(vec![e(), e(), e()])));
}
1388
/// Literals translate to Unicode or byte literals, and literals that are
/// not permitted in the current mode are rejected with the right span.
#[test]
fn literal() {
    // Every position in this test is on line 1.
    let pos = |offset, column| Position::new(offset, 1, column);

    let unicode_cases =
        [("a", "a"), ("(?-u)a", "a"), ("☃", "☃"), ("abcd", "abcd")];
    for &(pattern, expected) in &unicode_cases {
        assert_eq!(t(pattern), hir_lit(expected));
    }

    let ascii_byte_cases = ["(?-u)a", "(?-u)\x61", r"(?-u)\x61"];
    for &pattern in &ascii_byte_cases {
        assert_eq!(t_bytes(pattern), hir_lit("a"));
    }
    assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(
        t_err("(?-u)☃"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(pos(5, 6), pos(8, 7)),
        }
    );
    assert_eq!(
        t_err(r"(?-u)\xFF"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(pos(5, 6), pos(9, 10)),
        }
    );
}
1422
/// Case insensitive literals translate to classes holding every case
/// variant: Unicode classes in Unicode mode and byte classes otherwise.
#[test]
fn literal_case_insensitive() {
    // Byte classes for the two cases of an ASCII letter.
    let b_a = || hir_bclass(&[(b'A', b'A'), (b'a', b'a')]);
    let b_b = || hir_bclass(&[(b'B', b'B'), (b'b', b'b')]);
    let b_c = || hir_bclass(&[(b'C', b'C'), (b'c', b'c')]);

    #[cfg(feature = "unicode-case")]
    {
        // Unicode classes for the two cases of an ASCII letter.
        let u_a = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
        let u_b = || hir_uclass(&[('B', 'B'), ('b', 'b')]);
        let u_c = || hir_uclass(&[('C', 'C'), ('c', 'c')]);

        assert_eq!(t("(?i)a"), u_a());
        assert_eq!(t("(?i:a)"), hir_group_nocap(u_a()));
        assert_eq!(
            t("a(?i)a(?-i)a"),
            hir_cat(vec![hir_lit("a"), u_a(), hir_lit("a")])
        );
        assert_eq!(
            t("(?i)ab@c"),
            hir_cat(vec![u_a(), u_b(), hir_lit("@"), u_c()])
        );
        assert_eq!(
            t("(?i)β"),
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
        );
    }

    assert_eq!(t("(?i-u)a"), b_a());
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![hir_lit("a"), b_a(), hir_lit("a")])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![b_a(), b_b(), hir_lit("@"), b_c()])
    );

    assert_eq!(t_bytes("(?i-u)a"), b_a());
    assert_eq!(t_bytes("(?i-u)\x61"), b_a());
    assert_eq!(t_bytes(r"(?i-u)\x61"), b_a());
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(
        t_err("(?i-u)β"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(Position::new(6, 1, 7), Position::new(8, 1, 8)),
        }
    );
}
1502
/// `.` translates to a class that leaves out `\n` unless the `s` flag is
/// set, and the non-Unicode forms are rejected when invalid UTF-8 is not
/// allowed.
#[test]
fn dot() {
    // Every position in this test is on line 1.
    let pos = |offset, column| Position::new(offset, 1, column);

    let any_char_but_newline =
        hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]);
    assert_eq!(t("."), any_char_but_newline);
    assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')]));

    let any_byte_but_newline =
        hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]);
    assert_eq!(t_bytes("(?-u)."), any_byte_but_newline);
    assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF')]));

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    assert_eq!(
        t_err("(?-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(pos(5, 6), pos(6, 7)),
        }
    );
    assert_eq!(
        t_err("(?s-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(pos(6, 7), pos(7, 8)),
        }
    );
}
1538
/// Anchors and word boundaries translate according to the multi-line and
/// Unicode flags, and a negated ASCII word boundary is rejected when
/// invalid UTF-8 is not allowed.
#[test]
fn assertions() {
    use hir::{Anchor, WordBoundary};

    // Without multi-line mode, `^`/`$` behave like `\A`/`\z`.
    assert_eq!(t("^"), hir_anchor(Anchor::StartText));
    assert_eq!(t("$"), hir_anchor(Anchor::EndText));
    assert_eq!(t(r"\A"), hir_anchor(Anchor::StartText));
    assert_eq!(t(r"\z"), hir_anchor(Anchor::EndText));

    // Multi-line mode only changes `^`/`$`.
    assert_eq!(t("(?m)^"), hir_anchor(Anchor::StartLine));
    assert_eq!(t("(?m)$"), hir_anchor(Anchor::EndLine));
    assert_eq!(t(r"(?m)\A"), hir_anchor(Anchor::StartText));
    assert_eq!(t(r"(?m)\z"), hir_anchor(Anchor::EndText));

    assert_eq!(t(r"\b"), hir_word(WordBoundary::Unicode));
    assert_eq!(t(r"\B"), hir_word(WordBoundary::UnicodeNegate));
    assert_eq!(t(r"(?-u)\b"), hir_word(WordBoundary::Ascii));
    assert_eq!(t_bytes(r"(?-u)\B"), hir_word(WordBoundary::AsciiNegate));

    let expected_span =
        Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8));
    assert_eq!(
        t_err(r"(?-u)\B"),
        TestError { kind: hir::ErrorKind::InvalidUtf8, span: expected_span }
    );
}
1569
/// Capturing, named and non-capturing groups translate to the matching HIR
/// groups, with capture indices assigned in order of appearance.
#[test]
fn group() {
    let a = || hir_lit("a");
    let b = || hir_lit("b");
    let c = || hir_lit("c");
    let e = Hir::empty;

    // Indexed captures.
    assert_eq!(t("(a)"), hir_group(1, a()));
    assert_eq!(
        t("(a)(b)"),
        hir_cat(vec![hir_group(1, a()), hir_group(2, b())])
    );
    assert_eq!(
        t("(a)|(b)"),
        hir_alt(vec![hir_group(1, a()), hir_group(2, b())])
    );

    // Named captures.
    assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", e()));
    assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", a()));
    assert_eq!(
        t("(?P<foo>a)(?P<bar>b)"),
        hir_cat(vec![
            hir_group_name(1, "foo", a()),
            hir_group_name(2, "bar", b()),
        ])
    );

    // Non-capturing groups, alone and mixed with captures.
    assert_eq!(t("(?:)"), hir_group_nocap(e()));
    assert_eq!(t("(?:a)"), hir_group_nocap(a()));
    assert_eq!(
        t("(?:a)(b)"),
        hir_cat(vec![hir_group_nocap(a()), hir_group(1, b())])
    );
    assert_eq!(
        t("(a)(?:b)(c)"),
        hir_cat(vec![
            hir_group(1, a()),
            hir_group_nocap(b()),
            hir_group(2, c()),
        ])
    );
    assert_eq!(
        t("(a)(?P<foo>b)(c)"),
        hir_cat(vec![
            hir_group(1, a()),
            hir_group_name(2, "foo", b()),
            hir_group(3, c()),
        ])
    );

    // Groups that contain nothing but flag directives.
    assert_eq!(t("()"), hir_group(1, e()));
    assert_eq!(t("((?i))"), hir_group(1, e()));
    assert_eq!(t("((?x))"), hir_group(1, e()));
    assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, e())));
}
1626
/// Flags apply from where they are set to the end of the enclosing group,
/// and flags set on a group apply only inside that group.
#[test]
fn flags() {
    let lit_a = || hir_lit("a");
    // Unicode class for the two cases of `a`.
    #[cfg(feature = "unicode-case")]
    let u_a = || hir_uclass(&[('A', 'A'), ('a', 'a')]);

    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(vec![hir_group_nocap(u_a()), lit_a()])
    );
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![
            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("β"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(
            t("(?i)(?-i:a)a"),
            hir_cat(vec![hir_group_nocap(lit_a()), u_a()])
        );
        assert_eq!(
            t("(?im)a^"),
            hir_cat(vec![u_a(), hir_anchor(hir::Anchor::StartLine)])
        );
        assert_eq!(
            t("(?im)a^(?i-m)a^"),
            hir_cat(vec![
                u_a(),
                hir_anchor(hir::Anchor::StartLine),
                u_a(),
                hir_anchor(hir::Anchor::StartText),
            ])
        );
    }
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, lit_a()),
            hir_star(true, lit_a()),
            hir_star(true, lit_a()),
            hir_star(false, lit_a()),
        ])
    );
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(
            t("(?:a(?i)a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_cat(vec![lit_a(), u_a()])),
                lit_a(),
            ])
        );
        assert_eq!(
            t("(?i)(?:a(?-i)a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_cat(vec![u_a(), lit_a()])),
                u_a(),
            ])
        );
    }
}
1702
1703    #[test]
    // Escaped meta characters: the pattern is a run of backslash-escaped
    // punctuation, and the expected translation is `hir_lit` of the same
    // characters with the escaping backslashes removed.
    // NOTE(review): `t` and `hir_lit` are helpers defined outside this chunk;
    // `t` presumably parses and translates the pattern -- confirm there.
1704    fn escape() {
1705        assert_eq!(
1706            t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1707            hir_lit(r"\.+*?()|[]{}^$#")
1708        );
1709    }
1710
1711    #[test]
    // Repetition operators: `?`, `*`, `+` and counted `{..}` forms, each with
    // and without a trailing `?`, followed by a few cases showing which
    // sub-expression the operator binds to.
    // NOTE(review): the boolean passed to `hir_quest`/`hir_star`/`hir_plus`/
    // `hir_range` is `true` for the plain operator and `false` for the form
    // with a trailing `?`; presumably it is the greedy flag -- confirm
    // against the helper definitions.
1712    fn repetition() {
        // Single-character operators, plain and with a trailing `?`.
1713        assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
1714        assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
1715        assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
1716        assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
1717        assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
1718        assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
1719
        // Counted repetitions: `{1}` -> Exactly(1), `{1,}` -> AtLeast(1),
        // `{1,2}` -> Bounded(1, 2).
1720        assert_eq!(
1721            t("a{1}"),
1722            hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1723        );
1724        assert_eq!(
1725            t("a{1,}"),
1726            hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1727        );
1728        assert_eq!(
1729            t("a{1,2}"),
1730            hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
1731        );
        // The same three counted forms with a trailing `?`.
1732        assert_eq!(
1733            t("a{1}?"),
1734            hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1735        );
1736        assert_eq!(
1737            t("a{1,}?"),
1738            hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1739        );
1740        assert_eq!(
1741            t("a{1,2}?"),
1742            hir_range(
1743                false,
1744                hir::RepetitionRange::Bounded(1, 2),
1745                hir_lit("a"),
1746            )
1747        );
1748
        // Binding: in `ab?` and `a|b?` the operator applies to `b` only; in
        // `(ab)?` it applies to capture group 1 as a whole.
1749        assert_eq!(
1750            t("ab?"),
1751            hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1752        );
1753        assert_eq!(
1754            t("(ab)?"),
1755            hir_quest(
1756                true,
1757                hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1758            )
1759        );
1760        assert_eq!(
1761            t("a|b?"),
1762            hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1763        );
1764    }
1765
1766    #[test]
    // Concatenation and alternation, bare and inside capture groups.
    // The expected values show capture groups numbered from 1, and nested
    // groups numbered 1, 2, 3 in the order their opening parentheses appear.
1767    fn cat_alt() {
1768        assert_eq!(
1769            t("(ab)"),
1770            hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1771        );
        // Bare alternations with single- and multi-character branches.
1772        assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
1773        assert_eq!(
1774            t("a|b|c"),
1775            hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1776        );
1777        assert_eq!(
1778            t("ab|bc|cd"),
1779            hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1780        );
        // The same alternations wrapped in capture group 1.
1781        assert_eq!(
1782            t("(a|b)"),
1783            hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
1784        );
1785        assert_eq!(
1786            t("(a|b|c)"),
1787            hir_group(
1788                1,
1789                hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1790            )
1791        );
1792        assert_eq!(
1793            t("(ab|bc|cd)"),
1794            hir_group(
1795                1,
1796                hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1797            )
1798        );
        // Nested groups: each inner group is the last branch of the
        // alternation that encloses it.
1799        assert_eq!(
1800            t("(ab|(bc|(cd)))"),
1801            hir_group(
1802                1,
1803                hir_alt(vec![
1804                    hir_lit("ab"),
1805                    hir_group(
1806                        2,
1807                        hir_alt(vec![
1808                            hir_lit("bc"),
1809                            hir_group(3, hir_lit("cd")),
1810                        ])
1811                    ),
1812                ])
1813            )
1814        );
1815    }
1816
1817    #[test]
    // POSIX-style `[[:name:]]` classes.
    // With no flags in the pattern, each named class is compared against
    // `hir_uclass(ascii_class(kind))`; with `(?-u)` the expectation is built
    // with `hir_bclass_from_char` instead. The tail of the test checks that
    // negated classes under `(?-u)` are rejected by `t_err`.
    // NOTE(review): `t_err` is defined outside this chunk; presumably it
    // translates with invalid UTF-8 disallowed -- confirm there.
1818    fn class_ascii() {
        // One assertion per named class.
1819        assert_eq!(
1820            t("[[:alnum:]]"),
1821            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
1822        );
1823        assert_eq!(
1824            t("[[:alpha:]]"),
1825            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
1826        );
1827        assert_eq!(
1828            t("[[:ascii:]]"),
1829            hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
1830        );
1831        assert_eq!(
1832            t("[[:blank:]]"),
1833            hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
1834        );
1835        assert_eq!(
1836            t("[[:cntrl:]]"),
1837            hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
1838        );
1839        assert_eq!(
1840            t("[[:digit:]]"),
1841            hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
1842        );
1843        assert_eq!(
1844            t("[[:graph:]]"),
1845            hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
1846        );
1847        assert_eq!(
1848            t("[[:lower:]]"),
1849            hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
1850        );
1851        assert_eq!(
1852            t("[[:print:]]"),
1853            hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
1854        );
1855        assert_eq!(
1856            t("[[:punct:]]"),
1857            hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
1858        );
1859        assert_eq!(
1860            t("[[:space:]]"),
1861            hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
1862        );
1863        assert_eq!(
1864            t("[[:upper:]]"),
1865            hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
1866        );
1867        assert_eq!(
1868            t("[[:word:]]"),
1869            hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
1870        );
1871        assert_eq!(
1872            t("[[:xdigit:]]"),
1873            hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
1874        );
1875
        // `[:^name:]` is expected to equal the negation of the named class.
1876        assert_eq!(
1877            t("[[:^lower:]]"),
1878            hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
1879        );
        // With `(?i)` the expected class is A-Z and a-z plus U+017F and
        // U+212A.
1880        #[cfg(feature = "unicode-case")]
1881        assert_eq!(
1882            t("(?i)[[:lower:]]"),
1883            hir_uclass(&[
1884                ('A', 'Z'),
1885                ('a', 'z'),
1886                ('\u{17F}', '\u{17F}'),
1887                ('\u{212A}', '\u{212A}'),
1888            ])
1889        );
1890
        // `(?-u)`: the expectation is a byte class; with `(?i-u)` it is the
        // same byte class passed through `hir_case_fold`.
1891        assert_eq!(
1892            t("(?-u)[[:lower:]]"),
1893            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
1894        );
1895        assert_eq!(
1896            t("(?i-u)[[:lower:]]"),
1897            hir_case_fold(hir_bclass_from_char(ascii_class(
1898                &ast::ClassAsciiKind::Lower
1899            )))
1900        );
1901
        // Negated class under `(?-u)`: `t_err` reports `InvalidUtf8`. The
        // expected span starts after the outer `[` and is ten bytes long,
        // i.e. it covers the inner `[:^lower:]` item only.
1902        assert_eq!(
1903            t_err("(?-u)[[:^lower:]]"),
1904            TestError {
1905                kind: hir::ErrorKind::InvalidUtf8,
1906                span: Span::new(
1907                    Position::new(6, 1, 7),
1908                    Position::new(16, 1, 17)
1909                ),
1910            }
1911        );
1912        assert_eq!(
1913            t_err("(?i-u)[[:^lower:]]"),
1914            TestError {
1915                kind: hir::ErrorKind::InvalidUtf8,
1916                span: Span::new(
1917                    Position::new(7, 1, 8),
1918                    Position::new(17, 1, 18)
1919                ),
1920            }
1921        );
1922    }
1923
1924    #[test]
1925    #[cfg(feature = "unicode-perl")]
    // Perl classes `\d`, `\s`, `\w` and their negations `\D`, `\S`, `\W`.
    // Only compiled when the `unicode-perl` feature is on. Each group is
    // checked with and without the `i` flag, and in every case below the
    // expected value for the `i` variant is the same as for the plain one.
1926    fn class_perl() {
1927        // Unicode
1928        assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
1929        assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
1930        assert_eq!(t(r"\w"), hir_uclass_perl_word());
1931        #[cfg(feature = "unicode-case")]
1932        assert_eq!(
1933            t(r"(?i)\d"),
1934            hir_uclass_query(ClassQuery::Binary("digit"))
1935        );
1936        #[cfg(feature = "unicode-case")]
1937        assert_eq!(
1938            t(r"(?i)\s"),
1939            hir_uclass_query(ClassQuery::Binary("space"))
1940        );
1941        #[cfg(feature = "unicode-case")]
1942        assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
1943
1944        // Unicode, negated
1945        assert_eq!(
1946            t(r"\D"),
1947            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
1948        );
1949        assert_eq!(
1950            t(r"\S"),
1951            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
1952        );
1953        assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
1954        #[cfg(feature = "unicode-case")]
1955        assert_eq!(
1956            t(r"(?i)\D"),
1957            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
1958        );
1959        #[cfg(feature = "unicode-case")]
1960        assert_eq!(
1961            t(r"(?i)\S"),
1962            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
1963        );
1964        #[cfg(feature = "unicode-case")]
1965        assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
1966
        // Under `(?-u)` the expectations are byte classes built from the
        // ASCII Digit/Space/Word tables.
1967        // ASCII only
1968        assert_eq!(
1969            t(r"(?-u)\d"),
1970            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
1971        );
1972        assert_eq!(
1973            t(r"(?-u)\s"),
1974            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
1975        );
1976        assert_eq!(
1977            t(r"(?-u)\w"),
1978            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
1979        );
1980        assert_eq!(
1981            t(r"(?i-u)\d"),
1982            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
1983        );
1984        assert_eq!(
1985            t(r"(?i-u)\s"),
1986            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
1987        );
1988        assert_eq!(
1989            t(r"(?i-u)\w"),
1990            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
1991        );
1992
1993        // ASCII only, negated
1994        assert_eq!(
1995            t(r"(?-u)\D"),
1996            hir_negate(hir_bclass_from_char(ascii_class(
1997                &ast::ClassAsciiKind::Digit
1998            )))
1999        );
2000        assert_eq!(
2001            t(r"(?-u)\S"),
2002            hir_negate(hir_bclass_from_char(ascii_class(
2003                &ast::ClassAsciiKind::Space
2004            )))
2005        );
2006        assert_eq!(
2007            t(r"(?-u)\W"),
2008            hir_negate(hir_bclass_from_char(ascii_class(
2009                &ast::ClassAsciiKind::Word
2010            )))
2011        );
2012        assert_eq!(
2013            t(r"(?i-u)\D"),
2014            hir_negate(hir_bclass_from_char(ascii_class(
2015                &ast::ClassAsciiKind::Digit
2016            )))
2017        );
2018        assert_eq!(
2019            t(r"(?i-u)\S"),
2020            hir_negate(hir_bclass_from_char(ascii_class(
2021                &ast::ClassAsciiKind::Space
2022            )))
2023        );
2024        assert_eq!(
2025            t(r"(?i-u)\W"),
2026            hir_negate(hir_bclass_from_char(ascii_class(
2027                &ast::ClassAsciiKind::Word
2028            )))
2029        );
2030    }
2031
2032    #[test]
2033    #[cfg(not(feature = "unicode-perl"))]
    // Compiled only when `unicode-perl` is off: `\w` must then fail with
    // `UnicodePerlClassNotFound`, and the error span must cover the whole
    // two-byte escape (offsets 0..2).
2034    fn class_perl_word_disabled() {
2035        assert_eq!(
2036            t_err(r"\w"),
2037            TestError {
2038                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2039                span: Span::new(
2040                    Position::new(0, 1, 1),
2041                    Position::new(2, 1, 3)
2042                ),
2043            }
2044        );
2045    }
2046
2047    #[test]
2048    #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
    // Compiled only when both `unicode-perl` and `unicode-bool` are off:
    // `\s` must then fail with `UnicodePerlClassNotFound`, with the error
    // span covering the whole two-byte escape (offsets 0..2).
2049    fn class_perl_space_disabled() {
2050        assert_eq!(
2051            t_err(r"\s"),
2052            TestError {
2053                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2054                span: Span::new(
2055                    Position::new(0, 1, 1),
2056                    Position::new(2, 1, 3)
2057                ),
2058            }
2059        );
2060    }
2061
2062    #[test]
2063    #[cfg(all(
2064        not(feature = "unicode-perl"),
2065        not(feature = "unicode-gencat")
2066    ))]
    // Compiled only when both `unicode-perl` and `unicode-gencat` are off:
    // `\d` must then fail with `UnicodePerlClassNotFound`, with the error
    // span covering the whole two-byte escape (offsets 0..2).
2067    fn class_perl_digit_disabled() {
2068        assert_eq!(
2069            t_err(r"\d"),
2070            TestError {
2071                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2072                span: Span::new(
2073                    Position::new(0, 1, 1),
2074                    Position::new(2, 1, 3)
2075                ),
2076            }
2077        );
2078    }
2079
2080    #[test]
2081    #[cfg(feature = "unicode-gencat")]
    // `\p{..}` / `\P{..}` general-category classes; compiled only when the
    // `unicode-gencat` feature is on. Covers alternative spellings of one
    // category, negation, the `any`/`assigned`/`ascii` names, and the errors
    // expected for disallowed or unknown properties.
2082    fn class_unicode_gencat() {
        // Every spelling below is expected to equal the query for "Z":
        // one-letter forms in either case, the long name, a name with mixed
        // case and embedded spaces, and the `gc:` / `gc=` prefixed forms.
2083        assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2084        assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2085        assert_eq!(
2086            t(r"\p{Separator}"),
2087            hir_uclass_query(ClassQuery::Binary("Z"))
2088        );
2089        assert_eq!(
2090            t(r"\p{se      PaRa ToR}"),
2091            hir_uclass_query(ClassQuery::Binary("Z"))
2092        );
2093        assert_eq!(
2094            t(r"\p{gc:Separator}"),
2095            hir_uclass_query(ClassQuery::Binary("Z"))
2096        );
2097        assert_eq!(
2098            t(r"\p{gc=Separator}"),
2099            hir_uclass_query(ClassQuery::Binary("Z"))
2100        );
        // `\p{Other}` and `\pC` are both compared against the "Other" query.
2101        assert_eq!(
2102            t(r"\p{Other}"),
2103            hir_uclass_query(ClassQuery::Binary("Other"))
2104        );
2105        assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2106
        // Negation via `\P` and via the `gc!=` operator inside `\P{..}`; all
        // three are expected to equal the negated "Z" query.
2107        assert_eq!(
2108            t(r"\PZ"),
2109            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2110        );
2111        assert_eq!(
2112            t(r"\P{separator}"),
2113            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2114        );
2115        assert_eq!(
2116            t(r"\P{gc!=separator}"),
2117            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2118        );
2119
        // `any`, `assigned` and `ascii`, bare and with the `gc:` prefix.
2120        assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2121        assert_eq!(
2122            t(r"\p{assigned}"),
2123            hir_uclass_query(ClassQuery::Binary("Assigned"))
2124        );
2125        assert_eq!(
2126            t(r"\p{ascii}"),
2127            hir_uclass_query(ClassQuery::Binary("ASCII"))
2128        );
2129        assert_eq!(
2130            t(r"\p{gc:any}"),
2131            hir_uclass_query(ClassQuery::Binary("Any"))
2132        );
2133        assert_eq!(
2134            t(r"\p{gc:assigned}"),
2135            hir_uclass_query(ClassQuery::Binary("Assigned"))
2136        );
2137        assert_eq!(
2138            t(r"\p{gc:ascii}"),
2139            hir_uclass_query(ClassQuery::Binary("ASCII"))
2140        );
2141
        // Under `(?-u)` a Unicode class is rejected with `UnicodeNotAllowed`;
        // the expected span starts after the five-byte flag group and covers
        // the class escape only.
2142        assert_eq!(
2143            t_err(r"(?-u)\pZ"),
2144            TestError {
2145                kind: hir::ErrorKind::UnicodeNotAllowed,
2146                span: Span::new(
2147                    Position::new(5, 1, 6),
2148                    Position::new(8, 1, 9)
2149                ),
2150            }
2151        );
2152        assert_eq!(
2153            t_err(r"(?-u)\p{Separator}"),
2154            TestError {
2155                kind: hir::ErrorKind::UnicodeNotAllowed,
2156                span: Span::new(
2157                    Position::new(5, 1, 6),
2158                    Position::new(18, 1, 19)
2159                ),
2160            }
2161        );
        // Unknown names: a bare unknown name yields `UnicodePropertyNotFound`,
        // while an unknown value after `gc:` yields
        // `UnicodePropertyValueNotFound`. Each span covers the whole escape.
2162        assert_eq!(
2163            t_err(r"\pE"),
2164            TestError {
2165                kind: hir::ErrorKind::UnicodePropertyNotFound,
2166                span: Span::new(
2167                    Position::new(0, 1, 1),
2168                    Position::new(3, 1, 4)
2169                ),
2170            }
2171        );
2172        assert_eq!(
2173            t_err(r"\p{Foo}"),
2174            TestError {
2175                kind: hir::ErrorKind::UnicodePropertyNotFound,
2176                span: Span::new(
2177                    Position::new(0, 1, 1),
2178                    Position::new(7, 1, 8)
2179                ),
2180            }
2181        );
2182        assert_eq!(
2183            t_err(r"\p{gc:Foo}"),
2184            TestError {
2185                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2186                span: Span::new(
2187                    Position::new(0, 1, 1),
2188                    Position::new(10, 1, 11)
2189                ),
2190            }
2191        );
2192    }
2193
2194    #[test]
2195    #[cfg(not(feature = "unicode-gencat"))]
    // Compiled only when `unicode-gencat` is off: both `\p{Separator}` and
    // `\p{Any}` must then fail with `UnicodePropertyNotFound`, with the span
    // covering the whole escape.
2196    fn class_unicode_gencat_disabled() {
2197        assert_eq!(
2198            t_err(r"\p{Separator}"),
2199            TestError {
2200                kind: hir::ErrorKind::UnicodePropertyNotFound,
2201                span: Span::new(
2202                    Position::new(0, 1, 1),
2203                    Position::new(13, 1, 14)
2204                ),
2205            }
2206        );
2207
2208        assert_eq!(
2209            t_err(r"\p{Any}"),
2210            TestError {
2211                kind: hir::ErrorKind::UnicodePropertyNotFound,
2212                span: Span::new(
2213                    Position::new(0, 1, 1),
2214                    Position::new(7, 1, 8)
2215                ),
2216            }
2217        );
2218    }
2219
2220    #[test]
2221    #[cfg(feature = "unicode-script")]
    // Script classes; compiled only when the `unicode-script` feature is on.
    // Checks `\p{Greek}`, its case-insensitive forms, and the error for an
    // unknown value given to `sc:` or `scx:`.
2222    fn class_unicode_script() {
2223        assert_eq!(
2224            t(r"\p{Greek}"),
2225            hir_uclass_query(ClassQuery::Binary("Greek"))
2226        );
        // With `(?i)` the expected class is the Greek query passed through
        // `hir_case_fold`; for `\P` the fold is applied first and the result
        // is then negated.
2227        #[cfg(feature = "unicode-case")]
2228        assert_eq!(
2229            t(r"(?i)\p{Greek}"),
2230            hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2231        );
2232        #[cfg(feature = "unicode-case")]
2233        assert_eq!(
2234            t(r"(?i)\P{Greek}"),
2235            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2236                "Greek"
2237            ))))
2238        );
2239
        // Unknown script values: `UnicodePropertyValueNotFound`, with the
        // span covering the whole escape.
2240        assert_eq!(
2241            t_err(r"\p{sc:Foo}"),
2242            TestError {
2243                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2244                span: Span::new(
2245                    Position::new(0, 1, 1),
2246                    Position::new(10, 1, 11)
2247                ),
2248            }
2249        );
2250        assert_eq!(
2251            t_err(r"\p{scx:Foo}"),
2252            TestError {
2253                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2254                span: Span::new(
2255                    Position::new(0, 1, 1),
2256                    Position::new(11, 1, 12)
2257                ),
2258            }
2259        );
2260    }
2261
2262    #[test]
2263    #[cfg(not(feature = "unicode-script"))]
    // Compiled only when `unicode-script` is off: `\p{Greek}` and
    // `\p{scx:Greek}` must then both fail with `UnicodePropertyNotFound`,
    // with the span covering the whole escape.
2264    fn class_unicode_script_disabled() {
2265        assert_eq!(
2266            t_err(r"\p{Greek}"),
2267            TestError {
2268                kind: hir::ErrorKind::UnicodePropertyNotFound,
2269                span: Span::new(
2270                    Position::new(0, 1, 1),
2271                    Position::new(9, 1, 10)
2272                ),
2273            }
2274        );
2275
2276        assert_eq!(
2277            t_err(r"\p{scx:Greek}"),
2278            TestError {
2279                kind: hir::ErrorKind::UnicodePropertyNotFound,
2280                span: Span::new(
2281                    Position::new(0, 1, 1),
2282                    Position::new(13, 1, 14)
2283                ),
2284            }
2285        );
2286    }
2287
2288    #[test]
2289    #[cfg(feature = "unicode-age")]
    // Compiled only when `unicode-age` is on: an unknown value for the `age`
    // property must fail with `UnicodePropertyValueNotFound`, with the span
    // covering the whole escape. Only this error case is checked here.
2290    fn class_unicode_age() {
2291        assert_eq!(
2292            t_err(r"\p{age:Foo}"),
2293            TestError {
2294                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2295                span: Span::new(
2296                    Position::new(0, 1, 1),
2297                    Position::new(11, 1, 12)
2298                ),
2299            }
2300        );
2301    }
2302
2303    #[test]
2304    #[cfg(not(feature = "unicode-age"))]
    // Compiled only when `unicode-age` is off: `\p{age:3.0}` must then fail
    // with `UnicodePropertyNotFound` (the property itself, not the value),
    // with the span covering the whole escape.
2305    fn class_unicode_age_disabled() {
2306        assert_eq!(
2307            t_err(r"\p{age:3.0}"),
2308            TestError {
2309                kind: hir::ErrorKind::UnicodePropertyNotFound,
2310                span: Span::new(
2311                    Position::new(0, 1, 1),
2312                    Position::new(11, 1, 12)
2313                ),
2314            }
2315        );
2316    }
2317
2318    #[test]
    // Bracketed classes `[...]`: literals and ranges, escapes, Perl and
    // Unicode classes used as items, byte-mode classes, case-insensitive
    // classes, negation, punctuation that needs care inside brackets, and
    // the errors expected for invalid or empty classes.
    // NOTE(review): `t_bytes` is defined outside this chunk; presumably it
    // translates with invalid UTF-8 allowed -- confirm there.
2319    fn class_bracketed() {
        // Literals and ranges. Overlapping (`a-f`, `d-h`) and adjacent
        // (`a-f`, `g-m`) ranges are expected as one merged range.
2320        assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
2321        assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
2322        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2323        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2324        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
        // Escapes. The first `\n` case is a raw string, so the pattern holds
        // a backslash escape; the second is a normal string, so the pattern
        // holds an actual newline character. Both expect the same class.
2325        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2326        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2327        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
        // Perl and Unicode classes as the only item of a bracketed class.
2328        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2329        assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2330        #[cfg(feature = "unicode-gencat")]
2331        assert_eq!(
2332            t(r"[\pZ]"),
2333            hir_uclass_query(ClassQuery::Binary("separator"))
2334        );
2335        #[cfg(feature = "unicode-gencat")]
2336        assert_eq!(
2337            t(r"[\p{separator}]"),
2338            hir_uclass_query(ClassQuery::Binary("separator"))
2339        );
        // Double negation: `[^\D]`, `[^\PZ]` and `[^\P{..}]` are expected to
        // equal the corresponding un-negated class.
2340        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2341        assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2342        #[cfg(feature = "unicode-gencat")]
2343        assert_eq!(
2344            t(r"[^\PZ]"),
2345            hir_uclass_query(ClassQuery::Binary("separator"))
2346        );
2347        #[cfg(feature = "unicode-gencat")]
2348        assert_eq!(
2349            t(r"[^\P{separator}]"),
2350            hir_uclass_query(ClassQuery::Binary("separator"))
2351        );
2352        #[cfg(all(
2353            feature = "unicode-case",
2354            any(feature = "unicode-perl", feature = "unicode-gencat")
2355        ))]
2356        assert_eq!(
2357            t(r"(?i)[^\D]"),
2358            hir_uclass_query(ClassQuery::Binary("digit"))
2359        );
2360        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2361        assert_eq!(
2362            t(r"(?i)[^\P{greek}]"),
2363            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2364        );
2365
        // `(?-u)`: expectations are byte classes. The `\xFF` case goes
        // through `t_bytes` rather than `t`.
2366        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2367        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2368        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2369
        // Case-insensitive classes. In Unicode mode `k` also brings in
        // U+212A and `β` also brings in `ϐ`; under `(?i-u)` only the two
        // ASCII bytes `K` and `k` are expected.
2370        #[cfg(feature = "unicode-case")]
2371        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2372        #[cfg(feature = "unicode-case")]
2373        assert_eq!(
2374            t("(?i)[k]"),
2375            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2376        );
2377        #[cfg(feature = "unicode-case")]
2378        assert_eq!(
2379            t("(?i)[β]"),
2380            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2381        );
2382        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2383
        // Negated classes. With `(?i)`, the expected value folds the inner
        // class first and negates the folded result.
2384        assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
2385        assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
2386        assert_eq!(
2387            t_bytes("(?-u)[^a]"),
2388            hir_negate(hir_bclass(&[(b'a', b'a')]))
2389        );
2390        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2391        assert_eq!(
2392            t(r"[^\d]"),
2393            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2394        );
2395        #[cfg(feature = "unicode-gencat")]
2396        assert_eq!(
2397            t(r"[^\pZ]"),
2398            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2399        );
2400        #[cfg(feature = "unicode-gencat")]
2401        assert_eq!(
2402            t(r"[^\p{separator}]"),
2403            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2404        );
2405        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2406        assert_eq!(
2407            t(r"(?i)[^\p{greek}]"),
2408            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2409                "greek"
2410            ))))
2411        );
2412        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2413        assert_eq!(
2414            t(r"(?i)[\P{greek}]"),
2415            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2416                "greek"
2417            ))))
2418        );
2419
        // `[`, `&`, `~` and `-` as literals inside a class: bare (where
        // used), escaped, repeated, and as either endpoint of a range.
2420        // Test some weird cases.
2421        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2422
2423        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2424        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2425        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2426        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2427        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2428
2429        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2430        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2431        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2432        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2433        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2434
2435        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2436        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2437        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2438        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2439        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2440
        // Errors. `(?-u)[^a]` succeeds under `t_bytes` above but is expected
        // to fail with `InvalidUtf8` under `t_err`; the span covers the
        // bracketed class and not the flag group.
2441        assert_eq!(
2442            t_err("(?-u)[^a]"),
2443            TestError {
2444                kind: hir::ErrorKind::InvalidUtf8,
2445                span: Span::new(
2446                    Position::new(5, 1, 6),
2447                    Position::new(9, 1, 10)
2448                ),
2449            }
2450        );
        // `[^\s\S]` is expected to be rejected with `EmptyClassNotAllowed`
        // in both Unicode and `(?-u)` mode.
2451        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2452        assert_eq!(
2453            t_err(r"[^\s\S]"),
2454            TestError {
2455                kind: hir::ErrorKind::EmptyClassNotAllowed,
2456                span: Span::new(
2457                    Position::new(0, 1, 1),
2458                    Position::new(7, 1, 8)
2459                ),
2460            }
2461        );
2462        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2463        assert_eq!(
2464            t_err(r"(?-u)[^\s\S]"),
2465            TestError {
2466                kind: hir::ErrorKind::EmptyClassNotAllowed,
2467                span: Span::new(
2468                    Position::new(5, 1, 6),
2469                    Position::new(12, 1, 13)
2470                ),
2471            }
2472        );
2473    }
2474
2475    #[test]
    // Bracketed classes with several items. The expected values are built by
    // combining the individual item classes with `hir_union`; the `(?i)` and
    // `[^...]` variants wrap that union in `hir_case_fold` and/or
    // `hir_negate`.
    // NOTE(review): the argument order given to `hir_union` does not follow
    // the item order in the patterns, so the helper is presumably
    // order-insensitive -- confirm against its definition.
2476    fn class_bracketed_union() {
2477        assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
        // Literal items on both sides of a Unicode class: `a` and `b` are
        // expected as the single range a-b, united with the separator class.
2478        #[cfg(feature = "unicode-gencat")]
2479        assert_eq!(
2480            t(r"[a\pZb]"),
2481            hir_union(
2482                hir_uclass(&[('a', 'b')]),
2483                hir_uclass_query(ClassQuery::Binary("separator"))
2484            )
2485        );
2486        #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2487        assert_eq!(
2488            t(r"[\pZ\p{Greek}]"),
2489            hir_union(
2490                hir_uclass_query(ClassQuery::Binary("greek")),
2491                hir_uclass_query(ClassQuery::Binary("separator"))
2492            )
2493        );
2494        #[cfg(all(
2495            feature = "unicode-age",
2496            feature = "unicode-gencat",
2497            feature = "unicode-script"
2498        ))]
2499        assert_eq!(
2500            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2501            hir_union(
2502                hir_uclass_query(ClassQuery::ByValue {
2503                    property_name: "age",
2504                    property_value: "3.0",
2505                }),
2506                hir_union(
2507                    hir_uclass_query(ClassQuery::Binary("greek")),
2508                    hir_uclass_query(ClassQuery::Binary("separator"))
2509                )
2510            )
2511        );
        // Nested brackets: the expected value is the union of all four item
        // classes, regardless of how the brackets group them.
2512        #[cfg(all(
2513            feature = "unicode-age",
2514            feature = "unicode-gencat",
2515            feature = "unicode-script"
2516        ))]
2517        assert_eq!(
2518            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2519            hir_union(
2520                hir_uclass_query(ClassQuery::ByValue {
2521                    property_name: "age",
2522                    property_value: "3.0",
2523                }),
2524                hir_union(
2525                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
2526                    hir_union(
2527                        hir_uclass_query(ClassQuery::Binary("greek")),
2528                        hir_uclass_query(ClassQuery::Binary("separator"))
2529                    )
2530                )
2531            )
2532        );
2533
        // The three-item union again: case-folded, negated, and case-folded
        // then negated.
2534        #[cfg(all(
2535            feature = "unicode-age",
2536            feature = "unicode-case",
2537            feature = "unicode-gencat",
2538            feature = "unicode-script"
2539        ))]
2540        assert_eq!(
2541            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2542            hir_case_fold(hir_union(
2543                hir_uclass_query(ClassQuery::ByValue {
2544                    property_name: "age",
2545                    property_value: "3.0",
2546                }),
2547                hir_union(
2548                    hir_uclass_query(ClassQuery::Binary("greek")),
2549                    hir_uclass_query(ClassQuery::Binary("separator"))
2550                )
2551            ))
2552        );
2553        #[cfg(all(
2554            feature = "unicode-age",
2555            feature = "unicode-gencat",
2556            feature = "unicode-script"
2557        ))]
2558        assert_eq!(
2559            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2560            hir_negate(hir_union(
2561                hir_uclass_query(ClassQuery::ByValue {
2562                    property_name: "age",
2563                    property_value: "3.0",
2564                }),
2565                hir_union(
2566                    hir_uclass_query(ClassQuery::Binary("greek")),
2567                    hir_uclass_query(ClassQuery::Binary("separator"))
2568                )
2569            ))
2570        );
2571        #[cfg(all(
2572            feature = "unicode-age",
2573            feature = "unicode-case",
2574            feature = "unicode-gencat",
2575            feature = "unicode-script"
2576        ))]
2577        assert_eq!(
2578            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2579            hir_negate(hir_case_fold(hir_union(
2580                hir_uclass_query(ClassQuery::ByValue {
2581                    property_name: "age",
2582                    property_value: "3.0",
2583                }),
2584                hir_union(
2585                    hir_uclass_query(ClassQuery::Binary("greek")),
2586                    hir_uclass_query(ClassQuery::Binary("separator"))
2587                )
2588            )))
2589        );
2590    }
2591
2592    #[test]
    // Checks translation of bracketed classes that contain a nested, negated
    // class such as `[^c]`, with and without negation of the outer class and
    // with and without the `(?i)` flag. `t`, `t_err` and the `hir_*` builders
    // are helpers defined elsewhere in this test module (not visible here).
2593    fn class_bracketed_nested() {
        // The nested `[^c]` is unioned with the outer items; the expected
        // result is written as the negation of whatever is still excluded.
2594        assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2595        assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
        // `a-c` together with "everything but `c`" excludes nothing, hence
        // the negation of the empty class.
2596        assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2597
        // Negating the outer class leaves only `c`.
2598        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2599        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2600
        // Same shapes under `(?i)`: the expected class is case folded before
        // it is negated.
2601        #[cfg(feature = "unicode-case")]
2602        assert_eq!(
2603            t(r"(?i)[a[^c]]"),
2604            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2605        );
2606        #[cfg(feature = "unicode-case")]
2607        assert_eq!(
2608            t(r"(?i)[a-b[^c]]"),
2609            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2610        );
2611
2612        #[cfg(feature = "unicode-case")]
2613        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2614        #[cfg(feature = "unicode-case")]
2615        assert_eq!(
2616            t(r"(?i)[^a-b[^c]]"),
2617            hir_uclass(&[('C', 'C'), ('c', 'c')])
2618        );
2619
        // Negating a class that already covers everything must be rejected
        // with `EmptyClassNotAllowed`. The expected span runs from 0 to 10,
        // which is the full length of the 10-character pattern.
2620        assert_eq!(
2621            t_err(r"[^a-c[^c]]"),
2622            TestError {
2623                kind: hir::ErrorKind::EmptyClassNotAllowed,
2624                span: Span::new(
2625                    Position::new(0, 1, 1),
2626                    Position::new(10, 1, 11)
2627                ),
2628            }
2629        );
        // With the 4-character `(?i)` prefix the expected span shifts to
        // 4..14, i.e. it covers only the bracketed class.
2630        #[cfg(feature = "unicode-case")]
2631        assert_eq!(
2632            t_err(r"(?i)[^a-c[^c]]"),
2633            TestError {
2634                kind: hir::ErrorKind::EmptyClassNotAllowed,
2635                span: Span::new(
2636                    Position::new(4, 1, 5),
2637                    Position::new(14, 1, 15)
2638                ),
2639            }
2640        );
2641    }
2642
2643    #[test]
    // Checks translation of the `&&` (intersection) operator inside bracketed
    // classes. The same six patterns are run in four modes: Unicode, bytes
    // (`(?-u)`), case-insensitive Unicode (`(?i)`) and case-insensitive bytes
    // (`(?i-u)`), followed by literal/escaping and precedence edge cases.
    // `t` and the `hir_*` builders are helpers defined elsewhere in this test
    // module (not visible here).
2644    fn class_bracketed_intersect() {
        // Unicode mode (the default for `t`, judging by the `hir_uclass`
        // expectations).
2645        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2646        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2647        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2648        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2649        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2650        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2651        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2652        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
2653        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2654
        // Byte mode: the same intersections are expected as byte classes.
2655        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
2656        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2657        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2658        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
2659        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
2660        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
2661
        // Case-insensitive Unicode mode; each assertion is compiled only
        // when the `unicode-case` feature is enabled.
2662        #[cfg(feature = "unicode-case")]
2663        assert_eq!(
2664            t("(?i)[abc&&b-c]"),
2665            hir_case_fold(hir_uclass(&[('b', 'c')]))
2666        );
2667        #[cfg(feature = "unicode-case")]
2668        assert_eq!(
2669            t("(?i)[abc&&[b-c]]"),
2670            hir_case_fold(hir_uclass(&[('b', 'c')]))
2671        );
2672        #[cfg(feature = "unicode-case")]
2673        assert_eq!(
2674            t("(?i)[[abc]&&[b-c]]"),
2675            hir_case_fold(hir_uclass(&[('b', 'c')]))
2676        );
2677        #[cfg(feature = "unicode-case")]
2678        assert_eq!(
2679            t("(?i)[a-z&&b-y&&c-x]"),
2680            hir_case_fold(hir_uclass(&[('c', 'x')]))
2681        );
2682        #[cfg(feature = "unicode-case")]
2683        assert_eq!(
2684            t("(?i)[c-da-b&&a-d]"),
2685            hir_case_fold(hir_uclass(&[('a', 'd')]))
2686        );
2687        #[cfg(feature = "unicode-case")]
2688        assert_eq!(
2689            t("(?i)[a-d&&c-da-b]"),
2690            hir_case_fold(hir_uclass(&[('a', 'd')]))
2691        );
2692
        // Case-insensitive byte mode; these assertions carry no feature gate.
2693        assert_eq!(
2694            t("(?i-u)[abc&&b-c]"),
2695            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2696        );
2697        assert_eq!(
2698            t("(?i-u)[abc&&[b-c]]"),
2699            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2700        );
2701        assert_eq!(
2702            t("(?i-u)[[abc]&&[b-c]]"),
2703            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2704        );
2705        assert_eq!(
2706            t("(?i-u)[a-z&&b-y&&c-x]"),
2707            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
2708        );
2709        assert_eq!(
2710            t("(?i-u)[c-da-b&&a-d]"),
2711            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2712        );
2713        assert_eq!(
2714            t("(?i-u)[a-d&&c-da-b]"),
2715            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2716        );
2717
2718        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
2719        // `^` is also allowed to be unescaped after `&&`.
2720        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
2721        // `]` needs to be escaped after `&&` since it's not at start of class.
2722        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
        // `-` and `&` can themselves be operands of an intersection.
2723        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
2724        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
2725        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
2726        // Test precedence.
        // The right-hand operand is the whole union `[^c-g]z`; `z` lies
        // outside `a-w`, so only `a-b` and `h-w` are expected to remain.
2727        assert_eq!(
2728            t(r"[a-w&&[^c-g]z]"),
2729            hir_uclass(&[('a', 'b'), ('h', 'w')])
2730        );
2731    }
2732
2733    #[test]
    // Checks how negation combines with `&&` (intersection) in bracketed
    // classes, first in Unicode mode and then in byte mode via `t_bytes`.
    // `t`, `t_bytes`, `ascii_class` and the `hir_*` builders are helpers
    // defined elsewhere in this test module (not visible here).
2734    fn class_bracketed_intersect_negate() {
        // Unicode mode. Assertions that use `\w`/`\d` are compiled only when
        // the `unicode-perl` feature is enabled.
2735        #[cfg(feature = "unicode-perl")]
2736        assert_eq!(
2737            t(r"[^\w&&\d]"),
2738            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2739        );
2740        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2741        #[cfg(feature = "unicode-perl")]
2742        assert_eq!(
2743            t(r"[^[\w&&\d]]"),
2744            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2745        );
        // A double negation is expected to cancel out.
2746        #[cfg(feature = "unicode-perl")]
2747        assert_eq!(
2748            t(r"[^[^\w&&\d]]"),
2749            hir_uclass_query(ClassQuery::Binary("digit"))
2750        );
        // Intersecting "not word" with "not digit" is expected to be just
        // "not word".
2751        #[cfg(feature = "unicode-perl")]
2752        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2753
        // Byte mode. Under `(?-u)` the expectations are built from ASCII
        // classes only, so none of these assertions is feature gated; this
        // keeps the first one consistent with the three that follow it.
2755        assert_eq!(
2756            t_bytes(r"(?-u)[^\w&&\d]"),
2757            hir_negate(hir_bclass_from_char(ascii_class(
2758                &ast::ClassAsciiKind::Digit
2759            )))
2760        );
2761        assert_eq!(
2762            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
2763            hir_negate(hir_bclass(&[(b'a', b'c')]))
2764        );
2765        assert_eq!(
2766            t_bytes(r"(?-u)[^[\w&&\d]]"),
2767            hir_negate(hir_bclass_from_char(ascii_class(
2768                &ast::ClassAsciiKind::Digit
2769            )))
2770        );
2771        assert_eq!(
2772            t_bytes(r"(?-u)[^[^\w&&\d]]"),
2773            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2774        );
2775        assert_eq!(
2776            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
2777            hir_negate(hir_bclass_from_char(ascii_class(
2778                &ast::ClassAsciiKind::Word
2779            )))
2780        );
2781    }
2782
2783    #[test]
    // Checks translation of the `--` (difference) operator inside bracketed
    // classes, once in Unicode mode and once in byte mode. `t` and the
    // `hir_*` builders are helpers defined elsewhere in this test module.
2784    fn class_bracketed_difference() {
        // Letters minus the ASCII range; compiled only when the
        // `unicode-gencat` feature is enabled.
2785        #[cfg(feature = "unicode-gencat")]
2786        assert_eq!(
2787            t(r"[\pL--[:ascii:]]"),
2788            hir_difference(
2789                hir_uclass_query(ClassQuery::Binary("letter")),
2790                hir_uclass(&[('\0', '\x7F')])
2791            )
2792        );
2793
        // ASCII alphabetic minus ASCII lowercase leaves `A-Z`.
2794        assert_eq!(
2795            t(r"(?-u)[[:alpha:]--[:lower:]]"),
2796            hir_bclass(&[(b'A', b'Z')])
2797        );
2798    }
2799
2800    #[test]
    // Checks translation of the `~~` (symmetric difference) operator inside
    // bracketed classes: the result holds what is in exactly one operand.
    // `t` and the `hir_*` builders are helpers defined elsewhere in this test
    // module.
2801    fn class_bracketed_symmetric_difference() {
        // Code points that differ between the Greek `sc` and `scx` property
        // values; compiled only when the `unicode-script` feature is enabled.
        // NOTE(review): the expected ranges presumably depend on the bundled
        // Unicode tables; confirm when those tables are updated.
2802        #[cfg(feature = "unicode-script")]
2803        assert_eq!(
2804            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
2805            hir_uclass(&[
2806                ('\u{0342}', '\u{0342}'),
2807                ('\u{0345}', '\u{0345}'),
2808                ('\u{1DC0}', '\u{1DC1}'),
2809            ])
2810        );
        // `a-g` and `c-j` overlap on `c-g`, so `a-b` and `h-j` remain.
2811        assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
2812
        // The same pattern in byte mode.
2813        assert_eq!(
2814            t(r"(?-u)[a-g~~c-j]"),
2815            hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
2816        );
2817    }
2818
2819    #[test]
    // Checks that patterns written with the `(?x)` flag translate to the same
    // HIR as their compact forms: whitespace and `# ...` text may appear
    // inside escapes, `\p{...}` and counted repetitions. Several inputs are
    // multi-line raw strings, so their exact text is part of the test.
    // `t` and the `hir_*` builders are helpers defined elsewhere in this test
    // module.
2820    fn ignore_whitespace() {
        // The space ends the `\12` escape (expected to be `\n`) before `3`.
2821        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
        // Hex escape in braces, with whitespace and comments interleaved.
2822        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
2823        assert_eq!(
2824            t(r"(?x)\x # comment
2825{ # comment
2826    53 # comment
2827} #comment"),
2828            hir_lit("S")
2829        );
2830
        // Two-digit hex escape without braces; whitespace may separate `\x`
        // from its digits and the digits from each other.
2831        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
2832        assert_eq!(
2833            t(r"(?x)\x # comment
2834        53 # comment"),
2835            hir_lit("S")
2836        );
2837        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
2838
        // Unicode property class spread over several lines; compiled only
        // when the `unicode-gencat` feature is enabled.
2839        #[cfg(feature = "unicode-gencat")]
2840        assert_eq!(
2841            t(r"(?x)\p # comment
2842{ # comment
2843    Separator # comment
2844} # comment"),
2845            hir_uclass_query(ClassQuery::Binary("separator"))
2846        );
2847
        // Counted repetition `{5,10}` spread over several lines.
2848        assert_eq!(
2849            t(r"(?x)a # comment
2850{ # comment
2851    5 # comment
2852    , # comment
2853    10 # comment
2854} # comment"),
2855            hir_range(
2856                true,
2857                hir::RepetitionRange::Bounded(5, 10),
2858                hir_lit("a")
2859            )
2860        );
2861
        // An escaped space is kept as a literal space; the unescaped space
        // and the trailing comment are dropped.
2862        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
2863    }
2864
2865    #[test]
    // Checks the `is_always_utf8` analysis on translated expressions. All
    // patterns go through `t_bytes`, a helper defined elsewhere in this test
    // module (presumably translating with invalid UTF-8 permitted, since the
    // negative cases must translate successfully -- confirm against its
    // definition).
2866    fn analysis_is_always_utf8() {
2867        // Positive examples.
        // With Unicode mode left on, even `\xFF`, `[^a]` and `\B` are
        // expected to stay within valid UTF-8.
2868        assert!(t_bytes(r"a").is_always_utf8());
2869        assert!(t_bytes(r"ab").is_always_utf8());
2870        assert!(t_bytes(r"(?-u)a").is_always_utf8());
2871        assert!(t_bytes(r"(?-u)ab").is_always_utf8());
2872        assert!(t_bytes(r"\xFF").is_always_utf8());
2873        assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
2874        assert!(t_bytes(r"[^a]").is_always_utf8());
2875        assert!(t_bytes(r"[^a][^a]").is_always_utf8());
2876        assert!(t_bytes(r"\b").is_always_utf8());
2877        assert!(t_bytes(r"\B").is_always_utf8());
2878        assert!(t_bytes(r"(?-u)\b").is_always_utf8());
2879
2880        // Negative examples.
        // The same constructs under `(?-u)` are expected to lose the
        // guarantee.
2881        assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
2882        assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
2883        assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
2884        assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
2885        assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
2886    }
2887
2888    #[test]
    // Checks the `is_all_assertions` analysis: it is expected to hold for
    // expressions built only from anchors and word boundaries (including
    // concatenations, alternations, groups and repetitions of them) and to
    // fail once a literal is involved. `t` is a helper defined elsewhere in
    // this test module.
2889    fn analysis_is_all_assertions() {
2890        // Positive examples.
2891        assert!(t(r"\b").is_all_assertions());
2892        assert!(t(r"\B").is_all_assertions());
2893        assert!(t(r"^").is_all_assertions());
2894        assert!(t(r"$").is_all_assertions());
2895        assert!(t(r"\A").is_all_assertions());
2896        assert!(t(r"\z").is_all_assertions());
2897        assert!(t(r"$^\z\A\b\B").is_all_assertions());
2898        assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
2899        assert!(t(r"^$|$^").is_all_assertions());
        // Empty groups and repetitions around assertions still count.
2900        assert!(t(r"((\b)+())*^").is_all_assertions());
2901
2902        // Negative examples.
2903        assert!(!t(r"^a").is_all_assertions());
2904    }
2905
2906    #[test]
    // Checks the four anchoring analyses (`is_anchored_start`,
    // `is_anchored_end`, `is_line_anchored_start`, `is_line_anchored_end`) on
    // translated expressions. Positive cases are grouped in quartets, one
    // assertion per analysis. `t` is a helper defined elsewhere in this test
    // module.
2907    fn analysis_is_anchored() {
2908        // Positive examples.
2909        assert!(t(r"^").is_anchored_start());
2910        assert!(t(r"$").is_anchored_end());
2911        assert!(t(r"^").is_line_anchored_start());
2912        assert!(t(r"$").is_line_anchored_end());
2913
2914        assert!(t(r"^^").is_anchored_start());
2915        assert!(t(r"$$").is_anchored_end());
2916        assert!(t(r"^^").is_line_anchored_start());
2917        assert!(t(r"$$").is_line_anchored_end());
2918
2919        assert!(t(r"^$").is_anchored_start());
2920        assert!(t(r"^$").is_anchored_end());
2921        assert!(t(r"^$").is_line_anchored_start());
2922        assert!(t(r"^$").is_line_anchored_end());
2923
2924        assert!(t(r"^foo").is_anchored_start());
2925        assert!(t(r"foo$").is_anchored_end());
2926        assert!(t(r"^foo").is_line_anchored_start());
2927        assert!(t(r"foo$").is_line_anchored_end());
2928
        // Every branch of an alternation carries the anchor.
2929        assert!(t(r"^foo|^bar").is_anchored_start());
2930        assert!(t(r"foo$|bar$").is_anchored_end());
2931        assert!(t(r"^foo|^bar").is_line_anchored_start());
2932        assert!(t(r"foo$|bar$").is_line_anchored_end());
2933
2934        assert!(t(r"^(foo|bar)").is_anchored_start());
2935        assert!(t(r"(foo|bar)$").is_anchored_end());
2936        assert!(t(r"^(foo|bar)").is_line_anchored_start());
2937        assert!(t(r"(foo|bar)$").is_line_anchored_end());
2938
        // Repetitions that require at least one match keep the anchor.
2939        assert!(t(r"^+").is_anchored_start());
2940        assert!(t(r"$+").is_anchored_end());
2941        assert!(t(r"^+").is_line_anchored_start());
2942        assert!(t(r"$+").is_line_anchored_end());
2943        assert!(t(r"^++").is_anchored_start());
2944        assert!(t(r"$++").is_anchored_end());
2945        assert!(t(r"^++").is_line_anchored_start());
2946        assert!(t(r"$++").is_line_anchored_end());
2947        assert!(t(r"(^)+").is_anchored_start());
2948        assert!(t(r"($)+").is_anchored_end());
2949        assert!(t(r"(^)+").is_line_anchored_start());
2950        assert!(t(r"($)+").is_line_anchored_end());
2951
        // Anchors in "reverse" order: `$^` is expected to be anchored at both
        // ends, in both the text and the line sense, so all four analyses are
        // checked for it, as for the `$^|^$` alternation below.
2952        assert!(t(r"$^").is_anchored_start());
2953        assert!(t(r"$^").is_anchored_end());
2954        assert!(t(r"$^").is_line_anchored_start());
2955        assert!(t(r"$^").is_line_anchored_end());
2956        assert!(t(r"$^|^$").is_anchored_start());
2957        assert!(t(r"$^|^$").is_anchored_end());
2958        assert!(t(r"$^|^$").is_line_anchored_start());
2959        assert!(t(r"$^|^$").is_line_anchored_end());
2960
        // Other assertions (`\b`, multi-line anchors) next to a text anchor
        // do not hide it.
2961        assert!(t(r"\b^").is_anchored_start());
2962        assert!(t(r"$\b").is_anchored_end());
2963        assert!(t(r"\b^").is_line_anchored_start());
2964        assert!(t(r"$\b").is_line_anchored_end());
2965        assert!(t(r"^(?m:^)").is_anchored_start());
2966        assert!(t(r"(?m:$)$").is_anchored_end());
2967        assert!(t(r"^(?m:^)").is_line_anchored_start());
2968        assert!(t(r"(?m:$)$").is_line_anchored_end());
2969        assert!(t(r"(?m:^)^").is_anchored_start());
2970        assert!(t(r"$(?m:$)").is_anchored_end());
2971        assert!(t(r"(?m:^)^").is_line_anchored_start());
2972        assert!(t(r"$(?m:$)").is_line_anchored_end());
2973
2974        // Negative examples.
        // Multi-line anchors alone do not count as text anchors.
2975        assert!(!t(r"(?m)^").is_anchored_start());
2976        assert!(!t(r"(?m)$").is_anchored_end());
2977        assert!(!t(r"(?m:^$)|$^").is_anchored_start());
2978        assert!(!t(r"(?m:^$)|$^").is_anchored_end());
2979        assert!(!t(r"$^|(?m:^$)").is_anchored_start());
2980        assert!(!t(r"$^|(?m:^$)").is_anchored_end());
2981
        // A literal between the anchor and the relevant edge breaks it.
2982        assert!(!t(r"a^").is_anchored_start());
2983        assert!(!t(r"$a").is_anchored_start());
2984        assert!(!t(r"a^").is_line_anchored_start());
2985        assert!(!t(r"$a").is_line_anchored_start());
2986
2987        assert!(!t(r"a^").is_anchored_end());
2988        assert!(!t(r"$a").is_anchored_end());
2989        assert!(!t(r"a^").is_line_anchored_end());
2990        assert!(!t(r"$a").is_line_anchored_end());
2991
        // One unanchored alternation branch is enough to lose the property.
2992        assert!(!t(r"^foo|bar").is_anchored_start());
2993        assert!(!t(r"foo|bar$").is_anchored_end());
2994        assert!(!t(r"^foo|bar").is_line_anchored_start());
2995        assert!(!t(r"foo|bar$").is_line_anchored_end());
2996
        // Repetitions that may match zero times lose the anchor.
2997        assert!(!t(r"^*").is_anchored_start());
2998        assert!(!t(r"$*").is_anchored_end());
2999        assert!(!t(r"^*").is_line_anchored_start());
3000        assert!(!t(r"$*").is_line_anchored_end());
3001        assert!(!t(r"^*+").is_anchored_start());
3002        assert!(!t(r"$*+").is_anchored_end());
3003        assert!(!t(r"^*+").is_line_anchored_start());
3004        assert!(!t(r"$*+").is_line_anchored_end());
3005        assert!(!t(r"^+*").is_anchored_start());
3006        assert!(!t(r"$+*").is_anchored_end());
3007        assert!(!t(r"^+*").is_line_anchored_start());
3008        assert!(!t(r"$+*").is_line_anchored_end());
3009        assert!(!t(r"(^)*").is_anchored_start());
3010        assert!(!t(r"($)*").is_anchored_end());
3011        assert!(!t(r"(^)*").is_line_anchored_start());
3012        assert!(!t(r"($)*").is_line_anchored_end());
3013    }
3014
3015    #[test]
    // Checks `is_line_anchored_start` / `is_line_anchored_end` on patterns
    // that use multi-line (`(?m)`) anchors, alone or in alternation with text
    // anchors. `t` is a helper defined elsewhere in this test module.
3016    fn analysis_is_line_anchored() {
3017        assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
3018        assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
3019
3020        assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
3021        assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
3022
3023        assert!(t(r"(?m)^").is_line_anchored_start());
3024        assert!(t(r"(?m)$").is_line_anchored_end());
3025
        // Mixing multi-line and text anchors across alternation branches is
        // still expected to be line anchored at both ends.
3026        assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
3027        assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
3028
3029        assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
3030        assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
3031    }
3032
3033    #[test]
    // Checks `is_any_anchored_start` / `is_any_anchored_end`: expected to
    // hold for `^`/`\A` and `$`/`\z` respectively, and not for multi-line
    // anchors or for the anchor of the opposite end. `t` is a helper defined
    // elsewhere in this test module.
3034    fn analysis_is_any_anchored() {
3035        // Positive examples.
3036        assert!(t(r"^").is_any_anchored_start());
3037        assert!(t(r"$").is_any_anchored_end());
3038        assert!(t(r"\A").is_any_anchored_start());
3039        assert!(t(r"\z").is_any_anchored_end());
3040
3041        // Negative examples.
3042        assert!(!t(r"(?m)^").is_any_anchored_start());
3043        assert!(!t(r"(?m)$").is_any_anchored_end());
3044        assert!(!t(r"$").is_any_anchored_start());
3045        assert!(!t(r"^").is_any_anchored_end());
3046    }
3047
3048    #[test]
    // Checks the `is_match_empty` analysis, i.e. whether a translated
    // expression is reported as able to match the empty string. `t` and
    // `t_bytes` are helpers defined elsewhere in this test module.
3049    fn analysis_is_match_empty() {
3050        // Positive examples.
3051        assert!(t(r"").is_match_empty());
3052        assert!(t(r"()").is_match_empty());
3053        assert!(t(r"()*").is_match_empty());
3054        assert!(t(r"()+").is_match_empty());
3055        assert!(t(r"()?").is_match_empty());
        // Repetitions whose minimum count is zero.
3056        assert!(t(r"a*").is_match_empty());
3057        assert!(t(r"a?").is_match_empty());
3058        assert!(t(r"a{0}").is_match_empty());
3059        assert!(t(r"a{0,}").is_match_empty());
3060        assert!(t(r"a{0,1}").is_match_empty());
3061        assert!(t(r"a{0,10}").is_match_empty());
3062        #[cfg(feature = "unicode-gencat")]
3063        assert!(t(r"\pL*").is_match_empty());
        // One empty-matching alternation branch is enough; a concatenation
        // needs every part to match empty.
3064        assert!(t(r"a*|b").is_match_empty());
3065        assert!(t(r"b|a*").is_match_empty());
3066        assert!(t(r"a*a?(abcd)*").is_match_empty());
        // Anchors and the negated word boundary count as matching empty.
3067        assert!(t(r"^").is_match_empty());
3068        assert!(t(r"$").is_match_empty());
3069        assert!(t(r"(?m)^").is_match_empty());
3070        assert!(t(r"(?m)$").is_match_empty());
3071        assert!(t(r"\A").is_match_empty());
3072        assert!(t(r"\z").is_match_empty());
3073        assert!(t(r"\B").is_match_empty());
3074        assert!(t_bytes(r"(?-u)\B").is_match_empty());
3075
3076        // Negative examples.
3077        assert!(!t(r"a+").is_match_empty());
3078        assert!(!t(r"a{1}").is_match_empty());
3079        assert!(!t(r"a{1,}").is_match_empty());
3080        assert!(!t(r"a{1,2}").is_match_empty());
3081        assert!(!t(r"a{1,10}").is_match_empty());
3082        assert!(!t(r"b|a").is_match_empty());
3083        assert!(!t(r"a*a+(abcd)*").is_match_empty());
        // Unlike `\B`, the plain word boundary is expected not to count.
3084        assert!(!t(r"\b").is_match_empty());
3085        assert!(!t(r"(?-u)\b").is_match_empty());
3086    }
3087
3088    #[test]
    // Checks the `is_literal` analysis: expected to hold for the empty
    // pattern and plain literal strings (flags alone do not matter), and to
    // fail for anchors, alternations, groups, repetitions and classes.
    // `t` is a helper defined elsewhere in this test module.
3089    fn analysis_is_literal() {
3090        // Positive examples.
3091        assert!(t(r"").is_literal());
3092        assert!(t(r"a").is_literal());
3093        assert!(t(r"ab").is_literal());
3094        assert!(t(r"abc").is_literal());
3095        assert!(t(r"(?m)abc").is_literal());
3096
3097        // Negative examples.
3098        assert!(!t(r"^").is_literal());
3099        assert!(!t(r"a|b").is_literal());
3100        assert!(!t(r"(a)").is_literal());
3101        assert!(!t(r"a+").is_literal());
3102        assert!(!t(r"foo(a)").is_literal());
3103        assert!(!t(r"(a)foo").is_literal());
        // Even a single-character class is not reported as a literal.
3104        assert!(!t(r"[a]").is_literal());
3105    }
3106
3107    #[test]
    // Checks the `is_alternation_literal` analysis: expected to hold for
    // plain literals and for alternations whose branches are all plain
    // literals, and to fail as soon as any part is an anchor, group,
    // repetition or class. `t` is a helper defined elsewhere in this test
    // module.
3108    fn analysis_is_alternation_literal() {
3109        // Positive examples.
3110        assert!(t(r"").is_alternation_literal());
3111        assert!(t(r"a").is_alternation_literal());
3112        assert!(t(r"ab").is_alternation_literal());
3113        assert!(t(r"abc").is_alternation_literal());
3114        assert!(t(r"(?m)abc").is_alternation_literal());
3115        assert!(t(r"a|b").is_alternation_literal());
3116        assert!(t(r"a|b|c").is_alternation_literal());
3117        assert!(t(r"foo|bar").is_alternation_literal());
3118        assert!(t(r"foo|bar|baz").is_alternation_literal());
3119
3120        // Negative examples.
3121        assert!(!t(r"^").is_alternation_literal());
3122        assert!(!t(r"(a)").is_alternation_literal());
3123        assert!(!t(r"a+").is_alternation_literal());
3124        assert!(!t(r"foo(a)").is_alternation_literal());
3125        assert!(!t(r"(a)foo").is_alternation_literal());
3126        assert!(!t(r"[a]").is_alternation_literal());
        // A single non-literal branch disqualifies the whole alternation.
3127        assert!(!t(r"[a]|b").is_alternation_literal());
3128        assert!(!t(r"a|[b]").is_alternation_literal());
3129        assert!(!t(r"(a)|b").is_alternation_literal());
3130        assert!(!t(r"a|(b)").is_alternation_literal());
3131    }
3132}