regex_syntax/ast/
parse.rs

1/*!
2This module provides a regular expression parser.
3*/
4
5use std::borrow::Borrow;
6use std::cell::{Cell, RefCell};
7use std::mem;
8use std::result;
9
10use ast::{self, Ast, Position, Span};
11use either::Either;
12
13use is_meta_character;
14
15type Result<T> = result::Result<T, ast::Error>;
16
/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
/// is used as intermediate state in the parser.
///
/// This does not include ASCII character classes, since they can only appear
/// within a set character class.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal. Becomes `Ast::Literal` via `into_ast`, and is the only
    /// variant accepted by `into_class_literal`.
    Literal(ast::Literal),
    /// An assertion. Becomes `Ast::Assertion` via `into_ast`; rejected by
    /// `into_class_set_item`.
    Assertion(ast::Assertion),
    /// A dot, represented only by its span. Becomes `Ast::Dot` via
    /// `into_ast`; rejected by `into_class_set_item`.
    Dot(Span),
    /// A Perl character class. Becomes `Ast::Class(ast::Class::Perl(..))`
    /// via `into_ast`.
    Perl(ast::ClassPerl),
    /// A Unicode character class. Becomes
    /// `Ast::Class(ast::Class::Unicode(..))` via `into_ast`.
    Unicode(ast::ClassUnicode),
}
31
32impl Primitive {
33    /// Return the span of this primitive.
34    fn span(&self) -> &Span {
35        match *self {
36            Primitive::Literal(ref x) => &x.span,
37            Primitive::Assertion(ref x) => &x.span,
38            Primitive::Dot(ref span) => span,
39            Primitive::Perl(ref x) => &x.span,
40            Primitive::Unicode(ref x) => &x.span,
41        }
42    }
43
44    /// Convert this primitive into a proper AST.
45    fn into_ast(self) -> Ast {
46        match self {
47            Primitive::Literal(lit) => Ast::Literal(lit),
48            Primitive::Assertion(assert) => Ast::Assertion(assert),
49            Primitive::Dot(span) => Ast::Dot(span),
50            Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51            Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52        }
53    }
54
55    /// Convert this primitive into an item in a character class.
56    ///
57    /// If this primitive is not a legal item (i.e., an assertion or a dot),
58    /// then return an error.
59    fn into_class_set_item<P: Borrow<Parser>>(
60        self,
61        p: &ParserI<P>,
62    ) -> Result<ast::ClassSetItem> {
63        use self::Primitive::*;
64        use ast::ClassSetItem;
65
66        match self {
67            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71        }
72    }
73
74    /// Convert this primitive into a literal in a character class. In
75    /// particular, literals are the only valid items that can appear in
76    /// ranges.
77    ///
78    /// If this primitive is not a legal item (i.e., a class, assertion or a
79    /// dot), then return an error.
80    fn into_class_literal<P: Borrow<Parser>>(
81        self,
82        p: &ParserI<P>,
83    ) -> Result<ast::Literal> {
84        use self::Primitive::*;
85
86        match self {
87            Literal(lit) => Ok(lit),
88            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89        }
90    }
91}
92
/// Reports whether `c` is an ASCII hexadecimal digit, i.e., one of `0-9`,
/// `a-f` or `A-F`.
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
97
/// Reports whether `c` may appear in a capture group name.
///
/// Underscores and ASCII letters are always permitted. ASCII digits are
/// permitted everywhere except in the leading position, which the caller
/// indicates by passing `first` as true.
fn is_capture_char(c: char, first: bool) -> bool {
    let digit_allowed = !first;
    c == '_'
        || c.is_ascii_alphabetic()
        || (digit_allowed && c.is_ascii_digit())
}
108
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether built parsers start out in verbose (whitespace insensitive)
    /// mode. Set via `ignore_whitespace`; `false` in a new builder.
    ignore_whitespace: bool,
    /// The nest limit handed to built parsers. Set via `nest_limit`; `250`
    /// in a new builder.
    nest_limit: u32,
    /// Whether built parsers support octal syntax. Set via `octal`; `false`
    /// in a new builder.
    octal: bool,
}
118
119impl Default for ParserBuilder {
120    fn default() -> ParserBuilder {
121        ParserBuilder::new()
122    }
123}
124
125impl ParserBuilder {
126    /// Create a new parser builder with a default configuration.
127    pub fn new() -> ParserBuilder {
128        ParserBuilder {
129            ignore_whitespace: false,
130            nest_limit: 250,
131            octal: false,
132        }
133    }
134
135    /// Build a parser from this configuration with the given pattern.
136    pub fn build(&self) -> Parser {
137        Parser {
138            pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
139            capture_index: Cell::new(0),
140            nest_limit: self.nest_limit,
141            octal: self.octal,
142            initial_ignore_whitespace: self.ignore_whitespace,
143            ignore_whitespace: Cell::new(self.ignore_whitespace),
144            comments: RefCell::new(vec![]),
145            stack_group: RefCell::new(vec![]),
146            stack_class: RefCell::new(vec![]),
147            capture_names: RefCell::new(vec![]),
148            scratch: RefCell::new(String::new()),
149        }
150    }
151
152    /// Set the nesting limit for this parser.
153    ///
154    /// The nesting limit controls how deep the abstract syntax tree is allowed
155    /// to be. If the AST exceeds the given limit (e.g., with too many nested
156    /// groups), then an error is returned by the parser.
157    ///
158    /// The purpose of this limit is to act as a heuristic to prevent stack
159    /// overflow for consumers that do structural induction on an `Ast` using
160    /// explicit recursion. While this crate never does this (instead using
161    /// constant stack space and moving the call stack to the heap), other
162    /// crates may.
163    ///
164    /// This limit is not checked until the entire Ast is parsed. Therefore,
165    /// if callers want to put a limit on the amount of heap space used, then
166    /// they should impose a limit on the length, in bytes, of the concrete
167    /// pattern string. In particular, this is viable since this parser
168    /// implementation will limit itself to heap space proportional to the
169    /// lenth of the pattern string.
170    ///
171    /// Note that a nest limit of `0` will return a nest limit error for most
172    /// patterns but not all. For example, a nest limit of `0` permits `a` but
173    /// not `ab`, since `ab` requires a concatenation, which results in a nest
174    /// depth of `1`. In general, a nest limit is not something that manifests
175    /// in an obvious way in the concrete syntax, therefore, it should not be
176    /// used in a granular way.
177    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
178        self.nest_limit = limit;
179        self
180    }
181
182    /// Whether to support octal syntax or not.
183    ///
184    /// Octal syntax is a little-known way of uttering Unicode codepoints in
185    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
186    /// `\141` are all equivalent regular expressions, where the last example
187    /// shows octal syntax.
188    ///
189    /// While supporting octal syntax isn't in and of itself a problem, it does
190    /// make good error messages harder. That is, in PCRE based regex engines,
191    /// syntax like `\0` invokes a backreference, which is explicitly
192    /// unsupported in Rust's regex engine. However, many users expect it to
193    /// be supported. Therefore, when octal support is disabled, the error
194    /// message will explicitly mention that backreferences aren't supported.
195    ///
196    /// Octal syntax is disabled by default.
197    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
198        self.octal = yes;
199        self
200    }
201
202    /// Enable verbose mode in the regular expression.
203    ///
204    /// When enabled, verbose mode permits insigificant whitespace in many
205    /// places in the regular expression, as well as comments. Comments are
206    /// started using `#` and continue until the end of the line.
207    ///
208    /// By default, this is disabled. It may be selectively enabled in the
209    /// regular expression by using the `x` flag regardless of this setting.
210    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
211        self.ignore_whitespace = yes;
212        self
213    }
214}
215
/// A regular expression parser.
///
/// This parses a string representation of a regular expression into an
/// abstract syntax tree. The size of the tree is proportional to the length
/// of the regular expression pattern.
///
/// A `Parser` can be configured in more detail via a
/// [`ParserBuilder`](struct.ParserBuilder.html).
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser.
    pos: Cell<Position>,
    /// The current capture index.
    capture_index: Cell<u32>,
    /// The maximum number of open parens/brackets allowed. If the parser
    /// exceeds this number, then an error is returned.
    nest_limit: u32,
    /// Whether to support octal syntax or not. When `false`, the parser will
    /// return an error helpfully pointing out that backreferences are not
    /// supported.
    octal: bool,
    /// The initial setting for `ignore_whitespace` as provided by
    /// `ParserBuilder`. This is used when resetting the parser's state.
    initial_ignore_whitespace: bool,
    /// Whether whitespace should be ignored. When enabled, comments are
    /// also permitted.
    ignore_whitespace: Cell<bool>,
    /// A list of comments, in order of appearance.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of grouped sub-expressions, including alternations.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character classes. This is only non-empty when
    /// parsing a class.
    stack_class: RefCell<Vec<ClassState>>,
    /// A sorted sequence of capture names. This is used to detect duplicate
    /// capture names and report an error if one is detected.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A scratch buffer used in various places. Mostly this is used to
    /// accumulate relevant characters from parts of a pattern.
    scratch: RefCell<String>,
}
257
/// ParserI is the internal parser implementation.
///
/// We use this separate type so that we can carry the provided pattern string
/// along with us. In particular, a `Parser`'s internal state is not tied to
/// any one pattern, but a `ParserI` is.
///
/// This type also lets us use `ParserI<&Parser>` in production code while
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
/// work against the internal interface of the parser.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state/configuration. The methods on this type require
    /// `P: Borrow<Parser>` and reach the state through that borrow.
    parser: P,
    /// The full regular expression provided by the user.
    pattern: &'s str,
}
274
/// GroupState represents a single stack frame while parsing nested groups
/// and alternations. Each frame records the state up to an opening parenthesis
/// or an alternation operator `|`.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: ast::Concat,
        /// The group that has been opened. Its sub-AST is always empty.
        group: ast::Group,
        /// Whether the `x` flag was enabled just before this group was
        /// opened. This is the value restored once the group is closed.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation.
    Alternation(ast::Alternation),
}
295
/// ClassState represents a single stack frame while parsing character classes.
/// Each frame records the state up to an intersection, difference, symmetric
/// difference or nested class.
///
/// Note that a parser's character class stack is only non-empty when parsing
/// a character class. In all other cases, it is empty.
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the
    /// stored set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., &&, -- or ~~.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: ast::ClassSet,
    },
}
322
323impl Parser {
324    /// Create a new parser with a default configuration.
325    ///
326    /// The parser can be run with either the `parse` or `parse_with_comments`
327    /// methods. The parse methods return an abstract syntax tree.
328    ///
329    /// To set configuration options on the parser, use
330    /// [`ParserBuilder`](struct.ParserBuilder.html).
331    pub fn new() -> Parser {
332        ParserBuilder::new().build()
333    }
334
335    /// Parse the regular expression into an abstract syntax tree.
336    pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
337        ParserI::new(self, pattern).parse()
338    }
339
340    /// Parse the regular expression and return an abstract syntax tree with
341    /// all of the comments found in the pattern.
342    pub fn parse_with_comments(
343        &mut self,
344        pattern: &str,
345    ) -> Result<ast::WithComments> {
346        ParserI::new(self, pattern).parse_with_comments()
347    }
348
349    /// Reset the internal state of a parser.
350    ///
351    /// This is called at the beginning of every parse. This prevents the
352    /// parser from running with inconsistent state (say, if a previous
353    /// invocation returned an error and the parser is reused).
354    fn reset(&self) {
355        // These settings should be in line with the construction
356        // in `ParserBuilder::build`.
357        self.pos.set(Position { offset: 0, line: 1, column: 1 });
358        self.ignore_whitespace.set(self.initial_ignore_whitespace);
359        self.comments.borrow_mut().clear();
360        self.stack_group.borrow_mut().clear();
361        self.stack_class.borrow_mut().clear();
362    }
363}
364
365impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
366    /// Build an internal parser from a parser configuration and a pattern.
367    fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
368        ParserI { parser: parser, pattern: pattern }
369    }
370
371    /// Return a reference to the parser state.
372    fn parser(&self) -> &Parser {
373        self.parser.borrow()
374    }
375
376    /// Return a reference to the pattern being parsed.
377    fn pattern(&self) -> &str {
378        self.pattern.borrow()
379    }
380
381    /// Create a new error with the given span and error type.
382    fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
383        ast::Error {
384            kind: kind,
385            pattern: self.pattern().to_string(),
386            span: span,
387        }
388    }
389
390    /// Return the current offset of the parser.
391    ///
392    /// The offset starts at `0` from the beginning of the regular expression
393    /// pattern string.
394    fn offset(&self) -> usize {
395        self.parser().pos.get().offset
396    }
397
398    /// Return the current line number of the parser.
399    ///
400    /// The line number starts at `1`.
401    fn line(&self) -> usize {
402        self.parser().pos.get().line
403    }
404
405    /// Return the current column of the parser.
406    ///
407    /// The column number starts at `1` and is reset whenever a `\n` is seen.
408    fn column(&self) -> usize {
409        self.parser().pos.get().column
410    }
411
412    /// Return the next capturing index. Each subsequent call increments the
413    /// internal index.
414    ///
415    /// The span given should correspond to the location of the opening
416    /// parenthesis.
417    ///
418    /// If the capture limit is exceeded, then an error is returned.
419    fn next_capture_index(&self, span: Span) -> Result<u32> {
420        let current = self.parser().capture_index.get();
421        let i = current.checked_add(1).ok_or_else(|| {
422            self.error(span, ast::ErrorKind::CaptureLimitExceeded)
423        })?;
424        self.parser().capture_index.set(i);
425        Ok(i)
426    }
427
428    /// Adds the given capture name to this parser. If this capture name has
429    /// already been used, then an error is returned.
430    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
431        let mut names = self.parser().capture_names.borrow_mut();
432        match names
433            .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
434        {
435            Err(i) => {
436                names.insert(i, cap.clone());
437                Ok(())
438            }
439            Ok(i) => Err(self.error(
440                cap.span,
441                ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
442            )),
443        }
444    }
445
446    /// Return whether the parser should ignore whitespace or not.
447    fn ignore_whitespace(&self) -> bool {
448        self.parser().ignore_whitespace.get()
449    }
450
451    /// Return the character at the current position of the parser.
452    ///
453    /// This panics if the current position does not point to a valid char.
454    fn char(&self) -> char {
455        self.char_at(self.offset())
456    }
457
458    /// Return the character at the given position.
459    ///
460    /// This panics if the given position does not point to a valid char.
461    fn char_at(&self, i: usize) -> char {
462        self.pattern()[i..]
463            .chars()
464            .next()
465            .unwrap_or_else(|| panic!("expected char at offset {}", i))
466    }
467
468    /// Bump the parser to the next Unicode scalar value.
469    ///
470    /// If the end of the input has been reached, then `false` is returned.
471    fn bump(&self) -> bool {
472        if self.is_eof() {
473            return false;
474        }
475        let Position { mut offset, mut line, mut column } = self.pos();
476        if self.char() == '\n' {
477            line = line.checked_add(1).unwrap();
478            column = 1;
479        } else {
480            column = column.checked_add(1).unwrap();
481        }
482        offset += self.char().len_utf8();
483        self.parser().pos.set(Position {
484            offset: offset,
485            line: line,
486            column: column,
487        });
488        self.pattern()[self.offset()..].chars().next().is_some()
489    }
490
491    /// If the substring starting at the current position of the parser has
492    /// the given prefix, then bump the parser to the character immediately
493    /// following the prefix and return true. Otherwise, don't bump the parser
494    /// and return false.
495    fn bump_if(&self, prefix: &str) -> bool {
496        if self.pattern()[self.offset()..].starts_with(prefix) {
497            for _ in 0..prefix.chars().count() {
498                self.bump();
499            }
500            true
501        } else {
502            false
503        }
504    }
505
506    /// Returns true if and only if the parser is positioned at a look-around
507    /// prefix. The conditions under which this returns true must always
508    /// correspond to a regular expression that would otherwise be consider
509    /// invalid.
510    ///
511    /// This should only be called immediately after parsing the opening of
512    /// a group or a set of flags.
513    fn is_lookaround_prefix(&self) -> bool {
514        self.bump_if("?=")
515            || self.bump_if("?!")
516            || self.bump_if("?<=")
517            || self.bump_if("?<!")
518    }
519
520    /// Bump the parser, and if the `x` flag is enabled, bump through any
521    /// subsequent spaces. Return true if and only if the parser is not at
522    /// EOF.
523    fn bump_and_bump_space(&self) -> bool {
524        if !self.bump() {
525            return false;
526        }
527        self.bump_space();
528        !self.is_eof()
529    }
530
531    /// If the `x` flag is enabled (i.e., whitespace insensitivity with
532    /// comments), then this will advance the parser through all whitespace
533    /// and comments to the next non-whitespace non-comment byte.
534    ///
535    /// If the `x` flag is disabled, then this is a no-op.
536    ///
537    /// This should be used selectively throughout the parser where
538    /// arbitrary whitespace is permitted when the `x` flag is enabled. For
539    /// example, `{   5  , 6}` is equivalent to `{5,6}`.
540    fn bump_space(&self) {
541        if !self.ignore_whitespace() {
542            return;
543        }
544        while !self.is_eof() {
545            if self.char().is_whitespace() {
546                self.bump();
547            } else if self.char() == '#' {
548                let start = self.pos();
549                let mut comment_text = String::new();
550                self.bump();
551                while !self.is_eof() {
552                    let c = self.char();
553                    self.bump();
554                    if c == '\n' {
555                        break;
556                    }
557                    comment_text.push(c);
558                }
559                let comment = ast::Comment {
560                    span: Span::new(start, self.pos()),
561                    comment: comment_text,
562                };
563                self.parser().comments.borrow_mut().push(comment);
564            } else {
565                break;
566            }
567        }
568    }
569
570    /// Peek at the next character in the input without advancing the parser.
571    ///
572    /// If the input has been exhausted, then this returns `None`.
573    fn peek(&self) -> Option<char> {
574        if self.is_eof() {
575            return None;
576        }
577        self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
578    }
579
580    /// Like peek, but will ignore spaces when the parser is in whitespace
581    /// insensitive mode.
582    fn peek_space(&self) -> Option<char> {
583        if !self.ignore_whitespace() {
584            return self.peek();
585        }
586        if self.is_eof() {
587            return None;
588        }
589        let mut start = self.offset() + self.char().len_utf8();
590        let mut in_comment = false;
591        for (i, c) in self.pattern()[start..].char_indices() {
592            if c.is_whitespace() {
593                continue;
594            } else if !in_comment && c == '#' {
595                in_comment = true;
596            } else if in_comment && c == '\n' {
597                in_comment = false;
598            } else {
599                start += i;
600                break;
601            }
602        }
603        self.pattern()[start..].chars().next()
604    }
605
606    /// Returns true if the next call to `bump` would return false.
607    fn is_eof(&self) -> bool {
608        self.offset() == self.pattern().len()
609    }
610
611    /// Return the current position of the parser, which includes the offset,
612    /// line and column.
613    fn pos(&self) -> Position {
614        self.parser().pos.get()
615    }
616
617    /// Create a span at the current position of the parser. Both the start
618    /// and end of the span are set.
619    fn span(&self) -> Span {
620        Span::splat(self.pos())
621    }
622
623    /// Create a span that covers the current character.
624    fn span_char(&self) -> Span {
625        let mut next = Position {
626            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
627            line: self.line(),
628            column: self.column().checked_add(1).unwrap(),
629        };
630        if self.char() == '\n' {
631            next.line += 1;
632            next.column = 1;
633        }
634        Span::new(self.pos(), next)
635    }
636
637    /// Parse and push a single alternation on to the parser's internal stack.
638    /// If the top of the stack already has an alternation, then add to that
639    /// instead of pushing a new one.
640    ///
641    /// The concatenation given corresponds to a single alternation branch.
642    /// The concatenation returned starts the next branch and is empty.
643    ///
644    /// This assumes the parser is currently positioned at `|` and will advance
645    /// the parser to the character following `|`.
646    #[inline(never)]
647    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
648        assert_eq!(self.char(), '|');
649        concat.span.end = self.pos();
650        self.push_or_add_alternation(concat);
651        self.bump();
652        Ok(ast::Concat { span: self.span(), asts: vec![] })
653    }
654
655    /// Pushes or adds the given branch of an alternation to the parser's
656    /// internal stack of state.
657    fn push_or_add_alternation(&self, concat: ast::Concat) {
658        use self::GroupState::*;
659
660        let mut stack = self.parser().stack_group.borrow_mut();
661        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
662            alts.asts.push(concat.into_ast());
663            return;
664        }
665        stack.push(Alternation(ast::Alternation {
666            span: Span::new(concat.span.start, self.pos()),
667            asts: vec![concat.into_ast()],
668        }));
669    }
670
671    /// Parse and push a group AST (and its parent concatenation) on to the
672    /// parser's internal stack. Return a fresh concatenation corresponding
673    /// to the group's sub-AST.
674    ///
675    /// If a set of flags was found (with no group), then the concatenation
676    /// is returned with that set of flags added.
677    ///
678    /// This assumes that the parser is currently positioned on the opening
679    /// parenthesis. It advances the parser to the character at the start
680    /// of the sub-expression (or adjoining expression).
681    ///
682    /// If there was a problem parsing the start of the group, then an error
683    /// is returned.
684    #[inline(never)]
685    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
686        assert_eq!(self.char(), '(');
687        match self.parse_group()? {
688            Either::Left(set) => {
689                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
690                if let Some(v) = ignore {
691                    self.parser().ignore_whitespace.set(v);
692                }
693
694                concat.asts.push(Ast::Flags(set));
695                Ok(concat)
696            }
697            Either::Right(group) => {
698                let old_ignore_whitespace = self.ignore_whitespace();
699                let new_ignore_whitespace = group
700                    .flags()
701                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
702                    .unwrap_or(old_ignore_whitespace);
703                self.parser().stack_group.borrow_mut().push(
704                    GroupState::Group {
705                        concat: concat,
706                        group: group,
707                        ignore_whitespace: old_ignore_whitespace,
708                    },
709                );
710                self.parser().ignore_whitespace.set(new_ignore_whitespace);
711                Ok(ast::Concat { span: self.span(), asts: vec![] })
712            }
713        }
714    }
715
716    /// Pop a group AST from the parser's internal stack and set the group's
717    /// AST to the given concatenation. Return the concatenation containing
718    /// the group.
719    ///
720    /// This assumes that the parser is currently positioned on the closing
721    /// parenthesis and advances the parser to the character following the `)`.
722    ///
723    /// If no such group could be popped, then an unopened group error is
724    /// returned.
725    #[inline(never)]
726    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
727        use self::GroupState::*;
728
729        assert_eq!(self.char(), ')');
730        let mut stack = self.parser().stack_group.borrow_mut();
731        let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
732            .pop()
733        {
734            Some(Group { concat, group, ignore_whitespace }) => {
735                (concat, group, ignore_whitespace, None)
736            }
737            Some(Alternation(alt)) => match stack.pop() {
738                Some(Group { concat, group, ignore_whitespace }) => {
739                    (concat, group, ignore_whitespace, Some(alt))
740                }
741                None | Some(Alternation(_)) => {
742                    return Err(self.error(
743                        self.span_char(),
744                        ast::ErrorKind::GroupUnopened,
745                    ));
746                }
747            },
748            None => {
749                return Err(self
750                    .error(self.span_char(), ast::ErrorKind::GroupUnopened));
751            }
752        };
753        self.parser().ignore_whitespace.set(ignore_whitespace);
754        group_concat.span.end = self.pos();
755        self.bump();
756        group.span.end = self.pos();
757        match alt {
758            Some(mut alt) => {
759                alt.span.end = group_concat.span.end;
760                alt.asts.push(group_concat.into_ast());
761                group.ast = Box::new(alt.into_ast());
762            }
763            None => {
764                group.ast = Box::new(group_concat.into_ast());
765            }
766        }
767        prior_concat.asts.push(Ast::Group(group));
768        Ok(prior_concat)
769    }
770
771    /// Pop the last state from the parser's internal stack, if it exists, and
772    /// add the given concatenation to it. There either must be no state or a
773    /// single alternation item on the stack. Any other scenario produces an
774    /// error.
775    ///
776    /// This assumes that the parser has advanced to the end.
777    #[inline(never)]
778    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
779        concat.span.end = self.pos();
780        let mut stack = self.parser().stack_group.borrow_mut();
781        let ast = match stack.pop() {
782            None => Ok(concat.into_ast()),
783            Some(GroupState::Alternation(mut alt)) => {
784                alt.span.end = self.pos();
785                alt.asts.push(concat.into_ast());
786                Ok(Ast::Alternation(alt))
787            }
788            Some(GroupState::Group { group, .. }) => {
789                return Err(
790                    self.error(group.span, ast::ErrorKind::GroupUnclosed)
791                );
792            }
793        };
794        // If we try to pop again, there should be nothing.
795        match stack.pop() {
796            None => ast,
797            Some(GroupState::Alternation(_)) => {
798                // This unreachable is unfortunate. This case can't happen
799                // because the only way we can be here is if there were two
800                // `GroupState::Alternation`s adjacent in the parser's stack,
801                // which we guarantee to never happen because we never push a
802                // `GroupState::Alternation` if one is already at the top of
803                // the stack.
804                unreachable!()
805            }
806            Some(GroupState::Group { group, .. }) => {
807                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
808            }
809        }
810    }
811
812    /// Parse the opening of a character class and push the current class
813    /// parsing context onto the parser's stack. This assumes that the parser
814    /// is positioned at an opening `[`. The given union should correspond to
815    /// the union of set items built up before seeing the `[`.
816    ///
817    /// If there was a problem parsing the opening of the class, then an error
818    /// is returned. Otherwise, a new union of set items for the class is
819    /// returned (which may be populated with either a `]` or a `-`).
820    #[inline(never)]
821    fn push_class_open(
822        &self,
823        parent_union: ast::ClassSetUnion,
824    ) -> Result<ast::ClassSetUnion> {
825        assert_eq!(self.char(), '[');
826
827        let (nested_set, nested_union) = self.parse_set_class_open()?;
828        self.parser()
829            .stack_class
830            .borrow_mut()
831            .push(ClassState::Open { union: parent_union, set: nested_set });
832        Ok(nested_union)
833    }
834
835    /// Parse the end of a character class set and pop the character class
836    /// parser stack. The union given corresponds to the last union built
837    /// before seeing the closing `]`. The union returned corresponds to the
838    /// parent character class set with the nested class added to it.
839    ///
840    /// This assumes that the parser is positioned at a `]` and will advance
841    /// the parser to the byte immediately following the `]`.
842    ///
843    /// If the stack is empty after popping, then this returns the final
844    /// "top-level" character class AST (where a "top-level" character class
845    /// is one that is not nested inside any other character class).
846    ///
847    /// If there is no corresponding opening bracket on the parser's stack,
848    /// then an error is returned.
849    #[inline(never)]
850    fn pop_class(
851        &self,
852        nested_union: ast::ClassSetUnion,
853    ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
854        assert_eq!(self.char(), ']');
855
856        let item = ast::ClassSet::Item(nested_union.into_item());
857        let prevset = self.pop_class_op(item);
858        let mut stack = self.parser().stack_class.borrow_mut();
859        match stack.pop() {
860            None => {
861                // We can never observe an empty stack:
862                //
863                // 1) We are guaranteed to start with a non-empty stack since
864                //    the character class parser is only initiated when it sees
865                //    a `[`.
866                // 2) If we ever observe an empty stack while popping after
867                //    seeing a `]`, then we signal the character class parser
868                //    to terminate.
869                panic!("unexpected empty character class stack")
870            }
871            Some(ClassState::Op { .. }) => {
872                // This panic is unfortunate, but this case is impossible
873                // since we already popped the Op state if one exists above.
874                // Namely, every push to the class parser stack is guarded by
875                // whether an existing Op is already on the top of the stack.
876                // If it is, the existing Op is modified. That is, the stack
877                // can never have consecutive Op states.
878                panic!("unexpected ClassState::Op")
879            }
880            Some(ClassState::Open { mut union, mut set }) => {
881                self.bump();
882                set.span.end = self.pos();
883                set.kind = prevset;
884                if stack.is_empty() {
885                    Ok(Either::Right(ast::Class::Bracketed(set)))
886                } else {
887                    union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
888                    Ok(Either::Left(union))
889                }
890            }
891        }
892    }
893
894    /// Return an "unclosed class" error whose span points to the most
895    /// recently opened class.
896    ///
897    /// This should only be called while parsing a character class.
898    #[inline(never)]
899    fn unclosed_class_error(&self) -> ast::Error {
900        for state in self.parser().stack_class.borrow().iter().rev() {
901            match *state {
902                ClassState::Open { ref set, .. } => {
903                    return self
904                        .error(set.span, ast::ErrorKind::ClassUnclosed);
905                }
906                _ => {}
907            }
908        }
909        // We are guaranteed to have a non-empty stack with at least
910        // one open bracket, so we should never get here.
911        panic!("no open character class found")
912    }
913
914    /// Push the current set of class items on to the class parser's stack as
915    /// the left hand side of the given operator.
916    ///
917    /// A fresh set union is returned, which should be used to build the right
918    /// hand side of this operator.
919    #[inline(never)]
920    fn push_class_op(
921        &self,
922        next_kind: ast::ClassSetBinaryOpKind,
923        next_union: ast::ClassSetUnion,
924    ) -> ast::ClassSetUnion {
925        let item = ast::ClassSet::Item(next_union.into_item());
926        let new_lhs = self.pop_class_op(item);
927        self.parser()
928            .stack_class
929            .borrow_mut()
930            .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
931        ast::ClassSetUnion { span: self.span(), items: vec![] }
932    }
933
934    /// Pop a character class set from the character class parser stack. If the
935    /// top of the stack is just an item (not an operation), then return the
936    /// given set unchanged. If the top of the stack is an operation, then the
937    /// given set will be used as the rhs of the operation on the top of the
938    /// stack. In that case, the binary operation is returned as a set.
939    #[inline(never)]
940    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
941        let mut stack = self.parser().stack_class.borrow_mut();
942        let (kind, lhs) = match stack.pop() {
943            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
944            Some(state @ ClassState::Open { .. }) => {
945                stack.push(state);
946                return rhs;
947            }
948            None => unreachable!(),
949        };
950        let span = Span::new(lhs.span().start, rhs.span().end);
951        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
952            span: span,
953            kind: kind,
954            lhs: Box::new(lhs),
955            rhs: Box::new(rhs),
956        })
957    }
958}
959
960impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
961    /// Parse the regular expression into an abstract syntax tree.
962    fn parse(&self) -> Result<Ast> {
963        self.parse_with_comments().map(|astc| astc.ast)
964    }
965
966    /// Parse the regular expression and return an abstract syntax tree with
967    /// all of the comments found in the pattern.
968    fn parse_with_comments(&self) -> Result<ast::WithComments> {
969        assert_eq!(self.offset(), 0, "parser can only be used once");
970        self.parser().reset();
971        let mut concat = ast::Concat { span: self.span(), asts: vec![] };
972        loop {
973            self.bump_space();
974            if self.is_eof() {
975                break;
976            }
977            match self.char() {
978                '(' => concat = self.push_group(concat)?,
979                ')' => concat = self.pop_group(concat)?,
980                '|' => concat = self.push_alternate(concat)?,
981                '[' => {
982                    let class = self.parse_set_class()?;
983                    concat.asts.push(Ast::Class(class));
984                }
985                '?' => {
986                    concat = self.parse_uncounted_repetition(
987                        concat,
988                        ast::RepetitionKind::ZeroOrOne,
989                    )?;
990                }
991                '*' => {
992                    concat = self.parse_uncounted_repetition(
993                        concat,
994                        ast::RepetitionKind::ZeroOrMore,
995                    )?;
996                }
997                '+' => {
998                    concat = self.parse_uncounted_repetition(
999                        concat,
1000                        ast::RepetitionKind::OneOrMore,
1001                    )?;
1002                }
1003                '{' => {
1004                    concat = self.parse_counted_repetition(concat)?;
1005                }
1006                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1007            }
1008        }
1009        let ast = self.pop_group_end(concat)?;
1010        NestLimiter::new(self).check(&ast)?;
1011        Ok(ast::WithComments {
1012            ast: ast,
1013            comments: mem::replace(
1014                &mut *self.parser().comments.borrow_mut(),
1015                vec![],
1016            ),
1017        })
1018    }
1019
1020    /// Parses an uncounted repetition operation. An uncounted repetition
1021    /// operator includes ?, * and +, but does not include the {m,n} syntax.
1022    /// The given `kind` should correspond to the operator observed by the
1023    /// caller.
1024    ///
1025    /// This assumes that the paser is currently positioned at the repetition
1026    /// operator and advances the parser to the first character after the
1027    /// operator. (Note that the operator may include a single additional `?`,
1028    /// which makes the operator ungreedy.)
1029    ///
1030    /// The caller should include the concatenation that is being built. The
1031    /// concatenation returned includes the repetition operator applied to the
1032    /// last expression in the given concatenation.
1033    #[inline(never)]
1034    fn parse_uncounted_repetition(
1035        &self,
1036        mut concat: ast::Concat,
1037        kind: ast::RepetitionKind,
1038    ) -> Result<ast::Concat> {
1039        assert!(
1040            self.char() == '?' || self.char() == '*' || self.char() == '+'
1041        );
1042        let op_start = self.pos();
1043        let ast = match concat.asts.pop() {
1044            Some(ast) => ast,
1045            None => {
1046                return Err(
1047                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1048                )
1049            }
1050        };
1051        match ast {
1052            Ast::Empty(_) | Ast::Flags(_) => {
1053                return Err(
1054                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1055                )
1056            }
1057            _ => {}
1058        }
1059        let mut greedy = true;
1060        if self.bump() && self.char() == '?' {
1061            greedy = false;
1062            self.bump();
1063        }
1064        concat.asts.push(Ast::Repetition(ast::Repetition {
1065            span: ast.span().with_end(self.pos()),
1066            op: ast::RepetitionOp {
1067                span: Span::new(op_start, self.pos()),
1068                kind: kind,
1069            },
1070            greedy: greedy,
1071            ast: Box::new(ast),
1072        }));
1073        Ok(concat)
1074    }
1075
1076    /// Parses a counted repetition operation. A counted repetition operator
1077    /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1078    /// operators.
1079    ///
1080    /// This assumes that the paser is currently positioned at the opening `{`
1081    /// and advances the parser to the first character after the operator.
1082    /// (Note that the operator may include a single additional `?`, which
1083    /// makes the operator ungreedy.)
1084    ///
1085    /// The caller should include the concatenation that is being built. The
1086    /// concatenation returned includes the repetition operator applied to the
1087    /// last expression in the given concatenation.
1088    #[inline(never)]
1089    fn parse_counted_repetition(
1090        &self,
1091        mut concat: ast::Concat,
1092    ) -> Result<ast::Concat> {
1093        assert!(self.char() == '{');
1094        let start = self.pos();
1095        let ast = match concat.asts.pop() {
1096            Some(ast) => ast,
1097            None => {
1098                return Err(
1099                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1100                )
1101            }
1102        };
1103        match ast {
1104            Ast::Empty(_) | Ast::Flags(_) => {
1105                return Err(
1106                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1107                )
1108            }
1109            _ => {}
1110        }
1111        if !self.bump_and_bump_space() {
1112            return Err(self.error(
1113                Span::new(start, self.pos()),
1114                ast::ErrorKind::RepetitionCountUnclosed,
1115            ));
1116        }
1117        let count_start = specialize_err(
1118            self.parse_decimal(),
1119            ast::ErrorKind::DecimalEmpty,
1120            ast::ErrorKind::RepetitionCountDecimalEmpty,
1121        )?;
1122        let mut range = ast::RepetitionRange::Exactly(count_start);
1123        if self.is_eof() {
1124            return Err(self.error(
1125                Span::new(start, self.pos()),
1126                ast::ErrorKind::RepetitionCountUnclosed,
1127            ));
1128        }
1129        if self.char() == ',' {
1130            if !self.bump_and_bump_space() {
1131                return Err(self.error(
1132                    Span::new(start, self.pos()),
1133                    ast::ErrorKind::RepetitionCountUnclosed,
1134                ));
1135            }
1136            if self.char() != '}' {
1137                let count_end = specialize_err(
1138                    self.parse_decimal(),
1139                    ast::ErrorKind::DecimalEmpty,
1140                    ast::ErrorKind::RepetitionCountDecimalEmpty,
1141                )?;
1142                range = ast::RepetitionRange::Bounded(count_start, count_end);
1143            } else {
1144                range = ast::RepetitionRange::AtLeast(count_start);
1145            }
1146        }
1147        if self.is_eof() || self.char() != '}' {
1148            return Err(self.error(
1149                Span::new(start, self.pos()),
1150                ast::ErrorKind::RepetitionCountUnclosed,
1151            ));
1152        }
1153
1154        let mut greedy = true;
1155        if self.bump_and_bump_space() && self.char() == '?' {
1156            greedy = false;
1157            self.bump();
1158        }
1159
1160        let op_span = Span::new(start, self.pos());
1161        if !range.is_valid() {
1162            return Err(
1163                self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1164            );
1165        }
1166        concat.asts.push(Ast::Repetition(ast::Repetition {
1167            span: ast.span().with_end(self.pos()),
1168            op: ast::RepetitionOp {
1169                span: op_span,
1170                kind: ast::RepetitionKind::Range(range),
1171            },
1172            greedy: greedy,
1173            ast: Box::new(ast),
1174        }));
1175        Ok(concat)
1176    }
1177
1178    /// Parse a group (which contains a sub-expression) or a set of flags.
1179    ///
1180    /// If a group was found, then it is returned with an empty AST. If a set
1181    /// of flags is found, then that set is returned.
1182    ///
1183    /// The parser should be positioned at the opening parenthesis.
1184    ///
1185    /// This advances the parser to the character before the start of the
1186    /// sub-expression (in the case of a group) or to the closing parenthesis
1187    /// immediately following the set of flags.
1188    ///
1189    /// # Errors
1190    ///
1191    /// If flags are given and incorrectly specified, then a corresponding
1192    /// error is returned.
1193    ///
1194    /// If a capture name is given and it is incorrectly specified, then a
1195    /// corresponding error is returned.
1196    #[inline(never)]
1197    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1198        assert_eq!(self.char(), '(');
1199        let open_span = self.span_char();
1200        self.bump();
1201        self.bump_space();
1202        if self.is_lookaround_prefix() {
1203            return Err(self.error(
1204                Span::new(open_span.start, self.span().end),
1205                ast::ErrorKind::UnsupportedLookAround,
1206            ));
1207        }
1208        let inner_span = self.span();
1209        if self.bump_if("?P<") {
1210            let capture_index = self.next_capture_index(open_span)?;
1211            let cap = self.parse_capture_name(capture_index)?;
1212            Ok(Either::Right(ast::Group {
1213                span: open_span,
1214                kind: ast::GroupKind::CaptureName(cap),
1215                ast: Box::new(Ast::Empty(self.span())),
1216            }))
1217        } else if self.bump_if("?") {
1218            if self.is_eof() {
1219                return Err(
1220                    self.error(open_span, ast::ErrorKind::GroupUnclosed)
1221                );
1222            }
1223            let flags = self.parse_flags()?;
1224            let char_end = self.char();
1225            self.bump();
1226            if char_end == ')' {
1227                // We don't allow empty flags, e.g., `(?)`. We instead
1228                // interpret it as a repetition operator missing its argument.
1229                if flags.items.is_empty() {
1230                    return Err(self.error(
1231                        inner_span,
1232                        ast::ErrorKind::RepetitionMissing,
1233                    ));
1234                }
1235                Ok(Either::Left(ast::SetFlags {
1236                    span: Span { end: self.pos(), ..open_span },
1237                    flags: flags,
1238                }))
1239            } else {
1240                assert_eq!(char_end, ':');
1241                Ok(Either::Right(ast::Group {
1242                    span: open_span,
1243                    kind: ast::GroupKind::NonCapturing(flags),
1244                    ast: Box::new(Ast::Empty(self.span())),
1245                }))
1246            }
1247        } else {
1248            let capture_index = self.next_capture_index(open_span)?;
1249            Ok(Either::Right(ast::Group {
1250                span: open_span,
1251                kind: ast::GroupKind::CaptureIndex(capture_index),
1252                ast: Box::new(Ast::Empty(self.span())),
1253            }))
1254        }
1255    }
1256
1257    /// Parses a capture group name. Assumes that the parser is positioned at
1258    /// the first character in the name following the opening `<` (and may
1259    /// possibly be EOF). This advances the parser to the first character
1260    /// following the closing `>`.
1261    ///
1262    /// The caller must provide the capture index of the group for this name.
1263    #[inline(never)]
1264    fn parse_capture_name(
1265        &self,
1266        capture_index: u32,
1267    ) -> Result<ast::CaptureName> {
1268        if self.is_eof() {
1269            return Err(self
1270                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1271        }
1272        let start = self.pos();
1273        loop {
1274            if self.char() == '>' {
1275                break;
1276            }
1277            if !is_capture_char(self.char(), self.pos() == start) {
1278                return Err(self.error(
1279                    self.span_char(),
1280                    ast::ErrorKind::GroupNameInvalid,
1281                ));
1282            }
1283            if !self.bump() {
1284                break;
1285            }
1286        }
1287        let end = self.pos();
1288        if self.is_eof() {
1289            return Err(self
1290                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1291        }
1292        assert_eq!(self.char(), '>');
1293        self.bump();
1294        let name = &self.pattern()[start.offset..end.offset];
1295        if name.is_empty() {
1296            return Err(self.error(
1297                Span::new(start, start),
1298                ast::ErrorKind::GroupNameEmpty,
1299            ));
1300        }
1301        let capname = ast::CaptureName {
1302            span: Span::new(start, end),
1303            name: name.to_string(),
1304            index: capture_index,
1305        };
1306        self.add_capture_name(&capname)?;
1307        Ok(capname)
1308    }
1309
1310    /// Parse a sequence of flags starting at the current character.
1311    ///
1312    /// This advances the parser to the character immediately following the
1313    /// flags, which is guaranteed to be either `:` or `)`.
1314    ///
1315    /// # Errors
1316    ///
1317    /// If any flags are duplicated, then an error is returned.
1318    ///
1319    /// If the negation operator is used more than once, then an error is
1320    /// returned.
1321    ///
1322    /// If no flags could be found or if the negation operation is not followed
1323    /// by any flags, then an error is returned.
1324    #[inline(never)]
1325    fn parse_flags(&self) -> Result<ast::Flags> {
1326        let mut flags = ast::Flags { span: self.span(), items: vec![] };
1327        let mut last_was_negation = None;
1328        while self.char() != ':' && self.char() != ')' {
1329            if self.char() == '-' {
1330                last_was_negation = Some(self.span_char());
1331                let item = ast::FlagsItem {
1332                    span: self.span_char(),
1333                    kind: ast::FlagsItemKind::Negation,
1334                };
1335                if let Some(i) = flags.add_item(item) {
1336                    return Err(self.error(
1337                        self.span_char(),
1338                        ast::ErrorKind::FlagRepeatedNegation {
1339                            original: flags.items[i].span,
1340                        },
1341                    ));
1342                }
1343            } else {
1344                last_was_negation = None;
1345                let item = ast::FlagsItem {
1346                    span: self.span_char(),
1347                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1348                };
1349                if let Some(i) = flags.add_item(item) {
1350                    return Err(self.error(
1351                        self.span_char(),
1352                        ast::ErrorKind::FlagDuplicate {
1353                            original: flags.items[i].span,
1354                        },
1355                    ));
1356                }
1357            }
1358            if !self.bump() {
1359                return Err(
1360                    self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1361                );
1362            }
1363        }
1364        if let Some(span) = last_was_negation {
1365            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1366        }
1367        flags.span.end = self.pos();
1368        Ok(flags)
1369    }
1370
1371    /// Parse the current character as a flag. Do not advance the parser.
1372    ///
1373    /// # Errors
1374    ///
1375    /// If the flag is not recognized, then an error is returned.
1376    #[inline(never)]
1377    fn parse_flag(&self) -> Result<ast::Flag> {
1378        match self.char() {
1379            'i' => Ok(ast::Flag::CaseInsensitive),
1380            'm' => Ok(ast::Flag::MultiLine),
1381            's' => Ok(ast::Flag::DotMatchesNewLine),
1382            'U' => Ok(ast::Flag::SwapGreed),
1383            'u' => Ok(ast::Flag::Unicode),
1384            'x' => Ok(ast::Flag::IgnoreWhitespace),
1385            _ => {
1386                Err(self
1387                    .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1388            }
1389        }
1390    }
1391
1392    /// Parse a primitive AST. e.g., A literal, non-set character class or
1393    /// assertion.
1394    ///
1395    /// This assumes that the parser expects a primitive at the current
1396    /// location. i.e., All other non-primitive cases have been handled.
1397    /// For example, if the parser's position is at `|`, then `|` will be
1398    /// treated as a literal (e.g., inside a character class).
1399    ///
1400    /// This advances the parser to the first character immediately following
1401    /// the primitive.
1402    fn parse_primitive(&self) -> Result<Primitive> {
1403        match self.char() {
1404            '\\' => self.parse_escape(),
1405            '.' => {
1406                let ast = Primitive::Dot(self.span_char());
1407                self.bump();
1408                Ok(ast)
1409            }
1410            '^' => {
1411                let ast = Primitive::Assertion(ast::Assertion {
1412                    span: self.span_char(),
1413                    kind: ast::AssertionKind::StartLine,
1414                });
1415                self.bump();
1416                Ok(ast)
1417            }
1418            '$' => {
1419                let ast = Primitive::Assertion(ast::Assertion {
1420                    span: self.span_char(),
1421                    kind: ast::AssertionKind::EndLine,
1422                });
1423                self.bump();
1424                Ok(ast)
1425            }
1426            c => {
1427                let ast = Primitive::Literal(ast::Literal {
1428                    span: self.span_char(),
1429                    kind: ast::LiteralKind::Verbatim,
1430                    c: c,
1431                });
1432                self.bump();
1433                Ok(ast)
1434            }
1435        }
1436    }
1437
1438    /// Parse an escape sequence as a primitive AST.
1439    ///
1440    /// This assumes the parser is positioned at the start of the escape
1441    /// sequence, i.e., `\`. It advances the parser to the first position
1442    /// immediately following the escape sequence.
1443    #[inline(never)]
1444    fn parse_escape(&self) -> Result<Primitive> {
1445        assert_eq!(self.char(), '\\');
1446        let start = self.pos();
1447        if !self.bump() {
1448            return Err(self.error(
1449                Span::new(start, self.pos()),
1450                ast::ErrorKind::EscapeUnexpectedEof,
1451            ));
1452        }
1453        let c = self.char();
1454        // Put some of the more complicated routines into helpers.
1455        match c {
1456            '0'..='7' => {
1457                if !self.parser().octal {
1458                    return Err(self.error(
1459                        Span::new(start, self.span_char().end),
1460                        ast::ErrorKind::UnsupportedBackreference,
1461                    ));
1462                }
1463                let mut lit = self.parse_octal();
1464                lit.span.start = start;
1465                return Ok(Primitive::Literal(lit));
1466            }
1467            '8'..='9' if !self.parser().octal => {
1468                return Err(self.error(
1469                    Span::new(start, self.span_char().end),
1470                    ast::ErrorKind::UnsupportedBackreference,
1471                ));
1472            }
1473            'x' | 'u' | 'U' => {
1474                let mut lit = self.parse_hex()?;
1475                lit.span.start = start;
1476                return Ok(Primitive::Literal(lit));
1477            }
1478            'p' | 'P' => {
1479                let mut cls = self.parse_unicode_class()?;
1480                cls.span.start = start;
1481                return Ok(Primitive::Unicode(cls));
1482            }
1483            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1484                let mut cls = self.parse_perl_class();
1485                cls.span.start = start;
1486                return Ok(Primitive::Perl(cls));
1487            }
1488            _ => {}
1489        }
1490
1491        // Handle all of the one letter sequences inline.
1492        self.bump();
1493        let span = Span::new(start, self.pos());
1494        if is_meta_character(c) {
1495            return Ok(Primitive::Literal(ast::Literal {
1496                span: span,
1497                kind: ast::LiteralKind::Punctuation,
1498                c: c,
1499            }));
1500        }
1501        let special = |kind, c| {
1502            Ok(Primitive::Literal(ast::Literal {
1503                span: span,
1504                kind: ast::LiteralKind::Special(kind),
1505                c: c,
1506            }))
1507        };
1508        match c {
1509            'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1510            'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1511            't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1512            'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1513            'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1514            'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1515            ' ' if self.ignore_whitespace() => {
1516                special(ast::SpecialLiteralKind::Space, ' ')
1517            }
1518            'A' => Ok(Primitive::Assertion(ast::Assertion {
1519                span: span,
1520                kind: ast::AssertionKind::StartText,
1521            })),
1522            'z' => Ok(Primitive::Assertion(ast::Assertion {
1523                span: span,
1524                kind: ast::AssertionKind::EndText,
1525            })),
1526            'b' => Ok(Primitive::Assertion(ast::Assertion {
1527                span: span,
1528                kind: ast::AssertionKind::WordBoundary,
1529            })),
1530            'B' => Ok(Primitive::Assertion(ast::Assertion {
1531                span: span,
1532                kind: ast::AssertionKind::NotWordBoundary,
1533            })),
1534            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1535        }
1536    }
1537
1538    /// Parse an octal representation of a Unicode codepoint up to 3 digits
1539    /// long. This expects the parser to be positioned at the first octal
1540    /// digit and advances the parser to the first character immediately
1541    /// following the octal number. This also assumes that parsing octal
1542    /// escapes is enabled.
1543    ///
1544    /// Assuming the preconditions are met, this routine can never fail.
1545    #[inline(never)]
1546    fn parse_octal(&self) -> ast::Literal {
1547        use std::char;
1548        use std::u32;
1549
1550        assert!(self.parser().octal);
1551        assert!('0' <= self.char() && self.char() <= '7');
1552        let start = self.pos();
1553        // Parse up to two more digits.
1554        while self.bump()
1555            && '0' <= self.char()
1556            && self.char() <= '7'
1557            && self.pos().offset - start.offset <= 2
1558        {}
1559        let end = self.pos();
1560        let octal = &self.pattern()[start.offset..end.offset];
1561        // Parsing the octal should never fail since the above guarantees a
1562        // valid number.
1563        let codepoint =
1564            u32::from_str_radix(octal, 8).expect("valid octal number");
1565        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1566        // invalid Unicode scalar values.
1567        let c = char::from_u32(codepoint).expect("Unicode scalar value");
1568        ast::Literal {
1569            span: Span::new(start, end),
1570            kind: ast::LiteralKind::Octal,
1571            c: c,
1572        }
1573    }
1574
1575    /// Parse a hex representation of a Unicode codepoint. This handles both
1576    /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1577    /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1578    /// the first character immediately following the hexadecimal literal.
1579    #[inline(never)]
1580    fn parse_hex(&self) -> Result<ast::Literal> {
1581        assert!(
1582            self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1583        );
1584
1585        let hex_kind = match self.char() {
1586            'x' => ast::HexLiteralKind::X,
1587            'u' => ast::HexLiteralKind::UnicodeShort,
1588            _ => ast::HexLiteralKind::UnicodeLong,
1589        };
1590        if !self.bump_and_bump_space() {
1591            return Err(
1592                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1593            );
1594        }
1595        if self.char() == '{' {
1596            self.parse_hex_brace(hex_kind)
1597        } else {
1598            self.parse_hex_digits(hex_kind)
1599        }
1600    }
1601
1602    /// Parse an N-digit hex representation of a Unicode codepoint. This
1603    /// expects the parser to be positioned at the first digit and will advance
1604    /// the parser to the first character immediately following the escape
1605    /// sequence.
1606    ///
1607    /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1608    /// or 8 (for `\UNNNNNNNN`).
1609    #[inline(never)]
1610    fn parse_hex_digits(
1611        &self,
1612        kind: ast::HexLiteralKind,
1613    ) -> Result<ast::Literal> {
1614        use std::char;
1615        use std::u32;
1616
1617        let mut scratch = self.parser().scratch.borrow_mut();
1618        scratch.clear();
1619
1620        let start = self.pos();
1621        for i in 0..kind.digits() {
1622            if i > 0 && !self.bump_and_bump_space() {
1623                return Err(self
1624                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1625            }
1626            if !is_hex(self.char()) {
1627                return Err(self.error(
1628                    self.span_char(),
1629                    ast::ErrorKind::EscapeHexInvalidDigit,
1630                ));
1631            }
1632            scratch.push(self.char());
1633        }
1634        // The final bump just moves the parser past the literal, which may
1635        // be EOF.
1636        self.bump_and_bump_space();
1637        let end = self.pos();
1638        let hex = scratch.as_str();
1639        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1640            None => Err(self.error(
1641                Span::new(start, end),
1642                ast::ErrorKind::EscapeHexInvalid,
1643            )),
1644            Some(c) => Ok(ast::Literal {
1645                span: Span::new(start, end),
1646                kind: ast::LiteralKind::HexFixed(kind),
1647                c: c,
1648            }),
1649        }
1650    }
1651
1652    /// Parse a hex representation of any Unicode scalar value. This expects
1653    /// the parser to be positioned at the opening brace `{` and will advance
1654    /// the parser to the first character following the closing brace `}`.
1655    #[inline(never)]
1656    fn parse_hex_brace(
1657        &self,
1658        kind: ast::HexLiteralKind,
1659    ) -> Result<ast::Literal> {
1660        use std::char;
1661        use std::u32;
1662
1663        let mut scratch = self.parser().scratch.borrow_mut();
1664        scratch.clear();
1665
1666        let brace_pos = self.pos();
1667        let start = self.span_char().end;
1668        while self.bump_and_bump_space() && self.char() != '}' {
1669            if !is_hex(self.char()) {
1670                return Err(self.error(
1671                    self.span_char(),
1672                    ast::ErrorKind::EscapeHexInvalidDigit,
1673                ));
1674            }
1675            scratch.push(self.char());
1676        }
1677        if self.is_eof() {
1678            return Err(self.error(
1679                Span::new(brace_pos, self.pos()),
1680                ast::ErrorKind::EscapeUnexpectedEof,
1681            ));
1682        }
1683        let end = self.pos();
1684        let hex = scratch.as_str();
1685        assert_eq!(self.char(), '}');
1686        self.bump_and_bump_space();
1687
1688        if hex.is_empty() {
1689            return Err(self.error(
1690                Span::new(brace_pos, self.pos()),
1691                ast::ErrorKind::EscapeHexEmpty,
1692            ));
1693        }
1694        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1695            None => Err(self.error(
1696                Span::new(start, end),
1697                ast::ErrorKind::EscapeHexInvalid,
1698            )),
1699            Some(c) => Ok(ast::Literal {
1700                span: Span::new(start, self.pos()),
1701                kind: ast::LiteralKind::HexBrace(kind),
1702                c: c,
1703            }),
1704        }
1705    }
1706
1707    /// Parse a decimal number into a u32 while trimming leading and trailing
1708    /// whitespace.
1709    ///
1710    /// This expects the parser to be positioned at the first position where
1711    /// a decimal digit could occur. This will advance the parser to the byte
1712    /// immediately following the last contiguous decimal digit.
1713    ///
1714    /// If no decimal digit could be found or if there was a problem parsing
1715    /// the complete set of digits into a u32, then an error is returned.
1716    fn parse_decimal(&self) -> Result<u32> {
1717        let mut scratch = self.parser().scratch.borrow_mut();
1718        scratch.clear();
1719
1720        while !self.is_eof() && self.char().is_whitespace() {
1721            self.bump();
1722        }
1723        let start = self.pos();
1724        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1725            scratch.push(self.char());
1726            self.bump_and_bump_space();
1727        }
1728        let span = Span::new(start, self.pos());
1729        while !self.is_eof() && self.char().is_whitespace() {
1730            self.bump_and_bump_space();
1731        }
1732        let digits = scratch.as_str();
1733        if digits.is_empty() {
1734            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1735        }
1736        match u32::from_str_radix(digits, 10).ok() {
1737            Some(n) => Ok(n),
1738            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1739        }
1740    }
1741
1742    /// Parse a standard character class consisting primarily of characters or
1743    /// character ranges, but can also contain nested character classes of
1744    /// any type (sans `.`).
1745    ///
1746    /// This assumes the parser is positioned at the opening `[`. If parsing
1747    /// is successful, then the parser is advanced to the position immediately
1748    /// following the closing `]`.
1749    #[inline(never)]
1750    fn parse_set_class(&self) -> Result<ast::Class> {
1751        assert_eq!(self.char(), '[');
1752
1753        let mut union =
1754            ast::ClassSetUnion { span: self.span(), items: vec![] };
1755        loop {
1756            self.bump_space();
1757            if self.is_eof() {
1758                return Err(self.unclosed_class_error());
1759            }
1760            match self.char() {
1761                '[' => {
1762                    // If we've already parsed the opening bracket, then
1763                    // attempt to treat this as the beginning of an ASCII
1764                    // class. If ASCII class parsing fails, then the parser
1765                    // backs up to `[`.
1766                    if !self.parser().stack_class.borrow().is_empty() {
1767                        if let Some(cls) = self.maybe_parse_ascii_class() {
1768                            union.push(ast::ClassSetItem::Ascii(cls));
1769                            continue;
1770                        }
1771                    }
1772                    union = self.push_class_open(union)?;
1773                }
1774                ']' => match self.pop_class(union)? {
1775                    Either::Left(nested_union) => {
1776                        union = nested_union;
1777                    }
1778                    Either::Right(class) => return Ok(class),
1779                },
1780                '&' if self.peek() == Some('&') => {
1781                    assert!(self.bump_if("&&"));
1782                    union = self.push_class_op(
1783                        ast::ClassSetBinaryOpKind::Intersection,
1784                        union,
1785                    );
1786                }
1787                '-' if self.peek() == Some('-') => {
1788                    assert!(self.bump_if("--"));
1789                    union = self.push_class_op(
1790                        ast::ClassSetBinaryOpKind::Difference,
1791                        union,
1792                    );
1793                }
1794                '~' if self.peek() == Some('~') => {
1795                    assert!(self.bump_if("~~"));
1796                    union = self.push_class_op(
1797                        ast::ClassSetBinaryOpKind::SymmetricDifference,
1798                        union,
1799                    );
1800                }
1801                _ => {
1802                    union.push(self.parse_set_class_range()?);
1803                }
1804            }
1805        }
1806    }
1807
1808    /// Parse a single primitive item in a character class set. The item to
1809    /// be parsed can either be one of a simple literal character, a range
1810    /// between two simple literal characters or a "primitive" character
1811    /// class like \w or \p{Greek}.
1812    ///
1813    /// If an invalid escape is found, or if a character class is found where
1814    /// a simple literal is expected (e.g., in a range), then an error is
1815    /// returned.
1816    #[inline(never)]
1817    fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1818        let prim1 = self.parse_set_class_item()?;
1819        self.bump_space();
1820        if self.is_eof() {
1821            return Err(self.unclosed_class_error());
1822        }
1823        // If the next char isn't a `-`, then we don't have a range.
1824        // There are two exceptions. If the char after a `-` is a `]`, then
1825        // `-` is interpreted as a literal `-`. Alternatively, if the char
1826        // after a `-` is a `-`, then `--` corresponds to a "difference"
1827        // operation.
1828        if self.char() != '-'
1829            || self.peek_space() == Some(']')
1830            || self.peek_space() == Some('-')
1831        {
1832            return prim1.into_class_set_item(self);
1833        }
1834        // OK, now we're parsing a range, so bump past the `-` and parse the
1835        // second half of the range.
1836        if !self.bump_and_bump_space() {
1837            return Err(self.unclosed_class_error());
1838        }
1839        let prim2 = self.parse_set_class_item()?;
1840        let range = ast::ClassSetRange {
1841            span: Span::new(prim1.span().start, prim2.span().end),
1842            start: prim1.into_class_literal(self)?,
1843            end: prim2.into_class_literal(self)?,
1844        };
1845        if !range.is_valid() {
1846            return Err(
1847                self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1848            );
1849        }
1850        Ok(ast::ClassSetItem::Range(range))
1851    }
1852
1853    /// Parse a single item in a character class as a primitive, where the
1854    /// primitive either consists of a verbatim literal or a single escape
1855    /// sequence.
1856    ///
1857    /// This assumes the parser is positioned at the beginning of a primitive,
1858    /// and advances the parser to the first position after the primitive if
1859    /// successful.
1860    ///
1861    /// Note that it is the caller's responsibility to report an error if an
1862    /// illegal primitive was parsed.
1863    #[inline(never)]
1864    fn parse_set_class_item(&self) -> Result<Primitive> {
1865        if self.char() == '\\' {
1866            self.parse_escape()
1867        } else {
1868            let x = Primitive::Literal(ast::Literal {
1869                span: self.span_char(),
1870                kind: ast::LiteralKind::Verbatim,
1871                c: self.char(),
1872            });
1873            self.bump();
1874            Ok(x)
1875        }
1876    }
1877
1878    /// Parses the opening of a character class set. This includes the opening
1879    /// bracket along with `^` if present to indicate negation. This also
1880    /// starts parsing the opening set of unioned items if applicable, since
1881    /// there are special rules applied to certain characters in the opening
1882    /// of a character class. For example, `[^]]` is the class of all
1883    /// characters not equal to `]`. (`]` would need to be escaped in any other
1884    /// position.) Similarly for `-`.
1885    ///
1886    /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1887    /// empty union. This empty union should be replaced with the actual item
1888    /// when it is popped from the parser's stack.
1889    ///
1890    /// This assumes the parser is positioned at the opening `[` and advances
1891    /// the parser to the first non-special byte of the character class.
1892    ///
1893    /// An error is returned if EOF is found.
1894    #[inline(never)]
1895    fn parse_set_class_open(
1896        &self,
1897    ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1898        assert_eq!(self.char(), '[');
1899        let start = self.pos();
1900        if !self.bump_and_bump_space() {
1901            return Err(self.error(
1902                Span::new(start, self.pos()),
1903                ast::ErrorKind::ClassUnclosed,
1904            ));
1905        }
1906
1907        let negated = if self.char() != '^' {
1908            false
1909        } else {
1910            if !self.bump_and_bump_space() {
1911                return Err(self.error(
1912                    Span::new(start, self.pos()),
1913                    ast::ErrorKind::ClassUnclosed,
1914                ));
1915            }
1916            true
1917        };
1918        // Accept any number of `-` as literal `-`.
1919        let mut union =
1920            ast::ClassSetUnion { span: self.span(), items: vec![] };
1921        while self.char() == '-' {
1922            union.push(ast::ClassSetItem::Literal(ast::Literal {
1923                span: self.span_char(),
1924                kind: ast::LiteralKind::Verbatim,
1925                c: '-',
1926            }));
1927            if !self.bump_and_bump_space() {
1928                return Err(self.error(
1929                    Span::new(start, self.pos()),
1930                    ast::ErrorKind::ClassUnclosed,
1931                ));
1932            }
1933        }
1934        // If `]` is the *first* char in a set, then interpret it as a literal
1935        // `]`. That is, an empty class is impossible to write.
1936        if union.items.is_empty() && self.char() == ']' {
1937            union.push(ast::ClassSetItem::Literal(ast::Literal {
1938                span: self.span_char(),
1939                kind: ast::LiteralKind::Verbatim,
1940                c: ']',
1941            }));
1942            if !self.bump_and_bump_space() {
1943                return Err(self.error(
1944                    Span::new(start, self.pos()),
1945                    ast::ErrorKind::ClassUnclosed,
1946                ));
1947            }
1948        }
1949        let set = ast::ClassBracketed {
1950            span: Span::new(start, self.pos()),
1951            negated: negated,
1952            kind: ast::ClassSet::union(ast::ClassSetUnion {
1953                span: Span::new(union.span.start, union.span.start),
1954                items: vec![],
1955            }),
1956        };
1957        Ok((set, union))
1958    }
1959
1960    /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1961    ///
1962    /// This assumes the parser is positioned at the opening `[`.
1963    ///
1964    /// If no valid ASCII character class could be found, then this does not
1965    /// advance the parser and `None` is returned. Otherwise, the parser is
1966    /// advanced to the first byte following the closing `]` and the
1967    /// corresponding ASCII class is returned.
1968    #[inline(never)]
1969    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1970        // ASCII character classes are interesting from a parsing perspective
1971        // because parsing cannot fail with any interesting error. For example,
1972        // in order to use an ASCII character class, it must be enclosed in
1973        // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1974        // of it as "ASCII character characters have the syntax `[:NAME:]`
1975        // which can only appear within character brackets." This means that
1976        // things like `[[:lower:]A]` are legal constructs.
1977        //
1978        // However, if one types an incorrect ASCII character class, e.g.,
1979        // `[[:loower:]]`, then we treat that as a normal nested character
1980        // class containing the characters `:elorw`. One might argue that we
1981        // should return an error instead since the repeated colons give away
1982        // the intent to write an ASCII class. But what if the user typed
1983        // `[[:lower]]` instead? How can we tell that was intended to be an
1984        // ASCII class and not just a normal nested class?
1985        //
1986        // Reasonable people can probably disagree over this, but for better
1987        // or worse, we implement semantics that never fails at the expense
1988        // of better failure modes.
1989        assert_eq!(self.char(), '[');
1990        // If parsing fails, then we back up the parser to this starting point.
1991        let start = self.pos();
1992        let mut negated = false;
1993        if !self.bump() || self.char() != ':' {
1994            self.parser().pos.set(start);
1995            return None;
1996        }
1997        if !self.bump() {
1998            self.parser().pos.set(start);
1999            return None;
2000        }
2001        if self.char() == '^' {
2002            negated = true;
2003            if !self.bump() {
2004                self.parser().pos.set(start);
2005                return None;
2006            }
2007        }
2008        let name_start = self.offset();
2009        while self.char() != ':' && self.bump() {}
2010        if self.is_eof() {
2011            self.parser().pos.set(start);
2012            return None;
2013        }
2014        let name = &self.pattern()[name_start..self.offset()];
2015        if !self.bump_if(":]") {
2016            self.parser().pos.set(start);
2017            return None;
2018        }
2019        let kind = match ast::ClassAsciiKind::from_name(name) {
2020            Some(kind) => kind,
2021            None => {
2022                self.parser().pos.set(start);
2023                return None;
2024            }
2025        };
2026        Some(ast::ClassAscii {
2027            span: Span::new(start, self.pos()),
2028            kind: kind,
2029            negated: negated,
2030        })
2031    }
2032
2033    /// Parse a Unicode class in either the single character notation, `\pN`
2034    /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2035    /// the parser is positioned at the `p` (or `P` for negation) and will
2036    /// advance the parser to the character immediately following the class.
2037    ///
2038    /// Note that this does not check whether the class name is valid or not.
2039    #[inline(never)]
2040    fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2041        assert!(self.char() == 'p' || self.char() == 'P');
2042
2043        let mut scratch = self.parser().scratch.borrow_mut();
2044        scratch.clear();
2045
2046        let negated = self.char() == 'P';
2047        if !self.bump_and_bump_space() {
2048            return Err(
2049                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2050            );
2051        }
2052        let (start, kind) = if self.char() == '{' {
2053            let start = self.span_char().end;
2054            while self.bump_and_bump_space() && self.char() != '}' {
2055                scratch.push(self.char());
2056            }
2057            if self.is_eof() {
2058                return Err(self
2059                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2060            }
2061            assert_eq!(self.char(), '}');
2062            self.bump();
2063
2064            let name = scratch.as_str();
2065            if let Some(i) = name.find("!=") {
2066                (
2067                    start,
2068                    ast::ClassUnicodeKind::NamedValue {
2069                        op: ast::ClassUnicodeOpKind::NotEqual,
2070                        name: name[..i].to_string(),
2071                        value: name[i + 2..].to_string(),
2072                    },
2073                )
2074            } else if let Some(i) = name.find(':') {
2075                (
2076                    start,
2077                    ast::ClassUnicodeKind::NamedValue {
2078                        op: ast::ClassUnicodeOpKind::Colon,
2079                        name: name[..i].to_string(),
2080                        value: name[i + 1..].to_string(),
2081                    },
2082                )
2083            } else if let Some(i) = name.find('=') {
2084                (
2085                    start,
2086                    ast::ClassUnicodeKind::NamedValue {
2087                        op: ast::ClassUnicodeOpKind::Equal,
2088                        name: name[..i].to_string(),
2089                        value: name[i + 1..].to_string(),
2090                    },
2091                )
2092            } else {
2093                (start, ast::ClassUnicodeKind::Named(name.to_string()))
2094            }
2095        } else {
2096            let start = self.pos();
2097            let c = self.char();
2098            self.bump_and_bump_space();
2099            let kind = ast::ClassUnicodeKind::OneLetter(c);
2100            (start, kind)
2101        };
2102        Ok(ast::ClassUnicode {
2103            span: Span::new(start, self.pos()),
2104            negated: negated,
2105            kind: kind,
2106        })
2107    }
2108
2109    /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2110    /// parser is currently at a valid character class name and will be
2111    /// advanced to the character immediately following the class.
2112    #[inline(never)]
2113    fn parse_perl_class(&self) -> ast::ClassPerl {
2114        let c = self.char();
2115        let span = self.span_char();
2116        self.bump();
2117        let (negated, kind) = match c {
2118            'd' => (false, ast::ClassPerlKind::Digit),
2119            'D' => (true, ast::ClassPerlKind::Digit),
2120            's' => (false, ast::ClassPerlKind::Space),
2121            'S' => (true, ast::ClassPerlKind::Space),
2122            'w' => (false, ast::ClassPerlKind::Word),
2123            'W' => (true, ast::ClassPerlKind::Word),
2124            c => panic!("expected valid Perl class but got '{}'", c),
2125        };
2126        ast::ClassPerl { span: span, kind: kind, negated: negated }
2127    }
2128}
2129
/// A type that traverses a fully parsed Ast and checks whether its depth
/// exceeds the specified nesting limit. If it does, then an error is returned.
///
/// The limit is not stored here; it is read from the parser's `nest_limit`
/// setting whenever the depth is about to grow.
#[derive(Debug)]
struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
    /// The parser that is checking the nest limit. It supplies the limit
    /// and is used to build errors.
    p: &'p ParserI<'s, P>,
    /// The current depth while walking an Ast. Starts at zero.
    depth: u32,
}
2139
2140impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2141    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2142        NestLimiter { p: p, depth: 0 }
2143    }
2144
2145    #[inline(never)]
2146    fn check(self, ast: &Ast) -> Result<()> {
2147        ast::visit(ast, self)
2148    }
2149
2150    fn increment_depth(&mut self, span: &Span) -> Result<()> {
2151        let new = self.depth.checked_add(1).ok_or_else(|| {
2152            self.p.error(
2153                span.clone(),
2154                ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2155            )
2156        })?;
2157        let limit = self.p.parser().nest_limit;
2158        if new > limit {
2159            return Err(self.p.error(
2160                span.clone(),
2161                ast::ErrorKind::NestLimitExceeded(limit),
2162            ));
2163        }
2164        self.depth = new;
2165        Ok(())
2166    }
2167
2168    fn decrement_depth(&mut self) {
2169        // Assuming the correctness of the visitor, this should never drop
2170        // below 0.
2171        self.depth = self.depth.checked_sub(1).unwrap();
2172    }
2173}
2174
2175impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2176    type Output = ();
2177    type Err = ast::Error;
2178
2179    fn finish(self) -> Result<()> {
2180        Ok(())
2181    }
2182
2183    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2184        let span = match *ast {
2185            Ast::Empty(_)
2186            | Ast::Flags(_)
2187            | Ast::Literal(_)
2188            | Ast::Dot(_)
2189            | Ast::Assertion(_)
2190            | Ast::Class(ast::Class::Unicode(_))
2191            | Ast::Class(ast::Class::Perl(_)) => {
2192                // These are all base cases, so we don't increment depth.
2193                return Ok(());
2194            }
2195            Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2196            Ast::Repetition(ref x) => &x.span,
2197            Ast::Group(ref x) => &x.span,
2198            Ast::Alternation(ref x) => &x.span,
2199            Ast::Concat(ref x) => &x.span,
2200        };
2201        self.increment_depth(span)
2202    }
2203
2204    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2205        match *ast {
2206            Ast::Empty(_)
2207            | Ast::Flags(_)
2208            | Ast::Literal(_)
2209            | Ast::Dot(_)
2210            | Ast::Assertion(_)
2211            | Ast::Class(ast::Class::Unicode(_))
2212            | Ast::Class(ast::Class::Perl(_)) => {
2213                // These are all base cases, so we don't decrement depth.
2214                Ok(())
2215            }
2216            Ast::Class(ast::Class::Bracketed(_))
2217            | Ast::Repetition(_)
2218            | Ast::Group(_)
2219            | Ast::Alternation(_)
2220            | Ast::Concat(_) => {
2221                self.decrement_depth();
2222                Ok(())
2223            }
2224        }
2225    }
2226
2227    fn visit_class_set_item_pre(
2228        &mut self,
2229        ast: &ast::ClassSetItem,
2230    ) -> Result<()> {
2231        let span = match *ast {
2232            ast::ClassSetItem::Empty(_)
2233            | ast::ClassSetItem::Literal(_)
2234            | ast::ClassSetItem::Range(_)
2235            | ast::ClassSetItem::Ascii(_)
2236            | ast::ClassSetItem::Unicode(_)
2237            | ast::ClassSetItem::Perl(_) => {
2238                // These are all base cases, so we don't increment depth.
2239                return Ok(());
2240            }
2241            ast::ClassSetItem::Bracketed(ref x) => &x.span,
2242            ast::ClassSetItem::Union(ref x) => &x.span,
2243        };
2244        self.increment_depth(span)
2245    }
2246
2247    fn visit_class_set_item_post(
2248        &mut self,
2249        ast: &ast::ClassSetItem,
2250    ) -> Result<()> {
2251        match *ast {
2252            ast::ClassSetItem::Empty(_)
2253            | ast::ClassSetItem::Literal(_)
2254            | ast::ClassSetItem::Range(_)
2255            | ast::ClassSetItem::Ascii(_)
2256            | ast::ClassSetItem::Unicode(_)
2257            | ast::ClassSetItem::Perl(_) => {
2258                // These are all base cases, so we don't decrement depth.
2259                Ok(())
2260            }
2261            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2262                self.decrement_depth();
2263                Ok(())
2264            }
2265        }
2266    }
2267
2268    fn visit_class_set_binary_op_pre(
2269        &mut self,
2270        ast: &ast::ClassSetBinaryOp,
2271    ) -> Result<()> {
2272        self.increment_depth(&ast.span)
2273    }
2274
2275    fn visit_class_set_binary_op_post(
2276        &mut self,
2277        _ast: &ast::ClassSetBinaryOp,
2278    ) -> Result<()> {
2279        self.decrement_depth();
2280        Ok(())
2281    }
2282}
2283
2284/// When the result is an error, transforms the ast::ErrorKind from the source
2285/// Result into another one. This function is used to return clearer error
2286/// messages when possible.
2287fn specialize_err<T>(
2288    result: Result<T>,
2289    from: ast::ErrorKind,
2290    to: ast::ErrorKind,
2291) -> Result<T> {
2292    if let Err(e) = result {
2293        if e.kind == from {
2294            Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2295        } else {
2296            Err(e)
2297        }
2298    } else {
2299        result
2300    }
2301}
2302
2303#[cfg(test)]
2304mod tests {
2305    use std::ops::Range;
2306
2307    use super::{Parser, ParserBuilder, ParserI, Primitive};
2308    use ast::{self, Ast, Position, Span};
2309
    // Our own assert_eq, which has slightly better formatting (but honestly
    // still kind of crappy). It takes exactly two expressions, compares them
    // by reference with `==`, and on mismatch panics with both values
    // printed via `{:?}` on separate lines.
    macro_rules! assert_eq {
        ($left:expr, $right:expr) => {{
            // Binding through a `match` evaluates each operand exactly once.
            match (&$left, &$right) {
                (left_val, right_val) => {
                    if !(*left_val == *right_val) {
                        panic!(
                            "assertion failed: `(left == right)`\n\n\
                             left:  `{:?}`\nright: `{:?}`\n\n",
                            left_val, right_val
                        )
                    }
                }
            }
        }};
    }
2327
    // We create these errors to compare with real ast::Errors in the tests.
    // We define equality between TestError and ast::Error to disregard the
    // pattern string in ast::Error, which is annoying to provide in tests.
    #[derive(Clone, Debug)]
    struct TestError {
        // The span the error is expected to cover.
        span: Span,
        // The kind of error that is expected.
        kind: ast::ErrorKind,
    }
2336
2337    impl PartialEq<ast::Error> for TestError {
2338        fn eq(&self, other: &ast::Error) -> bool {
2339            self.span == other.span && self.kind == other.kind
2340        }
2341    }
2342
2343    impl PartialEq<TestError> for ast::Error {
2344        fn eq(&self, other: &TestError) -> bool {
2345            self.span == other.span && self.kind == other.kind
2346        }
2347    }
2348
2349    fn s(str: &str) -> String {
2350        str.to_string()
2351    }
2352
2353    fn parser(pattern: &str) -> ParserI<Parser> {
2354        ParserI::new(Parser::new(), pattern)
2355    }
2356
2357    fn parser_octal(pattern: &str) -> ParserI<Parser> {
2358        let parser = ParserBuilder::new().octal(true).build();
2359        ParserI::new(parser, pattern)
2360    }
2361
2362    fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
2363        let p = ParserBuilder::new().nest_limit(nest_limit).build();
2364        ParserI::new(p, pattern)
2365    }
2366
2367    fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
2368        let p = ParserBuilder::new().ignore_whitespace(true).build();
2369        ParserI::new(p, pattern)
2370    }
2371
    /// Short alias for creating a new span.
    ///
    /// Both endpoints are passed through to `Span::new` unchanged, so this is
    /// the helper to use when the line or column of an endpoint cannot be
    /// derived from its offset alone (e.g., for patterns spanning several
    /// lines).
    fn nspan(start: Position, end: Position) -> Span {
        Span::new(start, end)
    }
2376
    /// Short alias for creating a new position.
    ///
    /// The arguments are forwarded to `Position::new` in the same order:
    /// offset, then line, then column.
    fn npos(offset: usize, line: usize, column: usize) -> Position {
        Position::new(offset, line, column)
    }
2381
2382    /// Create a new span from the given offset range. This assumes a single
2383    /// line and sets the columns based on the offsets. i.e., This only works
2384    /// out of the box for ASCII, which is fine for most tests.
2385    fn span(range: Range<usize>) -> Span {
2386        let start = Position::new(range.start, 1, range.start + 1);
2387        let end = Position::new(range.end, 1, range.end + 1);
2388        Span::new(start, end)
2389    }
2390
2391    /// Create a new span for the corresponding byte range in the given string.
2392    fn span_range(subject: &str, range: Range<usize>) -> Span {
2393        let start = Position {
2394            offset: range.start,
2395            line: 1 + subject[..range.start].matches('\n').count(),
2396            column: 1 + subject[..range.start]
2397                .chars()
2398                .rev()
2399                .position(|c| c == '\n')
2400                .unwrap_or(subject[..range.start].chars().count()),
2401        };
2402        let end = Position {
2403            offset: range.end,
2404            line: 1 + subject[..range.end].matches('\n').count(),
2405            column: 1 + subject[..range.end]
2406                .chars()
2407                .rev()
2408                .position(|c| c == '\n')
2409                .unwrap_or(subject[..range.end].chars().count()),
2410        };
2411        Span::new(start, end)
2412    }
2413
2414    /// Create a verbatim literal starting at the given position.
2415    fn lit(c: char, start: usize) -> Ast {
2416        lit_with(c, span(start..start + c.len_utf8()))
2417    }
2418
2419    /// Create a punctuation literal starting at the given position.
2420    fn punct_lit(c: char, span: Span) -> Ast {
2421        Ast::Literal(ast::Literal {
2422            span: span,
2423            kind: ast::LiteralKind::Punctuation,
2424            c: c,
2425        })
2426    }
2427
2428    /// Create a verbatim literal with the given span.
2429    fn lit_with(c: char, span: Span) -> Ast {
2430        Ast::Literal(ast::Literal {
2431            span: span,
2432            kind: ast::LiteralKind::Verbatim,
2433            c: c,
2434        })
2435    }
2436
2437    /// Create a concatenation with the given range.
2438    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2439        concat_with(span(range), asts)
2440    }
2441
2442    /// Create a concatenation with the given span.
2443    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2444        Ast::Concat(ast::Concat { span: span, asts: asts })
2445    }
2446
2447    /// Create an alternation with the given span.
2448    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2449        Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
2450    }
2451
2452    /// Create a capturing group with the given span.
2453    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2454        Ast::Group(ast::Group {
2455            span: span(range),
2456            kind: ast::GroupKind::CaptureIndex(index),
2457            ast: Box::new(ast),
2458        })
2459    }
2460
2461    /// Create an ast::SetFlags.
2462    ///
2463    /// The given pattern should be the full pattern string. The range given
2464    /// should correspond to the byte offsets where the flag set occurs.
2465    ///
2466    /// If negated is true, then the set is interpreted as beginning with a
2467    /// negation.
2468    fn flag_set(
2469        pat: &str,
2470        range: Range<usize>,
2471        flag: ast::Flag,
2472        negated: bool,
2473    ) -> Ast {
2474        let mut items = vec![ast::FlagsItem {
2475            span: span_range(pat, (range.end - 2)..(range.end - 1)),
2476            kind: ast::FlagsItemKind::Flag(flag),
2477        }];
2478        if negated {
2479            items.insert(
2480                0,
2481                ast::FlagsItem {
2482                    span: span_range(pat, (range.start + 2)..(range.end - 2)),
2483                    kind: ast::FlagsItemKind::Negation,
2484                },
2485            );
2486        }
2487        Ast::Flags(ast::SetFlags {
2488            span: span_range(pat, range.clone()),
2489            flags: ast::Flags {
2490                span: span_range(pat, (range.start + 2)..(range.end - 1)),
2491                items: items,
2492            },
2493        })
2494    }
2495
2496    #[test]
2497    fn parse_nest_limit() {
2498        // A nest limit of 0 still allows some types of regexes.
2499        assert_eq!(
2500            parser_nest_limit("", 0).parse(),
2501            Ok(Ast::Empty(span(0..0)))
2502        );
2503        assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
2504
2505        // Test repetition operations, which require one level of nesting.
2506        assert_eq!(
2507            parser_nest_limit("a+", 0).parse().unwrap_err(),
2508            TestError {
2509                span: span(0..2),
2510                kind: ast::ErrorKind::NestLimitExceeded(0),
2511            }
2512        );
2513        assert_eq!(
2514            parser_nest_limit("a+", 1).parse(),
2515            Ok(Ast::Repetition(ast::Repetition {
2516                span: span(0..2),
2517                op: ast::RepetitionOp {
2518                    span: span(1..2),
2519                    kind: ast::RepetitionKind::OneOrMore,
2520                },
2521                greedy: true,
2522                ast: Box::new(lit('a', 0)),
2523            }))
2524        );
2525        assert_eq!(
2526            parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2527            TestError {
2528                span: span(0..3),
2529                kind: ast::ErrorKind::NestLimitExceeded(1),
2530            }
2531        );
2532        assert_eq!(
2533            parser_nest_limit("a+*", 1).parse().unwrap_err(),
2534            TestError {
2535                span: span(0..2),
2536                kind: ast::ErrorKind::NestLimitExceeded(1),
2537            }
2538        );
2539        assert_eq!(
2540            parser_nest_limit("a+*", 2).parse(),
2541            Ok(Ast::Repetition(ast::Repetition {
2542                span: span(0..3),
2543                op: ast::RepetitionOp {
2544                    span: span(2..3),
2545                    kind: ast::RepetitionKind::ZeroOrMore,
2546                },
2547                greedy: true,
2548                ast: Box::new(Ast::Repetition(ast::Repetition {
2549                    span: span(0..2),
2550                    op: ast::RepetitionOp {
2551                        span: span(1..2),
2552                        kind: ast::RepetitionKind::OneOrMore,
2553                    },
2554                    greedy: true,
2555                    ast: Box::new(lit('a', 0)),
2556                })),
2557            }))
2558        );
2559
2560        // Test concatenations. A concatenation requires one level of nesting.
2561        assert_eq!(
2562            parser_nest_limit("ab", 0).parse().unwrap_err(),
2563            TestError {
2564                span: span(0..2),
2565                kind: ast::ErrorKind::NestLimitExceeded(0),
2566            }
2567        );
2568        assert_eq!(
2569            parser_nest_limit("ab", 1).parse(),
2570            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
2571        );
2572        assert_eq!(
2573            parser_nest_limit("abc", 1).parse(),
2574            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
2575        );
2576
2577        // Test alternations. An alternation requires one level of nesting.
2578        assert_eq!(
2579            parser_nest_limit("a|b", 0).parse().unwrap_err(),
2580            TestError {
2581                span: span(0..3),
2582                kind: ast::ErrorKind::NestLimitExceeded(0),
2583            }
2584        );
2585        assert_eq!(
2586            parser_nest_limit("a|b", 1).parse(),
2587            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
2588        );
2589        assert_eq!(
2590            parser_nest_limit("a|b|c", 1).parse(),
2591            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
2592        );
2593
2594        // Test character classes. Classes form their own mini-recursive
2595        // syntax!
2596        assert_eq!(
2597            parser_nest_limit("[a]", 0).parse().unwrap_err(),
2598            TestError {
2599                span: span(0..3),
2600                kind: ast::ErrorKind::NestLimitExceeded(0),
2601            }
2602        );
2603        assert_eq!(
2604            parser_nest_limit("[a]", 1).parse(),
2605            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
2606                span: span(0..3),
2607                negated: false,
2608                kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
2609                    ast::Literal {
2610                        span: span(1..2),
2611                        kind: ast::LiteralKind::Verbatim,
2612                        c: 'a',
2613                    }
2614                )),
2615            })))
2616        );
2617        assert_eq!(
2618            parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2619            TestError {
2620                span: span(1..3),
2621                kind: ast::ErrorKind::NestLimitExceeded(1),
2622            }
2623        );
2624        assert_eq!(
2625            parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2626            TestError {
2627                span: span(3..7),
2628                kind: ast::ErrorKind::NestLimitExceeded(2),
2629            }
2630        );
2631        assert_eq!(
2632            parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2633            TestError {
2634                span: span(4..6),
2635                kind: ast::ErrorKind::NestLimitExceeded(3),
2636            }
2637        );
2638        assert_eq!(
2639            parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2640            TestError {
2641                span: span(1..5),
2642                kind: ast::ErrorKind::NestLimitExceeded(1),
2643            }
2644        );
2645        assert_eq!(
2646            parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2647            TestError {
2648                span: span(4..6),
2649                kind: ast::ErrorKind::NestLimitExceeded(2),
2650            }
2651        );
2652    }
2653
2654    #[test]
2655    fn parse_comments() {
2656        let pat = "(?x)
2657# This is comment 1.
2658foo # This is comment 2.
2659  # This is comment 3.
2660bar
2661# This is comment 4.";
2662        let astc = parser(pat).parse_with_comments().unwrap();
2663        assert_eq!(
2664            astc.ast,
2665            concat_with(
2666                span_range(pat, 0..pat.len()),
2667                vec![
2668                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2669                    lit_with('f', span_range(pat, 26..27)),
2670                    lit_with('o', span_range(pat, 27..28)),
2671                    lit_with('o', span_range(pat, 28..29)),
2672                    lit_with('b', span_range(pat, 74..75)),
2673                    lit_with('a', span_range(pat, 75..76)),
2674                    lit_with('r', span_range(pat, 76..77)),
2675                ]
2676            )
2677        );
2678        assert_eq!(
2679            astc.comments,
2680            vec![
2681                ast::Comment {
2682                    span: span_range(pat, 5..26),
2683                    comment: s(" This is comment 1."),
2684                },
2685                ast::Comment {
2686                    span: span_range(pat, 30..51),
2687                    comment: s(" This is comment 2."),
2688                },
2689                ast::Comment {
2690                    span: span_range(pat, 53..74),
2691                    comment: s(" This is comment 3."),
2692                },
2693                ast::Comment {
2694                    span: span_range(pat, 78..98),
2695                    comment: s(" This is comment 4."),
2696                },
2697            ]
2698        );
2699    }
2700
2701    #[test]
2702    fn parse_holistic() {
2703        assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
2704        assert_eq!(
2705            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2706            Ok(concat(
2707                0..36,
2708                vec![
2709                    punct_lit('\\', span(0..2)),
2710                    punct_lit('.', span(2..4)),
2711                    punct_lit('+', span(4..6)),
2712                    punct_lit('*', span(6..8)),
2713                    punct_lit('?', span(8..10)),
2714                    punct_lit('(', span(10..12)),
2715                    punct_lit(')', span(12..14)),
2716                    punct_lit('|', span(14..16)),
2717                    punct_lit('[', span(16..18)),
2718                    punct_lit(']', span(18..20)),
2719                    punct_lit('{', span(20..22)),
2720                    punct_lit('}', span(22..24)),
2721                    punct_lit('^', span(24..26)),
2722                    punct_lit('$', span(26..28)),
2723                    punct_lit('#', span(28..30)),
2724                    punct_lit('&', span(30..32)),
2725                    punct_lit('-', span(32..34)),
2726                    punct_lit('~', span(34..36)),
2727                ]
2728            ))
2729        );
2730    }
2731
2732    #[test]
2733    fn parse_ignore_whitespace() {
2734        // Test that basic whitespace insensitivity works.
2735        let pat = "(?x)a b";
2736        assert_eq!(
2737            parser(pat).parse(),
2738            Ok(concat_with(
2739                nspan(npos(0, 1, 1), npos(7, 1, 8)),
2740                vec![
2741                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2742                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2743                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2744                ]
2745            ))
2746        );
2747
2748        // Test that we can toggle whitespace insensitivity.
2749        let pat = "(?x)a b(?-x)a b";
2750        assert_eq!(
2751            parser(pat).parse(),
2752            Ok(concat_with(
2753                nspan(npos(0, 1, 1), npos(15, 1, 16)),
2754                vec![
2755                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2756                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2757                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2758                    flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
2759                    lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
2760                    lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
2761                    lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
2762                ]
2763            ))
2764        );
2765
2766        // Test that nesting whitespace insensitive flags works.
2767        let pat = "a (?x:a )a ";
2768        assert_eq!(
2769            parser(pat).parse(),
2770            Ok(concat_with(
2771                span_range(pat, 0..11),
2772                vec![
2773                    lit_with('a', span_range(pat, 0..1)),
2774                    lit_with(' ', span_range(pat, 1..2)),
2775                    Ast::Group(ast::Group {
2776                        span: span_range(pat, 2..9),
2777                        kind: ast::GroupKind::NonCapturing(ast::Flags {
2778                            span: span_range(pat, 4..5),
2779                            items: vec![ast::FlagsItem {
2780                                span: span_range(pat, 4..5),
2781                                kind: ast::FlagsItemKind::Flag(
2782                                    ast::Flag::IgnoreWhitespace
2783                                ),
2784                            },],
2785                        }),
2786                        ast: Box::new(lit_with('a', span_range(pat, 6..7))),
2787                    }),
2788                    lit_with('a', span_range(pat, 9..10)),
2789                    lit_with(' ', span_range(pat, 10..11)),
2790                ]
2791            ))
2792        );
2793
2794        // Test that whitespace after an opening paren is insignificant.
2795        let pat = "(?x)( ?P<foo> a )";
2796        assert_eq!(
2797            parser(pat).parse(),
2798            Ok(concat_with(
2799                span_range(pat, 0..pat.len()),
2800                vec![
2801                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2802                    Ast::Group(ast::Group {
2803                        span: span_range(pat, 4..pat.len()),
2804                        kind: ast::GroupKind::CaptureName(ast::CaptureName {
2805                            span: span_range(pat, 9..12),
2806                            name: s("foo"),
2807                            index: 1,
2808                        }),
2809                        ast: Box::new(lit_with('a', span_range(pat, 14..15))),
2810                    }),
2811                ]
2812            ))
2813        );
2814        let pat = "(?x)(  a )";
2815        assert_eq!(
2816            parser(pat).parse(),
2817            Ok(concat_with(
2818                span_range(pat, 0..pat.len()),
2819                vec![
2820                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2821                    Ast::Group(ast::Group {
2822                        span: span_range(pat, 4..pat.len()),
2823                        kind: ast::GroupKind::CaptureIndex(1),
2824                        ast: Box::new(lit_with('a', span_range(pat, 7..8))),
2825                    }),
2826                ]
2827            ))
2828        );
2829        let pat = "(?x)(  ?:  a )";
2830        assert_eq!(
2831            parser(pat).parse(),
2832            Ok(concat_with(
2833                span_range(pat, 0..pat.len()),
2834                vec![
2835                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2836                    Ast::Group(ast::Group {
2837                        span: span_range(pat, 4..pat.len()),
2838                        kind: ast::GroupKind::NonCapturing(ast::Flags {
2839                            span: span_range(pat, 8..8),
2840                            items: vec![],
2841                        }),
2842                        ast: Box::new(lit_with('a', span_range(pat, 11..12))),
2843                    }),
2844                ]
2845            ))
2846        );
2847        let pat = r"(?x)\x { 53 }";
2848        assert_eq!(
2849            parser(pat).parse(),
2850            Ok(concat_with(
2851                span_range(pat, 0..pat.len()),
2852                vec![
2853                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2854                    Ast::Literal(ast::Literal {
2855                        span: span(4..13),
2856                        kind: ast::LiteralKind::HexBrace(
2857                            ast::HexLiteralKind::X
2858                        ),
2859                        c: 'S',
2860                    }),
2861                ]
2862            ))
2863        );
2864
2865        // Test that whitespace after an escape is OK.
2866        let pat = r"(?x)\ ";
2867        assert_eq!(
2868            parser(pat).parse(),
2869            Ok(concat_with(
2870                span_range(pat, 0..pat.len()),
2871                vec![
2872                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2873                    Ast::Literal(ast::Literal {
2874                        span: span_range(pat, 4..6),
2875                        kind: ast::LiteralKind::Special(
2876                            ast::SpecialLiteralKind::Space
2877                        ),
2878                        c: ' ',
2879                    }),
2880                ]
2881            ))
2882        );
2883        // ... but only when `x` mode is enabled.
2884        let pat = r"\ ";
2885        assert_eq!(
2886            parser(pat).parse().unwrap_err(),
2887            TestError {
2888                span: span_range(pat, 0..2),
2889                kind: ast::ErrorKind::EscapeUnrecognized,
2890            }
2891        );
2892    }
2893
2894    #[test]
2895    fn parse_newlines() {
2896        let pat = ".\n.";
2897        assert_eq!(
2898            parser(pat).parse(),
2899            Ok(concat_with(
2900                span_range(pat, 0..3),
2901                vec![
2902                    Ast::Dot(span_range(pat, 0..1)),
2903                    lit_with('\n', span_range(pat, 1..2)),
2904                    Ast::Dot(span_range(pat, 2..3)),
2905                ]
2906            ))
2907        );
2908
2909        let pat = "foobar\nbaz\nquux\n";
2910        assert_eq!(
2911            parser(pat).parse(),
2912            Ok(concat_with(
2913                span_range(pat, 0..pat.len()),
2914                vec![
2915                    lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
2916                    lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
2917                    lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
2918                    lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
2919                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2920                    lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
2921                    lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
2922                    lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
2923                    lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
2924                    lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
2925                    lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
2926                    lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
2927                    lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
2928                    lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
2929                    lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
2930                    lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
2931                ]
2932            ))
2933        );
2934    }
2935
2936    #[test]
2937    fn parse_uncounted_repetition() {
2938        assert_eq!(
2939            parser(r"a*").parse(),
2940            Ok(Ast::Repetition(ast::Repetition {
2941                span: span(0..2),
2942                op: ast::RepetitionOp {
2943                    span: span(1..2),
2944                    kind: ast::RepetitionKind::ZeroOrMore,
2945                },
2946                greedy: true,
2947                ast: Box::new(lit('a', 0)),
2948            }))
2949        );
2950        assert_eq!(
2951            parser(r"a+").parse(),
2952            Ok(Ast::Repetition(ast::Repetition {
2953                span: span(0..2),
2954                op: ast::RepetitionOp {
2955                    span: span(1..2),
2956                    kind: ast::RepetitionKind::OneOrMore,
2957                },
2958                greedy: true,
2959                ast: Box::new(lit('a', 0)),
2960            }))
2961        );
2962
2963        assert_eq!(
2964            parser(r"a?").parse(),
2965            Ok(Ast::Repetition(ast::Repetition {
2966                span: span(0..2),
2967                op: ast::RepetitionOp {
2968                    span: span(1..2),
2969                    kind: ast::RepetitionKind::ZeroOrOne,
2970                },
2971                greedy: true,
2972                ast: Box::new(lit('a', 0)),
2973            }))
2974        );
2975        assert_eq!(
2976            parser(r"a??").parse(),
2977            Ok(Ast::Repetition(ast::Repetition {
2978                span: span(0..3),
2979                op: ast::RepetitionOp {
2980                    span: span(1..3),
2981                    kind: ast::RepetitionKind::ZeroOrOne,
2982                },
2983                greedy: false,
2984                ast: Box::new(lit('a', 0)),
2985            }))
2986        );
2987        assert_eq!(
2988            parser(r"a?").parse(),
2989            Ok(Ast::Repetition(ast::Repetition {
2990                span: span(0..2),
2991                op: ast::RepetitionOp {
2992                    span: span(1..2),
2993                    kind: ast::RepetitionKind::ZeroOrOne,
2994                },
2995                greedy: true,
2996                ast: Box::new(lit('a', 0)),
2997            }))
2998        );
2999        assert_eq!(
3000            parser(r"a?b").parse(),
3001            Ok(concat(
3002                0..3,
3003                vec![
3004                    Ast::Repetition(ast::Repetition {
3005                        span: span(0..2),
3006                        op: ast::RepetitionOp {
3007                            span: span(1..2),
3008                            kind: ast::RepetitionKind::ZeroOrOne,
3009                        },
3010                        greedy: true,
3011                        ast: Box::new(lit('a', 0)),
3012                    }),
3013                    lit('b', 2),
3014                ]
3015            ))
3016        );
3017        assert_eq!(
3018            parser(r"a??b").parse(),
3019            Ok(concat(
3020                0..4,
3021                vec![
3022                    Ast::Repetition(ast::Repetition {
3023                        span: span(0..3),
3024                        op: ast::RepetitionOp {
3025                            span: span(1..3),
3026                            kind: ast::RepetitionKind::ZeroOrOne,
3027                        },
3028                        greedy: false,
3029                        ast: Box::new(lit('a', 0)),
3030                    }),
3031                    lit('b', 3),
3032                ]
3033            ))
3034        );
3035        assert_eq!(
3036            parser(r"ab?").parse(),
3037            Ok(concat(
3038                0..3,
3039                vec![
3040                    lit('a', 0),
3041                    Ast::Repetition(ast::Repetition {
3042                        span: span(1..3),
3043                        op: ast::RepetitionOp {
3044                            span: span(2..3),
3045                            kind: ast::RepetitionKind::ZeroOrOne,
3046                        },
3047                        greedy: true,
3048                        ast: Box::new(lit('b', 1)),
3049                    }),
3050                ]
3051            ))
3052        );
3053        assert_eq!(
3054            parser(r"(ab)?").parse(),
3055            Ok(Ast::Repetition(ast::Repetition {
3056                span: span(0..5),
3057                op: ast::RepetitionOp {
3058                    span: span(4..5),
3059                    kind: ast::RepetitionKind::ZeroOrOne,
3060                },
3061                greedy: true,
3062                ast: Box::new(group(
3063                    0..4,
3064                    1,
3065                    concat(1..3, vec![lit('a', 1), lit('b', 2),])
3066                )),
3067            }))
3068        );
3069        assert_eq!(
3070            parser(r"|a?").parse(),
3071            Ok(alt(
3072                0..3,
3073                vec![
3074                    Ast::Empty(span(0..0)),
3075                    Ast::Repetition(ast::Repetition {
3076                        span: span(1..3),
3077                        op: ast::RepetitionOp {
3078                            span: span(2..3),
3079                            kind: ast::RepetitionKind::ZeroOrOne,
3080                        },
3081                        greedy: true,
3082                        ast: Box::new(lit('a', 1)),
3083                    }),
3084                ]
3085            ))
3086        );
3087
3088        assert_eq!(
3089            parser(r"*").parse().unwrap_err(),
3090            TestError {
3091                span: span(0..0),
3092                kind: ast::ErrorKind::RepetitionMissing,
3093            }
3094        );
3095        assert_eq!(
3096            parser(r"(?i)*").parse().unwrap_err(),
3097            TestError {
3098                span: span(4..4),
3099                kind: ast::ErrorKind::RepetitionMissing,
3100            }
3101        );
3102        assert_eq!(
3103            parser(r"(*)").parse().unwrap_err(),
3104            TestError {
3105                span: span(1..1),
3106                kind: ast::ErrorKind::RepetitionMissing,
3107            }
3108        );
3109        assert_eq!(
3110            parser(r"(?:?)").parse().unwrap_err(),
3111            TestError {
3112                span: span(3..3),
3113                kind: ast::ErrorKind::RepetitionMissing,
3114            }
3115        );
3116        assert_eq!(
3117            parser(r"+").parse().unwrap_err(),
3118            TestError {
3119                span: span(0..0),
3120                kind: ast::ErrorKind::RepetitionMissing,
3121            }
3122        );
3123        assert_eq!(
3124            parser(r"?").parse().unwrap_err(),
3125            TestError {
3126                span: span(0..0),
3127                kind: ast::ErrorKind::RepetitionMissing,
3128            }
3129        );
3130        assert_eq!(
3131            parser(r"(?)").parse().unwrap_err(),
3132            TestError {
3133                span: span(1..1),
3134                kind: ast::ErrorKind::RepetitionMissing,
3135            }
3136        );
3137        assert_eq!(
3138            parser(r"|*").parse().unwrap_err(),
3139            TestError {
3140                span: span(1..1),
3141                kind: ast::ErrorKind::RepetitionMissing,
3142            }
3143        );
3144        assert_eq!(
3145            parser(r"|+").parse().unwrap_err(),
3146            TestError {
3147                span: span(1..1),
3148                kind: ast::ErrorKind::RepetitionMissing,
3149            }
3150        );
3151        assert_eq!(
3152            parser(r"|?").parse().unwrap_err(),
3153            TestError {
3154                span: span(1..1),
3155                kind: ast::ErrorKind::RepetitionMissing,
3156            }
3157        );
3158    }
3159
3160    #[test]
3161    fn parse_counted_repetition() {
3162        assert_eq!(
3163            parser(r"a{5}").parse(),
3164            Ok(Ast::Repetition(ast::Repetition {
3165                span: span(0..4),
3166                op: ast::RepetitionOp {
3167                    span: span(1..4),
3168                    kind: ast::RepetitionKind::Range(
3169                        ast::RepetitionRange::Exactly(5)
3170                    ),
3171                },
3172                greedy: true,
3173                ast: Box::new(lit('a', 0)),
3174            }))
3175        );
3176        assert_eq!(
3177            parser(r"a{5,}").parse(),
3178            Ok(Ast::Repetition(ast::Repetition {
3179                span: span(0..5),
3180                op: ast::RepetitionOp {
3181                    span: span(1..5),
3182                    kind: ast::RepetitionKind::Range(
3183                        ast::RepetitionRange::AtLeast(5)
3184                    ),
3185                },
3186                greedy: true,
3187                ast: Box::new(lit('a', 0)),
3188            }))
3189        );
3190        assert_eq!(
3191            parser(r"a{5,9}").parse(),
3192            Ok(Ast::Repetition(ast::Repetition {
3193                span: span(0..6),
3194                op: ast::RepetitionOp {
3195                    span: span(1..6),
3196                    kind: ast::RepetitionKind::Range(
3197                        ast::RepetitionRange::Bounded(5, 9)
3198                    ),
3199                },
3200                greedy: true,
3201                ast: Box::new(lit('a', 0)),
3202            }))
3203        );
3204        assert_eq!(
3205            parser(r"a{5}?").parse(),
3206            Ok(Ast::Repetition(ast::Repetition {
3207                span: span(0..5),
3208                op: ast::RepetitionOp {
3209                    span: span(1..5),
3210                    kind: ast::RepetitionKind::Range(
3211                        ast::RepetitionRange::Exactly(5)
3212                    ),
3213                },
3214                greedy: false,
3215                ast: Box::new(lit('a', 0)),
3216            }))
3217        );
3218        assert_eq!(
3219            parser(r"ab{5}").parse(),
3220            Ok(concat(
3221                0..5,
3222                vec![
3223                    lit('a', 0),
3224                    Ast::Repetition(ast::Repetition {
3225                        span: span(1..5),
3226                        op: ast::RepetitionOp {
3227                            span: span(2..5),
3228                            kind: ast::RepetitionKind::Range(
3229                                ast::RepetitionRange::Exactly(5)
3230                            ),
3231                        },
3232                        greedy: true,
3233                        ast: Box::new(lit('b', 1)),
3234                    }),
3235                ]
3236            ))
3237        );
3238        assert_eq!(
3239            parser(r"ab{5}c").parse(),
3240            Ok(concat(
3241                0..6,
3242                vec![
3243                    lit('a', 0),
3244                    Ast::Repetition(ast::Repetition {
3245                        span: span(1..5),
3246                        op: ast::RepetitionOp {
3247                            span: span(2..5),
3248                            kind: ast::RepetitionKind::Range(
3249                                ast::RepetitionRange::Exactly(5)
3250                            ),
3251                        },
3252                        greedy: true,
3253                        ast: Box::new(lit('b', 1)),
3254                    }),
3255                    lit('c', 5),
3256                ]
3257            ))
3258        );
3259
3260        assert_eq!(
3261            parser(r"a{ 5 }").parse(),
3262            Ok(Ast::Repetition(ast::Repetition {
3263                span: span(0..6),
3264                op: ast::RepetitionOp {
3265                    span: span(1..6),
3266                    kind: ast::RepetitionKind::Range(
3267                        ast::RepetitionRange::Exactly(5)
3268                    ),
3269                },
3270                greedy: true,
3271                ast: Box::new(lit('a', 0)),
3272            }))
3273        );
3274        assert_eq!(
3275            parser(r"a{ 5 , 9 }").parse(),
3276            Ok(Ast::Repetition(ast::Repetition {
3277                span: span(0..10),
3278                op: ast::RepetitionOp {
3279                    span: span(1..10),
3280                    kind: ast::RepetitionKind::Range(
3281                        ast::RepetitionRange::Bounded(5, 9)
3282                    ),
3283                },
3284                greedy: true,
3285                ast: Box::new(lit('a', 0)),
3286            }))
3287        );
3288        assert_eq!(
3289            parser_ignore_whitespace(r"a{5,9} ?").parse(),
3290            Ok(Ast::Repetition(ast::Repetition {
3291                span: span(0..8),
3292                op: ast::RepetitionOp {
3293                    span: span(1..8),
3294                    kind: ast::RepetitionKind::Range(
3295                        ast::RepetitionRange::Bounded(5, 9)
3296                    ),
3297                },
3298                greedy: false,
3299                ast: Box::new(lit('a', 0)),
3300            }))
3301        );
3302
3303        assert_eq!(
3304            parser(r"(?i){0}").parse().unwrap_err(),
3305            TestError {
3306                span: span(4..4),
3307                kind: ast::ErrorKind::RepetitionMissing,
3308            }
3309        );
3310        assert_eq!(
3311            parser(r"(?m){1,1}").parse().unwrap_err(),
3312            TestError {
3313                span: span(4..4),
3314                kind: ast::ErrorKind::RepetitionMissing,
3315            }
3316        );
3317        assert_eq!(
3318            parser(r"a{]}").parse().unwrap_err(),
3319            TestError {
3320                span: span(2..2),
3321                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3322            }
3323        );
3324        assert_eq!(
3325            parser(r"a{1,]}").parse().unwrap_err(),
3326            TestError {
3327                span: span(4..4),
3328                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3329            }
3330        );
3331        assert_eq!(
3332            parser(r"a{").parse().unwrap_err(),
3333            TestError {
3334                span: span(1..2),
3335                kind: ast::ErrorKind::RepetitionCountUnclosed,
3336            }
3337        );
3338        assert_eq!(
3339            parser(r"a{}").parse().unwrap_err(),
3340            TestError {
3341                span: span(2..2),
3342                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3343            }
3344        );
3345        assert_eq!(
3346            parser(r"a{a").parse().unwrap_err(),
3347            TestError {
3348                span: span(2..2),
3349                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3350            }
3351        );
3352        assert_eq!(
3353            parser(r"a{9999999999}").parse().unwrap_err(),
3354            TestError {
3355                span: span(2..12),
3356                kind: ast::ErrorKind::DecimalInvalid,
3357            }
3358        );
3359        assert_eq!(
3360            parser(r"a{9").parse().unwrap_err(),
3361            TestError {
3362                span: span(1..3),
3363                kind: ast::ErrorKind::RepetitionCountUnclosed,
3364            }
3365        );
3366        assert_eq!(
3367            parser(r"a{9,a").parse().unwrap_err(),
3368            TestError {
3369                span: span(4..4),
3370                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3371            }
3372        );
3373        assert_eq!(
3374            parser(r"a{9,9999999999}").parse().unwrap_err(),
3375            TestError {
3376                span: span(4..14),
3377                kind: ast::ErrorKind::DecimalInvalid,
3378            }
3379        );
3380        assert_eq!(
3381            parser(r"a{9,").parse().unwrap_err(),
3382            TestError {
3383                span: span(1..4),
3384                kind: ast::ErrorKind::RepetitionCountUnclosed,
3385            }
3386        );
3387        assert_eq!(
3388            parser(r"a{9,11").parse().unwrap_err(),
3389            TestError {
3390                span: span(1..6),
3391                kind: ast::ErrorKind::RepetitionCountUnclosed,
3392            }
3393        );
3394        assert_eq!(
3395            parser(r"a{2,1}").parse().unwrap_err(),
3396            TestError {
3397                span: span(1..6),
3398                kind: ast::ErrorKind::RepetitionCountInvalid,
3399            }
3400        );
3401        assert_eq!(
3402            parser(r"{5}").parse().unwrap_err(),
3403            TestError {
3404                span: span(0..0),
3405                kind: ast::ErrorKind::RepetitionMissing,
3406            }
3407        );
3408        assert_eq!(
3409            parser(r"|{5}").parse().unwrap_err(),
3410            TestError {
3411                span: span(1..1),
3412                kind: ast::ErrorKind::RepetitionMissing,
3413            }
3414        );
3415    }
3416
3417    #[test]
3418    fn parse_alternate() {
3419        assert_eq!(
3420            parser(r"a|b").parse(),
3421            Ok(Ast::Alternation(ast::Alternation {
3422                span: span(0..3),
3423                asts: vec![lit('a', 0), lit('b', 2)],
3424            }))
3425        );
3426        assert_eq!(
3427            parser(r"(a|b)").parse(),
3428            Ok(group(
3429                0..5,
3430                1,
3431                Ast::Alternation(ast::Alternation {
3432                    span: span(1..4),
3433                    asts: vec![lit('a', 1), lit('b', 3)],
3434                })
3435            ))
3436        );
3437
3438        assert_eq!(
3439            parser(r"a|b|c").parse(),
3440            Ok(Ast::Alternation(ast::Alternation {
3441                span: span(0..5),
3442                asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3443            }))
3444        );
3445        assert_eq!(
3446            parser(r"ax|by|cz").parse(),
3447            Ok(Ast::Alternation(ast::Alternation {
3448                span: span(0..8),
3449                asts: vec![
3450                    concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3451                    concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3452                    concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3453                ],
3454            }))
3455        );
3456        assert_eq!(
3457            parser(r"(ax|by|cz)").parse(),
3458            Ok(group(
3459                0..10,
3460                1,
3461                Ast::Alternation(ast::Alternation {
3462                    span: span(1..9),
3463                    asts: vec![
3464                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3465                        concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3466                        concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3467                    ],
3468                })
3469            ))
3470        );
3471        assert_eq!(
3472            parser(r"(ax|(by|(cz)))").parse(),
3473            Ok(group(
3474                0..14,
3475                1,
3476                alt(
3477                    1..13,
3478                    vec![
3479                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3480                        group(
3481                            4..13,
3482                            2,
3483                            alt(
3484                                5..12,
3485                                vec![
3486                                    concat(
3487                                        5..7,
3488                                        vec![lit('b', 5), lit('y', 6)]
3489                                    ),
3490                                    group(
3491                                        8..12,
3492                                        3,
3493                                        concat(
3494                                            9..11,
3495                                            vec![lit('c', 9), lit('z', 10),]
3496                                        )
3497                                    ),
3498                                ]
3499                            )
3500                        ),
3501                    ]
3502                )
3503            ))
3504        );
3505
3506        assert_eq!(
3507            parser(r"|").parse(),
3508            Ok(alt(
3509                0..1,
3510                vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
3511            ))
3512        );
3513        assert_eq!(
3514            parser(r"||").parse(),
3515            Ok(alt(
3516                0..2,
3517                vec![
3518                    Ast::Empty(span(0..0)),
3519                    Ast::Empty(span(1..1)),
3520                    Ast::Empty(span(2..2)),
3521                ]
3522            ))
3523        );
3524        assert_eq!(
3525            parser(r"a|").parse(),
3526            Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
3527        );
3528        assert_eq!(
3529            parser(r"|a").parse(),
3530            Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
3531        );
3532
3533        assert_eq!(
3534            parser(r"(|)").parse(),
3535            Ok(group(
3536                0..3,
3537                1,
3538                alt(
3539                    1..2,
3540                    vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
3541                )
3542            ))
3543        );
3544        assert_eq!(
3545            parser(r"(a|)").parse(),
3546            Ok(group(
3547                0..4,
3548                1,
3549                alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
3550            ))
3551        );
3552        assert_eq!(
3553            parser(r"(|a)").parse(),
3554            Ok(group(
3555                0..4,
3556                1,
3557                alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
3558            ))
3559        );
3560
3561        assert_eq!(
3562            parser(r"a|b)").parse().unwrap_err(),
3563            TestError {
3564                span: span(3..4),
3565                kind: ast::ErrorKind::GroupUnopened,
3566            }
3567        );
3568        assert_eq!(
3569            parser(r"(a|b").parse().unwrap_err(),
3570            TestError {
3571                span: span(0..1),
3572                kind: ast::ErrorKind::GroupUnclosed,
3573            }
3574        );
3575    }
3576
3577    #[test]
3578    fn parse_unsupported_lookaround() {
3579        assert_eq!(
3580            parser(r"(?=a)").parse().unwrap_err(),
3581            TestError {
3582                span: span(0..3),
3583                kind: ast::ErrorKind::UnsupportedLookAround,
3584            }
3585        );
3586        assert_eq!(
3587            parser(r"(?!a)").parse().unwrap_err(),
3588            TestError {
3589                span: span(0..3),
3590                kind: ast::ErrorKind::UnsupportedLookAround,
3591            }
3592        );
3593        assert_eq!(
3594            parser(r"(?<=a)").parse().unwrap_err(),
3595            TestError {
3596                span: span(0..4),
3597                kind: ast::ErrorKind::UnsupportedLookAround,
3598            }
3599        );
3600        assert_eq!(
3601            parser(r"(?<!a)").parse().unwrap_err(),
3602            TestError {
3603                span: span(0..4),
3604                kind: ast::ErrorKind::UnsupportedLookAround,
3605            }
3606        );
3607    }
3608
3609    #[test]
3610    fn parse_group() {
3611        assert_eq!(
3612            parser("(?i)").parse(),
3613            Ok(Ast::Flags(ast::SetFlags {
3614                span: span(0..4),
3615                flags: ast::Flags {
3616                    span: span(2..3),
3617                    items: vec![ast::FlagsItem {
3618                        span: span(2..3),
3619                        kind: ast::FlagsItemKind::Flag(
3620                            ast::Flag::CaseInsensitive
3621                        ),
3622                    }],
3623                },
3624            }))
3625        );
3626        assert_eq!(
3627            parser("(?iU)").parse(),
3628            Ok(Ast::Flags(ast::SetFlags {
3629                span: span(0..5),
3630                flags: ast::Flags {
3631                    span: span(2..4),
3632                    items: vec![
3633                        ast::FlagsItem {
3634                            span: span(2..3),
3635                            kind: ast::FlagsItemKind::Flag(
3636                                ast::Flag::CaseInsensitive
3637                            ),
3638                        },
3639                        ast::FlagsItem {
3640                            span: span(3..4),
3641                            kind: ast::FlagsItemKind::Flag(
3642                                ast::Flag::SwapGreed
3643                            ),
3644                        },
3645                    ],
3646                },
3647            }))
3648        );
3649        assert_eq!(
3650            parser("(?i-U)").parse(),
3651            Ok(Ast::Flags(ast::SetFlags {
3652                span: span(0..6),
3653                flags: ast::Flags {
3654                    span: span(2..5),
3655                    items: vec![
3656                        ast::FlagsItem {
3657                            span: span(2..3),
3658                            kind: ast::FlagsItemKind::Flag(
3659                                ast::Flag::CaseInsensitive
3660                            ),
3661                        },
3662                        ast::FlagsItem {
3663                            span: span(3..4),
3664                            kind: ast::FlagsItemKind::Negation,
3665                        },
3666                        ast::FlagsItem {
3667                            span: span(4..5),
3668                            kind: ast::FlagsItemKind::Flag(
3669                                ast::Flag::SwapGreed
3670                            ),
3671                        },
3672                    ],
3673                },
3674            }))
3675        );
3676
3677        assert_eq!(
3678            parser("()").parse(),
3679            Ok(Ast::Group(ast::Group {
3680                span: span(0..2),
3681                kind: ast::GroupKind::CaptureIndex(1),
3682                ast: Box::new(Ast::Empty(span(1..1))),
3683            }))
3684        );
3685        assert_eq!(
3686            parser("(a)").parse(),
3687            Ok(Ast::Group(ast::Group {
3688                span: span(0..3),
3689                kind: ast::GroupKind::CaptureIndex(1),
3690                ast: Box::new(lit('a', 1)),
3691            }))
3692        );
3693        assert_eq!(
3694            parser("(())").parse(),
3695            Ok(Ast::Group(ast::Group {
3696                span: span(0..4),
3697                kind: ast::GroupKind::CaptureIndex(1),
3698                ast: Box::new(Ast::Group(ast::Group {
3699                    span: span(1..3),
3700                    kind: ast::GroupKind::CaptureIndex(2),
3701                    ast: Box::new(Ast::Empty(span(2..2))),
3702                })),
3703            }))
3704        );
3705
3706        assert_eq!(
3707            parser("(?:a)").parse(),
3708            Ok(Ast::Group(ast::Group {
3709                span: span(0..5),
3710                kind: ast::GroupKind::NonCapturing(ast::Flags {
3711                    span: span(2..2),
3712                    items: vec![],
3713                }),
3714                ast: Box::new(lit('a', 3)),
3715            }))
3716        );
3717
3718        assert_eq!(
3719            parser("(?i:a)").parse(),
3720            Ok(Ast::Group(ast::Group {
3721                span: span(0..6),
3722                kind: ast::GroupKind::NonCapturing(ast::Flags {
3723                    span: span(2..3),
3724                    items: vec![ast::FlagsItem {
3725                        span: span(2..3),
3726                        kind: ast::FlagsItemKind::Flag(
3727                            ast::Flag::CaseInsensitive
3728                        ),
3729                    },],
3730                }),
3731                ast: Box::new(lit('a', 4)),
3732            }))
3733        );
3734        assert_eq!(
3735            parser("(?i-U:a)").parse(),
3736            Ok(Ast::Group(ast::Group {
3737                span: span(0..8),
3738                kind: ast::GroupKind::NonCapturing(ast::Flags {
3739                    span: span(2..5),
3740                    items: vec![
3741                        ast::FlagsItem {
3742                            span: span(2..3),
3743                            kind: ast::FlagsItemKind::Flag(
3744                                ast::Flag::CaseInsensitive
3745                            ),
3746                        },
3747                        ast::FlagsItem {
3748                            span: span(3..4),
3749                            kind: ast::FlagsItemKind::Negation,
3750                        },
3751                        ast::FlagsItem {
3752                            span: span(4..5),
3753                            kind: ast::FlagsItemKind::Flag(
3754                                ast::Flag::SwapGreed
3755                            ),
3756                        },
3757                    ],
3758                }),
3759                ast: Box::new(lit('a', 6)),
3760            }))
3761        );
3762
3763        assert_eq!(
3764            parser("(").parse().unwrap_err(),
3765            TestError {
3766                span: span(0..1),
3767                kind: ast::ErrorKind::GroupUnclosed,
3768            }
3769        );
3770        assert_eq!(
3771            parser("(?").parse().unwrap_err(),
3772            TestError {
3773                span: span(0..1),
3774                kind: ast::ErrorKind::GroupUnclosed,
3775            }
3776        );
3777        assert_eq!(
3778            parser("(?P").parse().unwrap_err(),
3779            TestError {
3780                span: span(2..3),
3781                kind: ast::ErrorKind::FlagUnrecognized,
3782            }
3783        );
3784        assert_eq!(
3785            parser("(?P<").parse().unwrap_err(),
3786            TestError {
3787                span: span(4..4),
3788                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3789            }
3790        );
3791        assert_eq!(
3792            parser("(a").parse().unwrap_err(),
3793            TestError {
3794                span: span(0..1),
3795                kind: ast::ErrorKind::GroupUnclosed,
3796            }
3797        );
3798        assert_eq!(
3799            parser("(()").parse().unwrap_err(),
3800            TestError {
3801                span: span(0..1),
3802                kind: ast::ErrorKind::GroupUnclosed,
3803            }
3804        );
3805        assert_eq!(
3806            parser(")").parse().unwrap_err(),
3807            TestError {
3808                span: span(0..1),
3809                kind: ast::ErrorKind::GroupUnopened,
3810            }
3811        );
3812        assert_eq!(
3813            parser("a)").parse().unwrap_err(),
3814            TestError {
3815                span: span(1..2),
3816                kind: ast::ErrorKind::GroupUnopened,
3817            }
3818        );
3819    }
3820
3821    #[test]
3822    fn parse_capture_name() {
3823        assert_eq!(
3824            parser("(?P<a>z)").parse(),
3825            Ok(Ast::Group(ast::Group {
3826                span: span(0..8),
3827                kind: ast::GroupKind::CaptureName(ast::CaptureName {
3828                    span: span(4..5),
3829                    name: s("a"),
3830                    index: 1,
3831                }),
3832                ast: Box::new(lit('z', 6)),
3833            }))
3834        );
3835        assert_eq!(
3836            parser("(?P<abc>z)").parse(),
3837            Ok(Ast::Group(ast::Group {
3838                span: span(0..10),
3839                kind: ast::GroupKind::CaptureName(ast::CaptureName {
3840                    span: span(4..7),
3841                    name: s("abc"),
3842                    index: 1,
3843                }),
3844                ast: Box::new(lit('z', 8)),
3845            }))
3846        );
3847
3848        assert_eq!(
3849            parser("(?P<").parse().unwrap_err(),
3850            TestError {
3851                span: span(4..4),
3852                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3853            }
3854        );
3855        assert_eq!(
3856            parser("(?P<>z)").parse().unwrap_err(),
3857            TestError {
3858                span: span(4..4),
3859                kind: ast::ErrorKind::GroupNameEmpty,
3860            }
3861        );
3862        assert_eq!(
3863            parser("(?P<a").parse().unwrap_err(),
3864            TestError {
3865                span: span(5..5),
3866                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3867            }
3868        );
3869        assert_eq!(
3870            parser("(?P<ab").parse().unwrap_err(),
3871            TestError {
3872                span: span(6..6),
3873                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3874            }
3875        );
3876        assert_eq!(
3877            parser("(?P<0a").parse().unwrap_err(),
3878            TestError {
3879                span: span(4..5),
3880                kind: ast::ErrorKind::GroupNameInvalid,
3881            }
3882        );
3883        assert_eq!(
3884            parser("(?P<~").parse().unwrap_err(),
3885            TestError {
3886                span: span(4..5),
3887                kind: ast::ErrorKind::GroupNameInvalid,
3888            }
3889        );
3890        assert_eq!(
3891            parser("(?P<abc~").parse().unwrap_err(),
3892            TestError {
3893                span: span(7..8),
3894                kind: ast::ErrorKind::GroupNameInvalid,
3895            }
3896        );
3897        assert_eq!(
3898            parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3899            TestError {
3900                span: span(12..13),
3901                kind: ast::ErrorKind::GroupNameDuplicate {
3902                    original: span(4..5),
3903                },
3904            }
3905        );
3906    }
3907
3908    #[test]
3909    fn parse_flags() {
3910        assert_eq!(
3911            parser("i:").parse_flags(),
3912            Ok(ast::Flags {
3913                span: span(0..1),
3914                items: vec![ast::FlagsItem {
3915                    span: span(0..1),
3916                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3917                }],
3918            })
3919        );
3920        assert_eq!(
3921            parser("i)").parse_flags(),
3922            Ok(ast::Flags {
3923                span: span(0..1),
3924                items: vec![ast::FlagsItem {
3925                    span: span(0..1),
3926                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3927                }],
3928            })
3929        );
3930
3931        assert_eq!(
3932            parser("isU:").parse_flags(),
3933            Ok(ast::Flags {
3934                span: span(0..3),
3935                items: vec![
3936                    ast::FlagsItem {
3937                        span: span(0..1),
3938                        kind: ast::FlagsItemKind::Flag(
3939                            ast::Flag::CaseInsensitive
3940                        ),
3941                    },
3942                    ast::FlagsItem {
3943                        span: span(1..2),
3944                        kind: ast::FlagsItemKind::Flag(
3945                            ast::Flag::DotMatchesNewLine
3946                        ),
3947                    },
3948                    ast::FlagsItem {
3949                        span: span(2..3),
3950                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3951                    },
3952                ],
3953            })
3954        );
3955
3956        assert_eq!(
3957            parser("-isU:").parse_flags(),
3958            Ok(ast::Flags {
3959                span: span(0..4),
3960                items: vec![
3961                    ast::FlagsItem {
3962                        span: span(0..1),
3963                        kind: ast::FlagsItemKind::Negation,
3964                    },
3965                    ast::FlagsItem {
3966                        span: span(1..2),
3967                        kind: ast::FlagsItemKind::Flag(
3968                            ast::Flag::CaseInsensitive
3969                        ),
3970                    },
3971                    ast::FlagsItem {
3972                        span: span(2..3),
3973                        kind: ast::FlagsItemKind::Flag(
3974                            ast::Flag::DotMatchesNewLine
3975                        ),
3976                    },
3977                    ast::FlagsItem {
3978                        span: span(3..4),
3979                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3980                    },
3981                ],
3982            })
3983        );
3984        assert_eq!(
3985            parser("i-sU:").parse_flags(),
3986            Ok(ast::Flags {
3987                span: span(0..4),
3988                items: vec![
3989                    ast::FlagsItem {
3990                        span: span(0..1),
3991                        kind: ast::FlagsItemKind::Flag(
3992                            ast::Flag::CaseInsensitive
3993                        ),
3994                    },
3995                    ast::FlagsItem {
3996                        span: span(1..2),
3997                        kind: ast::FlagsItemKind::Negation,
3998                    },
3999                    ast::FlagsItem {
4000                        span: span(2..3),
4001                        kind: ast::FlagsItemKind::Flag(
4002                            ast::Flag::DotMatchesNewLine
4003                        ),
4004                    },
4005                    ast::FlagsItem {
4006                        span: span(3..4),
4007                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4008                    },
4009                ],
4010            })
4011        );
4012
4013        assert_eq!(
4014            parser("isU").parse_flags().unwrap_err(),
4015            TestError {
4016                span: span(3..3),
4017                kind: ast::ErrorKind::FlagUnexpectedEof,
4018            }
4019        );
4020        assert_eq!(
4021            parser("isUa:").parse_flags().unwrap_err(),
4022            TestError {
4023                span: span(3..4),
4024                kind: ast::ErrorKind::FlagUnrecognized,
4025            }
4026        );
4027        assert_eq!(
4028            parser("isUi:").parse_flags().unwrap_err(),
4029            TestError {
4030                span: span(3..4),
4031                kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
4032            }
4033        );
4034        assert_eq!(
4035            parser("i-sU-i:").parse_flags().unwrap_err(),
4036            TestError {
4037                span: span(4..5),
4038                kind: ast::ErrorKind::FlagRepeatedNegation {
4039                    original: span(1..2),
4040                },
4041            }
4042        );
4043        assert_eq!(
4044            parser("-)").parse_flags().unwrap_err(),
4045            TestError {
4046                span: span(0..1),
4047                kind: ast::ErrorKind::FlagDanglingNegation,
4048            }
4049        );
4050        assert_eq!(
4051            parser("i-)").parse_flags().unwrap_err(),
4052            TestError {
4053                span: span(1..2),
4054                kind: ast::ErrorKind::FlagDanglingNegation,
4055            }
4056        );
4057        assert_eq!(
4058            parser("iU-)").parse_flags().unwrap_err(),
4059            TestError {
4060                span: span(2..3),
4061                kind: ast::ErrorKind::FlagDanglingNegation,
4062            }
4063        );
4064    }
4065
4066    #[test]
4067    fn parse_flag() {
4068        assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
4069        assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
4070        assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
4071        assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
4072        assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
4073        assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
4074
4075        assert_eq!(
4076            parser("a").parse_flag().unwrap_err(),
4077            TestError {
4078                span: span(0..1),
4079                kind: ast::ErrorKind::FlagUnrecognized,
4080            }
4081        );
4082        assert_eq!(
4083            parser("☃").parse_flag().unwrap_err(),
4084            TestError {
4085                span: span_range("☃", 0..3),
4086                kind: ast::ErrorKind::FlagUnrecognized,
4087            }
4088        );
4089    }
4090
4091    #[test]
4092    fn parse_primitive_non_escape() {
4093        assert_eq!(
4094            parser(r".").parse_primitive(),
4095            Ok(Primitive::Dot(span(0..1)))
4096        );
4097        assert_eq!(
4098            parser(r"^").parse_primitive(),
4099            Ok(Primitive::Assertion(ast::Assertion {
4100                span: span(0..1),
4101                kind: ast::AssertionKind::StartLine,
4102            }))
4103        );
4104        assert_eq!(
4105            parser(r"$").parse_primitive(),
4106            Ok(Primitive::Assertion(ast::Assertion {
4107                span: span(0..1),
4108                kind: ast::AssertionKind::EndLine,
4109            }))
4110        );
4111
4112        assert_eq!(
4113            parser(r"a").parse_primitive(),
4114            Ok(Primitive::Literal(ast::Literal {
4115                span: span(0..1),
4116                kind: ast::LiteralKind::Verbatim,
4117                c: 'a',
4118            }))
4119        );
4120        assert_eq!(
4121            parser(r"|").parse_primitive(),
4122            Ok(Primitive::Literal(ast::Literal {
4123                span: span(0..1),
4124                kind: ast::LiteralKind::Verbatim,
4125                c: '|',
4126            }))
4127        );
4128        assert_eq!(
4129            parser(r"☃").parse_primitive(),
4130            Ok(Primitive::Literal(ast::Literal {
4131                span: span_range("☃", 0..3),
4132                kind: ast::LiteralKind::Verbatim,
4133                c: '☃',
4134            }))
4135        );
4136    }
4137
4138    #[test]
4139    fn parse_escape() {
4140        assert_eq!(
4141            parser(r"\|").parse_primitive(),
4142            Ok(Primitive::Literal(ast::Literal {
4143                span: span(0..2),
4144                kind: ast::LiteralKind::Punctuation,
4145                c: '|',
4146            }))
4147        );
4148        let specials = &[
4149            (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
4150            (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
4151            (r"\t", '\t', ast::SpecialLiteralKind::Tab),
4152            (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
4153            (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
4154            (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
4155        ];
4156        for &(pat, c, ref kind) in specials {
4157            assert_eq!(
4158                parser(pat).parse_primitive(),
4159                Ok(Primitive::Literal(ast::Literal {
4160                    span: span(0..2),
4161                    kind: ast::LiteralKind::Special(kind.clone()),
4162                    c: c,
4163                }))
4164            );
4165        }
4166        assert_eq!(
4167            parser(r"\A").parse_primitive(),
4168            Ok(Primitive::Assertion(ast::Assertion {
4169                span: span(0..2),
4170                kind: ast::AssertionKind::StartText,
4171            }))
4172        );
4173        assert_eq!(
4174            parser(r"\z").parse_primitive(),
4175            Ok(Primitive::Assertion(ast::Assertion {
4176                span: span(0..2),
4177                kind: ast::AssertionKind::EndText,
4178            }))
4179        );
4180        assert_eq!(
4181            parser(r"\b").parse_primitive(),
4182            Ok(Primitive::Assertion(ast::Assertion {
4183                span: span(0..2),
4184                kind: ast::AssertionKind::WordBoundary,
4185            }))
4186        );
4187        assert_eq!(
4188            parser(r"\B").parse_primitive(),
4189            Ok(Primitive::Assertion(ast::Assertion {
4190                span: span(0..2),
4191                kind: ast::AssertionKind::NotWordBoundary,
4192            }))
4193        );
4194
4195        assert_eq!(
4196            parser(r"\").parse_escape().unwrap_err(),
4197            TestError {
4198                span: span(0..1),
4199                kind: ast::ErrorKind::EscapeUnexpectedEof,
4200            }
4201        );
4202        assert_eq!(
4203            parser(r"\y").parse_escape().unwrap_err(),
4204            TestError {
4205                span: span(0..2),
4206                kind: ast::ErrorKind::EscapeUnrecognized,
4207            }
4208        );
4209    }
4210
4211    #[test]
4212    fn parse_unsupported_backreference() {
4213        assert_eq!(
4214            parser(r"\0").parse_escape().unwrap_err(),
4215            TestError {
4216                span: span(0..2),
4217                kind: ast::ErrorKind::UnsupportedBackreference,
4218            }
4219        );
4220        assert_eq!(
4221            parser(r"\9").parse_escape().unwrap_err(),
4222            TestError {
4223                span: span(0..2),
4224                kind: ast::ErrorKind::UnsupportedBackreference,
4225            }
4226        );
4227    }
4228
4229    #[test]
4230    fn parse_octal() {
4231        for i in 0..511 {
4232            let pat = format!(r"\{:o}", i);
4233            assert_eq!(
4234                parser_octal(&pat).parse_escape(),
4235                Ok(Primitive::Literal(ast::Literal {
4236                    span: span(0..pat.len()),
4237                    kind: ast::LiteralKind::Octal,
4238                    c: ::std::char::from_u32(i).unwrap(),
4239                }))
4240            );
4241        }
4242        assert_eq!(
4243            parser_octal(r"\778").parse_escape(),
4244            Ok(Primitive::Literal(ast::Literal {
4245                span: span(0..3),
4246                kind: ast::LiteralKind::Octal,
4247                c: '?',
4248            }))
4249        );
4250        assert_eq!(
4251            parser_octal(r"\7777").parse_escape(),
4252            Ok(Primitive::Literal(ast::Literal {
4253                span: span(0..4),
4254                kind: ast::LiteralKind::Octal,
4255                c: '\u{01FF}',
4256            }))
4257        );
4258        assert_eq!(
4259            parser_octal(r"\778").parse(),
4260            Ok(Ast::Concat(ast::Concat {
4261                span: span(0..4),
4262                asts: vec![
4263                    Ast::Literal(ast::Literal {
4264                        span: span(0..3),
4265                        kind: ast::LiteralKind::Octal,
4266                        c: '?',
4267                    }),
4268                    Ast::Literal(ast::Literal {
4269                        span: span(3..4),
4270                        kind: ast::LiteralKind::Verbatim,
4271                        c: '8',
4272                    }),
4273                ],
4274            }))
4275        );
4276        assert_eq!(
4277            parser_octal(r"\7777").parse(),
4278            Ok(Ast::Concat(ast::Concat {
4279                span: span(0..5),
4280                asts: vec![
4281                    Ast::Literal(ast::Literal {
4282                        span: span(0..4),
4283                        kind: ast::LiteralKind::Octal,
4284                        c: '\u{01FF}',
4285                    }),
4286                    Ast::Literal(ast::Literal {
4287                        span: span(4..5),
4288                        kind: ast::LiteralKind::Verbatim,
4289                        c: '7',
4290                    }),
4291                ],
4292            }))
4293        );
4294
4295        assert_eq!(
4296            parser_octal(r"\8").parse_escape().unwrap_err(),
4297            TestError {
4298                span: span(0..2),
4299                kind: ast::ErrorKind::EscapeUnrecognized,
4300            }
4301        );
4302    }
4303
4304    #[test]
4305    fn parse_hex_two() {
4306        for i in 0..256 {
4307            let pat = format!(r"\x{:02x}", i);
4308            assert_eq!(
4309                parser(&pat).parse_escape(),
4310                Ok(Primitive::Literal(ast::Literal {
4311                    span: span(0..pat.len()),
4312                    kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
4313                    c: ::std::char::from_u32(i).unwrap(),
4314                }))
4315            );
4316        }
4317
4318        assert_eq!(
4319            parser(r"\xF").parse_escape().unwrap_err(),
4320            TestError {
4321                span: span(3..3),
4322                kind: ast::ErrorKind::EscapeUnexpectedEof,
4323            }
4324        );
4325        assert_eq!(
4326            parser(r"\xG").parse_escape().unwrap_err(),
4327            TestError {
4328                span: span(2..3),
4329                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4330            }
4331        );
4332        assert_eq!(
4333            parser(r"\xFG").parse_escape().unwrap_err(),
4334            TestError {
4335                span: span(3..4),
4336                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4337            }
4338        );
4339    }
4340
4341    #[test]
4342    fn parse_hex_four() {
4343        for i in 0..65536 {
4344            let c = match ::std::char::from_u32(i) {
4345                None => continue,
4346                Some(c) => c,
4347            };
4348            let pat = format!(r"\u{:04x}", i);
4349            assert_eq!(
4350                parser(&pat).parse_escape(),
4351                Ok(Primitive::Literal(ast::Literal {
4352                    span: span(0..pat.len()),
4353                    kind: ast::LiteralKind::HexFixed(
4354                        ast::HexLiteralKind::UnicodeShort
4355                    ),
4356                    c: c,
4357                }))
4358            );
4359        }
4360
4361        assert_eq!(
4362            parser(r"\uF").parse_escape().unwrap_err(),
4363            TestError {
4364                span: span(3..3),
4365                kind: ast::ErrorKind::EscapeUnexpectedEof,
4366            }
4367        );
4368        assert_eq!(
4369            parser(r"\uG").parse_escape().unwrap_err(),
4370            TestError {
4371                span: span(2..3),
4372                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4373            }
4374        );
4375        assert_eq!(
4376            parser(r"\uFG").parse_escape().unwrap_err(),
4377            TestError {
4378                span: span(3..4),
4379                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4380            }
4381        );
4382        assert_eq!(
4383            parser(r"\uFFG").parse_escape().unwrap_err(),
4384            TestError {
4385                span: span(4..5),
4386                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4387            }
4388        );
4389        assert_eq!(
4390            parser(r"\uFFFG").parse_escape().unwrap_err(),
4391            TestError {
4392                span: span(5..6),
4393                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4394            }
4395        );
4396        assert_eq!(
4397            parser(r"\uD800").parse_escape().unwrap_err(),
4398            TestError {
4399                span: span(2..6),
4400                kind: ast::ErrorKind::EscapeHexInvalid,
4401            }
4402        );
4403    }
4404
4405    #[test]
4406    fn parse_hex_eight() {
4407        for i in 0..65536 {
4408            let c = match ::std::char::from_u32(i) {
4409                None => continue,
4410                Some(c) => c,
4411            };
4412            let pat = format!(r"\U{:08x}", i);
4413            assert_eq!(
4414                parser(&pat).parse_escape(),
4415                Ok(Primitive::Literal(ast::Literal {
4416                    span: span(0..pat.len()),
4417                    kind: ast::LiteralKind::HexFixed(
4418                        ast::HexLiteralKind::UnicodeLong
4419                    ),
4420                    c: c,
4421                }))
4422            );
4423        }
4424
4425        assert_eq!(
4426            parser(r"\UF").parse_escape().unwrap_err(),
4427            TestError {
4428                span: span(3..3),
4429                kind: ast::ErrorKind::EscapeUnexpectedEof,
4430            }
4431        );
4432        assert_eq!(
4433            parser(r"\UG").parse_escape().unwrap_err(),
4434            TestError {
4435                span: span(2..3),
4436                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4437            }
4438        );
4439        assert_eq!(
4440            parser(r"\UFG").parse_escape().unwrap_err(),
4441            TestError {
4442                span: span(3..4),
4443                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4444            }
4445        );
4446        assert_eq!(
4447            parser(r"\UFFG").parse_escape().unwrap_err(),
4448            TestError {
4449                span: span(4..5),
4450                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4451            }
4452        );
4453        assert_eq!(
4454            parser(r"\UFFFG").parse_escape().unwrap_err(),
4455            TestError {
4456                span: span(5..6),
4457                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4458            }
4459        );
4460        assert_eq!(
4461            parser(r"\UFFFFG").parse_escape().unwrap_err(),
4462            TestError {
4463                span: span(6..7),
4464                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4465            }
4466        );
4467        assert_eq!(
4468            parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4469            TestError {
4470                span: span(7..8),
4471                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4472            }
4473        );
4474        assert_eq!(
4475            parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4476            TestError {
4477                span: span(8..9),
4478                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4479            }
4480        );
4481        assert_eq!(
4482            parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4483            TestError {
4484                span: span(9..10),
4485                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4486            }
4487        );
4488    }
4489
4490    #[test]
4491    fn parse_hex_brace() {
4492        assert_eq!(
4493            parser(r"\u{26c4}").parse_escape(),
4494            Ok(Primitive::Literal(ast::Literal {
4495                span: span(0..8),
4496                kind: ast::LiteralKind::HexBrace(
4497                    ast::HexLiteralKind::UnicodeShort
4498                ),
4499                c: '⛄',
4500            }))
4501        );
4502        assert_eq!(
4503            parser(r"\U{26c4}").parse_escape(),
4504            Ok(Primitive::Literal(ast::Literal {
4505                span: span(0..8),
4506                kind: ast::LiteralKind::HexBrace(
4507                    ast::HexLiteralKind::UnicodeLong
4508                ),
4509                c: '⛄',
4510            }))
4511        );
4512        assert_eq!(
4513            parser(r"\x{26c4}").parse_escape(),
4514            Ok(Primitive::Literal(ast::Literal {
4515                span: span(0..8),
4516                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4517                c: '⛄',
4518            }))
4519        );
4520        assert_eq!(
4521            parser(r"\x{26C4}").parse_escape(),
4522            Ok(Primitive::Literal(ast::Literal {
4523                span: span(0..8),
4524                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4525                c: '⛄',
4526            }))
4527        );
4528        assert_eq!(
4529            parser(r"\x{10fFfF}").parse_escape(),
4530            Ok(Primitive::Literal(ast::Literal {
4531                span: span(0..10),
4532                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4533                c: '\u{10FFFF}',
4534            }))
4535        );
4536
4537        assert_eq!(
4538            parser(r"\x").parse_escape().unwrap_err(),
4539            TestError {
4540                span: span(2..2),
4541                kind: ast::ErrorKind::EscapeUnexpectedEof,
4542            }
4543        );
4544        assert_eq!(
4545            parser(r"\x{").parse_escape().unwrap_err(),
4546            TestError {
4547                span: span(2..3),
4548                kind: ast::ErrorKind::EscapeUnexpectedEof,
4549            }
4550        );
4551        assert_eq!(
4552            parser(r"\x{FF").parse_escape().unwrap_err(),
4553            TestError {
4554                span: span(2..5),
4555                kind: ast::ErrorKind::EscapeUnexpectedEof,
4556            }
4557        );
4558        assert_eq!(
4559            parser(r"\x{}").parse_escape().unwrap_err(),
4560            TestError {
4561                span: span(2..4),
4562                kind: ast::ErrorKind::EscapeHexEmpty,
4563            }
4564        );
4565        assert_eq!(
4566            parser(r"\x{FGF}").parse_escape().unwrap_err(),
4567            TestError {
4568                span: span(4..5),
4569                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4570            }
4571        );
4572        assert_eq!(
4573            parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4574            TestError {
4575                span: span(3..9),
4576                kind: ast::ErrorKind::EscapeHexInvalid,
4577            }
4578        );
4579        assert_eq!(
4580            parser(r"\x{D800}").parse_escape().unwrap_err(),
4581            TestError {
4582                span: span(3..7),
4583                kind: ast::ErrorKind::EscapeHexInvalid,
4584            }
4585        );
4586        assert_eq!(
4587            parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4588            TestError {
4589                span: span(3..12),
4590                kind: ast::ErrorKind::EscapeHexInvalid,
4591            }
4592        );
4593    }
4594
4595    #[test]
4596    fn parse_decimal() {
4597        assert_eq!(parser("123").parse_decimal(), Ok(123));
4598        assert_eq!(parser("0").parse_decimal(), Ok(0));
4599        assert_eq!(parser("01").parse_decimal(), Ok(1));
4600
4601        assert_eq!(
4602            parser("-1").parse_decimal().unwrap_err(),
4603            TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4604        );
4605        assert_eq!(
4606            parser("").parse_decimal().unwrap_err(),
4607            TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4608        );
4609        assert_eq!(
4610            parser("9999999999").parse_decimal().unwrap_err(),
4611            TestError {
4612                span: span(0..10),
4613                kind: ast::ErrorKind::DecimalInvalid,
4614            }
4615        );
4616    }
4617
4618    #[test]
4619    fn parse_set_class() {
4620        fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4621            ast::ClassSet::union(ast::ClassSetUnion {
4622                span: span,
4623                items: items,
4624            })
4625        }
4626
4627        fn intersection(
4628            span: Span,
4629            lhs: ast::ClassSet,
4630            rhs: ast::ClassSet,
4631        ) -> ast::ClassSet {
4632            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4633                span: span,
4634                kind: ast::ClassSetBinaryOpKind::Intersection,
4635                lhs: Box::new(lhs),
4636                rhs: Box::new(rhs),
4637            })
4638        }
4639
4640        fn difference(
4641            span: Span,
4642            lhs: ast::ClassSet,
4643            rhs: ast::ClassSet,
4644        ) -> ast::ClassSet {
4645            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4646                span: span,
4647                kind: ast::ClassSetBinaryOpKind::Difference,
4648                lhs: Box::new(lhs),
4649                rhs: Box::new(rhs),
4650            })
4651        }
4652
4653        fn symdifference(
4654            span: Span,
4655            lhs: ast::ClassSet,
4656            rhs: ast::ClassSet,
4657        ) -> ast::ClassSet {
4658            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4659                span: span,
4660                kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4661                lhs: Box::new(lhs),
4662                rhs: Box::new(rhs),
4663            })
4664        }
4665
4666        fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4667            ast::ClassSet::Item(item)
4668        }
4669
4670        fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4671            ast::ClassSetItem::Ascii(cls)
4672        }
4673
4674        fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4675            ast::ClassSetItem::Unicode(cls)
4676        }
4677
4678        fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4679            ast::ClassSetItem::Perl(cls)
4680        }
4681
4682        fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4683            ast::ClassSetItem::Bracketed(Box::new(cls))
4684        }
4685
4686        fn lit(span: Span, c: char) -> ast::ClassSetItem {
4687            ast::ClassSetItem::Literal(ast::Literal {
4688                span: span,
4689                kind: ast::LiteralKind::Verbatim,
4690                c: c,
4691            })
4692        }
4693
4694        fn empty(span: Span) -> ast::ClassSetItem {
4695            ast::ClassSetItem::Empty(span)
4696        }
4697
4698        fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4699            let pos1 = Position {
4700                offset: span.start.offset + start.len_utf8(),
4701                column: span.start.column + 1,
4702                ..span.start
4703            };
4704            let pos2 = Position {
4705                offset: span.end.offset - end.len_utf8(),
4706                column: span.end.column - 1,
4707                ..span.end
4708            };
4709            ast::ClassSetItem::Range(ast::ClassSetRange {
4710                span: span,
4711                start: ast::Literal {
4712                    span: Span { end: pos1, ..span },
4713                    kind: ast::LiteralKind::Verbatim,
4714                    c: start,
4715                },
4716                end: ast::Literal {
4717                    span: Span { start: pos2, ..span },
4718                    kind: ast::LiteralKind::Verbatim,
4719                    c: end,
4720                },
4721            })
4722        }
4723
4724        fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4725            ast::ClassAscii {
4726                span: span,
4727                kind: ast::ClassAsciiKind::Alnum,
4728                negated: negated,
4729            }
4730        }
4731
4732        fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4733            ast::ClassAscii {
4734                span: span,
4735                kind: ast::ClassAsciiKind::Lower,
4736                negated: negated,
4737            }
4738        }
4739
4740        assert_eq!(
4741            parser("[[:alnum:]]").parse(),
4742            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4743                span: span(0..11),
4744                negated: false,
4745                kind: itemset(item_ascii(alnum(span(1..10), false))),
4746            })))
4747        );
4748        assert_eq!(
4749            parser("[[[:alnum:]]]").parse(),
4750            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4751                span: span(0..13),
4752                negated: false,
4753                kind: itemset(item_bracket(ast::ClassBracketed {
4754                    span: span(1..12),
4755                    negated: false,
4756                    kind: itemset(item_ascii(alnum(span(2..11), false))),
4757                })),
4758            })))
4759        );
4760        assert_eq!(
4761            parser("[[:alnum:]&&[:lower:]]").parse(),
4762            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4763                span: span(0..22),
4764                negated: false,
4765                kind: intersection(
4766                    span(1..21),
4767                    itemset(item_ascii(alnum(span(1..10), false))),
4768                    itemset(item_ascii(lower(span(12..21), false))),
4769                ),
4770            })))
4771        );
4772        assert_eq!(
4773            parser("[[:alnum:]--[:lower:]]").parse(),
4774            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4775                span: span(0..22),
4776                negated: false,
4777                kind: difference(
4778                    span(1..21),
4779                    itemset(item_ascii(alnum(span(1..10), false))),
4780                    itemset(item_ascii(lower(span(12..21), false))),
4781                ),
4782            })))
4783        );
4784        assert_eq!(
4785            parser("[[:alnum:]~~[:lower:]]").parse(),
4786            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4787                span: span(0..22),
4788                negated: false,
4789                kind: symdifference(
4790                    span(1..21),
4791                    itemset(item_ascii(alnum(span(1..10), false))),
4792                    itemset(item_ascii(lower(span(12..21), false))),
4793                ),
4794            })))
4795        );
4796
4797        assert_eq!(
4798            parser("[a]").parse(),
4799            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4800                span: span(0..3),
4801                negated: false,
4802                kind: itemset(lit(span(1..2), 'a')),
4803            })))
4804        );
4805        assert_eq!(
4806            parser(r"[a\]]").parse(),
4807            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4808                span: span(0..5),
4809                negated: false,
4810                kind: union(
4811                    span(1..4),
4812                    vec![
4813                        lit(span(1..2), 'a'),
4814                        ast::ClassSetItem::Literal(ast::Literal {
4815                            span: span(2..4),
4816                            kind: ast::LiteralKind::Punctuation,
4817                            c: ']',
4818                        }),
4819                    ]
4820                ),
4821            })))
4822        );
4823        assert_eq!(
4824            parser(r"[a\-z]").parse(),
4825            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4826                span: span(0..6),
4827                negated: false,
4828                kind: union(
4829                    span(1..5),
4830                    vec![
4831                        lit(span(1..2), 'a'),
4832                        ast::ClassSetItem::Literal(ast::Literal {
4833                            span: span(2..4),
4834                            kind: ast::LiteralKind::Punctuation,
4835                            c: '-',
4836                        }),
4837                        lit(span(4..5), 'z'),
4838                    ]
4839                ),
4840            })))
4841        );
4842        assert_eq!(
4843            parser("[ab]").parse(),
4844            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4845                span: span(0..4),
4846                negated: false,
4847                kind: union(
4848                    span(1..3),
4849                    vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4850                ),
4851            })))
4852        );
4853        assert_eq!(
4854            parser("[a-]").parse(),
4855            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4856                span: span(0..4),
4857                negated: false,
4858                kind: union(
4859                    span(1..3),
4860                    vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4861                ),
4862            })))
4863        );
4864        assert_eq!(
4865            parser("[-a]").parse(),
4866            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4867                span: span(0..4),
4868                negated: false,
4869                kind: union(
4870                    span(1..3),
4871                    vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4872                ),
4873            })))
4874        );
4875        assert_eq!(
4876            parser(r"[\pL]").parse(),
4877            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4878                span: span(0..5),
4879                negated: false,
4880                kind: itemset(item_unicode(ast::ClassUnicode {
4881                    span: span(1..4),
4882                    negated: false,
4883                    kind: ast::ClassUnicodeKind::OneLetter('L'),
4884                })),
4885            })))
4886        );
4887        assert_eq!(
4888            parser(r"[\w]").parse(),
4889            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4890                span: span(0..4),
4891                negated: false,
4892                kind: itemset(item_perl(ast::ClassPerl {
4893                    span: span(1..3),
4894                    kind: ast::ClassPerlKind::Word,
4895                    negated: false,
4896                })),
4897            })))
4898        );
4899        assert_eq!(
4900            parser(r"[a\wz]").parse(),
4901            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4902                span: span(0..6),
4903                negated: false,
4904                kind: union(
4905                    span(1..5),
4906                    vec![
4907                        lit(span(1..2), 'a'),
4908                        item_perl(ast::ClassPerl {
4909                            span: span(2..4),
4910                            kind: ast::ClassPerlKind::Word,
4911                            negated: false,
4912                        }),
4913                        lit(span(4..5), 'z'),
4914                    ]
4915                ),
4916            })))
4917        );
4918
4919        assert_eq!(
4920            parser("[a-z]").parse(),
4921            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4922                span: span(0..5),
4923                negated: false,
4924                kind: itemset(range(span(1..4), 'a', 'z')),
4925            })))
4926        );
4927        assert_eq!(
4928            parser("[a-cx-z]").parse(),
4929            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4930                span: span(0..8),
4931                negated: false,
4932                kind: union(
4933                    span(1..7),
4934                    vec![
4935                        range(span(1..4), 'a', 'c'),
4936                        range(span(4..7), 'x', 'z'),
4937                    ]
4938                ),
4939            })))
4940        );
4941        assert_eq!(
4942            parser(r"[\w&&a-cx-z]").parse(),
4943            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4944                span: span(0..12),
4945                negated: false,
4946                kind: intersection(
4947                    span(1..11),
4948                    itemset(item_perl(ast::ClassPerl {
4949                        span: span(1..3),
4950                        kind: ast::ClassPerlKind::Word,
4951                        negated: false,
4952                    })),
4953                    union(
4954                        span(5..11),
4955                        vec![
4956                            range(span(5..8), 'a', 'c'),
4957                            range(span(8..11), 'x', 'z'),
4958                        ]
4959                    ),
4960                ),
4961            })))
4962        );
4963        assert_eq!(
4964            parser(r"[a-cx-z&&\w]").parse(),
4965            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4966                span: span(0..12),
4967                negated: false,
4968                kind: intersection(
4969                    span(1..11),
4970                    union(
4971                        span(1..7),
4972                        vec![
4973                            range(span(1..4), 'a', 'c'),
4974                            range(span(4..7), 'x', 'z'),
4975                        ]
4976                    ),
4977                    itemset(item_perl(ast::ClassPerl {
4978                        span: span(9..11),
4979                        kind: ast::ClassPerlKind::Word,
4980                        negated: false,
4981                    })),
4982                ),
4983            })))
4984        );
4985        assert_eq!(
4986            parser(r"[a--b--c]").parse(),
4987            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4988                span: span(0..9),
4989                negated: false,
4990                kind: difference(
4991                    span(1..8),
4992                    difference(
4993                        span(1..5),
4994                        itemset(lit(span(1..2), 'a')),
4995                        itemset(lit(span(4..5), 'b')),
4996                    ),
4997                    itemset(lit(span(7..8), 'c')),
4998                ),
4999            })))
5000        );
5001        assert_eq!(
5002            parser(r"[a~~b~~c]").parse(),
5003            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5004                span: span(0..9),
5005                negated: false,
5006                kind: symdifference(
5007                    span(1..8),
5008                    symdifference(
5009                        span(1..5),
5010                        itemset(lit(span(1..2), 'a')),
5011                        itemset(lit(span(4..5), 'b')),
5012                    ),
5013                    itemset(lit(span(7..8), 'c')),
5014                ),
5015            })))
5016        );
5017        assert_eq!(
5018            parser(r"[\^&&^]").parse(),
5019            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5020                span: span(0..7),
5021                negated: false,
5022                kind: intersection(
5023                    span(1..6),
5024                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5025                        span: span(1..3),
5026                        kind: ast::LiteralKind::Punctuation,
5027                        c: '^',
5028                    })),
5029                    itemset(lit(span(5..6), '^')),
5030                ),
5031            })))
5032        );
5033        assert_eq!(
5034            parser(r"[\&&&&]").parse(),
5035            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5036                span: span(0..7),
5037                negated: false,
5038                kind: intersection(
5039                    span(1..6),
5040                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5041                        span: span(1..3),
5042                        kind: ast::LiteralKind::Punctuation,
5043                        c: '&',
5044                    })),
5045                    itemset(lit(span(5..6), '&')),
5046                ),
5047            })))
5048        );
5049        assert_eq!(
5050            parser(r"[&&&&]").parse(),
5051            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5052                span: span(0..6),
5053                negated: false,
5054                kind: intersection(
5055                    span(1..5),
5056                    intersection(
5057                        span(1..3),
5058                        itemset(empty(span(1..1))),
5059                        itemset(empty(span(3..3))),
5060                    ),
5061                    itemset(empty(span(5..5))),
5062                ),
5063            })))
5064        );
5065
5066        let pat = "[☃-⛄]";
5067        assert_eq!(
5068            parser(pat).parse(),
5069            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5070                span: span_range(pat, 0..9),
5071                negated: false,
5072                kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5073                    span: span_range(pat, 1..8),
5074                    start: ast::Literal {
5075                        span: span_range(pat, 1..4),
5076                        kind: ast::LiteralKind::Verbatim,
5077                        c: '☃',
5078                    },
5079                    end: ast::Literal {
5080                        span: span_range(pat, 5..8),
5081                        kind: ast::LiteralKind::Verbatim,
5082                        c: '⛄',
5083                    },
5084                })),
5085            })))
5086        );
5087
5088        assert_eq!(
5089            parser(r"[]]").parse(),
5090            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5091                span: span(0..3),
5092                negated: false,
5093                kind: itemset(lit(span(1..2), ']')),
5094            })))
5095        );
5096        assert_eq!(
5097            parser(r"[]\[]").parse(),
5098            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5099                span: span(0..5),
5100                negated: false,
5101                kind: union(
5102                    span(1..4),
5103                    vec![
5104                        lit(span(1..2), ']'),
5105                        ast::ClassSetItem::Literal(ast::Literal {
5106                            span: span(2..4),
5107                            kind: ast::LiteralKind::Punctuation,
5108                            c: '[',
5109                        }),
5110                    ]
5111                ),
5112            })))
5113        );
5114        assert_eq!(
5115            parser(r"[\[]]").parse(),
5116            Ok(concat(
5117                0..5,
5118                vec![
5119                    Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5120                        span: span(0..4),
5121                        negated: false,
5122                        kind: itemset(ast::ClassSetItem::Literal(
5123                            ast::Literal {
5124                                span: span(1..3),
5125                                kind: ast::LiteralKind::Punctuation,
5126                                c: '[',
5127                            }
5128                        )),
5129                    })),
5130                    Ast::Literal(ast::Literal {
5131                        span: span(4..5),
5132                        kind: ast::LiteralKind::Verbatim,
5133                        c: ']',
5134                    }),
5135                ]
5136            ))
5137        );
5138
5139        assert_eq!(
5140            parser("[").parse().unwrap_err(),
5141            TestError {
5142                span: span(0..1),
5143                kind: ast::ErrorKind::ClassUnclosed,
5144            }
5145        );
5146        assert_eq!(
5147            parser("[[").parse().unwrap_err(),
5148            TestError {
5149                span: span(1..2),
5150                kind: ast::ErrorKind::ClassUnclosed,
5151            }
5152        );
5153        assert_eq!(
5154            parser("[[-]").parse().unwrap_err(),
5155            TestError {
5156                span: span(0..1),
5157                kind: ast::ErrorKind::ClassUnclosed,
5158            }
5159        );
5160        assert_eq!(
5161            parser("[[[:alnum:]").parse().unwrap_err(),
5162            TestError {
5163                span: span(1..2),
5164                kind: ast::ErrorKind::ClassUnclosed,
5165            }
5166        );
5167        assert_eq!(
5168            parser(r"[\b]").parse().unwrap_err(),
5169            TestError {
5170                span: span(1..3),
5171                kind: ast::ErrorKind::ClassEscapeInvalid,
5172            }
5173        );
5174        assert_eq!(
5175            parser(r"[\w-a]").parse().unwrap_err(),
5176            TestError {
5177                span: span(1..3),
5178                kind: ast::ErrorKind::ClassRangeLiteral,
5179            }
5180        );
5181        assert_eq!(
5182            parser(r"[a-\w]").parse().unwrap_err(),
5183            TestError {
5184                span: span(3..5),
5185                kind: ast::ErrorKind::ClassRangeLiteral,
5186            }
5187        );
5188        assert_eq!(
5189            parser(r"[z-a]").parse().unwrap_err(),
5190            TestError {
5191                span: span(1..4),
5192                kind: ast::ErrorKind::ClassRangeInvalid,
5193            }
5194        );
5195
5196        assert_eq!(
5197            parser_ignore_whitespace("[a ").parse().unwrap_err(),
5198            TestError {
5199                span: span(0..1),
5200                kind: ast::ErrorKind::ClassUnclosed,
5201            }
5202        );
5203        assert_eq!(
5204            parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5205            TestError {
5206                span: span(0..1),
5207                kind: ast::ErrorKind::ClassUnclosed,
5208            }
5209        );
5210    }
5211
5212    #[test]
5213    fn parse_set_class_open() {
5214        assert_eq!(parser("[a]").parse_set_class_open(), {
5215            let set = ast::ClassBracketed {
5216                span: span(0..1),
5217                negated: false,
5218                kind: ast::ClassSet::union(ast::ClassSetUnion {
5219                    span: span(1..1),
5220                    items: vec![],
5221                }),
5222            };
5223            let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5224            Ok((set, union))
5225        });
5226        assert_eq!(
5227            parser_ignore_whitespace("[   a]").parse_set_class_open(),
5228            {
5229                let set = ast::ClassBracketed {
5230                    span: span(0..4),
5231                    negated: false,
5232                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5233                        span: span(4..4),
5234                        items: vec![],
5235                    }),
5236                };
5237                let union =
5238                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5239                Ok((set, union))
5240            }
5241        );
5242        assert_eq!(parser("[^a]").parse_set_class_open(), {
5243            let set = ast::ClassBracketed {
5244                span: span(0..2),
5245                negated: true,
5246                kind: ast::ClassSet::union(ast::ClassSetUnion {
5247                    span: span(2..2),
5248                    items: vec![],
5249                }),
5250            };
5251            let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5252            Ok((set, union))
5253        });
5254        assert_eq!(
5255            parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5256            {
5257                let set = ast::ClassBracketed {
5258                    span: span(0..4),
5259                    negated: true,
5260                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5261                        span: span(4..4),
5262                        items: vec![],
5263                    }),
5264                };
5265                let union =
5266                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5267                Ok((set, union))
5268            }
5269        );
5270        assert_eq!(parser("[-a]").parse_set_class_open(), {
5271            let set = ast::ClassBracketed {
5272                span: span(0..2),
5273                negated: false,
5274                kind: ast::ClassSet::union(ast::ClassSetUnion {
5275                    span: span(1..1),
5276                    items: vec![],
5277                }),
5278            };
5279            let union = ast::ClassSetUnion {
5280                span: span(1..2),
5281                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5282                    span: span(1..2),
5283                    kind: ast::LiteralKind::Verbatim,
5284                    c: '-',
5285                })],
5286            };
5287            Ok((set, union))
5288        });
5289        assert_eq!(
5290            parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5291            {
5292                let set = ast::ClassBracketed {
5293                    span: span(0..4),
5294                    negated: false,
5295                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5296                        span: span(2..2),
5297                        items: vec![],
5298                    }),
5299                };
5300                let union = ast::ClassSetUnion {
5301                    span: span(2..3),
5302                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5303                        span: span(2..3),
5304                        kind: ast::LiteralKind::Verbatim,
5305                        c: '-',
5306                    })],
5307                };
5308                Ok((set, union))
5309            }
5310        );
5311        assert_eq!(parser("[^-a]").parse_set_class_open(), {
5312            let set = ast::ClassBracketed {
5313                span: span(0..3),
5314                negated: true,
5315                kind: ast::ClassSet::union(ast::ClassSetUnion {
5316                    span: span(2..2),
5317                    items: vec![],
5318                }),
5319            };
5320            let union = ast::ClassSetUnion {
5321                span: span(2..3),
5322                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5323                    span: span(2..3),
5324                    kind: ast::LiteralKind::Verbatim,
5325                    c: '-',
5326                })],
5327            };
5328            Ok((set, union))
5329        });
5330        assert_eq!(parser("[--a]").parse_set_class_open(), {
5331            let set = ast::ClassBracketed {
5332                span: span(0..3),
5333                negated: false,
5334                kind: ast::ClassSet::union(ast::ClassSetUnion {
5335                    span: span(1..1),
5336                    items: vec![],
5337                }),
5338            };
5339            let union = ast::ClassSetUnion {
5340                span: span(1..3),
5341                items: vec![
5342                    ast::ClassSetItem::Literal(ast::Literal {
5343                        span: span(1..2),
5344                        kind: ast::LiteralKind::Verbatim,
5345                        c: '-',
5346                    }),
5347                    ast::ClassSetItem::Literal(ast::Literal {
5348                        span: span(2..3),
5349                        kind: ast::LiteralKind::Verbatim,
5350                        c: '-',
5351                    }),
5352                ],
5353            };
5354            Ok((set, union))
5355        });
5356        assert_eq!(parser("[]a]").parse_set_class_open(), {
5357            let set = ast::ClassBracketed {
5358                span: span(0..2),
5359                negated: false,
5360                kind: ast::ClassSet::union(ast::ClassSetUnion {
5361                    span: span(1..1),
5362                    items: vec![],
5363                }),
5364            };
5365            let union = ast::ClassSetUnion {
5366                span: span(1..2),
5367                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5368                    span: span(1..2),
5369                    kind: ast::LiteralKind::Verbatim,
5370                    c: ']',
5371                })],
5372            };
5373            Ok((set, union))
5374        });
5375        assert_eq!(
5376            parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5377            {
5378                let set = ast::ClassBracketed {
5379                    span: span(0..4),
5380                    negated: false,
5381                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5382                        span: span(2..2),
5383                        items: vec![],
5384                    }),
5385                };
5386                let union = ast::ClassSetUnion {
5387                    span: span(2..3),
5388                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5389                        span: span(2..3),
5390                        kind: ast::LiteralKind::Verbatim,
5391                        c: ']',
5392                    })],
5393                };
5394                Ok((set, union))
5395            }
5396        );
5397        assert_eq!(parser("[^]a]").parse_set_class_open(), {
5398            let set = ast::ClassBracketed {
5399                span: span(0..3),
5400                negated: true,
5401                kind: ast::ClassSet::union(ast::ClassSetUnion {
5402                    span: span(2..2),
5403                    items: vec![],
5404                }),
5405            };
5406            let union = ast::ClassSetUnion {
5407                span: span(2..3),
5408                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5409                    span: span(2..3),
5410                    kind: ast::LiteralKind::Verbatim,
5411                    c: ']',
5412                })],
5413            };
5414            Ok((set, union))
5415        });
5416        assert_eq!(parser("[-]a]").parse_set_class_open(), {
5417            let set = ast::ClassBracketed {
5418                span: span(0..2),
5419                negated: false,
5420                kind: ast::ClassSet::union(ast::ClassSetUnion {
5421                    span: span(1..1),
5422                    items: vec![],
5423                }),
5424            };
5425            let union = ast::ClassSetUnion {
5426                span: span(1..2),
5427                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5428                    span: span(1..2),
5429                    kind: ast::LiteralKind::Verbatim,
5430                    c: '-',
5431                })],
5432            };
5433            Ok((set, union))
5434        });
5435
5436        assert_eq!(
5437            parser("[").parse_set_class_open().unwrap_err(),
5438            TestError {
5439                span: span(0..1),
5440                kind: ast::ErrorKind::ClassUnclosed,
5441            }
5442        );
5443        assert_eq!(
5444            parser_ignore_whitespace("[    ")
5445                .parse_set_class_open()
5446                .unwrap_err(),
5447            TestError {
5448                span: span(0..5),
5449                kind: ast::ErrorKind::ClassUnclosed,
5450            }
5451        );
5452        assert_eq!(
5453            parser("[^").parse_set_class_open().unwrap_err(),
5454            TestError {
5455                span: span(0..2),
5456                kind: ast::ErrorKind::ClassUnclosed,
5457            }
5458        );
5459        assert_eq!(
5460            parser("[]").parse_set_class_open().unwrap_err(),
5461            TestError {
5462                span: span(0..2),
5463                kind: ast::ErrorKind::ClassUnclosed,
5464            }
5465        );
5466        assert_eq!(
5467            parser("[-").parse_set_class_open().unwrap_err(),
5468            TestError {
5469                span: span(0..2),
5470                kind: ast::ErrorKind::ClassUnclosed,
5471            }
5472        );
5473        assert_eq!(
5474            parser("[--").parse_set_class_open().unwrap_err(),
5475            TestError {
5476                span: span(0..3),
5477                kind: ast::ErrorKind::ClassUnclosed,
5478            }
5479        );
5480    }
5481
5482    #[test]
5483    fn maybe_parse_ascii_class() {
5484        assert_eq!(
5485            parser(r"[:alnum:]").maybe_parse_ascii_class(),
5486            Some(ast::ClassAscii {
5487                span: span(0..9),
5488                kind: ast::ClassAsciiKind::Alnum,
5489                negated: false,
5490            })
5491        );
5492        assert_eq!(
5493            parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5494            Some(ast::ClassAscii {
5495                span: span(0..9),
5496                kind: ast::ClassAsciiKind::Alnum,
5497                negated: false,
5498            })
5499        );
5500        assert_eq!(
5501            parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5502            Some(ast::ClassAscii {
5503                span: span(0..10),
5504                kind: ast::ClassAsciiKind::Alnum,
5505                negated: true,
5506            })
5507        );
5508
5509        let p = parser(r"[:");
5510        assert_eq!(p.maybe_parse_ascii_class(), None);
5511        assert_eq!(p.offset(), 0);
5512
5513        let p = parser(r"[:^");
5514        assert_eq!(p.maybe_parse_ascii_class(), None);
5515        assert_eq!(p.offset(), 0);
5516
5517        let p = parser(r"[^:alnum:]");
5518        assert_eq!(p.maybe_parse_ascii_class(), None);
5519        assert_eq!(p.offset(), 0);
5520
5521        let p = parser(r"[:alnnum:]");
5522        assert_eq!(p.maybe_parse_ascii_class(), None);
5523        assert_eq!(p.offset(), 0);
5524
5525        let p = parser(r"[:alnum]");
5526        assert_eq!(p.maybe_parse_ascii_class(), None);
5527        assert_eq!(p.offset(), 0);
5528
5529        let p = parser(r"[:alnum:");
5530        assert_eq!(p.maybe_parse_ascii_class(), None);
5531        assert_eq!(p.offset(), 0);
5532    }
5533
5534    #[test]
5535    fn parse_unicode_class() {
5536        assert_eq!(
5537            parser(r"\pN").parse_escape(),
5538            Ok(Primitive::Unicode(ast::ClassUnicode {
5539                span: span(0..3),
5540                negated: false,
5541                kind: ast::ClassUnicodeKind::OneLetter('N'),
5542            }))
5543        );
5544        assert_eq!(
5545            parser(r"\PN").parse_escape(),
5546            Ok(Primitive::Unicode(ast::ClassUnicode {
5547                span: span(0..3),
5548                negated: true,
5549                kind: ast::ClassUnicodeKind::OneLetter('N'),
5550            }))
5551        );
5552        assert_eq!(
5553            parser(r"\p{N}").parse_escape(),
5554            Ok(Primitive::Unicode(ast::ClassUnicode {
5555                span: span(0..5),
5556                negated: false,
5557                kind: ast::ClassUnicodeKind::Named(s("N")),
5558            }))
5559        );
5560        assert_eq!(
5561            parser(r"\P{N}").parse_escape(),
5562            Ok(Primitive::Unicode(ast::ClassUnicode {
5563                span: span(0..5),
5564                negated: true,
5565                kind: ast::ClassUnicodeKind::Named(s("N")),
5566            }))
5567        );
5568        assert_eq!(
5569            parser(r"\p{Greek}").parse_escape(),
5570            Ok(Primitive::Unicode(ast::ClassUnicode {
5571                span: span(0..9),
5572                negated: false,
5573                kind: ast::ClassUnicodeKind::Named(s("Greek")),
5574            }))
5575        );
5576
5577        assert_eq!(
5578            parser(r"\p{scx:Katakana}").parse_escape(),
5579            Ok(Primitive::Unicode(ast::ClassUnicode {
5580                span: span(0..16),
5581                negated: false,
5582                kind: ast::ClassUnicodeKind::NamedValue {
5583                    op: ast::ClassUnicodeOpKind::Colon,
5584                    name: s("scx"),
5585                    value: s("Katakana"),
5586                },
5587            }))
5588        );
5589        assert_eq!(
5590            parser(r"\p{scx=Katakana}").parse_escape(),
5591            Ok(Primitive::Unicode(ast::ClassUnicode {
5592                span: span(0..16),
5593                negated: false,
5594                kind: ast::ClassUnicodeKind::NamedValue {
5595                    op: ast::ClassUnicodeOpKind::Equal,
5596                    name: s("scx"),
5597                    value: s("Katakana"),
5598                },
5599            }))
5600        );
5601        assert_eq!(
5602            parser(r"\p{scx!=Katakana}").parse_escape(),
5603            Ok(Primitive::Unicode(ast::ClassUnicode {
5604                span: span(0..17),
5605                negated: false,
5606                kind: ast::ClassUnicodeKind::NamedValue {
5607                    op: ast::ClassUnicodeOpKind::NotEqual,
5608                    name: s("scx"),
5609                    value: s("Katakana"),
5610                },
5611            }))
5612        );
5613
5614        assert_eq!(
5615            parser(r"\p{:}").parse_escape(),
5616            Ok(Primitive::Unicode(ast::ClassUnicode {
5617                span: span(0..5),
5618                negated: false,
5619                kind: ast::ClassUnicodeKind::NamedValue {
5620                    op: ast::ClassUnicodeOpKind::Colon,
5621                    name: s(""),
5622                    value: s(""),
5623                },
5624            }))
5625        );
5626        assert_eq!(
5627            parser(r"\p{=}").parse_escape(),
5628            Ok(Primitive::Unicode(ast::ClassUnicode {
5629                span: span(0..5),
5630                negated: false,
5631                kind: ast::ClassUnicodeKind::NamedValue {
5632                    op: ast::ClassUnicodeOpKind::Equal,
5633                    name: s(""),
5634                    value: s(""),
5635                },
5636            }))
5637        );
5638        assert_eq!(
5639            parser(r"\p{!=}").parse_escape(),
5640            Ok(Primitive::Unicode(ast::ClassUnicode {
5641                span: span(0..6),
5642                negated: false,
5643                kind: ast::ClassUnicodeKind::NamedValue {
5644                    op: ast::ClassUnicodeOpKind::NotEqual,
5645                    name: s(""),
5646                    value: s(""),
5647                },
5648            }))
5649        );
5650
5651        assert_eq!(
5652            parser(r"\p").parse_escape().unwrap_err(),
5653            TestError {
5654                span: span(2..2),
5655                kind: ast::ErrorKind::EscapeUnexpectedEof,
5656            }
5657        );
5658        assert_eq!(
5659            parser(r"\p{").parse_escape().unwrap_err(),
5660            TestError {
5661                span: span(3..3),
5662                kind: ast::ErrorKind::EscapeUnexpectedEof,
5663            }
5664        );
5665        assert_eq!(
5666            parser(r"\p{N").parse_escape().unwrap_err(),
5667            TestError {
5668                span: span(4..4),
5669                kind: ast::ErrorKind::EscapeUnexpectedEof,
5670            }
5671        );
5672        assert_eq!(
5673            parser(r"\p{Greek").parse_escape().unwrap_err(),
5674            TestError {
5675                span: span(8..8),
5676                kind: ast::ErrorKind::EscapeUnexpectedEof,
5677            }
5678        );
5679
5680        assert_eq!(
5681            parser(r"\pNz").parse(),
5682            Ok(Ast::Concat(ast::Concat {
5683                span: span(0..4),
5684                asts: vec![
5685                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5686                        span: span(0..3),
5687                        negated: false,
5688                        kind: ast::ClassUnicodeKind::OneLetter('N'),
5689                    })),
5690                    Ast::Literal(ast::Literal {
5691                        span: span(3..4),
5692                        kind: ast::LiteralKind::Verbatim,
5693                        c: 'z',
5694                    }),
5695                ],
5696            }))
5697        );
5698        assert_eq!(
5699            parser(r"\p{Greek}z").parse(),
5700            Ok(Ast::Concat(ast::Concat {
5701                span: span(0..10),
5702                asts: vec![
5703                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5704                        span: span(0..9),
5705                        negated: false,
5706                        kind: ast::ClassUnicodeKind::Named(s("Greek")),
5707                    })),
5708                    Ast::Literal(ast::Literal {
5709                        span: span(9..10),
5710                        kind: ast::LiteralKind::Verbatim,
5711                        c: 'z',
5712                    }),
5713                ],
5714            }))
5715        );
5716    }
5717
5718    #[test]
5719    fn parse_perl_class() {
5720        assert_eq!(
5721            parser(r"\d").parse_escape(),
5722            Ok(Primitive::Perl(ast::ClassPerl {
5723                span: span(0..2),
5724                kind: ast::ClassPerlKind::Digit,
5725                negated: false,
5726            }))
5727        );
5728        assert_eq!(
5729            parser(r"\D").parse_escape(),
5730            Ok(Primitive::Perl(ast::ClassPerl {
5731                span: span(0..2),
5732                kind: ast::ClassPerlKind::Digit,
5733                negated: true,
5734            }))
5735        );
5736        assert_eq!(
5737            parser(r"\s").parse_escape(),
5738            Ok(Primitive::Perl(ast::ClassPerl {
5739                span: span(0..2),
5740                kind: ast::ClassPerlKind::Space,
5741                negated: false,
5742            }))
5743        );
5744        assert_eq!(
5745            parser(r"\S").parse_escape(),
5746            Ok(Primitive::Perl(ast::ClassPerl {
5747                span: span(0..2),
5748                kind: ast::ClassPerlKind::Space,
5749                negated: true,
5750            }))
5751        );
5752        assert_eq!(
5753            parser(r"\w").parse_escape(),
5754            Ok(Primitive::Perl(ast::ClassPerl {
5755                span: span(0..2),
5756                kind: ast::ClassPerlKind::Word,
5757                negated: false,
5758            }))
5759        );
5760        assert_eq!(
5761            parser(r"\W").parse_escape(),
5762            Ok(Primitive::Perl(ast::ClassPerl {
5763                span: span(0..2),
5764                kind: ast::ClassPerlKind::Word,
5765                negated: true,
5766            }))
5767        );
5768
5769        assert_eq!(
5770            parser(r"\d").parse(),
5771            Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5772                span: span(0..2),
5773                kind: ast::ClassPerlKind::Digit,
5774                negated: false,
5775            })))
5776        );
5777        assert_eq!(
5778            parser(r"\dz").parse(),
5779            Ok(Ast::Concat(ast::Concat {
5780                span: span(0..3),
5781                asts: vec![
5782                    Ast::Class(ast::Class::Perl(ast::ClassPerl {
5783                        span: span(0..2),
5784                        kind: ast::ClassPerlKind::Digit,
5785                        negated: false,
5786                    })),
5787                    Ast::Literal(ast::Literal {
5788                        span: span(2..3),
5789                        kind: ast::LiteralKind::Verbatim,
5790                        c: 'z',
5791                    }),
5792                ],
5793            }))
5794        );
5795    }
5796
5797    // This tests a bug fix where the nest limit checker wasn't decrementing
5798    // its depth during post-traversal, which causes long regexes to trip
5799    // the default limit too aggressively.
5800    #[test]
5801    fn regression_454_nest_too_big() {
5802        let pattern = r#"
5803        2(?:
5804          [45]\d{3}|
5805          7(?:
5806            1[0-267]|
5807            2[0-289]|
5808            3[0-29]|
5809            4[01]|
5810            5[1-3]|
5811            6[013]|
5812            7[0178]|
5813            91
5814          )|
5815          8(?:
5816            0[125]|
5817            [139][1-6]|
5818            2[0157-9]|
5819            41|
5820            6[1-35]|
5821            7[1-5]|
5822            8[1-8]|
5823            90
5824          )|
5825          9(?:
5826            0[0-2]|
5827            1[0-4]|
5828            2[568]|
5829            3[3-6]|
5830            5[5-7]|
5831            6[0167]|
5832            7[15]|
5833            8[0146-9]
5834          )
5835        )\d{4}
5836        "#;
5837        assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5838    }
5839
5840    // This tests that we treat a trailing `-` in a character class as a
5841    // literal `-` even when whitespace mode is enabled and there is whitespace
5842    // after the trailing `-`.
5843    #[test]
5844    fn regression_455_trailing_dash_ignore_whitespace() {
5845        assert!(parser("(?x)[ / - ]").parse().is_ok());
5846        assert!(parser("(?x)[ a - ]").parse().is_ok());
5847        assert!(parser(
5848            "(?x)[
5849            a
5850            - ]
5851        "
5852        )
5853        .parse()
5854        .is_ok());
5855        assert!(parser(
5856            "(?x)[
5857            a # wat
5858            - ]
5859        "
5860        )
5861        .parse()
5862        .is_ok());
5863
5864        assert!(parser("(?x)[ / -").parse().is_err());
5865        assert!(parser("(?x)[ / - ").parse().is_err());
5866        assert!(parser(
5867            "(?x)[
5868            / -
5869        "
5870        )
5871        .parse()
5872        .is_err());
5873        assert!(parser(
5874            "(?x)[
5875            / - # wat
5876        "
5877        )
5878        .parse()
5879        .is_err());
5880    }
5881}