regex_syntax/ast/mod.rs
1/*!
2Defines an abstract syntax for regular expressions.
3*/
4
5use std::cmp::Ordering;
6use std::error;
7use std::fmt;
8
9pub use ast::visitor::{visit, Visitor};
10
11pub mod parse;
12pub mod print;
13mod visitor;
14
15/// An error that occurred while parsing a regular expression into an abstract
16/// syntax tree.
17///
18/// Note that note all ASTs represents a valid regular expression. For example,
19/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
20/// valid Unicode property name. That particular error is reported when
21/// translating an AST to the high-level intermediate representation (`HIR`).
22#[derive(Clone, Debug, Eq, PartialEq)]
23pub struct Error {
24 /// The kind of error.
25 kind: ErrorKind,
26 /// The original pattern that the parser generated the error from. Every
27 /// span in an error is a valid range into this string.
28 pattern: String,
29 /// The span of this error.
30 span: Span,
31}
32
33impl Error {
34 /// Return the type of this error.
35 pub fn kind(&self) -> &ErrorKind {
36 &self.kind
37 }
38
39 /// The original pattern string in which this error occurred.
40 ///
41 /// Every span reported by this error is reported in terms of this string.
42 pub fn pattern(&self) -> &str {
43 &self.pattern
44 }
45
46 /// Return the span at which this error occurred.
47 pub fn span(&self) -> &Span {
48 &self.span
49 }
50
51 /// Return an auxiliary span. This span exists only for some errors that
52 /// benefit from being able to point to two locations in the original
53 /// regular expression. For example, "duplicate" errors will have the
54 /// main error position set to the duplicate occurrence while its
55 /// auxiliary span will be set to the initial occurrence.
56 pub fn auxiliary_span(&self) -> Option<&Span> {
57 use self::ErrorKind::*;
58 match self.kind {
59 FlagDuplicate { ref original } => Some(original),
60 FlagRepeatedNegation { ref original, .. } => Some(original),
61 GroupNameDuplicate { ref original, .. } => Some(original),
62 _ => None,
63 }
64 }
65}
66
67/// The type of an error that occurred while building an AST.
68#[derive(Clone, Debug, Eq, PartialEq)]
69pub enum ErrorKind {
70 /// The capturing group limit was exceeded.
71 ///
72 /// Note that this represents a limit on the total number of capturing
73 /// groups in a regex and not necessarily the number of nested capturing
74 /// groups. That is, the nest limit can be low and it is still possible for
75 /// this error to occur.
76 CaptureLimitExceeded,
77 /// An invalid escape sequence was found in a character class set.
78 ClassEscapeInvalid,
79 /// An invalid character class range was found. An invalid range is any
80 /// range where the start is greater than the end.
81 ClassRangeInvalid,
82 /// An invalid range boundary was found in a character class. Range
83 /// boundaries must be a single literal codepoint, but this error indicates
84 /// that something else was found, such as a nested class.
85 ClassRangeLiteral,
86 /// An opening `[` was found with no corresponding closing `]`.
87 ClassUnclosed,
88 /// Note that this error variant is no longer used. Namely, a decimal
89 /// number can only appear as a repetition quantifier. When the number
90 /// in a repetition quantifier is empty, then it gets its own specialized
91 /// error, `RepetitionCountDecimalEmpty`.
92 DecimalEmpty,
93 /// An invalid decimal number was given where one was expected.
94 DecimalInvalid,
95 /// A bracketed hex literal was empty.
96 EscapeHexEmpty,
97 /// A bracketed hex literal did not correspond to a Unicode scalar value.
98 EscapeHexInvalid,
99 /// An invalid hexadecimal digit was found.
100 EscapeHexInvalidDigit,
101 /// EOF was found before an escape sequence was completed.
102 EscapeUnexpectedEof,
103 /// An unrecognized escape sequence.
104 EscapeUnrecognized,
105 /// A dangling negation was used when setting flags, e.g., `i-`.
106 FlagDanglingNegation,
107 /// A flag was used twice, e.g., `i-i`.
108 FlagDuplicate {
109 /// The position of the original flag. The error position
110 /// points to the duplicate flag.
111 original: Span,
112 },
113 /// The negation operator was used twice, e.g., `-i-s`.
114 FlagRepeatedNegation {
115 /// The position of the original negation operator. The error position
116 /// points to the duplicate negation operator.
117 original: Span,
118 },
119 /// Expected a flag but got EOF, e.g., `(?`.
120 FlagUnexpectedEof,
121 /// Unrecognized flag, e.g., `a`.
122 FlagUnrecognized,
123 /// A duplicate capture name was found.
124 GroupNameDuplicate {
125 /// The position of the initial occurrence of the capture name. The
126 /// error position itself points to the duplicate occurrence.
127 original: Span,
128 },
129 /// A capture group name is empty, e.g., `(?P<>abc)`.
130 GroupNameEmpty,
131 /// An invalid character was seen for a capture group name. This includes
132 /// errors where the first character is a digit (even though subsequent
133 /// characters are allowed to be digits).
134 GroupNameInvalid,
135 /// A closing `>` could not be found for a capture group name.
136 GroupNameUnexpectedEof,
137 /// An unclosed group, e.g., `(ab`.
138 ///
139 /// The span of this error corresponds to the unclosed parenthesis.
140 GroupUnclosed,
141 /// An unopened group, e.g., `ab)`.
142 GroupUnopened,
143 /// The nest limit was exceeded. The limit stored here is the limit
144 /// configured in the parser.
145 NestLimitExceeded(u32),
146 /// The range provided in a counted repetition operator is invalid. The
147 /// range is invalid if the start is greater than the end.
148 RepetitionCountInvalid,
149 /// An opening `{` was not followed by a valid decimal value.
150 /// For example, `x{}` or `x{]}` would fail.
151 RepetitionCountDecimalEmpty,
152 /// An opening `{` was found with no corresponding closing `}`.
153 RepetitionCountUnclosed,
154 /// A repetition operator was applied to a missing sub-expression. This
155 /// occurs, for example, in the regex consisting of just a `*` or even
156 /// `(?i)*`. It is, however, possible to create a repetition operating on
157 /// an empty sub-expression. For example, `()*` is still considered valid.
158 RepetitionMissing,
159 /// When octal support is disabled, this error is produced when an octal
160 /// escape is used. The octal escape is assumed to be an invocation of
161 /// a backreference, which is the common case.
162 UnsupportedBackreference,
163 /// When syntax similar to PCRE's look-around is used, this error is
164 /// returned. Some example syntaxes that are rejected include, but are
165 /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
166 /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
167 /// error is used to improve the user experience.
168 UnsupportedLookAround,
169 /// Hints that destructuring should not be exhaustive.
170 ///
171 /// This enum may grow additional variants, so this makes sure clients
172 /// don't count on exhaustive matching. (Otherwise, adding a new variant
173 /// could break existing code.)
174 #[doc(hidden)]
175 __Nonexhaustive,
176}
177
178impl error::Error for Error {
179 fn description(&self) -> &str {
180 use self::ErrorKind::*;
181 match self.kind {
182 CaptureLimitExceeded => "capture group limit exceeded",
183 ClassEscapeInvalid => "invalid escape sequence in character class",
184 ClassRangeInvalid => "invalid character class range",
185 ClassRangeLiteral => "invalid range boundary, must be a literal",
186 ClassUnclosed => "unclosed character class",
187 DecimalEmpty => "empty decimal literal",
188 DecimalInvalid => "invalid decimal literal",
189 EscapeHexEmpty => "empty hexadecimal literal",
190 EscapeHexInvalid => "invalid hexadecimal literal",
191 EscapeHexInvalidDigit => "invalid hexadecimal digit",
192 EscapeUnexpectedEof => "unexpected eof (escape sequence)",
193 EscapeUnrecognized => "unrecognized escape sequence",
194 FlagDanglingNegation => "dangling flag negation operator",
195 FlagDuplicate { .. } => "duplicate flag",
196 FlagRepeatedNegation { .. } => "repeated negation",
197 FlagUnexpectedEof => "unexpected eof (flag)",
198 FlagUnrecognized => "unrecognized flag",
199 GroupNameDuplicate { .. } => "duplicate capture group name",
200 GroupNameEmpty => "empty capture group name",
201 GroupNameInvalid => "invalid capture group name",
202 GroupNameUnexpectedEof => "unclosed capture group name",
203 GroupUnclosed => "unclosed group",
204 GroupUnopened => "unopened group",
205 NestLimitExceeded(_) => "nest limit exceeded",
206 RepetitionCountInvalid => "invalid repetition count range",
207 RepetitionCountUnclosed => "unclosed counted repetition",
208 RepetitionMissing => "repetition operator missing expression",
209 UnsupportedBackreference => "backreferences are not supported",
210 UnsupportedLookAround => "look-around is not supported",
211 _ => unreachable!(),
212 }
213 }
214}
215
216impl fmt::Display for Error {
217 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
218 ::error::Formatter::from(self).fmt(f)
219 }
220}
221
222impl fmt::Display for ErrorKind {
223 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
224 use self::ErrorKind::*;
225 match *self {
226 CaptureLimitExceeded => write!(
227 f,
228 "exceeded the maximum number of \
229 capturing groups ({})",
230 ::std::u32::MAX
231 ),
232 ClassEscapeInvalid => {
233 write!(f, "invalid escape sequence found in character class")
234 }
235 ClassRangeInvalid => write!(
236 f,
237 "invalid character class range, \
238 the start must be <= the end"
239 ),
240 ClassRangeLiteral => {
241 write!(f, "invalid range boundary, must be a literal")
242 }
243 ClassUnclosed => write!(f, "unclosed character class"),
244 DecimalEmpty => write!(f, "decimal literal empty"),
245 DecimalInvalid => write!(f, "decimal literal invalid"),
246 EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
247 EscapeHexInvalid => {
248 write!(f, "hexadecimal literal is not a Unicode scalar value")
249 }
250 EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
251 EscapeUnexpectedEof => write!(
252 f,
253 "incomplete escape sequence, \
254 reached end of pattern prematurely"
255 ),
256 EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
257 FlagDanglingNegation => {
258 write!(f, "dangling flag negation operator")
259 }
260 FlagDuplicate { .. } => write!(f, "duplicate flag"),
261 FlagRepeatedNegation { .. } => {
262 write!(f, "flag negation operator repeated")
263 }
264 FlagUnexpectedEof => {
265 write!(f, "expected flag but got end of regex")
266 }
267 FlagUnrecognized => write!(f, "unrecognized flag"),
268 GroupNameDuplicate { .. } => {
269 write!(f, "duplicate capture group name")
270 }
271 GroupNameEmpty => write!(f, "empty capture group name"),
272 GroupNameInvalid => write!(f, "invalid capture group character"),
273 GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
274 GroupUnclosed => write!(f, "unclosed group"),
275 GroupUnopened => write!(f, "unopened group"),
276 NestLimitExceeded(limit) => write!(
277 f,
278 "exceed the maximum number of \
279 nested parentheses/brackets ({})",
280 limit
281 ),
282 RepetitionCountInvalid => write!(
283 f,
284 "invalid repetition count range, \
285 the start must be <= the end"
286 ),
287 RepetitionCountDecimalEmpty => {
288 write!(f, "repetition quantifier expects a valid decimal")
289 }
290 RepetitionCountUnclosed => {
291 write!(f, "unclosed counted repetition")
292 }
293 RepetitionMissing => {
294 write!(f, "repetition operator missing expression")
295 }
296 UnsupportedBackreference => {
297 write!(f, "backreferences are not supported")
298 }
299 UnsupportedLookAround => write!(
300 f,
301 "look-around, including look-ahead and look-behind, \
302 is not supported"
303 ),
304 _ => unreachable!(),
305 }
306 }
307}
308
309/// Span represents the position information of a single AST item.
310///
311/// All span positions are absolute byte offsets that can be used on the
312/// original regular expression that was parsed.
313#[derive(Clone, Copy, Eq, PartialEq)]
314pub struct Span {
315 /// The start byte offset.
316 pub start: Position,
317 /// The end byte offset.
318 pub end: Position,
319}
320
321impl fmt::Debug for Span {
322 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
323 write!(f, "Span({:?}, {:?})", self.start, self.end)
324 }
325}
326
327impl Ord for Span {
328 fn cmp(&self, other: &Span) -> Ordering {
329 (&self.start, &self.end).cmp(&(&other.start, &other.end))
330 }
331}
332
333impl PartialOrd for Span {
334 fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
335 Some(self.cmp(other))
336 }
337}
338
339/// A single position in a regular expression.
340///
341/// A position encodes one half of a span, and include the byte offset, line
342/// number and column number.
343#[derive(Clone, Copy, Eq, PartialEq)]
344pub struct Position {
345 /// The absolute offset of this position, starting at `0` from the
346 /// beginning of the regular expression pattern string.
347 pub offset: usize,
348 /// The line number, starting at `1`.
349 pub line: usize,
350 /// The approximate column number, starting at `1`.
351 pub column: usize,
352}
353
354impl fmt::Debug for Position {
355 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
356 write!(
357 f,
358 "Position(o: {:?}, l: {:?}, c: {:?})",
359 self.offset, self.line, self.column
360 )
361 }
362}
363
364impl Ord for Position {
365 fn cmp(&self, other: &Position) -> Ordering {
366 self.offset.cmp(&other.offset)
367 }
368}
369
370impl PartialOrd for Position {
371 fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
372 Some(self.cmp(other))
373 }
374}
375
376impl Span {
377 /// Create a new span with the given positions.
378 pub fn new(start: Position, end: Position) -> Span {
379 Span { start: start, end: end }
380 }
381
382 /// Create a new span using the given position as the start and end.
383 pub fn splat(pos: Position) -> Span {
384 Span::new(pos, pos)
385 }
386
387 /// Create a new span by replacing the starting the position with the one
388 /// given.
389 pub fn with_start(self, pos: Position) -> Span {
390 Span { start: pos, ..self }
391 }
392
393 /// Create a new span by replacing the ending the position with the one
394 /// given.
395 pub fn with_end(self, pos: Position) -> Span {
396 Span { end: pos, ..self }
397 }
398
399 /// Returns true if and only if this span occurs on a single line.
400 pub fn is_one_line(&self) -> bool {
401 self.start.line == self.end.line
402 }
403
404 /// Returns true if and only if this span is empty. That is, it points to
405 /// a single position in the concrete syntax of a regular expression.
406 pub fn is_empty(&self) -> bool {
407 self.start.offset == self.end.offset
408 }
409}
410
411impl Position {
412 /// Create a new position with the given information.
413 ///
414 /// `offset` is the absolute offset of the position, starting at `0` from
415 /// the beginning of the regular expression pattern string.
416 ///
417 /// `line` is the line number, starting at `1`.
418 ///
419 /// `column` is the approximate column number, starting at `1`.
420 pub fn new(offset: usize, line: usize, column: usize) -> Position {
421 Position { offset: offset, line: line, column: column }
422 }
423}
424
425/// An abstract syntax tree for a singular expression along with comments
426/// found.
427///
428/// Comments are not stored in the tree itself to avoid complexity. Each
429/// comment contains a span of precisely where it occurred in the original
430/// regular expression.
431#[derive(Clone, Debug, Eq, PartialEq)]
432pub struct WithComments {
433 /// The actual ast.
434 pub ast: Ast,
435 /// All comments found in the original regular expression.
436 pub comments: Vec<Comment>,
437}
438
439/// A comment from a regular expression with an associated span.
440///
441/// A regular expression can only contain comments when the `x` flag is
442/// enabled.
443#[derive(Clone, Debug, Eq, PartialEq)]
444pub struct Comment {
445 /// The span of this comment, including the beginning `#` and ending `\n`.
446 pub span: Span,
447 /// The comment text, starting with the first character following the `#`
448 /// and ending with the last character preceding the `\n`.
449 pub comment: String,
450}
451
452/// An abstract syntax tree for a single regular expression.
453///
454/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
455/// space proportional to the size of the `Ast`.
456///
457/// This type defines its own destructor that uses constant stack space and
458/// heap space proportional to the size of the `Ast`.
459#[derive(Clone, Debug, Eq, PartialEq)]
460pub enum Ast {
461 /// An empty regex that matches everything.
462 Empty(Span),
463 /// A set of flags, e.g., `(?is)`.
464 Flags(SetFlags),
465 /// A single character literal, which includes escape sequences.
466 Literal(Literal),
467 /// The "any character" class.
468 Dot(Span),
469 /// A single zero-width assertion.
470 Assertion(Assertion),
471 /// A single character class. This includes all forms of character classes
472 /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
473 Class(Class),
474 /// A repetition operator applied to an arbitrary regular expression.
475 Repetition(Repetition),
476 /// A grouped regular expression.
477 Group(Group),
478 /// An alternation of regular expressions.
479 Alternation(Alternation),
480 /// A concatenation of regular expressions.
481 Concat(Concat),
482}
483
484impl Ast {
485 /// Return the span of this abstract syntax tree.
486 pub fn span(&self) -> &Span {
487 match *self {
488 Ast::Empty(ref span) => span,
489 Ast::Flags(ref x) => &x.span,
490 Ast::Literal(ref x) => &x.span,
491 Ast::Dot(ref span) => span,
492 Ast::Assertion(ref x) => &x.span,
493 Ast::Class(ref x) => x.span(),
494 Ast::Repetition(ref x) => &x.span,
495 Ast::Group(ref x) => &x.span,
496 Ast::Alternation(ref x) => &x.span,
497 Ast::Concat(ref x) => &x.span,
498 }
499 }
500
501 /// Return true if and only if this Ast is empty.
502 pub fn is_empty(&self) -> bool {
503 match *self {
504 Ast::Empty(_) => true,
505 _ => false,
506 }
507 }
508
509 /// Returns true if and only if this AST has any (including possibly empty)
510 /// subexpressions.
511 fn has_subexprs(&self) -> bool {
512 match *self {
513 Ast::Empty(_)
514 | Ast::Flags(_)
515 | Ast::Literal(_)
516 | Ast::Dot(_)
517 | Ast::Assertion(_) => false,
518 Ast::Class(_)
519 | Ast::Repetition(_)
520 | Ast::Group(_)
521 | Ast::Alternation(_)
522 | Ast::Concat(_) => true,
523 }
524 }
525}
526
527/// Print a display representation of this Ast.
528///
529/// This does not preserve any of the original whitespace formatting that may
530/// have originally been present in the concrete syntax from which this Ast
531/// was generated.
532///
533/// This implementation uses constant stack space and heap space proportional
534/// to the size of the `Ast`.
535impl fmt::Display for Ast {
536 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
537 use ast::print::Printer;
538 Printer::new().print(self, f)
539 }
540}
541
542/// An alternation of regular expressions.
543#[derive(Clone, Debug, Eq, PartialEq)]
544pub struct Alternation {
545 /// The span of this alternation.
546 pub span: Span,
547 /// The alternate regular expressions.
548 pub asts: Vec<Ast>,
549}
550
551impl Alternation {
552 /// Return this alternation as an AST.
553 ///
554 /// If this alternation contains zero ASTs, then Ast::Empty is
555 /// returned. If this alternation contains exactly 1 AST, then the
556 /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
557 pub fn into_ast(mut self) -> Ast {
558 match self.asts.len() {
559 0 => Ast::Empty(self.span),
560 1 => self.asts.pop().unwrap(),
561 _ => Ast::Alternation(self),
562 }
563 }
564}
565
566/// A concatenation of regular expressions.
567#[derive(Clone, Debug, Eq, PartialEq)]
568pub struct Concat {
569 /// The span of this concatenation.
570 pub span: Span,
571 /// The concatenation regular expressions.
572 pub asts: Vec<Ast>,
573}
574
575impl Concat {
576 /// Return this concatenation as an AST.
577 ///
578 /// If this concatenation contains zero ASTs, then Ast::Empty is
579 /// returned. If this concatenation contains exactly 1 AST, then the
580 /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
581 pub fn into_ast(mut self) -> Ast {
582 match self.asts.len() {
583 0 => Ast::Empty(self.span),
584 1 => self.asts.pop().unwrap(),
585 _ => Ast::Concat(self),
586 }
587 }
588}
589
590/// A single literal expression.
591///
592/// A literal corresponds to a single Unicode scalar value. Literals may be
593/// represented in their literal form, e.g., `a` or in their escaped form,
594/// e.g., `\x61`.
595#[derive(Clone, Debug, Eq, PartialEq)]
596pub struct Literal {
597 /// The span of this literal.
598 pub span: Span,
599 /// The kind of this literal.
600 pub kind: LiteralKind,
601 /// The Unicode scalar value corresponding to this literal.
602 pub c: char,
603}
604
605impl Literal {
606 /// If this literal was written as a `\x` hex escape, then this returns
607 /// the corresponding byte value. Otherwise, this returns `None`.
608 pub fn byte(&self) -> Option<u8> {
609 let short_hex = LiteralKind::HexFixed(HexLiteralKind::X);
610 if self.c as u32 <= 255 && self.kind == short_hex {
611 Some(self.c as u8)
612 } else {
613 None
614 }
615 }
616}
617
618/// The kind of a single literal expression.
619#[derive(Clone, Debug, Eq, PartialEq)]
620pub enum LiteralKind {
621 /// The literal is written verbatim, e.g., `a` or `☃`.
622 Verbatim,
623 /// The literal is written as an escape because it is punctuation, e.g.,
624 /// `\*` or `\[`.
625 Punctuation,
626 /// The literal is written as an octal escape, e.g., `\141`.
627 Octal,
628 /// The literal is written as a hex code with a fixed number of digits
629 /// depending on the type of the escape, e.g., `\x61` or or `\u0061` or
630 /// `\U00000061`.
631 HexFixed(HexLiteralKind),
632 /// The literal is written as a hex code with a bracketed number of
633 /// digits. The only restriction is that the bracketed hex code must refer
634 /// to a valid Unicode scalar value.
635 HexBrace(HexLiteralKind),
636 /// The literal is written as a specially recognized escape, e.g., `\f`
637 /// or `\n`.
638 Special(SpecialLiteralKind),
639}
640
641/// The type of a special literal.
642///
643/// A special literal is a special escape sequence recognized by the regex
644/// parser, e.g., `\f` or `\n`.
645#[derive(Clone, Debug, Eq, PartialEq)]
646pub enum SpecialLiteralKind {
647 /// Bell, spelled `\a` (`\x07`).
648 Bell,
649 /// Form feed, spelled `\f` (`\x0C`).
650 FormFeed,
651 /// Tab, spelled `\t` (`\x09`).
652 Tab,
653 /// Line feed, spelled `\n` (`\x0A`).
654 LineFeed,
655 /// Carriage return, spelled `\r` (`\x0D`).
656 CarriageReturn,
657 /// Vertical tab, spelled `\v` (`\x0B`).
658 VerticalTab,
659 /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
660 /// parsing in verbose mode.
661 Space,
662}
663
664/// The type of a Unicode hex literal.
665///
666/// Note that all variants behave the same when used with brackets. They only
667/// differ when used without brackets in the number of hex digits that must
668/// follow.
669#[derive(Clone, Debug, Eq, PartialEq)]
670pub enum HexLiteralKind {
671 /// A `\x` prefix. When used without brackets, this form is limited to
672 /// two digits.
673 X,
674 /// A `\u` prefix. When used without brackets, this form is limited to
675 /// four digits.
676 UnicodeShort,
677 /// A `\U` prefix. When used without brackets, this form is limited to
678 /// eight digits.
679 UnicodeLong,
680}
681
682impl HexLiteralKind {
683 /// The number of digits that must be used with this literal form when
684 /// used without brackets. When used with brackets, there is no
685 /// restriction on the number of digits.
686 pub fn digits(&self) -> u32 {
687 match *self {
688 HexLiteralKind::X => 2,
689 HexLiteralKind::UnicodeShort => 4,
690 HexLiteralKind::UnicodeLong => 8,
691 }
692 }
693}
694
695/// A single character class expression.
696#[derive(Clone, Debug, Eq, PartialEq)]
697pub enum Class {
698 /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
699 Unicode(ClassUnicode),
700 /// A perl character class, e.g., `\d` or `\W`.
701 Perl(ClassPerl),
702 /// A bracketed character class set, which may contain zero or more
703 /// character ranges and/or zero or more nested classes. e.g.,
704 /// `[a-zA-Z\pL]`.
705 Bracketed(ClassBracketed),
706}
707
708impl Class {
709 /// Return the span of this character class.
710 pub fn span(&self) -> &Span {
711 match *self {
712 Class::Perl(ref x) => &x.span,
713 Class::Unicode(ref x) => &x.span,
714 Class::Bracketed(ref x) => &x.span,
715 }
716 }
717}
718
719/// A Perl character class.
720#[derive(Clone, Debug, Eq, PartialEq)]
721pub struct ClassPerl {
722 /// The span of this class.
723 pub span: Span,
724 /// The kind of Perl class.
725 pub kind: ClassPerlKind,
726 /// Whether the class is negated or not. e.g., `\d` is not negated but
727 /// `\D` is.
728 pub negated: bool,
729}
730
731/// The available Perl character classes.
732#[derive(Clone, Debug, Eq, PartialEq)]
733pub enum ClassPerlKind {
734 /// Decimal numbers.
735 Digit,
736 /// Whitespace.
737 Space,
738 /// Word characters.
739 Word,
740}
741
742/// An ASCII character class.
743#[derive(Clone, Debug, Eq, PartialEq)]
744pub struct ClassAscii {
745 /// The span of this class.
746 pub span: Span,
747 /// The kind of ASCII class.
748 pub kind: ClassAsciiKind,
749 /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
750 /// but `[[:^alpha:]]` is.
751 pub negated: bool,
752}
753
754/// The available ASCII character classes.
755#[derive(Clone, Debug, Eq, PartialEq)]
756pub enum ClassAsciiKind {
757 /// `[0-9A-Za-z]`
758 Alnum,
759 /// `[A-Za-z]`
760 Alpha,
761 /// `[\x00-\x7F]`
762 Ascii,
763 /// `[ \t]`
764 Blank,
765 /// `[\x00-\x1F\x7F]`
766 Cntrl,
767 /// `[0-9]`
768 Digit,
769 /// `[!-~]`
770 Graph,
771 /// `[a-z]`
772 Lower,
773 /// `[ -~]`
774 Print,
775 /// `[!-/:-@\[-`{-~]`
776 Punct,
777 /// `[\t\n\v\f\r ]`
778 Space,
779 /// `[A-Z]`
780 Upper,
781 /// `[0-9A-Za-z_]`
782 Word,
783 /// `[0-9A-Fa-f]`
784 Xdigit,
785}
786
787impl ClassAsciiKind {
788 /// Return the corresponding ClassAsciiKind variant for the given name.
789 ///
790 /// The name given should correspond to the lowercase version of the
791 /// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
792 ///
793 /// If no variant with the corresponding name exists, then `None` is
794 /// returned.
795 pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
796 use self::ClassAsciiKind::*;
797 match name {
798 "alnum" => Some(Alnum),
799 "alpha" => Some(Alpha),
800 "ascii" => Some(Ascii),
801 "blank" => Some(Blank),
802 "cntrl" => Some(Cntrl),
803 "digit" => Some(Digit),
804 "graph" => Some(Graph),
805 "lower" => Some(Lower),
806 "print" => Some(Print),
807 "punct" => Some(Punct),
808 "space" => Some(Space),
809 "upper" => Some(Upper),
810 "word" => Some(Word),
811 "xdigit" => Some(Xdigit),
812 _ => None,
813 }
814 }
815}
816
817/// A Unicode character class.
818#[derive(Clone, Debug, Eq, PartialEq)]
819pub struct ClassUnicode {
820 /// The span of this class.
821 pub span: Span,
822 /// Whether this class is negated or not.
823 ///
824 /// Note: be careful when using this attribute. This specifically refers
825 /// to whether the class is written as `\p` or `\P`, where the latter
826 /// is `negated = true`. However, it also possible to write something like
827 /// `\P{scx!=Katakana}` which is actually equivalent to
828 /// `\p{scx=Katakana}` and is therefore not actually negated even though
829 /// `negated = true` here. To test whether this class is truly negated
830 /// or not, use the `is_negated` method.
831 pub negated: bool,
832 /// The kind of Unicode class.
833 pub kind: ClassUnicodeKind,
834}
835
836impl ClassUnicode {
837 /// Returns true if this class has been negated.
838 ///
839 /// Note that this takes the Unicode op into account, if it's present.
840 /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
841 pub fn is_negated(&self) -> bool {
842 match self.kind {
843 ClassUnicodeKind::NamedValue {
844 op: ClassUnicodeOpKind::NotEqual,
845 ..
846 } => !self.negated,
847 _ => self.negated,
848 }
849 }
850}
851
852/// The available forms of Unicode character classes.
853#[derive(Clone, Debug, Eq, PartialEq)]
854pub enum ClassUnicodeKind {
855 /// A one letter abbreviated class, e.g., `\pN`.
856 OneLetter(char),
857 /// A binary property, general category or script. The string may be
858 /// empty.
859 Named(String),
860 /// A property name and an associated value.
861 NamedValue {
862 /// The type of Unicode op used to associate `name` with `value`.
863 op: ClassUnicodeOpKind,
864 /// The property name (which may be empty).
865 name: String,
866 /// The property value (which may be empty).
867 value: String,
868 },
869}
870
871/// The type of op used in a Unicode character class.
872#[derive(Clone, Debug, Eq, PartialEq)]
873pub enum ClassUnicodeOpKind {
874 /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
875 Equal,
876 /// A property set to a specific value using a colon, e.g.,
877 /// `\p{scx:Katakana}`.
878 Colon,
879 /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
880 NotEqual,
881}
882
883impl ClassUnicodeOpKind {
884 /// Whether the op is an equality op or not.
885 pub fn is_equal(&self) -> bool {
886 match *self {
887 ClassUnicodeOpKind::Equal | ClassUnicodeOpKind::Colon => true,
888 _ => false,
889 }
890 }
891}
892
893/// A bracketed character class, e.g., `[a-z0-9]`.
894#[derive(Clone, Debug, Eq, PartialEq)]
895pub struct ClassBracketed {
896 /// The span of this class.
897 pub span: Span,
898 /// Whether this class is negated or not. e.g., `[a]` is not negated but
899 /// `[^a]` is.
900 pub negated: bool,
901 /// The type of this set. A set is either a normal union of things, e.g.,
902 /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
903 pub kind: ClassSet,
904}
905
906/// A character class set.
907///
908/// This type corresponds to the internal structure of a bracketed character
909/// class. That is, every bracketed character is one of two types: a union of
910/// items (literals, ranges, other bracketed classes) or a tree of binary set
911/// operations.
912#[derive(Clone, Debug, Eq, PartialEq)]
913pub enum ClassSet {
914 /// An item, which can be a single literal, range, nested character class
915 /// or a union of items.
916 Item(ClassSetItem),
917 /// A single binary operation (i.e., &&, -- or ~~).
918 BinaryOp(ClassSetBinaryOp),
919}
920
921impl ClassSet {
922 /// Build a set from a union.
923 pub fn union(ast: ClassSetUnion) -> ClassSet {
924 ClassSet::Item(ClassSetItem::Union(ast))
925 }
926
927 /// Return the span of this character class set.
928 pub fn span(&self) -> &Span {
929 match *self {
930 ClassSet::Item(ref x) => x.span(),
931 ClassSet::BinaryOp(ref x) => &x.span,
932 }
933 }
934
935 /// Return true if and only if this class set is empty.
936 fn is_empty(&self) -> bool {
937 match *self {
938 ClassSet::Item(ClassSetItem::Empty(_)) => true,
939 _ => false,
940 }
941 }
942}
943
944/// A single component of a character class set.
945#[derive(Clone, Debug, Eq, PartialEq)]
946pub enum ClassSetItem {
947 /// An empty item.
948 ///
949 /// Note that a bracketed character class cannot contain a single empty
950 /// item. Empty items can appear when using one of the binary operators.
951 /// For example, `[&&]` is the intersection of two empty classes.
952 Empty(Span),
953 /// A single literal.
954 Literal(Literal),
955 /// A range between two literals.
956 Range(ClassSetRange),
957 /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
958 Ascii(ClassAscii),
959 /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
960 Unicode(ClassUnicode),
961 /// A perl character class, e.g., `\d` or `\W`.
962 Perl(ClassPerl),
963 /// A bracketed character class set, which may contain zero or more
964 /// character ranges and/or zero or more nested classes. e.g.,
965 /// `[a-zA-Z\pL]`.
966 Bracketed(Box<ClassBracketed>),
967 /// A union of items.
968 Union(ClassSetUnion),
969}
970
971impl ClassSetItem {
972 /// Return the span of this character class set item.
973 pub fn span(&self) -> &Span {
974 match *self {
975 ClassSetItem::Empty(ref span) => span,
976 ClassSetItem::Literal(ref x) => &x.span,
977 ClassSetItem::Range(ref x) => &x.span,
978 ClassSetItem::Ascii(ref x) => &x.span,
979 ClassSetItem::Perl(ref x) => &x.span,
980 ClassSetItem::Unicode(ref x) => &x.span,
981 ClassSetItem::Bracketed(ref x) => &x.span,
982 ClassSetItem::Union(ref x) => &x.span,
983 }
984 }
985}
986
987/// A single character class range in a set.
988#[derive(Clone, Debug, Eq, PartialEq)]
989pub struct ClassSetRange {
990 /// The span of this range.
991 pub span: Span,
992 /// The start of this range.
993 pub start: Literal,
994 /// The end of this range.
995 pub end: Literal,
996}
997
998impl ClassSetRange {
999 /// Returns true if and only if this character class range is valid.
1000 ///
1001 /// The only case where a range is invalid is if its start is greater than
1002 /// its end.
1003 pub fn is_valid(&self) -> bool {
1004 self.start.c <= self.end.c
1005 }
1006}
1007
1008/// A union of items inside a character class set.
1009#[derive(Clone, Debug, Eq, PartialEq)]
1010pub struct ClassSetUnion {
1011 /// The span of the items in this operation. e.g., the `a-z0-9` in
1012 /// `[^a-z0-9]`
1013 pub span: Span,
1014 /// The sequence of items that make up this union.
1015 pub items: Vec<ClassSetItem>,
1016}
1017
1018impl ClassSetUnion {
1019 /// Push a new item in this union.
1020 ///
1021 /// The ending position of this union's span is updated to the ending
1022 /// position of the span of the item given. If the union is empty, then
1023 /// the starting position of this union is set to the starting position
1024 /// of this item.
1025 ///
1026 /// In other words, if you only use this method to add items to a union
1027 /// and you set the spans on each item correctly, then you should never
1028 /// need to adjust the span of the union directly.
1029 pub fn push(&mut self, item: ClassSetItem) {
1030 if self.items.is_empty() {
1031 self.span.start = item.span().start;
1032 }
1033 self.span.end = item.span().end;
1034 self.items.push(item);
1035 }
1036
1037 /// Return this union as a character class set item.
1038 ///
1039 /// If this union contains zero items, then an empty union is
1040 /// returned. If this concatenation contains exactly 1 item, then the
1041 /// corresponding item is returned. Otherwise, ClassSetItem::Union is
1042 /// returned.
1043 pub fn into_item(mut self) -> ClassSetItem {
1044 match self.items.len() {
1045 0 => ClassSetItem::Empty(self.span),
1046 1 => self.items.pop().unwrap(),
1047 _ => ClassSetItem::Union(self),
1048 }
1049 }
1050}
1051
1052/// A Unicode character class set operation.
1053#[derive(Clone, Debug, Eq, PartialEq)]
1054pub struct ClassSetBinaryOp {
1055 /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
1056 pub span: Span,
1057 /// The type of this set operation.
1058 pub kind: ClassSetBinaryOpKind,
1059 /// The left hand side of the operation.
1060 pub lhs: Box<ClassSet>,
1061 /// The right hand side of the operation.
1062 pub rhs: Box<ClassSet>,
1063}
1064
1065/// The type of a Unicode character class set operation.
1066///
1067/// Note that this doesn't explicitly represent union since there is no
1068/// explicit union operator. Concatenation inside a character class corresponds
1069/// to the union operation.
1070#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1071pub enum ClassSetBinaryOpKind {
1072 /// The intersection of two sets, e.g., `\pN&&[a-z]`.
1073 Intersection,
1074 /// The difference of two sets, e.g., `\pN--[0-9]`.
1075 Difference,
1076 /// The symmetric difference of two sets. The symmetric difference is the
1077 /// set of elements belonging to one but not both sets.
1078 /// e.g., `[\pL~~[:ascii:]]`.
1079 SymmetricDifference,
1080}
1081
1082/// A single zero-width assertion.
1083#[derive(Clone, Debug, Eq, PartialEq)]
1084pub struct Assertion {
1085 /// The span of this assertion.
1086 pub span: Span,
1087 /// The assertion kind, e.g., `\b` or `^`.
1088 pub kind: AssertionKind,
1089}
1090
1091/// An assertion kind.
1092#[derive(Clone, Debug, Eq, PartialEq)]
1093pub enum AssertionKind {
1094 /// `^`
1095 StartLine,
1096 /// `$`
1097 EndLine,
1098 /// `\A`
1099 StartText,
1100 /// `\z`
1101 EndText,
1102 /// `\b`
1103 WordBoundary,
1104 /// `\B`
1105 NotWordBoundary,
1106}
1107
1108/// A repetition operation applied to a regular expression.
1109#[derive(Clone, Debug, Eq, PartialEq)]
1110pub struct Repetition {
1111 /// The span of this operation.
1112 pub span: Span,
1113 /// The actual operation.
1114 pub op: RepetitionOp,
1115 /// Whether this operation was applied greedily or not.
1116 pub greedy: bool,
1117 /// The regular expression under repetition.
1118 pub ast: Box<Ast>,
1119}
1120
1121/// The repetition operator itself.
1122#[derive(Clone, Debug, Eq, PartialEq)]
1123pub struct RepetitionOp {
1124 /// The span of this operator. This includes things like `+`, `*?` and
1125 /// `{m,n}`.
1126 pub span: Span,
1127 /// The type of operation.
1128 pub kind: RepetitionKind,
1129}
1130
1131/// The kind of a repetition operator.
1132#[derive(Clone, Debug, Eq, PartialEq)]
1133pub enum RepetitionKind {
1134 /// `?`
1135 ZeroOrOne,
1136 /// `*`
1137 ZeroOrMore,
1138 /// `+`
1139 OneOrMore,
1140 /// `{m,n}`
1141 Range(RepetitionRange),
1142}
1143
1144/// A range repetition operator.
1145#[derive(Clone, Debug, Eq, PartialEq)]
1146pub enum RepetitionRange {
1147 /// `{m}`
1148 Exactly(u32),
1149 /// `{m,}`
1150 AtLeast(u32),
1151 /// `{m,n}`
1152 Bounded(u32, u32),
1153}
1154
1155impl RepetitionRange {
1156 /// Returns true if and only if this repetition range is valid.
1157 ///
1158 /// The only case where a repetition range is invalid is if it is bounded
1159 /// and its start is greater than its end.
1160 pub fn is_valid(&self) -> bool {
1161 match *self {
1162 RepetitionRange::Bounded(s, e) if s > e => false,
1163 _ => true,
1164 }
1165 }
1166}
1167
1168/// A grouped regular expression.
1169///
1170/// This includes both capturing and non-capturing groups. This does **not**
1171/// include flag-only groups like `(?is)`, but does contain any group that
1172/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
1173/// `(?is:a)`.
1174#[derive(Clone, Debug, Eq, PartialEq)]
1175pub struct Group {
1176 /// The span of this group.
1177 pub span: Span,
1178 /// The kind of this group.
1179 pub kind: GroupKind,
1180 /// The regular expression in this group.
1181 pub ast: Box<Ast>,
1182}
1183
1184impl Group {
1185 /// If this group is non-capturing, then this returns the (possibly empty)
1186 /// set of flags. Otherwise, `None` is returned.
1187 pub fn flags(&self) -> Option<&Flags> {
1188 match self.kind {
1189 GroupKind::NonCapturing(ref flags) => Some(flags),
1190 _ => None,
1191 }
1192 }
1193
1194 /// Returns true if and only if this group is capturing.
1195 pub fn is_capturing(&self) -> bool {
1196 match self.kind {
1197 GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true,
1198 GroupKind::NonCapturing(_) => false,
1199 }
1200 }
1201
1202 /// Returns the capture index of this group, if this is a capturing group.
1203 ///
1204 /// This returns a capture index precisely when `is_capturing` is `true`.
1205 pub fn capture_index(&self) -> Option<u32> {
1206 match self.kind {
1207 GroupKind::CaptureIndex(i) => Some(i),
1208 GroupKind::CaptureName(ref x) => Some(x.index),
1209 GroupKind::NonCapturing(_) => None,
1210 }
1211 }
1212}
1213
1214/// The kind of a group.
1215#[derive(Clone, Debug, Eq, PartialEq)]
1216pub enum GroupKind {
1217 /// `(a)`
1218 CaptureIndex(u32),
1219 /// `(?P<name>a)`
1220 CaptureName(CaptureName),
1221 /// `(?:a)` and `(?i:a)`
1222 NonCapturing(Flags),
1223}
1224
1225/// A capture name.
1226///
1227/// This corresponds to the name itself between the angle brackets in, e.g.,
1228/// `(?P<foo>expr)`.
1229#[derive(Clone, Debug, Eq, PartialEq)]
1230pub struct CaptureName {
1231 /// The span of this capture name.
1232 pub span: Span,
1233 /// The capture name.
1234 pub name: String,
1235 /// The capture index.
1236 pub index: u32,
1237}
1238
1239/// A group of flags that is not applied to a particular regular expression.
1240#[derive(Clone, Debug, Eq, PartialEq)]
1241pub struct SetFlags {
1242 /// The span of these flags, including the grouping parentheses.
1243 pub span: Span,
1244 /// The actual sequence of flags.
1245 pub flags: Flags,
1246}
1247
1248/// A group of flags.
1249///
1250/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
1251#[derive(Clone, Debug, Eq, PartialEq)]
1252pub struct Flags {
1253 /// The span of this group of flags.
1254 pub span: Span,
1255 /// A sequence of flag items. Each item is either a flag or a negation
1256 /// operator.
1257 pub items: Vec<FlagsItem>,
1258}
1259
1260impl Flags {
1261 /// Add the given item to this sequence of flags.
1262 ///
1263 /// If the item was added successfully, then `None` is returned. If the
1264 /// given item is a duplicate, then `Some(i)` is returned, where
1265 /// `items[i].kind == item.kind`.
1266 pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
1267 for (i, x) in self.items.iter().enumerate() {
1268 if x.kind == item.kind {
1269 return Some(i);
1270 }
1271 }
1272 self.items.push(item);
1273 None
1274 }
1275
1276 /// Returns the state of the given flag in this set.
1277 ///
1278 /// If the given flag is in the set but is negated, then `Some(false)` is
1279 /// returned.
1280 ///
1281 /// If the given flag is in the set and is not negated, then `Some(true)`
1282 /// is returned.
1283 ///
1284 /// Otherwise, `None` is returned.
1285 pub fn flag_state(&self, flag: Flag) -> Option<bool> {
1286 let mut negated = false;
1287 for x in &self.items {
1288 match x.kind {
1289 FlagsItemKind::Negation => {
1290 negated = true;
1291 }
1292 FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
1293 return Some(!negated);
1294 }
1295 _ => {}
1296 }
1297 }
1298 None
1299 }
1300}
1301
1302/// A single item in a group of flags.
1303#[derive(Clone, Debug, Eq, PartialEq)]
1304pub struct FlagsItem {
1305 /// The span of this item.
1306 pub span: Span,
1307 /// The kind of this item.
1308 pub kind: FlagsItemKind,
1309}
1310
1311/// The kind of an item in a group of flags.
1312#[derive(Clone, Debug, Eq, PartialEq)]
1313pub enum FlagsItemKind {
1314 /// A negation operator applied to all subsequent flags in the enclosing
1315 /// group.
1316 Negation,
1317 /// A single flag in a group.
1318 Flag(Flag),
1319}
1320
1321impl FlagsItemKind {
1322 /// Returns true if and only if this item is a negation operator.
1323 pub fn is_negation(&self) -> bool {
1324 match *self {
1325 FlagsItemKind::Negation => true,
1326 _ => false,
1327 }
1328 }
1329}
1330
1331/// A single flag.
1332#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1333pub enum Flag {
1334 /// `i`
1335 CaseInsensitive,
1336 /// `m`
1337 MultiLine,
1338 /// `s`
1339 DotMatchesNewLine,
1340 /// `U`
1341 SwapGreed,
1342 /// `u`
1343 Unicode,
1344 /// `x`
1345 IgnoreWhitespace,
1346}
1347
1348/// A custom `Drop` impl is used for `Ast` such that it uses constant stack
1349/// space but heap space proportional to the depth of the `Ast`.
1350impl Drop for Ast {
1351 fn drop(&mut self) {
1352 use std::mem;
1353
1354 match *self {
1355 Ast::Empty(_)
1356 | Ast::Flags(_)
1357 | Ast::Literal(_)
1358 | Ast::Dot(_)
1359 | Ast::Assertion(_)
1360 // Classes are recursive, so they get their own Drop impl.
1361 | Ast::Class(_) => return,
1362 Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
1363 Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1364 Ast::Alternation(ref x) if x.asts.is_empty() => return,
1365 Ast::Concat(ref x) if x.asts.is_empty() => return,
1366 _ => {}
1367 }
1368
1369 let empty_span = || Span::splat(Position::new(0, 0, 0));
1370 let empty_ast = || Ast::Empty(empty_span());
1371 let mut stack = vec![mem::replace(self, empty_ast())];
1372 while let Some(mut ast) = stack.pop() {
1373 match ast {
1374 Ast::Empty(_)
1375 | Ast::Flags(_)
1376 | Ast::Literal(_)
1377 | Ast::Dot(_)
1378 | Ast::Assertion(_)
1379 // Classes are recursive, so they get their own Drop impl.
1380 | Ast::Class(_) => {}
1381 Ast::Repetition(ref mut x) => {
1382 stack.push(mem::replace(&mut x.ast, empty_ast()));
1383 }
1384 Ast::Group(ref mut x) => {
1385 stack.push(mem::replace(&mut x.ast, empty_ast()));
1386 }
1387 Ast::Alternation(ref mut x) => {
1388 stack.extend(x.asts.drain(..));
1389 }
1390 Ast::Concat(ref mut x) => {
1391 stack.extend(x.asts.drain(..));
1392 }
1393 }
1394 }
1395 }
1396}
1397
1398/// A custom `Drop` impl is used for `ClassSet` such that it uses constant
1399/// stack space but heap space proportional to the depth of the `ClassSet`.
1400impl Drop for ClassSet {
1401 fn drop(&mut self) {
1402 use std::mem;
1403
1404 match *self {
1405 ClassSet::Item(ref item) => match *item {
1406 ClassSetItem::Empty(_)
1407 | ClassSetItem::Literal(_)
1408 | ClassSetItem::Range(_)
1409 | ClassSetItem::Ascii(_)
1410 | ClassSetItem::Unicode(_)
1411 | ClassSetItem::Perl(_) => return,
1412 ClassSetItem::Bracketed(ref x) => {
1413 if x.kind.is_empty() {
1414 return;
1415 }
1416 }
1417 ClassSetItem::Union(ref x) => {
1418 if x.items.is_empty() {
1419 return;
1420 }
1421 }
1422 },
1423 ClassSet::BinaryOp(ref op) => {
1424 if op.lhs.is_empty() && op.rhs.is_empty() {
1425 return;
1426 }
1427 }
1428 }
1429
1430 let empty_span = || Span::splat(Position::new(0, 0, 0));
1431 let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
1432 let mut stack = vec![mem::replace(self, empty_set())];
1433 while let Some(mut set) = stack.pop() {
1434 match set {
1435 ClassSet::Item(ref mut item) => match *item {
1436 ClassSetItem::Empty(_)
1437 | ClassSetItem::Literal(_)
1438 | ClassSetItem::Range(_)
1439 | ClassSetItem::Ascii(_)
1440 | ClassSetItem::Unicode(_)
1441 | ClassSetItem::Perl(_) => {}
1442 ClassSetItem::Bracketed(ref mut x) => {
1443 stack.push(mem::replace(&mut x.kind, empty_set()));
1444 }
1445 ClassSetItem::Union(ref mut x) => {
1446 stack.extend(x.items.drain(..).map(ClassSet::Item));
1447 }
1448 },
1449 ClassSet::BinaryOp(ref mut op) => {
1450 stack.push(mem::replace(&mut op.lhs, empty_set()));
1451 stack.push(mem::replace(&mut op.rhs, empty_set()));
1452 }
1453 }
1454 }
1455 }
1456}
1457
1458#[cfg(test)]
1459mod tests {
1460 use super::*;
1461
1462 // We use a thread with an explicit stack size to test that our destructor
1463 // for Ast can handle arbitrarily sized expressions in constant stack
1464 // space. In case we run on a platform without threads (WASM?), we limit
1465 // this test to Windows/Unix.
1466 #[test]
1467 #[cfg(any(unix, windows))]
1468 fn no_stack_overflow_on_drop() {
1469 use std::thread;
1470
1471 let run = || {
1472 let span = || Span::splat(Position::new(0, 0, 0));
1473 let mut ast = Ast::Empty(span());
1474 for i in 0..200 {
1475 ast = Ast::Group(Group {
1476 span: span(),
1477 kind: GroupKind::CaptureIndex(i),
1478 ast: Box::new(ast),
1479 });
1480 }
1481 assert!(!ast.is_empty());
1482 };
1483
1484 // We run our test on a thread with a small stack size so we can
1485 // force the issue more easily.
1486 thread::Builder::new()
1487 .stack_size(1 << 10)
1488 .spawn(run)
1489 .unwrap()
1490 .join()
1491 .unwrap();
1492 }
1493}