1use std::char;
5use std::cmp;
6use std::error;
7use std::fmt;
8use std::result;
9use std::u8;
10
11use ast::Span;
12use hir::interval::{Interval, IntervalSet, IntervalSetIter};
13use unicode;
14
15pub use hir::visitor::{visit, Visitor};
16pub use unicode::CaseFoldError;
17
18mod interval;
19pub mod literal;
20pub mod print;
21pub mod translate;
22mod visitor;
23
24#[derive(Clone, Debug, Eq, PartialEq)]
26pub struct Error {
27 kind: ErrorKind,
29 pattern: String,
32 span: Span,
34}
35
36impl Error {
37 pub fn kind(&self) -> &ErrorKind {
39 &self.kind
40 }
41
42 pub fn pattern(&self) -> &str {
46 &self.pattern
47 }
48
49 pub fn span(&self) -> &Span {
51 &self.span
52 }
53}
54
55#[derive(Clone, Debug, Eq, PartialEq)]
57pub enum ErrorKind {
58 UnicodeNotAllowed,
61 InvalidUtf8,
64 UnicodePropertyNotFound,
67 UnicodePropertyValueNotFound,
70 UnicodePerlClassNotFound,
74 UnicodeCaseUnavailable,
78 EmptyClassNotAllowed,
84 #[doc(hidden)]
90 __Nonexhaustive,
91}
92
93impl ErrorKind {
94 fn description(&self) -> &str {
95 use self::ErrorKind::*;
96 match *self {
97 UnicodeNotAllowed => "Unicode not allowed here",
98 InvalidUtf8 => "pattern can match invalid UTF-8",
99 UnicodePropertyNotFound => "Unicode property not found",
100 UnicodePropertyValueNotFound => "Unicode property value not found",
101 UnicodePerlClassNotFound => {
102 "Unicode-aware Perl class not found \
103 (make sure the unicode-perl feature is enabled)"
104 }
105 UnicodeCaseUnavailable => {
106 "Unicode-aware case insensitivity matching is not available \
107 (make sure the unicode-case feature is enabled)"
108 }
109 EmptyClassNotAllowed => "empty character classes are not allowed",
110 __Nonexhaustive => unreachable!(),
111 }
112 }
113}
114
115impl error::Error for Error {
116 fn description(&self) -> &str {
117 self.kind.description()
118 }
119}
120
121impl fmt::Display for Error {
122 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123 ::error::Formatter::from(self).fmt(f)
124 }
125}
126
127impl fmt::Display for ErrorKind {
128 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
129 f.write_str(self.description())
130 }
131}
132
133#[derive(Clone, Debug, Eq, PartialEq)]
169pub struct Hir {
170 kind: HirKind,
172 info: HirInfo,
174}
175
176#[derive(Clone, Debug, Eq, PartialEq)]
178pub enum HirKind {
179 Empty,
182 Literal(Literal),
184 Class(Class),
188 Anchor(Anchor),
190 WordBoundary(WordBoundary),
193 Repetition(Repetition),
195 Group(Group),
197 Concat(Vec<Hir>),
203 Alternation(Vec<Hir>),
209}
210
211impl Hir {
212 pub fn kind(&self) -> &HirKind {
214 &self.kind
215 }
216
217 pub fn into_kind(mut self) -> HirKind {
220 use std::mem;
221 mem::replace(&mut self.kind, HirKind::Empty)
222 }
223
224 pub fn empty() -> Hir {
228 let mut info = HirInfo::new();
229 info.set_always_utf8(true);
230 info.set_all_assertions(true);
231 info.set_anchored_start(false);
232 info.set_anchored_end(false);
233 info.set_line_anchored_start(false);
234 info.set_line_anchored_end(false);
235 info.set_any_anchored_start(false);
236 info.set_any_anchored_end(false);
237 info.set_match_empty(true);
238 info.set_literal(true);
239 info.set_alternation_literal(true);
240 Hir { kind: HirKind::Empty, info: info }
241 }
242
243 pub fn literal(lit: Literal) -> Hir {
249 if let Literal::Byte(b) = lit {
250 assert!(b > 0x7F);
251 }
252
253 let mut info = HirInfo::new();
254 info.set_always_utf8(lit.is_unicode());
255 info.set_all_assertions(false);
256 info.set_anchored_start(false);
257 info.set_anchored_end(false);
258 info.set_line_anchored_start(false);
259 info.set_line_anchored_end(false);
260 info.set_any_anchored_start(false);
261 info.set_any_anchored_end(false);
262 info.set_match_empty(false);
263 info.set_literal(true);
264 info.set_alternation_literal(true);
265 Hir { kind: HirKind::Literal(lit), info: info }
266 }
267
268 pub fn class(class: Class) -> Hir {
270 let mut info = HirInfo::new();
271 info.set_always_utf8(class.is_always_utf8());
272 info.set_all_assertions(false);
273 info.set_anchored_start(false);
274 info.set_anchored_end(false);
275 info.set_line_anchored_start(false);
276 info.set_line_anchored_end(false);
277 info.set_any_anchored_start(false);
278 info.set_any_anchored_end(false);
279 info.set_match_empty(false);
280 info.set_literal(false);
281 info.set_alternation_literal(false);
282 Hir { kind: HirKind::Class(class), info: info }
283 }
284
285 pub fn anchor(anchor: Anchor) -> Hir {
287 let mut info = HirInfo::new();
288 info.set_always_utf8(true);
289 info.set_all_assertions(true);
290 info.set_anchored_start(false);
291 info.set_anchored_end(false);
292 info.set_line_anchored_start(false);
293 info.set_line_anchored_end(false);
294 info.set_any_anchored_start(false);
295 info.set_any_anchored_end(false);
296 info.set_match_empty(true);
297 info.set_literal(false);
298 info.set_alternation_literal(false);
299 if let Anchor::StartText = anchor {
300 info.set_anchored_start(true);
301 info.set_line_anchored_start(true);
302 info.set_any_anchored_start(true);
303 }
304 if let Anchor::EndText = anchor {
305 info.set_anchored_end(true);
306 info.set_line_anchored_end(true);
307 info.set_any_anchored_end(true);
308 }
309 if let Anchor::StartLine = anchor {
310 info.set_line_anchored_start(true);
311 }
312 if let Anchor::EndLine = anchor {
313 info.set_line_anchored_end(true);
314 }
315 Hir { kind: HirKind::Anchor(anchor), info: info }
316 }
317
318 pub fn word_boundary(word_boundary: WordBoundary) -> Hir {
320 let mut info = HirInfo::new();
321 info.set_always_utf8(true);
322 info.set_all_assertions(true);
323 info.set_anchored_start(false);
324 info.set_anchored_end(false);
325 info.set_line_anchored_start(false);
326 info.set_line_anchored_end(false);
327 info.set_any_anchored_start(false);
328 info.set_any_anchored_end(false);
329 info.set_literal(false);
330 info.set_alternation_literal(false);
331 info.set_match_empty(word_boundary.is_negated());
334 if let WordBoundary::AsciiNegate = word_boundary {
336 info.set_always_utf8(false);
337 }
338 Hir { kind: HirKind::WordBoundary(word_boundary), info: info }
339 }
340
341 pub fn repetition(rep: Repetition) -> Hir {
343 let mut info = HirInfo::new();
344 info.set_always_utf8(rep.hir.is_always_utf8());
345 info.set_all_assertions(rep.hir.is_all_assertions());
346 info.set_anchored_start(
349 !rep.is_match_empty() && rep.hir.is_anchored_start(),
350 );
351 info.set_anchored_end(
352 !rep.is_match_empty() && rep.hir.is_anchored_end(),
353 );
354 info.set_line_anchored_start(
355 !rep.is_match_empty() && rep.hir.is_anchored_start(),
356 );
357 info.set_line_anchored_end(
358 !rep.is_match_empty() && rep.hir.is_anchored_end(),
359 );
360 info.set_any_anchored_start(rep.hir.is_any_anchored_start());
361 info.set_any_anchored_end(rep.hir.is_any_anchored_end());
362 info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty());
363 info.set_literal(false);
364 info.set_alternation_literal(false);
365 Hir { kind: HirKind::Repetition(rep), info: info }
366 }
367
368 pub fn group(group: Group) -> Hir {
370 let mut info = HirInfo::new();
371 info.set_always_utf8(group.hir.is_always_utf8());
372 info.set_all_assertions(group.hir.is_all_assertions());
373 info.set_anchored_start(group.hir.is_anchored_start());
374 info.set_anchored_end(group.hir.is_anchored_end());
375 info.set_line_anchored_start(group.hir.is_line_anchored_start());
376 info.set_line_anchored_end(group.hir.is_line_anchored_end());
377 info.set_any_anchored_start(group.hir.is_any_anchored_start());
378 info.set_any_anchored_end(group.hir.is_any_anchored_end());
379 info.set_match_empty(group.hir.is_match_empty());
380 info.set_literal(false);
381 info.set_alternation_literal(false);
382 Hir { kind: HirKind::Group(group), info: info }
383 }
384
385 pub fn concat(mut exprs: Vec<Hir>) -> Hir {
389 match exprs.len() {
390 0 => Hir::empty(),
391 1 => exprs.pop().unwrap(),
392 _ => {
393 let mut info = HirInfo::new();
394 info.set_always_utf8(true);
395 info.set_all_assertions(true);
396 info.set_any_anchored_start(false);
397 info.set_any_anchored_end(false);
398 info.set_match_empty(true);
399 info.set_literal(true);
400 info.set_alternation_literal(true);
401
402 for e in &exprs {
404 let x = info.is_always_utf8() && e.is_always_utf8();
405 info.set_always_utf8(x);
406
407 let x = info.is_all_assertions() && e.is_all_assertions();
408 info.set_all_assertions(x);
409
410 let x = info.is_any_anchored_start()
411 || e.is_any_anchored_start();
412 info.set_any_anchored_start(x);
413
414 let x =
415 info.is_any_anchored_end() || e.is_any_anchored_end();
416 info.set_any_anchored_end(x);
417
418 let x = info.is_match_empty() && e.is_match_empty();
419 info.set_match_empty(x);
420
421 let x = info.is_literal() && e.is_literal();
422 info.set_literal(x);
423
424 let x = info.is_alternation_literal()
425 && e.is_alternation_literal();
426 info.set_alternation_literal(x);
427 }
428 info.set_anchored_start(
438 exprs
439 .iter()
440 .take_while(|e| {
441 e.is_anchored_start() || e.is_all_assertions()
442 })
443 .any(|e| e.is_anchored_start()),
444 );
445 info.set_anchored_end(
447 exprs
448 .iter()
449 .rev()
450 .take_while(|e| {
451 e.is_anchored_end() || e.is_all_assertions()
452 })
453 .any(|e| e.is_anchored_end()),
454 );
455 info.set_line_anchored_start(
457 exprs
458 .iter()
459 .take_while(|e| {
460 e.is_line_anchored_start() || e.is_all_assertions()
461 })
462 .any(|e| e.is_line_anchored_start()),
463 );
464 info.set_line_anchored_end(
465 exprs
466 .iter()
467 .rev()
468 .take_while(|e| {
469 e.is_line_anchored_end() || e.is_all_assertions()
470 })
471 .any(|e| e.is_line_anchored_end()),
472 );
473 Hir { kind: HirKind::Concat(exprs), info: info }
474 }
475 }
476 }
477
478 pub fn alternation(mut exprs: Vec<Hir>) -> Hir {
482 match exprs.len() {
483 0 => Hir::empty(),
484 1 => exprs.pop().unwrap(),
485 _ => {
486 let mut info = HirInfo::new();
487 info.set_always_utf8(true);
488 info.set_all_assertions(true);
489 info.set_anchored_start(true);
490 info.set_anchored_end(true);
491 info.set_line_anchored_start(true);
492 info.set_line_anchored_end(true);
493 info.set_any_anchored_start(false);
494 info.set_any_anchored_end(false);
495 info.set_match_empty(false);
496 info.set_literal(false);
497 info.set_alternation_literal(true);
498
499 for e in &exprs {
501 let x = info.is_always_utf8() && e.is_always_utf8();
502 info.set_always_utf8(x);
503
504 let x = info.is_all_assertions() && e.is_all_assertions();
505 info.set_all_assertions(x);
506
507 let x = info.is_anchored_start() && e.is_anchored_start();
508 info.set_anchored_start(x);
509
510 let x = info.is_anchored_end() && e.is_anchored_end();
511 info.set_anchored_end(x);
512
513 let x = info.is_line_anchored_start()
514 && e.is_line_anchored_start();
515 info.set_line_anchored_start(x);
516
517 let x = info.is_line_anchored_end()
518 && e.is_line_anchored_end();
519 info.set_line_anchored_end(x);
520
521 let x = info.is_any_anchored_start()
522 || e.is_any_anchored_start();
523 info.set_any_anchored_start(x);
524
525 let x =
526 info.is_any_anchored_end() || e.is_any_anchored_end();
527 info.set_any_anchored_end(x);
528
529 let x = info.is_match_empty() || e.is_match_empty();
530 info.set_match_empty(x);
531
532 let x = info.is_alternation_literal() && e.is_literal();
533 info.set_alternation_literal(x);
534 }
535 Hir { kind: HirKind::Alternation(exprs), info: info }
536 }
537 }
538 }
539
540 pub fn dot(bytes: bool) -> Hir {
549 if bytes {
550 let mut cls = ClassBytes::empty();
551 cls.push(ClassBytesRange::new(b'\0', b'\x09'));
552 cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
553 Hir::class(Class::Bytes(cls))
554 } else {
555 let mut cls = ClassUnicode::empty();
556 cls.push(ClassUnicodeRange::new('\0', '\x09'));
557 cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
558 Hir::class(Class::Unicode(cls))
559 }
560 }
561
562 pub fn any(bytes: bool) -> Hir {
571 if bytes {
572 let mut cls = ClassBytes::empty();
573 cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
574 Hir::class(Class::Bytes(cls))
575 } else {
576 let mut cls = ClassUnicode::empty();
577 cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
578 Hir::class(Class::Unicode(cls))
579 }
580 }
581
582 pub fn is_always_utf8(&self) -> bool {
587 self.info.is_always_utf8()
588 }
589
590 pub fn is_all_assertions(&self) -> bool {
596 self.info.is_all_assertions()
597 }
598
599 pub fn is_anchored_start(&self) -> bool {
603 self.info.is_anchored_start()
604 }
605
606 pub fn is_anchored_end(&self) -> bool {
610 self.info.is_anchored_end()
611 }
612
613 pub fn is_line_anchored_start(&self) -> bool {
623 self.info.is_line_anchored_start()
624 }
625
626 pub fn is_line_anchored_end(&self) -> bool {
636 self.info.is_line_anchored_end()
637 }
638
639 pub fn is_any_anchored_start(&self) -> bool {
644 self.info.is_any_anchored_start()
645 }
646
647 pub fn is_any_anchored_end(&self) -> bool {
652 self.info.is_any_anchored_end()
653 }
654
655 pub fn is_match_empty(&self) -> bool {
661 self.info.is_match_empty()
662 }
663
664 pub fn is_literal(&self) -> bool {
671 self.info.is_literal()
672 }
673
674 pub fn is_alternation_literal(&self) -> bool {
683 self.info.is_alternation_literal()
684 }
685}
686
687impl HirKind {
688 pub fn is_empty(&self) -> bool {
694 match *self {
695 HirKind::Empty => true,
696 _ => false,
697 }
698 }
699
700 pub fn has_subexprs(&self) -> bool {
703 match *self {
704 HirKind::Empty
705 | HirKind::Literal(_)
706 | HirKind::Class(_)
707 | HirKind::Anchor(_)
708 | HirKind::WordBoundary(_) => false,
709 HirKind::Group(_)
710 | HirKind::Repetition(_)
711 | HirKind::Concat(_)
712 | HirKind::Alternation(_) => true,
713 }
714 }
715}
716
717impl fmt::Display for Hir {
724 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
725 use hir::print::Printer;
726 Printer::new().print(self, f)
727 }
728}
729
730#[derive(Clone, Debug, Eq, PartialEq)]
737pub enum Literal {
738 Unicode(char),
740 Byte(u8),
742}
743
744impl Literal {
745 pub fn is_unicode(&self) -> bool {
748 match *self {
749 Literal::Unicode(_) => true,
750 Literal::Byte(b) if b <= 0x7F => true,
751 Literal::Byte(_) => false,
752 }
753 }
754}
755
756#[derive(Clone, Debug, Eq, PartialEq)]
773pub enum Class {
774 Unicode(ClassUnicode),
776 Bytes(ClassBytes),
779}
780
781impl Class {
782 pub fn case_fold_simple(&mut self) {
789 match *self {
790 Class::Unicode(ref mut x) => x.case_fold_simple(),
791 Class::Bytes(ref mut x) => x.case_fold_simple(),
792 }
793 }
794
795 pub fn negate(&mut self) {
800 match *self {
801 Class::Unicode(ref mut x) => x.negate(),
802 Class::Bytes(ref mut x) => x.negate(),
803 }
804 }
805
806 pub fn is_always_utf8(&self) -> bool {
818 match *self {
819 Class::Unicode(_) => true,
820 Class::Bytes(ref x) => x.is_all_ascii(),
821 }
822 }
823}
824
825#[derive(Clone, Debug, Eq, PartialEq)]
827pub struct ClassUnicode {
828 set: IntervalSet<ClassUnicodeRange>,
829}
830
831impl ClassUnicode {
832 pub fn new<I>(ranges: I) -> ClassUnicode
837 where
838 I: IntoIterator<Item = ClassUnicodeRange>,
839 {
840 ClassUnicode { set: IntervalSet::new(ranges) }
841 }
842
843 pub fn empty() -> ClassUnicode {
845 ClassUnicode::new(vec![])
846 }
847
848 pub fn push(&mut self, range: ClassUnicodeRange) {
850 self.set.push(range);
851 }
852
853 pub fn iter(&self) -> ClassUnicodeIter {
857 ClassUnicodeIter(self.set.iter())
858 }
859
860 pub fn ranges(&self) -> &[ClassUnicodeRange] {
862 self.set.intervals()
863 }
864
865 pub fn case_fold_simple(&mut self) {
879 self.set
880 .case_fold_simple()
881 .expect("unicode-case feature must be enabled");
882 }
883
884 pub fn try_case_fold_simple(
898 &mut self,
899 ) -> result::Result<(), CaseFoldError> {
900 self.set.case_fold_simple()
901 }
902
903 pub fn negate(&mut self) {
908 self.set.negate();
909 }
910
911 pub fn union(&mut self, other: &ClassUnicode) {
913 self.set.union(&other.set);
914 }
915
916 pub fn intersect(&mut self, other: &ClassUnicode) {
919 self.set.intersect(&other.set);
920 }
921
922 pub fn difference(&mut self, other: &ClassUnicode) {
924 self.set.difference(&other.set);
925 }
926
927 pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
936 self.set.symmetric_difference(&other.set);
937 }
938}
939
940#[derive(Debug)]
944pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
945
946impl<'a> Iterator for ClassUnicodeIter<'a> {
947 type Item = &'a ClassUnicodeRange;
948
949 fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
950 self.0.next()
951 }
952}
953
954#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
959pub struct ClassUnicodeRange {
960 start: char,
961 end: char,
962}
963
964impl fmt::Debug for ClassUnicodeRange {
965 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
966 let start = if !self.start.is_whitespace() && !self.start.is_control()
967 {
968 self.start.to_string()
969 } else {
970 format!("0x{:X}", self.start as u32)
971 };
972 let end = if !self.end.is_whitespace() && !self.end.is_control() {
973 self.end.to_string()
974 } else {
975 format!("0x{:X}", self.end as u32)
976 };
977 f.debug_struct("ClassUnicodeRange")
978 .field("start", &start)
979 .field("end", &end)
980 .finish()
981 }
982}
983
984impl Interval for ClassUnicodeRange {
985 type Bound = char;
986
987 #[inline]
988 fn lower(&self) -> char {
989 self.start
990 }
991 #[inline]
992 fn upper(&self) -> char {
993 self.end
994 }
995 #[inline]
996 fn set_lower(&mut self, bound: char) {
997 self.start = bound;
998 }
999 #[inline]
1000 fn set_upper(&mut self, bound: char) {
1001 self.end = bound;
1002 }
1003
1004 fn case_fold_simple(
1009 &self,
1010 ranges: &mut Vec<ClassUnicodeRange>,
1011 ) -> Result<(), unicode::CaseFoldError> {
1012 if !unicode::contains_simple_case_mapping(self.start, self.end)? {
1013 return Ok(());
1014 }
1015 let start = self.start as u32;
1016 let end = (self.end as u32).saturating_add(1);
1017 let mut next_simple_cp = None;
1018 for cp in (start..end).filter_map(char::from_u32) {
1019 if next_simple_cp.map_or(false, |next| cp < next) {
1020 continue;
1021 }
1022 let it = match unicode::simple_fold(cp)? {
1023 Ok(it) => it,
1024 Err(next) => {
1025 next_simple_cp = next;
1026 continue;
1027 }
1028 };
1029 for cp_folded in it {
1030 ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded));
1031 }
1032 }
1033 Ok(())
1034 }
1035}
1036
1037impl ClassUnicodeRange {
1038 pub fn new(start: char, end: char) -> ClassUnicodeRange {
1043 ClassUnicodeRange::create(start, end)
1044 }
1045
1046 pub fn start(&self) -> char {
1051 self.start
1052 }
1053
1054 pub fn end(&self) -> char {
1059 self.end
1060 }
1061}
1062
1063#[derive(Clone, Debug, Eq, PartialEq)]
1066pub struct ClassBytes {
1067 set: IntervalSet<ClassBytesRange>,
1068}
1069
1070impl ClassBytes {
1071 pub fn new<I>(ranges: I) -> ClassBytes
1076 where
1077 I: IntoIterator<Item = ClassBytesRange>,
1078 {
1079 ClassBytes { set: IntervalSet::new(ranges) }
1080 }
1081
1082 pub fn empty() -> ClassBytes {
1084 ClassBytes::new(vec![])
1085 }
1086
1087 pub fn push(&mut self, range: ClassBytesRange) {
1089 self.set.push(range);
1090 }
1091
1092 pub fn iter(&self) -> ClassBytesIter {
1096 ClassBytesIter(self.set.iter())
1097 }
1098
1099 pub fn ranges(&self) -> &[ClassBytesRange] {
1101 self.set.intervals()
1102 }
1103
1104 pub fn case_fold_simple(&mut self) {
1112 self.set.case_fold_simple().expect("ASCII case folding never fails");
1113 }
1114
1115 pub fn negate(&mut self) {
1120 self.set.negate();
1121 }
1122
1123 pub fn union(&mut self, other: &ClassBytes) {
1125 self.set.union(&other.set);
1126 }
1127
1128 pub fn intersect(&mut self, other: &ClassBytes) {
1130 self.set.intersect(&other.set);
1131 }
1132
1133 pub fn difference(&mut self, other: &ClassBytes) {
1135 self.set.difference(&other.set);
1136 }
1137
1138 pub fn symmetric_difference(&mut self, other: &ClassBytes) {
1146 self.set.symmetric_difference(&other.set);
1147 }
1148
1149 pub fn is_all_ascii(&self) -> bool {
1153 self.set.intervals().last().map_or(true, |r| r.end <= 0x7F)
1154 }
1155}
1156
1157#[derive(Debug)]
1161pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
1162
1163impl<'a> Iterator for ClassBytesIter<'a> {
1164 type Item = &'a ClassBytesRange;
1165
1166 fn next(&mut self) -> Option<&'a ClassBytesRange> {
1167 self.0.next()
1168 }
1169}
1170
1171#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
1176pub struct ClassBytesRange {
1177 start: u8,
1178 end: u8,
1179}
1180
1181impl Interval for ClassBytesRange {
1182 type Bound = u8;
1183
1184 #[inline]
1185 fn lower(&self) -> u8 {
1186 self.start
1187 }
1188 #[inline]
1189 fn upper(&self) -> u8 {
1190 self.end
1191 }
1192 #[inline]
1193 fn set_lower(&mut self, bound: u8) {
1194 self.start = bound;
1195 }
1196 #[inline]
1197 fn set_upper(&mut self, bound: u8) {
1198 self.end = bound;
1199 }
1200
1201 fn case_fold_simple(
1207 &self,
1208 ranges: &mut Vec<ClassBytesRange>,
1209 ) -> Result<(), unicode::CaseFoldError> {
1210 if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
1211 let lower = cmp::max(self.start, b'a');
1212 let upper = cmp::min(self.end, b'z');
1213 ranges.push(ClassBytesRange::new(lower - 32, upper - 32));
1214 }
1215 if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
1216 let lower = cmp::max(self.start, b'A');
1217 let upper = cmp::min(self.end, b'Z');
1218 ranges.push(ClassBytesRange::new(lower + 32, upper + 32));
1219 }
1220 Ok(())
1221 }
1222}
1223
1224impl ClassBytesRange {
1225 pub fn new(start: u8, end: u8) -> ClassBytesRange {
1230 ClassBytesRange::create(start, end)
1231 }
1232
1233 pub fn start(&self) -> u8 {
1238 self.start
1239 }
1240
1241 pub fn end(&self) -> u8 {
1246 self.end
1247 }
1248}
1249
1250impl fmt::Debug for ClassBytesRange {
1251 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1252 let mut debug = f.debug_struct("ClassBytesRange");
1253 if self.start <= 0x7F {
1254 debug.field("start", &(self.start as char));
1255 } else {
1256 debug.field("start", &self.start);
1257 }
1258 if self.end <= 0x7F {
1259 debug.field("end", &(self.end as char));
1260 } else {
1261 debug.field("end", &self.end);
1262 }
1263 debug.finish()
1264 }
1265}
1266
1267#[derive(Clone, Debug, Eq, PartialEq)]
1271pub enum Anchor {
1272 StartLine,
1276 EndLine,
1280 StartText,
1283 EndText,
1286}
1287
1288#[derive(Clone, Debug, Eq, PartialEq)]
1292pub enum WordBoundary {
1293 Unicode,
1297 UnicodeNegate,
1299 Ascii,
1303 AsciiNegate,
1305}
1306
1307impl WordBoundary {
1308 pub fn is_negated(&self) -> bool {
1310 match *self {
1311 WordBoundary::Unicode | WordBoundary::Ascii => false,
1312 WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true,
1313 }
1314 }
1315}
1316
1317#[derive(Clone, Debug, Eq, PartialEq)]
1325pub struct Group {
1326 pub kind: GroupKind,
1330 pub hir: Box<Hir>,
1332}
1333
1334#[derive(Clone, Debug, Eq, PartialEq)]
1336pub enum GroupKind {
1337 CaptureIndex(u32),
1341 CaptureName {
1343 name: String,
1345 index: u32,
1347 },
1348 NonCapturing,
1350}
1351
1352#[derive(Clone, Debug, Eq, PartialEq)]
1357pub struct Repetition {
1358 pub kind: RepetitionKind,
1360 pub greedy: bool,
1368 pub hir: Box<Hir>,
1370}
1371
1372impl Repetition {
1373 pub fn is_match_empty(&self) -> bool {
1383 match self.kind {
1384 RepetitionKind::ZeroOrOne => true,
1385 RepetitionKind::ZeroOrMore => true,
1386 RepetitionKind::OneOrMore => false,
1387 RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0,
1388 RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0,
1389 RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0,
1390 }
1391 }
1392}
1393
1394#[derive(Clone, Debug, Eq, PartialEq)]
1396pub enum RepetitionKind {
1397 ZeroOrOne,
1399 ZeroOrMore,
1401 OneOrMore,
1403 Range(RepetitionRange),
1405}
1406
1407#[derive(Clone, Debug, Eq, PartialEq)]
1409pub enum RepetitionRange {
1410 Exactly(u32),
1412 AtLeast(u32),
1414 Bounded(u32, u32),
1416}
1417
1418impl Drop for Hir {
1421 fn drop(&mut self) {
1422 use std::mem;
1423
1424 match *self.kind() {
1425 HirKind::Empty
1426 | HirKind::Literal(_)
1427 | HirKind::Class(_)
1428 | HirKind::Anchor(_)
1429 | HirKind::WordBoundary(_) => return,
1430 HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
1431 HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
1432 HirKind::Concat(ref x) if x.is_empty() => return,
1433 HirKind::Alternation(ref x) if x.is_empty() => return,
1434 _ => {}
1435 }
1436
1437 let mut stack = vec![mem::replace(self, Hir::empty())];
1438 while let Some(mut expr) = stack.pop() {
1439 match expr.kind {
1440 HirKind::Empty
1441 | HirKind::Literal(_)
1442 | HirKind::Class(_)
1443 | HirKind::Anchor(_)
1444 | HirKind::WordBoundary(_) => {}
1445 HirKind::Group(ref mut x) => {
1446 stack.push(mem::replace(&mut x.hir, Hir::empty()));
1447 }
1448 HirKind::Repetition(ref mut x) => {
1449 stack.push(mem::replace(&mut x.hir, Hir::empty()));
1450 }
1451 HirKind::Concat(ref mut x) => {
1452 stack.extend(x.drain(..));
1453 }
1454 HirKind::Alternation(ref mut x) => {
1455 stack.extend(x.drain(..));
1456 }
1457 }
1458 }
1459 }
1460}
1461
1462#[derive(Clone, Debug, Eq, PartialEq)]
1466struct HirInfo {
1467 bools: u16,
1473}
1474
1475macro_rules! define_bool {
1477 ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
1478 fn $is_fn_name(&self) -> bool {
1479 self.bools & (0b1 << $bit) > 0
1480 }
1481
1482 fn $set_fn_name(&mut self, yes: bool) {
1483 if yes {
1484 self.bools |= 1 << $bit;
1485 } else {
1486 self.bools &= !(1 << $bit);
1487 }
1488 }
1489 }
1490}
1491
1492impl HirInfo {
1493 fn new() -> HirInfo {
1494 HirInfo { bools: 0 }
1495 }
1496
1497 define_bool!(0, is_always_utf8, set_always_utf8);
1498 define_bool!(1, is_all_assertions, set_all_assertions);
1499 define_bool!(2, is_anchored_start, set_anchored_start);
1500 define_bool!(3, is_anchored_end, set_anchored_end);
1501 define_bool!(4, is_line_anchored_start, set_line_anchored_start);
1502 define_bool!(5, is_line_anchored_end, set_line_anchored_end);
1503 define_bool!(6, is_any_anchored_start, set_any_anchored_start);
1504 define_bool!(7, is_any_anchored_end, set_any_anchored_end);
1505 define_bool!(8, is_match_empty, set_match_empty);
1506 define_bool!(9, is_literal, set_literal);
1507 define_bool!(10, is_alternation_literal, set_alternation_literal);
1508}
1509
1510#[cfg(test)]
1511mod tests {
1512 use super::*;
1513
1514 fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
1515 let ranges: Vec<ClassUnicodeRange> = ranges
1516 .iter()
1517 .map(|&(s, e)| ClassUnicodeRange::new(s, e))
1518 .collect();
1519 ClassUnicode::new(ranges)
1520 }
1521
1522 fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
1523 let ranges: Vec<ClassBytesRange> =
1524 ranges.iter().map(|&(s, e)| ClassBytesRange::new(s, e)).collect();
1525 ClassBytes::new(ranges)
1526 }
1527
1528 fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
1529 cls.iter().map(|x| (x.start(), x.end())).collect()
1530 }
1531
1532 #[cfg(feature = "unicode-case")]
1533 fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
1534 let mut cls_ = cls.clone();
1535 cls_.case_fold_simple();
1536 cls_
1537 }
1538
1539 fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
1540 let mut cls_ = cls1.clone();
1541 cls_.union(cls2);
1542 cls_
1543 }
1544
1545 fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
1546 let mut cls_ = cls1.clone();
1547 cls_.intersect(cls2);
1548 cls_
1549 }
1550
1551 fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
1552 let mut cls_ = cls1.clone();
1553 cls_.difference(cls2);
1554 cls_
1555 }
1556
1557 fn usymdifference(
1558 cls1: &ClassUnicode,
1559 cls2: &ClassUnicode,
1560 ) -> ClassUnicode {
1561 let mut cls_ = cls1.clone();
1562 cls_.symmetric_difference(cls2);
1563 cls_
1564 }
1565
1566 fn unegate(cls: &ClassUnicode) -> ClassUnicode {
1567 let mut cls_ = cls.clone();
1568 cls_.negate();
1569 cls_
1570 }
1571
1572 fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
1573 cls.iter().map(|x| (x.start(), x.end())).collect()
1574 }
1575
1576 fn bcasefold(cls: &ClassBytes) -> ClassBytes {
1577 let mut cls_ = cls.clone();
1578 cls_.case_fold_simple();
1579 cls_
1580 }
1581
1582 fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
1583 let mut cls_ = cls1.clone();
1584 cls_.union(cls2);
1585 cls_
1586 }
1587
1588 fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
1589 let mut cls_ = cls1.clone();
1590 cls_.intersect(cls2);
1591 cls_
1592 }
1593
1594 fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
1595 let mut cls_ = cls1.clone();
1596 cls_.difference(cls2);
1597 cls_
1598 }
1599
1600 fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
1601 let mut cls_ = cls1.clone();
1602 cls_.symmetric_difference(cls2);
1603 cls_
1604 }
1605
1606 fn bnegate(cls: &ClassBytes) -> ClassBytes {
1607 let mut cls_ = cls.clone();
1608 cls_.negate();
1609 cls_
1610 }
1611
1612 #[test]
1613 fn class_range_canonical_unicode() {
1614 let range = ClassUnicodeRange::new('\u{00FF}', '\0');
1615 assert_eq!('\0', range.start());
1616 assert_eq!('\u{00FF}', range.end());
1617 }
1618
1619 #[test]
1620 fn class_range_canonical_bytes() {
1621 let range = ClassBytesRange::new(b'\xFF', b'\0');
1622 assert_eq!(b'\0', range.start());
1623 assert_eq!(b'\xFF', range.end());
1624 }
1625
1626 #[test]
1627 fn class_canonicalize_unicode() {
1628 let cls = uclass(&[('a', 'c'), ('x', 'z')]);
1629 let expected = vec![('a', 'c'), ('x', 'z')];
1630 assert_eq!(expected, uranges(&cls));
1631
1632 let cls = uclass(&[('x', 'z'), ('a', 'c')]);
1633 let expected = vec![('a', 'c'), ('x', 'z')];
1634 assert_eq!(expected, uranges(&cls));
1635
1636 let cls = uclass(&[('x', 'z'), ('w', 'y')]);
1637 let expected = vec![('w', 'z')];
1638 assert_eq!(expected, uranges(&cls));
1639
1640 let cls = uclass(&[
1641 ('c', 'f'),
1642 ('a', 'g'),
1643 ('d', 'j'),
1644 ('a', 'c'),
1645 ('m', 'p'),
1646 ('l', 's'),
1647 ]);
1648 let expected = vec![('a', 'j'), ('l', 's')];
1649 assert_eq!(expected, uranges(&cls));
1650
1651 let cls = uclass(&[('x', 'z'), ('u', 'w')]);
1652 let expected = vec![('u', 'z')];
1653 assert_eq!(expected, uranges(&cls));
1654
1655 let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
1656 let expected = vec![('\x00', '\u{10FFFF}')];
1657 assert_eq!(expected, uranges(&cls));
1658
1659 let cls = uclass(&[('a', 'a'), ('b', 'b')]);
1660 let expected = vec![('a', 'b')];
1661 assert_eq!(expected, uranges(&cls));
1662 }
1663
1664 #[test]
1665 fn class_canonicalize_bytes() {
1666 let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
1667 let expected = vec![(b'a', b'c'), (b'x', b'z')];
1668 assert_eq!(expected, branges(&cls));
1669
1670 let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]);
1671 let expected = vec![(b'a', b'c'), (b'x', b'z')];
1672 assert_eq!(expected, branges(&cls));
1673
1674 let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]);
1675 let expected = vec![(b'w', b'z')];
1676 assert_eq!(expected, branges(&cls));
1677
1678 let cls = bclass(&[
1679 (b'c', b'f'),
1680 (b'a', b'g'),
1681 (b'd', b'j'),
1682 (b'a', b'c'),
1683 (b'm', b'p'),
1684 (b'l', b's'),
1685 ]);
1686 let expected = vec![(b'a', b'j'), (b'l', b's')];
1687 assert_eq!(expected, branges(&cls));
1688
1689 let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]);
1690 let expected = vec![(b'u', b'z')];
1691 assert_eq!(expected, branges(&cls));
1692
1693 let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]);
1694 let expected = vec![(b'\x00', b'\xFF')];
1695 assert_eq!(expected, branges(&cls));
1696
1697 let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
1698 let expected = vec![(b'a', b'b')];
1699 assert_eq!(expected, branges(&cls));
1700 }
1701
1702 #[test]
1703 #[cfg(feature = "unicode-case")]
1704 fn class_case_fold_unicode() {
1705 let cls = uclass(&[
1706 ('C', 'F'),
1707 ('A', 'G'),
1708 ('D', 'J'),
1709 ('A', 'C'),
1710 ('M', 'P'),
1711 ('L', 'S'),
1712 ('c', 'f'),
1713 ]);
1714 let expected = uclass(&[
1715 ('A', 'J'),
1716 ('L', 'S'),
1717 ('a', 'j'),
1718 ('l', 's'),
1719 ('\u{17F}', '\u{17F}'),
1720 ]);
1721 assert_eq!(expected, ucasefold(&cls));
1722
1723 let cls = uclass(&[('A', 'Z')]);
1724 let expected = uclass(&[
1725 ('A', 'Z'),
1726 ('a', 'z'),
1727 ('\u{17F}', '\u{17F}'),
1728 ('\u{212A}', '\u{212A}'),
1729 ]);
1730 assert_eq!(expected, ucasefold(&cls));
1731
1732 let cls = uclass(&[('a', 'z')]);
1733 let expected = uclass(&[
1734 ('A', 'Z'),
1735 ('a', 'z'),
1736 ('\u{17F}', '\u{17F}'),
1737 ('\u{212A}', '\u{212A}'),
1738 ]);
1739 assert_eq!(expected, ucasefold(&cls));
1740
1741 let cls = uclass(&[('A', 'A'), ('_', '_')]);
1742 let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]);
1743 assert_eq!(expected, ucasefold(&cls));
1744
1745 let cls = uclass(&[('A', 'A'), ('=', '=')]);
1746 let expected = uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]);
1747 assert_eq!(expected, ucasefold(&cls));
1748
1749 let cls = uclass(&[('\x00', '\x10')]);
1750 assert_eq!(cls, ucasefold(&cls));
1751
1752 let cls = uclass(&[('k', 'k')]);
1753 let expected =
1754 uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]);
1755 assert_eq!(expected, ucasefold(&cls));
1756
1757 let cls = uclass(&[('@', '@')]);
1758 assert_eq!(cls, ucasefold(&cls));
1759 }
1760
1761 #[test]
1762 #[cfg(not(feature = "unicode-case"))]
1763 fn class_case_fold_unicode_disabled() {
1764 let mut cls = uclass(&[
1765 ('C', 'F'),
1766 ('A', 'G'),
1767 ('D', 'J'),
1768 ('A', 'C'),
1769 ('M', 'P'),
1770 ('L', 'S'),
1771 ('c', 'f'),
1772 ]);
1773 assert!(cls.try_case_fold_simple().is_err());
1774 }
1775
1776 #[test]
1777 #[should_panic]
1778 #[cfg(not(feature = "unicode-case"))]
1779 fn class_case_fold_unicode_disabled_panics() {
1780 let mut cls = uclass(&[
1781 ('C', 'F'),
1782 ('A', 'G'),
1783 ('D', 'J'),
1784 ('A', 'C'),
1785 ('M', 'P'),
1786 ('L', 'S'),
1787 ('c', 'f'),
1788 ]);
1789 cls.case_fold_simple();
1790 }
1791
1792 #[test]
1793 fn class_case_fold_bytes() {
1794 let cls = bclass(&[
1795 (b'C', b'F'),
1796 (b'A', b'G'),
1797 (b'D', b'J'),
1798 (b'A', b'C'),
1799 (b'M', b'P'),
1800 (b'L', b'S'),
1801 (b'c', b'f'),
1802 ]);
1803 let expected =
1804 bclass(&[(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')]);
1805 assert_eq!(expected, bcasefold(&cls));
1806
1807 let cls = bclass(&[(b'A', b'Z')]);
1808 let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
1809 assert_eq!(expected, bcasefold(&cls));
1810
1811 let cls = bclass(&[(b'a', b'z')]);
1812 let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
1813 assert_eq!(expected, bcasefold(&cls));
1814
1815 let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
1816 let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]);
1817 assert_eq!(expected, bcasefold(&cls));
1818
1819 let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
1820 let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]);
1821 assert_eq!(expected, bcasefold(&cls));
1822
1823 let cls = bclass(&[(b'\x00', b'\x10')]);
1824 assert_eq!(cls, bcasefold(&cls));
1825
1826 let cls = bclass(&[(b'k', b'k')]);
1827 let expected = bclass(&[(b'K', b'K'), (b'k', b'k')]);
1828 assert_eq!(expected, bcasefold(&cls));
1829
1830 let cls = bclass(&[(b'@', b'@')]);
1831 assert_eq!(cls, bcasefold(&cls));
1832 }
1833
1834 #[test]
1835 fn class_negate_unicode() {
1836 let cls = uclass(&[('a', 'a')]);
1837 let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]);
1838 assert_eq!(expected, unegate(&cls));
1839
1840 let cls = uclass(&[('a', 'a'), ('b', 'b')]);
1841 let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]);
1842 assert_eq!(expected, unegate(&cls));
1843
1844 let cls = uclass(&[('a', 'c'), ('x', 'z')]);
1845 let expected = uclass(&[
1846 ('\x00', '\x60'),
1847 ('\x64', '\x77'),
1848 ('\x7B', '\u{10FFFF}'),
1849 ]);
1850 assert_eq!(expected, unegate(&cls));
1851
1852 let cls = uclass(&[('\x00', 'a')]);
1853 let expected = uclass(&[('\x62', '\u{10FFFF}')]);
1854 assert_eq!(expected, unegate(&cls));
1855
1856 let cls = uclass(&[('a', '\u{10FFFF}')]);
1857 let expected = uclass(&[('\x00', '\x60')]);
1858 assert_eq!(expected, unegate(&cls));
1859
1860 let cls = uclass(&[('\x00', '\u{10FFFF}')]);
1861 let expected = uclass(&[]);
1862 assert_eq!(expected, unegate(&cls));
1863
1864 let cls = uclass(&[]);
1865 let expected = uclass(&[('\x00', '\u{10FFFF}')]);
1866 assert_eq!(expected, unegate(&cls));
1867
1868 let cls =
1869 uclass(&[('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')]);
1870 let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]);
1871 assert_eq!(expected, unegate(&cls));
1872
1873 let cls = uclass(&[('\x00', '\u{D7FF}')]);
1874 let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]);
1875 assert_eq!(expected, unegate(&cls));
1876
1877 let cls = uclass(&[('\x00', '\u{D7FE}')]);
1878 let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]);
1879 assert_eq!(expected, unegate(&cls));
1880
1881 let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]);
1882 let expected = uclass(&[('\x00', '\u{D7FF}')]);
1883 assert_eq!(expected, unegate(&cls));
1884
1885 let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]);
1886 let expected = uclass(&[('\x00', '\u{E000}')]);
1887 assert_eq!(expected, unegate(&cls));
1888 }
1889
1890 #[test]
1891 fn class_negate_bytes() {
1892 let cls = bclass(&[(b'a', b'a')]);
1893 let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]);
1894 assert_eq!(expected, bnegate(&cls));
1895
1896 let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
1897 let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]);
1898 assert_eq!(expected, bnegate(&cls));
1899
1900 let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
1901 let expected = bclass(&[
1902 (b'\x00', b'\x60'),
1903 (b'\x64', b'\x77'),
1904 (b'\x7B', b'\xFF'),
1905 ]);
1906 assert_eq!(expected, bnegate(&cls));
1907
1908 let cls = bclass(&[(b'\x00', b'a')]);
1909 let expected = bclass(&[(b'\x62', b'\xFF')]);
1910 assert_eq!(expected, bnegate(&cls));
1911
1912 let cls = bclass(&[(b'a', b'\xFF')]);
1913 let expected = bclass(&[(b'\x00', b'\x60')]);
1914 assert_eq!(expected, bnegate(&cls));
1915
1916 let cls = bclass(&[(b'\x00', b'\xFF')]);
1917 let expected = bclass(&[]);
1918 assert_eq!(expected, bnegate(&cls));
1919
1920 let cls = bclass(&[]);
1921 let expected = bclass(&[(b'\x00', b'\xFF')]);
1922 assert_eq!(expected, bnegate(&cls));
1923
1924 let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]);
1925 let expected = bclass(&[(b'\xFE', b'\xFE')]);
1926 assert_eq!(expected, bnegate(&cls));
1927 }
1928
1929 #[test]
1930 fn class_union_unicode() {
1931 let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
1932 let cls2 = uclass(&[('a', 'z')]);
1933 let expected = uclass(&[('a', 'z'), ('A', 'C')]);
1934 assert_eq!(expected, uunion(&cls1, &cls2));
1935 }
1936
1937 #[test]
1938 fn class_union_bytes() {
1939 let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
1940 let cls2 = bclass(&[(b'a', b'z')]);
1941 let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]);
1942 assert_eq!(expected, bunion(&cls1, &cls2));
1943 }
1944
1945 #[test]
1946 fn class_intersect_unicode() {
1947 let cls1 = uclass(&[]);
1948 let cls2 = uclass(&[('a', 'a')]);
1949 let expected = uclass(&[]);
1950 assert_eq!(expected, uintersect(&cls1, &cls2));
1951
1952 let cls1 = uclass(&[('a', 'a')]);
1953 let cls2 = uclass(&[('a', 'a')]);
1954 let expected = uclass(&[('a', 'a')]);
1955 assert_eq!(expected, uintersect(&cls1, &cls2));
1956
1957 let cls1 = uclass(&[('a', 'a')]);
1958 let cls2 = uclass(&[('b', 'b')]);
1959 let expected = uclass(&[]);
1960 assert_eq!(expected, uintersect(&cls1, &cls2));
1961
1962 let cls1 = uclass(&[('a', 'a')]);
1963 let cls2 = uclass(&[('a', 'c')]);
1964 let expected = uclass(&[('a', 'a')]);
1965 assert_eq!(expected, uintersect(&cls1, &cls2));
1966
1967 let cls1 = uclass(&[('a', 'b')]);
1968 let cls2 = uclass(&[('a', 'c')]);
1969 let expected = uclass(&[('a', 'b')]);
1970 assert_eq!(expected, uintersect(&cls1, &cls2));
1971
1972 let cls1 = uclass(&[('a', 'b')]);
1973 let cls2 = uclass(&[('b', 'c')]);
1974 let expected = uclass(&[('b', 'b')]);
1975 assert_eq!(expected, uintersect(&cls1, &cls2));
1976
1977 let cls1 = uclass(&[('a', 'b')]);
1978 let cls2 = uclass(&[('c', 'd')]);
1979 let expected = uclass(&[]);
1980 assert_eq!(expected, uintersect(&cls1, &cls2));
1981
1982 let cls1 = uclass(&[('b', 'c')]);
1983 let cls2 = uclass(&[('a', 'd')]);
1984 let expected = uclass(&[('b', 'c')]);
1985 assert_eq!(expected, uintersect(&cls1, &cls2));
1986
1987 let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
1988 let cls2 = uclass(&[('a', 'h')]);
1989 let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
1990 assert_eq!(expected, uintersect(&cls1, &cls2));
1991
1992 let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
1993 let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
1994 let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
1995 assert_eq!(expected, uintersect(&cls1, &cls2));
1996
1997 let cls1 = uclass(&[('a', 'b'), ('g', 'h')]);
1998 let cls2 = uclass(&[('d', 'e'), ('k', 'l')]);
1999 let expected = uclass(&[]);
2000 assert_eq!(expected, uintersect(&cls1, &cls2));
2001
2002 let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
2003 let cls2 = uclass(&[('h', 'h')]);
2004 let expected = uclass(&[('h', 'h')]);
2005 assert_eq!(expected, uintersect(&cls1, &cls2));
2006
2007 let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
2008 let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
2009 let expected = uclass(&[]);
2010 assert_eq!(expected, uintersect(&cls1, &cls2));
2011
2012 let cls1 = uclass(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
2013 let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
2014 let expected = uclass(&[('b', 'f')]);
2015 assert_eq!(expected, uintersect(&cls1, &cls2));
2016 }
2017
2018 #[test]
2019 fn class_intersect_bytes() {
2020 let cls1 = bclass(&[]);
2021 let cls2 = bclass(&[(b'a', b'a')]);
2022 let expected = bclass(&[]);
2023 assert_eq!(expected, bintersect(&cls1, &cls2));
2024
2025 let cls1 = bclass(&[(b'a', b'a')]);
2026 let cls2 = bclass(&[(b'a', b'a')]);
2027 let expected = bclass(&[(b'a', b'a')]);
2028 assert_eq!(expected, bintersect(&cls1, &cls2));
2029
2030 let cls1 = bclass(&[(b'a', b'a')]);
2031 let cls2 = bclass(&[(b'b', b'b')]);
2032 let expected = bclass(&[]);
2033 assert_eq!(expected, bintersect(&cls1, &cls2));
2034
2035 let cls1 = bclass(&[(b'a', b'a')]);
2036 let cls2 = bclass(&[(b'a', b'c')]);
2037 let expected = bclass(&[(b'a', b'a')]);
2038 assert_eq!(expected, bintersect(&cls1, &cls2));
2039
2040 let cls1 = bclass(&[(b'a', b'b')]);
2041 let cls2 = bclass(&[(b'a', b'c')]);
2042 let expected = bclass(&[(b'a', b'b')]);
2043 assert_eq!(expected, bintersect(&cls1, &cls2));
2044
2045 let cls1 = bclass(&[(b'a', b'b')]);
2046 let cls2 = bclass(&[(b'b', b'c')]);
2047 let expected = bclass(&[(b'b', b'b')]);
2048 assert_eq!(expected, bintersect(&cls1, &cls2));
2049
2050 let cls1 = bclass(&[(b'a', b'b')]);
2051 let cls2 = bclass(&[(b'c', b'd')]);
2052 let expected = bclass(&[]);
2053 assert_eq!(expected, bintersect(&cls1, &cls2));
2054
2055 let cls1 = bclass(&[(b'b', b'c')]);
2056 let cls2 = bclass(&[(b'a', b'd')]);
2057 let expected = bclass(&[(b'b', b'c')]);
2058 assert_eq!(expected, bintersect(&cls1, &cls2));
2059
2060 let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2061 let cls2 = bclass(&[(b'a', b'h')]);
2062 let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2063 assert_eq!(expected, bintersect(&cls1, &cls2));
2064
2065 let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2066 let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2067 let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2068 assert_eq!(expected, bintersect(&cls1, &cls2));
2069
2070 let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]);
2071 let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]);
2072 let expected = bclass(&[]);
2073 assert_eq!(expected, bintersect(&cls1, &cls2));
2074
2075 let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2076 let cls2 = bclass(&[(b'h', b'h')]);
2077 let expected = bclass(&[(b'h', b'h')]);
2078 assert_eq!(expected, bintersect(&cls1, &cls2));
2079
2080 let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]);
2081 let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]);
2082 let expected = bclass(&[]);
2083 assert_eq!(expected, bintersect(&cls1, &cls2));
2084
2085 let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]);
2086 let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]);
2087 let expected = bclass(&[(b'b', b'f')]);
2088 assert_eq!(expected, bintersect(&cls1, &cls2));
2089 }
2090
2091 #[test]
2092 fn class_difference_unicode() {
2093 let cls1 = uclass(&[('a', 'a')]);
2094 let cls2 = uclass(&[('a', 'a')]);
2095 let expected = uclass(&[]);
2096 assert_eq!(expected, udifference(&cls1, &cls2));
2097
2098 let cls1 = uclass(&[('a', 'a')]);
2099 let cls2 = uclass(&[]);
2100 let expected = uclass(&[('a', 'a')]);
2101 assert_eq!(expected, udifference(&cls1, &cls2));
2102
2103 let cls1 = uclass(&[]);
2104 let cls2 = uclass(&[('a', 'a')]);
2105 let expected = uclass(&[]);
2106 assert_eq!(expected, udifference(&cls1, &cls2));
2107
2108 let cls1 = uclass(&[('a', 'z')]);
2109 let cls2 = uclass(&[('a', 'a')]);
2110 let expected = uclass(&[('b', 'z')]);
2111 assert_eq!(expected, udifference(&cls1, &cls2));
2112
2113 let cls1 = uclass(&[('a', 'z')]);
2114 let cls2 = uclass(&[('z', 'z')]);
2115 let expected = uclass(&[('a', 'y')]);
2116 assert_eq!(expected, udifference(&cls1, &cls2));
2117
2118 let cls1 = uclass(&[('a', 'z')]);
2119 let cls2 = uclass(&[('m', 'm')]);
2120 let expected = uclass(&[('a', 'l'), ('n', 'z')]);
2121 assert_eq!(expected, udifference(&cls1, &cls2));
2122
2123 let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
2124 let cls2 = uclass(&[('a', 'z')]);
2125 let expected = uclass(&[]);
2126 assert_eq!(expected, udifference(&cls1, &cls2));
2127
2128 let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
2129 let cls2 = uclass(&[('d', 'v')]);
2130 let expected = uclass(&[('a', 'c')]);
2131 assert_eq!(expected, udifference(&cls1, &cls2));
2132
2133 let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
2134 let cls2 = uclass(&[('b', 'g'), ('s', 'u')]);
2135 let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
2136 assert_eq!(expected, udifference(&cls1, &cls2));
2137
2138 let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
2139 let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]);
2140 let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
2141 assert_eq!(expected, udifference(&cls1, &cls2));
2142
2143 let cls1 = uclass(&[('x', 'z')]);
2144 let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
2145 let expected = uclass(&[('x', 'z')]);
2146 assert_eq!(expected, udifference(&cls1, &cls2));
2147
2148 let cls1 = uclass(&[('a', 'z')]);
2149 let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
2150 let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]);
2151 assert_eq!(expected, udifference(&cls1, &cls2));
2152 }
2153
2154 #[test]
2155 fn class_difference_bytes() {
2156 let cls1 = bclass(&[(b'a', b'a')]);
2157 let cls2 = bclass(&[(b'a', b'a')]);
2158 let expected = bclass(&[]);
2159 assert_eq!(expected, bdifference(&cls1, &cls2));
2160
2161 let cls1 = bclass(&[(b'a', b'a')]);
2162 let cls2 = bclass(&[]);
2163 let expected = bclass(&[(b'a', b'a')]);
2164 assert_eq!(expected, bdifference(&cls1, &cls2));
2165
2166 let cls1 = bclass(&[]);
2167 let cls2 = bclass(&[(b'a', b'a')]);
2168 let expected = bclass(&[]);
2169 assert_eq!(expected, bdifference(&cls1, &cls2));
2170
2171 let cls1 = bclass(&[(b'a', b'z')]);
2172 let cls2 = bclass(&[(b'a', b'a')]);
2173 let expected = bclass(&[(b'b', b'z')]);
2174 assert_eq!(expected, bdifference(&cls1, &cls2));
2175
2176 let cls1 = bclass(&[(b'a', b'z')]);
2177 let cls2 = bclass(&[(b'z', b'z')]);
2178 let expected = bclass(&[(b'a', b'y')]);
2179 assert_eq!(expected, bdifference(&cls1, &cls2));
2180
2181 let cls1 = bclass(&[(b'a', b'z')]);
2182 let cls2 = bclass(&[(b'm', b'm')]);
2183 let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]);
2184 assert_eq!(expected, bdifference(&cls1, &cls2));
2185
2186 let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
2187 let cls2 = bclass(&[(b'a', b'z')]);
2188 let expected = bclass(&[]);
2189 assert_eq!(expected, bdifference(&cls1, &cls2));
2190
2191 let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
2192 let cls2 = bclass(&[(b'd', b'v')]);
2193 let expected = bclass(&[(b'a', b'c')]);
2194 assert_eq!(expected, bdifference(&cls1, &cls2));
2195
2196 let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
2197 let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]);
2198 let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
2199 assert_eq!(expected, bdifference(&cls1, &cls2));
2200
2201 let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
2202 let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]);
2203 let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
2204 assert_eq!(expected, bdifference(&cls1, &cls2));
2205
2206 let cls1 = bclass(&[(b'x', b'z')]);
2207 let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
2208 let expected = bclass(&[(b'x', b'z')]);
2209 assert_eq!(expected, bdifference(&cls1, &cls2));
2210
2211 let cls1 = bclass(&[(b'a', b'z')]);
2212 let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
2213 let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]);
2214 assert_eq!(expected, bdifference(&cls1, &cls2));
2215 }
2216
2217 #[test]
2218 fn class_symmetric_difference_unicode() {
2219 let cls1 = uclass(&[('a', 'm')]);
2220 let cls2 = uclass(&[('g', 't')]);
2221 let expected = uclass(&[('a', 'f'), ('n', 't')]);
2222 assert_eq!(expected, usymdifference(&cls1, &cls2));
2223 }
2224
2225 #[test]
2226 fn class_symmetric_difference_bytes() {
2227 let cls1 = bclass(&[(b'a', b'm')]);
2228 let cls2 = bclass(&[(b'g', b't')]);
2229 let expected = bclass(&[(b'a', b'f'), (b'n', b't')]);
2230 assert_eq!(expected, bsymdifference(&cls1, &cls2));
2231 }
2232
2233 #[test]
2234 #[should_panic]
2235 fn hir_byte_literal_non_ascii() {
2236 Hir::literal(Literal::Byte(b'a'));
2237 }
2238
2239 #[test]
2244 #[cfg(any(unix, windows))]
2245 fn no_stack_overflow_on_drop() {
2246 use std::thread;
2247
2248 let run = || {
2249 let mut expr = Hir::empty();
2250 for _ in 0..100 {
2251 expr = Hir::group(Group {
2252 kind: GroupKind::NonCapturing,
2253 hir: Box::new(expr),
2254 });
2255 expr = Hir::repetition(Repetition {
2256 kind: RepetitionKind::ZeroOrOne,
2257 greedy: true,
2258 hir: Box::new(expr),
2259 });
2260
2261 expr = Hir {
2262 kind: HirKind::Concat(vec![expr]),
2263 info: HirInfo::new(),
2264 };
2265 expr = Hir {
2266 kind: HirKind::Alternation(vec![expr]),
2267 info: HirInfo::new(),
2268 };
2269 }
2270 assert!(!expr.kind.is_empty());
2271 };
2272
2273 thread::Builder::new()
2276 .stack_size(1 << 10)
2277 .spawn(run)
2278 .unwrap()
2279 .join()
2280 .unwrap();
2281 }
2282}