1use std::cell::RefCell;
2use std::collections::HashMap;
3use std::sync::Arc;
4
5#[cfg(feature = "perf-literal")]
6use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
7use syntax::hir::literal::Literals;
8use syntax::hir::Hir;
9use syntax::ParserBuilder;
10
11use backtrack;
12use cache::{Cached, CachedGuard};
13use compile::Compiler;
14#[cfg(feature = "perf-dfa")]
15use dfa;
16use error::Error;
17use input::{ByteInput, CharInput};
18use literal::LiteralSearcher;
19use pikevm;
20use prog::Program;
21use re_builder::RegexOptions;
22use re_bytes;
23use re_set;
24use re_trait::{Locations, RegularExpression, Slot};
25use re_unicode;
26use utf8::next_utf8;
27
28pub struct Exec {
34 ro: Arc<ExecReadOnly>,
36 cache: Cached<ProgramCache>,
38}
39
40#[derive(Debug)]
44pub struct ExecNoSync<'c> {
45 ro: &'c Arc<ExecReadOnly>,
47 cache: CachedGuard<'c, ProgramCache>,
49}
50
51pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
53
54#[derive(Debug)]
57struct ExecReadOnly {
58 res: Vec<String>,
60 nfa: Program,
66 dfa: Program,
72 dfa_reverse: Program,
76 suffixes: LiteralSearcher,
81 #[cfg(feature = "perf-literal")]
91 ac: Option<AhoCorasick<u32>>,
92 match_type: MatchType,
95}
96
97pub struct ExecBuilder {
101 options: RegexOptions,
102 match_type: Option<MatchType>,
103 bytes: bool,
104 only_utf8: bool,
105}
106
107struct Parsed {
110 exprs: Vec<Hir>,
111 prefixes: Literals,
112 suffixes: Literals,
113 bytes: bool,
114}
115
116impl ExecBuilder {
117 pub fn new(re: &str) -> Self {
123 Self::new_many(&[re])
124 }
125
126 pub fn new_many<I, S>(res: I) -> Self
132 where
133 S: AsRef<str>,
134 I: IntoIterator<Item = S>,
135 {
136 let mut opts = RegexOptions::default();
137 opts.pats = res.into_iter().map(|s| s.as_ref().to_owned()).collect();
138 Self::new_options(opts)
139 }
140
141 pub fn new_options(opts: RegexOptions) -> Self {
143 ExecBuilder {
144 options: opts,
145 match_type: None,
146 bytes: false,
147 only_utf8: true,
148 }
149 }
150
151 pub fn automatic(mut self) -> Self {
159 self.match_type = None;
160 self
161 }
162
163 pub fn nfa(mut self) -> Self {
169 self.match_type = Some(MatchType::Nfa(MatchNfaType::PikeVM));
170 self
171 }
172
173 pub fn bounded_backtracking(mut self) -> Self {
182 self.match_type = Some(MatchType::Nfa(MatchNfaType::Backtrack));
183 self
184 }
185
186 pub fn bytes(mut self, yes: bool) -> Self {
196 self.bytes = yes;
197 self
198 }
199
200 pub fn only_utf8(mut self, yes: bool) -> Self {
205 self.only_utf8 = yes;
206 self
207 }
208
209 pub fn unicode(mut self, yes: bool) -> Self {
211 self.options.unicode = yes;
212 self
213 }
214
215 fn parse(&self) -> Result<Parsed, Error> {
217 let mut exprs = Vec::with_capacity(self.options.pats.len());
218 let mut prefixes = Some(Literals::empty());
219 let mut suffixes = Some(Literals::empty());
220 let mut bytes = false;
221 let is_set = self.options.pats.len() > 1;
222 for pat in &self.options.pats {
225 let mut parser = ParserBuilder::new()
226 .octal(self.options.octal)
227 .case_insensitive(self.options.case_insensitive)
228 .multi_line(self.options.multi_line)
229 .dot_matches_new_line(self.options.dot_matches_new_line)
230 .swap_greed(self.options.swap_greed)
231 .ignore_whitespace(self.options.ignore_whitespace)
232 .unicode(self.options.unicode)
233 .allow_invalid_utf8(!self.only_utf8)
234 .nest_limit(self.options.nest_limit)
235 .build();
236 let expr =
237 parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?;
238 bytes = bytes || !expr.is_always_utf8();
239
240 if cfg!(feature = "perf-literal") {
241 if !expr.is_anchored_start() && expr.is_any_anchored_start() {
242 prefixes = None;
245 } else if is_set && expr.is_anchored_start() {
246 prefixes = None;
249 }
250 prefixes = prefixes.and_then(|mut prefixes| {
251 if !prefixes.union_prefixes(&expr) {
252 None
253 } else {
254 Some(prefixes)
255 }
256 });
257
258 if !expr.is_anchored_end() && expr.is_any_anchored_end() {
259 suffixes = None;
262 } else if is_set && expr.is_anchored_end() {
263 suffixes = None;
266 }
267 suffixes = suffixes.and_then(|mut suffixes| {
268 if !suffixes.union_suffixes(&expr) {
269 None
270 } else {
271 Some(suffixes)
272 }
273 });
274 }
275 exprs.push(expr);
276 }
277 Ok(Parsed {
278 exprs: exprs,
279 prefixes: prefixes.unwrap_or_else(Literals::empty),
280 suffixes: suffixes.unwrap_or_else(Literals::empty),
281 bytes: bytes,
282 })
283 }
284
285 pub fn build(self) -> Result<Exec, Error> {
287 if self.options.pats.is_empty() {
290 let ro = Arc::new(ExecReadOnly {
291 res: vec![],
292 nfa: Program::new(),
293 dfa: Program::new(),
294 dfa_reverse: Program::new(),
295 suffixes: LiteralSearcher::empty(),
296 #[cfg(feature = "perf-literal")]
297 ac: None,
298 match_type: MatchType::Nothing,
299 });
300 return Ok(Exec { ro: ro, cache: Cached::new() });
301 }
302 let parsed = self.parse()?;
303 let mut nfa = Compiler::new()
304 .size_limit(self.options.size_limit)
305 .bytes(self.bytes || parsed.bytes)
306 .only_utf8(self.only_utf8)
307 .compile(&parsed.exprs)?;
308 let mut dfa = Compiler::new()
309 .size_limit(self.options.size_limit)
310 .dfa(true)
311 .only_utf8(self.only_utf8)
312 .compile(&parsed.exprs)?;
313 let mut dfa_reverse = Compiler::new()
314 .size_limit(self.options.size_limit)
315 .dfa(true)
316 .only_utf8(self.only_utf8)
317 .reverse(true)
318 .compile(&parsed.exprs)?;
319
320 #[cfg(feature = "perf-literal")]
321 let ac = self.build_aho_corasick(&parsed);
322 nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
323 dfa.prefixes = nfa.prefixes.clone();
324 dfa.dfa_size_limit = self.options.dfa_size_limit;
325 dfa_reverse.dfa_size_limit = self.options.dfa_size_limit;
326
327 let mut ro = ExecReadOnly {
328 res: self.options.pats,
329 nfa: nfa,
330 dfa: dfa,
331 dfa_reverse: dfa_reverse,
332 suffixes: LiteralSearcher::suffixes(parsed.suffixes),
333 #[cfg(feature = "perf-literal")]
334 ac: ac,
335 match_type: MatchType::Nothing,
336 };
337 ro.match_type = ro.choose_match_type(self.match_type);
338
339 let ro = Arc::new(ro);
340 Ok(Exec { ro: ro, cache: Cached::new() })
341 }
342
343 #[cfg(feature = "perf-literal")]
344 fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick<u32>> {
345 if parsed.exprs.len() != 1 {
346 return None;
347 }
348 let lits = match alternation_literals(&parsed.exprs[0]) {
349 None => return None,
350 Some(lits) => lits,
351 };
352 if lits.len() <= 32 {
355 return None;
356 }
357 Some(
358 AhoCorasickBuilder::new()
359 .match_kind(MatchKind::LeftmostFirst)
360 .auto_configure(&lits)
361 .byte_classes(true)
364 .build_with_size::<u32, _, _>(&lits)
365 .expect("AC automaton too big"),
368 )
369 }
370}
371
372impl<'c> RegularExpression for ExecNoSyncStr<'c> {
373 type Text = str;
374
375 fn slots_len(&self) -> usize {
376 self.0.slots_len()
377 }
378
379 fn next_after_empty(&self, text: &str, i: usize) -> usize {
380 next_utf8(text.as_bytes(), i)
381 }
382
383 #[cfg_attr(feature = "perf-inline", inline(always))]
384 fn shortest_match_at(&self, text: &str, start: usize) -> Option<usize> {
385 self.0.shortest_match_at(text.as_bytes(), start)
386 }
387
388 #[cfg_attr(feature = "perf-inline", inline(always))]
389 fn is_match_at(&self, text: &str, start: usize) -> bool {
390 self.0.is_match_at(text.as_bytes(), start)
391 }
392
393 #[cfg_attr(feature = "perf-inline", inline(always))]
394 fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
395 self.0.find_at(text.as_bytes(), start)
396 }
397
398 #[cfg_attr(feature = "perf-inline", inline(always))]
399 fn captures_read_at(
400 &self,
401 locs: &mut Locations,
402 text: &str,
403 start: usize,
404 ) -> Option<(usize, usize)> {
405 self.0.captures_read_at(locs, text.as_bytes(), start)
406 }
407}
408
409impl<'c> RegularExpression for ExecNoSync<'c> {
410 type Text = [u8];
411
412 fn slots_len(&self) -> usize {
416 self.ro.nfa.captures.len() * 2
417 }
418
419 fn next_after_empty(&self, _text: &[u8], i: usize) -> usize {
420 i + 1
421 }
422
423 #[cfg_attr(feature = "perf-inline", inline(always))]
426 fn shortest_match_at(&self, text: &[u8], start: usize) -> Option<usize> {
427 if !self.is_anchor_end_match(text) {
428 return None;
429 }
430 match self.ro.match_type {
431 #[cfg(feature = "perf-literal")]
432 MatchType::Literal(ty) => {
433 self.find_literals(ty, text, start).map(|(_, e)| e)
434 }
435 #[cfg(feature = "perf-dfa")]
436 MatchType::Dfa | MatchType::DfaMany => {
437 match self.shortest_dfa(text, start) {
438 dfa::Result::Match(end) => Some(end),
439 dfa::Result::NoMatch(_) => None,
440 dfa::Result::Quit => self.shortest_nfa(text, start),
441 }
442 }
443 #[cfg(feature = "perf-dfa")]
444 MatchType::DfaAnchoredReverse => {
445 match dfa::Fsm::reverse(
446 &self.ro.dfa_reverse,
447 self.cache.value(),
448 true,
449 &text[start..],
450 text.len(),
451 ) {
452 dfa::Result::Match(_) => Some(text.len()),
453 dfa::Result::NoMatch(_) => None,
454 dfa::Result::Quit => self.shortest_nfa(text, start),
455 }
456 }
457 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
458 MatchType::DfaSuffix => {
459 match self.shortest_dfa_reverse_suffix(text, start) {
460 dfa::Result::Match(e) => Some(e),
461 dfa::Result::NoMatch(_) => None,
462 dfa::Result::Quit => self.shortest_nfa(text, start),
463 }
464 }
465 MatchType::Nfa(ty) => self.shortest_nfa_type(ty, text, start),
466 MatchType::Nothing => None,
467 }
468 }
469
470 #[cfg_attr(feature = "perf-inline", inline(always))]
475 fn is_match_at(&self, text: &[u8], start: usize) -> bool {
476 if !self.is_anchor_end_match(text) {
477 return false;
478 }
479 match self.ro.match_type {
483 #[cfg(feature = "perf-literal")]
484 MatchType::Literal(ty) => {
485 self.find_literals(ty, text, start).is_some()
486 }
487 #[cfg(feature = "perf-dfa")]
488 MatchType::Dfa | MatchType::DfaMany => {
489 match self.shortest_dfa(text, start) {
490 dfa::Result::Match(_) => true,
491 dfa::Result::NoMatch(_) => false,
492 dfa::Result::Quit => self.match_nfa(text, start),
493 }
494 }
495 #[cfg(feature = "perf-dfa")]
496 MatchType::DfaAnchoredReverse => {
497 match dfa::Fsm::reverse(
498 &self.ro.dfa_reverse,
499 self.cache.value(),
500 true,
501 &text[start..],
502 text.len(),
503 ) {
504 dfa::Result::Match(_) => true,
505 dfa::Result::NoMatch(_) => false,
506 dfa::Result::Quit => self.match_nfa(text, start),
507 }
508 }
509 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
510 MatchType::DfaSuffix => {
511 match self.shortest_dfa_reverse_suffix(text, start) {
512 dfa::Result::Match(_) => true,
513 dfa::Result::NoMatch(_) => false,
514 dfa::Result::Quit => self.match_nfa(text, start),
515 }
516 }
517 MatchType::Nfa(ty) => self.match_nfa_type(ty, text, start),
518 MatchType::Nothing => false,
519 }
520 }
521
522 #[cfg_attr(feature = "perf-inline", inline(always))]
525 fn find_at(&self, text: &[u8], start: usize) -> Option<(usize, usize)> {
526 if !self.is_anchor_end_match(text) {
527 return None;
528 }
529 match self.ro.match_type {
530 #[cfg(feature = "perf-literal")]
531 MatchType::Literal(ty) => self.find_literals(ty, text, start),
532 #[cfg(feature = "perf-dfa")]
533 MatchType::Dfa => match self.find_dfa_forward(text, start) {
534 dfa::Result::Match((s, e)) => Some((s, e)),
535 dfa::Result::NoMatch(_) => None,
536 dfa::Result::Quit => {
537 self.find_nfa(MatchNfaType::Auto, text, start)
538 }
539 },
540 #[cfg(feature = "perf-dfa")]
541 MatchType::DfaAnchoredReverse => {
542 match self.find_dfa_anchored_reverse(text, start) {
543 dfa::Result::Match((s, e)) => Some((s, e)),
544 dfa::Result::NoMatch(_) => None,
545 dfa::Result::Quit => {
546 self.find_nfa(MatchNfaType::Auto, text, start)
547 }
548 }
549 }
550 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
551 MatchType::DfaSuffix => {
552 match self.find_dfa_reverse_suffix(text, start) {
553 dfa::Result::Match((s, e)) => Some((s, e)),
554 dfa::Result::NoMatch(_) => None,
555 dfa::Result::Quit => {
556 self.find_nfa(MatchNfaType::Auto, text, start)
557 }
558 }
559 }
560 MatchType::Nfa(ty) => self.find_nfa(ty, text, start),
561 MatchType::Nothing => None,
562 #[cfg(feature = "perf-dfa")]
563 MatchType::DfaMany => {
564 unreachable!("BUG: RegexSet cannot be used with find")
565 }
566 }
567 }
568
569 fn captures_read_at(
578 &self,
579 locs: &mut Locations,
580 text: &[u8],
581 start: usize,
582 ) -> Option<(usize, usize)> {
583 let slots = locs.as_slots();
584 for slot in slots.iter_mut() {
585 *slot = None;
586 }
587 match slots.len() {
590 0 => return self.find_at(text, start),
591 2 => {
592 return self.find_at(text, start).map(|(s, e)| {
593 slots[0] = Some(s);
594 slots[1] = Some(e);
595 (s, e)
596 });
597 }
598 _ => {} }
600 if !self.is_anchor_end_match(text) {
601 return None;
602 }
603 match self.ro.match_type {
604 #[cfg(feature = "perf-literal")]
605 MatchType::Literal(ty) => {
606 self.find_literals(ty, text, start).and_then(|(s, e)| {
607 self.captures_nfa_type(
608 MatchNfaType::Auto,
609 slots,
610 text,
611 s,
612 e,
613 )
614 })
615 }
616 #[cfg(feature = "perf-dfa")]
617 MatchType::Dfa => {
618 if self.ro.nfa.is_anchored_start {
619 self.captures_nfa(slots, text, start)
620 } else {
621 match self.find_dfa_forward(text, start) {
622 dfa::Result::Match((s, e)) => self.captures_nfa_type(
623 MatchNfaType::Auto,
624 slots,
625 text,
626 s,
627 e,
628 ),
629 dfa::Result::NoMatch(_) => None,
630 dfa::Result::Quit => {
631 self.captures_nfa(slots, text, start)
632 }
633 }
634 }
635 }
636 #[cfg(feature = "perf-dfa")]
637 MatchType::DfaAnchoredReverse => {
638 match self.find_dfa_anchored_reverse(text, start) {
639 dfa::Result::Match((s, e)) => self.captures_nfa_type(
640 MatchNfaType::Auto,
641 slots,
642 text,
643 s,
644 e,
645 ),
646 dfa::Result::NoMatch(_) => None,
647 dfa::Result::Quit => self.captures_nfa(slots, text, start),
648 }
649 }
650 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
651 MatchType::DfaSuffix => {
652 match self.find_dfa_reverse_suffix(text, start) {
653 dfa::Result::Match((s, e)) => self.captures_nfa_type(
654 MatchNfaType::Auto,
655 slots,
656 text,
657 s,
658 e,
659 ),
660 dfa::Result::NoMatch(_) => None,
661 dfa::Result::Quit => self.captures_nfa(slots, text, start),
662 }
663 }
664 MatchType::Nfa(ty) => {
665 self.captures_nfa_type(ty, slots, text, start, text.len())
666 }
667 MatchType::Nothing => None,
668 #[cfg(feature = "perf-dfa")]
669 MatchType::DfaMany => {
670 unreachable!("BUG: RegexSet cannot be used with captures")
671 }
672 }
673 }
674}
675
676impl<'c> ExecNoSync<'c> {
677 #[cfg(feature = "perf-literal")]
679 #[cfg_attr(feature = "perf-inline", inline(always))]
680 fn find_literals(
681 &self,
682 ty: MatchLiteralType,
683 text: &[u8],
684 start: usize,
685 ) -> Option<(usize, usize)> {
686 use self::MatchLiteralType::*;
687 match ty {
688 Unanchored => {
689 let lits = &self.ro.nfa.prefixes;
690 lits.find(&text[start..]).map(|(s, e)| (start + s, start + e))
691 }
692 AnchoredStart => {
693 let lits = &self.ro.nfa.prefixes;
694 if !self.ro.nfa.is_anchored_start
695 || (self.ro.nfa.is_anchored_start && start == 0)
696 {
697 lits.find_start(&text[start..])
698 .map(|(s, e)| (start + s, start + e))
699 } else {
700 None
701 }
702 }
703 AnchoredEnd => {
704 let lits = &self.ro.suffixes;
705 lits.find_end(&text[start..])
706 .map(|(s, e)| (start + s, start + e))
707 }
708 AhoCorasick => self
709 .ro
710 .ac
711 .as_ref()
712 .unwrap()
713 .find(&text[start..])
714 .map(|m| (start + m.start(), start + m.end())),
715 }
716 }
717
718 #[cfg(feature = "perf-dfa")]
723 #[cfg_attr(feature = "perf-inline", inline(always))]
724 fn find_dfa_forward(
725 &self,
726 text: &[u8],
727 start: usize,
728 ) -> dfa::Result<(usize, usize)> {
729 use dfa::Result::*;
730 let end = match dfa::Fsm::forward(
731 &self.ro.dfa,
732 self.cache.value(),
733 false,
734 text,
735 start,
736 ) {
737 NoMatch(i) => return NoMatch(i),
738 Quit => return Quit,
739 Match(end) if start == end => return Match((start, start)),
740 Match(end) => end,
741 };
742 match dfa::Fsm::reverse(
744 &self.ro.dfa_reverse,
745 self.cache.value(),
746 false,
747 &text[start..],
748 end - start,
749 ) {
750 Match(s) => Match((start + s, end)),
751 NoMatch(i) => NoMatch(i),
752 Quit => Quit,
753 }
754 }
755
756 #[cfg(feature = "perf-dfa")]
763 #[cfg_attr(feature = "perf-inline", inline(always))]
764 fn find_dfa_anchored_reverse(
765 &self,
766 text: &[u8],
767 start: usize,
768 ) -> dfa::Result<(usize, usize)> {
769 use dfa::Result::*;
770 match dfa::Fsm::reverse(
771 &self.ro.dfa_reverse,
772 self.cache.value(),
773 false,
774 &text[start..],
775 text.len() - start,
776 ) {
777 Match(s) => Match((start + s, text.len())),
778 NoMatch(i) => NoMatch(i),
779 Quit => Quit,
780 }
781 }
782
783 #[cfg(feature = "perf-dfa")]
785 #[cfg_attr(feature = "perf-inline", inline(always))]
786 fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
787 dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start)
788 }
789
790 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
793 #[cfg_attr(feature = "perf-inline", inline(always))]
794 fn shortest_dfa_reverse_suffix(
795 &self,
796 text: &[u8],
797 start: usize,
798 ) -> dfa::Result<usize> {
799 match self.exec_dfa_reverse_suffix(text, start) {
800 None => self.shortest_dfa(text, start),
801 Some(r) => r.map(|(_, end)| end),
802 }
803 }
804
805 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
819 #[cfg_attr(feature = "perf-inline", inline(always))]
820 fn exec_dfa_reverse_suffix(
821 &self,
822 text: &[u8],
823 original_start: usize,
824 ) -> Option<dfa::Result<(usize, usize)>> {
825 use dfa::Result::*;
826
827 let lcs = self.ro.suffixes.lcs();
828 debug_assert!(lcs.len() >= 1);
829 let mut start = original_start;
830 let mut end = start;
831 let mut last_literal = start;
832 while end <= text.len() {
833 last_literal += match lcs.find(&text[last_literal..]) {
834 None => return Some(NoMatch(text.len())),
835 Some(i) => i,
836 };
837 end = last_literal + lcs.len();
838 match dfa::Fsm::reverse(
839 &self.ro.dfa_reverse,
840 self.cache.value(),
841 false,
842 &text[start..end],
843 end - start,
844 ) {
845 Match(0) | NoMatch(0) => return None,
846 Match(i) => return Some(Match((start + i, end))),
847 NoMatch(i) => {
848 start += i;
849 last_literal += 1;
850 continue;
851 }
852 Quit => return Some(Quit),
853 };
854 }
855 Some(NoMatch(text.len()))
856 }
857
858 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
864 #[cfg_attr(feature = "perf-inline", inline(always))]
865 fn find_dfa_reverse_suffix(
866 &self,
867 text: &[u8],
868 start: usize,
869 ) -> dfa::Result<(usize, usize)> {
870 use dfa::Result::*;
871
872 let match_start = match self.exec_dfa_reverse_suffix(text, start) {
873 None => return self.find_dfa_forward(text, start),
874 Some(Match((start, _))) => start,
875 Some(r) => return r,
876 };
877 match dfa::Fsm::forward(
885 &self.ro.dfa,
886 self.cache.value(),
887 false,
888 text,
889 match_start,
890 ) {
891 NoMatch(_) => panic!("BUG: reverse match implies forward match"),
892 Quit => Quit,
893 Match(e) => Match((match_start, e)),
894 }
895 }
896
897 #[cfg(feature = "perf-dfa")]
903 fn match_nfa(&self, text: &[u8], start: usize) -> bool {
904 self.match_nfa_type(MatchNfaType::Auto, text, start)
905 }
906
907 fn match_nfa_type(
909 &self,
910 ty: MatchNfaType,
911 text: &[u8],
912 start: usize,
913 ) -> bool {
914 self.exec_nfa(
915 ty,
916 &mut [false],
917 &mut [],
918 true,
919 false,
920 text,
921 start,
922 text.len(),
923 )
924 }
925
926 #[cfg(feature = "perf-dfa")]
928 fn shortest_nfa(&self, text: &[u8], start: usize) -> Option<usize> {
929 self.shortest_nfa_type(MatchNfaType::Auto, text, start)
930 }
931
932 fn shortest_nfa_type(
934 &self,
935 ty: MatchNfaType,
936 text: &[u8],
937 start: usize,
938 ) -> Option<usize> {
939 let mut slots = [None, None];
940 if self.exec_nfa(
941 ty,
942 &mut [false],
943 &mut slots,
944 true,
945 true,
946 text,
947 start,
948 text.len(),
949 ) {
950 slots[1]
951 } else {
952 None
953 }
954 }
955
956 fn find_nfa(
958 &self,
959 ty: MatchNfaType,
960 text: &[u8],
961 start: usize,
962 ) -> Option<(usize, usize)> {
963 let mut slots = [None, None];
964 if self.exec_nfa(
965 ty,
966 &mut [false],
967 &mut slots,
968 false,
969 false,
970 text,
971 start,
972 text.len(),
973 ) {
974 match (slots[0], slots[1]) {
975 (Some(s), Some(e)) => Some((s, e)),
976 _ => None,
977 }
978 } else {
979 None
980 }
981 }
982
983 #[cfg(feature = "perf-dfa")]
987 fn captures_nfa(
988 &self,
989 slots: &mut [Slot],
990 text: &[u8],
991 start: usize,
992 ) -> Option<(usize, usize)> {
993 self.captures_nfa_type(
994 MatchNfaType::Auto,
995 slots,
996 text,
997 start,
998 text.len(),
999 )
1000 }
1001
1002 fn captures_nfa_type(
1004 &self,
1005 ty: MatchNfaType,
1006 slots: &mut [Slot],
1007 text: &[u8],
1008 start: usize,
1009 end: usize,
1010 ) -> Option<(usize, usize)> {
1011 if self.exec_nfa(
1012 ty,
1013 &mut [false],
1014 slots,
1015 false,
1016 false,
1017 text,
1018 start,
1019 end,
1020 ) {
1021 match (slots[0], slots[1]) {
1022 (Some(s), Some(e)) => Some((s, e)),
1023 _ => None,
1024 }
1025 } else {
1026 None
1027 }
1028 }
1029
1030 fn exec_nfa(
1031 &self,
1032 mut ty: MatchNfaType,
1033 matches: &mut [bool],
1034 slots: &mut [Slot],
1035 quit_after_match: bool,
1036 quit_after_match_with_pos: bool,
1037 text: &[u8],
1038 start: usize,
1039 end: usize,
1040 ) -> bool {
1041 use self::MatchNfaType::*;
1042 if let Auto = ty {
1043 if backtrack::should_exec(self.ro.nfa.len(), text.len()) {
1044 ty = Backtrack;
1045 } else {
1046 ty = PikeVM;
1047 }
1048 }
1049 if quit_after_match_with_pos || ty == PikeVM {
1053 self.exec_pikevm(
1054 matches,
1055 slots,
1056 quit_after_match,
1057 text,
1058 start,
1059 end,
1060 )
1061 } else {
1062 self.exec_backtrack(matches, slots, text, start, end)
1063 }
1064 }
1065
1066 fn exec_pikevm(
1068 &self,
1069 matches: &mut [bool],
1070 slots: &mut [Slot],
1071 quit_after_match: bool,
1072 text: &[u8],
1073 start: usize,
1074 end: usize,
1075 ) -> bool {
1076 if self.ro.nfa.uses_bytes() {
1077 pikevm::Fsm::exec(
1078 &self.ro.nfa,
1079 self.cache.value(),
1080 matches,
1081 slots,
1082 quit_after_match,
1083 ByteInput::new(text, self.ro.nfa.only_utf8),
1084 start,
1085 end,
1086 )
1087 } else {
1088 pikevm::Fsm::exec(
1089 &self.ro.nfa,
1090 self.cache.value(),
1091 matches,
1092 slots,
1093 quit_after_match,
1094 CharInput::new(text),
1095 start,
1096 end,
1097 )
1098 }
1099 }
1100
1101 fn exec_backtrack(
1103 &self,
1104 matches: &mut [bool],
1105 slots: &mut [Slot],
1106 text: &[u8],
1107 start: usize,
1108 end: usize,
1109 ) -> bool {
1110 if self.ro.nfa.uses_bytes() {
1111 backtrack::Bounded::exec(
1112 &self.ro.nfa,
1113 self.cache.value(),
1114 matches,
1115 slots,
1116 ByteInput::new(text, self.ro.nfa.only_utf8),
1117 start,
1118 end,
1119 )
1120 } else {
1121 backtrack::Bounded::exec(
1122 &self.ro.nfa,
1123 self.cache.value(),
1124 matches,
1125 slots,
1126 CharInput::new(text),
1127 start,
1128 end,
1129 )
1130 }
1131 }
1132
1133 pub fn many_matches_at(
1141 &self,
1142 matches: &mut [bool],
1143 text: &[u8],
1144 start: usize,
1145 ) -> bool {
1146 use self::MatchType::*;
1147 if !self.is_anchor_end_match(text) {
1148 return false;
1149 }
1150 match self.ro.match_type {
1151 #[cfg(feature = "perf-literal")]
1152 Literal(ty) => {
1153 debug_assert_eq!(matches.len(), 1);
1154 matches[0] = self.find_literals(ty, text, start).is_some();
1155 matches[0]
1156 }
1157 #[cfg(feature = "perf-dfa")]
1158 Dfa | DfaAnchoredReverse | DfaMany => {
1159 match dfa::Fsm::forward_many(
1160 &self.ro.dfa,
1161 self.cache.value(),
1162 matches,
1163 text,
1164 start,
1165 ) {
1166 dfa::Result::Match(_) => true,
1167 dfa::Result::NoMatch(_) => false,
1168 dfa::Result::Quit => self.exec_nfa(
1169 MatchNfaType::Auto,
1170 matches,
1171 &mut [],
1172 false,
1173 false,
1174 text,
1175 start,
1176 text.len(),
1177 ),
1178 }
1179 }
1180 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
1181 DfaSuffix => {
1182 match dfa::Fsm::forward_many(
1183 &self.ro.dfa,
1184 self.cache.value(),
1185 matches,
1186 text,
1187 start,
1188 ) {
1189 dfa::Result::Match(_) => true,
1190 dfa::Result::NoMatch(_) => false,
1191 dfa::Result::Quit => self.exec_nfa(
1192 MatchNfaType::Auto,
1193 matches,
1194 &mut [],
1195 false,
1196 false,
1197 text,
1198 start,
1199 text.len(),
1200 ),
1201 }
1202 }
1203 Nfa(ty) => self.exec_nfa(
1204 ty,
1205 matches,
1206 &mut [],
1207 false,
1208 false,
1209 text,
1210 start,
1211 text.len(),
1212 ),
1213 Nothing => false,
1214 }
1215 }
1216
1217 #[cfg_attr(feature = "perf-inline", inline(always))]
1218 fn is_anchor_end_match(&self, text: &[u8]) -> bool {
1219 #[cfg(not(feature = "perf-literal"))]
1220 fn imp(_: &ExecReadOnly, _: &[u8]) -> bool {
1221 true
1222 }
1223
1224 #[cfg(feature = "perf-literal")]
1225 fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool {
1226 if text.len() > (1 << 20) && ro.nfa.is_anchored_end {
1228 let lcs = ro.suffixes.lcs();
1229 if lcs.len() >= 1 && !lcs.is_suffix(text) {
1230 return false;
1231 }
1232 }
1233 true
1234 }
1235
1236 imp(&self.ro, text)
1237 }
1238
1239 pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
1240 &self.ro.nfa.capture_name_idx
1241 }
1242}
1243
1244impl<'c> ExecNoSyncStr<'c> {
1245 pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
1246 self.0.capture_name_idx()
1247 }
1248}
1249
1250impl Exec {
1251 #[cfg_attr(feature = "perf-inline", inline(always))]
1253 pub fn searcher(&self) -> ExecNoSync {
1254 let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
1255 ExecNoSync {
1256 ro: &self.ro, cache: self.cache.get_or(create),
1258 }
1259 }
1260
1261 #[cfg_attr(feature = "perf-inline", inline(always))]
1263 pub fn searcher_str(&self) -> ExecNoSyncStr {
1264 ExecNoSyncStr(self.searcher())
1265 }
1266
1267 pub fn into_regex(self) -> re_unicode::Regex {
1269 re_unicode::Regex::from(self)
1270 }
1271
1272 pub fn into_regex_set(self) -> re_set::unicode::RegexSet {
1274 re_set::unicode::RegexSet::from(self)
1275 }
1276
1277 pub fn into_byte_regex(self) -> re_bytes::Regex {
1279 re_bytes::Regex::from(self)
1280 }
1281
1282 pub fn into_byte_regex_set(self) -> re_set::bytes::RegexSet {
1284 re_set::bytes::RegexSet::from(self)
1285 }
1286
1287 pub fn regex_strings(&self) -> &[String] {
1290 &self.ro.res
1291 }
1292
1293 pub fn capture_names(&self) -> &[Option<String>] {
1297 &self.ro.nfa.captures
1298 }
1299
1300 pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
1303 &self.ro.nfa.capture_name_idx
1304 }
1305}
1306
1307impl Clone for Exec {
1308 fn clone(&self) -> Exec {
1309 Exec { ro: self.ro.clone(), cache: Cached::new() }
1310 }
1311}
1312
1313impl ExecReadOnly {
1314 fn choose_match_type(&self, hint: Option<MatchType>) -> MatchType {
1315 if let Some(MatchType::Nfa(_)) = hint {
1316 return hint.unwrap();
1317 }
1318 if self.nfa.insts.is_empty() {
1320 return MatchType::Nothing;
1321 }
1322 if let Some(literalty) = self.choose_literal_match_type() {
1323 return literalty;
1324 }
1325 if let Some(dfaty) = self.choose_dfa_match_type() {
1326 return dfaty;
1327 }
1328 MatchType::Nfa(MatchNfaType::Auto)
1330 }
1331
1332 fn choose_literal_match_type(&self) -> Option<MatchType> {
1335 #[cfg(not(feature = "perf-literal"))]
1336 fn imp(_: &ExecReadOnly) -> Option<MatchType> {
1337 None
1338 }
1339
1340 #[cfg(feature = "perf-literal")]
1341 fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
1342 if ro.res.len() != 1 {
1354 return None;
1355 }
1356 if ro.ac.is_some() {
1357 return Some(MatchType::Literal(
1358 MatchLiteralType::AhoCorasick,
1359 ));
1360 }
1361 if ro.nfa.prefixes.complete() {
1362 return if ro.nfa.is_anchored_start {
1363 Some(MatchType::Literal(MatchLiteralType::AnchoredStart))
1364 } else {
1365 Some(MatchType::Literal(MatchLiteralType::Unanchored))
1366 };
1367 }
1368 if ro.suffixes.complete() {
1369 return if ro.nfa.is_anchored_end {
1370 Some(MatchType::Literal(MatchLiteralType::AnchoredEnd))
1371 } else {
1372 Some(MatchType::Literal(MatchLiteralType::Unanchored))
1376 };
1377 }
1378 None
1379 }
1380
1381 imp(self)
1382 }
1383
1384 fn choose_dfa_match_type(&self) -> Option<MatchType> {
1386 #[cfg(not(feature = "perf-dfa"))]
1387 fn imp(_: &ExecReadOnly) -> Option<MatchType> {
1388 None
1389 }
1390
1391 #[cfg(feature = "perf-dfa")]
1392 fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
1393 if !dfa::can_exec(&ro.dfa) {
1394 return None;
1395 }
1396 if ro.res.len() >= 2 {
1398 return Some(MatchType::DfaMany);
1399 }
1400 if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end {
1403 return Some(MatchType::DfaAnchoredReverse);
1404 }
1405 #[cfg(feature = "perf-literal")]
1406 {
1407 if ro.should_suffix_scan() {
1410 return Some(MatchType::DfaSuffix);
1411 }
1412 }
1413 Some(MatchType::Dfa)
1415 }
1416
1417 imp(self)
1418 }
1419
1420 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
1435 fn should_suffix_scan(&self) -> bool {
1436 if self.suffixes.is_empty() {
1437 return false;
1438 }
1439 let lcs_len = self.suffixes.lcs().char_len();
1440 lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
1441 }
1442}
1443
1444#[derive(Clone, Copy, Debug)]
1445enum MatchType {
1446 #[cfg(feature = "perf-literal")]
1449 Literal(MatchLiteralType),
1450 #[cfg(feature = "perf-dfa")]
1452 Dfa,
1453 #[cfg(feature = "perf-dfa")]
1455 DfaAnchoredReverse,
1456 #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
1458 DfaSuffix,
1459 #[cfg(feature = "perf-dfa")]
1461 DfaMany,
1462 Nfa(MatchNfaType),
1464 Nothing,
1466}
1467
1468#[derive(Clone, Copy, Debug)]
1469#[cfg(feature = "perf-literal")]
1470enum MatchLiteralType {
1471 Unanchored,
1473 AnchoredStart,
1475 AnchoredEnd,
1477 AhoCorasick,
1480}
1481
1482#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1483enum MatchNfaType {
1484 Auto,
1486 Backtrack,
1491 PikeVM,
1496}
1497
1498pub type ProgramCache = RefCell<ProgramCacheInner>;
1501
1502#[derive(Debug)]
1503pub struct ProgramCacheInner {
1504 pub pikevm: pikevm::Cache,
1505 pub backtrack: backtrack::Cache,
1506 #[cfg(feature = "perf-dfa")]
1507 pub dfa: dfa::Cache,
1508 #[cfg(feature = "perf-dfa")]
1509 pub dfa_reverse: dfa::Cache,
1510}
1511
1512impl ProgramCacheInner {
1513 fn new(ro: &ExecReadOnly) -> Self {
1514 ProgramCacheInner {
1515 pikevm: pikevm::Cache::new(&ro.nfa),
1516 backtrack: backtrack::Cache::new(&ro.nfa),
1517 #[cfg(feature = "perf-dfa")]
1518 dfa: dfa::Cache::new(&ro.dfa),
1519 #[cfg(feature = "perf-dfa")]
1520 dfa_reverse: dfa::Cache::new(&ro.dfa_reverse),
1521 }
1522 }
1523}
1524
1525#[cfg(feature = "perf-literal")]
1528fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
1529 use syntax::hir::{HirKind, Literal};
1530
1531 if !expr.is_alternation_literal() {
1540 return None;
1541 }
1542 let alts = match *expr.kind() {
1543 HirKind::Alternation(ref alts) => alts,
1544 _ => return None, };
1546
1547 let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
1548 Literal::Unicode(c) => {
1549 let mut buf = [0; 4];
1550 dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
1551 }
1552 Literal::Byte(b) => {
1553 dst.push(b);
1554 }
1555 };
1556
1557 let mut lits = vec![];
1558 for alt in alts {
1559 let mut lit = vec![];
1560 match *alt.kind() {
1561 HirKind::Literal(ref x) => extendlit(x, &mut lit),
1562 HirKind::Concat(ref exprs) => {
1563 for e in exprs {
1564 match *e.kind() {
1565 HirKind::Literal(ref x) => extendlit(x, &mut lit),
1566 _ => unreachable!("expected literal, got {:?}", e),
1567 }
1568 }
1569 }
1570 _ => unreachable!("expected literal or concat, got {:?}", alt),
1571 }
1572 lits.push(lit);
1573 }
1574 Some(lits)
1575}
1576
1577#[cfg(test)]
1578mod test {
1579 #[test]
1580 fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
1581 use internal::ExecBuilder;
1582
1583 let backtrack_bytes_re = ExecBuilder::new("^S")
1584 .bounded_backtracking()
1585 .only_utf8(false)
1586 .build()
1587 .map(|exec| exec.into_byte_regex())
1588 .map_err(|err| format!("{}", err))
1589 .unwrap();
1590
1591 let default_bytes_re = ExecBuilder::new("^S")
1592 .only_utf8(false)
1593 .build()
1594 .map(|exec| exec.into_byte_regex())
1595 .map_err(|err| format!("{}", err))
1596 .unwrap();
1597
1598 let input = vec![83, 83];
1599
1600 let s1 = backtrack_bytes_re.split(&input);
1601 let s2 = default_bytes_re.split(&input);
1602 for (chunk1, chunk2) in s1.zip(s2) {
1603 assert_eq!(chunk1, chunk2);
1604 }
1605 }
1606
1607 #[test]
1608 fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
1609 use internal::ExecBuilder;
1610
1611 let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
1612 .bounded_backtracking()
1613 .bytes(true)
1614 .build()
1615 .map(|exec| exec.into_regex())
1616 .map_err(|err| format!("{}", err))
1617 .unwrap();
1618
1619 let default_bytes_re = ExecBuilder::new(r"^(?u:\*)")
1620 .bytes(true)
1621 .build()
1622 .map(|exec| exec.into_regex())
1623 .map_err(|err| format!("{}", err))
1624 .unwrap();
1625
1626 let input = "**";
1627
1628 let s1 = backtrack_bytes_re.split(input);
1629 let s2 = default_bytes_re.split(input);
1630 for (chunk1, chunk2) in s1.zip(s2) {
1631 assert_eq!(chunk1, chunk2);
1632 }
1633 }
1634}