aho_corasick/
state_id.rs

1use std::fmt::Debug;
2use std::hash::Hash;
3
4use error::{Error, Result};
5
6// NOTE: Most of this code was copied from regex-automata, but without the
7// (de)serialization specific stuff.
8
9/// Check that the premultiplication of the given state identifier can
10/// fit into the representation indicated by `S`. If it cannot, or if it
11/// overflows `usize` itself, then an error is returned.
12pub fn premultiply_overflow_error<S: StateID>(
13    last_state: S,
14    alphabet_len: usize,
15) -> Result<()> {
16    let requested = match last_state.to_usize().checked_mul(alphabet_len) {
17        Some(requested) => requested,
18        None => return Err(Error::premultiply_overflow(0, 0)),
19    };
20    if requested > S::max_id() {
21        return Err(Error::premultiply_overflow(S::max_id(), requested));
22    }
23    Ok(())
24}
25
26/// Convert the given `usize` to the chosen state identifier
27/// representation. If the given value cannot fit in the chosen
28/// representation, then an error is returned.
29pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
30    if value > S::max_id() {
31        Err(Error::state_id_overflow(S::max_id()))
32    } else {
33        Ok(S::from_usize(value))
34    }
35}
36
37/// Return the unique identifier for an automaton's fail state in the chosen
38/// representation indicated by `S`.
39pub fn fail_id<S: StateID>() -> S {
40    S::from_usize(0)
41}
42
43/// Return the unique identifier for an automaton's fail state in the chosen
44/// representation indicated by `S`.
45pub fn dead_id<S: StateID>() -> S {
46    S::from_usize(1)
47}
48
49mod private {
50    /// Sealed stops crates other than aho-corasick from implementing any
51    /// traits that use it.
52    pub trait Sealed {}
53    impl Sealed for u8 {}
54    impl Sealed for u16 {}
55    impl Sealed for u32 {}
56    impl Sealed for u64 {}
57    impl Sealed for usize {}
58}
59
60/// A trait describing the representation of an automaton's state identifier.
61///
62/// The purpose of this trait is to safely express both the possible state
63/// identifier representations that can be used in an automaton and to convert
64/// between state identifier representations and types that can be used to
65/// efficiently index memory (such as `usize`).
66///
67/// In general, one should not need to implement this trait explicitly. Indeed,
68/// for now, this trait is sealed such that it cannot be implemented by any
69/// other type. In particular, this crate provides implementations for `u8`,
70/// `u16`, `u32`, `u64` and `usize`. (`u32` and `u64` are only provided for
71/// targets that can represent all corresponding values in a `usize`.)
72///
73/// # Safety
74///
75/// This trait is unsafe because the correctness of its implementations may be
76/// relied upon by other unsafe code. For example, one possible way to
77/// implement this trait incorrectly would be to return a maximum identifier
78/// in `max_id` that is greater than the real maximum identifier. This will
79/// likely result in wrap-on-overflow semantics in release mode, which can in
80/// turn produce incorrect state identifiers. Those state identifiers may then
81/// in turn access out-of-bounds memory in an automaton's search routine, where
82/// bounds checks are explicitly elided for performance reasons.
83pub unsafe trait StateID:
84    private::Sealed
85    + Clone
86    + Copy
87    + Debug
88    + Eq
89    + Hash
90    + PartialEq
91    + PartialOrd
92    + Ord
93{
94    /// Convert from a `usize` to this implementation's representation.
95    ///
96    /// Implementors may assume that `n <= Self::max_id`. That is, implementors
97    /// do not need to check whether `n` can fit inside this implementation's
98    /// representation.
99    fn from_usize(n: usize) -> Self;
100
101    /// Convert this implementation's representation to a `usize`.
102    ///
103    /// Implementors must not return a `usize` value greater than
104    /// `Self::max_id` and must not permit overflow when converting between the
105    /// implementor's representation and `usize`. In general, the preferred
106    /// way for implementors to achieve this is to simply not provide
107    /// implementations of `StateID` that cannot fit into the target platform's
108    /// `usize`.
109    fn to_usize(self) -> usize;
110
111    /// Return the maximum state identifier supported by this representation.
112    ///
113    /// Implementors must return a correct bound. Doing otherwise may result
114    /// in memory unsafety.
115    fn max_id() -> usize;
116}
117
118unsafe impl StateID for usize {
119    #[inline]
120    fn from_usize(n: usize) -> usize {
121        n
122    }
123
124    #[inline]
125    fn to_usize(self) -> usize {
126        self
127    }
128
129    #[inline]
130    fn max_id() -> usize {
131        ::std::usize::MAX
132    }
133}
134
135unsafe impl StateID for u8 {
136    #[inline]
137    fn from_usize(n: usize) -> u8 {
138        n as u8
139    }
140
141    #[inline]
142    fn to_usize(self) -> usize {
143        self as usize
144    }
145
146    #[inline]
147    fn max_id() -> usize {
148        ::std::u8::MAX as usize
149    }
150}
151
152unsafe impl StateID for u16 {
153    #[inline]
154    fn from_usize(n: usize) -> u16 {
155        n as u16
156    }
157
158    #[inline]
159    fn to_usize(self) -> usize {
160        self as usize
161    }
162
163    #[inline]
164    fn max_id() -> usize {
165        ::std::u16::MAX as usize
166    }
167}
168
169#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
170unsafe impl StateID for u32 {
171    #[inline]
172    fn from_usize(n: usize) -> u32 {
173        n as u32
174    }
175
176    #[inline]
177    fn to_usize(self) -> usize {
178        self as usize
179    }
180
181    #[inline]
182    fn max_id() -> usize {
183        ::std::u32::MAX as usize
184    }
185}
186
187#[cfg(target_pointer_width = "64")]
188unsafe impl StateID for u64 {
189    #[inline]
190    fn from_usize(n: usize) -> u64 {
191        n as u64
192    }
193
194    #[inline]
195    fn to_usize(self) -> usize {
196        self as usize
197    }
198
199    #[inline]
200    fn max_id() -> usize {
201        ::std::u64::MAX as usize
202    }
203}