memchr/
lib.rs

1/*!
2The `memchr` crate provides heavily optimized routines for searching bytes.
3
4The `memchr` function is traditionally provided by libc, however, the
5performance of `memchr` can vary significantly depending on the specific
6implementation of libc that is used. They can range from manually tuned
7Assembly implementations (like that found in GNU's libc) all the way to
8non-vectorized C implementations (like that found in MUSL).
9
10To smooth out the differences between implementations of libc, at least
11on `x86_64` for Rust 1.27+, this crate provides its own implementation of
12`memchr` that should perform competitively with the one found in GNU's libc.
13The implementation is in pure Rust and has no dependency on a C compiler or an
14Assembler.
15
16Additionally, GNU libc also provides an extension, `memrchr`. This crate
17provides its own implementation of `memrchr` as well, on top of `memchr2`,
18`memchr3`, `memrchr2` and `memrchr3`. The difference between `memchr` and
19`memchr2` is that that `memchr2` permits finding all occurrences of two bytes
20instead of one. Similarly for `memchr3`.
21*/
22
23#![cfg_attr(not(feature = "use_std"), no_std)]
24
25#![deny(missing_docs)]
26#![doc(html_root_url = "https://docs.rs/memchr/2.0.0")]
27
28// Supporting 16-bit would be fine. If you need it, please submit a bug report
29// at https://github.com/BurntSushi/rust-memchr
30#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))]
31compile_error!("memchr currently not supported on non-32 or non-64 bit");
32
33#[cfg(feature = "use_std")]
34extern crate core;
35
36#[cfg(test)]
37#[macro_use]
38extern crate quickcheck;
39
40use core::iter::Rev;
41
42pub use iter::{Memchr, Memchr2, Memchr3};
43
44// N.B. If you're looking for the cfg knobs for libc, see build.rs.
45#[cfg(memchr_libc)]
46mod c;
47#[allow(dead_code)]
48mod fallback;
49mod iter;
50mod naive;
51#[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
52mod x86;
53#[cfg(test)]
54mod tests;
55
56/// An iterator over all occurrences of the needle in a haystack.
57#[inline]
58pub fn memchr_iter(needle: u8, haystack: &[u8]) -> Memchr {
59    Memchr::new(needle, haystack)
60}
61
62/// An iterator over all occurrences of the needles in a haystack.
63#[inline]
64pub fn memchr2_iter(
65    needle1: u8,
66    needle2: u8,
67    haystack: &[u8],
68) -> Memchr2 {
69    Memchr2::new(needle1, needle2, haystack)
70}
71
72/// An iterator over all occurrences of the needles in a haystack.
73#[inline]
74pub fn memchr3_iter(
75    needle1: u8,
76    needle2: u8,
77    needle3: u8,
78    haystack: &[u8],
79) -> Memchr3 {
80    Memchr3::new(needle1, needle2, needle3, haystack)
81}
82
83/// An iterator over all occurrences of the needle in a haystack, in reverse.
84#[inline]
85pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev<Memchr> {
86    Memchr::new(needle, haystack).rev()
87}
88
89/// An iterator over all occurrences of the needles in a haystack, in reverse.
90#[inline]
91pub fn memrchr2_iter(
92    needle1: u8,
93    needle2: u8,
94    haystack: &[u8],
95) -> Rev<Memchr2> {
96    Memchr2::new(needle1, needle2, haystack).rev()
97}
98
99/// An iterator over all occurrences of the needles in a haystack, in reverse.
100#[inline]
101pub fn memrchr3_iter(
102    needle1: u8,
103    needle2: u8,
104    needle3: u8,
105    haystack: &[u8],
106) -> Rev<Memchr3> {
107    Memchr3::new(needle1, needle2, needle3, haystack).rev()
108}
109
110/// Search for the first occurrence of a byte in a slice.
111///
112/// This returns the index corresponding to the first occurrence of `needle` in
113/// `haystack`, or `None` if one is not found.
114///
115/// While this is operationally the same as something like
116/// `haystack.iter().position(|&b| b == needle)`, `memchr` will use a highly
117/// optimized routine that can be up to an order of magnitude faster in some
118/// cases.
119///
120/// # Example
121///
122/// This shows how to find the first position of a byte in a byte string.
123///
124/// ```
125/// use memchr::memchr;
126///
127/// let haystack = b"the quick brown fox";
128/// assert_eq!(memchr(b'k', haystack), Some(8));
129/// ```
130#[inline]
131pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
132    #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
133    #[inline(always)]
134    fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
135        x86::memchr(n1, haystack)
136    }
137
138    #[cfg(all(
139        memchr_libc,
140        not(all(target_arch = "x86_64", memchr_runtime_simd))
141    ))]
142    #[inline(always)]
143    fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
144        c::memchr(n1, haystack)
145    }
146
147    #[cfg(all(
148        not(memchr_libc),
149        not(all(target_arch = "x86_64", memchr_runtime_simd))
150    ))]
151    #[inline(always)]
152    fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
153        fallback::memchr(n1, haystack)
154    }
155
156    if haystack.is_empty() {
157        None
158    } else {
159        imp(needle, haystack)
160    }
161}
162
163/// Like `memchr`, but searches for two bytes instead of one.
164#[inline]
165pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
166    #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
167    #[inline(always)]
168    fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
169        x86::memchr2(n1, n2, haystack)
170    }
171
172    #[cfg(not(all(target_arch = "x86_64", memchr_runtime_simd)))]
173    #[inline(always)]
174    fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
175        fallback::memchr2(n1, n2, haystack)
176    }
177
178    if haystack.is_empty() {
179        None
180    } else {
181        imp(needle1, needle2, haystack)
182    }
183}
184
185/// Like `memchr`, but searches for three bytes instead of one.
186#[inline]
187pub fn memchr3(
188    needle1: u8,
189    needle2: u8,
190    needle3: u8,
191    haystack: &[u8],
192) -> Option<usize> {
193    #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
194    #[inline(always)]
195    fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
196        x86::memchr3(n1, n2, n3, haystack)
197    }
198
199    #[cfg(not(all(target_arch = "x86_64", memchr_runtime_simd)))]
200    #[inline(always)]
201    fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
202        fallback::memchr3(n1, n2, n3, haystack)
203    }
204
205    if haystack.is_empty() {
206        None
207    } else {
208        imp(needle1, needle2, needle3, haystack)
209    }
210}
211
212/// Search for the last occurrence of a byte in a slice.
213///
214/// This returns the index corresponding to the last occurrence of `needle` in
215/// `haystack`, or `None` if one is not found.
216///
217/// While this is operationally the same as something like
218/// `haystack.iter().rposition(|&b| b == needle)`, `memrchr` will use a highly
219/// optimized routine that can be up to an order of magnitude faster in some
220/// cases.
221///
222/// # Example
223///
224/// This shows how to find the last position of a byte in a byte string.
225///
226/// ```
227/// use memchr::memrchr;
228///
229/// let haystack = b"the quick brown fox";
230/// assert_eq!(memrchr(b'o', haystack), Some(17));
231/// ```
232#[inline]
233pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
234    #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
235    #[inline(always)]
236    fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
237        x86::memrchr(n1, haystack)
238    }
239
240    #[cfg(all(
241        all(memchr_libc, target_os = "linux"),
242        not(all(target_arch = "x86_64", memchr_runtime_simd))
243    ))]
244    #[inline(always)]
245    fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
246        c::memrchr(n1, haystack)
247    }
248
249    #[cfg(all(
250        not(all(memchr_libc, target_os = "linux")),
251        not(all(target_arch = "x86_64", memchr_runtime_simd))
252    ))]
253    #[inline(always)]
254    fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
255        fallback::memrchr(n1, haystack)
256    }
257
258    if haystack.is_empty() {
259        None
260    } else {
261        imp(needle, haystack)
262    }
263}
264
265/// Like `memrchr`, but searches for two bytes instead of one.
266#[inline]
267pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
268    #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
269    #[inline(always)]
270    fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
271        x86::memrchr2(n1, n2, haystack)
272    }
273
274    #[cfg(not(all(target_arch = "x86_64", memchr_runtime_simd)))]
275    #[inline(always)]
276    fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
277        fallback::memrchr2(n1, n2, haystack)
278    }
279
280    if haystack.is_empty() {
281        None
282    } else {
283        imp(needle1, needle2, haystack)
284    }
285}
286
287/// Like `memrchr`, but searches for three bytes instead of one.
288#[inline]
289pub fn memrchr3(
290    needle1: u8,
291    needle2: u8,
292    needle3: u8,
293    haystack: &[u8],
294) -> Option<usize> {
295    #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
296    #[inline(always)]
297    fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
298        x86::memrchr3(n1, n2, n3, haystack)
299    }
300
301    #[cfg(not(all(target_arch = "x86_64", memchr_runtime_simd)))]
302    #[inline(always)]
303    fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
304        fallback::memrchr3(n1, n2, n3, haystack)
305    }
306
307    if haystack.is_empty() {
308        None
309    } else {
310        imp(needle1, needle2, needle3, haystack)
311    }
312}