// memchr/fallback.rs

// This module defines pure Rust platform independent implementations of all
// the memchr routines. We do our best to make them fast. Some of them may even
// get auto-vectorized.

use core::cmp;
use core::ptr;
use core::usize;
8
// The number of bytes in a `usize` on the current target. The word-at-a-time
// scanning below reads this many bytes per load.
#[cfg(target_pointer_width = "32")]
const USIZE_BYTES: usize = 4;

#[cfg(target_pointer_width = "64")]
const USIZE_BYTES: usize = 8;

// The number of bytes to loop at in one iteration of memchr/memrchr. The main
// loops are unrolled to process two words per iteration.
const LOOP_SIZE: usize = 2 * USIZE_BYTES;
17
/// Return `true` if `x` contains any zero byte.
///
/// From *Matters Computational*, J. Arndt
///
/// "The idea is to subtract one from each of the bytes and then look for
/// bytes where the borrow propagated all the way to the most significant
/// bit."
#[inline(always)]
fn contains_zero_byte(x: usize) -> bool {
    const LO_U64: u64 = 0x0101010101010101;
    const HI_U64: u64 = 0x8080808080808080;

    // On 32-bit targets these casts truncate to 0x01010101/0x80808080, which
    // are exactly the per-byte low/high masks needed there.
    const LO_USIZE: usize = LO_U64 as usize;
    const HI_USIZE: usize = HI_U64 as usize;

    // A byte of `x` is zero iff subtracting 1 borrows into its top bit while
    // that byte's top bit was not already set.
    x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
}
35
/// Repeat the given byte into a word size number. That is, every 8 bits
/// is equivalent to the given byte. For example, if `b` is `\x4E` or
/// `01001110` in binary, then the returned value on a 32-bit system would be:
/// `01001110_01001110_01001110_01001110`.
#[inline(always)]
fn repeat_byte(b: u8) -> usize {
    // `usize::MAX / 255` is 0x0101...01, so multiplying by `b` broadcasts
    // the byte into every byte lane without overflow.
    (b as usize) * (usize::MAX / 255)
}
44
45pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
46    let vn1 = repeat_byte(n1);
47    let confirm = |byte| byte == n1;
48    let loop_size = cmp::min(LOOP_SIZE, haystack.len());
49    let align = USIZE_BYTES - 1;
50    let start_ptr = haystack.as_ptr();
51    let end_ptr = haystack[haystack.len()..].as_ptr();
52    let mut ptr = start_ptr;
53
54    unsafe {
55        if haystack.len() < USIZE_BYTES {
56            return forward_search(start_ptr, end_ptr, ptr, confirm);
57        }
58
59        let chunk = read_unaligned_usize(ptr);
60        if contains_zero_byte(chunk ^ vn1) {
61            return forward_search(start_ptr, end_ptr, ptr, confirm);
62        }
63
64        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
65        debug_assert!(ptr > start_ptr);
66        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
67        while loop_size == LOOP_SIZE && ptr <= ptr_sub(end_ptr, loop_size) {
68            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
69
70            let a = *(ptr as *const usize);
71            let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
72            let eqa = contains_zero_byte(a ^ vn1);
73            let eqb = contains_zero_byte(b ^ vn1);
74            if eqa || eqb {
75                break;
76            }
77            ptr = ptr_add(ptr, LOOP_SIZE);
78        }
79        forward_search(start_ptr, end_ptr, ptr, confirm)
80    }
81}
82
83/// Like `memchr`, but searches for two bytes instead of one.
84pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
85    let vn1 = repeat_byte(n1);
86    let vn2 = repeat_byte(n2);
87    let confirm = |byte| byte == n1 || byte == n2;
88    let align = USIZE_BYTES - 1;
89    let start_ptr = haystack.as_ptr();
90    let end_ptr = haystack[haystack.len()..].as_ptr();
91    let mut ptr = start_ptr;
92
93    unsafe {
94        if haystack.len() < USIZE_BYTES {
95            return forward_search(start_ptr, end_ptr, ptr, confirm);
96        }
97
98        let chunk = read_unaligned_usize(ptr);
99        let eq1 = contains_zero_byte(chunk ^ vn1);
100        let eq2 = contains_zero_byte(chunk ^ vn2);
101        if eq1 || eq2 {
102            return forward_search(start_ptr, end_ptr, ptr, confirm);
103        }
104
105        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
106        debug_assert!(ptr > start_ptr);
107        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
108        while ptr <= ptr_sub(end_ptr, USIZE_BYTES) {
109            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
110
111            let chunk = *(ptr as *const usize);
112            let eq1 = contains_zero_byte(chunk ^ vn1);
113            let eq2 = contains_zero_byte(chunk ^ vn2);
114            if eq1 || eq2 {
115                break;
116            }
117            ptr = ptr_add(ptr, USIZE_BYTES);
118        }
119        forward_search(start_ptr, end_ptr, ptr, confirm)
120    }
121}
122
123/// Like `memchr`, but searches for three bytes instead of one.
124pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
125    let vn1 = repeat_byte(n1);
126    let vn2 = repeat_byte(n2);
127    let vn3 = repeat_byte(n3);
128    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
129    let align = USIZE_BYTES - 1;
130    let start_ptr = haystack.as_ptr();
131    let end_ptr = haystack[haystack.len()..].as_ptr();
132    let mut ptr = start_ptr;
133
134    unsafe {
135        if haystack.len() < USIZE_BYTES {
136            return forward_search(start_ptr, end_ptr, ptr, confirm);
137        }
138
139        let chunk = read_unaligned_usize(ptr);
140        let eq1 = contains_zero_byte(chunk ^ vn1);
141        let eq2 = contains_zero_byte(chunk ^ vn2);
142        let eq3 = contains_zero_byte(chunk ^ vn3);
143        if eq1 || eq2 || eq3 {
144            return forward_search(start_ptr, end_ptr, ptr, confirm);
145        }
146
147        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
148        debug_assert!(ptr > start_ptr);
149        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
150        while ptr <= ptr_sub(end_ptr, USIZE_BYTES) {
151            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
152
153            let chunk = *(ptr as *const usize);
154            let eq1 = contains_zero_byte(chunk ^ vn1);
155            let eq2 = contains_zero_byte(chunk ^ vn2);
156            let eq3 = contains_zero_byte(chunk ^ vn3);
157            if eq1 || eq2 || eq3 {
158                break;
159            }
160            ptr = ptr_add(ptr, USIZE_BYTES);
161        }
162        forward_search(start_ptr, end_ptr, ptr, confirm)
163    }
164}
165
166/// Return the last index matching the byte `x` in `text`.
167pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
168    let vn1 = repeat_byte(n1);
169    let confirm = |byte| byte == n1;
170    let loop_size = cmp::min(LOOP_SIZE, haystack.len());
171    let align = USIZE_BYTES - 1;
172    let start_ptr = haystack.as_ptr();
173    let end_ptr = haystack[haystack.len()..].as_ptr();
174    let mut ptr = end_ptr;
175
176    unsafe {
177        if haystack.len() < USIZE_BYTES {
178            return reverse_search(start_ptr, end_ptr, ptr, confirm);
179        }
180
181        let chunk = read_unaligned_usize(ptr_sub(ptr, USIZE_BYTES));
182        if contains_zero_byte(chunk ^ vn1) {
183            return reverse_search(start_ptr, end_ptr, ptr, confirm);
184        }
185
186        ptr = (end_ptr as usize & !align) as *const u8;
187        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
188        while loop_size == LOOP_SIZE && ptr >= ptr_add(start_ptr, loop_size) {
189            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
190
191            let a = *(ptr_sub(ptr, 2 * USIZE_BYTES) as *const usize);
192            let b = *(ptr_sub(ptr, 1 * USIZE_BYTES) as *const usize);
193            let eqa = contains_zero_byte(a ^ vn1);
194            let eqb = contains_zero_byte(b ^ vn1);
195            if eqa || eqb {
196                break;
197            }
198            ptr = ptr_sub(ptr, loop_size);
199        }
200        reverse_search(start_ptr, end_ptr, ptr, confirm)
201    }
202}
203
204/// Like `memrchr`, but searches for two bytes instead of one.
205pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
206    let vn1 = repeat_byte(n1);
207    let vn2 = repeat_byte(n2);
208    let confirm = |byte| byte == n1 || byte == n2;
209    let align = USIZE_BYTES - 1;
210    let start_ptr = haystack.as_ptr();
211    let end_ptr = haystack[haystack.len()..].as_ptr();
212    let mut ptr = end_ptr;
213
214    unsafe {
215        if haystack.len() < USIZE_BYTES {
216            return reverse_search(start_ptr, end_ptr, ptr, confirm);
217        }
218
219        let chunk = read_unaligned_usize(ptr_sub(ptr, USIZE_BYTES));
220        let eq1 = contains_zero_byte(chunk ^ vn1);
221        let eq2 = contains_zero_byte(chunk ^ vn2);
222        if eq1 || eq2 {
223            return reverse_search(start_ptr, end_ptr, ptr, confirm);
224        }
225
226        ptr = (end_ptr as usize & !align) as *const u8;
227        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
228        while ptr >= ptr_add(start_ptr, USIZE_BYTES) {
229            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
230
231            let chunk = *(ptr_sub(ptr, USIZE_BYTES) as *const usize);
232            let eq1 = contains_zero_byte(chunk ^ vn1);
233            let eq2 = contains_zero_byte(chunk ^ vn2);
234            if eq1 || eq2 {
235                break;
236            }
237            ptr = ptr_sub(ptr, USIZE_BYTES);
238        }
239        reverse_search(start_ptr, end_ptr, ptr, confirm)
240    }
241}
242
243/// Like `memrchr`, but searches for three bytes instead of one.
244pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
245    let vn1 = repeat_byte(n1);
246    let vn2 = repeat_byte(n2);
247    let vn3 = repeat_byte(n3);
248    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
249    let align = USIZE_BYTES - 1;
250    let start_ptr = haystack.as_ptr();
251    let end_ptr = haystack[haystack.len()..].as_ptr();
252    let mut ptr = end_ptr;
253
254    unsafe {
255        if haystack.len() < USIZE_BYTES {
256            return reverse_search(start_ptr, end_ptr, ptr, confirm);
257        }
258
259        let chunk = read_unaligned_usize(ptr_sub(ptr, USIZE_BYTES));
260        let eq1 = contains_zero_byte(chunk ^ vn1);
261        let eq2 = contains_zero_byte(chunk ^ vn2);
262        let eq3 = contains_zero_byte(chunk ^ vn3);
263        if eq1 || eq2 || eq3 {
264            return reverse_search(start_ptr, end_ptr, ptr, confirm);
265        }
266
267        ptr = (end_ptr as usize & !align) as *const u8;
268        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
269        while ptr >= ptr_add(start_ptr, USIZE_BYTES) {
270            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
271
272            let chunk = *(ptr_sub(ptr, USIZE_BYTES) as *const usize);
273            let eq1 = contains_zero_byte(chunk ^ vn1);
274            let eq2 = contains_zero_byte(chunk ^ vn2);
275            let eq3 = contains_zero_byte(chunk ^ vn3);
276            if eq1 || eq2 || eq3 {
277                break;
278            }
279            ptr = ptr_sub(ptr, USIZE_BYTES);
280        }
281        reverse_search(start_ptr, end_ptr, ptr, confirm)
282    }
283}
284
285#[inline(always)]
286unsafe fn forward_search<F: Fn(u8) -> bool>(
287    start_ptr: *const u8,
288    end_ptr: *const u8,
289    mut ptr: *const u8,
290    confirm: F,
291) -> Option<usize> {
292    debug_assert!(start_ptr <= ptr);
293    debug_assert!(ptr <= end_ptr);
294
295    while ptr < end_ptr {
296        if confirm(*ptr) {
297            return Some(sub(ptr, start_ptr));
298        }
299        ptr = ptr.offset(1);
300    }
301    None
302}
303
304#[inline(always)]
305unsafe fn reverse_search<F: Fn(u8) -> bool>(
306    start_ptr: *const u8,
307    end_ptr: *const u8,
308    mut ptr: *const u8,
309    confirm: F,
310) -> Option<usize> {
311    debug_assert!(start_ptr <= ptr);
312    debug_assert!(ptr <= end_ptr);
313
314    while ptr > start_ptr {
315        ptr = ptr.offset(-1);
316        if confirm(*ptr) {
317            return Some(sub(ptr, start_ptr));
318        }
319    }
320    None
321}
322
/// Increment the given pointer by the given amount.
///
/// # Safety
///
/// The resulting pointer must stay within (or one past the end of) the same
/// allocation, per the contract of `pointer::offset`.
unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
    // `offset` takes an `isize`; guard the cast in debug builds.
    debug_assert!(amt < ::core::isize::MAX as usize);
    ptr.offset(amt as isize)
}
328
/// Decrement the given pointer by the given amount.
///
/// # Safety
///
/// The resulting pointer must stay within the same allocation, per the
/// contract of `pointer::offset`.
unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
    // `offset` takes an `isize`; guard the cast in debug builds.
    debug_assert!(amt < ::core::isize::MAX as usize);
    ptr.offset((amt as isize).wrapping_neg())
}
334
/// Read a `usize`-sized chunk of memory starting at `ptr`, without requiring
/// `ptr` to be word-aligned.
///
/// # Safety
///
/// `ptr` must be valid for a read of `size_of::<usize>()` bytes.
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
    // Equivalent to copying USIZE_BYTES bytes into a local with
    // `ptr::copy_nonoverlapping`, but expressed with the dedicated helper.
    ptr::read_unaligned(ptr as *const usize)
}
340
/// Subtract `b` from `a` and return the difference as a byte count. `a`
/// should be greater than or equal to `b`.
fn sub(a: *const u8, b: *const u8) -> usize {
    debug_assert!(a >= b);
    (a as usize) - (b as usize)
}