siphasher/
sip128.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash with a 128-bit output.
12
13use core::cmp;
14use core::hash;
15use core::marker::PhantomData;
16use core::mem;
17use core::ptr;
18use core::slice;
19
20/// A 128-bit (2x64) hash output
21#[derive(Debug, Clone, Copy, Default)]
22pub struct Hash128 {
23    pub h1: u64,
24    pub h2: u64,
25}
26
27impl From<u128> for Hash128 {
28    fn from(v: u128) -> Self {
29        Hash128 {
30            h1: v as u64,
31            h2: (v >> 64) as u64,
32        }
33    }
34}
35
36impl Into<u128> for Hash128 {
37    fn into(self) -> u128 {
38        (self.h1 as u128) | ((self.h2 as u128) << 64)
39    }
40}
41
42/// An implementation of SipHash128 1-3.
43#[derive(Debug, Clone, Copy, Default)]
44pub struct SipHasher13 {
45    hasher: Hasher<Sip13Rounds>,
46}
47
48/// An implementation of SipHash128 2-4.
49#[derive(Debug, Clone, Copy, Default)]
50pub struct SipHasher24 {
51    hasher: Hasher<Sip24Rounds>,
52}
53
54/// An implementation of SipHash128 2-4.
55///
56/// SipHash is a general-purpose hashing function: it runs at a good
57/// speed (competitive with Spooky and City) and permits strong _keyed_
58/// hashing. This lets you key your hashtables from a strong RNG, such as
59/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
60///
61/// Although the SipHash algorithm is considered to be generally strong,
62/// it is not intended for cryptographic purposes. As such, all
63/// cryptographic uses of this implementation are _strongly discouraged_.
64#[derive(Debug, Clone, Copy, Default)]
65pub struct SipHasher(SipHasher24);
66
67#[derive(Debug, Copy)]
68struct Hasher<S: Sip> {
69    k0: u64,
70    k1: u64,
71    length: usize, // how many bytes we've processed
72    state: State,  // hash State
73    tail: u64,     // unprocessed bytes le
74    ntail: usize,  // how many bytes in tail are valid
75    _marker: PhantomData<S>,
76}
77
78#[derive(Debug, Clone, Copy)]
79struct State {
80    // v0, v2 and v1, v3 show up in pairs in the algorithm,
81    // and simd implementations of SipHash will use vectors
82    // of v02 and v13. By placing them in this order in the struct,
83    // the compiler can pick up on just a few simd optimizations by itself.
84    v0: u64,
85    v2: u64,
86    v1: u64,
87    v3: u64,
88}
89
90macro_rules! compress {
91    ($state:expr) => {{
92        compress!($state.v0, $state.v1, $state.v2, $state.v3)
93    }};
94    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
95        $v0 = $v0.wrapping_add($v1);
96        $v1 = $v1.rotate_left(13);
97        $v1 ^= $v0;
98        $v0 = $v0.rotate_left(32);
99        $v2 = $v2.wrapping_add($v3);
100        $v3 = $v3.rotate_left(16);
101        $v3 ^= $v2;
102        $v0 = $v0.wrapping_add($v3);
103        $v3 = $v3.rotate_left(21);
104        $v3 ^= $v0;
105        $v2 = $v2.wrapping_add($v1);
106        $v1 = $v1.rotate_left(17);
107        $v1 ^= $v2;
108        $v2 = $v2.rotate_left(32);
109    }};
110}
111
112/// Load an integer of the desired type from a byte stream, in LE order. Uses
113/// `copy_nonoverlapping` to let the compiler generate the most efficient way
114/// to load it from a possibly unaligned address.
115///
116/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
117macro_rules! load_int_le {
118    ($buf:expr, $i:expr, $int_ty:ident) => {{
119        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
120        let mut data = 0 as $int_ty;
121        ptr::copy_nonoverlapping(
122            $buf.get_unchecked($i),
123            &mut data as *mut _ as *mut u8,
124            mem::size_of::<$int_ty>(),
125        );
126        data.to_le()
127    }};
128}
129
130/// Load an u64 using up to 7 bytes of a byte slice.
131///
132/// Unsafe because: unchecked indexing at start..start+len
133#[inline]
134unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
135    debug_assert!(len < 8);
136    let mut i = 0; // current byte index (from LSB) in the output u64
137    let mut out = 0;
138    if i + 3 < len {
139        out = load_int_le!(buf, start + i, u32) as u64;
140        i += 4;
141    }
142    if i + 1 < len {
143        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
144        i += 2
145    }
146    if i < len {
147        out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
148        i += 1;
149    }
150    debug_assert_eq!(i, len);
151    out
152}
153
154pub trait Hasher128 {
155    /// Return a 128-bit hash
156    fn finish128(&self) -> Hash128;
157}
158
159impl SipHasher {
160    /// Creates a new `SipHasher` with the two initial keys set to 0.
161    #[inline]
162    pub fn new() -> SipHasher {
163        SipHasher::new_with_keys(0, 0)
164    }
165
166    /// Creates a `SipHasher` that is keyed off the provided keys.
167    #[inline]
168    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
169        SipHasher(SipHasher24::new_with_keys(key0, key1))
170    }
171
172    /// Get the keys used by this hasher
173    pub fn keys(&self) -> (u64, u64) {
174        (self.0.hasher.k0, self.0.hasher.k1)
175    }
176}
177
178impl Hasher128 for SipHasher {
179    /// Return a 128-bit hash
180    #[inline]
181    fn finish128(&self) -> Hash128 {
182        self.0.finish128()
183    }
184}
185
186impl SipHasher13 {
187    /// Creates a new `SipHasher13` with the two initial keys set to 0.
188    #[inline]
189    pub fn new() -> SipHasher13 {
190        SipHasher13::new_with_keys(0, 0)
191    }
192
193    /// Creates a `SipHasher13` that is keyed off the provided keys.
194    #[inline]
195    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
196        SipHasher13 {
197            hasher: Hasher::new_with_keys(key0, key1),
198        }
199    }
200
201    /// Get the keys used by this hasher
202    pub fn keys(&self) -> (u64, u64) {
203        (self.hasher.k0, self.hasher.k1)
204    }
205}
206
207impl Hasher128 for SipHasher13 {
208    /// Return a 128-bit hash
209    #[inline]
210    fn finish128(&self) -> Hash128 {
211        self.hasher.finish128()
212    }
213}
214
215impl SipHasher24 {
216    /// Creates a new `SipHasher24` with the two initial keys set to 0.
217    #[inline]
218    pub fn new() -> SipHasher24 {
219        SipHasher24::new_with_keys(0, 0)
220    }
221
222    /// Creates a `SipHasher24` that is keyed off the provided keys.
223    #[inline]
224    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
225        SipHasher24 {
226            hasher: Hasher::new_with_keys(key0, key1),
227        }
228    }
229
230    /// Get the keys used by this hasher
231    pub fn keys(&self) -> (u64, u64) {
232        (self.hasher.k0, self.hasher.k1)
233    }
234}
235
236impl Hasher128 for SipHasher24 {
237    /// Return a 128-bit hash
238    #[inline]
239    fn finish128(&self) -> Hash128 {
240        self.hasher.finish128()
241    }
242}
243
244impl<S: Sip> Hasher<S> {
245    #[inline]
246    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
247        let mut state = Hasher {
248            k0: key0,
249            k1: key1,
250            length: 0,
251            state: State {
252                v0: 0,
253                v1: 0xee,
254                v2: 0,
255                v3: 0,
256            },
257            tail: 0,
258            ntail: 0,
259            _marker: PhantomData,
260        };
261        state.reset();
262        state
263    }
264
265    #[inline]
266    fn reset(&mut self) {
267        self.length = 0;
268        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
269        self.state.v1 = self.k1 ^ 0x646f72616e646f83;
270        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
271        self.state.v3 = self.k1 ^ 0x7465646279746573;
272        self.ntail = 0;
273    }
274
275    // Specialized write function that is only valid for buffers with len <= 8.
276    // It's used to force inlining of write_u8 and write_usize, those would normally be inlined
277    // except for composite types (that includes slices and str hashing because of delimiter).
278    // Without this extra push the compiler is very reluctant to inline delimiter writes,
279    // degrading performance substantially for the most common use cases.
280    #[inline(always)]
281    fn short_write(&mut self, msg: &[u8]) {
282        debug_assert!(msg.len() <= 8);
283        let length = msg.len();
284        self.length += length;
285
286        let needed = 8 - self.ntail;
287        let fill = cmp::min(length, needed);
288        if fill == 8 {
289            self.tail = unsafe { load_int_le!(msg, 0, u64) };
290        } else {
291            self.tail |= unsafe { u8to64_le(msg, 0, fill) } << (8 * self.ntail);
292            if length < needed {
293                self.ntail += length;
294                return;
295            }
296        }
297        self.state.v3 ^= self.tail;
298        S::c_rounds(&mut self.state);
299        self.state.v0 ^= self.tail;
300
301        // Buffered tail is now flushed, process new input.
302        self.ntail = length - needed;
303        self.tail = unsafe { u8to64_le(msg, needed, self.ntail) };
304    }
305}
306
307impl<S: Sip> Hasher<S> {
308    #[inline]
309    pub fn finish128(&self) -> Hash128 {
310        let mut state = self.state;
311
312        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
313
314        state.v3 ^= b;
315        S::c_rounds(&mut state);
316        state.v0 ^= b;
317
318        state.v2 ^= 0xee;
319        S::d_rounds(&mut state);
320        let h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
321
322        state.v1 ^= 0xdd;
323        S::d_rounds(&mut state);
324        let h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
325
326        Hash128 { h1, h2 }
327    }
328}
329
330impl hash::Hasher for SipHasher {
331    #[inline]
332    fn write(&mut self, msg: &[u8]) {
333        self.0.write(msg)
334    }
335
336    #[inline]
337    fn finish(&self) -> u64 {
338        self.0.finish()
339    }
340}
341
342impl hash::Hasher for SipHasher13 {
343    #[inline]
344    fn write(&mut self, msg: &[u8]) {
345        self.hasher.write(msg)
346    }
347
348    #[inline]
349    fn finish(&self) -> u64 {
350        self.hasher.finish()
351    }
352}
353
354impl hash::Hasher for SipHasher24 {
355    #[inline]
356    fn write(&mut self, msg: &[u8]) {
357        self.hasher.write(msg)
358    }
359
360    #[inline]
361    fn finish(&self) -> u64 {
362        self.hasher.finish()
363    }
364}
365
366impl<S: Sip> hash::Hasher for Hasher<S> {
367    // see short_write comment for explanation
368    #[inline]
369    fn write_usize(&mut self, i: usize) {
370        let bytes = unsafe {
371            slice::from_raw_parts(&i as *const usize as *const u8, mem::size_of::<usize>())
372        };
373        self.short_write(bytes);
374    }
375
376    // see short_write comment for explanation
377    #[inline]
378    fn write_u8(&mut self, i: u8) {
379        self.short_write(&[i]);
380    }
381
382    #[inline]
383    fn write(&mut self, msg: &[u8]) {
384        let length = msg.len();
385        self.length += length;
386
387        let mut needed = 0;
388
389        if self.ntail != 0 {
390            needed = 8 - self.ntail;
391            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
392            if length < needed {
393                self.ntail += length;
394                return;
395            } else {
396                self.state.v3 ^= self.tail;
397                S::c_rounds(&mut self.state);
398                self.state.v0 ^= self.tail;
399                self.ntail = 0;
400            }
401        }
402
403        // Buffered tail is now flushed, process new input.
404        let len = length - needed;
405        let left = len & 0x7;
406
407        let mut i = needed;
408        while i < len - left {
409            let mi = unsafe { load_int_le!(msg, i, u64) };
410
411            self.state.v3 ^= mi;
412            S::c_rounds(&mut self.state);
413            self.state.v0 ^= mi;
414
415            i += 8;
416        }
417
418        self.tail = unsafe { u8to64_le(msg, i, left) };
419        self.ntail = left;
420    }
421
422    #[inline]
423    fn finish(&self) -> u64 {
424        self.finish128().h2
425    }
426}
427
428impl<S: Sip> Clone for Hasher<S> {
429    #[inline]
430    fn clone(&self) -> Hasher<S> {
431        Hasher {
432            k0: self.k0,
433            k1: self.k1,
434            length: self.length,
435            state: self.state,
436            tail: self.tail,
437            ntail: self.ntail,
438            _marker: self._marker,
439        }
440    }
441}
442
443impl<S: Sip> Default for Hasher<S> {
444    /// Creates a `Hasher<S>` with the two initial keys set to 0.
445    #[inline]
446    fn default() -> Hasher<S> {
447        Hasher::new_with_keys(0, 0)
448    }
449}
450
451#[doc(hidden)]
452trait Sip {
453    fn c_rounds(_: &mut State);
454    fn d_rounds(_: &mut State);
455}
456
457#[derive(Debug, Clone, Copy, Default)]
458struct Sip13Rounds;
459
460impl Sip for Sip13Rounds {
461    #[inline]
462    fn c_rounds(state: &mut State) {
463        compress!(state);
464    }
465
466    #[inline]
467    fn d_rounds(state: &mut State) {
468        compress!(state);
469        compress!(state);
470        compress!(state);
471    }
472}
473
474#[derive(Debug, Clone, Copy, Default)]
475struct Sip24Rounds;
476
477impl Sip for Sip24Rounds {
478    #[inline]
479    fn c_rounds(state: &mut State) {
480        compress!(state);
481        compress!(state);
482    }
483
484    #[inline]
485    fn d_rounds(state: &mut State) {
486        compress!(state);
487        compress!(state);
488        compress!(state);
489        compress!(state);
490    }
491}
492
493impl Hash128 {
494    /// Convert into a 16-bytes vector
495    pub fn as_bytes(&self) -> [u8; 16] {
496        let mut bytes = [0u8; 16];
497        let h1 = self.h1.to_le();
498        let h2 = self.h2.to_le();
499        unsafe {
500            ptr::copy_nonoverlapping(&h1 as *const _ as *const u8, bytes.get_unchecked_mut(0), 8);
501            ptr::copy_nonoverlapping(&h2 as *const _ as *const u8, bytes.get_unchecked_mut(8), 8);
502        }
503        bytes
504    }
505
506    /// Convert into a `u128`
507    #[inline]
508    pub fn as_u128(&self) -> u128 {
509        let h1 = self.h1.to_le();
510        let h2 = self.h2.to_le();
511        h1 as u128 | ((h2 as u128) << 64)
512    }
513
514    /// Convert into `(u64, u64)`
515    #[inline]
516    pub fn as_u64(&self) -> (u64, u64) {
517        let h1 = self.h1.to_le();
518        let h2 = self.h2.to_le();
519        (h1, h2)
520    }
521}