ahash/
operations.rs

1use crate::convert::*;
2#[allow(unused)]
3use zerocopy::transmute;
4
5///This constant comes from Kunth's prng (Empirically it works better than those from splitmix32).
6pub(crate) const MULTIPLE: u64 = 6364136223846793005;
7
8/// This is a constant with a lot of special properties found by automated search.
9/// See the unit tests below. (Below are alternative values)
10#[cfg(all(target_feature = "ssse3", not(miri)))]
11const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
12//const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128;
13//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;
14
15#[inline(always)]
16#[cfg(feature = "folded_multiply")]
17pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
18    let result = (s as u128).wrapping_mul(by as u128);
19    ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
20}
21
22#[inline(always)]
23#[cfg(not(feature = "folded_multiply"))]
24pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
25    let b1 = s.wrapping_mul(by.swap_bytes());
26    let b2 = s.swap_bytes().wrapping_mul(!by);
27    b1 ^ b2.swap_bytes()
28}
29
30/// Given a small (less than 8 byte slice) returns the same data stored in two u32s.
31/// (order of and non-duplication of bytes is NOT guaranteed)
32#[inline(always)]
33pub(crate) fn read_small(data: &[u8]) -> [u64; 2] {
34    debug_assert!(data.len() <= 8);
35    if data.len() >= 2 {
36        if data.len() >= 4 {
37            //len 4-8
38            [data.read_u32().0 as u64, data.read_last_u32() as u64]
39        } else {
40            //len 2-3
41            [data.read_u16().0 as u64, data[data.len() - 1] as u64]
42        }
43    } else {
44        if data.len() > 0 {
45            [data[0] as u64, data[0] as u64]
46        } else {
47            [0, 0]
48        }
49    }
50}
51
52#[inline(always)]
53pub(crate) fn shuffle(a: u128) -> u128 {
54    #[cfg(all(target_feature = "ssse3", not(miri)))]
55    {
56        #[cfg(target_arch = "x86")]
57        use core::arch::x86::*;
58        #[cfg(target_arch = "x86_64")]
59        use core::arch::x86_64::*;
60        unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
61    }
62    #[cfg(not(all(target_feature = "ssse3", not(miri))))]
63    {
64        a.swap_bytes()
65    }
66}
67
68#[allow(unused)] //not used by fallback
69#[inline(always)]
70pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 {
71    let sum = add_by_64s(a.convert(), b.convert());
72    shuffle(sum.convert())
73}
74
75#[allow(unused)] //not used by fallback
76#[inline(always)]
77pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 {
78    let shuffled: [u64; 2] = shuffle(base).convert();
79    add_by_64s(shuffled, to_add.convert()).convert()
80}
81
82#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
83#[inline(always)]
84pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
85    unsafe {
86        #[cfg(target_arch = "x86")]
87        use core::arch::x86::*;
88        #[cfg(target_arch = "x86_64")]
89        use core::arch::x86_64::*;
90        transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
91    }
92}
93
94#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
95#[inline(always)]
96pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
97    [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
98}
99
100#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
101#[allow(unused)]
102#[inline(always)]
103pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
104    #[cfg(target_arch = "x86")]
105    use core::arch::x86::*;
106    #[cfg(target_arch = "x86_64")]
107    use core::arch::x86_64::*;
108    unsafe {
109        let value = transmute!(value);
110        transmute!(_mm_aesenc_si128(value, transmute!(xor)))
111    }
112}
113
114#[cfg(any(
115    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
116    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
117))]
118#[allow(unused)]
119#[inline(always)]
120pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
121    #[cfg(target_arch = "aarch64")]
122    use core::arch::aarch64::*;
123    #[cfg(target_arch = "arm")]
124    use core::arch::arm::*;
125    let res = unsafe { vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(0u128))) };
126    let value: u128 = transmute!(res);
127    xor ^ value
128}
129
130#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
131#[allow(unused)]
132#[inline(always)]
133pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
134    #[cfg(target_arch = "x86")]
135    use core::arch::x86::*;
136    #[cfg(target_arch = "x86_64")]
137    use core::arch::x86_64::*;
138    unsafe {
139        let value = transmute!(value);
140        transmute!(_mm_aesdec_si128(value, transmute!(xor)))
141    }
142}
143
144#[cfg(any(
145    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
146    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
147))]
148#[allow(unused)]
149#[inline(always)]
150pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
151    #[cfg(target_arch = "aarch64")]
152    use core::arch::aarch64::*;
153    #[cfg(target_arch = "arm")]
154    use core::arch::arm::*;
155    let res = unsafe { vaesimcq_u8(vaesdq_u8(transmute!(value), transmute!(0u128))) };
156    let value: u128 = transmute!(res);
157    xor ^ value
158}
159
160#[allow(unused)]
161#[inline(always)]
162pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
163    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
164    {
165        #[cfg(target_arch = "x86_64")]
166        use core::arch::x86_64::*;
167
168        unsafe {
169            let enc = enc as *mut u128;
170            let len = _mm_cvtsi64_si128(len as i64);
171            let data = _mm_loadu_si128(enc.cast());
172            let sum = _mm_add_epi64(data, len);
173            _mm_storeu_si128(enc.cast(), sum);
174        }
175    }
176    #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
177    {
178        let mut t: [u64; 2] = enc.convert();
179        t[0] = t[0].wrapping_add(len);
180        *enc = t.convert();
181    }
182}
183
184#[cfg(test)]
185mod test {
186    use super::*;
187
188    // This is code to search for the shuffle constant
189    //
190    //thread_local! { static MASK: Cell<u128> = Cell::new(0); }
191    //
192    // fn shuffle(a: u128) -> u128 {
193    //     use std::intrinsics::transmute;
194    //     #[cfg(target_arch = "x86")]
195    //     use core::arch::x86::*;
196    //     #[cfg(target_arch = "x86_64")]
197    //     use core::arch::x86_64::*;
198    //     MASK.with(|mask| {
199    //         unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(mask.get()))) }
200    //     })
201    // }
202    //
203    // #[test]
204    // fn find_shuffle() {
205    //     use rand::prelude::*;
206    //     use SliceRandom;
207    //     use std::panic;
208    //     use std::io::Write;
209    //
210    //     let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ,13, 14, 15];
211    //     let mut rand = thread_rng();
212    //     let mut successful_list = HashMap::new();
213    //     for _attempt in 0..10000000 {
214    //         rand.shuffle(&mut value);
215    //         let test_val = value.convert();
216    //         MASK.with(|mask| {
217    //             mask.set(test_val);
218    //         });
219    //         if let Ok(successful) = panic::catch_unwind(|| {
220    //             test_shuffle_does_not_collide_with_aes();
221    //             test_shuffle_moves_high_bits();
222    //             test_shuffle_moves_every_value();
223    //             //test_shuffle_does_not_loop();
224    //             value
225    //         }) {
226    //             let successful: u128 = successful.convert();
227    //             successful_list.insert(successful, iters_before_loop());
228    //         }
229    //     }
230    //     let write_file = File::create("/tmp/output").unwrap();
231    //     let mut writer = BufWriter::new(&write_file);
232    //
233    //     for success in successful_list {
234    //         writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1);
235    //     }
236    // }
237    //
238    // fn iters_before_loop() -> u32 {
239    //     let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
240    //     let mut shuffled = shuffle(numbered);
241    //     let mut count = 0;
242    //     loop {
243    //         // println!("{:>16x}", shuffled);
244    //         if numbered == shuffled {
245    //             break;
246    //         }
247    //         count += 1;
248    //         shuffled = shuffle(shuffled);
249    //     }
250    //     count
251    // }
252
253    #[cfg(all(
254        any(target_arch = "x86", target_arch = "x86_64"),
255        target_feature = "ssse3",
256        target_feature = "aes",
257        not(miri)
258    ))]
259    #[test]
260    fn test_shuffle_does_not_collide_with_aes() {
261        let mut value: [u8; 16] = [0; 16];
262        let zero_mask_enc = aesenc(0, 0);
263        let zero_mask_dec = aesdec(0, 0);
264        for index in 0..16 {
265            value[index] = 1;
266            let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert();
267            let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert();
268            let actual_location: [u8; 16] = shuffle(value.convert()).convert();
269            for pos in 0..16 {
270                if actual_location[pos] != 0 {
271                    assert_eq!(
272                        0, excluded_positions_enc[pos],
273                        "Forward Overlap between {:?} and {:?} at {}",
274                        excluded_positions_enc, actual_location, index
275                    );
276                    assert_eq!(
277                        0, excluded_positions_dec[pos],
278                        "Reverse Overlap between {:?} and {:?} at {}",
279                        excluded_positions_dec, actual_location, index
280                    );
281                }
282            }
283            value[index] = 0;
284        }
285    }
286
287    #[test]
288    fn test_shuffle_contains_each_value() {
289        let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert();
290        let shuffled: [u8; 16] = shuffle(value.convert()).convert();
291        for index in 0..16_u8 {
292            assert!(shuffled.contains(&index), "Value is missing {}", index);
293        }
294    }
295
296    #[test]
297    fn test_shuffle_moves_every_value() {
298        let mut value: [u8; 16] = [0; 16];
299        for index in 0..16 {
300            value[index] = 1;
301            let shuffled: [u8; 16] = shuffle(value.convert()).convert();
302            assert_eq!(0, shuffled[index], "Value is not moved {}", index);
303            value[index] = 0;
304        }
305    }
306
307    #[test]
308    fn test_shuffle_moves_high_bits() {
309        assert!(
310            shuffle(1) > (1_u128 << 80),
311            "Low bits must be moved to other half {:?} -> {:?}",
312            0,
313            shuffle(1)
314        );
315
316        assert!(
317            shuffle(1_u128 << 58) >= (1_u128 << 64),
318            "High bits must be moved to other half {:?} -> {:?}",
319            7,
320            shuffle(1_u128 << 58)
321        );
322        assert!(
323            shuffle(1_u128 << 58) < (1_u128 << 112),
324            "High bits must not remain high {:?} -> {:?}",
325            7,
326            shuffle(1_u128 << 58)
327        );
328        assert!(
329            shuffle(1_u128 << 64) < (1_u128 << 64),
330            "Low bits must be moved to other half {:?} -> {:?}",
331            8,
332            shuffle(1_u128 << 64)
333        );
334        assert!(
335            shuffle(1_u128 << 64) >= (1_u128 << 16),
336            "Low bits must not remain low {:?} -> {:?}",
337            8,
338            shuffle(1_u128 << 64)
339        );
340
341        assert!(
342            shuffle(1_u128 << 120) < (1_u128 << 50),
343            "High bits must be moved to low half {:?} -> {:?}",
344            15,
345            shuffle(1_u128 << 120)
346        );
347    }
348
349    #[cfg(all(
350        any(target_arch = "x86", target_arch = "x86_64"),
351        target_feature = "ssse3",
352        not(miri)
353    ))]
354    #[test]
355    fn test_shuffle_does_not_loop() {
356        let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
357        let mut shuffled = shuffle(numbered);
358        for count in 0..100 {
359            // println!("{:>16x}", shuffled);
360            assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled);
361            shuffled = shuffle(shuffled);
362        }
363    }
364
365    #[test]
366    fn test_add_length() {
367        let mut enc = (u64::MAX as u128) << 64 | 50;
368        add_in_length(&mut enc, u64::MAX);
369        assert_eq!(enc >> 64, u64::MAX as u128);
370        assert_eq!(enc as u64, 49);
371    }
372}