use crate::convert::*;
#[allow(unused)]
use zerocopy::transmute;

pub(crate) const MULTIPLE: u64 = 6364136223846793005;

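/// Byte-permutation constant used by `shuffle` below. Its properties (every byte
/// is moved, and the permutation does not line up with an AES round) are checked
/// by the unit tests at the bottom of this file.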
#[cfg(all(target_feature = "ssse3", not(miri)))]
const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;

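/// Computes the full 128-bit product of `s` and `by`, then XORs the high and
/// low 64-bit halves together so bits from the entire product influence the result.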
#[inline(always)]
#[cfg(feature = "folded_multiply")]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let result = (s as u128).wrapping_mul(by as u128);
    ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
}

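/// Variant used when the `folded_multiply` feature is disabled: mixes the inputs
/// with two 64-bit wrapping multiplies and byte swaps instead of a 128-bit multiply.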
#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let b1 = s.wrapping_mul(by.swap_bytes());
    let b2 = s.swap_bytes().wrapping_mul(!by);
    b1 ^ b2.swap_bytes()
}

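/// Reads a slice of at most 8 bytes into two u64 values. Neither the byte order
/// nor non-duplication of bytes is guaranteed; a 1-byte slice, for example, is
/// returned duplicated in both lanes.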
#[inline(always)]
pub(crate) fn read_small(data: &[u8]) -> [u64; 2] {
    debug_assert!(data.len() <= 8);
    if data.len() >= 2 {
        if data.len() >= 4 {
            [data.read_u32().0 as u64, data.read_last_u32() as u64]
        } else {
            [data.read_u16().0 as u64, data[data.len() - 1] as u64]
        }
    } else {
        if data.len() > 0 {
            [data[0] as u64, data[0] as u64]
        } else {
            [0, 0]
        }
    }
}

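/// Permutes the bytes of a 128-bit value. With SSSE3 available (and not under
/// Miri) this is a single `_mm_shuffle_epi8` using `SHUFFLE_MASK`; otherwise it
/// falls back to `swap_bytes`.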
#[inline(always)]
pub(crate) fn shuffle(a: u128) -> u128 {
    #[cfg(all(target_feature = "ssse3", not(miri)))]
    {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
    }
    #[cfg(not(all(target_feature = "ssse3", not(miri))))]
    {
        a.swap_bytes()
    }
}

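/// Adds the two values as 64-bit lanes, then shuffles the bytes of the sum.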
#[allow(unused)]
#[inline(always)]
pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 {
    let sum = add_by_64s(a.convert(), b.convert());
    shuffle(sum.convert())
}

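/// Shuffles the bytes of `base`, then adds `to_add` to it as 64-bit lanes.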
#[allow(unused)]
#[inline(always)]
pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 {
    let shuffled: [u64; 2] = shuffle(base).convert();
    add_by_64s(shuffled, to_add.convert()).convert()
}

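/// Lane-wise wrapping addition of two `[u64; 2]` values, using SSE2's
/// `_mm_add_epi64` when available.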
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    unsafe {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
    }
}

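/// Portable fallback: the same lane-wise wrapping addition without SSE2.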
#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
}

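/// One AES encryption round: `value` is run through `_mm_aesenc_si128` with
/// `xor` as the round key.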
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesenc_si128(value, transmute!(xor)))
    }
}

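/// ARM equivalent of the x86 `aesenc` above. The NEON AES intrinsics fold the
/// round key in before the round, so a zero key is passed and `xor` is applied
/// afterwards to match the x86 ordering.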
#[cfg(any(
    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let res = unsafe { vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}

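/// One AES decryption round: `value` is run through `_mm_aesdec_si128` with
/// `xor` as the round key.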
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesdec_si128(value, transmute!(xor)))
    }
}

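/// ARM equivalent of the x86 `aesdec` above, built from `vaesdq_u8` and
/// `vaesimcq_u8` with a zero round key, XORing `xor` in afterwards.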
#[cfg(any(
    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let res = unsafe { vaesimcq_u8(vaesdq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}

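/// Wrapping-adds `len` into the low 64 bits of `enc` (see `test_add_length`
/// below), using SSE2 directly on x86_64 and a portable path otherwise.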
#[allow(unused)]
#[inline(always)]
pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        unsafe {
            let enc = enc as *mut u128;
            let len = _mm_cvtsi64_si128(len as i64);
            let data = _mm_loadu_si128(enc.cast());
            let sum = _mm_add_epi64(data, len);
            _mm_storeu_si128(enc.cast(), sum);
        }
    }
    #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
    {
        let mut t: [u64; 2] = enc.convert();
        t[0] = t[0].wrapping_add(len);
        *enc = t.convert();
    }
}

#[cfg(test)]
mod test {
    use super::*;

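    // Illustrative sketch (not part of the original suite): a minimal spot check
    // documenting that add_by_64s wraps per 64-bit lane, which both the SSE2 and
    // the fallback implementations above provide. The test name is ours, chosen
    // for this example.
    #[test]
    fn test_add_by_64s_wraps_per_lane() {
        let sum = add_by_64s([u64::MAX, 1], [1, 2]);
        assert_eq!([0u64, 3], sum);
    }
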
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        target_feature = "aes",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_collide_with_aes() {
        let mut value: [u8; 16] = [0; 16];
        let zero_mask_enc = aesenc(0, 0);
        let zero_mask_dec = aesdec(0, 0);
        for index in 0..16 {
            value[index] = 1;
            let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert();
            let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert();
            let actual_location: [u8; 16] = shuffle(value.convert()).convert();
            for pos in 0..16 {
                if actual_location[pos] != 0 {
                    assert_eq!(
                        0, excluded_positions_enc[pos],
                        "Forward Overlap between {:?} and {:?} at {}",
                        excluded_positions_enc, actual_location, index
                    );
                    assert_eq!(
                        0, excluded_positions_dec[pos],
                        "Reverse Overlap between {:?} and {:?} at {}",
                        excluded_positions_dec, actual_location, index
                    );
                }
            }
            value[index] = 0;
        }
    }

    #[test]
    fn test_shuffle_contains_each_value() {
        let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert();
        let shuffled: [u8; 16] = shuffle(value.convert()).convert();
        for index in 0..16_u8 {
            assert!(shuffled.contains(&index), "Value is missing {}", index);
        }
    }

    #[test]
    fn test_shuffle_moves_every_value() {
        let mut value: [u8; 16] = [0; 16];
        for index in 0..16 {
            value[index] = 1;
            let shuffled: [u8; 16] = shuffle(value.convert()).convert();
            assert_eq!(0, shuffled[index], "Value is not moved {}", index);
            value[index] = 0;
        }
    }

    #[test]
    fn test_shuffle_moves_high_bits() {
        assert!(
            shuffle(1) > (1_u128 << 80),
            "Low bits must be moved to other half {:?} -> {:?}",
            0,
            shuffle(1)
        );

        assert!(
            shuffle(1_u128 << 58) >= (1_u128 << 64),
            "High bits must be moved to other half {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 58) < (1_u128 << 112),
            "High bits must not remain high {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 64) < (1_u128 << 64),
            "Low bits must be moved to other half {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );
        assert!(
            shuffle(1_u128 << 64) >= (1_u128 << 16),
            "Low bits must not remain low {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );

        assert!(
            shuffle(1_u128 << 120) < (1_u128 << 50),
            "High bits must be moved to low half {:?} -> {:?}",
            15,
            shuffle(1_u128 << 120)
        );
    }

    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_loop() {
        let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
        let mut shuffled = shuffle(numbered);
        for count in 0..100 {
            assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled);
            shuffled = shuffle(shuffled);
        }
    }

    #[test]
    fn test_add_length() {
        let mut enc = (u64::MAX as u128) << 64 | 50;
        add_in_length(&mut enc, u64::MAX);
        assert_eq!(enc >> 64, u64::MAX as u128);
        assert_eq!(enc as u64, 49);
    }
}
372}