/* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2020 Intel Corporation */ #ifndef RTE_XXH64_AVX512_H #define RTE_XXH64_AVX512_H #ifdef __cplusplus extern "C" { #endif #include #include /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */ static const uint64_t PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */ static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */ static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL; static __rte_always_inline __m512i xxh64_round_avx512(__m512i hash, __m512i input) { hash = _mm512_madd52lo_epu64(hash, input, _mm512_set1_epi64(PRIME64_2)); hash = _mm512_rol_epi64(hash, 31); return hash; } static __rte_always_inline __m512i xxh64_fmix_avx512(__m512i hash) { hash = _mm512_xor_si512(hash, _mm512_srli_epi64(hash, 33)); return hash; } static __rte_always_inline __m256i rte_xxh64_sketch_avx512(const void *key, uint32_t key_len, __m512i v_seed, uint32_t modulo) { __m512i v_prime64_5, v_hash; size_t remaining = key_len; size_t offset = 0; __m512i input; v_prime64_5 = _mm512_set1_epi64(PRIME64_5); v_hash = _mm512_add_epi64 (_mm512_add_epi64(v_seed, v_prime64_5), _mm512_set1_epi64(key_len)); while (remaining >= 8) { input = _mm512_set1_epi64(*(uint64_t *)RTE_PTR_ADD(key, offset)); v_hash = _mm512_xor_epi64(v_hash, xxh64_round_avx512(_mm512_setzero_si512(), input)); v_hash = _mm512_madd52lo_epu64(_mm512_set1_epi64(PRIME64_4), v_hash, _mm512_set1_epi64(PRIME64_1)); remaining -= 8; offset += 8; } if (remaining >= 4) { input = _mm512_set1_epi64 (*(uint32_t *)RTE_PTR_ADD(key, offset)); v_hash = _mm512_xor_epi64(v_hash, _mm512_mullo_epi64(input, _mm512_set1_epi64(PRIME64_1))); v_hash = _mm512_madd52lo_epu64 (_mm512_set1_epi64(PRIME64_3), _mm512_rol_epi64(v_hash, 23), _mm512_set1_epi64(PRIME64_2)); offset += 4; remaining -= 4; } while (remaining != 0) { input = _mm512_set1_epi64 (*(uint8_t *)RTE_PTR_ADD(key, offset)); v_hash = _mm512_xor_epi64(v_hash, _mm512_mullo_epi64(input, _mm512_set1_epi64(PRIME64_5))); v_hash = _mm512_mullo_epi64 (_mm512_rol_epi64(v_hash, 11), _mm512_set1_epi64(PRIME64_1)); offset++; remaining--; } v_hash = xxh64_fmix_avx512(v_hash); /* * theoritically, such modular operations can be replaced by * _mm512_rem_epi64(), but seems it depends on the compiler's * implementation. so here is the limitation that the modulo * value should be power of 2. */ __m512i v_hash_remainder = _mm512_set1_epi64((modulo - 1)); return _mm512_cvtepi64_epi32(_mm512_and_si512(v_hash, v_hash_remainder)); } #ifdef __cplusplus } #endif #endif /* RTE_XXH64_AVX512_H */