/* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2017 Cavium, Inc */ /* * Reciprocal divide * * Used with permission from original authors * Hannes Frederic Sowa and Daniel Borkmann * * This algorithm is based on the paper "Division by Invariant * Integers Using Multiplication" by Torbjörn Granlund and Peter * L. Montgomery. * * The assembler implementation from Agner Fog, which this code is * based on, can be found here: * http://www.agner.org/optimize/asmlib.zip * * This optimization for A/B is helpful if the divisor B is mostly * runtime invariant. The reciprocal of B is calculated in the * slow-path with reciprocal_value(). The fast-path can then just use * a much faster multiplication operation with a variable dividend A * to calculate the division A/B. */ #ifndef _RTE_RECIPROCAL_H_ #define _RTE_RECIPROCAL_H_ #include #include #ifdef __cplusplus extern "C" { #endif struct rte_reciprocal { uint32_t m; uint8_t sh1, sh2; }; struct rte_reciprocal_u64 { uint64_t m; uint8_t sh1, sh2; }; static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) { uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); return (t + ((a - t) >> R.sh1)) >> R.sh2; } static __rte_always_inline uint64_t mullhi_u64(uint64_t x, uint64_t y) { #ifdef __SIZEOF_INT128__ __uint128_t xl = x; __uint128_t rl = xl * y; return (rl >> 64); #else uint64_t u0, u1, v0, v1, k, t; uint64_t w1, w2; uint64_t whi; u1 = x >> 32; u0 = x & 0xFFFFFFFF; v1 = y >> 32; v0 = y & 0xFFFFFFFF; t = u0*v0; k = t >> 32; t = u1*v0 + k; w1 = t & 0xFFFFFFFF; w2 = t >> 32; t = u0*v1 + w1; k = t >> 32; whi = u1*v1 + w2 + k; return whi; #endif } static __rte_always_inline uint64_t rte_reciprocal_divide_u64(uint64_t a, const struct rte_reciprocal_u64 *R) { uint64_t t = mullhi_u64(a, R->m); return (t + ((a - t) >> R->sh1)) >> R->sh2; } struct rte_reciprocal rte_reciprocal_value(uint32_t d); struct rte_reciprocal_u64 rte_reciprocal_value_u64(uint64_t d); #ifdef __cplusplus } #endif #endif /* _RTE_RECIPROCAL_H_ */