/* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2010-2014 Intel Corporation * Copyright(c) 2019 Arm Limited */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "test.h" /* * Atomic Variables * ================ * * - The main test function performs several subtests. The first * checks that the usual inc/dec/add/sub functions are working * correctly: * * - Initialize 16-bit, 32-bit and 64-bit atomic variables to specific * values. * * - These variables are incremented and decremented on each core at * the same time in ``test_atomic_usual()``. * * - The function checks that once all lcores finish their function, * the value of the atomic variables are still the same. * * - Test "test and set" functions. * * - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero. * * - Invoke ``test_atomic_tas()`` on each lcore: before doing anything * else. The cores are waiting a synchro using ``while * (rte_atomic32_read(&val) == 0)`` which is triggered by the main test * function. Then all cores do a * ``rte_atomicXX_test_and_set()`` at the same time. If it is successful, * it increments another atomic counter. * * - The main function checks that the atomic counter was incremented * twice only (one for 16-bit, one for 32-bit and one for 64-bit values). * * - Test "add/sub and return" functions * * - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero. * * - Invoke ``test_atomic_addsub_return()`` on each lcore. Before doing * anything else, the cores are waiting a synchro. Each lcore does * this operation several times:: * * tmp = rte_atomicXX_add_return(&a, 1); * atomic_add(&count, tmp); * tmp = rte_atomicXX_sub_return(&a, 1); * atomic_sub(&count, tmp+1); * * - At the end of the test, the *count* value must be 0. * * - Test "128-bit compare and swap" (aarch64 and x86_64 only) * * - Initialize 128-bit atomic variables to zero. * * - Invoke ``test_atomic128_cmp_exchange()`` on each lcore. Before doing * anything else, the cores are waiting a synchro. Each lcore does * these compare and swap (CAS) operations several times:: * * Acquired CAS update counter.val[0] + 2; counter.val[1] + 1; * Released CAS update counter.val[0] + 2; counter.val[1] + 1; * Acquired_Released CAS update counter.val[0] + 2; counter.val[1] + 1; * Relaxed CAS update counter.val[0] + 2; counter.val[1] + 1; * * - At the end of the test, the *count128* first 64-bit value and * second 64-bit value differ by the total iterations. * * - Test "atomic exchange" functions * * - Create a 64 bit token that can be tested for data integrity * * - Invoke ``test_atomic_exchange`` on each lcore. Before doing * anything else, the cores wait for a synchronization event. * Each core then does the follwoing for N iterations: * * Generate a new token with a data integrity check * Exchange the new token for previously generated token * Increment a counter if a corrupt token was received * * - At the end of the test, the number of corrupted tokens must be 0. */ #define NUM_ATOMIC_TYPES 3 #define N 1000000 static rte_atomic16_t a16; static rte_atomic32_t a32; static rte_atomic64_t a64; static rte_atomic64_t count; static rte_atomic32_t synchro; static int test_atomic_usual(__attribute__((unused)) void *arg) { unsigned i; while (rte_atomic32_read(&synchro) == 0) ; for (i = 0; i < N; i++) rte_atomic16_inc(&a16); for (i = 0; i < N; i++) rte_atomic16_dec(&a16); for (i = 0; i < (N / 5); i++) rte_atomic16_add(&a16, 5); for (i = 0; i < (N / 5); i++) rte_atomic16_sub(&a16, 5); for (i = 0; i < N; i++) rte_atomic32_inc(&a32); for (i = 0; i < N; i++) rte_atomic32_dec(&a32); for (i = 0; i < (N / 5); i++) rte_atomic32_add(&a32, 5); for (i = 0; i < (N / 5); i++) rte_atomic32_sub(&a32, 5); for (i = 0; i < N; i++) rte_atomic64_inc(&a64); for (i = 0; i < N; i++) rte_atomic64_dec(&a64); for (i = 0; i < (N / 5); i++) rte_atomic64_add(&a64, 5); for (i = 0; i < (N / 5); i++) rte_atomic64_sub(&a64, 5); return 0; } static int test_atomic_tas(__attribute__((unused)) void *arg) { while (rte_atomic32_read(&synchro) == 0) ; if (rte_atomic16_test_and_set(&a16)) rte_atomic64_inc(&count); if (rte_atomic32_test_and_set(&a32)) rte_atomic64_inc(&count); if (rte_atomic64_test_and_set(&a64)) rte_atomic64_inc(&count); return 0; } static int test_atomic_addsub_and_return(__attribute__((unused)) void *arg) { uint32_t tmp16; uint32_t tmp32; uint64_t tmp64; unsigned i; while (rte_atomic32_read(&synchro) == 0) ; for (i = 0; i < N; i++) { tmp16 = rte_atomic16_add_return(&a16, 1); rte_atomic64_add(&count, tmp16); tmp16 = rte_atomic16_sub_return(&a16, 1); rte_atomic64_sub(&count, tmp16+1); tmp32 = rte_atomic32_add_return(&a32, 1); rte_atomic64_add(&count, tmp32); tmp32 = rte_atomic32_sub_return(&a32, 1); rte_atomic64_sub(&count, tmp32+1); tmp64 = rte_atomic64_add_return(&a64, 1); rte_atomic64_add(&count, tmp64); tmp64 = rte_atomic64_sub_return(&a64, 1); rte_atomic64_sub(&count, tmp64+1); } return 0; } /* * rte_atomic32_inc_and_test() would increase a 32 bits counter by one and then * test if that counter is equal to 0. It would return true if the counter is 0 * and false if the counter is not 0. rte_atomic64_inc_and_test() could do the * same thing but for a 64 bits counter. * Here checks that if the 32/64 bits counter is equal to 0 after being atomically * increased by one. If it is, increase the variable of "count" by one which would * be checked as the result later. * */ static int test_atomic_inc_and_test(__attribute__((unused)) void *arg) { while (rte_atomic32_read(&synchro) == 0) ; if (rte_atomic16_inc_and_test(&a16)) { rte_atomic64_inc(&count); } if (rte_atomic32_inc_and_test(&a32)) { rte_atomic64_inc(&count); } if (rte_atomic64_inc_and_test(&a64)) { rte_atomic64_inc(&count); } return 0; } /* * rte_atomicXX_dec_and_test() should decrease a 32 bits counter by one and then * test if that counter is equal to 0. It should return true if the counter is 0 * and false if the counter is not 0. * This test checks if the counter is equal to 0 after being atomically * decreased by one. If it is, increase the value of "count" by one which is to * be checked as the result later. */ static int test_atomic_dec_and_test(__attribute__((unused)) void *arg) { while (rte_atomic32_read(&synchro) == 0) ; if (rte_atomic16_dec_and_test(&a16)) rte_atomic64_inc(&count); if (rte_atomic32_dec_and_test(&a32)) rte_atomic64_inc(&count); if (rte_atomic64_dec_and_test(&a64)) rte_atomic64_inc(&count); return 0; } #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) static rte_int128_t count128; /* * rte_atomic128_cmp_exchange() should update a 128 bits counter's first 64 * bits by 2 and the second 64 bits by 1 in this test. It should return true * if the compare exchange operation is successful. * This test repeats 128 bits compare and swap operations N rounds. In each * iteration it runs compare and swap operation with different memory models. */ static int test_atomic128_cmp_exchange(__attribute__((unused)) void *arg) { rte_int128_t expected; int success; unsigned int i; while (rte_atomic32_read(&synchro) == 0) ; expected = count128; for (i = 0; i < N; i++) { do { rte_int128_t desired; desired.val[0] = expected.val[0] + 2; desired.val[1] = expected.val[1] + 1; success = rte_atomic128_cmp_exchange(&count128, &expected, &desired, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } while (success == 0); do { rte_int128_t desired; desired.val[0] = expected.val[0] + 2; desired.val[1] = expected.val[1] + 1; success = rte_atomic128_cmp_exchange(&count128, &expected, &desired, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } while (success == 0); do { rte_int128_t desired; desired.val[0] = expected.val[0] + 2; desired.val[1] = expected.val[1] + 1; success = rte_atomic128_cmp_exchange(&count128, &expected, &desired, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED); } while (success == 0); do { rte_int128_t desired; desired.val[0] = expected.val[0] + 2; desired.val[1] = expected.val[1] + 1; success = rte_atomic128_cmp_exchange(&count128, &expected, &desired, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); } while (success == 0); } return 0; } #endif /* * Helper definitions/variables/functions for * atomic exchange tests */ typedef union { uint16_t u16; uint8_t u8[2]; } test16_t; typedef union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } test32_t; typedef union { uint64_t u64; uint32_t u32[2]; uint16_t u16[4]; uint8_t u8[8]; } test64_t; const uint8_t CRC8_POLY = 0x91; uint8_t crc8_table[256]; volatile uint16_t token16; volatile uint32_t token32; volatile uint64_t token64; static void build_crc8_table(void) { uint8_t val; int i, j; for (i = 0; i < 256; i++) { val = i; for (j = 0; j < 8; j++) { if (val & 1) val ^= CRC8_POLY; val >>= 1; } crc8_table[i] = val; } } static uint8_t get_crc8(uint8_t *message, int length) { uint8_t crc = 0; int i; for (i = 0; i < length; i++) crc = crc8_table[crc ^ message[i]]; return crc; } /* * The atomic exchange test sets up a token in memory and * then spins up multiple lcores whose job is to generate * new tokens, exchange that new token for the old one held * in memory, and then verify that the old token is still * valid (i.e. the exchange did not corrupt the token). * * A token is made up of random data and 8 bits of crc * covering that random data. The following is an example * of a 64bit token. * * +------------+------------+ * | 63 56 | 55 0 | * +------------+------------+ * | CRC8 | Data | * +------------+------------+ */ static int test_atomic_exchange(__attribute__((unused)) void *arg) { int i; test16_t nt16, ot16; /* new token, old token */ test32_t nt32, ot32; test64_t nt64, ot64; /* Wait until all of the other threads have been dispatched */ while (rte_atomic32_read(&synchro) == 0) ; /* * Let the battle begin! Every thread attempts to steal the current * token with an atomic exchange operation and install its own newly * generated token. If the old token is valid (i.e. it has the * appropriate crc32 hash for the data) then the test iteration has * passed. If the token is invalid, increment the counter. */ for (i = 0; i < N; i++) { /* Test 64bit Atomic Exchange */ nt64.u64 = rte_rand(); nt64.u8[7] = get_crc8(&nt64.u8[0], sizeof(nt64) - 1); ot64.u64 = rte_atomic64_exchange(&token64, nt64.u64); if (ot64.u8[7] != get_crc8(&ot64.u8[0], sizeof(ot64) - 1)) rte_atomic64_inc(&count); /* Test 32bit Atomic Exchange */ nt32.u32 = (uint32_t)rte_rand(); nt32.u8[3] = get_crc8(&nt32.u8[0], sizeof(nt32) - 1); ot32.u32 = rte_atomic32_exchange(&token32, nt32.u32); if (ot32.u8[3] != get_crc8(&ot32.u8[0], sizeof(ot32) - 1)) rte_atomic64_inc(&count); /* Test 16bit Atomic Exchange */ nt16.u16 = (uint16_t)rte_rand(); nt16.u8[1] = get_crc8(&nt16.u8[0], sizeof(nt16) - 1); ot16.u16 = rte_atomic16_exchange(&token16, nt16.u16); if (ot16.u8[1] != get_crc8(&ot16.u8[0], sizeof(ot16) - 1)) rte_atomic64_inc(&count); } return 0; } static int test_atomic(void) { rte_atomic16_init(&a16); rte_atomic32_init(&a32); rte_atomic64_init(&a64); rte_atomic64_init(&count); rte_atomic32_init(&synchro); rte_atomic16_set(&a16, 1UL << 10); rte_atomic32_set(&a32, 1UL << 10); rte_atomic64_set(&a64, 1ULL << 33); printf("usual inc/dec/add/sub functions\n"); rte_eal_mp_remote_launch(test_atomic_usual, NULL, SKIP_MASTER); rte_atomic32_set(&synchro, 1); rte_eal_mp_wait_lcore(); rte_atomic32_set(&synchro, 0); if (rte_atomic16_read(&a16) != 1UL << 10) { printf("Atomic16 usual functions failed\n"); return -1; } if (rte_atomic32_read(&a32) != 1UL << 10) { printf("Atomic32 usual functions failed\n"); return -1; } if (rte_atomic64_read(&a64) != 1ULL << 33) { printf("Atomic64 usual functions failed\n"); return -1; } printf("test and set\n"); rte_atomic64_set(&a64, 0); rte_atomic32_set(&a32, 0); rte_atomic16_set(&a16, 0); rte_atomic64_set(&count, 0); rte_eal_mp_remote_launch(test_atomic_tas, NULL, SKIP_MASTER); rte_atomic32_set(&synchro, 1); rte_eal_mp_wait_lcore(); rte_atomic32_set(&synchro, 0); if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) { printf("Atomic test and set failed\n"); return -1; } printf("add/sub and return\n"); rte_atomic64_set(&a64, 0); rte_atomic32_set(&a32, 0); rte_atomic16_set(&a16, 0); rte_atomic64_set(&count, 0); rte_eal_mp_remote_launch(test_atomic_addsub_and_return, NULL, SKIP_MASTER); rte_atomic32_set(&synchro, 1); rte_eal_mp_wait_lcore(); rte_atomic32_set(&synchro, 0); if (rte_atomic64_read(&count) != 0) { printf("Atomic add/sub+return failed\n"); return -1; } /* * Set a64, a32 and a16 with the same value of minus "number of slave * lcores", launch all slave lcores to atomically increase by one and * test them respectively. * Each lcore should have only one chance to increase a64 by one and * then check if it is equal to 0, but there should be only one lcore * that finds that it is 0. It is similar for a32 and a16. * Then a variable of "count", initialized to zero, is increased by * one if a64, a32 or a16 is 0 after being increased and tested * atomically. * We can check if "count" is finally equal to 3 to see if all slave * lcores performed "atomic inc and test" right. */ printf("inc and test\n"); rte_atomic64_clear(&a64); rte_atomic32_clear(&a32); rte_atomic16_clear(&a16); rte_atomic32_clear(&synchro); rte_atomic64_clear(&count); rte_atomic64_set(&a64, (int64_t)(1 - (int64_t)rte_lcore_count())); rte_atomic32_set(&a32, (int32_t)(1 - (int32_t)rte_lcore_count())); rte_atomic16_set(&a16, (int16_t)(1 - (int16_t)rte_lcore_count())); rte_eal_mp_remote_launch(test_atomic_inc_and_test, NULL, SKIP_MASTER); rte_atomic32_set(&synchro, 1); rte_eal_mp_wait_lcore(); rte_atomic32_clear(&synchro); if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) { printf("Atomic inc and test failed %d\n", (int)count.cnt); return -1; } /* * Same as above, but this time we set the values to "number of slave * lcores", and decrement instead of increment. */ printf("dec and test\n"); rte_atomic32_clear(&synchro); rte_atomic64_clear(&count); rte_atomic64_set(&a64, (int64_t)(rte_lcore_count() - 1)); rte_atomic32_set(&a32, (int32_t)(rte_lcore_count() - 1)); rte_atomic16_set(&a16, (int16_t)(rte_lcore_count() - 1)); rte_eal_mp_remote_launch(test_atomic_dec_and_test, NULL, SKIP_MASTER); rte_atomic32_set(&synchro, 1); rte_eal_mp_wait_lcore(); rte_atomic32_clear(&synchro); if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) { printf("Atomic dec and test failed\n"); return -1; } #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) /* * This case tests the functionality of rte_atomic128_cmp_exchange * API. It calls rte_atomic128_cmp_exchange with four kinds of memory * models successively on each slave core. Once each 128-bit atomic * compare and swap operation is successful, it updates the global * 128-bit counter by 2 for the first 64-bit and 1 for the second * 64-bit. Each slave core iterates this test N times. * At the end of test, verify whether the first 64-bits of the 128-bit * counter and the second 64bits is differ by the total iterations. If * it is, the test passes. */ printf("128-bit compare and swap test\n"); uint64_t iterations = 0; rte_atomic32_clear(&synchro); count128.val[0] = 0; count128.val[1] = 0; rte_eal_mp_remote_launch(test_atomic128_cmp_exchange, NULL, SKIP_MASTER); rte_atomic32_set(&synchro, 1); rte_eal_mp_wait_lcore(); rte_atomic32_clear(&synchro); iterations = count128.val[0] - count128.val[1]; if (iterations != 4*N*(rte_lcore_count()-1)) { printf("128-bit compare and swap failed\n"); return -1; } #endif /* * Test 16/32/64bit atomic exchange. */ test64_t t; printf("exchange test\n"); rte_atomic32_clear(&synchro); rte_atomic64_clear(&count); /* Generate the CRC8 lookup table */ build_crc8_table(); /* Create the initial tokens used by the test */ t.u64 = rte_rand(); token16 = (get_crc8(&t.u8[0], sizeof(token16) - 1) << 8) | (t.u16[0] & 0x00ff); token32 = ((uint32_t)get_crc8(&t.u8[0], sizeof(token32) - 1) << 24) | (t.u32[0] & 0x00ffffff); token64 = ((uint64_t)get_crc8(&t.u8[0], sizeof(token64) - 1) << 56) | (t.u64 & 0x00ffffffffffffff); rte_eal_mp_remote_launch(test_atomic_exchange, NULL, SKIP_MASTER); rte_atomic32_set(&synchro, 1); rte_eal_mp_wait_lcore(); rte_atomic32_clear(&synchro); if (rte_atomic64_read(&count) > 0) { printf("Atomic exchange test failed\n"); return -1; } return 0; } REGISTER_TEST_COMMAND(atomic_autotest, test_atomic);