/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */

#include <rte_common.h>
#include <rte_lcore.h>
#include <rte_rtm.h>
#include <rte_spinlock.h>

#include "rte_power_intrinsics.h"

/*
 * Per-lcore structure holding current status of C0.2 sleeps.
 */
static struct power_wait_status {
	rte_spinlock_t lock;
	volatile void *monitor_addr; /**< NULL if not currently sleeping */
} __rte_cache_aligned wait_status[RTE_MAX_LCORE];

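/*
 * Wake a core sleeping in UMWAIT on @addr. A compare-and-exchange of the
 * current value with itself is enough: UMONITOR arms the address range for
 * store snoops, so any write to the monitored cache line, even one that
 * leaves the value unchanged, ends the wait.
 */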
static inline void
__umwait_wakeup(volatile void *addr)
{
	uint64_t val;

	/* trigger a write but don't change the value */
	val = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED);
	__atomic_compare_exchange_n((volatile uint64_t *)addr, &val, val, 0,
			__ATOMIC_RELAXED, __ATOMIC_RELAXED);
}

static bool wait_supported;
static bool wait_multi_supported;

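/*
 * Read the monitored location at its declared width and widen it to
 * uint64_t so it can be passed to the user's comparison callback.
 */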
static inline uint64_t
__get_umwait_val(const volatile void *p, const uint8_t sz)
{
	switch (sz) {
	case sizeof(uint8_t):
		return *(const volatile uint8_t *)p;
	case sizeof(uint16_t):
		return *(const volatile uint16_t *)p;
	case sizeof(uint32_t):
		return *(const volatile uint32_t *)p;
	case sizeof(uint64_t):
		return *(const volatile uint64_t *)p;
	default:
		/* shouldn't happen */
		RTE_ASSERT(0);
		return 0;
	}
}

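/*
 * The monitored object must be 1, 2, 4 or 8 bytes wide; these are the only
 * widths __get_umwait_val() can read back.
 */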
static inline int
__check_val_size(const uint8_t sz)
{
	switch (sz) {
	case sizeof(uint8_t): /* fall-through */
	case sizeof(uint16_t): /* fall-through */
	case sizeof(uint32_t): /* fall-through */
	case sizeof(uint64_t):
		return 0;
	default:
		/* unexpected size */
		return -1;
	}
}

/**
 * This function uses the UMONITOR/UMWAIT instructions and will enter the
 * C0.2 state. For more information about usage of these instructions,
 * please refer to the Intel(R) 64 and IA-32 Architectures Software
 * Developer's Manual.
 */
int
rte_power_monitor(const struct rte_power_monitor_cond *pmc,
		const uint64_t tsc_timestamp)
{
	const uint32_t tsc_l = (uint32_t)tsc_timestamp;
	const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
	const unsigned int lcore_id = rte_lcore_id();
	struct power_wait_status *s;
	uint64_t cur_value;

	/* prevent user from running this instruction if it's not supported */
	if (!wait_supported)
		return -ENOTSUP;

	/* prevent non-EAL thread from using this API */
	if (lcore_id >= RTE_MAX_LCORE)
		return -EINVAL;

	if (pmc == NULL)
		return -EINVAL;

	if (__check_val_size(pmc->size) < 0)
		return -EINVAL;

	if (pmc->fn == NULL)
		return -EINVAL;

	s = &wait_status[lcore_id];

	/* update sleep address */
	rte_spinlock_lock(&s->lock);
	s->monitor_addr = pmc->addr;

	/*
	 * we're using raw byte codes for now as only the newest compiler
	 * versions support this instruction natively.
	 */

	/* set address for UMONITOR */
	asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
			:
			: "D"(pmc->addr));

	/* now that we've put this address into monitor, we can unlock */
	rte_spinlock_unlock(&s->lock);

	cur_value = __get_umwait_val(pmc->addr, pmc->size);

	/* check if callback indicates we should abort */
	if (pmc->fn(cur_value, pmc->opaque) != 0)
		goto end;

	/* execute UMWAIT */
	asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
			: /* ignore rflags */
			: "D"(0), /* enter C0.2 */
			  "a"(tsc_l), "d"(tsc_h));

end:
	/* erase sleep address */
	rte_spinlock_lock(&s->lock);
	s->monitor_addr = NULL;
	rte_spinlock_unlock(&s->lock);

	return 0;
}

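/*
 * Illustrative usage sketch (not part of this file; names are hypothetical,
 * and the exact rte_power_monitor_cond and callback shapes are defined in
 * rte_power_intrinsics.h): sleep until a ring tail moves or a TSC deadline
 * passes.
 *
 *	static int
 *	tail_moved(const uint64_t val,
 *			const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
 *	{
 *		return val != opaque[0]; // non-zero means "don't sleep"
 *	}
 *
 *	struct rte_power_monitor_cond pmc = {
 *		.addr = &ring->tail,
 *		.fn = tail_moved,
 *		.opaque = { last_tail },
 *		.size = sizeof(uint64_t),
 *	};
 *	rte_power_monitor(&pmc, rte_rdtsc() + timeout_cycles);
 */
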
/**
 * This function uses the TPAUSE instruction and will enter the C0.2 state.
 * For more information about usage of this instruction, please refer to the
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual.
 */
int
rte_power_pause(const uint64_t tsc_timestamp)
{
	const uint32_t tsc_l = (uint32_t)tsc_timestamp;
	const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);

	/* prevent user from running this instruction if it's not supported */
	if (!wait_supported)
		return -ENOTSUP;

	/* execute TPAUSE */
	asm volatile(".byte 0x66, 0x0f, 0xae, 0xf7;"
			: /* ignore rflags */
			: "D"(0), /* enter C0.2 */
			  "a"(tsc_l), "d"(tsc_h));

	return 0;
}

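/*
 * Probe CPU support for the monitor/pause intrinsics once at startup so the
 * API entry points above only have to test the cached flags.
 */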
RTE_INIT(rte_power_intrinsics_init) {
	struct rte_cpu_intrinsics i;

	rte_cpu_get_intrinsics_support(&i);

	if (i.power_monitor && i.power_pause)
		wait_supported = 1;
	if (i.power_monitor_multi)
		wait_multi_supported = 1;
}

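/*
 * Wake up the lcore identified by lcore_id if it is currently sleeping in
 * rte_power_monitor(). Intended to be called from a different thread than
 * the one that is sleeping.
 */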
int
rte_power_monitor_wakeup(const unsigned int lcore_id)
{
	struct power_wait_status *s;

	/* prevent user from running this instruction if it's not supported */
	if (!wait_supported)
		return -ENOTSUP;

	/* prevent buffer overrun */
	if (lcore_id >= RTE_MAX_LCORE)
		return -EINVAL;

	s = &wait_status[lcore_id];

	/*
	 * There is a race condition between sleep, wakeup and locking, but we
	 * don't need to handle it.
	 *
	 * Possible situations:
	 *
	 * 1. T1 locks, sets address, unlocks
	 * 2. T2 locks, triggers wakeup, unlocks
	 * 3. T1 sleeps
	 *
	 * In this case, because T1 has already set the address for monitoring,
	 * we will wake up immediately even if T2 triggers wakeup before T1
	 * goes to sleep.
	 *
	 * 1. T1 locks, sets address, unlocks, goes to sleep, and wakes up
	 * 2. T2 locks, triggers wakeup, and unlocks
	 * 3. T1 locks, erases address, and unlocks
	 *
	 * In this case, since we've already woken up, the "wakeup" was
	 * unneeded, and since T1 is still waiting on T2 releasing the lock, the
	 * wakeup address is still valid so it's perfectly safe to write it.
	 *
	 * For the multi-monitor case, the act of locking will in itself trigger
	 * the wakeup, so no additional writes are necessary.
	 */
	rte_spinlock_lock(&s->lock);
	if (s->monitor_addr != NULL)
		__umwait_wakeup(s->monitor_addr);
	rte_spinlock_unlock(&s->lock);

	return 0;
}

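/*
 * Sleep until any one of the monitored addresses is written to, a wakeup is
 * triggered, or the TSC deadline passes. Built on restricted transactional
 * memory (RTM): every monitored address is pulled into the transaction
 * read-set, so an external write aborts the transaction and ends the sleep.
 *
 * Illustrative call (hypothetical names), watching two ring tails at once:
 *
 *	struct rte_power_monitor_cond pmc[2] = {
 *		{ .addr = &rx0->tail, .fn = tail_moved, .size = sizeof(uint64_t) },
 *		{ .addr = &rx1->tail, .fn = tail_moved, .size = sizeof(uint64_t) },
 *	};
 *	rte_power_monitor_multi(pmc, RTE_DIM(pmc), rte_rdtsc() + timeout_cycles);
 */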
int
rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
		const uint32_t num, const uint64_t tsc_timestamp)
{
	const unsigned int lcore_id = rte_lcore_id();
	struct power_wait_status *s;
	uint32_t i, rc;

	/* check if supported */
	if (!wait_multi_supported)
		return -ENOTSUP;

	/* prevent non-EAL thread from indexing past the per-lcore array */
	if (lcore_id >= RTE_MAX_LCORE)
		return -EINVAL;

	if (pmc == NULL || num == 0)
		return -EINVAL;

	s = &wait_status[lcore_id];

	/* we are already inside a transaction region, return */
	if (rte_xtest() != 0)
		return 0;

	/* start a new transaction region */
	rc = rte_xbegin();

	/* transaction aborted, possibly due to a write to a wait address */
	if (rc != RTE_XBEGIN_STARTED)
		return 0;

	/*
	 * the mere act of reading the lock status here adds the lock to
	 * the read set. This means that when we trigger a wakeup from another
	 * thread, even if we don't have a defined wakeup address and thus don't
	 * actually cause any writes, the act of locking our lock will itself
	 * trigger the wakeup and abort the transaction.
	 */
	rte_spinlock_is_locked(&s->lock);

	/*
	 * add all addresses to wait on into the transaction read set and check
	 * if any of the wakeup conditions are already met.
	 */
	rc = 0;
	for (i = 0; i < num; i++) {
		const struct rte_power_monitor_cond *c = &pmc[i];

		/* cannot be NULL */
		if (c->fn == NULL) {
			rc = -EINVAL;
			break;
		}

		const uint64_t val = __get_umwait_val(c->addr, c->size);

		/* abort if callback indicates that we need to stop */
		if (c->fn(val, c->opaque) != 0)
			break;
	}

	/* none of the conditions were met, sleep until timeout */
	if (i == num)
		rte_power_pause(tsc_timestamp);

	/* end transaction region */
	rte_xend();

	return rc;
}