/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2021 NVIDIA Corporation & Affiliates
 */

#include <rte_eal.h>
#include <rte_tailq.h>
#include <rte_rwlock.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_log.h>

#include "rte_gpudev.h"
#include "gpudev_driver.h"

/* Logging */
RTE_LOG_REGISTER_DEFAULT(gpu_logtype, NOTICE);
#define GPU_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, gpu_logtype, RTE_FMT("gpu: " \
		RTE_FMT_HEAD(__VA_ARGS__, ) "\n", RTE_FMT_TAIL(__VA_ARGS__, )))

/* Set any driver error as EPERM */
#define GPU_DRV_RET(function) \
	((function != 0) ? -(rte_errno = EPERM) : (rte_errno = 0))

/* Array of devices */
static struct rte_gpu *gpus;
/* Maximum number of devices (size of the gpus array) */
static int16_t gpu_max;
/* Number of currently valid devices */
static int16_t gpu_count;

/* Shared memory between processes. */
static const char *GPU_MEMZONE = "rte_gpu_shared";
static struct {
	__extension__ struct rte_gpu_mpshared gpus[0];
} *gpu_shared_mem;

/* Event callback object */
struct rte_gpu_callback {
	TAILQ_ENTRY(rte_gpu_callback) next;
	rte_gpu_callback_t *function;
	void *user_data;
	enum rte_gpu_event event;
};
static rte_rwlock_t gpu_callback_lock = RTE_RWLOCK_INITIALIZER;
static void gpu_free_callbacks(struct rte_gpu *dev);

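/*
 * Size the device array. Applications rarely call this directly: the first
 * rte_gpu_allocate()/rte_gpu_attach() initializes it implicitly with
 * RTE_GPU_DEFAULT_MAX.
 */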
int
rte_gpu_init(size_t dev_max)
{
	if (dev_max == 0 || dev_max > INT16_MAX) {
		GPU_LOG(ERR, "invalid array size");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	/* No lock, it must be called before or during first probing. */
	if (gpus != NULL) {
		GPU_LOG(ERR, "already initialized");
		rte_errno = EBUSY;
		return -rte_errno;
	}

	gpus = calloc(dev_max, sizeof(struct rte_gpu));
	if (gpus == NULL) {
		GPU_LOG(ERR, "cannot initialize library");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	gpu_max = dev_max;
	return 0;
}

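/*
 * Lookup helpers: a device ID is valid only while its slot is in
 * RTE_GPU_STATE_INITIALIZED, and rte_gpu_find_next() is the iterator behind
 * the RTE_GPU_FOREACH() family of macros in rte_gpudev.h.
 */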
uint16_t
rte_gpu_count_avail(void)
{
	return gpu_count;
}

bool
rte_gpu_is_valid(int16_t dev_id)
{
	if (dev_id >= 0 && dev_id < gpu_max &&
		gpus[dev_id].process_state == RTE_GPU_STATE_INITIALIZED)
		return true;
	return false;
}

static bool
gpu_match_parent(int16_t dev_id, int16_t parent)
{
	if (parent == RTE_GPU_ID_ANY)
		return true;
	return gpus[dev_id].mpshared->info.parent == parent;
}

int16_t
rte_gpu_find_next(int16_t dev_id, int16_t parent)
{
	if (dev_id < 0)
		dev_id = 0;
	while (dev_id < gpu_max &&
			(gpus[dev_id].process_state == RTE_GPU_STATE_UNUSED ||
			!gpu_match_parent(dev_id, parent)))
		dev_id++;

	if (dev_id >= gpu_max)
		return RTE_GPU_ID_NONE;
	return dev_id;
}

static int16_t
gpu_find_free_id(void)
{
	int16_t dev_id;

	for (dev_id = 0; dev_id < gpu_max; dev_id++) {
		if (gpus[dev_id].process_state == RTE_GPU_STATE_UNUSED)
			return dev_id;
	}
	return RTE_GPU_ID_NONE;
}

static struct rte_gpu *
gpu_get_by_id(int16_t dev_id)
{
	if (!rte_gpu_is_valid(dev_id))
		return NULL;
	return &gpus[dev_id];
}

struct rte_gpu *
rte_gpu_get_by_name(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;

	if (name == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	RTE_GPU_FOREACH(dev_id) {
		dev = &gpus[dev_id];
		if (strncmp(name, dev->mpshared->name, RTE_DEV_NAME_MAX_LEN) == 0)
			return dev;
	}
	return NULL;
}

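/*
 * Per-device state shared across processes lives in a memzone: the primary
 * process reserves it (sized from gpu_max), secondary processes only look it
 * up, so both see the same rte_gpu_mpshared array.
 */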
static int
gpu_shared_mem_init(void)
{
	const struct rte_memzone *memzone;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		memzone = rte_memzone_reserve(GPU_MEMZONE,
				sizeof(*gpu_shared_mem) +
				sizeof(*gpu_shared_mem->gpus) * gpu_max,
				SOCKET_ID_ANY, 0);
	} else {
		memzone = rte_memzone_lookup(GPU_MEMZONE);
	}
	if (memzone == NULL) {
		GPU_LOG(ERR, "cannot initialize shared memory");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	gpu_shared_mem = memzone->addr;
	return 0;
}

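/*
 * Driver-facing: reserve a free slot in the primary process and initialize
 * its shared data. The device becomes visible to the application API only
 * once the driver calls rte_gpu_complete_new().
 */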
struct rte_gpu *
rte_gpu_allocate(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		GPU_LOG(ERR, "only primary process can allocate device");
		rte_errno = EPERM;
		return NULL;
	}
	if (name == NULL) {
		GPU_LOG(ERR, "allocate device without a name");
		rte_errno = EINVAL;
		return NULL;
	}

	/* implicit initialization of library before adding first device */
	if (gpus == NULL && rte_gpu_init(RTE_GPU_DEFAULT_MAX) < 0)
		return NULL;

	/* initialize shared memory before adding first device */
	if (gpu_shared_mem == NULL && gpu_shared_mem_init() < 0)
		return NULL;

	if (rte_gpu_get_by_name(name) != NULL) {
		GPU_LOG(ERR, "device with name %s already exists", name);
		rte_errno = EEXIST;
		return NULL;
	}
	dev_id = gpu_find_free_id();
	if (dev_id == RTE_GPU_ID_NONE) {
		GPU_LOG(ERR, "reached maximum number of devices");
		rte_errno = ENOENT;
		return NULL;
	}

	dev = &gpus[dev_id];
	memset(dev, 0, sizeof(*dev));

	dev->mpshared = &gpu_shared_mem->gpus[dev_id];
	memset(dev->mpshared, 0, sizeof(*dev->mpshared));

	if (rte_strscpy(dev->mpshared->name, name, RTE_DEV_NAME_MAX_LEN) < 0) {
		GPU_LOG(ERR, "device name too long: %s", name);
		rte_errno = ENAMETOOLONG;
		return NULL;
	}
	dev->mpshared->info.name = dev->mpshared->name;
	dev->mpshared->info.dev_id = dev_id;
	dev->mpshared->info.numa_node = -1;
	dev->mpshared->info.parent = RTE_GPU_ID_NONE;
	TAILQ_INIT(&dev->callbacks);
	__atomic_fetch_add(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);

	gpu_count++;
	GPU_LOG(DEBUG, "new device %s (id %d) of total %d",
			name, dev_id, gpu_count);
	return dev;
}

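/*
 * Driver-facing: in a secondary process, find the shared entry the primary
 * created for this name and bind the local slot to it, bumping the process
 * reference counter.
 */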
struct rte_gpu *
rte_gpu_attach(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;
	struct rte_gpu_mpshared *shared_dev;

	if (rte_eal_process_type() != RTE_PROC_SECONDARY) {
		GPU_LOG(ERR, "only secondary process can attach device");
		rte_errno = EPERM;
		return NULL;
	}
	if (name == NULL) {
		GPU_LOG(ERR, "attach device without a name");
		rte_errno = EINVAL;
		return NULL;
	}

	/* implicit initialization of library before adding first device */
	if (gpus == NULL && rte_gpu_init(RTE_GPU_DEFAULT_MAX) < 0)
		return NULL;

	/* initialize shared memory before adding first device */
	if (gpu_shared_mem == NULL && gpu_shared_mem_init() < 0)
		return NULL;

	for (dev_id = 0; dev_id < gpu_max; dev_id++) {
		shared_dev = &gpu_shared_mem->gpus[dev_id];
		if (strncmp(name, shared_dev->name, RTE_DEV_NAME_MAX_LEN) == 0)
			break;
	}
	if (dev_id >= gpu_max) {
		GPU_LOG(ERR, "device with name %s not found", name);
		rte_errno = ENOENT;
		return NULL;
	}
	dev = &gpus[dev_id];
	memset(dev, 0, sizeof(*dev));

	TAILQ_INIT(&dev->callbacks);
	dev->mpshared = shared_dev;
	__atomic_fetch_add(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);

	gpu_count++;
	GPU_LOG(DEBUG, "attached device %s (id %d) of total %d",
			name, dev_id, gpu_count);
	return dev;
}

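/*
 * Register a child device (for instance an additional context of an already
 * probed GPU) under an existing parent device ID.
 */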
int16_t
rte_gpu_add_child(const char *name, int16_t parent, uint64_t child_context)
{
	struct rte_gpu *dev;

	if (!rte_gpu_is_valid(parent)) {
		GPU_LOG(ERR, "add child to invalid parent ID %d", parent);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	dev = rte_gpu_allocate(name);
	if (dev == NULL)
		return -rte_errno;

	dev->mpshared->info.parent = parent;
	dev->mpshared->info.context = child_context;

	rte_gpu_complete_new(dev);
	return dev->mpshared->info.dev_id;
}

void
rte_gpu_complete_new(struct rte_gpu *dev)
{
	if (dev == NULL)
		return;

	dev->process_state = RTE_GPU_STATE_INITIALIZED;
	rte_gpu_notify(dev, RTE_GPU_EVENT_NEW);
}

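/*
 * Return a slot to the unused state. Releasing is refused while any other
 * device still lists this one as its parent.
 */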
int
rte_gpu_release(struct rte_gpu *dev)
{
	int16_t dev_id, child;

	if (dev == NULL) {
		rte_errno = ENODEV;
		return -rte_errno;
	}
	dev_id = dev->mpshared->info.dev_id;
	RTE_GPU_FOREACH_CHILD(child, dev_id) {
		GPU_LOG(ERR, "cannot release device %d with child %d",
				dev_id, child);
		rte_errno = EBUSY;
		return -rte_errno;
	}

	GPU_LOG(DEBUG, "free device %s (id %d)",
			dev->mpshared->info.name, dev->mpshared->info.dev_id);
	rte_gpu_notify(dev, RTE_GPU_EVENT_DEL);

	gpu_free_callbacks(dev);
	dev->process_state = RTE_GPU_STATE_UNUSED;
	__atomic_fetch_sub(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);
	gpu_count--;

	return 0;
}

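/*
 * Close the device through the driver, then release the slot. The first
 * failure is captured in firsterr and reported; once set, later results are
 * redirected to the scratch variable binerr so they cannot overwrite it.
 */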
int
rte_gpu_close(int16_t dev_id)
{
	int firsterr, binerr;
	int *lasterr = &firsterr;
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "close invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.dev_close != NULL) {
		*lasterr = GPU_DRV_RET(dev->ops.dev_close(dev));
		if (*lasterr != 0)
			lasterr = &binerr;
	}

	*lasterr = rte_gpu_release(dev);

	rte_errno = -firsterr;
	return firsterr;
}

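/*
 * Register an event callback. With RTE_GPU_ID_ANY the callback is linked on
 * every device slot; duplicates (same event, function and user_data) are
 * silently ignored.
 */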
int
rte_gpu_callback_register(int16_t dev_id, enum rte_gpu_event event,
		rte_gpu_callback_t *function, void *user_data)
{
	int16_t next_dev, last_dev;
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback;

	if (!rte_gpu_is_valid(dev_id) && dev_id != RTE_GPU_ID_ANY) {
		GPU_LOG(ERR, "register callback of invalid ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (function == NULL) {
		GPU_LOG(ERR, "cannot register callback without function");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (dev_id == RTE_GPU_ID_ANY) {
		next_dev = 0;
		last_dev = gpu_max - 1;
	} else {
		next_dev = last_dev = dev_id;
	}

	rte_rwlock_write_lock(&gpu_callback_lock);
	do {
		callbacks = &gpus[next_dev].callbacks;

		/* check if not already registered */
		TAILQ_FOREACH(callback, callbacks, next) {
			if (callback->event == event &&
					callback->function == function &&
					callback->user_data == user_data) {
				GPU_LOG(INFO, "callback already registered");
				rte_rwlock_write_unlock(&gpu_callback_lock);
				return 0;
			}
		}

		callback = malloc(sizeof(*callback));
		if (callback == NULL) {
			GPU_LOG(ERR, "cannot allocate callback");
			rte_rwlock_write_unlock(&gpu_callback_lock);
			return -ENOMEM;
		}
		callback->function = function;
		callback->user_data = user_data;
		callback->event = event;
		TAILQ_INSERT_TAIL(callbacks, callback, next);

	} while (++next_dev <= last_dev);
	rte_rwlock_write_unlock(&gpu_callback_lock);

	return 0;
}

int
rte_gpu_callback_unregister(int16_t dev_id, enum rte_gpu_event event,
		rte_gpu_callback_t *function, void *user_data)
{
	int16_t next_dev, last_dev;
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback, *nextcb;

	if (!rte_gpu_is_valid(dev_id) && dev_id != RTE_GPU_ID_ANY) {
		GPU_LOG(ERR, "unregister callback of invalid ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (function == NULL) {
		GPU_LOG(ERR, "cannot unregister callback without function");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (dev_id == RTE_GPU_ID_ANY) {
		next_dev = 0;
		last_dev = gpu_max - 1;
	} else {
		next_dev = last_dev = dev_id;
	}

	rte_rwlock_write_lock(&gpu_callback_lock);
	do {
		callbacks = &gpus[next_dev].callbacks;
		RTE_TAILQ_FOREACH_SAFE(callback, callbacks, next, nextcb) {
			if (callback->event != event ||
					callback->function != function ||
					(callback->user_data != user_data &&
					user_data != (void *)-1))
				continue;
			TAILQ_REMOVE(callbacks, callback, next);
			free(callback);
		}
	} while (++next_dev <= last_dev);
	rte_rwlock_write_unlock(&gpu_callback_lock);

	return 0;
}

static void
gpu_free_callbacks(struct rte_gpu *dev)
{
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback, *nextcb;

	callbacks = &dev->callbacks;
	rte_rwlock_write_lock(&gpu_callback_lock);
	RTE_TAILQ_FOREACH_SAFE(callback, callbacks, next, nextcb) {
		TAILQ_REMOVE(callbacks, callback, next);
		free(callback);
	}
	rte_rwlock_write_unlock(&gpu_callback_lock);
}

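/*
 * Run the callbacks registered for one event under the read lock. Called
 * with RTE_GPU_EVENT_NEW from rte_gpu_complete_new() and RTE_GPU_EVENT_DEL
 * from rte_gpu_release().
 */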
void
rte_gpu_notify(struct rte_gpu *dev, enum rte_gpu_event event)
{
	int16_t dev_id;
	struct rte_gpu_callback *callback;

	dev_id = dev->mpshared->info.dev_id;
	rte_rwlock_read_lock(&gpu_callback_lock);
	TAILQ_FOREACH(callback, &dev->callbacks, next) {
		if (callback->event != event || callback->function == NULL)
			continue;
		callback->function(dev_id, event, callback->user_data);
	}
	rte_rwlock_read_unlock(&gpu_callback_lock);
}

int
rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "query invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (info == NULL) {
		GPU_LOG(ERR, "query without storage");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (dev->ops.dev_info_get == NULL) {
		*info = dev->mpshared->info;
		return 0;
	}
	return GPU_DRV_RET(dev->ops.dev_info_get(dev, info));
}

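/*
 * Allocate device memory through the driver. ENOMEM and E2BIG from the
 * driver are passed through in rte_errno; any other failure is reported as a
 * permission error.
 */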
void *
rte_gpu_mem_alloc(int16_t dev_id, size_t size)
{
	struct rte_gpu *dev;
	void *ptr;
	int ret;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "alloc mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return NULL;
	}

	if (dev->ops.mem_alloc == NULL) {
		GPU_LOG(ERR, "mem allocation not supported");
		rte_errno = ENOTSUP;
		return NULL;
	}

	if (size == 0) /* dry-run */
		return NULL;

	ret = dev->ops.mem_alloc(dev, size, &ptr);

	switch (ret) {
	case 0:
		return ptr;
	case -ENOMEM:
	case -E2BIG:
		rte_errno = -ret;
		return NULL;
	default:
		rte_errno = EPERM;
		return NULL;
	}
}

int
rte_gpu_mem_free(int16_t dev_id, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "free mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_free == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_free(dev, ptr));
}

int
rte_gpu_mem_register(int16_t dev_id, size_t size, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "register mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_register == NULL) {
		GPU_LOG(ERR, "mem registration not supported");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL || size == 0) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_register(dev, size, ptr));
}

int
rte_gpu_mem_unregister(int16_t dev_id, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "unregister mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_unregister == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_unregister(dev, ptr));
}

int
rte_gpu_wmb(int16_t dev_id)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "memory barrier for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.wmb == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return GPU_DRV_RET(dev->ops.wmb(dev));
}

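/*
 * Communication flag: a single uint32_t in CPU memory (the only placement
 * supported here is RTE_GPU_COMM_FLAG_CPU), registered with the device so
 * that code running on the GPU can poll or update it.
 */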
int
rte_gpu_comm_create_flag(uint16_t dev_id, struct rte_gpu_comm_flag *devflag,
		enum rte_gpu_comm_flag_type mtype)
{
	size_t flag_size;
	int ret;

	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	flag_size = sizeof(uint32_t);

	devflag->ptr = rte_zmalloc(NULL, flag_size, 0);
	if (devflag->ptr == NULL) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	ret = rte_gpu_mem_register(dev_id, flag_size, devflag->ptr);
	if (ret < 0) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	devflag->mtype = mtype;
	devflag->dev_id = dev_id;

	return 0;
}

int
rte_gpu_comm_destroy_flag(struct rte_gpu_comm_flag *devflag)
{
	int ret;

	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	ret = rte_gpu_mem_unregister(devflag->dev_id, devflag->ptr);
	if (ret < 0) {
		rte_errno = EINVAL;
		return -1;
	}

	rte_free(devflag->ptr);

	return 0;
}

int
rte_gpu_comm_set_flag(struct rte_gpu_comm_flag *devflag, uint32_t val)
{
	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	RTE_GPU_VOLATILE(*devflag->ptr) = val;

	return 0;
}

int
rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag, uint32_t *val)
{
	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	*val = RTE_GPU_VOLATILE(*devflag->ptr);

	return 0;
}

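/*
 * Communication list: an array of items shared with the device. The CPU
 * fills an item and flips its status from FREE to READY in
 * rte_gpu_comm_populate_list_pkts(); the device-side workload is expected to
 * process it and move the status on (e.g. to a done state) before the CPU
 * recycles the item with rte_gpu_comm_cleanup_list().
 */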
struct rte_gpu_comm_list *
rte_gpu_comm_create_list(uint16_t dev_id,
		uint32_t num_comm_items)
{
	struct rte_gpu_comm_list *comm_list;
	uint32_t idx_l;
	int ret;
	struct rte_gpu *dev;

	if (num_comm_items == 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "create communication list for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return NULL;
	}

	comm_list = rte_zmalloc(NULL,
			sizeof(struct rte_gpu_comm_list) * num_comm_items, 0);
	if (comm_list == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	ret = rte_gpu_mem_register(dev_id,
			sizeof(struct rte_gpu_comm_list) * num_comm_items, comm_list);
	if (ret < 0) {
		rte_errno = ENOMEM;
		return NULL;
	}

	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
		comm_list[idx_l].pkt_list = rte_zmalloc(NULL,
				sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
		if (comm_list[idx_l].pkt_list == NULL) {
			rte_errno = ENOMEM;
			return NULL;
		}

		ret = rte_gpu_mem_register(dev_id,
				sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX,
				comm_list[idx_l].pkt_list);
		if (ret < 0) {
			rte_errno = ENOMEM;
			return NULL;
		}

		RTE_GPU_VOLATILE(comm_list[idx_l].status) = RTE_GPU_COMM_LIST_FREE;
		comm_list[idx_l].num_pkts = 0;
		comm_list[idx_l].dev_id = dev_id;

		comm_list[idx_l].mbufs = rte_zmalloc(NULL,
				sizeof(struct rte_mbuf *) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
		if (comm_list[idx_l].mbufs == NULL) {
			rte_errno = ENOMEM;
			return NULL;
		}
	}

	return comm_list;
}

int
rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
		uint32_t num_comm_items)
{
	uint32_t idx_l;
	int ret;
	uint16_t dev_id;

	if (comm_list == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	dev_id = comm_list[0].dev_id;

	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
		ret = rte_gpu_mem_unregister(dev_id, comm_list[idx_l].pkt_list);
		if (ret < 0) {
			rte_errno = EINVAL;
			return -1;
		}

		rte_free(comm_list[idx_l].pkt_list);
		rte_free(comm_list[idx_l].mbufs);
	}

	ret = rte_gpu_mem_unregister(dev_id, comm_list);
	if (ret < 0) {
		rte_errno = EINVAL;
		return -1;
	}

	rte_free(comm_list);

	return 0;
}

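/*
 * Publish a burst of single-segment mbufs into one list item: addresses and
 * count are written first, a device write barrier is issued via
 * rte_gpu_wmb(), and only then is the status set to READY so the device
 * never observes a partially filled item.
 */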
int
rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
		struct rte_mbuf **mbufs, uint32_t num_mbufs)
{
	uint32_t idx;

	if (comm_list_item == NULL || comm_list_item->pkt_list == NULL ||
			mbufs == NULL || num_mbufs > RTE_GPU_COMM_LIST_PKTS_MAX) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	for (idx = 0; idx < num_mbufs; idx++) {
		/* support only unchained mbufs */
		if (unlikely((mbufs[idx]->nb_segs > 1) ||
				(mbufs[idx]->next != NULL) ||
				(mbufs[idx]->data_len != mbufs[idx]->pkt_len))) {
			rte_errno = ENOTSUP;
			return -rte_errno;
		}
		comm_list_item->pkt_list[idx].addr =
				rte_pktmbuf_mtod_offset(mbufs[idx], uintptr_t, 0);
		comm_list_item->pkt_list[idx].size = mbufs[idx]->pkt_len;
		comm_list_item->mbufs[idx] = mbufs[idx];
	}

	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = num_mbufs;
	rte_gpu_wmb(comm_list_item->dev_id);
	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_READY;

	return 0;
}

int
rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
{
	uint32_t idx = 0;

	if (comm_list_item == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (RTE_GPU_VOLATILE(comm_list_item->status) ==
			RTE_GPU_COMM_LIST_READY) {
		GPU_LOG(ERR, "packet list is still in progress");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	for (idx = 0; idx < RTE_GPU_COMM_LIST_PKTS_MAX; idx++) {
		if (comm_list_item->pkt_list[idx].addr == 0)
			break;

		comm_list_item->pkt_list[idx].addr = 0;
		comm_list_item->pkt_list[idx].size = 0;
		comm_list_item->mbufs[idx] = NULL;
	}

	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_FREE;
	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = 0;
	rte_mb();

	return 0;
}