mirror of https://github.com/F-Stack/f-stack.git
486 lines
11 KiB
C
486 lines
11 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
|
|
* Copyright (c) 2021 NVIDIA Corporation & Affiliates
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <inttypes.h>
|
|
#include <stdarg.h>
|
|
#include <errno.h>
|
|
#include <getopt.h>
|
|
|
|
#include <rte_common.h>
|
|
#include <rte_malloc.h>
|
|
#include <rte_memory.h>
|
|
#include <rte_eal.h>
|
|
#include <rte_ether.h>
|
|
#include <rte_ethdev.h>
|
|
#include <rte_mempool.h>
|
|
#include <rte_mbuf.h>
|
|
|
|
#include <rte_gpudev.h>
|
|
|
|
/*
 * Identifiers for the application's long command-line options.
 *
 * ARG_HELP is the value stored in the getopt_long() option table for
 * "--help". ARG_MEMPOOL is not referenced by the option table in this file.
 */
enum app_args {
	ARG_HELP = 0,
	ARG_MEMPOOL = 1
};
|
|
|
|
/*
 * Print the one-line command synopsis on standard output.
 *
 * prog_name: text printed at the start of the synopsis (callers pass argv[0]).
 */
static void
usage(const char *prog_name)
{
	fprintf(stdout, "%s [EAL options] --\n", prog_name);
}
|
|
|
|
static void
|
|
args_parse(int argc, char **argv)
|
|
{
|
|
char **argvopt;
|
|
int opt;
|
|
int opt_idx;
|
|
|
|
static struct option lgopts[] = {
|
|
{ "help", 0, 0, ARG_HELP},
|
|
/* End of options */
|
|
{ 0, 0, 0, 0 }
|
|
};
|
|
|
|
argvopt = argv;
|
|
while ((opt = getopt_long(argc, argvopt, "",
|
|
lgopts, &opt_idx)) != EOF) {
|
|
switch (opt) {
|
|
case ARG_HELP:
|
|
usage(argv[0]);
|
|
break;
|
|
default:
|
|
usage(argv[0]);
|
|
rte_exit(EXIT_FAILURE, "Invalid option: %s\n", argv[optind]);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Exercise GPU memory allocation and release on one device.
 *
 * Steps, in order:
 *  - allocate one buffer with no alignment request and one with a 4096-byte
 *    alignment request, and check the second address against that alignment;
 *  - call rte_gpu_mem_free() on an address 0x700 bytes past the start of the
 *    first buffer and expect the call to fail;
 *  - free both buffers.
 *
 * gpu_id: device identifier passed to the rte_gpu_mem_* calls.
 *
 * Returns 0 when every step behaves as expected, -1 otherwise. On failure,
 * buffers that are still allocated are released before returning.
 */
static int
alloc_gpu_memory(uint16_t gpu_id)
{
	void *ptr_1 = NULL;
	void *ptr_2 = NULL;
	void *bogus_ptr;
	size_t buf_bytes = 1024;
	unsigned int align = 4096;
	int ret;

	printf("\n=======> TEST: Allocate GPU memory\n\n");

	/* Alloc memory on the GPU without any specific alignment */
	ptr_1 = rte_gpu_mem_alloc(gpu_id, buf_bytes, 0);
	if (ptr_1 == NULL) {
		fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n");
		goto error;
	}
	printf("GPU memory allocated at 0x%p size is %zu bytes\n",
			ptr_1, buf_bytes);

	/* Alloc memory on the GPU with 4kB alignment */
	ptr_2 = rte_gpu_mem_alloc(gpu_id, buf_bytes, align);
	if (ptr_2 == NULL) {
		fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n");
		goto error;
	}
	printf("GPU memory allocated at 0x%p size is %zu bytes\n",
			ptr_2, buf_bytes);

	if (((uintptr_t)ptr_2) % align) {
		fprintf(stderr, "Memory address 0x%p is not aligned to %u\n", ptr_2, align);
		goto error;
	}

	/*
	 * Negative test: an address that was not returned by the allocator
	 * must be rejected. The offset (0x700) is larger than buf_bytes, so
	 * the address is formed with integer arithmetic rather than pointer
	 * arithmetic beyond the buffer.
	 */
	bogus_ptr = (void *)((uintptr_t)ptr_1 + 0x700);
	ret = rte_gpu_mem_free(gpu_id, bogus_ptr);
	if (ret < 0) {
		printf("GPU memory 0x%p NOT freed: GPU driver didn't find this memory address internally.\n",
				bogus_ptr);
	} else {
		fprintf(stderr, "ERROR: rte_gpu_mem_free freed GPU memory 0x%p\n",
				bogus_ptr);
		goto error;
	}

	ret = rte_gpu_mem_free(gpu_id, ptr_2);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret);
		goto error;
	}
	printf("GPU memory 0x%p freed\n", ptr_2);
	/* Forget the pointer so the error path cannot free it a second time. */
	ptr_2 = NULL;

	ret = rte_gpu_mem_free(gpu_id, ptr_1);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret);
		goto error;
	}
	printf("GPU memory 0x%p freed\n", ptr_1);
	ptr_1 = NULL;

	printf("\n=======> TEST: PASSED\n");
	return 0;

error:
	/* Best-effort cleanup of whatever is still allocated. */
	if (ptr_1 != NULL)
		rte_gpu_mem_free(gpu_id, ptr_1);
	if (ptr_2 != NULL)
		rte_gpu_mem_free(gpu_id, ptr_2);

	printf("\n=======> TEST: FAILED\n");
	return -1;
}
|
|
|
|
/*
 * Exercise registration of CPU memory with one GPU device.
 *
 * Steps, in order:
 *  - allocate a 1024-byte zeroed buffer with rte_zmalloc() and register it
 *    with rte_gpu_mem_register();
 *  - call rte_gpu_mem_unregister() on an address 0x700 bytes past the start
 *    of the buffer and expect the call to fail;
 *  - unregister the real address and free the buffer.
 *
 * gpu_id: device identifier passed to the rte_gpu_mem_* calls.
 *
 * Returns 0 when every step behaves as expected, -1 otherwise.
 */
static int
register_cpu_memory(uint16_t gpu_id)
{
	void *ptr = NULL;
	void *bogus_ptr;
	size_t buf_bytes = 1024;
	int registered = 0;
	int ret;

	printf("\n=======> TEST: Register CPU memory\n\n");

	/* Alloc memory on CPU that will be made visible to the GPU */
	ptr = rte_zmalloc(NULL, buf_bytes, 0);
	if (ptr == NULL) {
		fprintf(stderr, "Failed to allocate CPU memory.\n");
		goto error;
	}

	ret = rte_gpu_mem_register(gpu_id, buf_bytes, ptr);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_register CPU memory returned error %d\n", ret);
		goto error;
	}
	registered = 1;
	printf("CPU memory registered at 0x%p %zuB\n", ptr, buf_bytes);

	/*
	 * Negative test: an address that was never registered must be
	 * rejected. The offset (0x700) is larger than buf_bytes, so the
	 * address is formed with integer arithmetic instead of pointer
	 * arithmetic that would leave the allocated object.
	 */
	bogus_ptr = (void *)((uintptr_t)ptr + 0x700);
	ret = rte_gpu_mem_unregister(gpu_id, bogus_ptr);
	if (ret < 0) {
		printf("CPU memory 0x%p NOT unregistered: GPU driver didn't find this memory address internally\n",
				bogus_ptr);
	} else {
		fprintf(stderr, "ERROR: rte_gpu_mem_unregister unregistered GPU memory 0x%p\n",
				bogus_ptr);
		goto error;
	}

	ret = rte_gpu_mem_unregister(gpu_id, ptr);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret);
		goto error;
	}
	registered = 0;
	printf("CPU memory 0x%p unregistered\n", ptr);

	rte_free(ptr);

	printf("\n=======> TEST: PASSED\n");
	return 0;

error:
	/* Only undo a registration that is known to be in place. */
	if (registered)
		rte_gpu_mem_unregister(gpu_id, ptr);
	rte_free(ptr);
	printf("\n=======> TEST: FAILED\n");
	return -1;
}
|
|
|
|
/*
 * Exercise mapping of GPU memory into the CPU address space.
 *
 * Steps, in order:
 *  - allocate a 1024-byte GPU buffer with a 4096-byte alignment request;
 *  - obtain a CPU mapping of it with rte_gpu_mem_cpu_map();
 *  - write three bytes through the mapping and print them back;
 *  - remove the mapping and free the GPU buffer.
 *
 * gpu_id: device identifier passed to the rte_gpu_mem_* calls.
 *
 * Returns 0 when every step succeeds, -1 otherwise.
 */
static int
gpu_mem_cpu_map(uint16_t gpu_id)
{
	void *ptr_gpu = NULL;
	void *ptr_cpu = NULL;
	uint8_t *cpu_bytes;
	size_t buf_bytes = 1024;
	unsigned int align = 4096;
	int cpu_mapped = 0;
	int ret;

	printf("\n=======> TEST: Map GPU memory for CPU visibility\n\n");

	/* Alloc memory on the GPU with 4kB alignment */
	ptr_gpu = rte_gpu_mem_alloc(gpu_id, buf_bytes, align);
	if (ptr_gpu == NULL) {
		fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n");
		goto error;
	}
	printf("GPU memory allocated at 0x%p size is %zu bytes\n",
			ptr_gpu, buf_bytes);

	ptr_cpu = rte_gpu_mem_cpu_map(gpu_id, buf_bytes, ptr_gpu);
	if (ptr_cpu == NULL) {
		fprintf(stderr, "rte_gpu_mem_cpu_map returned error\n");
		goto error;
	}
	cpu_mapped = 1;
	printf("GPU memory CPU mapped at 0x%p\n", ptr_cpu);

	/* Touch the buffer through the CPU mapping. */
	cpu_bytes = ptr_cpu;
	cpu_bytes[0] = 0x4;
	cpu_bytes[1] = 0x5;
	cpu_bytes[2] = 0x6;

	printf("GPU memory first 3 bytes set from CPU: %x %x %x\n",
			cpu_bytes[0],
			cpu_bytes[1],
			cpu_bytes[2]);

	/* The unmap call takes the GPU address, not the CPU mapping. */
	ret = rte_gpu_mem_cpu_unmap(gpu_id, ptr_gpu);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_cpu_unmap returned error %d\n", ret);
		goto error;
	}
	cpu_mapped = 0;
	printf("GPU memory CPU unmapped, 0x%p not valid anymore\n", ptr_cpu);

	ret = rte_gpu_mem_free(gpu_id, ptr_gpu);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret);
		goto error;
	}
	printf("GPU memory 0x%p freed\n", ptr_gpu);

	printf("\n=======> TEST: PASSED\n");
	return 0;

error:
	/* Undo only what is known to be in place. */
	if (cpu_mapped)
		rte_gpu_mem_cpu_unmap(gpu_id, ptr_gpu);
	if (ptr_gpu != NULL)
		rte_gpu_mem_free(gpu_id, ptr_gpu);

	printf("\n=======> TEST: FAILED\n");
	return -1;
}
|
|
|
|
static int
|
|
create_update_comm_flag(uint16_t gpu_id)
|
|
{
|
|
struct rte_gpu_comm_flag devflag;
|
|
int ret = 0;
|
|
uint32_t set_val;
|
|
uint32_t get_val;
|
|
|
|
printf("\n=======> TEST: Communication flag\n\n");
|
|
|
|
ret = rte_gpu_comm_create_flag(gpu_id, &devflag, RTE_GPU_COMM_FLAG_CPU);
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_create_flag returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
set_val = 25;
|
|
ret = rte_gpu_comm_set_flag(&devflag, set_val);
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_set_flag returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
ret = rte_gpu_comm_get_flag_value(&devflag, &get_val);
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_get_flag_value returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
printf("Communication flag value at 0x%p was set to %d and current value is %d\n",
|
|
devflag.ptr, set_val, get_val);
|
|
|
|
set_val = 38;
|
|
ret = rte_gpu_comm_set_flag(&devflag, set_val);
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_set_flag returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
ret = rte_gpu_comm_get_flag_value(&devflag, &get_val);
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_get_flag_value returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
printf("Communication flag value at 0x%p was set to %d and current value is %d\n",
|
|
devflag.ptr, set_val, get_val);
|
|
|
|
ret = rte_gpu_comm_destroy_flag(&devflag);
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_destroy_flags returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
printf("\n=======> TEST: PASSED\n");
|
|
return 0;
|
|
|
|
error:
|
|
|
|
rte_gpu_comm_destroy_flag(&devflag);
|
|
printf("\n=======> TEST: FAILED\n");
|
|
return -1;
|
|
}
|
|
|
|
static int
|
|
simulate_gpu_task(struct rte_gpu_comm_list *comm_list_item, int num_pkts)
|
|
{
|
|
int idx;
|
|
|
|
if (comm_list_item == NULL)
|
|
return -1;
|
|
|
|
for (idx = 0; idx < num_pkts; idx++) {
|
|
/**
|
|
* consume(comm_list_item->pkt_list[idx].addr);
|
|
*/
|
|
}
|
|
/*
|
|
* A real GPU workload function can't directly call rte_gpu_comm_set_status
|
|
* because it's a CPU-only function.
|
|
* A real GPU workload should implement the content
|
|
* of rte_gpu_comm_set_status() in GPU specific code.
|
|
*/
|
|
rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_DONE);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
create_update_comm_list(uint16_t gpu_id)
|
|
{
|
|
int ret = 0;
|
|
int i = 0;
|
|
struct rte_gpu_comm_list *comm_list = NULL;
|
|
uint32_t num_comm_items = 1024;
|
|
struct rte_mbuf *mbufs[10];
|
|
|
|
printf("\n=======> TEST: Communication list\n\n");
|
|
|
|
comm_list = rte_gpu_comm_create_list(gpu_id, num_comm_items);
|
|
if (comm_list == NULL) {
|
|
fprintf(stderr, "rte_gpu_comm_create_list returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* Simulate DPDK receive functions like rte_eth_rx_burst()
|
|
*/
|
|
for (i = 0; i < 10; i++) {
|
|
mbufs[i] = rte_zmalloc(NULL, sizeof(struct rte_mbuf), 0);
|
|
if (mbufs[i] == NULL) {
|
|
fprintf(stderr, "Failed to allocate fake mbufs in CPU memory.\n");
|
|
goto error;
|
|
}
|
|
|
|
memset(mbufs[i], 0, sizeof(struct rte_mbuf));
|
|
}
|
|
|
|
/**
|
|
* Populate just the first item of the list
|
|
*/
|
|
ret = rte_gpu_comm_populate_list_pkts(&(comm_list[0]), mbufs, 10);
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_populate_list_pkts returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
ret = rte_gpu_comm_cleanup_list(&(comm_list[0]));
|
|
if (ret == 0) {
|
|
fprintf(stderr, "rte_gpu_comm_cleanup_list erroneously cleaned the list even if packets have not been consumed yet\n");
|
|
goto error;
|
|
}
|
|
printf("Communication list not cleaned because packets have not been consumed yet.\n");
|
|
|
|
/**
|
|
* Simulate a GPU tasks going through the packet list to consume
|
|
* mbufs packets and release them
|
|
*/
|
|
printf("Consuming packets...\n");
|
|
simulate_gpu_task(&(comm_list[0]), 10);
|
|
|
|
/**
|
|
* Packets have been consumed, now the communication item
|
|
* and the related mbufs can be all released
|
|
*/
|
|
ret = rte_gpu_comm_cleanup_list(&(comm_list[0]));
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_cleanup_list returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
printf("Communication list cleaned because packets have been consumed now.\n");
|
|
|
|
ret = rte_gpu_comm_destroy_list(comm_list, num_comm_items);
|
|
if (ret < 0) {
|
|
fprintf(stderr, "rte_gpu_comm_destroy_list returned error %d\n", ret);
|
|
goto error;
|
|
}
|
|
|
|
for (i = 0; i < 10; i++)
|
|
rte_free(mbufs[i]);
|
|
|
|
printf("\n=======> TEST: PASSED\n");
|
|
return 0;
|
|
|
|
error:
|
|
|
|
rte_gpu_comm_destroy_list(comm_list, num_comm_items);
|
|
for (i = 0; i < 10; i++)
|
|
rte_free(mbufs[i]);
|
|
printf("\n=======> TEST: FAILED\n");
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
int ret;
|
|
int nb_gpus = 0;
|
|
int16_t gpu_id = 0;
|
|
struct rte_gpu_info ginfo;
|
|
|
|
/* Init EAL. */
|
|
ret = rte_eal_init(argc, argv);
|
|
if (ret < 0)
|
|
rte_exit(EXIT_FAILURE, "EAL init failed\n");
|
|
argc -= ret;
|
|
argv += ret;
|
|
if (argc > 1)
|
|
args_parse(argc, argv);
|
|
argc -= ret;
|
|
argv += ret;
|
|
|
|
nb_gpus = rte_gpu_count_avail();
|
|
printf("\n\nDPDK found %d GPUs:\n", nb_gpus);
|
|
RTE_GPU_FOREACH(gpu_id)
|
|
{
|
|
if (rte_gpu_info_get(gpu_id, &ginfo))
|
|
rte_exit(EXIT_FAILURE, "rte_gpu_info_get error - bye\n");
|
|
|
|
printf("\tGPU ID %d\n\t\tparent ID %d GPU Bus ID %s NUMA node %d Tot memory %.02f MB, Tot processors %d\n",
|
|
ginfo.dev_id,
|
|
ginfo.parent,
|
|
ginfo.name,
|
|
ginfo.numa_node,
|
|
(((float)ginfo.total_memory)/(float)1024)/(float)1024,
|
|
ginfo.processor_count
|
|
);
|
|
}
|
|
printf("\n\n");
|
|
|
|
if (nb_gpus == 0) {
|
|
fprintf(stderr, "Need at least one GPU on the system to run the example\n");
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
gpu_id = 0;
|
|
|
|
/**
|
|
* Memory tests
|
|
*/
|
|
alloc_gpu_memory(gpu_id);
|
|
register_cpu_memory(gpu_id);
|
|
gpu_mem_cpu_map(gpu_id);
|
|
|
|
/**
|
|
* Communication items test
|
|
*/
|
|
create_update_comm_flag(gpu_id);
|
|
create_update_comm_list(gpu_id);
|
|
|
|
/* clean up the EAL */
|
|
rte_eal_cleanup();
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|