f-stack/dpdk/drivers/ml/cnxk/cn10k_ml_ops.c

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2022 Marvell.
*/
#include <rte_mldev.h>
#include <rte_mldev_pmd.h>
#include <mldev_utils.h>
#include "cnxk_ml_dev.h"
#include "cnxk_ml_model.h"
#include "cnxk_ml_ops.h"
#include "cnxk_ml_xstats.h"
/* ML model macros */
#define CN10K_ML_MODEL_MEMZONE_NAME "ml_cn10k_model_mz"
/* ML layer macros */
#define CN10K_ML_LAYER_MEMZONE_NAME "ml_cn10k_layer_mz"
/* ML Job descriptor flags */
#define ML_FLAGS_POLL_COMPL BIT(0)
#define ML_FLAGS_SSO_COMPL BIT(1)
/* Hardware non-fatal error subtype database */
static struct cnxk_ml_error_db ml_stype_db_hw_nf[] = {
{ML_CN10K_FW_ERR_NOERR, "NO ERROR"},
{ML_CN10K_FW_ERR_UNLOAD_ID_NOT_FOUND, "UNLOAD MODEL ID NOT FOUND"},
{ML_CN10K_FW_ERR_LOAD_LUT_OVERFLOW, "LOAD LUT OVERFLOW"},
{ML_CN10K_FW_ERR_ID_IN_USE, "MODEL ID IN USE"},
{ML_CN10K_FW_ERR_INVALID_TILEMASK, "INVALID TILEMASK"},
{ML_CN10K_FW_ERR_RUN_LUT_OVERFLOW, "RUN LUT OVERFLOW"},
{ML_CN10K_FW_ERR_RUN_ID_NOT_FOUND, "RUN MODEL ID NOT FOUND"},
{ML_CN10K_FW_ERR_COMMAND_NOTSUP, "COMMAND NOT SUPPORTED"},
{ML_CN10K_FW_ERR_DDR_ADDR_RANGE, "DDR ADDRESS OUT OF RANGE"},
{ML_CN10K_FW_ERR_NUM_BATCHES_INVALID, "INVALID BATCHES"},
{ML_CN10K_FW_ERR_INSSYNC_TIMEOUT, "INSSYNC TIMEOUT"},
};
/* Driver error subtype database */
static struct cnxk_ml_error_db ml_stype_db_driver[] = {
{ML_CN10K_DRIVER_ERR_NOERR, "NO ERROR"},
{ML_CN10K_DRIVER_ERR_UNKNOWN, "UNKNOWN ERROR"},
{ML_CN10K_DRIVER_ERR_EXCEPTION, "FW EXCEPTION"},
{ML_CN10K_DRIVER_ERR_FW_ERROR, "UNKNOWN FIRMWARE ERROR"},
};
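/* Point the request's completion poll address at the status word embedded in
 * the request itself.
 */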
__rte_hot void
cn10k_ml_set_poll_addr(struct cnxk_ml_req *req)
{
req->status = &req->cn10k_req.status;
}
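/* Initialize the queue-pair job commands: clear each request's job descriptor
 * and point the corresponding job command's jobptr at it.
 */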
void
cn10k_ml_qp_initialize(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_qp *qp)
{
uint64_t i;
RTE_SET_USED(cnxk_mldev);
/* Initialize job command */
for (i = 0; i < qp->nb_desc; i++) {
memset(&qp->queue.reqs[i].cn10k_req.jd, 0, sizeof(struct cn10k_ml_jd));
qp->queue.reqs[i].cn10k_req.jcmd.w1.s.jobptr =
PLT_U64_CAST(&qp->queue.reqs[i].cn10k_req.jd);
}
}
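/* Prepare a slow-path job descriptor that is enqueued through the scratch
 * registers (MODEL_START / MODEL_STOP). For MODEL_START the descriptor also
 * carries the model object layout, DDR/OCM write-back and I/O ranges and the
 * extended arguments; tilemask and ocm_wb_base_address are filled in later,
 * after OCM pages have been reserved.
 */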
static void
cn10k_ml_prep_sp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer,
struct cnxk_ml_req *req, enum cn10k_ml_job_type job_type)
{
struct cn10k_ml_model_metadata *metadata;
struct cn10k_ml_layer_addr *addr;
struct cn10k_ml_dev *cn10k_mldev;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
metadata = &layer->glow.metadata;
addr = &layer->glow.addr;
memset(&req->cn10k_req.jd, 0, sizeof(struct cn10k_ml_jd));
req->cn10k_req.jd.hdr.jce.w0.u64 = 0;
req->cn10k_req.jd.hdr.jce.w1.u64 = PLT_U64_CAST(&req->cn10k_req.status);
req->cn10k_req.jd.hdr.model_id = layer->index;
req->cn10k_req.jd.hdr.job_type = job_type;
req->cn10k_req.jd.hdr.fp_flags = 0x0;
req->cn10k_req.jd.hdr.result =
roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &req->cn10k_req.result);
if (job_type == ML_CN10K_JOB_TYPE_MODEL_START) {
if (!layer->glow.metadata.model.ocm_relocatable)
req->cn10k_req.jd.hdr.sp_flags = ML_CN10K_SP_FLAGS_OCM_NONRELOCATABLE;
else
req->cn10k_req.jd.hdr.sp_flags = 0x0;
req->cn10k_req.jd.hdr.sp_flags |= ML_CN10K_SP_FLAGS_EXTENDED_LOAD_JD;
req->cn10k_req.jd.model_start.extended_args = PLT_U64_CAST(
roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &req->cn10k_req.extended_args));
req->cn10k_req.jd.model_start.model_dst_ddr_addr =
PLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, addr->init_load_addr));
req->cn10k_req.jd.model_start.model_init_offset = 0x0;
req->cn10k_req.jd.model_start.model_main_offset = metadata->init_model.file_size;
req->cn10k_req.jd.model_start.model_finish_offset =
metadata->init_model.file_size + metadata->main_model.file_size;
req->cn10k_req.jd.model_start.model_init_size = metadata->init_model.file_size;
req->cn10k_req.jd.model_start.model_main_size = metadata->main_model.file_size;
req->cn10k_req.jd.model_start.model_finish_size = metadata->finish_model.file_size;
req->cn10k_req.jd.model_start.model_wb_offset = metadata->init_model.file_size +
metadata->main_model.file_size +
metadata->finish_model.file_size;
req->cn10k_req.jd.model_start.num_layers = metadata->model.num_layers;
req->cn10k_req.jd.model_start.num_gather_entries = 0;
req->cn10k_req.jd.model_start.num_scatter_entries = 0;
req->cn10k_req.jd.model_start.tilemask = 0; /* Updated after reserving pages */
req->cn10k_req.jd.model_start.batch_size = layer->batch_size;
req->cn10k_req.jd.model_start.ocm_wb_base_address =
0; /* Updated after reserving pages */
req->cn10k_req.jd.model_start.ocm_wb_range_start =
metadata->model.ocm_wb_range_start;
req->cn10k_req.jd.model_start.ocm_wb_range_end = metadata->model.ocm_wb_range_end;
req->cn10k_req.jd.model_start.ddr_wb_base_address =
PLT_U64_CAST(roc_ml_addr_ap2mlip(
&cn10k_mldev->roc, PLT_PTR_ADD(addr->finish_load_addr,
metadata->finish_model.file_size)));
req->cn10k_req.jd.model_start.ddr_wb_range_start =
metadata->model.ddr_wb_range_start;
req->cn10k_req.jd.model_start.ddr_wb_range_end = metadata->model.ddr_wb_range_end;
req->cn10k_req.jd.model_start.input.s.ddr_range_start =
metadata->model.ddr_input_range_start;
req->cn10k_req.jd.model_start.input.s.ddr_range_end =
metadata->model.ddr_input_range_end;
req->cn10k_req.jd.model_start.output.s.ddr_range_start =
metadata->model.ddr_output_range_start;
req->cn10k_req.jd.model_start.output.s.ddr_range_end =
metadata->model.ddr_output_range_end;
req->cn10k_req.extended_args.start.ddr_scratch_base_address = PLT_U64_CAST(
roc_ml_addr_ap2mlip(&cn10k_mldev->roc, addr->scratch_base_addr));
req->cn10k_req.extended_args.start.ddr_scratch_range_start =
metadata->model.ddr_scratch_range_start;
req->cn10k_req.extended_args.start.ddr_scratch_range_end =
metadata->model.ddr_scratch_range_end;
}
}
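/* Prepare a fast-path MODEL_RUN job descriptor: completion poll address,
 * result pointer and the input/output buffers translated from AP to MLIP
 * address space.
 */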
static __rte_always_inline void
cn10k_ml_prep_fp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_req *req,
uint16_t index, void *input, void *output, uint16_t nb_batches)
{
struct cn10k_ml_dev *cn10k_mldev;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
req->cn10k_req.jd.hdr.jce.w0.u64 = 0;
req->cn10k_req.jd.hdr.jce.w1.u64 = PLT_U64_CAST(req->status);
req->cn10k_req.jd.hdr.model_id = index;
req->cn10k_req.jd.hdr.job_type = ML_CN10K_JOB_TYPE_MODEL_RUN;
req->cn10k_req.jd.hdr.fp_flags = ML_FLAGS_POLL_COMPL;
req->cn10k_req.jd.hdr.sp_flags = 0x0;
req->cn10k_req.jd.hdr.result =
roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &req->cn10k_req.result);
req->cn10k_req.jd.model_run.input_ddr_addr =
PLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, input));
req->cn10k_req.jd.model_run.output_ddr_addr =
PLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, output));
req->cn10k_req.jd.model_run.num_batches = nb_batches;
}
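/* Regenerate the per-layer xstat names as "<layer-name>-<stat>-<suffix>",
 * where the suffix is "ns" when the SCLK frequency is known and "cycles"
 * otherwise.
 */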
static void
cn10k_ml_xstats_layer_name_update(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id,
uint16_t layer_id)
{
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
uint16_t rclk_freq;
uint16_t sclk_freq;
uint16_t stat_id;
char suffix[8];
uint16_t i;
model = cnxk_mldev->mldev->data->models[model_id];
layer = &model->layer[layer_id];
stat_id = cnxk_mldev->xstats.offset_for_layer[model_id][layer_id];
roc_clk_freq_get(&rclk_freq, &sclk_freq);
if (sclk_freq == 0)
strcpy(suffix, "cycles");
else
strcpy(suffix, "ns");
/* Update xstat name based on layer name and sclk availability */
for (i = 0; i < RTE_DIM(layer_xstats); i++) {
snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s",
layer->glow.metadata.model.name, layer_xstats[i].name, suffix);
stat_id++;
}
}
void
cn10k_ml_xstat_model_name_set(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model,
uint16_t stat_id, uint16_t entry, char *suffix)
{
snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s",
model->glow.metadata.model.name, model_xstats[entry].name, suffix);
}
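/* Helpers to aggregate a latency statistic across the burst xstats of every
 * queue pair plus the sync xstats. ML_AVG_FOREACH_QP divides the accumulated
 * total by the number of requests dequeued since the last reset, while
 * ML_MIN_FOREACH_QP / ML_MAX_FOREACH_QP track the minimum / maximum and
 * report 0 when no request has completed. 'str' selects the hw or fw
 * counters, e.g. ML_AVG_FOREACH_QP(cnxk_mldev, layer, qp_id, hw, value, count).
 */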
#define ML_AVG_FOREACH_QP(cnxk_mldev, layer, qp_id, str, value, count) \
do { \
value = 0; \
for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) { \
value += layer->glow.burst_xstats[qp_id].str##_latency_tot; \
count += layer->glow.burst_xstats[qp_id].dequeued_count - \
layer->glow.burst_xstats[qp_id].str##_reset_count; \
} \
value += layer->glow.sync_xstats->str##_latency_tot; \
count += layer->glow.sync_xstats->dequeued_count - \
layer->glow.sync_xstats->str##_reset_count; \
if (count != 0) \
value = value / count; \
} while (0)
#define ML_MIN_FOREACH_QP(cnxk_mldev, layer, qp_id, str, value, count) \
do { \
value = UINT64_MAX; \
for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) { \
value = PLT_MIN(value, layer->glow.burst_xstats[qp_id].str##_latency_min); \
count += layer->glow.burst_xstats[qp_id].dequeued_count - \
layer->glow.burst_xstats[qp_id].str##_reset_count; \
} \
value = PLT_MIN(value, layer->glow.sync_xstats->str##_latency_min); \
count += layer->glow.sync_xstats->dequeued_count - \
layer->glow.sync_xstats->str##_reset_count; \
if (count == 0) \
value = 0; \
} while (0)
#define ML_MAX_FOREACH_QP(cnxk_mldev, layer, qp_id, str, value, count) \
do { \
value = 0; \
for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) { \
value = PLT_MAX(value, layer->glow.burst_xstats[qp_id].str##_latency_max); \
count += layer->glow.burst_xstats[qp_id].dequeued_count - \
layer->glow.burst_xstats[qp_id].str##_reset_count; \
} \
value = PLT_MAX(value, layer->glow.sync_xstats->str##_latency_max); \
count += layer->glow.sync_xstats->dequeued_count - \
layer->glow.sync_xstats->str##_reset_count; \
if (count == 0) \
value = 0; \
} while (0)
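/* Return the requested latency statistic (average / minimum / maximum of the
 * hardware or firmware latency) for a layer, aggregated over all queue pairs
 * and the sync path.
 */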
uint64_t
cn10k_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer,
enum cnxk_ml_xstats_type type)
{
uint64_t count = 0;
uint64_t value = 0;
uint32_t qp_id;
switch (type) {
case avg_hw_latency:
ML_AVG_FOREACH_QP(cnxk_mldev, layer, qp_id, hw, value, count);
break;
case min_hw_latency:
ML_MIN_FOREACH_QP(cnxk_mldev, layer, qp_id, hw, value, count);
break;
case max_hw_latency:
ML_MAX_FOREACH_QP(cnxk_mldev, layer, qp_id, hw, value, count);
break;
case avg_fw_latency:
ML_AVG_FOREACH_QP(cnxk_mldev, layer, qp_id, fw, value, count);
break;
case min_fw_latency:
ML_MIN_FOREACH_QP(cnxk_mldev, layer, qp_id, fw, value, count);
break;
case max_fw_latency:
ML_MAX_FOREACH_QP(cnxk_mldev, layer, qp_id, fw, value, count);
break;
default:
value = 0;
}
return value;
}
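/* Run one dummy synchronous inference on zero-filled I/O buffers so that the
 * model data gets pre-cached, then clear the sync xstats so that the warm-up
 * run does not skew the latency statistics.
 */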
static int
cn10k_ml_cache_model_data(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer)
{
struct cn10k_ml_layer_xstats *xstats;
char str[RTE_MEMZONE_NAMESIZE];
const struct plt_memzone *mz;
uint64_t isize = 0;
uint64_t osize = 0;
int ret = 0;
/* Create input and output buffers. */
isize = layer->info.total_input_sz_q;
osize = layer->info.total_output_sz_q;
snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", "ml_dummy_io", layer->index);
mz = plt_memzone_reserve_aligned(str, isize + osize, 0, ML_CN10K_ALIGN_SIZE);
if (mz == NULL)
return -ENOMEM;
memset(mz->addr, 0, isize + osize);
memset(layer->glow.req, 0, sizeof(struct cnxk_ml_req));
ret = cn10k_ml_inference_sync(cnxk_mldev, layer->index, mz->addr,
PLT_PTR_ADD(mz->addr, isize), 1);
plt_memzone_free(mz);
/* Reset sync xstats. */
xstats = layer->glow.sync_xstats;
xstats->hw_latency_tot = 0;
xstats->hw_latency_min = UINT64_MAX;
xstats->hw_latency_max = 0;
xstats->fw_latency_tot = 0;
xstats->fw_latency_min = UINT64_MAX;
xstats->fw_latency_max = 0;
xstats->dequeued_count = 0;
return ret;
}
int
cn10k_ml_dev_info_get(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_dev_info *dev_info)
{
struct cn10k_ml_dev *cn10k_mldev;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
if (cn10k_mldev->hw_queue_lock)
dev_info->max_queue_pairs = ML_CN10K_MAX_QP_PER_DEVICE_SL;
else
dev_info->max_queue_pairs = ML_CN10K_MAX_QP_PER_DEVICE_LF;
dev_info->max_desc = ML_CN10K_MAX_DESC_PER_QP;
dev_info->max_io = ML_CN10K_MAX_INPUT_OUTPUT;
dev_info->max_segments = ML_CN10K_MAX_SEGMENTS;
dev_info->align_size = ML_CN10K_ALIGN_SIZE;
return 0;
}
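/* Configure OCM bookkeeping (number of tiles, page size and per-tile
 * allocation masks) and select the JCMDQ enqueue routine based on the
 * hardware queue-lock capability.
 */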
int
cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_config *conf)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cn10k_ml_ocm *ocm;
uint16_t tile_id;
RTE_SET_USED(conf);
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
ocm = &cn10k_mldev->ocm;
ocm->num_tiles = ML_CN10K_OCM_NUMTILES;
ocm->size_per_tile = ML_CN10K_OCM_TILESIZE;
ocm->page_size = cn10k_mldev->ocm_page_size;
ocm->num_pages = ocm->size_per_tile / ocm->page_size;
ocm->mask_words = ocm->num_pages / (8 * sizeof(uint8_t));
/* Allocate memory for ocm_mask */
ocm->ocm_mask =
rte_zmalloc("ocm_mask", ocm->mask_words * ocm->num_tiles, RTE_CACHE_LINE_SIZE);
if (ocm->ocm_mask == NULL) {
plt_err("Unable to allocate memory for OCM mask");
return -ENOMEM;
}
for (tile_id = 0; tile_id < ocm->num_tiles; tile_id++) {
ocm->tile_ocm_info[tile_id].ocm_mask = ocm->ocm_mask + tile_id * ocm->mask_words;
ocm->tile_ocm_info[tile_id].last_wb_page = -1;
}
rte_spinlock_init(&ocm->lock);
/* Set JCMDQ enqueue function */
if (cn10k_mldev->hw_queue_lock == 1)
cn10k_mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_sl;
else
cn10k_mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_lf;
return 0;
}
int
cn10k_ml_dev_close(struct cnxk_ml_dev *cnxk_mldev)
{
struct cn10k_ml_dev *cn10k_mldev;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
/* Release ocm_mask memory */
rte_free(cn10k_mldev->ocm.ocm_mask);
/* Unload firmware */
cn10k_ml_fw_unload(cnxk_mldev);
/* Clear scratch registers */
roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_WORK_PTR);
roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_FW_CTRL);
roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_HEAD_C0);
roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_TAIL_C0);
roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_HEAD_C1);
roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_TAIL_C1);
/* Reset ML_MLR_BASE */
roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_MLR_BASE);
plt_ml_dbg("ML_MLR_BASE = 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_MLR_BASE));
return 0;
}
int
cn10k_ml_dev_start(struct cnxk_ml_dev *cnxk_mldev)
{
struct cn10k_ml_dev *cn10k_mldev;
uint64_t reg_val64;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
reg_val64 |= ROC_ML_CFG_ENA;
roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);
plt_ml_dbg("ML_CFG => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG));
return 0;
}
int
cn10k_ml_dev_stop(struct cnxk_ml_dev *cnxk_mldev)
{
struct cn10k_ml_dev *cn10k_mldev;
uint64_t reg_val64;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
reg_val64 &= ~ROC_ML_CFG_ENA;
roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);
plt_ml_dbg("ML_CFG => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG));
return 0;
}
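/* Dump device state: OCM usage, the firmware debug ring buffers of both cores
 * (head and tail tracked through scratch registers) and, when an exception
 * stack-pointer register is non-zero, the corresponding exception buffer.
 */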
int
cn10k_ml_dev_dump(struct cnxk_ml_dev *cnxk_mldev, FILE *fp)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cn10k_ml_fw *fw;
uint32_t head_loc;
uint32_t tail_loc;
uint32_t bufsize;
char *head_ptr;
int core_id;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
fw = &cn10k_mldev->fw;
/* Dump OCM state */
cn10k_ml_ocm_print(cnxk_mldev, fp);
if (roc_env_is_asim())
return 0;
/* Dump debug buffer */
for (core_id = 0; core_id <= 1; core_id++) {
bufsize = fw->req->cn10k_req.jd.fw_load.debug.debug_buffer_size;
if (core_id == 0) {
head_loc =
roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_DBG_BUFFER_HEAD_C0);
tail_loc =
roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_DBG_BUFFER_TAIL_C0);
head_ptr =
PLT_PTR_CAST(fw->req->cn10k_req.jd.fw_load.debug.core0_debug_ptr);
head_ptr = roc_ml_addr_mlip2ap(&cn10k_mldev->roc, head_ptr);
} else {
head_loc =
roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_DBG_BUFFER_HEAD_C1);
tail_loc =
roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_DBG_BUFFER_TAIL_C1);
head_ptr =
PLT_PTR_CAST(fw->req->cn10k_req.jd.fw_load.debug.core1_debug_ptr);
head_ptr = roc_ml_addr_mlip2ap(&cn10k_mldev->roc, head_ptr);
}
if (head_loc < tail_loc) {
fprintf(fp, "%.*s\n", tail_loc - head_loc, &head_ptr[head_loc]);
} else if (head_loc >= tail_loc + 1) {
fprintf(fp, "%.*s\n", bufsize - tail_loc, &head_ptr[head_loc]);
fprintf(fp, "%.*s\n", tail_loc, &head_ptr[0]);
}
}
/* Dump exception info */
for (core_id = 0; core_id <= 1; core_id++) {
bufsize = fw->req->cn10k_req.jd.fw_load.debug.exception_state_size;
if ((core_id == 0) &&
(roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_EXCEPTION_SP_C0) != 0)) {
head_ptr = PLT_PTR_CAST(
fw->req->cn10k_req.jd.fw_load.debug.core0_exception_buffer);
fprintf(fp, "ML_SCRATCH_EXCEPTION_SP_C0 = 0x%016lx",
roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_EXCEPTION_SP_C0));
head_ptr = roc_ml_addr_mlip2ap(&cn10k_mldev->roc, head_ptr);
fprintf(fp, "%.*s", bufsize, head_ptr);
} else if ((core_id == 1) && (roc_ml_reg_read64(&cn10k_mldev->roc,
ML_SCRATCH_EXCEPTION_SP_C1) != 0)) {
head_ptr = PLT_PTR_CAST(
fw->req->cn10k_req.jd.fw_load.debug.core1_exception_buffer);
fprintf(fp, "ML_SCRATCH_EXCEPTION_SP_C1 = 0x%016lx",
roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_EXCEPTION_SP_C1));
head_ptr = roc_ml_addr_mlip2ap(&cn10k_mldev->roc, head_ptr);
fprintf(fp, "%.*s", bufsize, head_ptr);
}
}
return 0;
}
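/* Run the firmware self-test: build a FIRMWARE_SELFTEST job descriptor in a
 * temporary memzone, enqueue it through the scratch registers and poll for
 * completion until ML_CNXK_CMD_TIMEOUT expires.
 */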
int
cn10k_ml_dev_selftest(struct cnxk_ml_dev *cnxk_mldev)
{
struct cn10k_ml_dev *cn10k_mldev;
const struct plt_memzone *mz;
struct cnxk_ml_req *req;
uint64_t timeout_cycle;
bool timeout;
int ret;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
mz = plt_memzone_reserve_aligned("dev_selftest", sizeof(struct cnxk_ml_req), 0,
ML_CN10K_ALIGN_SIZE);
if (mz == NULL) {
plt_err("Could not allocate reserved memzone");
return -ENOMEM;
}
req = mz->addr;
	/* Prepare selftest job descriptor and completion status */
memset(&req->cn10k_req.jd, 0, sizeof(struct cn10k_ml_jd));
req->cn10k_req.jd.hdr.jce.w1.u64 = PLT_U64_CAST(&req->cn10k_req.status);
req->cn10k_req.jd.hdr.job_type = ML_CN10K_JOB_TYPE_FIRMWARE_SELFTEST;
req->cn10k_req.jd.hdr.result =
roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &req->cn10k_req.result);
req->cn10k_req.jd.fw_load.flags = cn10k_ml_fw_flags_get(&cn10k_mldev->fw);
plt_write64(ML_CNXK_POLL_JOB_START, &req->cn10k_req.status);
plt_wmb();
/* Enqueue firmware selftest request through scratch registers */
timeout = true;
timeout_cycle = plt_tsc_cycles() + ML_CNXK_CMD_TIMEOUT * plt_tsc_hz();
roc_ml_scratch_enqueue(&cn10k_mldev->roc, &req->cn10k_req.jd);
plt_rmb();
do {
if (roc_ml_scratch_is_done_bit_set(&cn10k_mldev->roc) &&
(plt_read64(&req->cn10k_req.status) == ML_CNXK_POLL_JOB_FINISH)) {
timeout = false;
break;
}
} while (plt_tsc_cycles() < timeout_cycle);
/* Check firmware selftest status, clean-up and exit */
ret = 0;
if (timeout) {
ret = -ETIME;
} else {
if (req->cn10k_req.result.error_code != 0)
ret = -1;
}
plt_memzone_free(mz);
return ret;
}
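/* Load a single layer: validate the glow metadata, pick a free index slot,
 * reserve a memzone sized for the layer objects, DDR scratch, slow-path
 * request and xstats, copy the metadata, set up DMA/scratch addresses, the
 * OCM map and per queue-pair xstats, and register the layer in the index map.
 */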
int
cn10k_ml_layer_load(void *device, uint16_t model_id, const char *layer_name, uint8_t *buffer,
size_t size, uint16_t *index)
{
struct cn10k_ml_model_metadata *metadata;
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
char str[RTE_MEMZONE_NAMESIZE];
const struct plt_memzone *mz;
size_t layer_object_size = 0;
size_t layer_scratch_size;
size_t layer_xstats_size;
uint8_t *base_dma_addr;
uint16_t scratch_pages;
uint16_t layer_id;
uint16_t wb_pages;
uint64_t mz_size;
uint16_t idx;
int qp_id;
int ret;
PLT_SET_USED(size);
cnxk_mldev = (struct cnxk_ml_dev *)device;
if (cnxk_mldev == NULL) {
plt_err("Invalid device = %p", device);
return -EINVAL;
}
model = cnxk_mldev->mldev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
ret = cn10k_ml_model_get_layer_id(model, layer_name, &layer_id);
if (ret != 0)
return ret;
layer = &model->layer[layer_id];
ret = cn10k_ml_model_metadata_check(buffer, size);
if (ret != 0)
return ret;
/* Get index */
for (idx = 0; idx < cnxk_mldev->max_nb_layers; idx++) {
if (!cnxk_mldev->index_map[idx].active) {
layer->index = idx;
break;
}
}
if (idx >= cnxk_mldev->max_nb_layers) {
plt_err("No slots available for model layers, model_id = %u, layer_id = %u",
model->model_id, layer_id);
return -1;
}
layer->model = model;
/* Get WB and scratch pages, check if model can be loaded. */
ret = cn10k_ml_model_ocm_pages_count(cnxk_mldev, layer, buffer, &wb_pages, &scratch_pages);
if (ret < 0)
return ret;
/* Compute layer memzone size */
metadata = (struct cn10k_ml_model_metadata *)buffer;
layer_object_size = metadata->init_model.file_size + metadata->main_model.file_size +
metadata->finish_model.file_size + metadata->weights_bias.file_size;
layer_object_size = PLT_ALIGN_CEIL(layer_object_size, ML_CN10K_ALIGN_SIZE);
layer_scratch_size = PLT_ALIGN_CEIL(metadata->model.ddr_scratch_range_end -
metadata->model.ddr_scratch_range_start + 1,
ML_CN10K_ALIGN_SIZE);
layer_xstats_size = (cnxk_mldev->mldev->data->nb_queue_pairs + 1) *
sizeof(struct cn10k_ml_layer_xstats);
/* Allocate memzone for model data */
mz_size = layer_object_size + layer_scratch_size +
PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_req), ML_CN10K_ALIGN_SIZE) +
layer_xstats_size;
snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u_%u", CN10K_ML_LAYER_MEMZONE_NAME,
model->model_id, layer_id);
mz = plt_memzone_reserve_aligned(str, mz_size, 0, ML_CN10K_ALIGN_SIZE);
if (!mz) {
plt_err("plt_memzone_reserve failed : %s", str);
return -ENOMEM;
}
/* Copy metadata to internal buffer */
rte_memcpy(&layer->glow.metadata, buffer, sizeof(struct cn10k_ml_model_metadata));
cn10k_ml_model_metadata_update(&layer->glow.metadata);
/* Set layer name */
rte_memcpy(layer->name, layer->glow.metadata.model.name, MRVL_ML_MODEL_NAME_LEN);
	/* A batch_size of 0 in the metadata implies a batch size of 256 */
if (layer->glow.metadata.model.batch_size == 0)
layer->batch_size = 256;
else
layer->batch_size = layer->glow.metadata.model.batch_size;
/* Set DMA base address */
base_dma_addr = mz->addr;
cn10k_ml_layer_addr_update(layer, buffer, base_dma_addr);
/* Set scratch base address */
layer->glow.addr.scratch_base_addr = PLT_PTR_ADD(base_dma_addr, layer_object_size);
/* Update internal I/O data structure */
cn10k_ml_layer_io_info_set(&layer->info, &layer->glow.metadata);
/* Initialize model_mem_map */
memset(&layer->glow.ocm_map, 0, sizeof(struct cn10k_ml_ocm_layer_map));
layer->glow.ocm_map.ocm_reserved = false;
layer->glow.ocm_map.tilemask = 0;
layer->glow.ocm_map.wb_page_start = -1;
layer->glow.ocm_map.wb_pages = wb_pages;
layer->glow.ocm_map.scratch_pages = scratch_pages;
/* Set slow-path request address and state */
layer->glow.req = PLT_PTR_ADD(mz->addr, layer_object_size + layer_scratch_size);
/* Reset burst and sync stats */
layer->glow.burst_xstats = PLT_PTR_ADD(
layer->glow.req, PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_req), ML_CN10K_ALIGN_SIZE));
for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs + 1; qp_id++) {
layer->glow.burst_xstats[qp_id].hw_latency_tot = 0;
layer->glow.burst_xstats[qp_id].hw_latency_min = UINT64_MAX;
layer->glow.burst_xstats[qp_id].hw_latency_max = 0;
layer->glow.burst_xstats[qp_id].fw_latency_tot = 0;
layer->glow.burst_xstats[qp_id].fw_latency_min = UINT64_MAX;
layer->glow.burst_xstats[qp_id].fw_latency_max = 0;
layer->glow.burst_xstats[qp_id].hw_reset_count = 0;
layer->glow.burst_xstats[qp_id].fw_reset_count = 0;
layer->glow.burst_xstats[qp_id].dequeued_count = 0;
}
layer->glow.sync_xstats =
PLT_PTR_ADD(layer->glow.burst_xstats, cnxk_mldev->mldev->data->nb_queue_pairs *
sizeof(struct cn10k_ml_layer_xstats));
/* Update xstats names */
cn10k_ml_xstats_layer_name_update(cnxk_mldev, model_id, layer_id);
layer->state = ML_CNXK_LAYER_STATE_LOADED;
cnxk_mldev->index_map[idx].model_id = model->model_id;
cnxk_mldev->index_map[idx].layer_id = layer_id;
cnxk_mldev->index_map[idx].active = true;
*index = idx;
return 0;
}
int
cn10k_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *params,
struct cnxk_ml_model *model)
{
struct cnxk_ml_layer *layer;
int ret;
/* Metadata check */
ret = cn10k_ml_model_metadata_check(params->addr, params->size);
if (ret != 0)
return ret;
/* Set model sub type */
model->subtype = ML_CNXK_MODEL_SUBTYPE_GLOW_MRVL;
/* Copy metadata to internal buffer */
rte_memcpy(&model->glow.metadata, params->addr, sizeof(struct cn10k_ml_model_metadata));
cn10k_ml_model_metadata_update(&model->glow.metadata);
/* Set model name */
rte_memcpy(model->name, (char *)model->glow.metadata.model.name, 64);
	/* A batch_size of 0 in the metadata implies a batch size of 256 */
if (model->glow.metadata.model.batch_size == 0)
model->batch_size = 256;
else
model->batch_size = model->glow.metadata.model.batch_size;
	/* The number of layers handled by the driver for glow models is always 1,
	 * so treat the entire model as a single-layer model. This ignores
	 * num_layers from the metadata.
	 */
model->nb_layers = 1;
/* Load layer and get the index */
layer = &model->layer[0];
layer->type = ML_CNXK_LAYER_TYPE_MRVL;
ret = cn10k_ml_layer_load(cnxk_mldev, model->model_id, NULL, params->addr, params->size,
&layer->index);
if (ret != 0) {
plt_err("Model layer load failed: model_id = %u, layer_id = %u", model->model_id,
0);
return ret;
}
cn10k_ml_model_info_set(cnxk_mldev, model, &model->layer[0].info, &model->glow.metadata);
/* Set fast-path functions */
model->enqueue_single = cn10k_ml_enqueue_single;
model->result_update = cn10k_ml_result_update;
model->set_error_code = cn10k_ml_set_error_code;
model->set_poll_addr = cn10k_ml_set_poll_addr;
return 0;
}
int
cn10k_ml_layer_unload(void *device, uint16_t model_id, const char *layer_name)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
char str[RTE_MEMZONE_NAMESIZE];
uint16_t layer_id;
int ret;
cnxk_mldev = (struct cnxk_ml_dev *)device;
if (cnxk_mldev == NULL) {
plt_err("Invalid device = %p", device);
return -EINVAL;
}
model = cnxk_mldev->mldev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
ret = cn10k_ml_model_get_layer_id(model, layer_name, &layer_id);
if (ret != 0)
return ret;
layer = &model->layer[layer_id];
snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u_%u", CN10K_ML_LAYER_MEMZONE_NAME,
model->model_id, layer_id);
ret = plt_memzone_free(plt_memzone_lookup(str));
layer->state = ML_CNXK_LAYER_STATE_UNKNOWN;
cnxk_mldev->index_map[layer->index].active = false;
return ret;
}
int
cn10k_ml_model_unload(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model)
{
return cn10k_ml_layer_unload(cnxk_mldev, model->model_id, NULL);
}
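/* Start a layer: prepare a MODEL_START job descriptor, reserve OCM pages for
 * the layer under the OCM lock, enqueue the job through the scratch registers
 * and wait for completion. On success the layer moves to the STARTED state;
 * on failure the reserved OCM pages are released and the layer is stopped.
 * Model data is optionally pre-cached when cache_model_data is enabled.
 */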
int
cn10k_ml_layer_start(void *device, uint16_t model_id, const char *layer_name)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
struct cn10k_ml_ocm *ocm;
struct cnxk_ml_req *req;
uint16_t layer_id;
bool job_enqueued;
bool job_dequeued;
uint8_t num_tiles;
uint64_t tilemask;
int wb_page_start;
int tile_start;
int tile_end;
bool locked;
int ret = 0;
cnxk_mldev = (struct cnxk_ml_dev *)device;
if (cnxk_mldev == NULL) {
plt_err("Invalid device = %p", device);
return -EINVAL;
}
model = cnxk_mldev->mldev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
ret = cn10k_ml_model_get_layer_id(model, layer_name, &layer_id);
if (ret != 0)
return ret;
layer = &model->layer[layer_id];
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
ocm = &cn10k_mldev->ocm;
/* Prepare JD */
req = layer->glow.req;
cn10k_ml_prep_sp_job_descriptor(cnxk_mldev, layer, req, ML_CN10K_JOB_TYPE_MODEL_START);
req->cn10k_req.result.error_code = 0x0;
req->cn10k_req.result.user_ptr = NULL;
plt_write64(ML_CNXK_POLL_JOB_START, &req->cn10k_req.status);
plt_wmb();
num_tiles = layer->glow.metadata.model.tile_end - layer->glow.metadata.model.tile_start + 1;
locked = false;
while (!locked) {
if (plt_spinlock_trylock(&model->lock) != 0) {
if (layer->state == ML_CNXK_LAYER_STATE_STARTED) {
plt_ml_dbg("Layer already started, model_id = %u, layer_id = %u",
model->model_id, layer_id);
plt_spinlock_unlock(&model->lock);
return 1;
}
if (layer->state == ML_CNXK_LAYER_STATE_JOB_ACTIVE) {
plt_err("A slow-path job is active for the model_id = %u",
model->model_id);
plt_spinlock_unlock(&model->lock);
return -EBUSY;
}
layer->state = ML_CNXK_LAYER_STATE_JOB_ACTIVE;
plt_spinlock_unlock(&model->lock);
locked = true;
}
}
while (!layer->glow.ocm_map.ocm_reserved) {
if (plt_spinlock_trylock(&ocm->lock) != 0) {
wb_page_start = cn10k_ml_ocm_tilemask_find(
cnxk_mldev, num_tiles, layer->glow.ocm_map.wb_pages,
layer->glow.ocm_map.scratch_pages, &tilemask);
if (wb_page_start == -1) {
plt_err("Free pages not available on OCM tiles");
plt_err("Failed to start layer, model_id = %u, layer_id = %u",
model->model_id, layer_id);
plt_spinlock_unlock(&ocm->lock);
return -ENOMEM;
}
layer->glow.ocm_map.tilemask = tilemask;
layer->glow.ocm_map.wb_page_start = wb_page_start;
cn10k_ml_ocm_reserve_pages(
cnxk_mldev, model->model_id, layer_id, layer->glow.ocm_map.tilemask,
layer->glow.ocm_map.wb_page_start, layer->glow.ocm_map.wb_pages,
layer->glow.ocm_map.scratch_pages);
layer->glow.ocm_map.ocm_reserved = true;
plt_spinlock_unlock(&ocm->lock);
}
}
/* Update JD */
cn10k_ml_ocm_tilecount(layer->glow.ocm_map.tilemask, &tile_start, &tile_end);
req->cn10k_req.jd.model_start.tilemask = GENMASK_ULL(tile_end, tile_start);
req->cn10k_req.jd.model_start.ocm_wb_base_address =
layer->glow.ocm_map.wb_page_start * ocm->page_size;
job_enqueued = false;
job_dequeued = false;
do {
if (!job_enqueued) {
req->timeout = plt_tsc_cycles() + ML_CNXK_CMD_TIMEOUT * plt_tsc_hz();
job_enqueued =
roc_ml_scratch_enqueue(&cn10k_mldev->roc, &req->cn10k_req.jd);
}
if (job_enqueued && !job_dequeued)
job_dequeued =
roc_ml_scratch_dequeue(&cn10k_mldev->roc, &req->cn10k_req.jd);
if (job_dequeued)
break;
} while (plt_tsc_cycles() < req->timeout);
if (job_dequeued) {
if (plt_read64(&req->cn10k_req.status) == ML_CNXK_POLL_JOB_FINISH) {
if (req->cn10k_req.result.error_code == 0)
ret = 0;
else
ret = -1;
}
} else { /* Reset scratch registers */
roc_ml_scratch_queue_reset(&cn10k_mldev->roc);
ret = -ETIME;
}
locked = false;
while (!locked) {
if (plt_spinlock_trylock(&model->lock) != 0) {
if (ret == 0)
layer->state = ML_CNXK_LAYER_STATE_STARTED;
else
layer->state = ML_CNXK_LAYER_STATE_UNKNOWN;
plt_spinlock_unlock(&model->lock);
locked = true;
}
}
if (layer->state == ML_CNXK_LAYER_STATE_UNKNOWN) {
while (layer->glow.ocm_map.ocm_reserved) {
if (plt_spinlock_trylock(&ocm->lock) != 0) {
cn10k_ml_ocm_free_pages(cnxk_mldev, model->model_id, layer_id);
layer->glow.ocm_map.ocm_reserved = false;
layer->glow.ocm_map.tilemask = 0x0;
plt_spinlock_unlock(&ocm->lock);
}
}
}
if (ret < 0) {
cn10k_ml_layer_stop(device, model_id, layer_name);
} else {
if (cn10k_mldev->cache_model_data && model->type == ML_CNXK_MODEL_TYPE_GLOW)
ret = cn10k_ml_cache_model_data(cnxk_mldev, layer);
}
return ret;
}
int
cn10k_ml_model_start(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model)
{
struct cnxk_ml_layer *layer;
int ret;
layer = &model->layer[0];
ret = cn10k_ml_layer_start(cnxk_mldev, model->model_id, layer->name);
if (ret != 0) {
plt_err("CN10K Model start failed, model_id = %u, error = %d", model->model_id,
ret);
return ret;
}
cnxk_mldev->nb_models_started++;
model->state = ML_CNXK_MODEL_STATE_STARTED;
return 0;
}
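/* Stop a layer: prepare a MODEL_STOP job descriptor, release the layer's OCM
 * pages, enqueue the job through the scratch registers and wait for
 * completion, then move the layer back to LOADED (or UNKNOWN on failure).
 */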
int
cn10k_ml_layer_stop(void *device, uint16_t model_id, const char *layer_name)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
struct cn10k_ml_ocm *ocm;
struct cnxk_ml_req *req;
uint16_t layer_id;
bool job_enqueued;
bool job_dequeued;
bool locked;
int ret = 0;
cnxk_mldev = (struct cnxk_ml_dev *)device;
if (cnxk_mldev == NULL) {
plt_err("Invalid device = %p", device);
return -EINVAL;
}
model = cnxk_mldev->mldev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
ret = cn10k_ml_model_get_layer_id(model, layer_name, &layer_id);
if (ret != 0)
return ret;
layer = &model->layer[layer_id];
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
ocm = &cn10k_mldev->ocm;
/* Prepare JD */
req = layer->glow.req;
cn10k_ml_prep_sp_job_descriptor(cnxk_mldev, layer, req, ML_CN10K_JOB_TYPE_MODEL_STOP);
req->cn10k_req.result.error_code = 0x0;
req->cn10k_req.result.user_ptr = NULL;
plt_write64(ML_CNXK_POLL_JOB_START, &req->cn10k_req.status);
plt_wmb();
locked = false;
while (!locked) {
if (plt_spinlock_trylock(&model->lock) != 0) {
if (layer->state == ML_CNXK_LAYER_STATE_LOADED) {
plt_ml_dbg("Layer not started, model_id = %u, layer_id = %u",
model->model_id, layer_id);
plt_spinlock_unlock(&model->lock);
return 1;
}
if (layer->state == ML_CNXK_LAYER_STATE_JOB_ACTIVE) {
plt_err("A slow-path job is active for the layer, model_id = %u, layer_id = %u",
model->model_id, layer_id);
plt_spinlock_unlock(&model->lock);
return -EBUSY;
}
layer->state = ML_CNXK_LAYER_STATE_JOB_ACTIVE;
plt_spinlock_unlock(&model->lock);
locked = true;
}
}
while (layer->glow.ocm_map.ocm_reserved) {
if (plt_spinlock_trylock(&ocm->lock) != 0) {
cn10k_ml_ocm_free_pages(cnxk_mldev, model->model_id, layer_id);
layer->glow.ocm_map.ocm_reserved = false;
layer->glow.ocm_map.tilemask = 0x0;
plt_spinlock_unlock(&ocm->lock);
}
}
job_enqueued = false;
job_dequeued = false;
do {
if (!job_enqueued) {
req->timeout = plt_tsc_cycles() + ML_CNXK_CMD_TIMEOUT * plt_tsc_hz();
job_enqueued =
roc_ml_scratch_enqueue(&cn10k_mldev->roc, &req->cn10k_req.jd);
}
if (job_enqueued && !job_dequeued)
job_dequeued =
roc_ml_scratch_dequeue(&cn10k_mldev->roc, &req->cn10k_req.jd);
if (job_dequeued)
break;
} while (plt_tsc_cycles() < req->timeout);
if (job_dequeued) {
if (plt_read64(&req->cn10k_req.status) == ML_CNXK_POLL_JOB_FINISH) {
if (req->cn10k_req.result.error_code == 0x0)
ret = 0;
else
ret = -1;
}
} else {
roc_ml_scratch_queue_reset(&cn10k_mldev->roc);
ret = -ETIME;
}
locked = false;
while (!locked) {
if (plt_spinlock_trylock(&model->lock) != 0) {
if (ret == 0)
layer->state = ML_CNXK_LAYER_STATE_LOADED;
else
layer->state = ML_CNXK_LAYER_STATE_UNKNOWN;
plt_spinlock_unlock(&model->lock);
locked = true;
}
}
return ret;
}
int
cn10k_ml_model_stop(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model)
{
struct cnxk_ml_layer *layer;
int ret;
layer = &model->layer[0];
ret = cn10k_ml_layer_stop(cnxk_mldev, model->model_id, layer->name);
if (ret != 0) {
plt_err("CN10K Model stop failed, model_id = %u, error = %d", model->model_id, ret);
return ret;
}
cnxk_mldev->nb_models_stopped++;
model->state = ML_CNXK_MODEL_STATE_LOADED;
return 0;
}
int
cn10k_ml_model_params_update(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model,
void *buffer)
{
struct cnxk_ml_layer *layer;
RTE_SET_USED(cnxk_mldev);
if (model->state == ML_CNXK_MODEL_STATE_UNKNOWN)
return -1;
else if (model->state != ML_CNXK_MODEL_STATE_LOADED)
return -EBUSY;
layer = &model->layer[0];
/* Update model weights & bias */
rte_memcpy(layer->glow.addr.wb_load_addr, buffer,
layer->glow.metadata.weights_bias.file_size);
return 0;
}
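/* Completion handler. On success, update the per queue-pair (or sync) latency
 * xstats from the firmware-reported hw/fw timestamps and mark the op as
 * successful. On error, bump the dequeue error counter and, for driver
 * errors, refine the error subtype by checking the exception and core
 * interrupt registers.
 */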
__rte_hot void
cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request)
{
union cn10k_ml_error_code *error_code;
struct cn10k_ml_layer_xstats *xstats;
struct cn10k_ml_dev *cn10k_mldev;
struct cn10k_ml_result *result;
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
struct cnxk_ml_req *req;
struct cnxk_ml_qp *qp;
struct rte_ml_op *op;
uint64_t hw_latency;
uint64_t fw_latency;
uint16_t model_id;
uint16_t layer_id;
uint16_t idx;
req = (struct cnxk_ml_req *)request;
result = &req->cn10k_req.result;
op = req->op;
if (likely(result->error_code == 0)) {
idx = req->cn10k_req.jd.hdr.model_id;
model_id = cnxk_mldev->index_map[idx].model_id;
layer_id = cnxk_mldev->index_map[idx].layer_id;
model = cnxk_mldev->mldev->data->models[model_id];
layer = &model->layer[layer_id];
if (likely(qp_id >= 0)) {
qp = cnxk_mldev->mldev->data->queue_pairs[qp_id];
qp->stats.dequeued_count++;
xstats = &layer->glow.burst_xstats[qp_id];
} else {
xstats = layer->glow.sync_xstats;
}
if (unlikely(xstats->dequeued_count == xstats->hw_reset_count)) {
xstats->hw_latency_min = UINT64_MAX;
xstats->hw_latency_max = 0;
}
if (unlikely(xstats->dequeued_count == xstats->fw_reset_count)) {
xstats->fw_latency_min = UINT64_MAX;
xstats->fw_latency_max = 0;
}
hw_latency = result->stats.hw_end - result->stats.hw_start;
fw_latency = result->stats.fw_end - result->stats.fw_start - hw_latency;
xstats->hw_latency_tot += hw_latency;
xstats->hw_latency_min = PLT_MIN(xstats->hw_latency_min, hw_latency);
xstats->hw_latency_max = PLT_MAX(xstats->hw_latency_max, hw_latency);
xstats->fw_latency_tot += fw_latency;
xstats->fw_latency_min = PLT_MIN(xstats->fw_latency_min, fw_latency);
xstats->fw_latency_max = PLT_MAX(xstats->fw_latency_max, fw_latency);
xstats->dequeued_count++;
op->impl_opaque = result->error_code;
op->status = RTE_ML_OP_STATUS_SUCCESS;
} else {
if (likely(qp_id >= 0)) {
qp = cnxk_mldev->mldev->data->queue_pairs[qp_id];
qp->stats.dequeue_err_count++;
}
/* Handle driver error */
error_code = (union cn10k_ml_error_code *)&result->error_code;
if (error_code->s.etype == ML_CNXK_ETYPE_DRIVER) {
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
/* Check for exception */
if ((roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_EXCEPTION_SP_C0) !=
0) ||
(roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_EXCEPTION_SP_C1) != 0))
error_code->s.stype = ML_CN10K_DRIVER_ERR_EXCEPTION;
else if ((roc_ml_reg_read64(&cn10k_mldev->roc, ML_CORE_INT_LO) != 0) ||
(roc_ml_reg_read64(&cn10k_mldev->roc, ML_CORE_INT_HI) != 0))
error_code->s.stype = ML_CN10K_DRIVER_ERR_FW_ERROR;
else
error_code->s.stype = ML_CN10K_DRIVER_ERR_UNKNOWN;
}
op->impl_opaque = result->error_code;
op->status = RTE_ML_OP_STATUS_ERROR;
}
op->user_ptr = result->user_ptr;
}
__rte_hot void
cn10k_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype)
{
union cn10k_ml_error_code *error_code;
error_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;
error_code->s.etype = etype;
error_code->s.stype = stype;
}
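/* Fast-path enqueue of a single inference op: prepare a MODEL_RUN job
 * descriptor for the layer, set the completion poll address, reset the result
 * and push the job command to the JCMDQ. Returns false when the JCMDQ
 * enqueue does not succeed.
 */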
__rte_hot bool
cn10k_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, uint16_t layer_id,
struct cnxk_ml_qp *qp, uint64_t head)
{
union cn10k_ml_error_code *error_code;
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_queue *queue;
struct cnxk_ml_req *req;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
queue = &qp->queue;
req = &queue->reqs[head];
model = cnxk_mldev->mldev->data->models[op->model_id];
model->set_poll_addr(req);
cn10k_ml_prep_fp_job_descriptor(cnxk_mldev, req, model->layer[layer_id].index,
op->input[0]->addr, op->output[0]->addr, op->nb_batches);
memset(&req->cn10k_req.result, 0, sizeof(struct cn10k_ml_result));
error_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;
error_code->s.etype = ML_CNXK_ETYPE_UNKNOWN;
req->cn10k_req.result.user_ptr = op->user_ptr;
cnxk_ml_set_poll_ptr(req);
if (unlikely(!cn10k_mldev->ml_jcmdq_enqueue(&cn10k_mldev->roc, &req->cn10k_req.jcmd)))
return false;
req->timeout = plt_tsc_cycles() + queue->wait_cycles;
req->op = op;
return true;
}
__rte_hot int
cn10k_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op, struct rte_ml_op_error *error)
{
union cn10k_ml_error_code *error_code;
PLT_SET_USED(dev);
error_code = (union cn10k_ml_error_code *)&op->impl_opaque;
/* Copy sub error message */
if (error_code->s.etype == ML_CNXK_ETYPE_HW_NONFATAL) {
if (error_code->s.stype < PLT_DIM(ml_stype_db_hw_nf))
snprintf(error->message, RTE_ML_STR_MAX, "%s : %s",
ml_etype_db[error_code->s.etype].str,
ml_stype_db_hw_nf[error_code->s.stype].str);
else
snprintf(error->message, RTE_ML_STR_MAX, "%s : UNKNOWN ERROR",
ml_etype_db[error_code->s.etype].str);
} else if (error_code->s.etype == ML_CNXK_ETYPE_DRIVER) {
snprintf(error->message, RTE_ML_STR_MAX, "%s : %s",
ml_etype_db[error_code->s.etype].str,
ml_stype_db_driver[error_code->s.stype].str);
} else {
snprintf(error->message, RTE_ML_STR_MAX, "%s",
ml_etype_db[error_code->s.etype].str);
}
error->errcode = error_code->u64;
return 0;
}
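/* Synchronous inference on the layer identified by index: prepare a fast-path
 * job descriptor, busy-wait to enqueue it on the JCMDQ, then poll the status
 * word until the job finishes or ML_CNXK_CMD_TIMEOUT expires, and update the
 * results via cn10k_ml_result_update().
 */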
__rte_hot int
cn10k_ml_inference_sync(void *device, uint16_t index, void *input, void *output,
uint16_t nb_batches)
{
union cn10k_ml_error_code *error_code;
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
struct cnxk_ml_req *req;
struct rte_ml_op op;
uint16_t model_id;
uint16_t layer_id;
bool timeout;
int ret = 0;
cnxk_mldev = (struct cnxk_ml_dev *)device;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
model_id = cnxk_mldev->index_map[index].model_id;
layer_id = cnxk_mldev->index_map[index].layer_id;
model = cnxk_mldev->mldev->data->models[model_id];
layer = &model->layer[layer_id];
req = layer->glow.req;
op.model_id = index;
op.impl_opaque = 0;
cn10k_ml_set_poll_addr(req);
cn10k_ml_prep_fp_job_descriptor(cnxk_mldev, req, index, input, output, nb_batches);
memset(&req->cn10k_req.result, 0, sizeof(struct cn10k_ml_result));
error_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;
error_code->s.etype = ML_CNXK_ETYPE_UNKNOWN;
req->cn10k_req.result.user_ptr = NULL;
cnxk_ml_set_poll_ptr(req);
req->cn10k_req.jcmd.w1.s.jobptr = PLT_U64_CAST(&req->cn10k_req.jd);
timeout = true;
req->timeout = plt_tsc_cycles() + ML_CNXK_CMD_TIMEOUT * plt_tsc_hz();
do {
if (cn10k_mldev->ml_jcmdq_enqueue(&cn10k_mldev->roc, &req->cn10k_req.jcmd)) {
req->op = &op;
timeout = false;
break;
}
} while (plt_tsc_cycles() < req->timeout);
if (timeout) {
ret = -EBUSY;
goto error_enqueue;
}
timeout = true;
do {
if (cnxk_ml_get_poll_ptr(req) == ML_CNXK_POLL_JOB_FINISH) {
timeout = false;
break;
}
} while (plt_tsc_cycles() < req->timeout);
if (timeout)
ret = -ETIME;
else
cn10k_ml_result_update(cnxk_mldev, -1, req);
error_enqueue:
return ret;
}
int
cn10k_ml_io_alloc(void *device, uint16_t model_id, const char *layer_name, uint64_t **input_qbuffer,
uint64_t **output_qbuffer)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
char str[RTE_MEMZONE_NAMESIZE];
const struct plt_memzone *mz;
uint64_t output_size;
uint64_t input_size;
uint16_t layer_id;
int ret;
cnxk_mldev = (struct cnxk_ml_dev *)device;
if (cnxk_mldev == NULL) {
plt_err("Invalid device = %p", device);
return -EINVAL;
}
model = cnxk_mldev->mldev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
ret = cn10k_ml_model_get_layer_id(model, layer_name, &layer_id);
if (ret != 0)
return ret;
layer = &model->layer[layer_id];
input_size = PLT_ALIGN_CEIL(layer->info.total_input_sz_q, ML_CN10K_ALIGN_SIZE);
output_size = PLT_ALIGN_CEIL(layer->info.total_output_sz_q, ML_CN10K_ALIGN_SIZE);
sprintf(str, "cn10k_ml_io_mz_%u_%u", model_id, layer_id);
mz = plt_memzone_reserve_aligned(str, input_size + output_size, 0, ML_CN10K_ALIGN_SIZE);
if (mz == NULL) {
plt_err("io_alloc failed: Unable to allocate memory: model_id = %u, layer_name = %s",
model_id, layer_name);
return -ENOMEM;
}
*input_qbuffer = mz->addr;
*output_qbuffer = PLT_PTR_ADD(mz->addr, input_size);
return 0;
}
int
cn10k_ml_io_free(void *device, uint16_t model_id, const char *layer_name)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
char str[RTE_MEMZONE_NAMESIZE];
const struct plt_memzone *mz;
uint16_t layer_id;
int ret;
cnxk_mldev = (struct cnxk_ml_dev *)device;
if (cnxk_mldev == NULL) {
plt_err("Invalid device = %p", device);
return -EINVAL;
}
model = cnxk_mldev->mldev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
ret = cn10k_ml_model_get_layer_id(model, layer_name, &layer_id);
if (ret != 0)
return ret;
sprintf(str, "cn10k_ml_io_mz_%u_%u", model_id, layer_id);
mz = plt_memzone_lookup(str);
if (mz == NULL) {
plt_err("io_free failed: Memzone not found: model_id = %u, layer_name = %s",
model_id, layer_name);
return -EINVAL;
}
return plt_memzone_free(mz);
}
int
cn10k_ml_malloc(const char *name, size_t size, uint32_t align, void **addr)
{
const struct plt_memzone *mz;
mz = plt_memzone_reserve_aligned(name, size, 0, align);
if (mz == NULL) {
plt_err("ml_malloc failed: Unable to allocate memory: name = %s", name);
return -ENOMEM;
}
*addr = mz->addr;
return 0;
}
int
cn10k_ml_free(const char *name)
{
const struct plt_memzone *mz;
mz = plt_memzone_lookup(name);
if (mz == NULL) {
plt_err("ml_free failed: Memzone not found: name = %s", name);
return -EINVAL;
}
return plt_memzone_free(mz);
}