f-stack/dpdk/drivers/ml/cnxk/cn10k_ml_ops.h

346 lines
9.0 KiB
C
Raw Normal View History

2025-01-10 11:50:43 +00:00
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2022 Marvell.
*/
#ifndef _CN10K_ML_OPS_H_
#define _CN10K_ML_OPS_H_
#include <rte_mldev.h>
#include <rte_mldev_pmd.h>
#include <roc_api.h>
struct cnxk_ml_dev;
struct cnxk_ml_qp;
struct cnxk_ml_model;
struct cnxk_ml_layer;
struct cnxk_ml_req;
/* Firmware version string length, in bytes. Used as the size of the
 * version[] array in the fw_load section of struct cn10k_ml_jd.
 */
#define MLDEV_FIRMWARE_VERSION_LENGTH 32
/* Job type identifiers.
 *
 * Every enumerator carries an explicit value so the numeric encoding is
 * visible at a glance and does not depend on declaration order.
 */
enum cn10k_ml_job_type {
	ML_CN10K_JOB_TYPE_MODEL_RUN = 0,
	ML_CN10K_JOB_TYPE_MODEL_STOP = 1,
	ML_CN10K_JOB_TYPE_MODEL_START = 2,
	ML_CN10K_JOB_TYPE_FIRMWARE_LOAD = 3,
	ML_CN10K_JOB_TYPE_FIRMWARE_SELFTEST = 4,
};
/* Firmware stats.
 *
 * Four 64-bit cycle values (32 bytes in total) recording the start and end
 * of the firmware and hardware portions of a job. Embedded in
 * struct cn10k_ml_result.
 */
struct cn10k_ml_stats {
/* Firmware start cycle */
uint64_t fw_start;
/* Firmware end cycle */
uint64_t fw_end;
/* Hardware start cycle */
uint64_t hw_start;
/* Hardware end cycle */
uint64_t hw_end;
};
/* Result structure.
 *
 * Holds the outcome of one job: an error code, the cycle statistics and an
 * opaque user pointer. Embedded as the 'result' member of struct cn10k_ml_req.
 */
struct cn10k_ml_result {
/* Job error code */
uint64_t error_code;
/* Stats (firmware / hardware start and end cycles) */
struct cn10k_ml_stats stats;
/* User context pointer; opaque to this structure */
void *user_ptr;
};
/* Firmware capability structure.
 *
 * A 64-bit word that can be accessed either whole (u64) or field by field
 * (s). The bit-field widths add up to exactly 64
 * (1 + 1 + 1 + 1 + 1 + 8 + 1 + 1 + 16 + 33), so 's' overlays 'u64' completely.
 *
 * NOTE(review): the position of each bit-field inside the word is
 * implementation-defined in C; the layout presumably matches what the
 * firmware expects on the supported compiler/ABI - confirm before porting.
 */
union cn10k_ml_fw_cap {
/* Whole capability word */
uint64_t u64;
struct {
/* CMPC completion support */
uint64_t cmpc_completions : 1;
/* Poll mode completion support */
uint64_t poll_completions : 1;
/* SSO completion support */
uint64_t sso_completions : 1;
/* Support for model side loading */
uint64_t side_load_model : 1;
/* Batch execution */
uint64_t batch_run : 1;
/* Max number of models to be loaded in parallel */
uint64_t max_models : 8;
/* Firmware statistics */
uint64_t fw_stats : 1;
/* Hardware statistics */
uint64_t hw_stats : 1;
/* Max number of batches */
uint64_t max_num_batches : 16;
/* Reserved; pads the bit-fields to 64 bits */
uint64_t rsvd : 33;
} s;
};
/* Firmware debug info structure.
 *
 * Four 64-bit buffer addresses followed by two 32-bit sizes: 40 bytes in
 * total, matching the size quoted where this structure is embedded in the
 * fw_load section of struct cn10k_ml_jd.
 */
struct cn10k_ml_fw_debug {
/* ACC core 0 debug buffer */
uint64_t core0_debug_ptr;
/* ACC core 1 debug buffer */
uint64_t core1_debug_ptr;
/* ACC core 0 exception state buffer */
uint64_t core0_exception_buffer;
/* ACC core 1 exception state buffer */
uint64_t core1_exception_buffer;
/* Debug buffer size per core */
uint32_t debug_buffer_size;
/* Exception state dump size */
uint32_t exception_state_size;
};
/* Job descriptor header (32 bytes).
 *
 * Layout: the job completion structure, one 64-bit word of bit-fields
 * (8 + 8 + 16 + 16 + 16 = 64 bits) and the result pointer.
 *
 * NOTE(review): struct ml_jce_s is declared outside this header; the 32-byte
 * total presumably relies on it being 16 bytes and on 8-byte pointers -
 * confirm against its definition.
 */
struct cn10k_ml_jd_header {
/* Job completion structure */
struct ml_jce_s jce;
/* Model ID */
uint64_t model_id : 8;
/* Job type */
uint64_t job_type : 8;
/* Flags for fast-path jobs */
uint64_t fp_flags : 16;
/* Flags for slow-path jobs */
uint64_t sp_flags : 16;
/* Reserved; pads the bit-fields to 64 bits */
uint64_t rsvd : 16;
/* Job result pointer */
uint64_t *result;
};
/* Extra arguments for job descriptor.
 *
 * The union currently has a single member, 'start', which is 128 bytes:
 * three 64-bit DDR scratch values (24 bytes) plus 104 reserved bytes.
 * An instance is embedded in struct cn10k_ml_req as 'extended_args'.
 */
union cn10k_ml_jd_extended_args {
struct cn10k_ml_jd_extended_args_section_start {
/* DDR Scratch base address */
uint64_t ddr_scratch_base_address;
/* DDR Scratch range start */
uint64_t ddr_scratch_range_start;
/* DDR Scratch range end */
uint64_t ddr_scratch_range_end;
/* Reserved; pads the section to 128 bytes */
uint8_t rsvd[104];
} start;
};
/* Job descriptor structure.
 *
 * A common header followed by an anonymous union holding one section per job
 * kind (fw_load, model_start, model_stop, model_run). Every section adds up
 * to 96 bytes, so the union does not change size with the job kind.
 *
 * NOTE(review): the 96-byte figures assume 8-byte pointers and that adjacent
 * uint64_t bit-fields share 64-bit storage units (implementation-defined) -
 * confirm for the target ABI.
 *
 * The structure is declared with __plt_aligned(ROC_ALIGN); both names are
 * defined outside this header.
 */
struct cn10k_ml_jd {
/* Job descriptor header (32 bytes) */
struct cn10k_ml_jd_header hdr;
union {
/* Firmware load section: 8 + 32 + 40 + 8 + 8 = 96 bytes */
struct cn10k_ml_jd_section_fw_load {
/* Firmware capability structure (8 bytes) */
union cn10k_ml_fw_cap cap;
/* Firmware version (32 bytes) */
uint8_t version[MLDEV_FIRMWARE_VERSION_LENGTH];
/* Debug capability structure (40 bytes) */
struct cn10k_ml_fw_debug debug;
/* Flags to control error handling */
uint64_t flags;
/* Reserved; pads the section to 96 bytes */
uint8_t rsvd[8];
} fw_load;
/* Model start section: twelve 64-bit words = 96 bytes */
struct cn10k_ml_jd_section_model_start {
/* Extended arguments */
uint64_t extended_args;
/* Destination model start address in DDR relative to ML_MLR_BASE */
uint64_t model_dst_ddr_addr;
/* Offset to model init section in the model */
uint64_t model_init_offset : 32;
/* Size of init section in the model */
uint64_t model_init_size : 32;
/* Offset to model main section in the model */
uint64_t model_main_offset : 32;
/* Size of main section in the model */
uint64_t model_main_size : 32;
/* Offset to model finish section in the model */
uint64_t model_finish_offset : 32;
/* Size of finish section in the model */
uint64_t model_finish_size : 32;
/* Offset to WB in model bin.
 * This field and the four 8-bit fields below total 64 bits (32 + 4 * 8).
 */
uint64_t model_wb_offset : 32;
/* Number of model layers */
uint64_t num_layers : 8;
/* Number of gather entries, 0 means linear input mode (= no gather) */
uint64_t num_gather_entries : 8;
/* Number of scatter entries, 0 means linear output mode (= no scatter) */
uint64_t num_scatter_entries : 8;
/* Tile mask to load model */
uint64_t tilemask : 8;
/* Batch size of model */
uint64_t batch_size : 32;
/* OCM WB base address */
uint64_t ocm_wb_base_address : 32;
/* OCM WB range start */
uint64_t ocm_wb_range_start : 32;
/* OCM WB range End */
uint64_t ocm_wb_range_end : 32;
/* DDR WB address */
uint64_t ddr_wb_base_address;
/* DDR WB range start */
uint64_t ddr_wb_range_start : 32;
/* DDR WB range end */
uint64_t ddr_wb_range_end : 32;
/* Input description: either a gather list pointer or a linear DDR range */
union {
/* Points to gather list if num_gather_entries > 0 */
void *gather_list;
struct {
/* Linear input mode */
uint64_t ddr_range_start : 32;
uint64_t ddr_range_end : 32;
} s;
} input;
/* Output description: either a scatter list pointer or a linear DDR range */
union {
/* Points to scatter list if num_scatter_entries > 0 */
void *scatter_list;
struct {
/* Linear output mode */
uint64_t ddr_range_start : 32;
uint64_t ddr_range_end : 32;
} s;
} output;
} model_start;
/* Model stop section: no fields, only 96 reserved bytes */
struct cn10k_ml_jd_section_model_stop {
uint8_t rsvd[96];
} model_stop;
/* Model run section: 8 + 8 + 2 + 78 = 96 bytes */
struct cn10k_ml_jd_section_model_run {
/* Address of the input for the run relative to ML_MLR_BASE */
uint64_t input_ddr_addr;
/* Address of the output for the run relative to ML_MLR_BASE */
uint64_t output_ddr_addr;
/* Number of batches to run in variable batch processing */
uint16_t num_batches;
/* Reserved; pads the section to 96 bytes */
uint8_t rsvd[78];
} model_run;
};
} __plt_aligned(ROC_ALIGN);
/* CN10K specific request.
 *
 * Bundles everything that belongs to one job: the job descriptor, its
 * extended arguments, a status word for poll mode, the job command and the
 * result.
 *
 * NOTE(review): the prototypes in this header take struct cnxk_ml_req, not
 * this type; presumably this structure is embedded in it - confirm.
 */
struct cn10k_ml_req {
/* Job descriptor */
struct cn10k_ml_jd jd;
/* Job descriptor extra arguments */
union cn10k_ml_jd_extended_args extended_args;
/* Status field for poll mode requests; volatile-qualified */
volatile uint64_t status;
/* Job command (struct ml_job_cmd_s is declared outside this header) */
struct ml_job_cmd_s jcmd;
/* Result */
struct cn10k_ml_result result;
};
/* Device ops.
 *
 * Each function takes the cnxk ML device and returns int. The implementations
 * are not part of this header.
 * NOTE(review): presumably 0 on success and a negative value on failure, as
 * is usual in DPDK drivers - confirm against the implementation.
 */
/* dev_info is a non-const pointer; presumably filled in by the call - confirm. */
int cn10k_ml_dev_info_get(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_dev_info *dev_info);
/* conf is const-qualified, so it is not modified through this pointer. */
int cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_config *conf);
int cn10k_ml_dev_close(struct cnxk_ml_dev *cnxk_mldev);
int cn10k_ml_dev_start(struct cnxk_ml_dev *cnxk_mldev);
int cn10k_ml_dev_stop(struct cnxk_ml_dev *cnxk_mldev);
/* NOTE(review): FILE is not declared by this header itself; it must come from
 * one of the included headers - confirm.
 */
int cn10k_ml_dev_dump(struct cnxk_ml_dev *cnxk_mldev, FILE *fp);
int cn10k_ml_dev_selftest(struct cnxk_ml_dev *cnxk_mldev);
/* Slow-path ops.
 *
 * Model-level operations. Each takes the cnxk ML device and the model, and
 * returns int. The implementations are not part of this header.
 * NOTE(review): presumably 0 on success and a negative value on failure -
 * confirm against the implementation.
 */
/* params is a non-const pointer to the generic rte_ml model parameters. */
int cn10k_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *params,
struct cnxk_ml_model *model);
int cn10k_ml_model_unload(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model);
int cn10k_ml_model_start(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model);
int cn10k_ml_model_stop(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model);
/* buffer is an untyped pointer; its expected size and contents are not
 * visible here - TODO confirm against the implementation.
 */
int cn10k_ml_model_params_update(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model,
void *buffer);
/* Fast-path ops.
 *
 * All of these are annotated __rte_hot. The implementations are not part of
 * this header.
 * NOTE(review): 'bool' is not declared by this header itself; it must come
 * from one of the included headers - confirm.
 */
/* Returns bool; NOTE(review): presumably true when the op was enqueued -
 * confirm against the implementation.
 */
__rte_hot bool cn10k_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op,
uint16_t layer_id, struct cnxk_ml_qp *qp, uint64_t head);
/* Takes the generic struct rte_ml_dev, unlike the other functions in this
 * header, which take struct cnxk_ml_dev or an untyped device pointer.
 */
__rte_hot int cn10k_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op,
struct rte_ml_op_error *error);
/* device, input and output are untyped pointers; their concrete types are not
 * visible here - TODO confirm against the implementation.
 */
__rte_hot int cn10k_ml_inference_sync(void *device, uint16_t index, void *input, void *output,
uint16_t nb_batches);
/* request is an untyped pointer; NOTE(review): presumably a request object -
 * confirm against the implementation.
 */
__rte_hot void cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request);
/* The next two functions operate on struct cnxk_ml_req, not on
 * struct cn10k_ml_req directly.
 */
__rte_hot void cn10k_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype);
__rte_hot void cn10k_ml_set_poll_addr(struct cnxk_ml_req *req);
/* Misc ops.
 *
 * Takes the device and a queue pair; returns nothing, so no failure can be
 * reported through the return value. Implementation not visible here.
 */
void cn10k_ml_qp_initialize(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_qp *qp);
/* Layer ops.
 *
 * Layers are addressed by a model ID plus a layer name. The device is passed
 * as an untyped pointer, and all functions return int. The implementations
 * are not part of this header.
 * NOTE(review): presumably 0 on success and a negative value on failure, and
 * 'device' presumably points to a struct cnxk_ml_dev - confirm against the
 * implementation.
 */
/* buffer/size describe the layer data; index is a non-const pointer,
 * presumably an output - confirm.
 */
int cn10k_ml_layer_load(void *device, uint16_t model_id, const char *layer_name, uint8_t *buffer,
size_t size, uint16_t *index);
int cn10k_ml_layer_unload(void *device, uint16_t model_id, const char *layer_name);
int cn10k_ml_layer_start(void *device, uint16_t model_id, const char *layer_name);
int cn10k_ml_layer_stop(void *device, uint16_t model_id, const char *layer_name);
/* input_qbuffer and output_qbuffer are pointer-to-pointer parameters,
 * presumably used to return buffer addresses - confirm.
 */
int cn10k_ml_io_alloc(void *device, uint16_t model_id, const char *layer_name,
uint64_t **input_qbuffer, uint64_t **output_qbuffer);
int cn10k_ml_io_free(void *device, uint16_t model_id, const char *layer_name);
/* Named allocation: addr is a pointer-to-pointer, presumably receiving the
 * allocated address; cn10k_ml_free takes only the name - confirm ownership
 * rules against the implementation.
 */
int cn10k_ml_malloc(const char *name, size_t size, uint32_t align, void **addr);
int cn10k_ml_free(const char *name);
/* xstats ops.
 *
 * The implementations are not part of this header.
 * NOTE(review): enum cnxk_ml_xstats_type is not declared in this header; it
 * must be declared by an earlier include before the prototype below is
 * seen - confirm.
 */
/* suffix is a non-const char pointer; whether it is modified is not visible
 * here - TODO confirm.
 */
void cn10k_ml_xstat_model_name_set(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model,
uint16_t stat_id, uint16_t entry, char *suffix);
/* Returns a 64-bit value for the given layer and xstats type. */
uint64_t cn10k_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer,
enum cnxk_ml_xstats_type type);
#endif /* _CN10K_ML_OPS_H_ */