f-stack/dpdk/drivers/ml/cnxk/cnxk_ml_ops.c

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2023 Marvell.
*/
#include <rte_mldev.h>
#include <rte_mldev_pmd.h>
#include <mldev_utils.h>
#include "cnxk_ml_dev.h"
#include "cnxk_ml_io.h"
#include "cnxk_ml_model.h"
#include "cnxk_ml_ops.h"
/* ML model macros */
#define CNXK_ML_MODEL_MEMZONE_NAME "ml_cnxk_model_mz"
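/* Poll-mode job status helpers: the enqueue path writes ML_CNXK_POLL_JOB_START
 * to the request's status location, and the dequeue path polls the same
 * location until it reads ML_CNXK_POLL_JOB_FINISH or the request times out.
 */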
__rte_hot void
cnxk_ml_set_poll_ptr(struct cnxk_ml_req *req)
{
plt_write64(ML_CNXK_POLL_JOB_START, req->status);
}
__rte_hot uint64_t
cnxk_ml_get_poll_ptr(struct cnxk_ml_req *req)
{
return plt_read64(req->status);
}
static void
qp_memzone_name_get(char *name, int size, int dev_id, int qp_id)
{
snprintf(name, size, "cnxk_ml_qp_mem_%u:%u", dev_id, qp_id);
}
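/* Release a queue pair: free the request-ring memzone identified by the
 * device and queue id, then free the queue-pair structure itself.
 */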
static int
cnxk_ml_qp_destroy(const struct rte_ml_dev *dev, struct cnxk_ml_qp *qp)
{
const struct rte_memzone *qp_mem;
char name[RTE_MEMZONE_NAMESIZE];
int ret;
qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp->id);
qp_mem = rte_memzone_lookup(name);
ret = rte_memzone_free(qp_mem);
if (ret)
return ret;
rte_free(qp);
return 0;
}
static int
cnxk_ml_dev_queue_pair_release(struct rte_ml_dev *dev, uint16_t queue_pair_id)
{
struct cnxk_ml_qp *qp;
int ret;
qp = dev->data->queue_pairs[queue_pair_id];
if (qp == NULL)
return -EINVAL;
ret = cnxk_ml_qp_destroy(dev, qp);
if (ret) {
plt_err("Could not destroy queue pair %u", queue_pair_id);
return ret;
}
dev->data->queue_pairs[queue_pair_id] = NULL;
return 0;
}
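/* Create a queue pair: allocate the qp structure on the requested socket,
 * reserve a memzone of nb_desc request descriptors for the ring, reset
 * head/tail and statistics, and let the cn10k backend initialize the queue
 * for PCI devices.
 */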
static struct cnxk_ml_qp *
cnxk_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_desc, int socket_id)
{
const struct rte_memzone *qp_mem;
char name[RTE_MEMZONE_NAMESIZE];
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_qp *qp;
uint32_t len;
uint8_t *va;
cnxk_mldev = dev->data->dev_private;
/* Allocate queue pair */
qp = rte_zmalloc_socket("cnxk_ml_pmd_queue_pair", sizeof(struct cnxk_ml_qp), ROC_ALIGN,
socket_id);
if (qp == NULL) {
plt_err("Could not allocate queue pair");
return NULL;
}
/* For request queue */
len = nb_desc * sizeof(struct cnxk_ml_req);
qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp_id);
qp_mem = rte_memzone_reserve_aligned(
name, len, socket_id, RTE_MEMZONE_SIZE_HINT_ONLY | RTE_MEMZONE_256MB, ROC_ALIGN);
if (qp_mem == NULL) {
plt_err("Could not reserve memzone: %s", name);
goto qp_free;
}
va = qp_mem->addr;
memset(va, 0, len);
/* Initialize Request queue */
qp->id = qp_id;
qp->queue.reqs = (struct cnxk_ml_req *)va;
qp->queue.head = 0;
qp->queue.tail = 0;
qp->queue.wait_cycles = ML_CNXK_CMD_TIMEOUT * plt_tsc_hz();
qp->nb_desc = nb_desc;
qp->stats.enqueued_count = 0;
qp->stats.dequeued_count = 0;
qp->stats.enqueue_err_count = 0;
qp->stats.dequeue_err_count = 0;
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI)
cn10k_ml_qp_initialize(cnxk_mldev, qp);
return qp;
qp_free:
rte_free(qp);
return NULL;
}
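/* Build the flat xstats table: device-level entries first, then for each model
 * slot a block of model-level entries followed by ML_CNXK_MODEL_MAX_LAYERS
 * blocks of layer-level entries. Offsets into the table are recorded per model
 * and per layer for later lookups.
 */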
static int
cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev)
{
uint16_t nb_stats;
uint16_t stat_id;
uint16_t model;
uint16_t layer;
uint16_t i;
/* Allocate memory for xstats entries. Don't allocate during reconfigure */
nb_stats = RTE_DIM(device_xstats) +
RTE_DIM(layer_xstats) * ML_CNXK_MAX_MODELS * ML_CNXK_MODEL_MAX_LAYERS +
RTE_DIM(model_xstats) * ML_CNXK_MAX_MODELS;
if (cnxk_mldev->xstats.entries == NULL)
cnxk_mldev->xstats.entries = rte_zmalloc(
"cnxk_ml_xstats", sizeof(struct cnxk_ml_xstats_entry) * nb_stats,
PLT_CACHE_LINE_SIZE);
if (cnxk_mldev->xstats.entries == NULL)
return -ENOMEM;
/* Initialize device xstats */
stat_id = 0;
for (i = 0; i < RTE_DIM(device_xstats); i++) {
cnxk_mldev->xstats.entries[stat_id].map.id = stat_id;
snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s",
device_xstats[i].name);
cnxk_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_DEVICE;
cnxk_mldev->xstats.entries[stat_id].group = CNXK_ML_XSTATS_GROUP_DEVICE;
cnxk_mldev->xstats.entries[stat_id].type = device_xstats[i].type;
cnxk_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_DEVICE;
cnxk_mldev->xstats.entries[stat_id].obj_idx = 0;
cnxk_mldev->xstats.entries[stat_id].reset_allowed = device_xstats[i].reset_allowed;
stat_id++;
}
cnxk_mldev->xstats.count_mode_device = stat_id;
/* Initialize model xstats */
for (model = 0; model < ML_CNXK_MAX_MODELS; model++) {
cnxk_mldev->xstats.offset_for_model[model] = stat_id;
for (i = 0; i < RTE_DIM(model_xstats); i++) {
cnxk_mldev->xstats.entries[stat_id].map.id = stat_id;
cnxk_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL;
cnxk_mldev->xstats.entries[stat_id].group = CNXK_ML_XSTATS_GROUP_MODEL;
cnxk_mldev->xstats.entries[stat_id].type = model_xstats[i].type;
cnxk_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL;
cnxk_mldev->xstats.entries[stat_id].obj_idx = model;
cnxk_mldev->xstats.entries[stat_id].layer_id = -1;
cnxk_mldev->xstats.entries[stat_id].reset_allowed =
model_xstats[i].reset_allowed;
/* Name of xstat is updated during model load */
snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
sizeof(cnxk_mldev->xstats.entries[stat_id].map.name),
"Model-%u-%s", model, model_xstats[i].name);
stat_id++;
}
for (layer = 0; layer < ML_CNXK_MODEL_MAX_LAYERS; layer++) {
cnxk_mldev->xstats.offset_for_layer[model][layer] = stat_id;
for (i = 0; i < RTE_DIM(layer_xstats); i++) {
cnxk_mldev->xstats.entries[stat_id].map.id = stat_id;
cnxk_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL;
cnxk_mldev->xstats.entries[stat_id].group =
CNXK_ML_XSTATS_GROUP_LAYER;
cnxk_mldev->xstats.entries[stat_id].type = layer_xstats[i].type;
cnxk_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL;
cnxk_mldev->xstats.entries[stat_id].obj_idx = model;
cnxk_mldev->xstats.entries[stat_id].layer_id = layer;
cnxk_mldev->xstats.entries[stat_id].reset_allowed =
layer_xstats[i].reset_allowed;
/* Name of xstat is updated during model load */
snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
sizeof(cnxk_mldev->xstats.entries[stat_id].map.name),
"Layer-%u-%u-%s", model, layer, layer_xstats[i].name);
stat_id++;
}
cnxk_mldev->xstats.count_per_layer[model][layer] = RTE_DIM(layer_xstats);
}
cnxk_mldev->xstats.count_per_model[model] =
RTE_DIM(model_xstats) + ML_CNXK_MODEL_MAX_LAYERS * RTE_DIM(layer_xstats);
}
cnxk_mldev->xstats.count_mode_model = stat_id - cnxk_mldev->xstats.count_mode_device;
cnxk_mldev->xstats.count = stat_id;
return 0;
}
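/* Refresh per-model xstat names once the model is loaded. Latency values are
 * reported in nanoseconds when the SCLK frequency is known, otherwise in
 * cycles, hence the name suffix.
 */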
void
cnxk_ml_xstats_model_name_update(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id)
{
struct cnxk_ml_model *model;
uint16_t rclk_freq;
uint16_t sclk_freq;
uint16_t stat_id;
char suffix[8];
uint16_t i;
model = cnxk_mldev->mldev->data->models[model_id];
stat_id = cnxk_mldev->xstats.offset_for_model[model_id];
roc_clk_freq_get(&rclk_freq, &sclk_freq);
if (sclk_freq == 0)
rte_strscpy(suffix, "cycles", 7);
else
rte_strscpy(suffix, "ns", 3);
/* Update xstat name based on layer name and sclk availability */
for (i = 0; i < RTE_DIM(model_xstats); i++) {
if (model->type == ML_CNXK_MODEL_TYPE_GLOW)
cn10k_ml_xstat_model_name_set(cnxk_mldev, model, stat_id, i, suffix);
else
mvtvm_ml_model_xstat_name_set(cnxk_mldev, model, stat_id, i, suffix);
stat_id++;
}
}
static void
cnxk_ml_xstats_uninit(struct cnxk_ml_dev *cnxk_mldev)
{
rte_free(cnxk_mldev->xstats.entries);
cnxk_mldev->xstats.entries = NULL;
cnxk_mldev->xstats.count = 0;
}
static uint64_t
cnxk_ml_dev_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx __rte_unused,
int32_t layer_id __rte_unused, enum cnxk_ml_xstats_type type)
{
switch (type) {
case nb_models_loaded:
return cnxk_mldev->nb_models_loaded;
case nb_models_unloaded:
return cnxk_mldev->nb_models_unloaded;
case nb_models_started:
return cnxk_mldev->nb_models_started;
case nb_models_stopped:
return cnxk_mldev->nb_models_stopped;
default:
return -1;
}
}
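/* Fetch a model or layer counter: layer-level values come from the cn10k
 * backend, model-level values from the mvtvm backend. Raw cycle counts are
 * converted to nanoseconds when the SCLK frequency is available.
 */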
static uint64_t
cnxk_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx, int32_t layer_id,
enum cnxk_ml_xstats_type type)
{
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
uint16_t rclk_freq; /* MHz */
uint16_t sclk_freq; /* MHz */
uint64_t value = 0;
model = cnxk_mldev->mldev->data->models[obj_idx];
if (model == NULL)
return 0;
if (layer_id >= 0) {
layer = &model->layer[layer_id];
goto layer_xstats;
} else {
layer = NULL;
goto model_xstats;
}
layer_xstats:
value = cn10k_ml_model_xstat_get(cnxk_mldev, layer, type);
goto exit_xstats;
model_xstats:
value = mvtvm_ml_model_xstat_get(cnxk_mldev, model, type);
exit_xstats:
roc_clk_freq_get(&rclk_freq, &sclk_freq);
if (sclk_freq != 0) /* return in ns */
value = (value * 1000ULL) / sclk_freq;
return value;
}
static int
cnxk_ml_device_xstats_reset(struct cnxk_ml_dev *cnxk_mldev, const uint16_t stat_ids[],
uint16_t nb_ids)
{
struct cnxk_ml_xstats_entry *xs;
uint16_t nb_stats;
uint16_t stat_id;
uint32_t i;
if (stat_ids == NULL)
nb_stats = cnxk_mldev->xstats.count_mode_device;
else
nb_stats = nb_ids;
for (i = 0; i < nb_stats; i++) {
if (stat_ids == NULL)
stat_id = i;
else
stat_id = stat_ids[i];
if (stat_id >= cnxk_mldev->xstats.count_mode_device)
return -EINVAL;
xs = &cnxk_mldev->xstats.entries[stat_id];
if (!xs->reset_allowed)
continue;
xs->reset_value =
cnxk_ml_dev_xstat_get(cnxk_mldev, xs->obj_idx, xs->layer_id, xs->type);
}
return 0;
}
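/* Reset helpers for per-queue-pair latency xstats: averages are reset by
 * clearing the running total and snapshotting dequeued_count as the new
 * baseline, minimums go back to UINT64_MAX and maximums to zero.
 */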
#define ML_AVG_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, str) \
do { \
for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) { \
layer->glow.burst_xstats[qp_id].str##_latency_tot = 0; \
layer->glow.burst_xstats[qp_id].str##_reset_count = \
layer->glow.burst_xstats[qp_id].dequeued_count; \
} \
} while (0)
#define ML_MIN_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, str) \
do { \
for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) \
layer->glow.burst_xstats[qp_id].str##_latency_min = UINT64_MAX; \
} while (0)
#define ML_MAX_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, str) \
do { \
for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) \
layer->glow.burst_xstats[qp_id].str##_latency_max = 0; \
} while (0)
static void
cnxk_ml_reset_model_stat(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id,
enum cnxk_ml_xstats_type type)
{
struct cnxk_ml_model *model;
struct cnxk_ml_layer *layer;
uint16_t layer_id = 0;
uint32_t qp_id;
model = cnxk_mldev->mldev->data->models[model_id];
layer = &model->layer[layer_id];
switch (type) {
case avg_hw_latency:
ML_AVG_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, hw);
break;
case min_hw_latency:
ML_MIN_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, hw);
break;
case max_hw_latency:
ML_MAX_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, hw);
break;
case avg_fw_latency:
ML_AVG_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, fw);
break;
case min_fw_latency:
ML_MIN_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, fw);
break;
case max_fw_latency:
ML_MAX_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, fw);
break;
default:
return;
}
}
static int
cnxk_ml_model_xstats_reset(struct cnxk_ml_dev *cnxk_mldev, int32_t model_id,
const uint16_t stat_ids[], uint16_t nb_ids)
{
struct cnxk_ml_xstats_entry *xs;
struct cnxk_ml_model *model;
int32_t lcl_model_id = 0;
uint16_t layer_id = 0;
uint16_t start_id;
uint16_t end_id;
int32_t i;
int32_t j;
for (i = 0; i < ML_CNXK_MAX_MODELS; i++) {
if (model_id == -1) {
model = cnxk_mldev->mldev->data->models[i];
if (model == NULL) /* skip inactive models */
continue;
} else {
if (model_id != i)
continue;
model = cnxk_mldev->mldev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %d", model_id);
return -EINVAL;
}
}
start_id = cnxk_mldev->xstats.offset_for_layer[i][layer_id];
end_id = cnxk_mldev->xstats.offset_for_layer[i][layer_id] +
cnxk_mldev->xstats.count_per_layer[i][layer_id] - 1;
if (stat_ids == NULL) {
for (j = start_id; j <= end_id; j++) {
xs = &cnxk_mldev->xstats.entries[j];
cnxk_ml_reset_model_stat(cnxk_mldev, i, xs->type);
}
} else {
for (j = 0; j < nb_ids; j++) {
if (stat_ids[j] < start_id || stat_ids[j] > end_id) {
plt_err("Invalid stat_ids[%d] = %d for model_id = %d", j,
stat_ids[j], lcl_model_id);
return -EINVAL;
}
xs = &cnxk_mldev->xstats.entries[stat_ids[j]];
cnxk_ml_reset_model_stat(cnxk_mldev, i, xs->type);
}
}
}
return 0;
}
static int
cnxk_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
{
struct cnxk_ml_dev *cnxk_mldev;
if (dev == NULL || dev_info == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
memset(dev_info, 0, sizeof(struct rte_ml_dev_info));
dev_info->driver_name = dev->device->driver->name;
dev_info->max_models = ML_CNXK_MAX_MODELS;
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI)
return cn10k_ml_dev_info_get(cnxk_mldev, dev_info);
else
return mvtvm_ml_dev_info_get(cnxk_mldev, dev_info);
}
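/* Configure the device: validate the request against dev_info, load firmware
 * on first configure (PCI devices), allocate or re-allocate the queue-pair and
 * model arrays (releasing queue pairs and unloading models on re-configure),
 * configure the cn10k and/or mvtvm backends, install the fast-path handlers,
 * set up the layer index map and initialize xstats.
 */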
static int
cnxk_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *conf)
{
struct rte_ml_dev_info dev_info;
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_qp *qp;
uint16_t model_id;
uint32_t mz_size;
uint16_t qp_id;
uint64_t i;
int ret;
if (dev == NULL)
return -EINVAL;
/* Get CNXK device handle */
cnxk_mldev = dev->data->dev_private;
cnxk_ml_dev_info_get(dev, &dev_info);
if (conf->nb_models > dev_info.max_models) {
plt_err("Invalid device config, nb_models > %u", dev_info.max_models);
return -EINVAL;
}
if (conf->nb_queue_pairs > dev_info.max_queue_pairs) {
plt_err("Invalid device config, nb_queue_pairs > %u", dev_info.max_queue_pairs);
return -EINVAL;
}
if (cnxk_mldev->state == ML_CNXK_DEV_STATE_PROBED) {
plt_ml_dbg("Configuring ML device, nb_queue_pairs = %u, nb_models = %u",
conf->nb_queue_pairs, conf->nb_models);
/* Load firmware */
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) {
ret = cn10k_ml_fw_load(cnxk_mldev);
if (ret != 0)
return ret;
}
} else if (cnxk_mldev->state == ML_CNXK_DEV_STATE_CONFIGURED) {
plt_ml_dbg("Re-configuring ML device, nb_queue_pairs = %u, nb_models = %u",
conf->nb_queue_pairs, conf->nb_models);
} else if (cnxk_mldev->state == ML_CNXK_DEV_STATE_STARTED) {
plt_err("Device can't be reconfigured in started state");
return -ENOTSUP;
} else if (cnxk_mldev->state == ML_CNXK_DEV_STATE_CLOSED) {
plt_err("Device can't be reconfigured after close");
return -ENOTSUP;
}
/* Configure queue-pairs */
if (dev->data->queue_pairs == NULL) {
mz_size = sizeof(dev->data->queue_pairs[0]) * conf->nb_queue_pairs;
dev->data->queue_pairs =
rte_zmalloc("cnxk_mldev_queue_pairs", mz_size, RTE_CACHE_LINE_SIZE);
if (dev->data->queue_pairs == NULL) {
dev->data->nb_queue_pairs = 0;
plt_err("Failed to get memory for queue_pairs, nb_queue_pairs %u",
conf->nb_queue_pairs);
return -ENOMEM;
}
} else { /* Re-configure */
void **queue_pairs;
/* Release all queue pairs as ML spec doesn't support queue_pair_destroy. */
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
qp = dev->data->queue_pairs[qp_id];
if (qp != NULL) {
ret = cnxk_ml_dev_queue_pair_release(dev, qp_id);
if (ret < 0)
return ret;
}
}
queue_pairs = dev->data->queue_pairs;
queue_pairs =
rte_realloc(queue_pairs, sizeof(queue_pairs[0]) * conf->nb_queue_pairs,
RTE_CACHE_LINE_SIZE);
if (queue_pairs == NULL) {
dev->data->nb_queue_pairs = 0;
plt_err("Failed to realloc queue_pairs, nb_queue_pairs = %u",
conf->nb_queue_pairs);
ret = -ENOMEM;
goto error;
}
memset(queue_pairs, 0, sizeof(queue_pairs[0]) * conf->nb_queue_pairs);
dev->data->queue_pairs = queue_pairs;
}
dev->data->nb_queue_pairs = conf->nb_queue_pairs;
/* Allocate ML models */
if (dev->data->models == NULL) {
mz_size = sizeof(dev->data->models[0]) * conf->nb_models;
dev->data->models = rte_zmalloc("cnxk_mldev_models", mz_size, RTE_CACHE_LINE_SIZE);
if (dev->data->models == NULL) {
dev->data->nb_models = 0;
plt_err("Failed to get memory for ml_models, nb_models %u",
conf->nb_models);
ret = -ENOMEM;
goto error;
}
} else {
/* Re-configure */
void **models;
/* Stop and unload all models */
for (model_id = 0; model_id < dev->data->nb_models; model_id++) {
model = dev->data->models[model_id];
if (model != NULL) {
if (model->state == ML_CNXK_MODEL_STATE_STARTED) {
if (cnxk_ml_model_stop(dev, model_id) != 0)
plt_err("Could not stop model %u", model_id);
}
if (model->state == ML_CNXK_MODEL_STATE_LOADED) {
if (cnxk_ml_model_unload(dev, model_id) != 0)
plt_err("Could not unload model %u", model_id);
}
dev->data->models[model_id] = NULL;
}
}
models = dev->data->models;
models = rte_realloc(models, sizeof(models[0]) * conf->nb_models,
RTE_CACHE_LINE_SIZE);
if (models == NULL) {
dev->data->nb_models = 0;
plt_err("Failed to realloc ml_models, nb_models = %u", conf->nb_models);
ret = -ENOMEM;
goto error;
}
memset(models, 0, sizeof(models[0]) * conf->nb_models);
dev->data->models = models;
}
dev->data->nb_models = conf->nb_models;
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) {
ret = cn10k_ml_dev_configure(cnxk_mldev, conf);
if (ret != 0) {
plt_err("Failed to configure CN10K ML Device");
goto error;
}
}
ret = mvtvm_ml_dev_configure(cnxk_mldev, conf);
if (ret != 0)
goto error;
/* Set device capabilities */
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI)
cnxk_mldev->max_nb_layers =
cnxk_mldev->cn10k_mldev.fw.req->cn10k_req.jd.fw_load.cap.s.max_models;
else
cnxk_mldev->max_nb_layers = ML_CNXK_MAX_MODELS;
cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst;
cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst;
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI)
cnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get;
/* Allocate and initialize index_map */
if (cnxk_mldev->index_map == NULL) {
cnxk_mldev->index_map =
rte_zmalloc("cnxk_ml_index_map",
sizeof(struct cnxk_ml_index_map) * cnxk_mldev->max_nb_layers,
RTE_CACHE_LINE_SIZE);
if (cnxk_mldev->index_map == NULL) {
plt_err("Failed to get memory for index_map, nb_layers %" PRIu64,
cnxk_mldev->max_nb_layers);
ret = -ENOMEM;
goto error;
}
}
for (i = 0; i < cnxk_mldev->max_nb_layers; i++)
cnxk_mldev->index_map[i].active = false;
/* Initialize xstats */
ret = cnxk_ml_xstats_init(cnxk_mldev);
if (ret != 0) {
plt_err("Failed to initialize xstats");
goto error;
}
cnxk_mldev->nb_models_loaded = 0;
cnxk_mldev->nb_models_started = 0;
cnxk_mldev->nb_models_stopped = 0;
cnxk_mldev->nb_models_unloaded = 0;
cnxk_mldev->state = ML_CNXK_DEV_STATE_CONFIGURED;
return 0;
error:
rte_free(dev->data->queue_pairs);
rte_free(dev->data->models);
return ret;
}
static int
cnxk_ml_dev_close(struct rte_ml_dev *dev)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_qp *qp;
uint16_t model_id;
uint16_t qp_id;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
/* Un-initialize xstats */
cnxk_ml_xstats_uninit(cnxk_mldev);
if (mvtvm_ml_dev_close(cnxk_mldev) != 0)
plt_err("Failed to close MVTVM ML Device");
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) {
if (cn10k_ml_dev_close(cnxk_mldev) != 0)
plt_err("Failed to close CN10K ML Device");
}
rte_free(cnxk_mldev->index_map);
/* Stop and unload all models */
for (model_id = 0; model_id < dev->data->nb_models; model_id++) {
model = dev->data->models[model_id];
if (model != NULL) {
if (model->state == ML_CNXK_MODEL_STATE_STARTED) {
if (cnxk_ml_model_stop(dev, model_id) != 0)
plt_err("Could not stop model %u", model_id);
}
if (model->state == ML_CNXK_MODEL_STATE_LOADED) {
if (cnxk_ml_model_unload(dev, model_id) != 0)
plt_err("Could not unload model %u", model_id);
}
dev->data->models[model_id] = NULL;
}
}
rte_free(dev->data->models);
/* Destroy all queue pairs */
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
qp = dev->data->queue_pairs[qp_id];
if (qp != NULL) {
if (cnxk_ml_qp_destroy(dev, qp) != 0)
plt_err("Could not destroy queue pair %u", qp_id);
dev->data->queue_pairs[qp_id] = NULL;
}
}
rte_free(dev->data->queue_pairs);
cnxk_mldev->state = ML_CNXK_DEV_STATE_CLOSED;
/* Remove PCI device */
return rte_dev_remove(dev->device);
}
static int
cnxk_ml_dev_start(struct rte_ml_dev *dev)
{
struct cnxk_ml_dev *cnxk_mldev;
int ret;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) {
ret = cn10k_ml_dev_start(cnxk_mldev);
if (ret != 0) {
plt_err("Failed to start CN10K ML Device");
return ret;
}
}
cnxk_mldev->state = ML_CNXK_DEV_STATE_STARTED;
return 0;
}
static int
cnxk_ml_dev_stop(struct rte_ml_dev *dev)
{
struct cnxk_ml_dev *cnxk_mldev;
int ret;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) {
ret = cn10k_ml_dev_stop(cnxk_mldev);
if (ret != 0) {
plt_err("Failed to stop CN10K ML Device");
return ret;
}
}
cnxk_mldev->state = ML_CNXK_DEV_STATE_CONFIGURED;
return 0;
}
static int
cnxk_ml_dev_dump(struct rte_ml_dev *dev, FILE *fp)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
uint16_t model_id;
if ((dev == NULL) || (fp == NULL))
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
/* Dump model info */
for (model_id = 0; model_id < cnxk_mldev->mldev->data->nb_models; model_id++) {
model = cnxk_mldev->mldev->data->models[model_id];
if (model != NULL)
cnxk_ml_model_dump(cnxk_mldev, model, fp);
}
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI)
return cn10k_ml_dev_dump(cnxk_mldev, fp);
else
return mvtvm_ml_dev_dump(cnxk_mldev, fp);
}
static int
cnxk_ml_dev_selftest(struct rte_ml_dev *dev)
{
struct cnxk_ml_dev *cnxk_mldev;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_VDEV)
return -ENOTSUP;
return cn10k_ml_dev_selftest(cnxk_mldev);
}
static int
cnxk_ml_dev_queue_pair_setup(struct rte_ml_dev *dev, uint16_t queue_pair_id,
const struct rte_ml_dev_qp_conf *qp_conf, int socket_id)
{
struct rte_ml_dev_info dev_info;
struct cnxk_ml_qp *qp;
uint32_t nb_desc;
if (queue_pair_id >= dev->data->nb_queue_pairs) {
plt_err("Queue-pair id = %u (>= max queue pairs supported, %u)", queue_pair_id,
dev->data->nb_queue_pairs);
return -EINVAL;
}
if (dev->data->queue_pairs[queue_pair_id] != NULL)
cnxk_ml_dev_queue_pair_release(dev, queue_pair_id);
cnxk_ml_dev_info_get(dev, &dev_info);
if (qp_conf->nb_desc == 0) {
plt_err("Could not setup queue pair for %u descriptors", qp_conf->nb_desc);
return -EINVAL;
} else if (qp_conf->nb_desc > dev_info.max_desc) {
plt_err("Could not setup queue pair for %u descriptors (> %u)", qp_conf->nb_desc,
dev_info.max_desc);
return -EINVAL;
}
plt_ml_dbg("Creating queue-pair, queue_pair_id = %u, nb_desc = %u", queue_pair_id,
qp_conf->nb_desc);
/* The number of usable descriptors in a queue is one less than its size, so
 * create the queue with one more descriptor than requested, except when the
 * requested size already equals the maximum supported size.
 */
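/* For example, requesting 128 descriptors creates a 129-entry ring with 128
 * usable slots, while requesting dev_info.max_desc keeps the ring size
 * unchanged and leaves max_desc - 1 usable slots.
 */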
nb_desc =
(qp_conf->nb_desc == dev_info.max_desc) ? dev_info.max_desc : qp_conf->nb_desc + 1;
qp = cnxk_ml_qp_create(dev, queue_pair_id, nb_desc, socket_id);
if (qp == NULL) {
plt_err("Could not create queue pair %u", queue_pair_id);
return -ENOMEM;
}
dev->data->queue_pairs[queue_pair_id] = qp;
return 0;
}
static int
cnxk_ml_dev_stats_get(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats)
{
struct cnxk_ml_qp *qp;
int qp_id;
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
qp = dev->data->queue_pairs[qp_id];
stats->enqueued_count += qp->stats.enqueued_count;
stats->dequeued_count += qp->stats.dequeued_count;
stats->enqueue_err_count += qp->stats.enqueue_err_count;
stats->dequeue_err_count += qp->stats.dequeue_err_count;
}
return 0;
}
static void
cnxk_ml_dev_stats_reset(struct rte_ml_dev *dev)
{
struct cnxk_ml_qp *qp;
int qp_id;
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
qp = dev->data->queue_pairs[qp_id];
qp->stats.enqueued_count = 0;
qp->stats.dequeued_count = 0;
qp->stats.enqueue_err_count = 0;
qp->stats.dequeue_err_count = 0;
}
}
static int
cnxk_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map,
uint32_t size)
{
struct cnxk_ml_xstats_entry *xs;
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
uint32_t xstats_mode_count;
uint16_t layer_id;
uint32_t idx = 0;
uint32_t i;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
xstats_mode_count = 0;
switch (mode) {
case RTE_ML_DEV_XSTATS_DEVICE:
xstats_mode_count = cnxk_mldev->xstats.count_mode_device;
break;
case RTE_ML_DEV_XSTATS_MODEL:
if (model_id >= ML_CNXK_MAX_MODELS)
break;
model = cnxk_mldev->mldev->data->models[model_id];
for (layer_id = 0; layer_id < model->nb_layers; layer_id++) {
if (model->layer[layer_id].type == ML_CNXK_LAYER_TYPE_MRVL)
xstats_mode_count +=
cnxk_mldev->xstats.count_per_layer[model_id][layer_id];
}
if ((model->type == ML_CNXK_MODEL_TYPE_TVM) &&
(model->subtype != ML_CNXK_MODEL_SUBTYPE_TVM_MRVL))
xstats_mode_count += RTE_DIM(model_xstats);
break;
default:
return -EINVAL;
};
if (xstats_mode_count > size || xstats_map == NULL)
return xstats_mode_count;
for (i = 0; i < cnxk_mldev->xstats.count && idx < size; i++) {
xs = &cnxk_mldev->xstats.entries[i];
if (xs->mode != mode)
continue;
if (mode == RTE_ML_DEV_XSTATS_MODEL) {
if (model_id != xs->obj_idx)
continue;
model = cnxk_mldev->mldev->data->models[model_id];
if ((model->type == ML_CNXK_MODEL_TYPE_GLOW ||
model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) &&
xs->group == CNXK_ML_XSTATS_GROUP_MODEL)
continue;
if (model->type == ML_CNXK_MODEL_TYPE_TVM &&
model->layer[xs->layer_id].type == ML_CNXK_LAYER_TYPE_LLVM)
continue;
}
rte_strscpy(xstats_map[idx].name, xs->map.name, RTE_ML_STR_MAX);
xstats_map[idx].id = xs->map.id;
idx++;
}
return idx;
}
static int
cnxk_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16_t *stat_id,
uint64_t *value)
{
struct cnxk_ml_xstats_entry *xs;
struct cnxk_ml_dev *cnxk_mldev;
cnxk_ml_xstats_fn fn;
uint32_t i;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
for (i = 0; i < cnxk_mldev->xstats.count; i++) {
xs = &cnxk_mldev->xstats.entries[i];
if (strncmp(xs->map.name, name, RTE_ML_STR_MAX) == 0) {
if (stat_id != NULL)
*stat_id = xs->map.id;
switch (xs->fn_id) {
case CNXK_ML_XSTATS_FN_DEVICE:
fn = cnxk_ml_dev_xstat_get;
break;
case CNXK_ML_XSTATS_FN_MODEL:
fn = cnxk_ml_model_xstat_get;
break;
default:
plt_err("Unexpected xstat fn_id = %d", xs->fn_id);
return -EINVAL;
}
*value = fn(cnxk_mldev, xs->obj_idx, xs->layer_id, xs->type) -
xs->reset_value;
return 0;
}
}
if (stat_id != NULL)
*stat_id = (uint16_t)-1;
return -EINVAL;
}
static int
cnxk_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids)
{
struct cnxk_ml_xstats_entry *xs;
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
uint32_t xstats_mode_count;
cnxk_ml_xstats_fn fn;
uint16_t layer_id;
uint64_t val;
uint32_t idx;
uint32_t i;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
xstats_mode_count = 0;
switch (mode) {
case RTE_ML_DEV_XSTATS_DEVICE:
xstats_mode_count = cnxk_mldev->xstats.count_mode_device;
break;
case RTE_ML_DEV_XSTATS_MODEL:
if (model_id >= ML_CNXK_MAX_MODELS)
return -EINVAL;
model = cnxk_mldev->mldev->data->models[model_id];
for (layer_id = 0; layer_id < model->nb_layers; layer_id++)
xstats_mode_count += cnxk_mldev->xstats.count_per_layer[model_id][layer_id];
if ((model->type == ML_CNXK_MODEL_TYPE_TVM) &&
(model->subtype != ML_CNXK_MODEL_SUBTYPE_TVM_MRVL))
xstats_mode_count += RTE_DIM(model_xstats);
break;
default:
return -EINVAL;
};
idx = 0;
for (i = 0; i < nb_ids && idx < xstats_mode_count; i++) {
if (stat_ids[i] >= cnxk_mldev->xstats.count)
continue;
xs = &cnxk_mldev->xstats.entries[stat_ids[i]];
if (xs->mode != mode)
continue;
if (mode == RTE_ML_DEV_XSTATS_MODEL) {
if (model_id != xs->obj_idx)
continue;
model = cnxk_mldev->mldev->data->models[xs->obj_idx];
if ((model->type == ML_CNXK_MODEL_TYPE_GLOW ||
model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) &&
xs->group == CNXK_ML_XSTATS_GROUP_MODEL)
continue;
if (xs->layer_id == -1 && xs->group == CNXK_ML_XSTATS_GROUP_LAYER)
continue;
}
switch (xs->fn_id) {
case CNXK_ML_XSTATS_FN_DEVICE:
fn = cnxk_ml_dev_xstat_get;
break;
case CNXK_ML_XSTATS_FN_MODEL:
fn = cnxk_ml_model_xstat_get;
break;
default:
plt_err("Unexpected xstat fn_id = %d", xs->fn_id);
return -EINVAL;
}
val = fn(cnxk_mldev, xs->obj_idx, xs->layer_id, xs->type);
if (values)
values[idx] = val;
idx++;
}
return idx;
}
static int
cnxk_ml_dev_xstats_reset(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
const uint16_t stat_ids[], uint16_t nb_ids)
{
struct cnxk_ml_dev *cnxk_mldev;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
switch (mode) {
case RTE_ML_DEV_XSTATS_DEVICE:
return cnxk_ml_device_xstats_reset(cnxk_mldev, stat_ids, nb_ids);
case RTE_ML_DEV_XSTATS_MODEL:
return cnxk_ml_model_xstats_reset(cnxk_mldev, model_id, stat_ids, nb_ids);
};
return 0;
}
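/* Load a model: pick a free model slot, reserve a memzone holding the model
 * object plus its rte_ml_model_info area, hand the buffer to the glow or TVM
 * loader, and verify that the combined OCM requirement (write-back pages of
 * all Marvell layers plus the largest scratch area) fits in the OCM.
 */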
static int
cnxk_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, uint16_t *model_id)
{
struct rte_ml_dev_info dev_info;
struct cnxk_ml_dev *cnxk_mldev;
enum cnxk_ml_model_type type;
struct cnxk_ml_model *model;
char str[RTE_MEMZONE_NAMESIZE];
const struct plt_memzone *mz;
uint16_t max_scratch_pages;
struct cn10k_ml_ocm *ocm;
uint64_t model_info_size;
uint16_t total_wb_pages;
uint16_t lcl_model_id;
uint16_t layer_id;
uint64_t mz_size;
bool found;
int ret;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
type = cnxk_ml_model_get_type(params);
if (type == ML_CNXK_MODEL_TYPE_INVALID) {
plt_err("Invalid / unsupported model type");
return -EINVAL;
}
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_VDEV && type != ML_CNXK_MODEL_TYPE_TVM) {
plt_err("Unsupported model type");
return -ENOTSUP;
}
/* Find model ID */
found = false;
for (lcl_model_id = 0; lcl_model_id < dev->data->nb_models; lcl_model_id++) {
if (dev->data->models[lcl_model_id] == NULL) {
found = true;
break;
}
}
if (!found) {
plt_err("No slots available to load new model");
return -ENOMEM;
}
/* Compute memzone size */
cnxk_ml_dev_info_get(dev, &dev_info);
mz_size = PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_model), dev_info.align_size);
model_info_size = sizeof(struct rte_ml_model_info) +
ML_CNXK_MODEL_MAX_INPUT_OUTPUT * sizeof(struct rte_ml_io_info) +
ML_CNXK_MODEL_MAX_INPUT_OUTPUT * sizeof(struct rte_ml_io_info);
model_info_size = PLT_ALIGN_CEIL(model_info_size, dev_info.align_size);
mz_size += model_info_size;
/* Allocate memzone for model object */
snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", CNXK_ML_MODEL_MEMZONE_NAME, lcl_model_id);
mz = plt_memzone_reserve_aligned(str, mz_size, 0, dev_info.align_size);
if (!mz) {
plt_err("Failed to allocate memory for cnxk_ml_model: %s", str);
return -ENOMEM;
}
model = mz->addr;
model->cnxk_mldev = cnxk_mldev;
model->type = type;
model->model_id = lcl_model_id;
model->info = PLT_PTR_ADD(
model, PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_model), dev_info.align_size));
dev->data->models[lcl_model_id] = model;
if (type == ML_CNXK_MODEL_TYPE_GLOW)
ret = cn10k_ml_model_load(cnxk_mldev, params, model);
else
ret = mvtvm_ml_model_load(cnxk_mldev, params, model);
if (ret != 0)
goto error;
max_scratch_pages = 0;
total_wb_pages = 0;
layer_id = 0;
ocm = &cnxk_mldev->cn10k_mldev.ocm;
if (model->type == ML_CNXK_MODEL_TYPE_GLOW) {
total_wb_pages = total_wb_pages + model->layer[layer_id].glow.ocm_map.wb_pages;
max_scratch_pages = PLT_MAX(max_scratch_pages,
model->layer[layer_id].glow.ocm_map.scratch_pages);
#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
} else {
for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) {
if (model->layer[layer_id].type == ML_CNXK_LAYER_TYPE_MRVL) {
total_wb_pages = total_wb_pages +
model->layer[layer_id].glow.ocm_map.wb_pages;
max_scratch_pages =
PLT_MAX(max_scratch_pages,
model->layer[layer_id].glow.ocm_map.scratch_pages);
}
}
#endif
}
if ((total_wb_pages + max_scratch_pages) > ocm->num_pages) {
plt_err("model_id = %u: total_wb_pages (%u) + scratch_pages (%u) > %u",
lcl_model_id, total_wb_pages, max_scratch_pages, ocm->num_pages);
if (model->type == ML_CNXK_MODEL_TYPE_GLOW) {
plt_ml_dbg("layer_id = %u: wb_pages = %u, scratch_pages = %u", layer_id,
model->layer[layer_id].glow.ocm_map.wb_pages,
model->layer[layer_id].glow.ocm_map.scratch_pages);
#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
} else {
for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers;
layer_id++) {
if (model->layer[layer_id].type == ML_CNXK_LAYER_TYPE_MRVL) {
plt_ml_dbg(
"layer_id = %u: wb_pages = %u, scratch_pages = %u",
layer_id,
model->layer[layer_id].glow.ocm_map.wb_pages,
model->layer[layer_id].glow.ocm_map.scratch_pages);
}
}
#endif
}
if (model->type == ML_CNXK_MODEL_TYPE_GLOW)
cn10k_ml_model_unload(cnxk_mldev, model);
#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
else
mvtvm_ml_model_unload(cnxk_mldev, model);
#endif
return -ENOMEM;
}
plt_spinlock_init(&model->lock);
model->state = ML_CNXK_MODEL_STATE_LOADED;
cnxk_mldev->nb_models_loaded++;
*model_id = lcl_model_id;
return 0;
error:
rte_memzone_free(mz);
return ret;
}
int
cnxk_ml_model_unload(struct rte_ml_dev *dev, uint16_t model_id)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
char str[RTE_MEMZONE_NAMESIZE];
int ret = 0;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
model = dev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
if (model->state != ML_CNXK_MODEL_STATE_LOADED) {
plt_err("Cannot unload. Model in use.");
return -EBUSY;
}
if (model->type == ML_CNXK_MODEL_TYPE_GLOW)
ret = cn10k_ml_model_unload(cnxk_mldev, model);
else
ret = mvtvm_ml_model_unload(cnxk_mldev, model);
if (ret != 0)
return ret;
dev->data->models[model_id] = NULL;
cnxk_mldev->nb_models_unloaded++;
snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", CNXK_ML_MODEL_MEMZONE_NAME, model_id);
return plt_memzone_free(plt_memzone_lookup(str));
}
static int
cnxk_ml_model_start(struct rte_ml_dev *dev, uint16_t model_id)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
model = dev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
if (model->type == ML_CNXK_MODEL_TYPE_GLOW)
return cn10k_ml_model_start(cnxk_mldev, model);
else
return mvtvm_ml_model_start(cnxk_mldev, model);
}
int
cnxk_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
if (dev == NULL)
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
model = dev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
if (model->type == ML_CNXK_MODEL_TYPE_GLOW)
return cn10k_ml_model_stop(cnxk_mldev, model);
else
return mvtvm_ml_model_stop(cnxk_mldev, model);
}
static int
cnxk_ml_model_info_get(struct rte_ml_dev *dev, uint16_t model_id,
struct rte_ml_model_info *model_info)
{
struct rte_ml_model_info *info;
struct cnxk_ml_model *model;
if ((dev == NULL) || (model_info == NULL))
return -EINVAL;
model = dev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
info = (struct rte_ml_model_info *)model->info;
rte_memcpy(model_info, info, sizeof(struct rte_ml_model_info));
model_info->input_info = info->input_info;
model_info->output_info = info->output_info;
return 0;
}
static int
cnxk_ml_model_params_update(struct rte_ml_dev *dev, uint16_t model_id, void *buffer)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
if ((dev == NULL) || (buffer == NULL))
return -EINVAL;
cnxk_mldev = dev->data->dev_private;
if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_VDEV)
return -ENOTSUP;
model = dev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
return cn10k_ml_model_params_update(cnxk_mldev, model, buffer);
}
static int
cnxk_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,
struct rte_ml_buff_seg **qbuffer)
{
struct cnxk_ml_io_info *info = NULL;
struct cnxk_ml_model *model;
uint8_t *lcl_dbuffer;
uint8_t *lcl_qbuffer;
uint64_t d_offset;
uint64_t q_offset;
uint32_t i;
int ret;
if ((dev == NULL) || (dbuffer == NULL) || (qbuffer == NULL))
return -EINVAL;
model = dev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
if (model->type == ML_CNXK_MODEL_TYPE_GLOW)
info = cn10k_ml_model_io_info_get(model, 0);
else
info = mvtvm_ml_model_io_info_get(model, 0);
if (info == NULL)
return -EINVAL;
d_offset = 0;
q_offset = 0;
for (i = 0; i < info->nb_inputs; i++) {
if (model->type == ML_CNXK_MODEL_TYPE_TVM &&
model->subtype != ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) {
lcl_dbuffer = dbuffer[i]->addr;
lcl_qbuffer = qbuffer[i]->addr;
} else {
lcl_dbuffer = RTE_PTR_ADD(dbuffer[0]->addr, d_offset);
lcl_qbuffer = RTE_PTR_ADD(qbuffer[0]->addr, q_offset);
}
ret = cnxk_ml_io_quantize_single(&info->input[i], lcl_dbuffer, lcl_qbuffer);
if (ret < 0)
return ret;
if ((model->type == ML_CNXK_MODEL_TYPE_GLOW) ||
(model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL)) {
d_offset += info->input[i].sz_d;
q_offset += info->input[i].sz_q;
}
}
return 0;
}
static int
cnxk_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,
struct rte_ml_buff_seg **dbuffer)
{
struct cnxk_ml_io_info *info = NULL;
struct cnxk_ml_model *model;
uint8_t *lcl_qbuffer;
uint8_t *lcl_dbuffer;
uint64_t q_offset;
uint64_t d_offset;
uint32_t i;
int ret;
if ((dev == NULL) || (qbuffer == NULL) || (dbuffer == NULL))
return -EINVAL;
model = dev->data->models[model_id];
if (model == NULL) {
plt_err("Invalid model_id = %u", model_id);
return -EINVAL;
}
if (model->type == ML_CNXK_MODEL_TYPE_GLOW)
info = cn10k_ml_model_io_info_get(model, model->nb_layers - 1);
else
info = mvtvm_ml_model_io_info_get(model, model->nb_layers - 1);
if (info == NULL)
return -EINVAL;
q_offset = 0;
d_offset = 0;
for (i = 0; i < info->nb_outputs; i++) {
if (model->type == ML_CNXK_MODEL_TYPE_TVM &&
model->subtype != ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) {
lcl_qbuffer = qbuffer[i]->addr;
lcl_dbuffer = dbuffer[i]->addr;
} else {
lcl_qbuffer = RTE_PTR_ADD(qbuffer[0]->addr, q_offset);
lcl_dbuffer = RTE_PTR_ADD(dbuffer[0]->addr, d_offset);
}
ret = cnxk_ml_io_dequantize_single(&info->output[i], lcl_qbuffer, lcl_dbuffer);
if (ret < 0)
return ret;
if ((model->type == ML_CNXK_MODEL_TYPE_GLOW) ||
(model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL)) {
q_offset += info->output[i].sz_q;
d_offset += info->output[i].sz_d;
}
}
return 0;
}
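/* Request-ring index helpers. One slot is kept unused so that a full ring can
 * be distinguished from an empty one: with nb_desc = 4, head = 3 and tail = 1,
 * pending = (4 + 3 - 1) % 4 = 2 and free = 4 - 2 - 1 = 1.
 */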
static __rte_always_inline void
queue_index_advance(uint64_t *index, uint64_t nb_desc)
{
*index = (*index + 1) % nb_desc;
}
static __rte_always_inline uint64_t
queue_pending_count(uint64_t head, uint64_t tail, uint64_t nb_desc)
{
return (nb_desc + head - tail) % nb_desc;
}
static __rte_always_inline uint64_t
queue_free_count(uint64_t head, uint64_t tail, uint64_t nb_desc)
{
return nb_desc - queue_pending_count(head, tail, nb_desc) - 1;
}
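/* Enqueue burst: clamp nb_ops to the free ring slots, submit each op through
 * the model's enqueue_single handler, stop early if the hardware job command
 * queue is full, then publish the new head and update the per-qp stats.
 */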
__rte_hot uint16_t
cnxk_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
uint16_t nb_ops)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_model *model;
struct cnxk_ml_queue *queue;
struct cnxk_ml_qp *qp;
struct rte_ml_op *op;
uint16_t layer_id = 0;
uint16_t count;
uint64_t head;
cnxk_mldev = dev->data->dev_private;
qp = dev->data->queue_pairs[qp_id];
queue = &qp->queue;
head = queue->head;
nb_ops = PLT_MIN(nb_ops, queue_free_count(head, queue->tail, qp->nb_desc));
count = 0;
if (unlikely(nb_ops == 0))
return 0;
enqueue_req:
op = ops[count];
model = cnxk_mldev->mldev->data->models[op->model_id];
if (unlikely(!model->enqueue_single(cnxk_mldev, op, layer_id, qp, head)))
goto jcmdq_full;
queue_index_advance(&head, qp->nb_desc);
count++;
if (count < nb_ops)
goto enqueue_req;
jcmdq_full:
queue->head = head;
qp->stats.enqueued_count += count;
rte_wmb();
return count;
}
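/* Dequeue burst: walk pending requests from the tail, polling each request's
 * status word. Requests that have finished, or that have exceeded their
 * timeout (flagged as driver errors), are completed through the model's
 * result_update handler and returned to the caller.
 */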
__rte_hot uint16_t
cnxk_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
uint16_t nb_ops)
{
struct cnxk_ml_dev *cnxk_mldev;
struct cnxk_ml_queue *queue;
struct cnxk_ml_model *model;
struct cnxk_ml_req *req;
struct cnxk_ml_qp *qp;
uint64_t status;
uint16_t count;
uint64_t tail;
cnxk_mldev = dev->data->dev_private;
qp = dev->data->queue_pairs[qp_id];
queue = &qp->queue;
tail = queue->tail;
nb_ops = PLT_MIN(nb_ops, queue_pending_count(queue->head, tail, qp->nb_desc));
count = 0;
if (unlikely(nb_ops == 0))
goto empty_or_active;
dequeue_req:
req = &queue->reqs[tail];
model = cnxk_mldev->mldev->data->models[req->op->model_id];
status = cnxk_ml_get_poll_ptr(req);
if (unlikely(status != ML_CNXK_POLL_JOB_FINISH)) {
if (plt_tsc_cycles() < req->timeout)
goto empty_or_active;
else /* Timeout, set indication of driver error */
model->set_error_code(req, ML_CNXK_ETYPE_DRIVER, 0);
}
model->result_update(cnxk_mldev, qp->id, req);
ops[count] = req->op;
queue_index_advance(&tail, qp->nb_desc);
count++;
if (count < nb_ops)
goto dequeue_req;
empty_or_active:
queue->tail = tail;
return count;
}
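/* Illustrative fast-path usage (a sketch of application code, not part of this
 * driver): after rte_ml_dev_configure() and rte_ml_dev_start(), an application
 * drives the handlers above through the generic mldev API, for example:
 *
 *   enq = rte_ml_enqueue_burst(dev_id, qp_id, ops, nb_ops);
 *   do {
 *       deq += rte_ml_dequeue_burst(dev_id, qp_id, &ops[deq], enq - deq);
 *   } while (deq < enq);
 *
 * Variable names above are hypothetical; op setup and error handling are
 * omitted.
 */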
struct rte_ml_dev_ops cnxk_ml_ops = {
/* Device control ops */
.dev_info_get = cnxk_ml_dev_info_get,
.dev_configure = cnxk_ml_dev_configure,
.dev_close = cnxk_ml_dev_close,
.dev_start = cnxk_ml_dev_start,
.dev_stop = cnxk_ml_dev_stop,
.dev_dump = cnxk_ml_dev_dump,
.dev_selftest = cnxk_ml_dev_selftest,
/* Queue-pair handling ops */
.dev_queue_pair_setup = cnxk_ml_dev_queue_pair_setup,
.dev_queue_pair_release = cnxk_ml_dev_queue_pair_release,
/* Stats ops */
.dev_stats_get = cnxk_ml_dev_stats_get,
.dev_stats_reset = cnxk_ml_dev_stats_reset,
.dev_xstats_names_get = cnxk_ml_dev_xstats_names_get,
.dev_xstats_by_name_get = cnxk_ml_dev_xstats_by_name_get,
.dev_xstats_get = cnxk_ml_dev_xstats_get,
.dev_xstats_reset = cnxk_ml_dev_xstats_reset,
/* Model ops */
.model_load = cnxk_ml_model_load,
.model_unload = cnxk_ml_model_unload,
.model_start = cnxk_ml_model_start,
.model_stop = cnxk_ml_model_stop,
.model_info_get = cnxk_ml_model_info_get,
.model_params_update = cnxk_ml_model_params_update,
/* I/O ops */
.io_quantize = cnxk_ml_io_quantize,
.io_dequantize = cnxk_ml_io_dequantize,
};