f-stack/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c

1232 lines
33 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2020 Mellanox Technologies, Ltd
*/
#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/queue.h>
#include "mlx5_autoconf.h"
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_common.h>
#include <rte_eal_paging.h>
#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <mlx5_verbs.h>
#include <mlx5_rx.h>
#include <mlx5_tx.h>
#include <mlx5_utils.h>
#include <mlx5_malloc.h>
/**
* Modify Rx WQ vlan stripping offload
*
* @param rxq
* Rx queue.
*
* @return 0 on success, non-0 otherwise
*/
static int
mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_priv *rxq, int on)
{
uint16_t vlan_offloads =
(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
0;
struct ibv_wq_attr mod;
mod = (struct ibv_wq_attr){
.attr_mask = IBV_WQ_ATTR_FLAGS,
.flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
.flags = vlan_offloads,
};
return mlx5_glue->modify_wq(rxq->ctrl->obj->wq, &mod);
}
/**
* Modifies the attributes for the specified WQ.
*
* @param rxq
* Verbs Rx queue.
* @param type
* Type of change queue state.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_ibv_modify_wq(struct mlx5_rxq_priv *rxq, uint8_t type)
{
struct ibv_wq_attr mod = {
.attr_mask = IBV_WQ_ATTR_STATE,
.wq_state = (enum ibv_wq_state)type,
};
return mlx5_glue->modify_wq(rxq->ctrl->obj->wq, &mod);
}
/**
* Modify QP using Verbs API.
*
* @param txq_obj
* Verbs Tx queue object.
* @param type
* Type of change queue state.
* @param dev_port
* IB device port number.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
uint8_t dev_port)
{
struct ibv_qp_attr mod = {
.qp_state = IBV_QPS_RESET,
.port_num = dev_port,
};
int ret;
if (type != MLX5_TXQ_MOD_RST2RDY) {
ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
if (ret) {
DRV_LOG(ERR, "Cannot change Tx QP state to RESET %s",
strerror(errno));
rte_errno = errno;
return ret;
}
if (type == MLX5_TXQ_MOD_RDY2RST)
return 0;
}
mod.qp_state = IBV_QPS_INIT;
ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE | IBV_QP_PORT);
if (ret) {
DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
strerror(errno));
rte_errno = errno;
return ret;
}
mod.qp_state = IBV_QPS_RTR;
ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
if (ret) {
DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
strerror(errno));
rte_errno = errno;
return ret;
}
mod.qp_state = IBV_QPS_RTS;
ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
if (ret) {
DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
strerror(errno));
rte_errno = errno;
return ret;
}
return 0;
}
/**
 * Create the Verbs CQ of an Rx queue.
 *
 * When CQE compression is configured and HW timestamping is not requested,
 * the compressed-CQE format is selected and recorded in the Rx queue data
 * (byte_mask and mcqe_format are written as a side effect).
 *
 * @param rxq
 *   Pointer to Rx queue.
 *
 * @return
 *   The Verbs CQ object on success, NULL otherwise. This function does not
 *   write rte_errno itself.
 */
static struct ibv_cq *
mlx5_rxq_ibv_cq_create(struct mlx5_rxq_priv *rxq)
{
	struct mlx5_priv *priv = rxq->priv;
	struct mlx5_rxq_ctrl *ctrl = rxq->ctrl;
	struct mlx5_rxq_data *rxq_data = &ctrl->rxq;
	struct mlx5_rxq_obj *obj = ctrl->obj;
	struct {
		struct ibv_cq_init_attr_ex ibv;
		struct mlx5dv_cq_init_attr mlx5;
	} attr = {
		.ibv = {
			.cqe = mlx5_rxq_cqe_num(rxq_data),
			.channel = obj->ibv_channel,
			.comp_mask = 0,
		},
		.mlx5 = {
			.comp_mask = 0,
		},
	};

	if (priv->config.cqe_comp) {
		if (rxq_data->hw_timestamp) {
			DRV_LOG(DEBUG,
				"Port %u Rx CQE compression is disabled for HW"
				" timestamp.",
				priv->dev_data->port_id);
		} else {
			attr.mlx5.comp_mask |=
				MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
			rxq_data->byte_mask = UINT32_MAX;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
			if (mlx5_rxq_mprq_enabled(rxq_data)) {
				attr.mlx5.cqe_comp_res_format =
					MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX;
				rxq_data->mcqe_format =
					MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
			} else {
				attr.mlx5.cqe_comp_res_format =
					MLX5DV_CQE_RES_FORMAT_HASH;
				rxq_data->mcqe_format =
					MLX5_CQE_RESP_FORMAT_HASH;
			}
#else
			attr.mlx5.cqe_comp_res_format =
				MLX5DV_CQE_RES_FORMAT_HASH;
			rxq_data->mcqe_format = MLX5_CQE_RESP_FORMAT_HASH;
#endif
			/*
			 * For vectorized Rx, it must not be doubled in order
			 * to make cq_ci and rq_ci aligned.
			 */
			if (mlx5_rxq_check_vec_support(rxq_data) < 0)
				attr.ibv.cqe *= 2;
		}
	}
#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
	if (RTE_CACHE_LINE_SIZE == 128) {
		attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
		attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
	}
#endif
	return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq
						(priv->sh->cdev->ctx,
						 &attr.ibv,
						 &attr.mlx5));
}
/**
 * Create the Verbs WQ (RQ) of an Rx queue.
 *
 * The created WQ is stored in the Rx queue object (rxq->ctrl->obj->wq).
 * After creation the max_wr/max_sge values held in the init attributes are
 * compared with the requested ones; on mismatch the WQ is destroyed again.
 *
 * @param rxq
 *   Pointer to Rx queue.
 *
 * @return
 *   The Verbs WQ object on success, NULL otherwise. rte_errno is set to
 *   EINVAL by this function only for the WRs*SGEs mismatch case.
 */
static struct ibv_wq *
mlx5_rxq_ibv_wq_create(struct mlx5_rxq_priv *rxq)
{
	struct mlx5_priv *priv = rxq->priv;
	struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
	struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
	unsigned int wqe_n = 1 << rxq_data->elts_n;
	struct {
		struct ibv_wq_init_attr ibv;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
		struct mlx5dv_wq_init_attr mlx5;
#endif
	} wq_attr;

	wq_attr.ibv = (struct ibv_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_wr = wqe_n >> rxq_data->sges_n,
		/* Max number of scatter/gather elements in a WR. */
		.max_sge = 1 << rxq_data->sges_n,
		.pd = priv->sh->cdev->pd,
		.cq = rxq_obj->ibv_cq,
		/*
		 * comp_mask takes init-attribute mask bits, not create flags:
		 * announce that create_flags below is valid.
		 */
		.comp_mask = IBV_WQ_INIT_ATTR_FLAGS,
		.create_flags = (rxq_data->vlan_strip ?
				 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
	};
	/* By default, FCS (CRC) is stripped by hardware. */
	if (rxq_data->crc_present) {
		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
	if (priv->config.hw_padding) {
#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
		wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
#endif
	}
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
	wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
		.comp_mask = 0,
	};
	if (mlx5_rxq_mprq_enabled(rxq_data)) {
		struct mlx5dv_striding_rq_init_attr *mprq_attr =
						&wq_attr.mlx5.striding_rq_attrs;

		wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
			.single_stride_log_num_of_bytes = rxq_data->log_strd_sz,
			.single_wqe_log_num_of_strides = rxq_data->log_strd_num,
			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
		};
	}
	rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->cdev->ctx, &wq_attr.ibv,
					      &wq_attr.mlx5);
#else
	rxq_obj->wq = mlx5_glue->create_wq(priv->sh->cdev->ctx, &wq_attr.ibv);
#endif
	if (rxq_obj->wq) {
		/*
		 * Make sure number of WRs*SGEs match expectations since a queue
		 * cannot allocate more than "desc" buffers.
		 */
		if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
		    wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u requested %u*%u but got"
				" %u*%u WRs*SGEs.",
				priv->dev_data->port_id, rxq->idx,
				wqe_n >> rxq_data->sges_n,
				(1u << rxq_data->sges_n),
				wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
			claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
			rxq_obj->wq = NULL;
			rte_errno = EINVAL;
		}
	}
	return rxq_obj->wq;
}
/**
 * Create the Rx queue Verbs object.
 *
 * Creates, in order: the optional completion channel (when the queue has
 * interrupts enabled), the CQ and the WQ; moves the WQ to the ready state
 * and fills the Rx queue data with the CQ/WQ ring information.
 *
 * On failure every Verbs object created so far is destroyed and the
 * matching handle in the Rx queue object is reset to NULL, so that a later
 * release of the same object cannot destroy it a second time.
 *
 * @param rxq
 *   Pointer to Rx queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_ibv_obj_new(struct mlx5_rxq_priv *rxq)
{
	uint16_t idx = rxq->idx;
	struct mlx5_priv *priv = rxq->priv;
	uint16_t port_id = priv->dev_data->port_id;
	struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
	struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
	struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;
	struct mlx5dv_cq cq_info;
	struct mlx5dv_rwq rwq;
	int ret = 0;
	struct mlx5dv_obj obj;

	MLX5_ASSERT(rxq_data);
	MLX5_ASSERT(tmpl);
	tmpl->rxq_ctrl = rxq_ctrl;
	if (rxq_ctrl->irq) {
		tmpl->ibv_channel =
			mlx5_glue->create_comp_channel(priv->sh->cdev->ctx);
		if (!tmpl->ibv_channel) {
			DRV_LOG(ERR, "Port %u: comp channel creation failure.",
				port_id);
			rte_errno = ENOMEM;
			goto error;
		}
		tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
	}
	/* Create CQ using Verbs API. */
	tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(rxq);
	if (!tmpl->ibv_cq) {
		DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
			port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	obj.cq.in = tmpl->ibv_cq;
	obj.cq.out = &cq_info;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
	if (ret) {
		rte_errno = ret;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"Port %u wrong MLX5_CQE_SIZE environment "
			"variable value: it should be set to %u.",
			port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	/* Fill the rings. */
	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
	rxq_data->cq_db = cq_info.dbrec;
	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
	rxq_data->uar_data.db = RTE_PTR_ADD(cq_info.cq_uar, MLX5_CQ_DOORBELL);
#ifndef RTE_ARCH_64
	rxq_data->uar_data.sl_p = &priv->sh->uar_lock_cq;
#endif
	rxq_data->cqn = cq_info.cqn;
	/* Create WQ (RQ) using Verbs API. */
	tmpl->wq = mlx5_rxq_ibv_wq_create(rxq);
	if (!tmpl->wq) {
		DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
			port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	/* Change queue state to ready. */
	ret = mlx5_ibv_modify_wq(rxq, IBV_WQS_RDY);
	if (ret) {
		DRV_LOG(ERR,
			"Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
			port_id, idx);
		rte_errno = ret;
		goto error;
	}
	obj.rwq.in = tmpl->wq;
	obj.rwq.out = &rwq;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
	if (ret) {
		rte_errno = ret;
		goto error;
	}
	rxq_data->wqes = rwq.buf;
	rxq_data->rq_db = rwq.dbrec;
	rxq_data->cq_arm_sn = 0;
	mlx5_rxq_initialize(rxq_data);
	rxq_data->cq_ci = 0;
	priv->dev_data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/*
	 * Tear down in reverse creation order and clear each handle: the
	 * release routine treats a non-NULL wq as "object is live".
	 */
	if (tmpl->wq) {
		claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
		tmpl->wq = NULL;
	}
	if (tmpl->ibv_cq) {
		claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
		tmpl->ibv_cq = NULL;
	}
	if (tmpl->ibv_channel) {
		claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));
		tmpl->ibv_channel = NULL;
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Release an Rx verbs queue object.
 *
 * Nothing is done when the queue has no object or the object has no WQ.
 * Otherwise the WQ, the CQ and (when present) the completion channel are
 * destroyed and the queue control structure is marked as not started.
 *
 * @param rxq
 *   Pointer to Rx queue.
 */
static void
mlx5_rxq_ibv_obj_release(struct mlx5_rxq_priv *rxq)
{
	struct mlx5_rxq_obj *obj = rxq->ctrl->obj;

	if (obj != NULL && obj->wq != NULL) {
		claim_zero(mlx5_glue->destroy_wq(obj->wq));
		/* A NULL wq marks the object as already released. */
		obj->wq = NULL;
		MLX5_ASSERT(obj->ibv_cq);
		claim_zero(mlx5_glue->destroy_cq(obj->ibv_cq));
		if (obj->ibv_channel)
			claim_zero(mlx5_glue->destroy_comp_channel
							(obj->ibv_channel));
		rxq->ctrl->started = false;
	}
}
/**
 * Fetch and acknowledge one completion event of an Rx verbs queue object.
 *
 * @param rxq_obj
 *   Verbs Rx queue object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set:
 *   errno of the failing get_cq_event call, or EINVAL when the event
 *   belongs to a CQ other than the one of @p rxq_obj.
 */
static int
mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)
{
	struct ibv_cq *event_cq;
	void *event_ctx;
	int rc;

	rc = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,
				     &event_cq, &event_ctx);
	if (rc < 0) {
		rte_errno = errno;
		return -rte_errno;
	}
	if (event_cq != rxq_obj->ibv_cq) {
		/* The event does not belong to this queue's CQ. */
		rte_errno = EINVAL;
		return -rte_errno;
	}
	mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
	return 0;
}
/**
* Creates a receive work queue as a filed of indirection table.
*
* @param dev
* Pointer to Ethernet device.
* @param log_n
* Log of number of queues in the array.
* @param ind_tbl
* Verbs indirection table object.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n,
struct mlx5_ind_table_obj *ind_tbl)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct ibv_wq *wq[1 << log_n];
unsigned int i, j;
MLX5_ASSERT(ind_tbl);
for (i = 0; i != ind_tbl->queues_n; ++i) {
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev,
ind_tbl->queues[i]);
wq[i] = rxq->ctrl->obj->wq;
}
MLX5_ASSERT(i > 0);
/* Finalise indirection table. */
for (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i)
wq[i] = wq[j];
ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
(priv->sh->cdev->ctx,
&(struct ibv_rwq_ind_table_init_attr){
.log_ind_tbl_size = log_n,
.ind_tbl = wq,
.comp_mask = 0,
});
if (!ind_tbl->ind_table) {
rte_errno = errno;
return -rte_errno;
}
return 0;
}
/**
 * Destroy the Verbs indirection table held by an indirection table object.
 *
 * @param ind_tbl
 *   Indirection table object whose Verbs table is released.
 */
static void
mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl)
{
	struct ibv_rwq_ind_table *verbs_tbl = ind_tbl->ind_table;

	claim_zero(mlx5_glue->destroy_rwq_ind_table(verbs_tbl));
}
/**
 * Create an Rx Hash queue.
 *
 * A RAW_PACKET QP using Toeplitz hashing over the indirection table of
 * @p hrxq is created and stored in hrxq->qp. When flow DV support is
 * compiled in, the matching destination flow action is created as well.
 *
 * On failure the QP created by this call (if any) is destroyed and
 * hrxq->qp is reset to NULL so that no dangling handle is left behind.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param hrxq
 *   Pointer to Rx Hash queue.
 * @param tunnel
 *   Tunnel type. Only read when tunnel offload support is compiled in.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq,
		  int tunnel __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_qp *qp = NULL;
	struct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table;
	const uint8_t *rss_key = hrxq->rss_key;
	uint64_t hash_fields = hrxq->hash_fields;
	/* Same QP attributes whichever creation entry point is compiled in. */
	struct ibv_qp_init_attr_ex qp_attr = {
		.qp_type = IBV_QPT_RAW_PACKET,
		.comp_mask =
			IBV_QP_INIT_ATTR_PD |
			IBV_QP_INIT_ATTR_IND_TABLE |
			IBV_QP_INIT_ATTR_RX_HASH,
		.rx_hash_conf = (struct ibv_rx_hash_conf){
			.rx_hash_function =
				IBV_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = hrxq->rss_key_len,
			.rx_hash_key =
				(void *)(uintptr_t)rss_key,
			.rx_hash_fields_mask = hash_fields,
		},
		.rwq_ind_tbl = ind_tbl->ind_table,
		.pd = priv->sh->cdev->pd,
	};
	int err;
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	struct mlx5dv_qp_init_attr qp_init_attr;

	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
	if (tunnel) {
		qp_init_attr.comp_mask =
				       MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	if (dev->data->dev_conf.lpbk_mode) {
		/* Allow packet sent from NIC loop back w/o source MAC check. */
		qp_init_attr.comp_mask |=
				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
		qp_init_attr.create_flags |=
				MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
	}
#endif
	qp = mlx5_glue->dv_create_qp(priv->sh->cdev->ctx, &qp_attr,
				     &qp_init_attr);
#else
	qp = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx, &qp_attr);
#endif
	if (!qp) {
		rte_errno = errno;
		goto error;
	}
	hrxq->qp = qp;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
	if (!hrxq->action) {
		rte_errno = errno;
		goto error;
	}
#endif
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	if (qp) {
		claim_zero(mlx5_glue->destroy_qp(qp));
		/* hrxq->qp was pointing at the QP just destroyed. */
		hrxq->qp = NULL;
	}
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Destroy the Verbs queue pair of a hash Rx queue.
 *
 * @param hrxq
 *   Hash Rx queue whose QP is released.
 */
static void
mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq)
{
	struct ibv_qp *qp = hrxq->qp;

	claim_zero(mlx5_glue->destroy_qp(qp));
}
/**
 * Release a drop Rx queue Verbs object.
 *
 * Tolerates a partially built drop queue: each level (queue, control
 * structure, Verbs object, WQ, CQ) is released only if it exists. The
 * drop queue pointer stored in the private data is cleared at the end.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_priv *drop_rxq = priv->drop_queue.rxq;

	if (drop_rxq == NULL)
		return;
	if (drop_rxq->ctrl != NULL) {
		struct mlx5_rxq_obj *drop_obj = drop_rxq->ctrl->obj;

		if (drop_obj != NULL) {
			if (drop_obj->wq)
				claim_zero(mlx5_glue->destroy_wq
							(drop_obj->wq));
			if (drop_obj->ibv_cq)
				claim_zero(mlx5_glue->destroy_cq
							(drop_obj->ibv_cq));
			mlx5_free(drop_obj);
		}
		mlx5_free(drop_rxq->ctrl);
	}
	mlx5_free(drop_rxq);
	priv->drop_queue.rxq = NULL;
}
/**
* Create a drop Rx queue Verbs object.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct ibv_context *ctx = priv->sh->cdev->ctx;
struct mlx5_rxq_priv *rxq = priv->drop_queue.rxq;
struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
struct mlx5_rxq_obj *rxq_obj = NULL;
if (rxq != NULL)
return 0;
rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY);
if (rxq == NULL) {
DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue memory.",
dev->data->port_id);
rte_errno = ENOMEM;
return -rte_errno;
}
priv->drop_queue.rxq = rxq;
rxq_ctrl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_ctrl), 0,
SOCKET_ID_ANY);
if (rxq_ctrl == NULL) {
DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue control memory.",
dev->data->port_id);
rte_errno = ENOMEM;
goto error;
}
rxq->ctrl = rxq_ctrl;
rxq_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_obj), 0,
SOCKET_ID_ANY);
if (rxq_obj == NULL) {
DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue memory.",
dev->data->port_id);
rte_errno = ENOMEM;
goto error;
}
rxq_ctrl->obj = rxq_obj;
rxq_obj->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
if (!rxq_obj->ibv_cq) {
DRV_LOG(DEBUG, "Port %u cannot allocate CQ for drop queue.",
dev->data->port_id);
rte_errno = errno;
goto error;
}
rxq_obj->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){
.wq_type = IBV_WQT_RQ,
.max_wr = 1,
.max_sge = 1,
.pd = priv->sh->cdev->pd,
.cq = rxq_obj->ibv_cq,
});
if (!rxq_obj->wq) {
DRV_LOG(DEBUG, "Port %u cannot allocate WQ for drop queue.",
dev->data->port_id);
rte_errno = errno;
goto error;
}
return 0;
error:
mlx5_rxq_ibv_obj_drop_release(dev);
return -rte_errno;
}
/**
 * Create a Verbs drop action for Rx Hash queue.
 *
 * Builds the drop Rx queue, a single-entry indirection table pointing at
 * its WQ and a RAW_PACKET QP on top of it. When flow DV support is compiled
 * in, the destination flow action is created as well. On success the
 * indirection table is stored in the drop hash Rx queue.
 *
 * On failure only the objects created by this call are destroyed,
 * hrxq->qp is left NULL once a QP creation has been attempted, and
 * rte_errno is preserved across the cleanup.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_ibv_drop_action_create(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
	struct ibv_rwq_ind_table *ind_tbl = NULL;
	struct ibv_qp *qp = NULL;
	struct mlx5_rxq_obj *rxq;
	int ret;

	MLX5_ASSERT(hrxq && hrxq->ind_table);
	ret = mlx5_rxq_ibv_obj_drop_create(dev);
	if (ret < 0)
		goto error;
	rxq = priv->drop_queue.rxq->ctrl->obj;
	ind_tbl = mlx5_glue->create_rwq_ind_table
				(priv->sh->cdev->ctx,
				 &(struct ibv_rwq_ind_table_init_attr){
					.log_ind_tbl_size = 0,
					.ind_tbl = (struct ibv_wq **)&rxq->wq,
					.comp_mask = 0,
				 });
	if (!ind_tbl) {
		DRV_LOG(DEBUG, "Port %u"
			" cannot allocate indirection table for drop queue.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	qp = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask = IBV_QP_INIT_ATTR_PD |
				     IBV_QP_INIT_ATTR_IND_TABLE |
				     IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = ind_tbl,
			.pd = priv->sh->cdev->pd
		 });
	/* Published even when NULL, so hrxq->qp never keeps an old value. */
	hrxq->qp = qp;
	if (!qp) {
		DRV_LOG(DEBUG, "Port %u cannot allocate QP for drop queue.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
	if (!hrxq->action) {
		rte_errno = errno;
		goto error;
	}
#endif
	hrxq->ind_table->ind_table = ind_tbl;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (qp) {
		claim_zero(mlx5_glue->destroy_qp(qp));
		/* hrxq->qp was pointing at the QP just destroyed. */
		hrxq->qp = NULL;
	}
	if (ind_tbl)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
	if (priv->drop_queue.rxq)
		mlx5_rxq_ibv_obj_drop_release(dev);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Release a drop hash Rx queue.
 *
 * Destroys, in order: the flow action (when flow DV support is compiled
 * in), the QP, the indirection table and finally the drop Rx queue object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *drop_hrxq = priv->drop_queue.hrxq;
	struct ibv_rwq_ind_table *drop_tbl = drop_hrxq->ind_table->ind_table;

#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	claim_zero(mlx5_glue->destroy_flow_action(drop_hrxq->action));
#endif
	claim_zero(mlx5_glue->destroy_qp(drop_hrxq->qp));
	claim_zero(mlx5_glue->destroy_rwq_ind_table(drop_tbl));
	mlx5_rxq_ibv_obj_drop_release(dev);
}
/**
 * Create a QP Verbs object.
 *
 * The QP is a RAW_PACKET QP whose send and receive completion queues are
 * both the CQ already stored in the Tx queue object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The QP Verbs object, NULL otherwise and rte_errno is set.
 */
static struct ibv_qp *
mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct ibv_qp *qp_obj = NULL;
	struct ibv_qp_init_attr_ex qp_attr = { 0 };
	const int desc = 1 << txq_data->elts_n;

	MLX5_ASSERT(txq_ctrl->obj->cq);
	/* CQ to be associated with the send queue. */
	qp_attr.send_cq = txq_ctrl->obj->cq;
	/* CQ to be associated with the receive queue. */
	qp_attr.recv_cq = txq_ctrl->obj->cq;
	/* Max number of outstanding WRs. */
	qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ?
				   priv->sh->device_attr.max_qp_wr : desc);
	/*
	 * Max number of scatter/gather elements in a WR, must be 1 to prevent
	 * libmlx5 from trying to affect too much memory. TX gather is not
	 * impacted by the device_attr.max_sge limit and will still work
	 * properly.
	 */
	qp_attr.cap.max_send_sge = 1;
	qp_attr.qp_type = IBV_QPT_RAW_PACKET;
	/* Do *NOT* enable this, completions events are managed per Tx burst. */
	qp_attr.sq_sig_all = 0;
	qp_attr.pd = priv->sh->cdev->pd;
	qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
	if (txq_data->inlen_send)
		qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data;
	if (txq_data->tso_en) {
		qp_attr.max_tso_header = txq_ctrl->max_tso_header;
		qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
	}
	qp_obj = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx, &qp_attr);
	if (qp_obj == NULL) {
		/* Capture errno first: logging may overwrite it. */
		rte_errno = errno;
		DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.",
			dev->data->port_id, idx);
	}
	return qp_obj;
}
/**
 * Initialize Tx UAR registers for primary process.
 *
 * Records the doorbell mapping type of the queue and stores @p bf_reg as
 * the doorbell address in the per-process UAR table entry of the queue.
 * On non-64-bit builds a UAR lock is selected from the UAR page number.
 *
 * A failure to obtain the memory page size is only logged (rte_errno is
 * set to ENOMEM); the function carries on regardless.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param bf_reg
 *   BlueFlame register from Verbs UAR.
 */
static void
mlx5_txq_ibv_uar_init(struct mlx5_txq_ctrl *txq_ctrl, void *bf_reg)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_txq_data *txq = &txq_ctrl->txq;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	const size_t pg_sz = rte_mem_page_size();
	off_t mmap_off = txq_ctrl->uar_mmap_offset;
#ifndef RTE_ARCH_64
	unsigned int uar_lock_idx;
#endif

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	MLX5_ASSERT(ppriv);
	if (pg_sz == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
	}
	txq->db_heu = priv->sh->cdev->config.dbnc == MLX5_TXDB_HEURISTIC;
	txq->db_nc = mlx5_db_map_type_get(mmap_off, pg_sz);
	ppriv->uar_table[txq->idx].db = bf_reg;
#ifndef RTE_ARCH_64
	/* Assign an UAR lock according to UAR page number. */
	uar_lock_idx = (mmap_off / pg_sz) & MLX5_UAR_PAGE_NUM_MASK;
	ppriv->uar_table[txq->idx].sl_p = &priv->sh->uar_lock[uar_lock_idx];
#endif
}
/**
* Create the Tx queue Verbs object.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* Queue index in DPDK Tx queue array.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
struct mlx5_txq_ctrl *txq_ctrl =
container_of(txq_data, struct mlx5_txq_ctrl, txq);
struct mlx5_txq_obj *txq_obj = txq_ctrl->obj;
unsigned int cqe_n;
struct mlx5dv_qp qp;
struct mlx5dv_cq cq_info;
struct mlx5dv_obj obj;
const int desc = 1 << txq_data->elts_n;
int ret = 0;
MLX5_ASSERT(txq_data);
MLX5_ASSERT(txq_obj);
txq_obj->txq_ctrl = txq_ctrl;
if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION "
"must never be set.", dev->data->port_id);
rte_errno = EINVAL;
return -rte_errno;
}
cqe_n = desc / MLX5_TX_COMP_THRESH +
1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
txq_obj->cq = mlx5_glue->create_cq(priv->sh->cdev->ctx, cqe_n,
NULL, NULL, 0);
if (txq_obj->cq == NULL) {
DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.",
dev->data->port_id, idx);
rte_errno = errno;
goto error;
}
txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx);
if (txq_obj->qp == NULL) {
rte_errno = errno;
goto error;
}
ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY,
(uint8_t)priv->dev_port);
if (ret) {
DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.",
dev->data->port_id, idx);
rte_errno = errno;
goto error;
}
qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
/* If using DevX, need additional mask to read tisn value. */
if (priv->sh->devx && !priv->sh->tdn)
qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
#endif
obj.cq.in = txq_obj->cq;
obj.cq.out = &cq_info;
obj.qp.in = txq_obj->qp;
obj.qp.out = &qp;
ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
if (ret != 0) {
rte_errno = errno;
goto error;
}
if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
DRV_LOG(ERR,
"Port %u wrong MLX5_CQE_SIZE environment variable"
" value: it should be set to %u.",
dev->data->port_id, RTE_CACHE_LINE_SIZE);
rte_errno = EINVAL;
goto error;
}
txq_data->cqe_n = log2above(cq_info.cqe_cnt);
txq_data->cqe_s = 1 << txq_data->cqe_n;
txq_data->cqe_m = txq_data->cqe_s - 1;
txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8;
txq_data->wqes = qp.sq.buf;
txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
txq_data->wqe_s = 1 << txq_data->wqe_n;
txq_data->wqe_m = txq_data->wqe_s - 1;
txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
txq_data->cq_db = cq_info.dbrec;
txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
txq_data->cq_ci = 0;
txq_data->cq_pi = 0;
txq_data->wqe_ci = 0;
txq_data->wqe_pi = 0;
txq_data->wqe_comp = 0;
txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
/*
* If using DevX need to query and store TIS transport domain value.
* This is done once per port.
* Will use this value on Rx, when creating matching TIR.
*/
if (priv->sh->devx && !priv->sh->tdn) {
ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn,
&priv->sh->tdn);
if (ret) {
DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS "
"transport domain.", dev->data->port_id, idx);
rte_errno = EINVAL;
goto error;
} else {
DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d "
"transport domain %d.", dev->data->port_id,
idx, qp.tisn, priv->sh->tdn);
}
}
#endif
if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".",
dev->data->port_id, txq_ctrl->uar_mmap_offset);
} else {
DRV_LOG(ERR,
"Port %u failed to retrieve UAR info, invalid libmlx5.so",
dev->data->port_id);
rte_errno = EINVAL;
goto error;
}
mlx5_txq_ibv_uar_init(txq_ctrl, qp.bf.reg);
dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
return 0;
error:
ret = rte_errno; /* Save rte_errno before cleanup. */
if (txq_obj->cq)
claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
if (txq_obj->qp)
claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
rte_errno = ret; /* Restore rte_errno. */
return -rte_errno;
}
/**
 * Create the dummy QP with minimal resources for loopback.
 *
 * Does nothing unless loopback mode is configured on the device and both
 * tunnel offload and flow DV support are compiled in. The CQ/QP pair is
 * shared through the device context: only the caller that raises the
 * reference counter from zero creates it, later callers just take a
 * reference.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev)
{
#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct ibv_context *ctx = sh->cdev->ctx;
	struct mlx5dv_qp_init_attr dv_qp_attr = {0};
	struct {
		struct ibv_cq_init_attr_ex ibv;
		struct mlx5dv_cq_init_attr mlx5;
	} cq_attr = {
		.ibv = {
			.cqe = 1,
			.channel = NULL,
			.comp_mask = 0,
		},
		.mlx5 = {
			.comp_mask = 0,
		},
	};

	if (!dev->data->dev_conf.lpbk_mode)
		return 0;
	/* Allow packet sent from NIC loop back w/o source MAC check. */
	dv_qp_attr.comp_mask |= MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
	dv_qp_attr.create_flags |= MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
	/* Only need to check refcnt, 0 after "sh" is allocated. */
	if (__atomic_fetch_add(&sh->self_lb.refcnt, 1,
			       __ATOMIC_RELAXED) != 0) {
		MLX5_ASSERT(sh->self_lb.ibv_cq && sh->self_lb.qp);
		priv->lb_used = 1;
		return 0;
	}
	/* Only CQ is needed, no WQ(RQ) is required in this case. */
	sh->self_lb.ibv_cq = mlx5_glue->cq_ex_to_cq
				(mlx5_glue->dv_create_cq(ctx, &cq_attr.ibv,
							 &cq_attr.mlx5));
	if (!sh->self_lb.ibv_cq) {
		DRV_LOG(ERR, "Port %u cannot allocate CQ for loopback.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	sh->self_lb.qp = mlx5_glue->dv_create_qp(ctx,
				&(struct ibv_qp_init_attr_ex){
					.qp_type = IBV_QPT_RAW_PACKET,
					.comp_mask = IBV_QP_INIT_ATTR_PD,
					.pd = sh->cdev->pd,
					.send_cq = sh->self_lb.ibv_cq,
					.recv_cq = sh->self_lb.ibv_cq,
					.cap = {
						.max_recv_wr = 1,
					},
				},
				&dv_qp_attr);
	if (!sh->self_lb.qp) {
		DRV_LOG(DEBUG, "Port %u cannot allocate QP for loopback.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->lb_used = 1;
	return 0;
error:
	if (sh->self_lb.ibv_cq) {
		claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
		sh->self_lb.ibv_cq = NULL;
	}
	/* Give back the reference taken above. */
	(void)__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED);
	return -rte_errno;
#else
	RTE_SET_USED(dev);
	return 0;
#endif
}
/**
 * Release the dummy queue resources for loopback.
 *
 * Drops the reference held by this port (if any); the shared QP and CQ are
 * destroyed when the reference counter reaches zero.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev)
{
#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	if (priv->lb_used) {
		MLX5_ASSERT(__atomic_load_n(&sh->self_lb.refcnt,
					    __ATOMIC_RELAXED));
		if (__atomic_sub_fetch(&sh->self_lb.refcnt, 1,
				       __ATOMIC_RELAXED) == 0) {
			/* Last user: the QP goes before its CQ. */
			if (sh->self_lb.qp) {
				claim_zero(mlx5_glue->destroy_qp
							(sh->self_lb.qp));
				sh->self_lb.qp = NULL;
			}
			if (sh->self_lb.ibv_cq) {
				claim_zero(mlx5_glue->destroy_cq
							(sh->self_lb.ibv_cq));
				sh->self_lb.ibv_cq = NULL;
			}
		}
		priv->lb_used = 0;
	}
#else
	RTE_SET_USED(dev);
#endif
}
/**
 * Release a Tx verbs queue object.
 *
 * The QP is destroyed first, then the CQ.
 *
 * @param txq_obj
 *   Verbs Tx queue object.
 */
void
mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj)
{
	struct ibv_qp *qp;
	struct ibv_cq *cq;

	MLX5_ASSERT(txq_obj);
	qp = txq_obj->qp;
	cq = txq_obj->cq;
	claim_zero(mlx5_glue->destroy_qp(qp));
	claim_zero(mlx5_glue->destroy_cq(cq));
}
/* Queue object operations implemented on top of the Verbs API. */
struct mlx5_obj_ops ibv_obj_ops = {
	/* Rx queue object. */
	.rxq_obj_new = mlx5_rxq_ibv_obj_new,
	.rxq_obj_modify = mlx5_ibv_modify_wq,
	.rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
	.rxq_event_get = mlx5_rx_ibv_get_event,
	.rxq_obj_release = mlx5_rxq_ibv_obj_release,
	/* Indirection table and hash Rx queue. */
	.ind_table_new = mlx5_ibv_ind_table_new,
	.ind_table_destroy = mlx5_ibv_ind_table_destroy,
	.hrxq_new = mlx5_ibv_hrxq_new,
	.hrxq_destroy = mlx5_ibv_qp_destroy,
	/* Drop action. */
	.drop_action_create = mlx5_ibv_drop_action_create,
	.drop_action_destroy = mlx5_ibv_drop_action_destroy,
	/* Tx queue object. */
	.txq_obj_new = mlx5_txq_ibv_obj_new,
	.txq_obj_modify = mlx5_ibv_modify_qp,
	.txq_obj_release = mlx5_txq_ibv_obj_release,
	/* No loopback dummy queue callbacks are registered in this table. */
	.lb_dummy_queue_create = NULL,
	.lb_dummy_queue_release = NULL,
};