/* SPDX-License-Identifier: BSD-3-Clause * Copyright 2019 Mellanox Technologies, Ltd */ #ifndef RTE_PMD_MLX5_COMMON_H_ #define RTE_PMD_MLX5_COMMON_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx5_prm.h" #include "mlx5_devx_cmds.h" #include "mlx5_common_os.h" #include "mlx5_common_mr.h" /* Reported driver name. */ #define MLX5_PCI_DRIVER_NAME "mlx5_pci" #define MLX5_AUXILIARY_DRIVER_NAME "mlx5_auxiliary" /* Bit-field manipulation. */ #define BITFIELD_DECLARE(bf, type, size) \ type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \ !!((size_t)(size) % (sizeof(type) * CHAR_BIT)))] #define BITFIELD_DEFINE(bf, type, size) \ BITFIELD_DECLARE((bf), type, (size)) = { 0 } #define BITFIELD_SET(bf, b) \ (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] |= \ ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))) #define BITFIELD_RESET(bf, b) \ (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] &= \ ~((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))) #define BITFIELD_ISSET(bf, b) \ !!(((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] & \ ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))) /* * Helper macros to work around __VA_ARGS__ limitations in a C99 compliant * manner. */ #define PMD_DRV_LOG_STRIP(a, b) a #define PMD_DRV_LOG_OPAREN ( #define PMD_DRV_LOG_CPAREN ) #define PMD_DRV_LOG_COMMA , /* Return the file name part of a path. */ static inline const char * pmd_drv_log_basename(const char *s) { const char *n = s; while (*n) if (*(n++) == '/') s = n; return s; } #define PMD_DRV_LOG___(level, type, name, ...) \ rte_log(RTE_LOG_ ## level, \ type, \ RTE_FMT(name ": " \ RTE_FMT_HEAD(__VA_ARGS__,), \ RTE_FMT_TAIL(__VA_ARGS__,))) #ifdef RTE_LIBRTE_MLX5_DEBUG #define PMD_DRV_LOG__(level, type, name, ...) \ PMD_DRV_LOG___(level, type, name, "%s:%u: %s(): " __VA_ARGS__) #define PMD_DRV_LOG_(level, type, name, s, ...) \ PMD_DRV_LOG__(level, type, name,\ s "\n" PMD_DRV_LOG_COMMA \ pmd_drv_log_basename(__FILE__) PMD_DRV_LOG_COMMA \ __LINE__ PMD_DRV_LOG_COMMA \ __func__, \ __VA_ARGS__) #else /* RTE_LIBRTE_MLX5_DEBUG */ #define PMD_DRV_LOG__(level, type, name, ...) \ PMD_DRV_LOG___(level, type, name, __VA_ARGS__) #define PMD_DRV_LOG_(level, type, name, s, ...) \ PMD_DRV_LOG__(level, type, name, s "\n", __VA_ARGS__) #endif /* RTE_LIBRTE_MLX5_DEBUG */ /* claim_zero() does not perform any check when debugging is disabled. */ #ifdef RTE_LIBRTE_MLX5_DEBUG #define MLX5_ASSERT(exp) RTE_VERIFY(exp) #define claim_zero(...) MLX5_ASSERT((__VA_ARGS__) == 0) #define claim_nonzero(...) MLX5_ASSERT((__VA_ARGS__) != 0) #else /* RTE_LIBRTE_MLX5_DEBUG */ #define MLX5_ASSERT(exp) RTE_ASSERT(exp) #define claim_zero(...) (__VA_ARGS__) #define claim_nonzero(...) (__VA_ARGS__) #endif /* RTE_LIBRTE_MLX5_DEBUG */ /* Allocate a buffer on the stack and fill it with a printf format string. */ #define MKSTR(name, ...) \ int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \ char name[mkstr_size_##name + 1]; \ \ memset(name, 0, mkstr_size_##name + 1); \ snprintf(name, sizeof(name), "" __VA_ARGS__) enum { PCI_VENDOR_ID_MELLANOX = 0x15b3, }; enum { PCI_DEVICE_ID_MELLANOX_CONNECTX4 = 0x1013, PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014, PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015, PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016, PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017, PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018, PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019, PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a, PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2, PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3, PCI_DEVICE_ID_MELLANOX_CONNECTX6 = 0x101b, PCI_DEVICE_ID_MELLANOX_CONNECTX6VF = 0x101c, PCI_DEVICE_ID_MELLANOX_CONNECTX6DX = 0x101d, PCI_DEVICE_ID_MELLANOX_CONNECTXVF = 0x101e, PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF = 0xa2d6, PCI_DEVICE_ID_MELLANOX_CONNECTX6LX = 0x101f, PCI_DEVICE_ID_MELLANOX_CONNECTX7 = 0x1021, PCI_DEVICE_ID_MELLANOX_CONNECTX7BF = 0Xa2dc, }; /* Maximum number of simultaneous unicast MAC addresses. */ #define MLX5_MAX_UC_MAC_ADDRESSES 128 /* Maximum number of simultaneous Multicast MAC addresses. */ #define MLX5_MAX_MC_MAC_ADDRESSES 128 /* Maximum number of simultaneous MAC addresses. */ #define MLX5_MAX_MAC_ADDRESSES \ (MLX5_MAX_UC_MAC_ADDRESSES + MLX5_MAX_MC_MAC_ADDRESSES) /* Recognized Infiniband device physical port name types. */ enum mlx5_nl_phys_port_name_type { MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */ MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* before kernel ver < 5.0 */ MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */ MLX5_PHYS_PORT_NAME_TYPE_PFHPF, /* pf0, kernel ver >= 5.7, HPF rep */ MLX5_PHYS_PORT_NAME_TYPE_PFSF, /* pf0sf0, kernel ver >= 5.0 */ MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */ }; /** Switch information returned by mlx5_nl_switch_info(). */ struct mlx5_switch_info { uint32_t master:1; /**< Master device. */ uint32_t representor:1; /**< Representor device. */ enum mlx5_nl_phys_port_name_type name_type; /** < Port name type. */ int32_t ctrl_num; /**< Controller number (valid for c#pf#vf# format). */ int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ int32_t port_name; /**< Representor port name. */ uint64_t switch_id; /**< Switch identifier. */ }; /* CQE status. */ enum mlx5_cqe_status { MLX5_CQE_STATUS_SW_OWN = -1, MLX5_CQE_STATUS_HW_OWN = -2, MLX5_CQE_STATUS_ERR = -3, }; /** * Check whether CQE is valid. * * @param cqe * Pointer to CQE. * @param cqes_n * Size of completion queue. * @param ci * Consumer index. * * @return * The CQE status. */ static __rte_always_inline enum mlx5_cqe_status check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n, const uint16_t ci) { const uint16_t idx = ci & cqes_n; const uint8_t op_own = cqe->op_own; const uint8_t op_owner = MLX5_CQE_OWNER(op_own); const uint8_t op_code = MLX5_CQE_OPCODE(op_own); if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID))) return MLX5_CQE_STATUS_HW_OWN; /* Prevent speculative reading of other fields in CQE until * CQE is valid. */ rte_atomic_thread_fence(__ATOMIC_ACQUIRE); if (unlikely(op_code == MLX5_CQE_RESP_ERR || op_code == MLX5_CQE_REQ_ERR)) return MLX5_CQE_STATUS_ERR; return MLX5_CQE_STATUS_SW_OWN; } /* * Get PCI address string from EAL device. * * @param[out] addr * The output address buffer string * @param[in] size * The output buffer size * @return * - 0 on success. * - Negative value and rte_errno is set otherwise. */ __rte_internal int mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size); /* * Get PCI address from sysfs of a PCI-related device. * * @param[in] dev_path * The sysfs path should not point to the direct plain PCI device. * Instead, the node "/device/" is used to access the real device. * @param[out] pci_addr * Parsed PCI address. * * @return * - 0 on success. * - Negative value and rte_errno is set otherwise. */ __rte_internal int mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr); /* * Get kernel network interface name from sysfs IB device path. * * @param[in] ibdev_path * The sysfs path to IB device. * @param[out] ifname * Interface name output of size IF_NAMESIZE. * * @return * - 0 on success. * - Negative value and rte_errno is set otherwise. */ __rte_internal int mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname); __rte_internal int mlx5_auxiliary_get_child_name(const char *dev, const char *node, char *child, size_t size); enum mlx5_class { MLX5_CLASS_INVALID, MLX5_CLASS_ETH = RTE_BIT64(0), MLX5_CLASS_VDPA = RTE_BIT64(1), MLX5_CLASS_REGEX = RTE_BIT64(2), MLX5_CLASS_COMPRESS = RTE_BIT64(3), MLX5_CLASS_CRYPTO = RTE_BIT64(4), }; #define MLX5_DBR_SIZE RTE_CACHE_LINE_SIZE /* devX creation object */ struct mlx5_devx_obj { void *obj; /* The DV object. */ int id; /* The object ID. */ }; /* UMR memory buffer used to define 1 entry in indirect mkey. */ struct mlx5_klm { uint32_t byte_count; uint32_t mkey; uint64_t address; }; /** Control for key/values list. */ struct mlx5_kvargs_ctrl { struct rte_kvargs *kvlist; /* Structure containing list of key/values.*/ bool is_used[RTE_KVARGS_MAX]; /* Indicator which devargs were used. */ }; /** * Call a handler function for each key/value in the list of keys. * * For each key/value association that matches the given key, calls the * handler function with the for a given arg_name passing the value on the * dictionary for that key and a given extra argument. * * @param mkvlist * The mlx5_kvargs structure. * @param keys * A list of keys to process (table of const char *, the last must be NULL). * @param handler * The function to call for each matching key. * @param opaque_arg * A pointer passed unchanged to the handler. * * @return * - 0 on success * - Negative on error */ __rte_internal int mlx5_kvargs_process(struct mlx5_kvargs_ctrl *mkvlist, const char *const keys[], arg_handler_t handler, void *opaque_arg); /* All UAR arguments using doorbell register in datapath. */ struct mlx5_uar_data { uint64_t *db; /* The doorbell's virtual address mapped to the relevant HW UAR space.*/ #ifndef RTE_ARCH_64 rte_spinlock_t *sl_p; /* Pointer to UAR access lock required for 32bit implementations. */ #endif /* RTE_ARCH_64 */ }; /* DevX UAR control structure. */ struct mlx5_uar { struct mlx5_uar_data bf_db; /* UAR data for Blueflame register. */ struct mlx5_uar_data cq_db; /* UAR data for CQ arm db register. */ void *obj; /* DevX UAR object. */ bool dbnc; /* Doorbell mapped to non-cached region. */ #ifndef RTE_ARCH_64 rte_spinlock_t bf_sl; rte_spinlock_t cq_sl; /* UAR access locks required for 32bit implementations. */ #endif /* RTE_ARCH_64 */ }; /** * Ring a doorbell and flush the update if requested. * * @param uar * Pointer to UAR data structure. * @param val * value to write in big endian format. * @param index * Index of doorbell record. * @param db_rec * Address of doorbell record. * @param flash * Decide whether to flush the DB writing using a memory barrier. */ static __rte_always_inline void mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index, volatile uint32_t *db_rec, bool flash) { rte_io_wmb(); *db_rec = rte_cpu_to_be_32(index); /* Ensure ordering between DB record actual update and UAR access. */ rte_wmb(); #ifdef RTE_ARCH_64 *uar->db = val; #else /* !RTE_ARCH_64 */ rte_spinlock_lock(uar->sl_p); *(volatile uint32_t *)uar->db = val; rte_io_wmb(); *((volatile uint32_t *)uar->db + 1) = val >> 32; rte_spinlock_unlock(uar->sl_p); #endif if (flash) rte_wmb(); } /** * Get the doorbell register mapping type. * * @param uar_mmap_offset * Mmap offset of Verbs/DevX UAR. * @param page_size * System page size * * @return * 1 for non-cached, 0 otherwise. */ static inline uint16_t mlx5_db_map_type_get(off_t uar_mmap_offset, size_t page_size) { off_t cmd = uar_mmap_offset / page_size; cmd >>= MLX5_UAR_MMAP_CMD_SHIFT; cmd &= MLX5_UAR_MMAP_CMD_MASK; if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD) return 1; return 0; } __rte_internal void mlx5_translate_port_name(const char *port_name_in, struct mlx5_switch_info *port_info_out); void mlx5_glue_constructor(void); extern uint8_t haswell_broadwell_cpu; __rte_internal void mlx5_common_init(void); /* * Common Driver Interface * * ConnectX common driver supports multiple classes: net, vDPA, regex, crypto * and compress devices. This layer enables creating such multiple classes * on a single device by allowing to bind multiple class-specific device * drivers to attach to the common driver. * * ------------ ------------- -------------- ----------------- ------------ * | mlx5 net | | mlx5 vdpa | | mlx5 regex | | mlx5 compress | | mlx5 ... | * | driver | | driver | | driver | | driver | | drivers | * ------------ ------------- -------------- ----------------- ------------ * || * ----------------- * | mlx5 | * | common driver | * ----------------- * | | * ----------- ----------------- * | mlx5 | | mlx5 | * | pci dev | | auxiliary dev | * ----------- ----------------- * * - mlx5 PCI bus driver binds to mlx5 PCI devices defined by PCI ID table * of all related devices. * - mlx5 class driver such as net, vDPA, regex defines its specific * PCI ID table and mlx5 bus driver probes matching class drivers. * - mlx5 common driver is central place that validates supported * class combinations. * - mlx5 common driver hides bus difference by resolving device address * from devargs, locating target RDMA device and probing with it. */ /* * Device configuration structure. * * Merged configuration from: * * - Device capabilities, * - User device parameters disabled features. */ struct mlx5_common_dev_config { struct mlx5_hca_attr hca_attr; /* HCA attributes. */ int dbnc; /* Skip doorbell register write barrier. */ int device_fd; /* Device file descriptor for importation. */ int pd_handle; /* Protection Domain handle for importation. */ unsigned int devx:1; /* Whether devx interface is available or not. */ unsigned int sys_mem_en:1; /* The default memory allocator. */ unsigned int mr_mempool_reg_en:1; /* Allow/prevent implicit mempool memory registration. */ unsigned int mr_ext_memseg_en:1; /* Whether memseg should be extended for MR creation. */ }; struct mlx5_common_device { struct rte_device *dev; TAILQ_ENTRY(mlx5_common_device) next; uint32_t classes_loaded; void *ctx; /* Verbs/DV/DevX context. */ void *pd; /* Protection Domain. */ uint32_t pdn; /* Protection Domain Number. */ struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */ struct mlx5_common_dev_config config; /* Device configuration. */ }; /** * Indicates whether PD and CTX are imported from another process, * or created by this process. * * @param cdev * Pointer to common device. * * @return * True if PD and CTX are imported from another process, False otherwise. */ static inline bool mlx5_imported_pd_and_ctx(struct mlx5_common_device *cdev) { return cdev->config.device_fd != MLX5_ARG_UNSET && cdev->config.pd_handle != MLX5_ARG_UNSET; } /** * Initialization function for the driver called during device probing. */ typedef int (mlx5_class_driver_probe_t)(struct mlx5_common_device *cdev, struct mlx5_kvargs_ctrl *mkvlist); /** * Uninitialization function for the driver called during hot-unplugging. */ typedef int (mlx5_class_driver_remove_t)(struct mlx5_common_device *cdev); /** Device already probed can be probed again to check for new ports. */ #define MLX5_DRV_PROBE_AGAIN 0x0004 /** * A structure describing a mlx5 common class driver. */ struct mlx5_class_driver { TAILQ_ENTRY(mlx5_class_driver) next; enum mlx5_class drv_class; /**< Class of this driver. */ const char *name; /**< Driver name. */ mlx5_class_driver_probe_t *probe; /**< Device probe function. */ mlx5_class_driver_remove_t *remove; /**< Device remove function. */ const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ uint32_t probe_again:1; /**< Device already probed can be probed again to check new device. */ uint32_t intr_lsc:1; /**< Supports link state interrupt. */ uint32_t intr_rmv:1; /**< Supports device remove interrupt. */ }; /** * Register a mlx5 device driver. * * @param driver * A pointer to a mlx5_driver structure describing the driver * to be registered. */ __rte_internal void mlx5_class_driver_register(struct mlx5_class_driver *driver); /** * Test device is a PCI bus device. * * @param dev * Pointer to device. * * @return * - True on device devargs is a PCI bus device. * - False otherwise. */ __rte_internal bool mlx5_dev_is_pci(const struct rte_device *dev); /** * Test PCI device is a VF device. * * @param pci_dev * Pointer to PCI device. * * @return * - True on PCI device is a VF device. * - False otherwise. */ __rte_internal bool mlx5_dev_is_vf_pci(const struct rte_pci_device *pci_dev); __rte_internal int mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev); __rte_internal void mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev, struct rte_mempool *mp); __rte_internal int mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar); __rte_internal void mlx5_devx_uar_release(struct mlx5_uar *uar); /* mlx5_common_os.c */ int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes); int mlx5_os_pd_prepare(struct mlx5_common_device *cdev); int mlx5_os_pd_release(struct mlx5_common_device *cdev); int mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config); /* mlx5 PMD wrapped MR struct. */ struct mlx5_pmd_wrapped_mr { uint32_t lkey; void *addr; size_t len; void *obj; /* verbs mr object or devx umem object. */ void *imkey; /* DevX indirect mkey object. */ }; __rte_internal int mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr, size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr); __rte_internal void mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr); #endif /* RTE_PMD_MLX5_COMMON_H_ */