mirror of https://github.com/F-Stack/f-stack.git
1441 lines
39 KiB
C
1441 lines
39 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
|
|
* Copyright(c) 2022 Intel Corporation
|
|
*/
|
|
|
|
#ifndef _ACC_COMMON_H_
|
|
#define _ACC_COMMON_H_
|
|
|
|
#include <bus_pci_driver.h>
|
|
#include "rte_acc_common_cfg.h"
|
|
|
|
/* Values used in filling in descriptors */
|
|
#define ACC_DMA_DESC_TYPE 2
|
|
#define ACC_DMA_BLKID_FCW 1
|
|
#define ACC_DMA_BLKID_IN 2
|
|
#define ACC_DMA_BLKID_OUT_ENC 1
|
|
#define ACC_DMA_BLKID_OUT_HARD 1
|
|
#define ACC_DMA_BLKID_OUT_SOFT 2
|
|
#define ACC_DMA_BLKID_OUT_HARQ 3
|
|
#define ACC_DMA_BLKID_IN_HARQ 3
|
|
#define ACC_DMA_BLKID_IN_MLD_R 3
|
|
|
|
/* Values used in filling in decode FCWs */
|
|
#define ACC_FCW_TD_VER 1
|
|
#define ACC_FCW_TD_EXT_COLD_REG_EN 1
|
|
#define ACC_FCW_TD_AUTOMAP 0x0f
|
|
#define ACC_FCW_TD_RVIDX_0 2
|
|
#define ACC_FCW_TD_RVIDX_1 26
|
|
#define ACC_FCW_TD_RVIDX_2 50
|
|
#define ACC_FCW_TD_RVIDX_3 74
|
|
|
|
#define ACC_SIZE_64MBYTE (64*1024*1024)
|
|
/* Number of elements in an Info Ring */
|
|
#define ACC_INFO_RING_NUM_ENTRIES 1024
|
|
/* Number of elements in HARQ layout memory
|
|
* 128M x 32kB = 4GB addressable memory
|
|
*/
|
|
#define ACC_HARQ_LAYOUT (128 * 1024 * 1024)
|
|
/* Assume offset for HARQ in memory */
|
|
#define ACC_HARQ_OFFSET (32 * 1024)
|
|
#define ACC_HARQ_OFFSET_SHIFT 15
|
|
#define ACC_HARQ_OFFSET_MASK 0x7ffffff
|
|
#define ACC_HARQ_OFFSET_THRESHOLD 1024
|
|
/* Mask used to calculate an index in an Info Ring array (not a byte offset) */
|
|
#define ACC_INFO_RING_MASK (ACC_INFO_RING_NUM_ENTRIES-1)
|
|
|
|
#define MAX_ENQ_BATCH_SIZE 255
|
|
|
|
/* All ACC100 Registers alignment are 32bits = 4B */
|
|
#define ACC_BYTES_IN_WORD 4
|
|
#define ACC_MAX_E_MBUF 64000
|
|
|
|
#define ACC_VF_OFFSET_QOS 16 /* offset in Memory specific to QoS Mon */
|
|
#define ACC_TMPL_PRI_0 0x03020100
|
|
#define ACC_TMPL_PRI_1 0x07060504
|
|
#define ACC_TMPL_PRI_2 0x0b0a0908
|
|
#define ACC_TMPL_PRI_3 0x0f0e0d0c
|
|
#define ACC_TMPL_PRI_4 0x13121110
|
|
#define ACC_TMPL_PRI_5 0x17161514
|
|
#define ACC_TMPL_PRI_6 0x1b1a1918
|
|
#define ACC_TMPL_PRI_7 0x1f1e1d1c
|
|
#define ACC_QUEUE_ENABLE 0x80000000 /* Bit to mark Queue as Enabled */
|
|
#define ACC_FDONE 0x80000000
|
|
#define ACC_SDONE 0x40000000
|
|
|
|
#define ACC_NUM_TMPL 32
|
|
|
|
#define ACC_ACCMAP_0 0
|
|
#define ACC_ACCMAP_1 2
|
|
#define ACC_ACCMAP_2 1
|
|
#define ACC_ACCMAP_3 3
|
|
#define ACC_ACCMAP_4 4
|
|
#define ACC_ACCMAP_5 5
|
|
#define ACC_PF_VAL 2
|
|
|
|
/* max number of iterations to allocate memory block for all rings */
|
|
#define ACC_SW_RING_MEM_ALLOC_ATTEMPTS 5
|
|
#define ACC_MAX_QUEUE_DEPTH 1024
|
|
#define ACC_DMA_MAX_NUM_POINTERS 14
|
|
#define ACC_DMA_MAX_NUM_POINTERS_IN 7
|
|
#define ACC_DMA_DESC_PADDINGS 8
|
|
#define ACC_FCW_PADDING 12
|
|
#define ACC_DESC_FCW_OFFSET 192
|
|
#define ACC_DESC_SIZE 256
|
|
#define ACC_DESC_OFFSET (ACC_DESC_SIZE / 64)
|
|
#define ACC_FCW_TE_BLEN 32
|
|
#define ACC_FCW_TD_BLEN 24
|
|
#define ACC_FCW_LE_BLEN 32
|
|
#define ACC_FCW_LD_BLEN 36
|
|
#define ACC_FCW_FFT_BLEN 28
|
|
#define ACC_5GUL_SIZE_0 16
|
|
#define ACC_5GUL_SIZE_1 40
|
|
#define ACC_5GUL_OFFSET_0 36
|
|
#define ACC_COMPANION_PTRS 8
|
|
#define ACC_FCW_VER 2
|
|
#define ACC_MUX_5GDL_DESC 6
|
|
#define ACC_CMP_ENC_SIZE 20
|
|
#define ACC_CMP_DEC_SIZE 24
|
|
#define ACC_ENC_OFFSET (32)
|
|
#define ACC_DEC_OFFSET (80)
|
|
#define ACC_LIMIT_DL_MUX_BITS 534
|
|
#define ACC_NUM_QGRPS_PER_WORD 8
|
|
#define ACC_MAX_NUM_QGRPS 32
|
|
#define ACC_RING_SIZE_GRANULARITY 64
|
|
|
|
/* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
|
|
#define ACC_N_ZC_1 66 /* N = 66 Zc for BG 1 */
|
|
#define ACC_N_ZC_2 50 /* N = 50 Zc for BG 2 */
|
|
#define ACC_K_ZC_1 22 /* K = 22 Zc for BG 1 */
|
|
#define ACC_K_ZC_2 10 /* K = 10 Zc for BG 2 */
|
|
#define ACC_K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
|
|
#define ACC_K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
|
|
#define ACC_K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
|
|
#define ACC_K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
|
|
#define ACC_K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
|
|
#define ACC_K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
|
|
|
|
#define ACC_ENGINE_OFFSET 0x1000
|
|
#define ACC_LONG_WAIT 1000
|
|
#define ACC_MS_IN_US (1000)
|
|
|
|
#define ACC_ALGO_SPA 0
|
|
#define ACC_ALGO_MSA 1
|
|
#define ACC_HARQ_ALIGN_64B 64
|
|
#define ACC_MAX_ZC 384
|
|
|
|
/* De-ratematch code rate limitation for recommended operation */
|
|
#define ACC_LIM_03 2 /* 0.03 */
|
|
#define ACC_LIM_09 6 /* 0.09 */
|
|
#define ACC_LIM_14 9 /* 0.14 */
|
|
#define ACC_LIM_21 14 /* 0.21 */
|
|
#define ACC_LIM_31 20 /* 0.31 */
|
|
#define ACC_MAX_E (128 * 1024 - 2)
|
|
|
|
extern int acc_common_logtype;
|
|
|
|
/* Helper macro for logging */
|
|
#define rte_acc_log(level, fmt, ...) \
|
|
rte_log(RTE_LOG_ ## level, acc_common_logtype, fmt "\n", \
|
|
##__VA_ARGS__)
|
|
|
|
/* ACC100 DMA Descriptor triplet */
|
|
struct acc_dma_triplet {
|
|
uint64_t address;
|
|
uint32_t blen:20,
|
|
res0:4,
|
|
last:1,
|
|
dma_ext:1,
|
|
res1:2,
|
|
blkid:4;
|
|
} __rte_packed;
|
|
|
|
|
|
/* ACC100 Queue Manager Enqueue PCI Register */
|
|
union acc_enqueue_reg_fmt {
|
|
uint32_t val;
|
|
struct {
|
|
uint32_t num_elem:8,
|
|
addr_offset:3,
|
|
rsrvd:1,
|
|
req_elem_addr:20;
|
|
};
|
|
};
|
|
|
|
/* FEC 4G Uplink Frame Control Word */
|
|
struct __rte_packed acc_fcw_td {
|
|
uint8_t fcw_ver:4,
|
|
num_maps:4; /* Unused in ACC100 */
|
|
uint8_t filler:6, /* Unused in ACC100 */
|
|
rsrvd0:1,
|
|
bypass_sb_deint:1;
|
|
uint16_t k_pos;
|
|
uint16_t k_neg; /* Unused in ACC100 */
|
|
uint8_t c_neg; /* Unused in ACC100 */
|
|
uint8_t c; /* Unused in ACC100 */
|
|
uint32_t ea; /* Unused in ACC100 */
|
|
uint32_t eb; /* Unused in ACC100 */
|
|
uint8_t cab; /* Unused in ACC100 */
|
|
uint8_t k0_start_col; /* Unused in ACC100 */
|
|
uint8_t rsrvd1;
|
|
uint8_t code_block_mode:1, /* Unused in ACC100 */
|
|
turbo_crc_type:1,
|
|
rsrvd2:3,
|
|
bypass_teq:1, /* Unused in ACC100 */
|
|
soft_output_en:1, /* Unused in ACC100 */
|
|
ext_td_cold_reg_en:1;
|
|
union { /* External Cold register */
|
|
uint32_t ext_td_cold_reg;
|
|
struct {
|
|
uint32_t min_iter:4, /* Unused in ACC100 */
|
|
max_iter:4,
|
|
ext_scale:5, /* Unused in ACC100 */
|
|
rsrvd3:3,
|
|
early_stop_en:1, /* Unused in ACC100 */
|
|
sw_soft_out_dis:1, /* Unused in ACC100 */
|
|
sw_et_cont:1, /* Unused in ACC100 */
|
|
sw_soft_out_saturation:1, /* Unused in ACC100 */
|
|
half_iter_on:1, /* Unused in ACC100 */
|
|
raw_decoder_input_on:1, /* Unused in ACC100 */
|
|
rsrvd4:10;
|
|
};
|
|
};
|
|
};
|
|
|
|
/* FEC 4G Downlink Frame Control Word */
|
|
struct __rte_packed acc_fcw_te {
|
|
uint16_t k_neg;
|
|
uint16_t k_pos;
|
|
uint8_t c_neg;
|
|
uint8_t c;
|
|
uint8_t filler;
|
|
uint8_t cab;
|
|
uint32_t ea:17,
|
|
rsrvd0:15;
|
|
uint32_t eb:17,
|
|
rsrvd1:15;
|
|
uint16_t ncb_neg;
|
|
uint16_t ncb_pos;
|
|
uint8_t rv_idx0:2,
|
|
rsrvd2:2,
|
|
rv_idx1:2,
|
|
rsrvd3:2;
|
|
uint8_t bypass_rv_idx0:1,
|
|
bypass_rv_idx1:1,
|
|
bypass_rm:1,
|
|
rsrvd4:5;
|
|
uint8_t rsrvd5:1,
|
|
rsrvd6:3,
|
|
code_block_crc:1,
|
|
rsrvd7:3;
|
|
uint8_t code_block_mode:1,
|
|
rsrvd8:7;
|
|
uint64_t rsrvd9;
|
|
};
|
|
|
|
/* FEC 5GNR Downlink Frame Control Word */
|
|
struct __rte_packed acc_fcw_le {
|
|
uint32_t FCWversion:4,
|
|
qm:4,
|
|
nfiller:11,
|
|
BG:1,
|
|
Zc:9,
|
|
res0:3;
|
|
uint32_t ncb:16,
|
|
k0:16;
|
|
uint32_t rm_e:22,
|
|
res1:4,
|
|
crc_select:1,
|
|
res2:1,
|
|
bypass_intlv:1,
|
|
res3:3;
|
|
uint32_t res4_a:12,
|
|
mcb_count:3,
|
|
res4_b:1,
|
|
C:8,
|
|
Cab:8;
|
|
uint32_t rm_e_b:22,
|
|
res5:10;
|
|
uint32_t res6;
|
|
uint32_t res7;
|
|
uint32_t res8;
|
|
};
|
|
|
|
/* FEC 5GNR Uplink Frame Control Word */
|
|
struct __rte_packed acc_fcw_ld {
|
|
uint32_t FCWversion:4,
|
|
qm:4,
|
|
nfiller:11,
|
|
BG:1,
|
|
Zc:9,
|
|
cnu_algo:1, /* Not supported in ACC100 */
|
|
synd_precoder:1,
|
|
synd_post:1;
|
|
uint32_t ncb:16,
|
|
k0:16;
|
|
uint32_t rm_e:24,
|
|
hcin_en:1,
|
|
hcout_en:1,
|
|
crc_select:1,
|
|
bypass_dec:1,
|
|
bypass_intlv:1,
|
|
so_en:1,
|
|
so_bypass_rm:1,
|
|
so_bypass_intlv:1;
|
|
uint32_t hcin_offset:16,
|
|
hcin_size0:16;
|
|
uint32_t hcin_size1:16,
|
|
hcin_decomp_mode:3,
|
|
llr_pack_mode:1,
|
|
hcout_comp_mode:3,
|
|
saturate_input:1, /* Not supported in ACC200 */
|
|
dec_convllr:4,
|
|
hcout_convllr:4;
|
|
uint32_t itmax:7,
|
|
itstop:1,
|
|
so_it:7,
|
|
minsum_offset:1, /* Not supported in ACC200 */
|
|
hcout_offset:16;
|
|
uint32_t hcout_size0:16,
|
|
hcout_size1:16;
|
|
uint32_t gain_i:8,
|
|
gain_h:8,
|
|
negstop_th:16;
|
|
uint32_t negstop_it:7,
|
|
negstop_en:1,
|
|
tb_crc_select:2, /* Not supported in ACC100 */
|
|
dec_llrclip:2, /* Not supported in ACC200 */
|
|
tb_trailer_size:20; /* Not supported in ACC100 */
|
|
};
|
|
|
|
/* FFT Frame Control Word */
|
|
struct __rte_packed acc_fcw_fft {
|
|
uint32_t in_frame_size:16,
|
|
leading_pad_size:16;
|
|
uint32_t out_frame_size:16,
|
|
leading_depad_size:16;
|
|
uint32_t cs_window_sel;
|
|
uint32_t cs_window_sel2:16,
|
|
cs_enable_bmap:16;
|
|
uint32_t num_antennas:8,
|
|
idft_size:8,
|
|
dft_size:8,
|
|
cs_offset:8;
|
|
uint32_t idft_shift:8,
|
|
dft_shift:8,
|
|
cs_multiplier:16;
|
|
uint32_t bypass:2,
|
|
fp16_in:1, /* Not supported in ACC200 */
|
|
fp16_out:1,
|
|
exp_adj:4,
|
|
power_shift:4,
|
|
power_en:1,
|
|
res:19;
|
|
};
|
|
|
|
/* MLD-TS Frame Control Word */
|
|
struct __rte_packed acc_fcw_mldts {
|
|
uint32_t fcw_version:4,
|
|
res0:12,
|
|
nrb:13, /* 1 to 1925 */
|
|
res1:3;
|
|
uint32_t NLayers:2, /* 1: 2L... 3: 4L */
|
|
res2:14,
|
|
Qmod0:2, /* 0: 2...3: 8 */
|
|
res3_0:2,
|
|
Qmod1:2,
|
|
res3_1:2,
|
|
Qmod2:2,
|
|
res3_2:2,
|
|
Qmod3:2,
|
|
res3_3:2;
|
|
uint32_t Rrep:3, /* 0 to 5 */
|
|
res4:1,
|
|
Crep:3, /* 0 to 6 */
|
|
res5:25;
|
|
uint32_t pad0;
|
|
uint32_t pad1;
|
|
uint32_t pad2;
|
|
uint32_t pad3;
|
|
uint32_t pad4;
|
|
};
|
|
|
|
/* DMA Response Descriptor */
|
|
union acc_dma_rsp_desc {
|
|
uint32_t val;
|
|
struct {
|
|
uint32_t crc_status:1,
|
|
synd_ok:1,
|
|
dma_err:1,
|
|
neg_stop:1,
|
|
fcw_err:1,
|
|
output_truncate:1,
|
|
input_err:1,
|
|
tsen_pagefault:1,
|
|
iterCountFrac:8,
|
|
iter_cnt:8,
|
|
engine_hung:1,
|
|
core_reset:5,
|
|
sdone:1,
|
|
fdone:1;
|
|
uint32_t add_info_0;
|
|
uint32_t add_info_1;
|
|
};
|
|
};
|
|
|
|
/* DMA Request Descriptor */
|
|
struct __rte_packed acc_dma_req_desc {
|
|
union {
|
|
struct{
|
|
uint32_t type:4,
|
|
rsrvd0:26,
|
|
sdone:1,
|
|
fdone:1;
|
|
uint32_t ib_ant_offset:16, /* Not supported in ACC100 */
|
|
res2:12,
|
|
num_ant:4;
|
|
uint32_t ob_ant_offset:16,
|
|
ob_cyc_offset:12,
|
|
num_cs:4;
|
|
uint32_t pass_param:8,
|
|
sdone_enable:1,
|
|
irq_enable:1,
|
|
timeStampEn:1,
|
|
dltb:1, /* Not supported in ACC200 */
|
|
res0:4,
|
|
numCBs:8,
|
|
m2dlen:4,
|
|
d2mlen:4;
|
|
};
|
|
struct{
|
|
uint32_t word0;
|
|
uint32_t word1;
|
|
uint32_t word2;
|
|
uint32_t word3;
|
|
};
|
|
};
|
|
struct acc_dma_triplet data_ptrs[ACC_DMA_MAX_NUM_POINTERS];
|
|
|
|
/* Virtual addresses used to retrieve SW context info */
|
|
union {
|
|
void *op_addr;
|
|
uint64_t pad1; /* pad to 64 bits */
|
|
};
|
|
/*
|
|
* Stores additional information needed for driver processing:
|
|
* - last_desc_in_batch - flag used to mark last descriptor (CB)
|
|
* in batch
|
|
* - cbs_in_tb - stores information about total number of Code Blocks
|
|
* in currently processed Transport Block
|
|
*/
|
|
union {
|
|
struct {
|
|
union {
|
|
struct acc_fcw_ld fcw_ld;
|
|
struct acc_fcw_td fcw_td;
|
|
struct acc_fcw_le fcw_le;
|
|
struct acc_fcw_te fcw_te;
|
|
struct acc_fcw_fft fcw_fft;
|
|
struct acc_fcw_mldts fcw_mldts;
|
|
uint32_t pad2[ACC_FCW_PADDING];
|
|
};
|
|
uint32_t last_desc_in_batch :8,
|
|
cbs_in_tb:8,
|
|
pad4 : 16;
|
|
};
|
|
uint64_t pad3[ACC_DMA_DESC_PADDINGS]; /* pad to 64 bits */
|
|
};
|
|
};
|
|
|
|
/* ACC100 DMA Descriptor */
|
|
union acc_dma_desc {
|
|
struct acc_dma_req_desc req;
|
|
union acc_dma_rsp_desc rsp;
|
|
uint64_t atom_hdr;
|
|
};
|
|
|
|
/* Union describing Info Ring entry */
|
|
union acc_info_ring_data {
|
|
uint32_t val;
|
|
struct {
|
|
union {
|
|
uint16_t detailed_info;
|
|
struct {
|
|
uint16_t aq_id: 4;
|
|
uint16_t qg_id: 4;
|
|
uint16_t vf_id: 6;
|
|
uint16_t reserved: 2;
|
|
};
|
|
};
|
|
uint16_t int_nb: 7;
|
|
uint16_t msi_0: 1;
|
|
uint16_t vf2pf: 6;
|
|
uint16_t loop: 1;
|
|
uint16_t valid: 1;
|
|
};
|
|
struct {
|
|
uint32_t aq_id_3: 6;
|
|
uint32_t qg_id_3: 5;
|
|
uint32_t vf_id_3: 6;
|
|
uint32_t int_nb_3: 6;
|
|
uint32_t msi_0_3: 1;
|
|
uint32_t vf2pf_3: 6;
|
|
uint32_t loop_3: 1;
|
|
uint32_t valid_3: 1;
|
|
};
|
|
} __rte_packed;
|
|
|
|
struct __rte_packed acc_pad_ptr {
|
|
void *op_addr;
|
|
uint64_t pad1; /* pad to 64 bits */
|
|
};
|
|
|
|
struct __rte_packed acc_ptrs {
|
|
struct acc_pad_ptr ptr[ACC_COMPANION_PTRS];
|
|
};
|
|
|
|
/* Union describing Info Ring entry */
|
|
union acc_harq_layout_data {
|
|
uint32_t val;
|
|
struct {
|
|
uint16_t offset;
|
|
uint16_t size0;
|
|
};
|
|
} __rte_packed;
|
|
|
|
/**
|
|
* Structure with details about RTE_BBDEV_EVENT_DEQUEUE event. It's passed to
|
|
* the callback function.
|
|
*/
|
|
struct acc_deq_intr_details {
|
|
uint16_t queue_id;
|
|
};
|
|
|
|
/* TIP VF2PF Comms */
|
|
enum {
|
|
ACC_VF2PF_STATUS_REQUEST = 1,
|
|
ACC_VF2PF_USING_VF = 2,
|
|
};
|
|
|
|
|
|
typedef void (*acc10x_fcw_ld_fill_fun_t)(struct rte_bbdev_dec_op *op,
|
|
struct acc_fcw_ld *fcw,
|
|
union acc_harq_layout_data *harq_layout);
|
|
|
|
/* Private data structure for each ACC100 device */
|
|
struct acc_device {
|
|
void *mmio_base; /**< Base address of MMIO registers (BAR0) */
|
|
void *sw_rings_base; /* Base addr of un-aligned memory for sw rings */
|
|
void *sw_rings; /* 64MBs of 64MB aligned memory for sw rings */
|
|
rte_iova_t sw_rings_iova; /* IOVA address of sw_rings */
|
|
/* Virtual address of the info memory routed to the this function under
|
|
* operation, whether it is PF or VF.
|
|
* HW may DMA information data at this location asynchronously
|
|
*/
|
|
union acc_info_ring_data *info_ring;
|
|
|
|
union acc_harq_layout_data *harq_layout;
|
|
/* Virtual Info Ring head */
|
|
uint16_t info_ring_head;
|
|
/* Number of bytes available for each queue in device, depending on
|
|
* how many queues are enabled with configure()
|
|
*/
|
|
uint32_t sw_ring_size;
|
|
uint32_t ddr_size; /* Size in kB */
|
|
uint32_t *tail_ptrs; /* Base address of response tail pointer buffer */
|
|
rte_iova_t tail_ptr_iova; /* IOVA address of tail pointers */
|
|
/* Max number of entries available for each queue in device, depending
|
|
* on how many queues are enabled with configure()
|
|
*/
|
|
uint32_t sw_ring_max_depth;
|
|
struct rte_acc_conf acc_conf; /* ACC100 Initial configuration */
|
|
/* Bitmap capturing which Queues have already been assigned */
|
|
uint64_t q_assigned_bit_map[ACC_MAX_NUM_QGRPS];
|
|
bool pf_device; /**< True if this is a PF ACC100 device */
|
|
bool configured; /**< True if this ACC100 device is configured */
|
|
uint16_t device_variant; /**< Device variant */
|
|
acc10x_fcw_ld_fill_fun_t fcw_ld_fill; /**< 5GUL FCW generation function */
|
|
};
|
|
|
|
/* Structure associated with each queue. */
|
|
struct __rte_cache_aligned acc_queue {
|
|
union acc_dma_desc *ring_addr; /* Virtual address of sw ring */
|
|
rte_iova_t ring_addr_iova; /* IOVA address of software ring */
|
|
uint32_t sw_ring_head; /* software ring head */
|
|
uint32_t sw_ring_tail; /* software ring tail */
|
|
/* software ring size (descriptors, not bytes) */
|
|
uint32_t sw_ring_depth;
|
|
/* mask used to wrap enqueued descriptors on the sw ring */
|
|
uint32_t sw_ring_wrap_mask;
|
|
/* Virtual address of companion ring */
|
|
struct acc_ptrs *companion_ring_addr;
|
|
/* MMIO register used to enqueue descriptors */
|
|
void *mmio_reg_enqueue;
|
|
uint8_t vf_id; /* VF ID (max = 63) */
|
|
uint8_t qgrp_id; /* Queue Group ID */
|
|
uint16_t aq_id; /* Atomic Queue ID */
|
|
uint16_t aq_depth; /* Depth of atomic queue */
|
|
uint32_t aq_enqueued; /* Count how many "batches" have been enqueued */
|
|
uint32_t aq_dequeued; /* Count how many "batches" have been dequeued */
|
|
uint32_t irq_enable; /* Enable ops dequeue interrupts if set to 1 */
|
|
struct rte_mempool *fcw_mempool; /* FCW mempool */
|
|
enum rte_bbdev_op_type op_type; /* Type of this Queue: TE or TD */
|
|
/* Internal Buffers for loopback input */
|
|
uint8_t *lb_in;
|
|
uint8_t *lb_out;
|
|
rte_iova_t lb_in_addr_iova;
|
|
rte_iova_t lb_out_addr_iova;
|
|
int8_t *derm_buffer; /* interim buffer for de-rm in SDK */
|
|
struct acc_device *d;
|
|
};
|
|
|
|
/* Write to MMIO register address */
|
|
static inline void
|
|
mmio_write(void *addr, uint32_t value)
|
|
{
|
|
*((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value);
|
|
}
|
|
|
|
/* Write a register of a ACC100 device */
|
|
static inline void
|
|
acc_reg_write(struct acc_device *d, uint32_t offset, uint32_t value)
|
|
{
|
|
void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
|
|
mmio_write(reg_addr, value);
|
|
usleep(ACC_LONG_WAIT);
|
|
}
|
|
|
|
/* Read a register of a ACC100 device */
|
|
static inline uint32_t
|
|
acc_reg_read(struct acc_device *d, uint32_t offset)
|
|
{
|
|
|
|
void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
|
|
uint32_t ret = *((volatile uint32_t *)(reg_addr));
|
|
return rte_le_to_cpu_32(ret);
|
|
}
|
|
|
|
/* Basic Implementation of Log2 for exact 2^N */
|
|
static inline uint32_t
|
|
log2_basic(uint32_t value)
|
|
{
|
|
return (value == 0) ? 0 : rte_bsf32(value);
|
|
}
|
|
|
|
/* Calculate memory alignment offset assuming alignment is 2^N */
|
|
static inline uint32_t
|
|
calc_mem_alignment_offset(void *unaligned_virt_mem, uint32_t alignment)
|
|
{
|
|
rte_iova_t unaligned_phy_mem = rte_malloc_virt2iova(unaligned_virt_mem);
|
|
return (uint32_t)(alignment -
|
|
(unaligned_phy_mem & (alignment-1)));
|
|
}
|
|
|
|
static void
|
|
free_base_addresses(void **base_addrs, int size)
|
|
{
|
|
int i;
|
|
for (i = 0; i < size; i++)
|
|
rte_free(base_addrs[i]);
|
|
}
|
|
|
|
/* Read flag value 0/1 from bitmap */
|
|
static inline bool
|
|
check_bit(uint32_t bitmap, uint32_t bitmask)
|
|
{
|
|
return bitmap & bitmask;
|
|
}
|
|
|
|
static inline char *
|
|
mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
|
|
{
|
|
if (unlikely(len > rte_pktmbuf_tailroom(m)))
|
|
return NULL;
|
|
|
|
char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
|
|
m->data_len = (uint16_t)(m->data_len + len);
|
|
m_head->pkt_len = (m_head->pkt_len + len);
|
|
return tail;
|
|
}
|
|
|
|
|
|
static inline uint32_t
|
|
get_desc_len(void)
|
|
{
|
|
return sizeof(union acc_dma_desc);
|
|
}
|
|
|
|
/* Allocate the 2 * 64MB block for the sw rings */
|
|
static inline int
|
|
alloc_2x64mb_sw_rings_mem(struct rte_bbdev *dev, struct acc_device *d,
|
|
int socket)
|
|
{
|
|
uint32_t sw_ring_size = ACC_SIZE_64MBYTE;
|
|
d->sw_rings_base = rte_zmalloc_socket(dev->device->driver->name,
|
|
2 * sw_ring_size, RTE_CACHE_LINE_SIZE, socket);
|
|
if (d->sw_rings_base == NULL) {
|
|
rte_acc_log(ERR, "Failed to allocate memory for %s:%u",
|
|
dev->device->driver->name,
|
|
dev->data->dev_id);
|
|
return -ENOMEM;
|
|
}
|
|
uint32_t next_64mb_align_offset = calc_mem_alignment_offset(
|
|
d->sw_rings_base, ACC_SIZE_64MBYTE);
|
|
d->sw_rings = RTE_PTR_ADD(d->sw_rings_base, next_64mb_align_offset);
|
|
d->sw_rings_iova = rte_malloc_virt2iova(d->sw_rings_base) +
|
|
next_64mb_align_offset;
|
|
d->sw_ring_size = ACC_MAX_QUEUE_DEPTH * get_desc_len();
|
|
d->sw_ring_max_depth = ACC_MAX_QUEUE_DEPTH;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Attempt to allocate minimised memory space for sw rings */
|
|
static inline void
|
|
alloc_sw_rings_min_mem(struct rte_bbdev *dev, struct acc_device *d,
|
|
uint16_t num_queues, int socket)
|
|
{
|
|
rte_iova_t sw_rings_base_iova, next_64mb_align_addr_iova;
|
|
uint32_t next_64mb_align_offset;
|
|
rte_iova_t sw_ring_iova_end_addr;
|
|
void *base_addrs[ACC_SW_RING_MEM_ALLOC_ATTEMPTS];
|
|
void *sw_rings_base;
|
|
int i = 0;
|
|
uint32_t q_sw_ring_size = ACC_MAX_QUEUE_DEPTH * get_desc_len();
|
|
uint32_t dev_sw_ring_size = q_sw_ring_size * num_queues;
|
|
/* Free first in case this is a reconfiguration */
|
|
rte_free(d->sw_rings_base);
|
|
|
|
/* Find an aligned block of memory to store sw rings */
|
|
while (i < ACC_SW_RING_MEM_ALLOC_ATTEMPTS) {
|
|
/*
|
|
* sw_ring allocated memory is guaranteed to be aligned to
|
|
* q_sw_ring_size at the condition that the requested size is
|
|
* less than the page size
|
|
*/
|
|
sw_rings_base = rte_zmalloc_socket(
|
|
dev->device->driver->name,
|
|
dev_sw_ring_size, q_sw_ring_size, socket);
|
|
|
|
if (sw_rings_base == NULL) {
|
|
rte_acc_log(ERR,
|
|
"Failed to allocate memory for %s:%u",
|
|
dev->device->driver->name,
|
|
dev->data->dev_id);
|
|
break;
|
|
}
|
|
|
|
sw_rings_base_iova = rte_malloc_virt2iova(sw_rings_base);
|
|
next_64mb_align_offset = calc_mem_alignment_offset(
|
|
sw_rings_base, ACC_SIZE_64MBYTE);
|
|
next_64mb_align_addr_iova = sw_rings_base_iova +
|
|
next_64mb_align_offset;
|
|
sw_ring_iova_end_addr = sw_rings_base_iova + dev_sw_ring_size;
|
|
|
|
/* Check if the end of the sw ring memory block is before the
|
|
* start of next 64MB aligned mem address
|
|
*/
|
|
if (sw_ring_iova_end_addr < next_64mb_align_addr_iova) {
|
|
d->sw_rings_iova = sw_rings_base_iova;
|
|
d->sw_rings = sw_rings_base;
|
|
d->sw_rings_base = sw_rings_base;
|
|
d->sw_ring_size = q_sw_ring_size;
|
|
d->sw_ring_max_depth = ACC_MAX_QUEUE_DEPTH;
|
|
break;
|
|
}
|
|
/* Store the address of the unaligned mem block */
|
|
base_addrs[i] = sw_rings_base;
|
|
i++;
|
|
}
|
|
|
|
/* Free all unaligned blocks of mem allocated in the loop */
|
|
free_base_addresses(base_addrs, i);
|
|
}
|
|
|
|
/*
|
|
* Find queue_id of a device queue based on details from the Info Ring.
|
|
* If a queue isn't found UINT16_MAX is returned.
|
|
*/
|
|
static inline uint16_t
|
|
get_queue_id_from_ring_info(struct rte_bbdev_data *data,
|
|
const union acc_info_ring_data ring_data)
|
|
{
|
|
uint16_t queue_id;
|
|
|
|
for (queue_id = 0; queue_id < data->num_queues; ++queue_id) {
|
|
struct acc_queue *acc_q =
|
|
data->queues[queue_id].queue_private;
|
|
if (acc_q != NULL && acc_q->aq_id == ring_data.aq_id &&
|
|
acc_q->qgrp_id == ring_data.qg_id &&
|
|
acc_q->vf_id == ring_data.vf_id)
|
|
return queue_id;
|
|
}
|
|
|
|
return UINT16_MAX;
|
|
}
|
|
|
|
/* Fill in a frame control word for turbo encoding. */
|
|
static inline void
|
|
acc_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_te *fcw)
|
|
{
|
|
fcw->code_block_mode = op->turbo_enc.code_block_mode;
|
|
if (fcw->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
|
|
fcw->k_neg = op->turbo_enc.tb_params.k_neg;
|
|
fcw->k_pos = op->turbo_enc.tb_params.k_pos;
|
|
fcw->c_neg = op->turbo_enc.tb_params.c_neg;
|
|
fcw->c = op->turbo_enc.tb_params.c;
|
|
fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
|
|
fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
|
|
|
|
if (check_bit(op->turbo_enc.op_flags,
|
|
RTE_BBDEV_TURBO_RATE_MATCH)) {
|
|
fcw->bypass_rm = 0;
|
|
fcw->cab = op->turbo_enc.tb_params.cab;
|
|
fcw->ea = op->turbo_enc.tb_params.ea;
|
|
fcw->eb = op->turbo_enc.tb_params.eb;
|
|
} else {
|
|
/* E is set to the encoding output size when RM is
|
|
* bypassed.
|
|
*/
|
|
fcw->bypass_rm = 1;
|
|
fcw->cab = fcw->c_neg;
|
|
fcw->ea = 3 * fcw->k_neg + 12;
|
|
fcw->eb = 3 * fcw->k_pos + 12;
|
|
}
|
|
} else { /* For CB mode */
|
|
fcw->k_pos = op->turbo_enc.cb_params.k;
|
|
fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
|
|
|
|
if (check_bit(op->turbo_enc.op_flags,
|
|
RTE_BBDEV_TURBO_RATE_MATCH)) {
|
|
fcw->bypass_rm = 0;
|
|
fcw->eb = op->turbo_enc.cb_params.e;
|
|
} else {
|
|
/* E is set to the encoding output size when RM is
|
|
* bypassed.
|
|
*/
|
|
fcw->bypass_rm = 1;
|
|
fcw->eb = 3 * fcw->k_pos + 12;
|
|
}
|
|
}
|
|
|
|
fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
|
|
RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
|
|
fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
|
|
RTE_BBDEV_TURBO_CRC_24B_ATTACH);
|
|
fcw->rv_idx1 = op->turbo_enc.rv_index;
|
|
}
|
|
|
|
/* Compute value of k0.
|
|
* Based on 3GPP 38.212 Table 5.4.2.1-2
|
|
* Starting position of different redundancy versions, k0
|
|
*/
|
|
static inline uint16_t
|
|
get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
|
|
{
|
|
if (rv_index == 0)
|
|
return 0;
|
|
uint16_t n = (bg == 1 ? ACC_N_ZC_1 : ACC_N_ZC_2) * z_c;
|
|
if (n_cb == n) {
|
|
if (rv_index == 1)
|
|
return (bg == 1 ? ACC_K0_1_1 : ACC_K0_1_2) * z_c;
|
|
else if (rv_index == 2)
|
|
return (bg == 1 ? ACC_K0_2_1 : ACC_K0_2_2) * z_c;
|
|
else
|
|
return (bg == 1 ? ACC_K0_3_1 : ACC_K0_3_2) * z_c;
|
|
}
|
|
/* LBRM case - includes a division by N */
|
|
if (unlikely(z_c == 0))
|
|
return 0;
|
|
if (rv_index == 1)
|
|
return (((bg == 1 ? ACC_K0_1_1 : ACC_K0_1_2) * n_cb)
|
|
/ n) * z_c;
|
|
else if (rv_index == 2)
|
|
return (((bg == 1 ? ACC_K0_2_1 : ACC_K0_2_2) * n_cb)
|
|
/ n) * z_c;
|
|
else
|
|
return (((bg == 1 ? ACC_K0_3_1 : ACC_K0_3_2) * n_cb)
|
|
/ n) * z_c;
|
|
}
|
|
|
|
/* Fill in a frame control word for LDPC encoding. */
|
|
static inline void
|
|
acc_fcw_le_fill(const struct rte_bbdev_enc_op *op,
|
|
struct acc_fcw_le *fcw, int num_cb, uint32_t default_e)
|
|
{
|
|
fcw->qm = op->ldpc_enc.q_m;
|
|
fcw->nfiller = op->ldpc_enc.n_filler;
|
|
fcw->BG = (op->ldpc_enc.basegraph - 1);
|
|
fcw->Zc = op->ldpc_enc.z_c;
|
|
fcw->ncb = op->ldpc_enc.n_cb;
|
|
fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
|
|
op->ldpc_enc.rv_index);
|
|
fcw->rm_e = (default_e == 0) ? op->ldpc_enc.cb_params.e : default_e;
|
|
fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
|
|
RTE_BBDEV_LDPC_CRC_24B_ATTACH);
|
|
fcw->bypass_intlv = check_bit(op->ldpc_enc.op_flags,
|
|
RTE_BBDEV_LDPC_INTERLEAVER_BYPASS);
|
|
fcw->mcb_count = num_cb;
|
|
}
|
|
|
|
/* Provide the descriptor index on a given queue */
|
|
static inline uint16_t
|
|
acc_desc_idx(struct acc_queue *q, uint16_t offset)
|
|
{
|
|
return (q->sw_ring_head + offset) & q->sw_ring_wrap_mask;
|
|
}
|
|
|
|
/* Provide the descriptor pointer on a given queue */
|
|
static inline union acc_dma_desc*
|
|
acc_desc(struct acc_queue *q, uint16_t offset)
|
|
{
|
|
return q->ring_addr + acc_desc_idx(q, offset);
|
|
}
|
|
|
|
/* Provide the descriptor index for the tail of a given queue */
|
|
static inline uint16_t
|
|
acc_desc_idx_tail(struct acc_queue *q, uint16_t offset)
|
|
{
|
|
return (q->sw_ring_tail + offset) & q->sw_ring_wrap_mask;
|
|
}
|
|
|
|
/* Provide the descriptor tail pointer on a given queue */
|
|
static inline union acc_dma_desc*
|
|
acc_desc_tail(struct acc_queue *q, uint16_t offset)
|
|
{
|
|
return q->ring_addr + acc_desc_idx_tail(q, offset);
|
|
}
|
|
|
|
/* Provide the operation pointer from the tail of a given queue */
|
|
static inline void*
|
|
acc_op_tail(struct acc_queue *q, uint16_t offset)
|
|
{
|
|
return (q->ring_addr + ((q->sw_ring_tail + offset) & q->sw_ring_wrap_mask))->req.op_addr;
|
|
}
|
|
|
|
/* Enqueue a number of operations to HW and update software rings */
|
|
static inline void
|
|
acc_dma_enqueue(struct acc_queue *q, uint16_t n,
|
|
struct rte_bbdev_stats *queue_stats)
|
|
{
|
|
union acc_enqueue_reg_fmt enq_req;
|
|
union acc_dma_desc *desc;
|
|
#ifdef RTE_BBDEV_OFFLOAD_COST
|
|
uint64_t start_time = 0;
|
|
queue_stats->acc_offload_cycles = 0;
|
|
#else
|
|
RTE_SET_USED(queue_stats);
|
|
#endif
|
|
|
|
/* Set Sdone and IRQ enable bit on last descriptor. */
|
|
desc = acc_desc(q, n - 1);
|
|
desc->req.sdone_enable = 1;
|
|
desc->req.irq_enable = q->irq_enable;
|
|
|
|
enq_req.val = 0;
|
|
/* Setting offset, 100b for 256 DMA Desc */
|
|
enq_req.addr_offset = ACC_DESC_OFFSET;
|
|
|
|
/* Split ops into batches */
|
|
do {
|
|
uint16_t enq_batch_size;
|
|
uint64_t offset;
|
|
rte_iova_t req_elem_addr;
|
|
|
|
enq_batch_size = RTE_MIN(n, MAX_ENQ_BATCH_SIZE);
|
|
|
|
/* Set flag on last descriptor in a batch */
|
|
desc = acc_desc(q, enq_batch_size - 1);
|
|
desc->req.last_desc_in_batch = 1;
|
|
|
|
/* Calculate the 1st descriptor's address */
|
|
offset = ((q->sw_ring_head & q->sw_ring_wrap_mask) * sizeof(union acc_dma_desc));
|
|
req_elem_addr = q->ring_addr_iova + offset;
|
|
|
|
/* Fill enqueue struct */
|
|
enq_req.num_elem = enq_batch_size;
|
|
/* low 6 bits are not needed */
|
|
enq_req.req_elem_addr = (uint32_t)(req_elem_addr >> 6);
|
|
|
|
#ifdef RTE_LIBRTE_BBDEV_DEBUG
|
|
rte_memdump(stderr, "Req sdone", desc, sizeof(*desc));
|
|
#endif
|
|
rte_acc_log(DEBUG, "Enqueue %u reqs (phys %#"PRIx64") to reg %p",
|
|
enq_batch_size,
|
|
req_elem_addr,
|
|
(void *)q->mmio_reg_enqueue);
|
|
|
|
q->aq_enqueued++;
|
|
q->sw_ring_head += enq_batch_size;
|
|
|
|
rte_wmb();
|
|
|
|
#ifdef RTE_BBDEV_OFFLOAD_COST
|
|
/* Start time measurement for enqueue function offload. */
|
|
start_time = rte_rdtsc_precise();
|
|
#endif
|
|
rte_acc_log(DEBUG, "Debug : MMIO Enqueue");
|
|
mmio_write(q->mmio_reg_enqueue, enq_req.val);
|
|
|
|
#ifdef RTE_BBDEV_OFFLOAD_COST
|
|
queue_stats->acc_offload_cycles +=
|
|
rte_rdtsc_precise() - start_time;
|
|
#endif
|
|
|
|
n -= enq_batch_size;
|
|
|
|
} while (n);
|
|
|
|
|
|
}
|
|
|
|
/* Convert offset to harq index for harq_layout structure */
|
|
static inline uint32_t hq_index(uint32_t offset)
|
|
{
|
|
return (offset >> ACC_HARQ_OFFSET_SHIFT) & ACC_HARQ_OFFSET_MASK;
|
|
}
|
|
|
|
/* Calculates number of CBs in processed encoder TB based on 'r' and input
|
|
* length.
|
|
*/
|
|
static inline uint8_t
|
|
get_num_cbs_in_tb_enc(struct rte_bbdev_op_turbo_enc *turbo_enc)
|
|
{
|
|
uint8_t c, c_neg, r, crc24_bits = 0;
|
|
uint16_t k, k_neg, k_pos;
|
|
uint8_t cbs_in_tb = 0;
|
|
int32_t length;
|
|
|
|
length = turbo_enc->input.length;
|
|
r = turbo_enc->tb_params.r;
|
|
c = turbo_enc->tb_params.c;
|
|
c_neg = turbo_enc->tb_params.c_neg;
|
|
k_neg = turbo_enc->tb_params.k_neg;
|
|
k_pos = turbo_enc->tb_params.k_pos;
|
|
crc24_bits = 0;
|
|
if (check_bit(turbo_enc->op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
|
|
crc24_bits = 24;
|
|
while (length > 0 && r < c) {
|
|
k = (r < c_neg) ? k_neg : k_pos;
|
|
length -= (k - crc24_bits) >> 3;
|
|
r++;
|
|
cbs_in_tb++;
|
|
}
|
|
|
|
return cbs_in_tb;
|
|
}
|
|
|
|
/* Calculates number of CBs in processed decoder TB based on 'r' and input
|
|
* length.
|
|
*/
|
|
static inline uint16_t
|
|
get_num_cbs_in_tb_dec(struct rte_bbdev_op_turbo_dec *turbo_dec)
|
|
{
|
|
uint8_t c, c_neg, r = 0;
|
|
uint16_t kw, k, k_neg, k_pos, cbs_in_tb = 0;
|
|
int32_t length;
|
|
|
|
length = turbo_dec->input.length;
|
|
r = turbo_dec->tb_params.r;
|
|
c = turbo_dec->tb_params.c;
|
|
c_neg = turbo_dec->tb_params.c_neg;
|
|
k_neg = turbo_dec->tb_params.k_neg;
|
|
k_pos = turbo_dec->tb_params.k_pos;
|
|
while (length > 0 && r < c) {
|
|
k = (r < c_neg) ? k_neg : k_pos;
|
|
kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
|
|
length -= kw;
|
|
r++;
|
|
cbs_in_tb++;
|
|
}
|
|
|
|
return cbs_in_tb;
|
|
}
|
|
|
|
/* Calculates number of CBs in processed decoder TB based on 'r' and input
|
|
* length.
|
|
*/
|
|
static inline uint16_t
|
|
get_num_cbs_in_tb_ldpc_dec(struct rte_bbdev_op_ldpc_dec *ldpc_dec)
|
|
{
|
|
uint16_t r, cbs_in_tb = 0;
|
|
int32_t length = ldpc_dec->input.length;
|
|
r = ldpc_dec->tb_params.r;
|
|
while (length > 0 && r < ldpc_dec->tb_params.c) {
|
|
length -= (r < ldpc_dec->tb_params.cab) ?
|
|
ldpc_dec->tb_params.ea :
|
|
ldpc_dec->tb_params.eb;
|
|
r++;
|
|
cbs_in_tb++;
|
|
}
|
|
return cbs_in_tb;
|
|
}
|
|
|
|
/* Check we can mux encode operations with common FCW */
|
|
static inline int16_t
|
|
check_mux(struct rte_bbdev_enc_op **ops, uint16_t num) {
|
|
uint16_t i;
|
|
if (num <= 1)
|
|
return 1;
|
|
for (i = 1; i < num; ++i) {
|
|
/* Only mux compatible code blocks */
|
|
if (memcmp((uint8_t *)(&ops[i]->ldpc_enc) + ACC_ENC_OFFSET,
|
|
(uint8_t *)(&ops[0]->ldpc_enc) +
|
|
ACC_ENC_OFFSET,
|
|
ACC_CMP_ENC_SIZE) != 0)
|
|
return i;
|
|
}
|
|
/* Avoid multiplexing small inbound size frames */
|
|
int Kp = (ops[0]->ldpc_enc.basegraph == 1 ? 22 : 10) *
|
|
ops[0]->ldpc_enc.z_c - ops[0]->ldpc_enc.n_filler;
|
|
if (Kp <= ACC_LIMIT_DL_MUX_BITS)
|
|
return 1;
|
|
return num;
|
|
}
|
|
|
|
/* Check we can mux encode operations with common FCW */
|
|
static inline bool
|
|
cmp_ldpc_dec_op(struct rte_bbdev_dec_op **ops) {
|
|
/* Only mux compatible code blocks */
|
|
if (memcmp((uint8_t *)(&ops[0]->ldpc_dec) + ACC_DEC_OFFSET,
|
|
(uint8_t *)(&ops[1]->ldpc_dec) +
|
|
ACC_DEC_OFFSET, ACC_CMP_DEC_SIZE) != 0) {
|
|
return false;
|
|
} else
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Fills descriptor with data pointers of one block type.
|
|
*
|
|
* @param desc
|
|
* Pointer to DMA descriptor.
|
|
* @param input
|
|
* Pointer to pointer to input data which will be encoded. It can be changed
|
|
* and points to next segment in scatter-gather case.
|
|
* @param offset
|
|
* Input offset in rte_mbuf structure. It is used for calculating the point
|
|
* where data is starting.
|
|
* @param cb_len
|
|
* Length of currently processed Code Block
|
|
* @param seg_total_left
|
|
* It indicates how many bytes still left in segment (mbuf) for further
|
|
* processing.
|
|
* @param op_flags
|
|
* Store information about device capabilities
|
|
* @param next_triplet
|
|
* Index for ACC200 DMA Descriptor triplet
|
|
* @param scattergather
|
|
* Flag to support scatter-gather for the mbuf
|
|
*
|
|
* @return
|
|
* Returns index of next triplet on success, other value if lengths of
|
|
* pkt and processed cb do not match.
|
|
*
|
|
*/
|
|
static inline int
|
|
acc_dma_fill_blk_type_in(struct acc_dma_req_desc *desc,
|
|
struct rte_mbuf **input, uint32_t *offset, uint32_t cb_len,
|
|
uint32_t *seg_total_left, int next_triplet,
|
|
bool scattergather)
|
|
{
|
|
uint32_t part_len;
|
|
struct rte_mbuf *m = *input;
|
|
if (scattergather)
|
|
part_len = (*seg_total_left < cb_len) ?
|
|
*seg_total_left : cb_len;
|
|
else
|
|
part_len = cb_len;
|
|
cb_len -= part_len;
|
|
*seg_total_left -= part_len;
|
|
|
|
desc->data_ptrs[next_triplet].address =
|
|
rte_pktmbuf_iova_offset(m, *offset);
|
|
desc->data_ptrs[next_triplet].blen = part_len;
|
|
desc->data_ptrs[next_triplet].blkid = ACC_DMA_BLKID_IN;
|
|
desc->data_ptrs[next_triplet].last = 0;
|
|
desc->data_ptrs[next_triplet].dma_ext = 0;
|
|
*offset += part_len;
|
|
next_triplet++;
|
|
|
|
while (cb_len > 0) {
|
|
if (next_triplet < ACC_DMA_MAX_NUM_POINTERS_IN && m->next != NULL) {
|
|
|
|
m = m->next;
|
|
*seg_total_left = rte_pktmbuf_data_len(m);
|
|
part_len = (*seg_total_left < cb_len) ?
|
|
*seg_total_left :
|
|
cb_len;
|
|
desc->data_ptrs[next_triplet].address =
|
|
rte_pktmbuf_iova_offset(m, 0);
|
|
desc->data_ptrs[next_triplet].blen = part_len;
|
|
desc->data_ptrs[next_triplet].blkid =
|
|
ACC_DMA_BLKID_IN;
|
|
desc->data_ptrs[next_triplet].last = 0;
|
|
desc->data_ptrs[next_triplet].dma_ext = 0;
|
|
cb_len -= part_len;
|
|
*seg_total_left -= part_len;
|
|
/* Initializing offset for next segment (mbuf) */
|
|
*offset = part_len;
|
|
next_triplet++;
|
|
} else {
|
|
rte_acc_log(ERR,
|
|
"Some data still left for processing: "
|
|
"data_left: %u, next_triplet: %u, next_mbuf: %p",
|
|
cb_len, next_triplet, m->next);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
/* Storing new mbuf as it could be changed in scatter-gather case*/
|
|
*input = m;
|
|
|
|
return next_triplet;
|
|
}
|
|
|
|
/* Fills descriptor with data pointers of one block type.
|
|
* Returns index of next triplet
|
|
*/
|
|
static inline int
|
|
acc_dma_fill_blk_type(struct acc_dma_req_desc *desc,
|
|
struct rte_mbuf *mbuf, uint32_t offset,
|
|
uint32_t len, int next_triplet, int blk_id)
|
|
{
|
|
desc->data_ptrs[next_triplet].address =
|
|
rte_pktmbuf_iova_offset(mbuf, offset);
|
|
desc->data_ptrs[next_triplet].blen = len;
|
|
desc->data_ptrs[next_triplet].blkid = blk_id;
|
|
desc->data_ptrs[next_triplet].last = 0;
|
|
desc->data_ptrs[next_triplet].dma_ext = 0;
|
|
next_triplet++;
|
|
|
|
return next_triplet;
|
|
}
|
|
|
|
static inline void
|
|
acc_header_init(struct acc_dma_req_desc *desc)
|
|
{
|
|
desc->word0 = ACC_DMA_DESC_TYPE;
|
|
desc->word1 = 0; /**< Timestamp could be disabled */
|
|
desc->word2 = 0;
|
|
desc->word3 = 0;
|
|
desc->numCBs = 1;
|
|
}
|
|
|
|
#ifdef RTE_LIBRTE_BBDEV_DEBUG
|
|
/* Check if any input data is unexpectedly left for processing */
|
|
static inline int
|
|
check_mbuf_total_left(uint32_t mbuf_total_left)
|
|
{
|
|
if (mbuf_total_left == 0)
|
|
return 0;
|
|
rte_acc_log(ERR,
|
|
"Some date still left for processing: mbuf_total_left = %u",
|
|
mbuf_total_left);
|
|
return -EINVAL;
|
|
}
|
|
#endif
|
|
|
|
static inline int
|
|
acc_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
|
|
struct acc_dma_req_desc *desc, struct rte_mbuf **input,
|
|
struct rte_mbuf *output, uint32_t *in_offset,
|
|
uint32_t *out_offset, uint32_t *out_length,
|
|
uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
|
|
{
|
|
int next_triplet = 1; /* FCW already done */
|
|
uint32_t e, ea, eb, length;
|
|
uint16_t k, k_neg, k_pos;
|
|
uint8_t cab, c_neg;
|
|
|
|
desc->word0 = ACC_DMA_DESC_TYPE;
|
|
desc->word1 = 0; /**< Timestamp could be disabled */
|
|
desc->word2 = 0;
|
|
desc->word3 = 0;
|
|
desc->numCBs = 1;
|
|
|
|
if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
|
|
ea = op->turbo_enc.tb_params.ea;
|
|
eb = op->turbo_enc.tb_params.eb;
|
|
cab = op->turbo_enc.tb_params.cab;
|
|
k_neg = op->turbo_enc.tb_params.k_neg;
|
|
k_pos = op->turbo_enc.tb_params.k_pos;
|
|
c_neg = op->turbo_enc.tb_params.c_neg;
|
|
e = (r < cab) ? ea : eb;
|
|
k = (r < c_neg) ? k_neg : k_pos;
|
|
} else {
|
|
e = op->turbo_enc.cb_params.e;
|
|
k = op->turbo_enc.cb_params.k;
|
|
}
|
|
|
|
if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
|
|
length = (k - 24) >> 3;
|
|
else
|
|
length = k >> 3;
|
|
|
|
if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
|
|
rte_acc_log(ERR,
|
|
"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
|
|
*mbuf_total_left, length);
|
|
return -1;
|
|
}
|
|
|
|
next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset,
|
|
length, seg_total_left, next_triplet,
|
|
check_bit(op->turbo_enc.op_flags,
|
|
RTE_BBDEV_TURBO_ENC_SCATTER_GATHER));
|
|
if (unlikely(next_triplet < 0)) {
|
|
rte_acc_log(ERR,
|
|
"Mismatch between data to process and mbuf data length in bbdev_op: %p",
|
|
op);
|
|
return -1;
|
|
}
|
|
desc->data_ptrs[next_triplet - 1].last = 1;
|
|
desc->m2dlen = next_triplet;
|
|
*mbuf_total_left -= length;
|
|
|
|
/* Set output length */
|
|
if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
|
|
/* Integer round up division by 8 */
|
|
*out_length = (e + 7) >> 3;
|
|
else
|
|
*out_length = (k >> 3) * 3 + 2;
|
|
|
|
next_triplet = acc_dma_fill_blk_type(desc, output, *out_offset,
|
|
*out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
|
|
if (unlikely(next_triplet < 0)) {
|
|
rte_acc_log(ERR,
|
|
"Mismatch between data to process and mbuf data length in bbdev_op: %p",
|
|
op);
|
|
return -1;
|
|
}
|
|
op->turbo_enc.output.length += *out_length;
|
|
*out_offset += *out_length;
|
|
desc->data_ptrs[next_triplet - 1].last = 1;
|
|
desc->d2mlen = next_triplet - desc->m2dlen;
|
|
|
|
desc->op_addr = op;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
acc_pci_remove(struct rte_pci_device *pci_dev)
|
|
{
|
|
struct rte_bbdev *bbdev;
|
|
int ret;
|
|
uint8_t dev_id;
|
|
|
|
if (pci_dev == NULL)
|
|
return -EINVAL;
|
|
|
|
/* Find device */
|
|
bbdev = rte_bbdev_get_named_dev(pci_dev->device.name);
|
|
if (bbdev == NULL) {
|
|
rte_acc_log(CRIT,
|
|
"Couldn't find HW dev \"%s\" to uninitialise it",
|
|
pci_dev->device.name);
|
|
return -ENODEV;
|
|
}
|
|
dev_id = bbdev->data->dev_id;
|
|
|
|
/* free device private memory before close */
|
|
rte_free(bbdev->data->dev_private);
|
|
|
|
/* Close device */
|
|
ret = rte_bbdev_close(dev_id);
|
|
if (ret < 0)
|
|
rte_acc_log(ERR,
|
|
"Device %i failed to close during uninit: %i",
|
|
dev_id, ret);
|
|
|
|
/* release bbdev from library */
|
|
rte_bbdev_release(bbdev);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void
|
|
acc_enqueue_status(struct rte_bbdev_queue_data *q_data,
|
|
enum rte_bbdev_enqueue_status status)
|
|
{
|
|
q_data->enqueue_status = status;
|
|
q_data->queue_stats.enqueue_status_count[status]++;
|
|
|
|
rte_acc_log(WARNING, "Enqueue Status: %s %#"PRIx64"",
|
|
rte_bbdev_enqueue_status_str(status),
|
|
q_data->queue_stats.enqueue_status_count[status]);
|
|
}
|
|
|
|
static inline void
|
|
acc_enqueue_invalid(struct rte_bbdev_queue_data *q_data)
|
|
{
|
|
acc_enqueue_status(q_data, RTE_BBDEV_ENQ_STATUS_INVALID_OP);
|
|
}
|
|
|
|
static inline void
|
|
acc_enqueue_ring_full(struct rte_bbdev_queue_data *q_data)
|
|
{
|
|
acc_enqueue_status(q_data, RTE_BBDEV_ENQ_STATUS_RING_FULL);
|
|
}
|
|
|
|
static inline void
|
|
acc_enqueue_queue_full(struct rte_bbdev_queue_data *q_data)
|
|
{
|
|
acc_enqueue_status(q_data, RTE_BBDEV_ENQ_STATUS_QUEUE_FULL);
|
|
}
|
|
|
|
/* Number of available descriptor in ring to enqueue */
|
|
static inline uint32_t
|
|
acc_ring_avail_enq(struct acc_queue *q)
|
|
{
|
|
return (q->sw_ring_depth - 1 + q->sw_ring_tail - q->sw_ring_head) & q->sw_ring_wrap_mask;
|
|
}
|
|
|
|
/* Number of available descriptor in ring to dequeue */
|
|
static inline uint32_t
|
|
acc_ring_avail_deq(struct acc_queue *q)
|
|
{
|
|
return (q->sw_ring_depth + q->sw_ring_head - q->sw_ring_tail) & q->sw_ring_wrap_mask;
|
|
}
|
|
|
|
/* Check room in AQ for the enqueues batches into Qmgr */
|
|
static inline int32_t
|
|
acc_aq_avail(struct rte_bbdev_queue_data *q_data, uint16_t num_ops)
|
|
{
|
|
struct acc_queue *q = q_data->queue_private;
|
|
int32_t aq_avail = q->aq_depth -
|
|
((q->aq_enqueued - q->aq_dequeued +
|
|
ACC_MAX_QUEUE_DEPTH) % ACC_MAX_QUEUE_DEPTH)
|
|
- (num_ops >> 7);
|
|
if (aq_avail <= 0)
|
|
acc_enqueue_queue_full(q_data);
|
|
return aq_avail;
|
|
}
|
|
|
|
/* Calculates number of CBs in processed encoder TB based on 'r' and input
|
|
* length.
|
|
*/
|
|
static inline uint8_t
|
|
get_num_cbs_in_tb_ldpc_enc(struct rte_bbdev_op_ldpc_enc *ldpc_enc)
|
|
{
|
|
uint8_t c, r, crc24_bits = 0;
|
|
uint16_t k = (ldpc_enc->basegraph == 1 ? 22 : 10) * ldpc_enc->z_c
|
|
- ldpc_enc->n_filler;
|
|
uint8_t cbs_in_tb = 0;
|
|
int32_t length;
|
|
|
|
length = ldpc_enc->input.length;
|
|
r = ldpc_enc->tb_params.r;
|
|
c = ldpc_enc->tb_params.c;
|
|
crc24_bits = 0;
|
|
if (check_bit(ldpc_enc->op_flags, RTE_BBDEV_LDPC_CRC_24B_ATTACH))
|
|
crc24_bits = 24;
|
|
while (length > 0 && r < c) {
|
|
length -= (k - crc24_bits) >> 3;
|
|
r++;
|
|
cbs_in_tb++;
|
|
}
|
|
return cbs_in_tb;
|
|
}
|
|
|
|
#endif /* _ACC_COMMON_H_ */
|