/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */

#include <ethdev_driver.h>

#include <rte_io.h>

#include "mana.h"

uint8_t *
gdma_get_wqe_pointer(struct mana_gdma_queue *queue)
{
	uint32_t offset_in_bytes =
		(queue->head * GDMA_WQE_ALIGNMENT_UNIT_SIZE) &
		(queue->size - 1);

	DP_LOG(DEBUG, "txq sq_head %u sq_size %u offset_in_bytes %u",
	       queue->head, queue->size, offset_in_bytes);

	if (offset_in_bytes + GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue->size)
		DP_LOG(ERR, "fatal error: offset_in_bytes %u too big",
		       offset_in_bytes);

	return ((uint8_t *)queue->buffer) + offset_in_bytes;
}

static uint32_t
write_dma_client_oob(uint8_t *work_queue_buffer_pointer,
		     const struct gdma_work_request *work_request,
		     uint32_t client_oob_size)
{
	uint8_t *p = work_queue_buffer_pointer;

	struct gdma_wqe_dma_oob *header = (struct gdma_wqe_dma_oob *)p;

	memset(header, 0, sizeof(struct gdma_wqe_dma_oob));
	header->num_sgl_entries = work_request->num_sgl_elements;
	header->inline_client_oob_size_in_dwords =
		client_oob_size / sizeof(uint32_t);
	header->client_data_unit = work_request->client_data_unit;

	DP_LOG(DEBUG, "queue buf %p sgl %u oob_h %u du %u oob_buf %p oob_b %u",
	       work_queue_buffer_pointer, header->num_sgl_entries,
	       header->inline_client_oob_size_in_dwords,
	       header->client_data_unit, work_request->inline_oob_data,
	       work_request->inline_oob_size_in_bytes);

	p += sizeof(struct gdma_wqe_dma_oob);
	if (work_request->inline_oob_data &&
	    work_request->inline_oob_size_in_bytes > 0) {
		memcpy(p, work_request->inline_oob_data,
		       work_request->inline_oob_size_in_bytes);
		if (client_oob_size > work_request->inline_oob_size_in_bytes)
			memset(p + work_request->inline_oob_size_in_bytes, 0,
			       client_oob_size -
			       work_request->inline_oob_size_in_bytes);
	}

	return sizeof(struct gdma_wqe_dma_oob) + client_oob_size;
}

static uint32_t
write_scatter_gather_list(uint8_t *work_queue_head_pointer,
			  uint8_t *work_queue_end_pointer,
			  uint8_t *work_queue_cur_pointer,
			  struct gdma_work_request *work_request)
{
	struct gdma_sgl_element *sge_list;
	struct gdma_sgl_element dummy_sgl[1];
	uint8_t *address;
	uint32_t size;
	uint32_t num_sge;
	uint32_t size_to_queue_end;
	uint32_t sge_list_size;

	DP_LOG(DEBUG, "work_queue_cur_pointer %p work_request->flags %x",
	       work_queue_cur_pointer, work_request->flags);

	num_sge = work_request->num_sgl_elements;
	sge_list = work_request->sgl;
	size_to_queue_end = (uint32_t)(work_queue_end_pointer -
				       work_queue_cur_pointer);

	if (num_sge == 0) {
		/* Per spec, the case of an empty SGL should be handled as
		 * follows to avoid corrupted WQE errors:
		 * Write one dummy SGL entry
		 * Set the address to 1, leave the rest as 0
		 */
		dummy_sgl[num_sge].address = 1;
		dummy_sgl[num_sge].size = 0;
		dummy_sgl[num_sge].memory_key = 0;

		num_sge++;
		sge_list = dummy_sgl;
	}

	sge_list_size = 0;
	{
		address = (uint8_t *)sge_list;
		size = sizeof(struct gdma_sgl_element) * num_sge;
		if (size_to_queue_end < size) {
			memcpy(work_queue_cur_pointer, address,
			       size_to_queue_end);
			work_queue_cur_pointer = work_queue_head_pointer;
			address += size_to_queue_end;
			size -= size_to_queue_end;
		}

		memcpy(work_queue_cur_pointer, address, size);
		sge_list_size = size;
	}

	DP_LOG(DEBUG, "sge %u address 0x%" PRIx64 " size %u key %u list_s %u",
	       num_sge, sge_list->address, sge_list->size,
	       sge_list->memory_key, sge_list_size);

	return sge_list_size;
}
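/*
 * WQE layout implied by the size computation in gdma_post_work_request()
 * below (a sketch of the on-queue format, not taken from the hardware spec):
 *
 *	struct gdma_wqe_dma_oob		DMA OOB header
 *	inline client OOB		INLINE_OOB_SMALL/LARGE_SIZE_IN_BYTES
 *	struct gdma_sgl_element[n]	n = max(1, num_sgl_elements)
 *	padding				to a GDMA_WQE_ALIGNMENT_UNIT_SIZE multiple
 *
 * queue->head and queue->tail are counted in GDMA_WQE_ALIGNMENT_UNIT_SIZE
 * units, so wqe_size is converted to that unit before the free-space check
 * and before advancing the head.
 */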
/*
 * Post a work request to queue.
 */
int
gdma_post_work_request(struct mana_gdma_queue *queue,
		       struct gdma_work_request *work_req,
		       uint32_t *wqe_size_in_bu)
{
	uint32_t client_oob_size =
		work_req->inline_oob_size_in_bytes >
				INLINE_OOB_SMALL_SIZE_IN_BYTES ?
			INLINE_OOB_LARGE_SIZE_IN_BYTES :
			INLINE_OOB_SMALL_SIZE_IN_BYTES;

	uint32_t sgl_data_size = sizeof(struct gdma_sgl_element) *
			RTE_MAX((uint32_t)1, work_req->num_sgl_elements);
	uint32_t wqe_size = RTE_ALIGN(sizeof(struct gdma_wqe_dma_oob) +
				      client_oob_size + sgl_data_size,
				      GDMA_WQE_ALIGNMENT_UNIT_SIZE);
	uint8_t *wq_buffer_pointer;
	uint32_t queue_free_units = queue->count - (queue->head - queue->tail);

	if (wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue_free_units) {
		DP_LOG(DEBUG, "WQE size %u queue count %u head %u tail %u",
		       wqe_size, queue->count, queue->head, queue->tail);
		return -EBUSY;
	}

	DP_LOG(DEBUG, "client_oob_size %u sgl_data_size %u wqe_size %u",
	       client_oob_size, sgl_data_size, wqe_size);

	*wqe_size_in_bu = wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;

	wq_buffer_pointer = gdma_get_wqe_pointer(queue);
	wq_buffer_pointer += write_dma_client_oob(wq_buffer_pointer, work_req,
						  client_oob_size);
	if (wq_buffer_pointer >= ((uint8_t *)queue->buffer) + queue->size)
		wq_buffer_pointer -= queue->size;

	write_scatter_gather_list((uint8_t *)queue->buffer,
				  (uint8_t *)queue->buffer + queue->size,
				  wq_buffer_pointer, work_req);

	queue->head += wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;

	return 0;
}

#ifdef RTE_ARCH_32
union gdma_short_doorbell_entry {
	uint32_t as_uint32;

	struct {
		uint32_t tail_ptr_incr	: 16; /* Number of CQEs */
		uint32_t id		: 12;
		uint32_t reserved	: 3;
		uint32_t arm		: 1;
	} cq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* In number of bytes */
		uint32_t id		: 12;
		uint32_t reserved	: 4;
	} rq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* In number of bytes */
		uint32_t id		: 12;
		uint32_t reserved	: 4;
	} sq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* Number of EQEs */
		uint32_t id		: 12;
		uint32_t reserved	: 3;
		uint32_t arm		: 1;
	} eq;
}; /* HW DATA */

enum {
	DOORBELL_SHORT_OFFSET_SQ = 0x10,
	DOORBELL_SHORT_OFFSET_RQ = 0x410,
	DOORBELL_SHORT_OFFSET_CQ = 0x810,
	DOORBELL_SHORT_OFFSET_EQ = 0xFF0,
};
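/*
 * Short (32-bit) doorbell format used on RTE_ARCH_32 builds, presumably
 * because a single 64-bit MMIO store is not available there.  Unlike the
 * 64-bit format further below, tail_ptr_incr carries an increment (CQEs for
 * CQ/EQ, bytes for SQ/RQ) rather than an absolute tail pointer, and both the
 * queue id and the increment must fit the GDMA_SHORT_DB_QID_MASK and
 * GDMA_SHORT_DB_INC_MASK limits checked in mana_ring_short_doorbell().
 */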
/*
 * Write to hardware doorbell to notify new activity.
 */
int
mana_ring_short_doorbell(void *db_page, enum gdma_queue_types queue_type,
			 uint32_t queue_id, uint32_t tail_incr, uint8_t arm)
{
	uint8_t *addr = db_page;
	union gdma_short_doorbell_entry e = {};

	if ((queue_id & ~GDMA_SHORT_DB_QID_MASK) ||
	    (tail_incr & ~GDMA_SHORT_DB_INC_MASK)) {
		DP_LOG(ERR, "%s: queue_id %u or "
		       "tail_incr %u overflowed, queue type %d",
		       __func__, queue_id, tail_incr, queue_type);
		return -EINVAL;
	}

	switch (queue_type) {
	case GDMA_QUEUE_SEND:
		e.sq.id = queue_id;
		e.sq.tail_ptr_incr = tail_incr;
		addr += DOORBELL_SHORT_OFFSET_SQ;
		break;

	case GDMA_QUEUE_RECEIVE:
		e.rq.id = queue_id;
		e.rq.tail_ptr_incr = tail_incr;
		addr += DOORBELL_SHORT_OFFSET_RQ;
		break;

	case GDMA_QUEUE_COMPLETION:
		e.cq.id = queue_id;
		e.cq.tail_ptr_incr = tail_incr;
		e.cq.arm = arm;
		addr += DOORBELL_SHORT_OFFSET_CQ;
		break;

	default:
		DP_LOG(ERR, "Unsupported queue type %d", queue_type);
		return -1;
	}

	/* Ensure all writes are done before ringing doorbell */
	rte_wmb();

	DP_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u arm %u",
	       db_page, addr, queue_id, queue_type, tail_incr, arm);

	rte_write32(e.as_uint32, addr);

	return 0;
}
#else
union gdma_doorbell_entry {
	uint64_t as_uint64;

	struct {
		uint64_t id	  : 24;
		uint64_t reserved : 8;
		uint64_t tail_ptr : 31;
		uint64_t arm	  : 1;
	} cq;

	struct {
		uint64_t id	  : 24;
		uint64_t wqe_cnt  : 8;
		uint64_t tail_ptr : 32;
	} rq;

	struct {
		uint64_t id	  : 24;
		uint64_t reserved : 8;
		uint64_t tail_ptr : 32;
	} sq;

	struct {
		uint64_t id	  : 16;
		uint64_t reserved : 16;
		uint64_t tail_ptr : 31;
		uint64_t arm	  : 1;
	} eq;
}; /* HW DATA */

enum {
	DOORBELL_OFFSET_SQ = 0x0,
	DOORBELL_OFFSET_RQ = 0x400,
	DOORBELL_OFFSET_CQ = 0x800,
	DOORBELL_OFFSET_EQ = 0xFF8,
};

/*
 * Write to hardware doorbell to notify new activity.
 */
int
mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
		   uint32_t queue_id, uint32_t tail, uint8_t arm)
{
	uint8_t *addr = db_page;
	union gdma_doorbell_entry e = {};

	switch (queue_type) {
	case GDMA_QUEUE_SEND:
		e.sq.id = queue_id;
		e.sq.tail_ptr = tail;
		addr += DOORBELL_OFFSET_SQ;
		break;

	case GDMA_QUEUE_RECEIVE:
		e.rq.id = queue_id;
		e.rq.tail_ptr = tail;
		e.rq.wqe_cnt = arm;
		addr += DOORBELL_OFFSET_RQ;
		break;

	case GDMA_QUEUE_COMPLETION:
		e.cq.id = queue_id;
		e.cq.tail_ptr = tail;
		e.cq.arm = arm;
		addr += DOORBELL_OFFSET_CQ;
		break;

	default:
		DP_LOG(ERR, "Unsupported queue type %d", queue_type);
		return -1;
	}

	/* Ensure all writes are done before ringing doorbell */
	rte_wmb();

	DP_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u arm %u",
	       db_page, addr, queue_id, queue_type, tail, arm);

	rte_write64(e.as_uint64, addr);

	return 0;
}
#endif
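/*
 * CQE ownership, as used by the polling loop below: the hardware advances
 * the owner bits stamped in each CQE every time it wraps around the ring.
 * For the entry at cq->head the expected value is
 * (cq->head / cq->count) & COMPLETION_QUEUE_OWNER_MASK; seeing the previous
 * pass's value means no new completion yet, and any other value is treated
 * as a CQ overflow.
 */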
/*
 * Poll completion queue for completions.
 */
uint32_t
gdma_poll_completion_queue(struct mana_gdma_queue *cq,
			   struct gdma_comp *gdma_comp, uint32_t max_comp)
{
	struct gdma_hardware_completion_entry *cqe;
	uint32_t new_owner_bits, old_owner_bits;
	uint32_t cqe_owner_bits;
	uint32_t num_comp = 0;
	struct gdma_hardware_completion_entry *buffer = cq->buffer;

	while (num_comp < max_comp) {
		cqe = &buffer[cq->head % cq->count];
		new_owner_bits = (cq->head / cq->count) &
					COMPLETION_QUEUE_OWNER_MASK;
		old_owner_bits = (cq->head / cq->count - 1) &
					COMPLETION_QUEUE_OWNER_MASK;
		cqe_owner_bits = cqe->owner_bits;

		DP_LOG(DEBUG, "comp cqe bits 0x%x owner bits 0x%x",
		       cqe_owner_bits, old_owner_bits);

		/* No new entry */
		if (cqe_owner_bits == old_owner_bits)
			break;

		if (cqe_owner_bits != new_owner_bits) {
			DRV_LOG(ERR, "CQ overflowed, ID %u cqe 0x%x new 0x%x",
				cq->id, cqe_owner_bits, new_owner_bits);
			break;
		}

		gdma_comp[num_comp].cqe_data = cqe->dma_client_data;
		num_comp++;

		cq->head++;

		DP_LOG(DEBUG, "comp new 0x%x old 0x%x cqe 0x%x wq %u sq %u head %u",
		       new_owner_bits, old_owner_bits, cqe_owner_bits,
		       cqe->wq_num, cqe->is_sq, cq->head);
	}

	/* Make sure the CQE owner bits are checked before we access the data
	 * in CQE
	 */
	rte_rmb();

	return num_comp;
}
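/*
 * Illustrative transmit-side usage of the helpers in this file.  This is a
 * sketch only: "txq", "sges", "tx_oob" and the doorbell tail value are
 * hypothetical caller-side state, not defined here.
 *
 *	struct gdma_work_request req = {
 *		.sgl = sges,
 *		.num_sgl_elements = n_sges,
 *		.inline_oob_data = &tx_oob,
 *		.inline_oob_size_in_bytes = sizeof(tx_oob),
 *	};
 *	uint32_t wqe_size_in_bu;
 *
 *	if (gdma_post_work_request(&txq->gdma_sq, &req, &wqe_size_in_bu) == 0) {
 *		// 64-bit builds; RTE_ARCH_32 uses mana_ring_short_doorbell()
 *		mana_ring_doorbell(db_page, GDMA_QUEUE_SEND, txq->gdma_sq.id,
 *				   txq->gdma_sq.head *
 *				   GDMA_WQE_ALIGNMENT_UNIT_SIZE, 0);
 *	}
 *
 *	struct gdma_comp comps[8];
 *	uint32_t n = gdma_poll_completion_queue(&txq->gdma_cq, comps,
 *						RTE_DIM(comps));
 */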