/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Advanced Micro Devices, Inc. All rights reserved.
 */

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/queue.h>

#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_bus_pci.h>
#include <rte_common.h>
#include <rte_cryptodev.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_spinlock.h>

#include "ccp_dev.h"
#include "ccp_pmd_private.h"

static TAILQ_HEAD(, ccp_device) ccp_list = TAILQ_HEAD_INITIALIZER(ccp_list);
static int ccp_dev_id;

int
ccp_dev_start(struct rte_cryptodev *dev)
{
	struct ccp_private *priv = dev->data->dev_private;

	priv->last_dev = TAILQ_FIRST(&ccp_list);
	return 0;
}

/* Pick a hardware queue with at least slot_req free descriptor slots,
 * round-robining first across devices and then across each device's
 * command queues.
 */
struct ccp_queue *
ccp_allot_queue(struct rte_cryptodev *cdev, int slot_req)
{
	int i, ret = 0;
	struct ccp_device *dev;
	struct ccp_private *priv = cdev->data->dev_private;

	dev = TAILQ_NEXT(priv->last_dev, next);
	if (unlikely(dev == NULL))
		dev = TAILQ_FIRST(&ccp_list);
	priv->last_dev = dev;
	if (dev->qidx >= dev->cmd_q_count)
		dev->qidx = 0;
	ret = rte_atomic64_read(&dev->cmd_q[dev->qidx].free_slots);
	if (ret >= slot_req)
		return &dev->cmd_q[dev->qidx];
	for (i = 0; i < dev->cmd_q_count; i++) {
		dev->qidx++;
		if (dev->qidx >= dev->cmd_q_count)
			dev->qidx = 0;
		ret = rte_atomic64_read(&dev->cmd_q[dev->qidx].free_slots);
		if (ret >= slot_req)
			return &dev->cmd_q[dev->qidx];
	}
	return NULL;
}

int
ccp_read_hwrng(uint32_t *value)
{
	struct ccp_device *dev;

	TAILQ_FOREACH(dev, &ccp_list, next) {
		void *vaddr = (void *)(dev->pci->mem_resource[2].addr);

		while (dev->hwrng_retries++ < CCP_MAX_TRNG_RETRIES) {
			*value = CCP_READ_REG(vaddr, TRNG_OUT_REG);
			if (*value) {
				dev->hwrng_retries = 0;
				return 0;
			}
		}
		dev->hwrng_retries = 0;
	}
	return -1;
}

static const struct rte_memzone *
ccp_queue_dma_zone_reserve(const char *queue_name,
			   uint32_t queue_size,
			   int socket_id)
{
	const struct rte_memzone *mz;

	mz = rte_memzone_lookup(queue_name);
	if (mz != NULL) {
		if (((size_t)queue_size <= mz->len) &&
		    ((socket_id == SOCKET_ID_ANY) ||
		     (socket_id == mz->socket_id))) {
			CCP_LOG_INFO("re-use memzone already "
				     "allocated for %s", queue_name);
			return mz;
		}
		CCP_LOG_ERR("Incompatible memzone already "
			    "allocated %s, size %u, socket %d. "
			    "Requested size %u, socket %d",
			    queue_name, (uint32_t)mz->len,
			    mz->socket_id, queue_size, socket_id);
		return NULL;
	}

	CCP_LOG_INFO("Allocate memzone for %s, size %u on socket %d",
		     queue_name, queue_size, socket_id);
	return rte_memzone_reserve_aligned(queue_name, queue_size,
					   socket_id, RTE_MEMZONE_IOVA_CONTIG,
					   queue_size);
}

/* bitmap support apis */
static inline void
ccp_set_bit(unsigned long *bitmap, int n)
{
	__sync_fetch_and_or(&bitmap[WORD_OFFSET(n)], (1UL << BIT_OFFSET(n)));
}

static inline void
ccp_clear_bit(unsigned long *bitmap, int n)
{
	__sync_fetch_and_and(&bitmap[WORD_OFFSET(n)],
			     ~(1UL << BIT_OFFSET(n)));
}

static inline uint32_t
ccp_get_bit(unsigned long *bitmap, int n)
{
	/* Use a long constant so bit positions above 31 are reachable. */
	return ((bitmap[WORD_OFFSET(n)] & (1UL << BIT_OFFSET(n))) != 0);
}

/* Return the index of the first zero bit in word,
 * or BITS_PER_WORD if the word is all ones.
 */
static inline uint32_t
ccp_ffz(unsigned long word)
{
	unsigned long first_zero;

	first_zero = __builtin_ffsl(~word);
	return first_zero ? (first_zero - 1) : BITS_PER_WORD;
}

static inline uint32_t
ccp_find_first_zero_bit(unsigned long *addr, uint32_t limit)
{
	uint32_t i;
	uint32_t nwords = 0;

	nwords = (limit - 1) / BITS_PER_WORD + 1;
	for (i = 0; i < nwords; i++) {
		if (addr[i] == 0UL)
			return i * BITS_PER_WORD;
		if (addr[i] < ~(0UL))
			break;
	}
	return (i == nwords) ? limit : i * BITS_PER_WORD + ccp_ffz(addr[i]);
}
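
/* Worked example for the ccp_bitmap_set()/ccp_bitmap_clear() helpers
 * below (illustrative, assuming BITS_PER_WORD == 64 and the usual
 * Linux-style first/last word masks):
 *
 *   ccp_bitmap_set(map, 62, 4);
 *
 * spans two words. The loop ORs CCP_BITMAP_FIRST_WORD_MASK(62)
 * (bits 62-63) into map[0]; the remaining len == 2 falls through to
 * the tail case, which ORs CCP_BITMAP_LAST_WORD_MASK(66) (bits 0-1)
 * into map[1]. ccp_bitmap_clear() walks the same split with AND-NOT.
 */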

static void
ccp_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	unsigned long *p = map + WORD_OFFSET(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_WORD - (start % BITS_PER_WORD);
	unsigned long mask_to_set = CCP_BITMAP_FIRST_WORD_MASK(start);

	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_WORD;
		mask_to_set = ~0UL;
		p++;
	}
	if (len) {
		mask_to_set &= CCP_BITMAP_LAST_WORD_MASK(size);
		*p |= mask_to_set;
	}
}

static void
ccp_bitmap_clear(unsigned long *map, unsigned int start, int len)
{
	unsigned long *p = map + WORD_OFFSET(start);
	const unsigned int size = start + len;
	int bits_to_clear = BITS_PER_WORD - (start % BITS_PER_WORD);
	unsigned long mask_to_clear = CCP_BITMAP_FIRST_WORD_MASK(start);

	while (len - bits_to_clear >= 0) {
		*p &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_WORD;
		mask_to_clear = ~0UL;
		p++;
	}
	if (len) {
		mask_to_clear &= CCP_BITMAP_LAST_WORD_MASK(size);
		*p &= ~mask_to_clear;
	}
}

static unsigned long
_ccp_find_next_bit(const unsigned long *addr,
		   unsigned long nbits,
		   unsigned long start,
		   unsigned long invert)
{
	unsigned long tmp;

	if (!nbits || start >= nbits)
		return nbits;

	/* XORing with invert (0 or ~0UL) lets the one routine search
	 * for either set bits or zero bits.
	 */
	tmp = addr[start / BITS_PER_WORD] ^ invert;

	/* Handle 1st word. */
	tmp &= CCP_BITMAP_FIRST_WORD_MASK(start);
	start = ccp_round_down(start, BITS_PER_WORD);

	while (!tmp) {
		start += BITS_PER_WORD;
		if (start >= nbits)
			return nbits;

		tmp = addr[start / BITS_PER_WORD] ^ invert;
	}

	/* Use the long variant of ffs() so bits above 31 are found. */
	return RTE_MIN(start + (__builtin_ffsl(tmp) - 1), nbits);
}

static unsigned long
ccp_find_next_bit(const unsigned long *addr,
		  unsigned long size,
		  unsigned long offset)
{
	return _ccp_find_next_bit(addr, size, offset, 0UL);
}

static unsigned long
ccp_find_next_zero_bit(const unsigned long *addr,
		       unsigned long size,
		       unsigned long offset)
{
	return _ccp_find_next_bit(addr, size, offset, ~0UL);
}

/**
 * ccp_bitmap_find_next_zero_area - find a contiguous zero area
 * @map: The address to base the search on
 * @size: The bitmap size in bits
 * @start: The bitnumber to start searching at
 * @nr: The number of zeroed bits we're looking for
 */
static unsigned long
ccp_bitmap_find_next_zero_area(unsigned long *map,
			       unsigned long size,
			       unsigned long start,
			       unsigned int nr)
{
	unsigned long index, end, i;

again:
	index = ccp_find_next_zero_bit(map, size, start);

	end = index + nr;
	if (end > size)
		return end;

	i = ccp_find_next_bit(map, end, index);
	if (i < end) {
		/* A set bit interrupts the area; restart past it. */
		start = i + 1;
		goto again;
	}
	return index;
}

static uint32_t
ccp_lsb_alloc(struct ccp_queue *cmd_q, unsigned int count)
{
	struct ccp_device *ccp;
	int start;

	/* First look at the map for the queue */
	if (cmd_q->lsb >= 0) {
		start = (uint32_t)ccp_bitmap_find_next_zero_area(cmd_q->lsbmap,
								 LSB_SIZE, 0,
								 count);
		if (start < LSB_SIZE) {
			ccp_bitmap_set(cmd_q->lsbmap, start, count);
			return start + cmd_q->lsb * LSB_SIZE;
		}
	}

	/* try to get an entry from the shared blocks */
	ccp = cmd_q->dev;

	rte_spinlock_lock(&ccp->lsb_lock);

	start = (uint32_t)ccp_bitmap_find_next_zero_area(ccp->lsbmap,
							 MAX_LSB_CNT * LSB_SIZE,
							 0, count);
	if (start <= MAX_LSB_CNT * LSB_SIZE) {
		ccp_bitmap_set(ccp->lsbmap, start, count);
		rte_spinlock_unlock(&ccp->lsb_lock);
		return start * LSB_ITEM_SIZE;
	}
	CCP_LOG_ERR("NO LSBs available");

	rte_spinlock_unlock(&ccp->lsb_lock);

	return 0;
}

static void __rte_unused
ccp_lsb_free(struct ccp_queue *cmd_q,
	     unsigned int start,
	     unsigned int count)
{
	int lsbno = start / LSB_SIZE;

	if (!start)
		return;

	if (cmd_q->lsb == lsbno) {
		/* An entry from the private LSB */
		ccp_bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
	} else {
		/* From the shared LSBs */
		struct ccp_device *ccp = cmd_q->dev;

		rte_spinlock_lock(&ccp->lsb_lock);
		ccp_bitmap_clear(ccp->lsbmap, start, count);
		rte_spinlock_unlock(&ccp->lsb_lock);
	}
}
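
/* Allocation sketch for ccp_lsb_alloc() above (illustrative, assuming
 * LSB_SIZE == 16): a queue that owns private LSB 2 and requests two
 * slots gets bits set in its private lsbmap and a return value of
 * start + 2 * 16, i.e. a device-global slot index. Only when the
 * private region is exhausted does the request fall back to the
 * device-wide shared map, taken under lsb_lock.
 */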

static int
ccp_find_lsb_regions(struct ccp_queue *cmd_q, uint64_t status)
{
	int q_mask = 1 << cmd_q->id;
	int weight = 0;
	int j;

	/* Build a bit mask to know which LSBs this queue has access to.
	 * Don't bother with segment 0 as it has special privileges.
	 */
	cmd_q->lsbmask = 0;
	status >>= LSB_REGION_WIDTH;
	for (j = 1; j < MAX_LSB_CNT; j++) {
		if (status & q_mask)
			ccp_set_bit(&cmd_q->lsbmask, j);

		status >>= LSB_REGION_WIDTH;
	}

	for (j = 0; j < MAX_LSB_CNT; j++)
		if (ccp_get_bit(&cmd_q->lsbmask, j))
			weight++;

	CCP_LOG_DBG("Queue %d can access %d LSB regions of mask 0x%lx\n",
		    (int)cmd_q->id, weight, cmd_q->lsbmask);

	return weight ? 0 : -EINVAL;
}

static int
ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
			     int lsb_cnt, int n_lsbs,
			     unsigned long *lsb_pub)
{
	unsigned long qlsb = 0;
	int bitno = 0;
	int qlsb_wgt = 0;
	int i, j;

	/* For each queue:
	 * If the count of potential LSBs available to a queue matches the
	 * ordinal given to us in lsb_cnt:
	 * Copy the mask of possible LSBs for this queue into "qlsb";
	 * For each bit in qlsb, see if the corresponding bit in the
	 * aggregation mask is set; if so, we have a match.
	 * If we have a match, clear the bit in the aggregation to
	 * mark it as no longer available.
	 * If there is no match, clear the bit in qlsb and keep looking.
	 */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct ccp_queue *cmd_q = &ccp->cmd_q[i];

		qlsb_wgt = 0;
		for (j = 0; j < MAX_LSB_CNT; j++)
			if (ccp_get_bit(&cmd_q->lsbmask, j))
				qlsb_wgt++;

		if (qlsb_wgt == lsb_cnt) {
			qlsb = cmd_q->lsbmask;

			/* ffsl() returns 0 on an empty mask; map that to
			 * MAX_LSB_CNT so the loop terminates cleanly instead
			 * of probing bit -1.
			 */
			bitno = qlsb ? __builtin_ffsl(qlsb) - 1 : MAX_LSB_CNT;
			while (bitno < MAX_LSB_CNT) {
				if (ccp_get_bit(lsb_pub, bitno)) {
					/* We found an available LSB
					 * that this queue can access
					 */
					cmd_q->lsb = bitno;
					ccp_clear_bit(lsb_pub, bitno);
					break;
				}
				ccp_clear_bit(&qlsb, bitno);
				bitno = qlsb ?
					__builtin_ffsl(qlsb) - 1 : MAX_LSB_CNT;
			}
			if (bitno >= MAX_LSB_CNT)
				return -EINVAL;
			n_lsbs--;
		}
	}
	return n_lsbs;
}

/* For each queue, from the most- to least-constrained:
 * find an LSB that can be assigned to the queue. If there are N queues that
 * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
 * dedicated LSB. Remaining LSB regions become a shared resource.
 * If we have fewer LSBs than queues, all LSB regions become shared
 * resources.
 */
static int
ccp_assign_lsbs(struct ccp_device *ccp)
{
	unsigned long lsb_pub = 0, qlsb = 0;
	int n_lsbs = 0;
	int bitno;
	int i, lsb_cnt;
	int rc = 0;

	rte_spinlock_init(&ccp->lsb_lock);

	/* Create an aggregate bitmap to get a total count of available LSBs */
	for (i = 0; i < ccp->cmd_q_count; i++)
		lsb_pub |= ccp->cmd_q[i].lsbmask;

	for (i = 0; i < MAX_LSB_CNT; i++)
		if (ccp_get_bit(&lsb_pub, i))
			n_lsbs++;

	if (n_lsbs >= ccp->cmd_q_count) {
		/* We have enough LSBS to give every queue a private LSB.
		 * Brute force search to start with the queues that are more
		 * constrained in LSB choice. When an LSB is privately
		 * assigned, it is removed from the public mask.
		 * This is an ugly N squared algorithm with some optimization.
		 */
		for (lsb_cnt = 1; n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
		     lsb_cnt++) {
			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt,
							  n_lsbs,
							  &lsb_pub);
			if (rc < 0)
				return -EINVAL;
			n_lsbs = rc;
		}
	}

	rc = 0;
	/* What's left of the LSBs, according to the public mask, now become
	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
	 * that can't be used as a shared resource, so mark the LSB slots for
	 * them as "in use".
	 */
	qlsb = lsb_pub;
	bitno = ccp_find_first_zero_bit(&qlsb, MAX_LSB_CNT);
	while (bitno < MAX_LSB_CNT) {
		ccp_bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
		ccp_set_bit(&qlsb, bitno);
		bitno = ccp_find_first_zero_bit(&qlsb, MAX_LSB_CNT);
	}

	return rc;
}
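
/* Worked example for ccp_assign_lsbs() above (illustrative): with
 * three queues whose access masks are {LSB1}, {LSB1, LSB2} and
 * {LSB1, LSB2, LSB3}, the lsb_cnt == 1 pass pins queue 0 to LSB1 and
 * clears it from the public mask, the lsb_cnt == 2 pass then pins
 * queue 1 to LSB2, and the lsb_cnt == 3 pass pins queue 2 to LSB3.
 * Nothing remains public, so every region's slots are marked "in use"
 * in the shared map and all traffic uses the private assignments.
 */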

static int
ccp_add_device(struct ccp_device *dev)
{
	int i;
	uint32_t qmr, status_lo, status_hi, dma_addr_lo, dma_addr_hi;
	uint64_t status;
	struct ccp_queue *cmd_q;
	const struct rte_memzone *q_mz;
	void *vaddr;

	if (dev == NULL)
		return -1;

	dev->id = ccp_dev_id++;
	dev->qidx = 0;
	vaddr = (void *)(dev->pci->mem_resource[2].addr);

	if (dev->pci->id.device_id == AMD_PCI_CCP_5B) {
		CCP_WRITE_REG(vaddr, CMD_TRNG_CTL_OFFSET, 0x00012D57);
		CCP_WRITE_REG(vaddr, CMD_CONFIG_0_OFFSET, 0x00000003);
		for (i = 0; i < 12; i++) {
			CCP_WRITE_REG(vaddr, CMD_AES_MASK_OFFSET,
				      CCP_READ_REG(vaddr, TRNG_OUT_REG));
		}
		CCP_WRITE_REG(vaddr, CMD_QUEUE_MASK_OFFSET, 0x0000001F);
		CCP_WRITE_REG(vaddr, CMD_QUEUE_PRIO_OFFSET, 0x00005B6D);
		CCP_WRITE_REG(vaddr, CMD_CMD_TIMEOUT_OFFSET, 0x00000000);

		CCP_WRITE_REG(vaddr, LSB_PRIVATE_MASK_LO_OFFSET, 0x3FFFFFFF);
		CCP_WRITE_REG(vaddr, LSB_PRIVATE_MASK_HI_OFFSET, 0x000003FF);

		CCP_WRITE_REG(vaddr, CMD_CLK_GATE_CTL_OFFSET, 0x00108823);
	}
	CCP_WRITE_REG(vaddr, CMD_REQID_CONFIG_OFFSET, 0x0);

	/* Copy the private LSB mask to the public registers */
	status_lo = CCP_READ_REG(vaddr, LSB_PRIVATE_MASK_LO_OFFSET);
	status_hi = CCP_READ_REG(vaddr, LSB_PRIVATE_MASK_HI_OFFSET);
	CCP_WRITE_REG(vaddr, LSB_PUBLIC_MASK_LO_OFFSET, status_lo);
	CCP_WRITE_REG(vaddr, LSB_PUBLIC_MASK_HI_OFFSET, status_hi);
	status = ((uint64_t)status_hi << 30) | (uint64_t)status_lo;

	dev->cmd_q_count = 0;
	/* Find available queues */
	qmr = CCP_READ_REG(vaddr, Q_MASK_REG);
	for (i = 0; i < MAX_HW_QUEUES; i++) {
		if (!(qmr & (1 << i)))
			continue;
		cmd_q = &dev->cmd_q[dev->cmd_q_count++];
		cmd_q->dev = dev;
		cmd_q->id = i;
		cmd_q->qidx = 0;
		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);

		cmd_q->reg_base = (uint8_t *)vaddr +
			CMD_Q_STATUS_INCR * (i + 1);

		/* CCP queue memory */
		snprintf(cmd_q->memz_name, sizeof(cmd_q->memz_name),
			 "%s_%d_%s_%d_%s",
			 "ccp_dev",
			 (int)dev->id, "queue",
			 (int)cmd_q->id, "mem");
		q_mz = ccp_queue_dma_zone_reserve(cmd_q->memz_name,
						  cmd_q->qsize, SOCKET_ID_ANY);
		if (q_mz == NULL) {
			CCP_LOG_ERR("Failed to reserve memzone for %s",
				    cmd_q->memz_name);
			return -1;
		}
		cmd_q->qbase_addr = (void *)q_mz->addr;
		cmd_q->qbase_desc = (void *)q_mz->addr;
		cmd_q->qbase_phys_addr = q_mz->iova;

		cmd_q->qcontrol = 0; /* init control reg to zero */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_CONTROL_BASE,
			      cmd_q->qcontrol);

		/* Disable the interrupts */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_INT_ENABLE_BASE, 0x00);
		CCP_READ_REG(cmd_q->reg_base, CMD_Q_INT_STATUS_BASE);
		CCP_READ_REG(cmd_q->reg_base, CMD_Q_STATUS_BASE);

		/* Clear the interrupts */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_INTERRUPT_STATUS_BASE,
			      ALL_INTERRUPTS);

		/* Configure size of each virtual queue accessible to host */
		cmd_q->qcontrol &= ~(CMD_Q_SIZE << CMD_Q_SHIFT);
		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD_Q_SHIFT;

		dma_addr_lo = low32_value(cmd_q->qbase_phys_addr);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_TAIL_LO_BASE,
			      (uint32_t)dma_addr_lo);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_HEAD_LO_BASE,
			      (uint32_t)dma_addr_lo);

		dma_addr_hi = high32_value(cmd_q->qbase_phys_addr);
		cmd_q->qcontrol |= (dma_addr_hi << 16);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_CONTROL_BASE,
			      cmd_q->qcontrol);

		/* create LSB Mask map */
		if (ccp_find_lsb_regions(cmd_q, status))
			CCP_LOG_ERR("queue doesn't have lsb regions");
		cmd_q->lsb = -1;
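
		/* A standard ring convention is assumed here: one descriptor
		 * slot stays permanently unused so that a full queue (head
		 * one behind tail) remains distinguishable from an empty one
		 * (head equal to tail); hence COMMANDS_PER_QUEUE - 1 below.
		 */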
		rte_atomic64_init(&cmd_q->free_slots);
		rte_atomic64_set(&cmd_q->free_slots,
				 (COMMANDS_PER_QUEUE - 1));
		/* unused slot barrier b/w H&T */
	}

	if (ccp_assign_lsbs(dev))
		CCP_LOG_ERR("Unable to assign lsb region");

	/* pre-allocate LSB slots */
	for (i = 0; i < dev->cmd_q_count; i++) {
		dev->cmd_q[i].sb_key = ccp_lsb_alloc(&dev->cmd_q[i], 1);
		dev->cmd_q[i].sb_iv = ccp_lsb_alloc(&dev->cmd_q[i], 1);
		dev->cmd_q[i].sb_sha = ccp_lsb_alloc(&dev->cmd_q[i], 2);
		dev->cmd_q[i].sb_hmac = ccp_lsb_alloc(&dev->cmd_q[i], 2);
	}
	TAILQ_INSERT_TAIL(&ccp_list, dev, next);
	return 0;
}

static void
ccp_remove_device(struct ccp_device *dev)
{
	struct ccp_device *d;

	if (dev == NULL)
		return;

	/* Only unlink a device that actually made it onto the list;
	 * TAILQ_REMOVE on an element that was never inserted corrupts
	 * memory.
	 */
	TAILQ_FOREACH(d, &ccp_list, next) {
		if (d == dev) {
			TAILQ_REMOVE(&ccp_list, dev, next);
			break;
		}
	}
}

int
ccp_probe_device(struct rte_pci_device *pci_dev)
{
	struct ccp_device *ccp_dev;

	ccp_dev = rte_zmalloc("ccp_device", sizeof(*ccp_dev),
			      RTE_CACHE_LINE_SIZE);
	if (ccp_dev == NULL)
		goto fail;

	ccp_dev->pci = pci_dev;

	/* device is valid, add in list */
	if (ccp_add_device(ccp_dev)) {
		ccp_remove_device(ccp_dev);
		goto fail;
	}

	return 0;
fail:
	CCP_LOG_ERR("CCP Device probe failed");
	rte_free(ccp_dev);
	return -1;
}
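
/* Typical call flow (sketch, based on how the CCP PMD wires these
 * entry points): the PCI probe path calls ccp_probe_device() once per
 * CCP device, ccp_dev_start() primes the per-cryptodev round-robin
 * cursor at device start, and the enqueue path calls ccp_allot_queue()
 * to find a hardware queue with enough free descriptor slots before
 * posting commands.
 */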