mirror of https://github.com/F-Stack/f-stack.git
902 lines
25 KiB
C
902 lines
25 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
|
|
* Copyright(C) 2019 Marvell International Ltd.
|
|
*/
|
|
|
|
#include <rte_mempool.h>
|
|
#include <rte_vect.h>
|
|
|
|
#include "otx2_mempool.h"
|
|
|
|
static int __rte_hot
|
|
otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n)
|
|
{
|
|
unsigned int index; const uint64_t aura_handle = mp->pool_id;
|
|
const uint64_t reg = npa_lf_aura_handle_to_aura(aura_handle);
|
|
const uint64_t addr = npa_lf_aura_handle_to_base(aura_handle) +
|
|
NPA_LF_AURA_OP_FREE0;
|
|
|
|
/* Ensure mbuf init changes are written before the free pointers
|
|
* are enqueued to the stack.
|
|
*/
|
|
rte_io_wmb();
|
|
for (index = 0; index < n; index++)
|
|
otx2_store_pair((uint64_t)obj_table[index], reg, addr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Allocate a single object into obj_table[i].
 *
 * The value returned by otx2_atomic64_add_nosync(wdata, addr) is stored in
 * obj_table[i]; a NULL result is retried. At most five attempts are made
 * in total (the first one plus four retries).
 *
 * Returns 0 once a non-NULL pointer was stored, -ENOENT otherwise (in which
 * case obj_table[i] is left NULL).
 */
static __rte_noinline int
npa_lf_aura_op_alloc_one(const int64_t wdata, int64_t * const addr,
			 void **obj_table, uint8_t i)
{
	uint8_t attempts_left;

	for (attempts_left = 5; attempts_left != 0; attempts_left--) {
		void *ptr = (void *)otx2_atomic64_add_nosync(wdata, addr);

		obj_table[i] = ptr;
		if (ptr != NULL)
			return 0;
	}

	return -ENOENT;
}
|
|
|
|
#if defined(RTE_ARCH_ARM64)
|
|
/*
 * Walk the first n entries of obj_table and allocate a replacement, via
 * npa_lf_aura_op_alloc_one(), for every entry that is still NULL.
 *
 * Returns 0 when every entry ends up populated, -ENOENT as soon as one
 * single allocation fails.
 */
static __rte_noinline int
npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr,
		void **obj_table, unsigned int n)
{
	uint8_t idx = 0;

	while (idx < n) {
		if (obj_table[idx] == NULL &&
		    npa_lf_aura_op_alloc_one(wdata, addr, obj_table, idx) != 0)
			return -ENOENT;
		idx++;
	}

	return 0;
}
|
|
|
|
static __rte_noinline int
|
|
npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr,
|
|
unsigned int n, void **obj_table)
|
|
{
|
|
register const uint64_t wdata64 __asm("x26") = wdata;
|
|
register const uint64_t wdata128 __asm("x27") = wdata;
|
|
uint64x2_t failed = vdupq_n_u64(~0);
|
|
|
|
switch (n) {
|
|
case 32:
|
|
{
|
|
asm volatile (
|
|
".cpu generic+lse\n"
|
|
"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x16, x17, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x18, x19, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x20, x21, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x22, x23, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"fmov d16, x0\n"
|
|
"fmov v16.D[1], x1\n"
|
|
"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"fmov d17, x2\n"
|
|
"fmov v17.D[1], x3\n"
|
|
"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"fmov d18, x4\n"
|
|
"fmov v18.D[1], x5\n"
|
|
"casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"fmov d19, x6\n"
|
|
"fmov v19.D[1], x7\n"
|
|
"casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"and %[failed].16B, %[failed].16B, v16.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v17.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v18.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v19.16B\n"
|
|
"fmov d20, x8\n"
|
|
"fmov v20.D[1], x9\n"
|
|
"fmov d21, x10\n"
|
|
"fmov v21.D[1], x11\n"
|
|
"fmov d22, x12\n"
|
|
"fmov v22.D[1], x13\n"
|
|
"fmov d23, x14\n"
|
|
"fmov v23.D[1], x15\n"
|
|
"and %[failed].16B, %[failed].16B, v20.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v21.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v22.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v23.16B\n"
|
|
"st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
|
|
"st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
|
|
"fmov d16, x16\n"
|
|
"fmov v16.D[1], x17\n"
|
|
"fmov d17, x18\n"
|
|
"fmov v17.D[1], x19\n"
|
|
"fmov d18, x20\n"
|
|
"fmov v18.D[1], x21\n"
|
|
"fmov d19, x22\n"
|
|
"fmov v19.D[1], x23\n"
|
|
"and %[failed].16B, %[failed].16B, v16.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v17.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v18.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v19.16B\n"
|
|
"fmov d20, x0\n"
|
|
"fmov v20.D[1], x1\n"
|
|
"fmov d21, x2\n"
|
|
"fmov v21.D[1], x3\n"
|
|
"fmov d22, x4\n"
|
|
"fmov v22.D[1], x5\n"
|
|
"fmov d23, x6\n"
|
|
"fmov v23.D[1], x7\n"
|
|
"and %[failed].16B, %[failed].16B, v20.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v21.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v22.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v23.16B\n"
|
|
"st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
|
|
"st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
|
|
: "+Q" (*addr), [failed] "=&w" (failed)
|
|
: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
|
|
[dst] "r" (obj_table), [loc] "r" (addr)
|
|
: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
|
|
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16",
|
|
"x17", "x18", "x19", "x20", "x21", "x22", "x23", "v16", "v17",
|
|
"v18", "v19", "v20", "v21", "v22", "v23"
|
|
);
|
|
break;
|
|
}
|
|
case 16:
|
|
{
|
|
asm volatile (
|
|
".cpu generic+lse\n"
|
|
"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"fmov d16, x0\n"
|
|
"fmov v16.D[1], x1\n"
|
|
"fmov d17, x2\n"
|
|
"fmov v17.D[1], x3\n"
|
|
"fmov d18, x4\n"
|
|
"fmov v18.D[1], x5\n"
|
|
"fmov d19, x6\n"
|
|
"fmov v19.D[1], x7\n"
|
|
"and %[failed].16B, %[failed].16B, v16.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v17.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v18.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v19.16B\n"
|
|
"fmov d20, x8\n"
|
|
"fmov v20.D[1], x9\n"
|
|
"fmov d21, x10\n"
|
|
"fmov v21.D[1], x11\n"
|
|
"fmov d22, x12\n"
|
|
"fmov v22.D[1], x13\n"
|
|
"fmov d23, x14\n"
|
|
"fmov v23.D[1], x15\n"
|
|
"and %[failed].16B, %[failed].16B, v20.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v21.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v22.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v23.16B\n"
|
|
"st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
|
|
"st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
|
|
: "+Q" (*addr), [failed] "=&w" (failed)
|
|
: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
|
|
[dst] "r" (obj_table), [loc] "r" (addr)
|
|
: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
|
|
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "v16",
|
|
"v17", "v18", "v19", "v20", "v21", "v22", "v23"
|
|
);
|
|
break;
|
|
}
|
|
case 8:
|
|
{
|
|
asm volatile (
|
|
".cpu generic+lse\n"
|
|
"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"fmov d16, x0\n"
|
|
"fmov v16.D[1], x1\n"
|
|
"fmov d17, x2\n"
|
|
"fmov v17.D[1], x3\n"
|
|
"fmov d18, x4\n"
|
|
"fmov v18.D[1], x5\n"
|
|
"fmov d19, x6\n"
|
|
"fmov v19.D[1], x7\n"
|
|
"and %[failed].16B, %[failed].16B, v16.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v17.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v18.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v19.16B\n"
|
|
"st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
|
|
: "+Q" (*addr), [failed] "=&w" (failed)
|
|
: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
|
|
[dst] "r" (obj_table), [loc] "r" (addr)
|
|
: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
|
|
"v16", "v17", "v18", "v19"
|
|
);
|
|
break;
|
|
}
|
|
case 4:
|
|
{
|
|
asm volatile (
|
|
".cpu generic+lse\n"
|
|
"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"fmov d16, x0\n"
|
|
"fmov v16.D[1], x1\n"
|
|
"fmov d17, x2\n"
|
|
"fmov v17.D[1], x3\n"
|
|
"and %[failed].16B, %[failed].16B, v16.16B\n"
|
|
"and %[failed].16B, %[failed].16B, v17.16B\n"
|
|
"st1 { v16.2d, v17.2d}, [%[dst]], 32\n"
|
|
: "+Q" (*addr), [failed] "=&w" (failed)
|
|
: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
|
|
[dst] "r" (obj_table), [loc] "r" (addr)
|
|
: "memory", "x0", "x1", "x2", "x3", "v16", "v17"
|
|
);
|
|
break;
|
|
}
|
|
case 2:
|
|
{
|
|
asm volatile (
|
|
".cpu generic+lse\n"
|
|
"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
|
|
"fmov d16, x0\n"
|
|
"fmov v16.D[1], x1\n"
|
|
"and %[failed].16B, %[failed].16B, v16.16B\n"
|
|
"st1 { v16.2d}, [%[dst]], 16\n"
|
|
: "+Q" (*addr), [failed] "=&w" (failed)
|
|
: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
|
|
[dst] "r" (obj_table), [loc] "r" (addr)
|
|
: "memory", "x0", "x1", "v16"
|
|
);
|
|
break;
|
|
}
|
|
case 1:
|
|
return npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
|
|
}
|
|
|
|
if (unlikely(!(vgetq_lane_u64(failed, 0) & vgetq_lane_u64(failed, 1))))
|
|
return npa_lf_aura_op_search_alloc(wdata, addr, (void **)
|
|
((char *)obj_table - (sizeof(uint64_t) * n)), n);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Roll back a partially completed dequeue: every non-NULL pointer among
 * the first n entries of obj_table is handed back through otx2_npa_enq()
 * and its slot is reset to NULL.
 */
static __rte_noinline void
otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	unsigned int idx;

	for (idx = 0; idx != n; idx++) {
		void **slot = &obj_table[idx];

		if (*slot == NULL)
			continue;

		otx2_npa_enq(mp, slot, 1);
		*slot = NULL;
	}
}
|
|
|
|
static __rte_noinline int __rte_hot
|
|
otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n)
|
|
{
|
|
const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
|
|
void **obj_table_bak = obj_table;
|
|
const unsigned int nfree = n;
|
|
unsigned int parts;
|
|
|
|
int64_t * const addr = (int64_t * const)
|
|
(npa_lf_aura_handle_to_base(mp->pool_id) +
|
|
NPA_LF_AURA_OP_ALLOCX(0));
|
|
while (n) {
|
|
parts = n > 31 ? 32 : rte_align32prevpow2(n);
|
|
n -= parts;
|
|
if (unlikely(npa_lf_aura_op_alloc_bulk(wdata, addr,
|
|
parts, obj_table))) {
|
|
otx2_npa_clear_alloc(mp, obj_table_bak, nfree - n);
|
|
return -ENOENT;
|
|
}
|
|
obj_table += parts;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#else
|
|
|
|
static inline int __rte_hot
|
|
otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
|
|
{
|
|
const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
|
|
unsigned int index;
|
|
uint64_t obj;
|
|
|
|
int64_t * const addr = (int64_t *)
|
|
(npa_lf_aura_handle_to_base(mp->pool_id) +
|
|
NPA_LF_AURA_OP_ALLOCX(0));
|
|
for (index = 0; index < n; index++, obj_table++) {
|
|
obj = npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
|
|
if (obj == 0) {
|
|
for (; index > 0; index--) {
|
|
obj_table--;
|
|
otx2_npa_enq(mp, obj_table, 1);
|
|
}
|
|
return -ENOENT;
|
|
}
|
|
*obj_table = (void *)obj;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#endif
|
|
|
|
static unsigned int
|
|
otx2_npa_get_count(const struct rte_mempool *mp)
|
|
{
|
|
return (unsigned int)npa_lf_aura_op_available(mp->pool_id);
|
|
}
|
|
|
|
static int
|
|
npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id,
|
|
struct npa_aura_s *aura, struct npa_pool_s *pool)
|
|
{
|
|
struct npa_aq_enq_req *aura_init_req, *pool_init_req;
|
|
struct npa_aq_enq_rsp *aura_init_rsp, *pool_init_rsp;
|
|
struct otx2_mbox_dev *mdev = &mbox->dev[0];
|
|
struct otx2_idev_cfg *idev;
|
|
int rc, off;
|
|
|
|
idev = otx2_intra_dev_get_cfg();
|
|
if (idev == NULL)
|
|
return -ENOMEM;
|
|
|
|
aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
|
|
aura_init_req->aura_id = aura_id;
|
|
aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
|
|
aura_init_req->op = NPA_AQ_INSTOP_INIT;
|
|
otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura));
|
|
|
|
pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
|
|
pool_init_req->aura_id = aura_id;
|
|
pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
|
|
pool_init_req->op = NPA_AQ_INSTOP_INIT;
|
|
otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool));
|
|
|
|
otx2_mbox_msg_send(mbox, 0);
|
|
rc = otx2_mbox_wait_for_rsp(mbox, 0);
|
|
if (rc < 0)
|
|
return rc;
|
|
|
|
off = mbox->rx_start +
|
|
RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
|
|
aura_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
|
|
off = mbox->rx_start + aura_init_rsp->hdr.next_msgoff;
|
|
pool_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
|
|
|
|
if (rc == 2 && aura_init_rsp->hdr.rc == 0 && pool_init_rsp->hdr.rc == 0)
|
|
return 0;
|
|
else
|
|
return NPA_LF_ERR_AURA_POOL_INIT;
|
|
|
|
if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
|
|
return 0;
|
|
|
|
aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
aura_init_req->aura_id = aura_id;
|
|
aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
|
|
aura_init_req->op = NPA_AQ_INSTOP_LOCK;
|
|
|
|
pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
if (!pool_init_req) {
|
|
/* The shared memory buffer can be full.
|
|
* Flush it and retry
|
|
*/
|
|
otx2_mbox_msg_send(mbox, 0);
|
|
rc = otx2_mbox_wait_for_rsp(mbox, 0);
|
|
if (rc < 0) {
|
|
otx2_err("Failed to LOCK AURA context");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
if (!pool_init_req) {
|
|
otx2_err("Failed to LOCK POOL context");
|
|
return -ENOMEM;
|
|
}
|
|
}
|
|
pool_init_req->aura_id = aura_id;
|
|
pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
|
|
pool_init_req->op = NPA_AQ_INSTOP_LOCK;
|
|
|
|
rc = otx2_mbox_process(mbox);
|
|
if (rc < 0) {
|
|
otx2_err("Failed to lock POOL ctx to NDC");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
npa_lf_aura_pool_fini(struct otx2_mbox *mbox,
|
|
uint32_t aura_id,
|
|
uint64_t aura_handle)
|
|
{
|
|
struct npa_aq_enq_req *aura_req, *pool_req;
|
|
struct npa_aq_enq_rsp *aura_rsp, *pool_rsp;
|
|
struct otx2_mbox_dev *mdev = &mbox->dev[0];
|
|
struct ndc_sync_op *ndc_req;
|
|
struct otx2_idev_cfg *idev;
|
|
int rc, off;
|
|
|
|
idev = otx2_intra_dev_get_cfg();
|
|
if (idev == NULL)
|
|
return -EINVAL;
|
|
|
|
/* Procedure for disabling an aura/pool */
|
|
rte_delay_us(10);
|
|
npa_lf_aura_op_alloc(aura_handle, 0);
|
|
|
|
pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
pool_req->aura_id = aura_id;
|
|
pool_req->ctype = NPA_AQ_CTYPE_POOL;
|
|
pool_req->op = NPA_AQ_INSTOP_WRITE;
|
|
pool_req->pool.ena = 0;
|
|
pool_req->pool_mask.ena = ~pool_req->pool_mask.ena;
|
|
|
|
aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
aura_req->aura_id = aura_id;
|
|
aura_req->ctype = NPA_AQ_CTYPE_AURA;
|
|
aura_req->op = NPA_AQ_INSTOP_WRITE;
|
|
aura_req->aura.ena = 0;
|
|
aura_req->aura_mask.ena = ~aura_req->aura_mask.ena;
|
|
|
|
otx2_mbox_msg_send(mbox, 0);
|
|
rc = otx2_mbox_wait_for_rsp(mbox, 0);
|
|
if (rc < 0)
|
|
return rc;
|
|
|
|
off = mbox->rx_start +
|
|
RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
|
|
pool_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
|
|
|
|
off = mbox->rx_start + pool_rsp->hdr.next_msgoff;
|
|
aura_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
|
|
|
|
if (rc != 2 || aura_rsp->hdr.rc != 0 || pool_rsp->hdr.rc != 0)
|
|
return NPA_LF_ERR_AURA_POOL_FINI;
|
|
|
|
/* Sync NDC-NPA for LF */
|
|
ndc_req = otx2_mbox_alloc_msg_ndc_sync_op(mbox);
|
|
ndc_req->npa_lf_sync = 1;
|
|
|
|
rc = otx2_mbox_process(mbox);
|
|
if (rc) {
|
|
otx2_err("Error on NDC-NPA LF sync, rc %d", rc);
|
|
return NPA_LF_ERR_AURA_POOL_FINI;
|
|
}
|
|
|
|
if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
|
|
return 0;
|
|
|
|
aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
aura_req->aura_id = aura_id;
|
|
aura_req->ctype = NPA_AQ_CTYPE_AURA;
|
|
aura_req->op = NPA_AQ_INSTOP_UNLOCK;
|
|
|
|
rc = otx2_mbox_process(mbox);
|
|
if (rc < 0) {
|
|
otx2_err("Failed to unlock AURA ctx to NDC");
|
|
return -EINVAL;
|
|
}
|
|
|
|
pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
|
|
pool_req->aura_id = aura_id;
|
|
pool_req->ctype = NPA_AQ_CTYPE_POOL;
|
|
pool_req->op = NPA_AQ_INSTOP_UNLOCK;
|
|
|
|
rc = otx2_mbox_process(mbox);
|
|
if (rc < 0) {
|
|
otx2_err("Failed to unlock POOL ctx to NDC");
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline char*
|
|
npa_lf_stack_memzone_name(struct otx2_npa_lf *lf, int pool_id, char *name)
|
|
{
|
|
snprintf(name, RTE_MEMZONE_NAMESIZE, "otx2_npa_stack_%x_%d",
|
|
lf->pf_func, pool_id);
|
|
|
|
return name;
|
|
}
|
|
|
|
static inline const struct rte_memzone *
|
|
npa_lf_stack_dma_alloc(struct otx2_npa_lf *lf, char *name,
|
|
int pool_id, size_t size)
|
|
{
|
|
return rte_memzone_reserve_aligned(
|
|
npa_lf_stack_memzone_name(lf, pool_id, name), size, 0,
|
|
RTE_MEMZONE_IOVA_CONTIG, OTX2_ALIGN);
|
|
}
|
|
|
|
/*
 * Look up the stack memzone of pool_id by its generated name and free it.
 *
 * Returns -EINVAL when no such memzone exists, otherwise the result of
 * rte_memzone_free().
 */
static inline int
npa_lf_stack_dma_free(struct otx2_npa_lf *lf, char *name, int pool_id)
{
	const struct rte_memzone *zone =
		rte_memzone_lookup(npa_lf_stack_memzone_name(lf, pool_id, name));

	return zone != NULL ? rte_memzone_free(zone) : -EINVAL;
}
|
|
|
|
/*
 * Count trailing zero bits of slab.
 *
 * __builtin_ctzll() is undefined for a zero argument, so that case is
 * mapped to 0 explicitly.
 */
static inline int
bitmap_ctzll(uint64_t slab)
{
	return slab != 0 ? __builtin_ctzll(slab) : 0;
}
|
|
|
|
static int
|
|
npa_lf_aura_pool_pair_alloc(struct otx2_npa_lf *lf, const uint32_t block_size,
|
|
const uint32_t block_count, struct npa_aura_s *aura,
|
|
struct npa_pool_s *pool, uint64_t *aura_handle)
|
|
{
|
|
int rc, aura_id, pool_id, stack_size, alloc_size;
|
|
char name[RTE_MEMZONE_NAMESIZE];
|
|
const struct rte_memzone *mz;
|
|
uint64_t slab;
|
|
uint32_t pos;
|
|
|
|
/* Sanity check */
|
|
if (!lf || !block_size || !block_count ||
|
|
!pool || !aura || !aura_handle)
|
|
return NPA_LF_ERR_PARAM;
|
|
|
|
/* Block size should be cache line aligned and in range of 128B-128KB */
|
|
if (block_size % OTX2_ALIGN || block_size < 128 ||
|
|
block_size > 128 * 1024)
|
|
return NPA_LF_ERR_INVALID_BLOCK_SZ;
|
|
|
|
pos = slab = 0;
|
|
/* Scan from the beginning */
|
|
__rte_bitmap_scan_init(lf->npa_bmp);
|
|
/* Scan bitmap to get the free pool */
|
|
rc = rte_bitmap_scan(lf->npa_bmp, &pos, &slab);
|
|
/* Empty bitmap */
|
|
if (rc == 0) {
|
|
otx2_err("Mempools exhausted, 'max_pools' devargs to increase");
|
|
return -ERANGE;
|
|
}
|
|
|
|
/* Get aura_id from resource bitmap */
|
|
aura_id = pos + bitmap_ctzll(slab);
|
|
/* Mark pool as reserved */
|
|
rte_bitmap_clear(lf->npa_bmp, aura_id);
|
|
|
|
/* Configuration based on each aura has separate pool(aura-pool pair) */
|
|
pool_id = aura_id;
|
|
rc = (aura_id < 0 || pool_id >= (int)lf->nr_pools || aura_id >=
|
|
(int)BIT_ULL(6 + lf->aura_sz)) ? NPA_LF_ERR_AURA_ID_ALLOC : 0;
|
|
if (rc)
|
|
goto exit;
|
|
|
|
/* Allocate stack memory */
|
|
stack_size = (block_count + lf->stack_pg_ptrs - 1) / lf->stack_pg_ptrs;
|
|
alloc_size = stack_size * lf->stack_pg_bytes;
|
|
|
|
mz = npa_lf_stack_dma_alloc(lf, name, pool_id, alloc_size);
|
|
if (mz == NULL) {
|
|
rc = -ENOMEM;
|
|
goto aura_res_put;
|
|
}
|
|
|
|
/* Update aura fields */
|
|
aura->pool_addr = pool_id;/* AF will translate to associated poolctx */
|
|
aura->ena = 1;
|
|
aura->shift = rte_log2_u32(block_count);
|
|
aura->shift = aura->shift < 8 ? 0 : aura->shift - 8;
|
|
aura->limit = block_count;
|
|
aura->pool_caching = 1;
|
|
aura->err_int_ena = BIT(NPA_AURA_ERR_INT_AURA_ADD_OVER);
|
|
aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_ADD_UNDER);
|
|
aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_FREE_UNDER);
|
|
aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_POOL_DIS);
|
|
/* Many to one reduction */
|
|
aura->err_qint_idx = aura_id % lf->qints;
|
|
|
|
/* Update pool fields */
|
|
pool->stack_base = mz->iova;
|
|
pool->ena = 1;
|
|
pool->buf_size = block_size / OTX2_ALIGN;
|
|
pool->stack_max_pages = stack_size;
|
|
pool->shift = rte_log2_u32(block_count);
|
|
pool->shift = pool->shift < 8 ? 0 : pool->shift - 8;
|
|
pool->ptr_start = 0;
|
|
pool->ptr_end = ~0;
|
|
pool->stack_caching = 1;
|
|
pool->err_int_ena = BIT(NPA_POOL_ERR_INT_OVFLS);
|
|
pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_RANGE);
|
|
pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_PERR);
|
|
|
|
/* Many to one reduction */
|
|
pool->err_qint_idx = pool_id % lf->qints;
|
|
|
|
/* Issue AURA_INIT and POOL_INIT op */
|
|
rc = npa_lf_aura_pool_init(lf->mbox, aura_id, aura, pool);
|
|
if (rc)
|
|
goto stack_mem_free;
|
|
|
|
*aura_handle = npa_lf_aura_handle_gen(aura_id, lf->base);
|
|
|
|
/* Update aura count */
|
|
npa_lf_aura_op_cnt_set(*aura_handle, 0, block_count);
|
|
/* Read it back to make sure aura count is updated */
|
|
npa_lf_aura_op_cnt_get(*aura_handle);
|
|
|
|
return 0;
|
|
|
|
stack_mem_free:
|
|
rte_memzone_free(mz);
|
|
aura_res_put:
|
|
rte_bitmap_set(lf->npa_bmp, aura_id);
|
|
exit:
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
npa_lf_aura_pool_pair_free(struct otx2_npa_lf *lf, uint64_t aura_handle)
|
|
{
|
|
char name[RTE_MEMZONE_NAMESIZE];
|
|
int aura_id, pool_id, rc;
|
|
|
|
if (!lf || !aura_handle)
|
|
return NPA_LF_ERR_PARAM;
|
|
|
|
aura_id = pool_id = npa_lf_aura_handle_to_aura(aura_handle);
|
|
rc = npa_lf_aura_pool_fini(lf->mbox, aura_id, aura_handle);
|
|
rc |= npa_lf_stack_dma_free(lf, name, pool_id);
|
|
|
|
rte_bitmap_set(lf->npa_bmp, aura_id);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
npa_lf_aura_range_update_check(uint64_t aura_handle)
|
|
{
|
|
uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle);
|
|
struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
|
|
struct npa_aura_lim *lim = lf->aura_lim;
|
|
__otx2_io struct npa_pool_s *pool;
|
|
struct npa_aq_enq_req *req;
|
|
struct npa_aq_enq_rsp *rsp;
|
|
int rc;
|
|
|
|
req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox);
|
|
|
|
req->aura_id = aura_id;
|
|
req->ctype = NPA_AQ_CTYPE_POOL;
|
|
req->op = NPA_AQ_INSTOP_READ;
|
|
|
|
rc = otx2_mbox_process_msg(lf->mbox, (void *)&rsp);
|
|
if (rc) {
|
|
otx2_err("Failed to get pool(0x%"PRIx64") context", aura_id);
|
|
return rc;
|
|
}
|
|
|
|
pool = &rsp->pool;
|
|
|
|
if (lim[aura_id].ptr_start != pool->ptr_start ||
|
|
lim[aura_id].ptr_end != pool->ptr_end) {
|
|
otx2_err("Range update failed on pool(0x%"PRIx64")", aura_id);
|
|
return -ERANGE;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
otx2_npa_alloc(struct rte_mempool *mp)
|
|
{
|
|
uint32_t block_size, block_count;
|
|
uint64_t aura_handle = 0;
|
|
struct otx2_npa_lf *lf;
|
|
struct npa_aura_s aura;
|
|
struct npa_pool_s pool;
|
|
size_t padding;
|
|
int rc;
|
|
|
|
lf = otx2_npa_lf_obj_get();
|
|
if (lf == NULL) {
|
|
rc = -EINVAL;
|
|
goto error;
|
|
}
|
|
|
|
block_size = mp->elt_size + mp->header_size + mp->trailer_size;
|
|
/*
|
|
* OCTEON TX2 has 8 sets, 41 ways L1D cache, VA<9:7> bits dictate
|
|
* the set selection.
|
|
* Add additional padding to ensure that the element size always
|
|
* occupies odd number of cachelines to ensure even distribution
|
|
* of elements among L1D cache sets.
|
|
*/
|
|
padding = ((block_size / RTE_CACHE_LINE_SIZE) % 2) ? 0 :
|
|
RTE_CACHE_LINE_SIZE;
|
|
mp->trailer_size += padding;
|
|
block_size += padding;
|
|
|
|
block_count = mp->size;
|
|
|
|
if (block_size % OTX2_ALIGN != 0) {
|
|
otx2_err("Block size should be multiple of 128B");
|
|
rc = -ERANGE;
|
|
goto error;
|
|
}
|
|
|
|
memset(&aura, 0, sizeof(struct npa_aura_s));
|
|
memset(&pool, 0, sizeof(struct npa_pool_s));
|
|
pool.nat_align = 1;
|
|
pool.buf_offset = 1;
|
|
|
|
if ((uint32_t)pool.buf_offset * OTX2_ALIGN != mp->header_size) {
|
|
otx2_err("Unsupported mp->header_size=%d", mp->header_size);
|
|
rc = -EINVAL;
|
|
goto error;
|
|
}
|
|
|
|
/* Use driver specific mp->pool_config to override aura config */
|
|
if (mp->pool_config != NULL)
|
|
memcpy(&aura, mp->pool_config, sizeof(struct npa_aura_s));
|
|
|
|
rc = npa_lf_aura_pool_pair_alloc(lf, block_size, block_count,
|
|
&aura, &pool, &aura_handle);
|
|
if (rc) {
|
|
otx2_err("Failed to alloc pool or aura rc=%d", rc);
|
|
goto error;
|
|
}
|
|
|
|
/* Store aura_handle for future queue operations */
|
|
mp->pool_id = aura_handle;
|
|
otx2_npa_dbg("lf=%p block_sz=%d block_count=%d aura_handle=0x%"PRIx64,
|
|
lf, block_size, block_count, aura_handle);
|
|
|
|
/* Just hold the reference of the object */
|
|
otx2_npa_lf_obj_ref();
|
|
return 0;
|
|
error:
|
|
return rc;
|
|
}
|
|
|
|
static void
|
|
otx2_npa_free(struct rte_mempool *mp)
|
|
{
|
|
struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
|
|
int rc = 0;
|
|
|
|
otx2_npa_dbg("lf=%p aura_handle=0x%"PRIx64, lf, mp->pool_id);
|
|
if (lf != NULL)
|
|
rc = npa_lf_aura_pool_pair_free(lf, mp->pool_id);
|
|
|
|
if (rc)
|
|
otx2_err("Failed to free pool or aura rc=%d", rc);
|
|
|
|
/* Release the reference of npalf */
|
|
otx2_npa_lf_fini();
|
|
}
|
|
|
|
static ssize_t
|
|
otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
|
|
uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
|
|
{
|
|
size_t total_elt_sz;
|
|
|
|
/* Need space for one more obj on each chunk to fulfill
|
|
* alignment requirements.
|
|
*/
|
|
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
|
|
return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift,
|
|
total_elt_sz, min_chunk_size,
|
|
align);
|
|
}
|
|
|
|
static uint8_t
|
|
otx2_npa_l1d_way_set_get(uint64_t iova)
|
|
{
|
|
return (iova >> rte_log2_u32(RTE_CACHE_LINE_SIZE)) & 0x7;
|
|
}
|
|
|
|
static int
|
|
otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
|
|
rte_iova_t iova, size_t len,
|
|
rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
|
|
{
|
|
#define OTX2_L1D_NB_SETS 8
|
|
uint64_t distribution[OTX2_L1D_NB_SETS];
|
|
rte_iova_t start_iova;
|
|
size_t total_elt_sz;
|
|
uint8_t set;
|
|
size_t off;
|
|
int i;
|
|
|
|
if (iova == RTE_BAD_IOVA)
|
|
return -EINVAL;
|
|
|
|
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
|
|
|
|
/* Align object start address to a multiple of total_elt_sz */
|
|
off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1);
|
|
|
|
if (len < off)
|
|
return -EINVAL;
|
|
|
|
|
|
vaddr = (char *)vaddr + off;
|
|
iova += off;
|
|
len -= off;
|
|
|
|
memset(distribution, 0, sizeof(uint64_t) * OTX2_L1D_NB_SETS);
|
|
start_iova = iova;
|
|
while (start_iova < iova + len) {
|
|
set = otx2_npa_l1d_way_set_get(start_iova + mp->header_size);
|
|
distribution[set]++;
|
|
start_iova += total_elt_sz;
|
|
}
|
|
|
|
otx2_npa_dbg("iova %"PRIx64", aligned iova %"PRIx64"", iova - off,
|
|
iova);
|
|
otx2_npa_dbg("length %"PRIu64", aligned length %"PRIu64"",
|
|
(uint64_t)(len + off), (uint64_t)len);
|
|
otx2_npa_dbg("element size %"PRIu64"", (uint64_t)total_elt_sz);
|
|
otx2_npa_dbg("requested objects %"PRIu64", possible objects %"PRIu64"",
|
|
(uint64_t)max_objs, (uint64_t)(len / total_elt_sz));
|
|
otx2_npa_dbg("L1D set distribution :");
|
|
for (i = 0; i < OTX2_L1D_NB_SETS; i++)
|
|
otx2_npa_dbg("set[%d] : objects : %"PRIu64"", i,
|
|
distribution[i]);
|
|
|
|
npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len);
|
|
|
|
if (npa_lf_aura_range_update_check(mp->pool_id) < 0)
|
|
return -EBUSY;
|
|
|
|
return rte_mempool_op_populate_helper(mp,
|
|
RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ,
|
|
max_objs, vaddr, iova, len,
|
|
obj_cb, obj_cb_arg);
|
|
}
|
|
|
|
static struct rte_mempool_ops otx2_npa_ops = {
|
|
.name = "octeontx2_npa",
|
|
.alloc = otx2_npa_alloc,
|
|
.free = otx2_npa_free,
|
|
.enqueue = otx2_npa_enq,
|
|
.get_count = otx2_npa_get_count,
|
|
.calc_mem_size = otx2_npa_calc_mem_size,
|
|
.populate = otx2_npa_populate,
|
|
#if defined(RTE_ARCH_ARM64)
|
|
.dequeue = otx2_npa_deq_arm64,
|
|
#else
|
|
.dequeue = otx2_npa_deq,
|
|
#endif
|
|
};
|
|
|
|
RTE_MEMPOOL_REGISTER_OPS(otx2_npa_ops);
|