/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2022 Marvell.
 */

#ifndef IPSEC_NEON_H
#define IPSEC_NEON_H

#include "ipsec.h"
#include "neon/port_group.h"

#define MAX_TX_BURST	(MAX_PKT_BURST / 2)

extern xmm_t val_eth[RTE_MAX_ETHPORTS];

/*
 * Update source and destination MAC addresses in the ethernet header.
 */
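/*
 * For each packet, the first 12 bytes of val_eth[dst_port] become the new
 * destination and source MAC addresses; lane 3 of that vector is rebuilt
 * below from the packet's own bytes 12-15, so the 16-byte store does not
 * clobber the first two bytes of the L3 header.
 */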
static inline void
processx4_step3(struct rte_mbuf *pkts[FWDSTEP], uint16_t dst_port[FWDSTEP],
		uint64_t tx_offloads, bool ip_cksum, bool is_ipv4, uint8_t *l_pkt)
{
	uint32x4_t te[FWDSTEP];
	uint32x4_t ve[FWDSTEP];
	uint32_t *p[FWDSTEP];
	struct rte_mbuf *pkt;
	uint32_t val;
	uint8_t i;

	for (i = 0; i < FWDSTEP; i++) {
		pkt = pkts[i];

		/* Check if it is a large packet */
		if (pkt->pkt_len - RTE_ETHER_HDR_LEN > mtu_size)
			*l_pkt |= 1;

		p[i] = rte_pktmbuf_mtod(pkt, uint32_t *);
		ve[i] = vreinterpretq_u32_s32(val_eth[dst_port[i]]);
		te[i] = vld1q_u32(p[i]);

		/* Update last 4 bytes */
		val = vgetq_lane_u32(te[i], 3);
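		/*
		 * Lane 3 covers bytes 12-15 of the frame: the 2-byte EtherType
		 * plus the first 2 bytes of the L3 header.  Keep the L3 bytes
		 * from the original packet and overwrite only the EtherType;
		 * its position within the 32-bit lane depends on endianness.
		 */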
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
		val &= 0xFFFFUL << 16;
		val |= rte_cpu_to_be_16(is_ipv4 ? RTE_ETHER_TYPE_IPV4 : RTE_ETHER_TYPE_IPV6);
#else
		val &= 0xFFFFUL;
		val |= rte_cpu_to_be_16(is_ipv4 ? RTE_ETHER_TYPE_IPV4 : RTE_ETHER_TYPE_IPV6) << 16;
#endif
		ve[i] = vsetq_lane_u32(val, ve[i], 3);
		vst1q_u32(p[i], ve[i]);

		if (ip_cksum) {
			struct rte_ipv4_hdr *ip;

			pkt->ol_flags |= tx_offloads;

			ip = (struct rte_ipv4_hdr *)
				(((uintptr_t)p[i]) + RTE_ETHER_HDR_LEN);
			ip->hdr_checksum = 0;
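			/*
			 * With RTE_MBUF_F_TX_IP_CKSUM requested in ol_flags,
			 * the NIC fills in the IPv4 header checksum and the
			 * field must stay zero; otherwise compute it below.
			 */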

			/* calculate IPv4 cksum in SW */
			if ((pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) == 0)
				ip->hdr_checksum = rte_ipv4_cksum(ip);
		}
	}
}

/**
 * Process single packet:
 * Update source and destination MAC addresses in the ethernet header.
 */
static inline void
process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint64_t tx_offloads,
		bool ip_cksum, bool is_ipv4, uint8_t *l_pkt)
{
	struct rte_ether_hdr *eth_hdr;
	uint32x4_t te, ve;
	uint32_t val;

	/* Check if it is a large packet */
	if (pkt->pkt_len - RTE_ETHER_HDR_LEN > mtu_size)
		*l_pkt |= 1;

	eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);

	te = vld1q_u32((uint32_t *)eth_hdr);
	ve = vreinterpretq_u32_s32(val_eth[dst_port[0]]);

	val = vgetq_lane_u32(te, 3);
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
	val &= 0xFFFFUL << 16;
	val |= rte_cpu_to_be_16(is_ipv4 ? RTE_ETHER_TYPE_IPV4 : RTE_ETHER_TYPE_IPV6);
#else
	val &= 0xFFFFUL;
	val |= rte_cpu_to_be_16(is_ipv4 ? RTE_ETHER_TYPE_IPV4 : RTE_ETHER_TYPE_IPV6) << 16;
#endif
	ve = vsetq_lane_u32(val, ve, 3);
	vst1q_u32((uint32_t *)eth_hdr, ve);

	if (ip_cksum) {
		struct rte_ipv4_hdr *ip;

		pkt->ol_flags |= tx_offloads;

		ip = (struct rte_ipv4_hdr *)(eth_hdr + 1);
		ip->hdr_checksum = 0;

		/* calculate IPv4 cksum in SW */
		if ((pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) == 0)
			ip->hdr_checksum = rte_ipv4_cksum(ip);
	}
}

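/*
 * Hand the packets to send_single_packet() one at a time so that oversized
 * packets can be fragmented on the way out; proto selects IPv4 vs IPv6
 * handling.
 */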
static inline void
send_packets(struct rte_mbuf *m[], uint16_t port, uint32_t num, bool is_ipv4)
{
	uint8_t proto;
	uint32_t i;

	proto = is_ipv4 ? IPPROTO_IP : IPPROTO_IPV6;
	for (i = 0; i < num; i++)
		send_single_packet(m[i], port, proto);
}

static inline void
send_packetsx4(struct rte_mbuf *m[], uint16_t port, uint32_t num)
{
	unsigned int lcoreid = rte_lcore_id();
	struct lcore_conf *qconf;
	uint32_t len, j, n;

	qconf = &lcore_conf[lcoreid];

	len = qconf->tx_mbufs[port].len;

	/*
	 * If the TX buffer for that queue is empty and we have enough packets,
	 * then send them straight away.
	 */
	if (num >= MAX_TX_BURST && len == 0) {
		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
		core_stats_update_tx(n);
		if (unlikely(n < num)) {
			do {
				rte_pktmbuf_free(m[n]);
			} while (++n < num);
		}
		return;
	}

	/*
	 * Put packets into TX buffer for that queue.
	 */

	n = len + num;
	n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num;
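
	/*
	 * Unrolled copy in the style of Duff's device: the switch on
	 * n % FWDSTEP jumps into the middle of the unrolled while body so
	 * the leftover packets are copied first, after which each pass of
	 * the loop copies FWDSTEP packets.
	 */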
	j = 0;
	switch (n % FWDSTEP) {
	while (j < n) {
	case 0:
		qconf->tx_mbufs[port].m_table[len + j] = m[j];
		j++;
		/* fallthrough */
	case 3:
		qconf->tx_mbufs[port].m_table[len + j] = m[j];
		j++;
		/* fallthrough */
	case 2:
		qconf->tx_mbufs[port].m_table[len + j] = m[j];
		j++;
		/* fallthrough */
	case 1:
		qconf->tx_mbufs[port].m_table[len + j] = m[j];
		j++;
	}
	}

	len += n;

	/* enough pkts to be sent */
	if (unlikely(len == MAX_PKT_BURST)) {

		send_burst(qconf, MAX_PKT_BURST, port);

		/* copy the rest of the packets into the TX buffer. */
		len = num - n;
		if (len == 0)
			goto exit;

		j = 0;
		switch (len % FWDSTEP) {
		while (j < len) {
		case 0:
			qconf->tx_mbufs[port].m_table[j] = m[n + j];
			j++;
			/* fallthrough */
		case 3:
			qconf->tx_mbufs[port].m_table[j] = m[n + j];
			j++;
			/* fallthrough */
		case 2:
			qconf->tx_mbufs[port].m_table[j] = m[n + j];
			j++;
			/* fallthrough */
		case 1:
			qconf->tx_mbufs[port].m_table[j] = m[n + j];
			j++;
		}
		}
	}

exit:
	qconf->tx_mbufs[port].len = len;
}

/**
 * Send a burst of packets to the ports in the dst_port array.
 */
static __rte_always_inline void
send_multi_pkts(struct rte_mbuf **pkts, uint16_t dst_port[MAX_PKT_BURST],
		int nb_rx, uint64_t tx_offloads, bool ip_cksum, bool is_ipv4)
{
	unsigned int lcoreid = rte_lcore_id();
	uint16_t pnum[MAX_PKT_BURST + 1];
	uint8_t l_pkt = 0;
	uint16_t dlp, *lp;
	int i = 0, k;

	/*
	 * Finish packet processing and group consecutive
	 * packets with the same destination port.
	 */
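	/*
	 * port_groupx4() (see neon/port_group.h) fills pnum[] with the length
	 * of each run of consecutive packets that share a destination port;
	 * lp tracks the count of the run currently being extended.  The send
	 * loop at the bottom then walks dst_port[]/pnum[] one run at a time.
	 */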
	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);

	if (k != 0) {
		uint16x8_t dp1, dp2;

		lp = pnum;
		lp[0] = 1;

		processx4_step3(pkts, dst_port, tx_offloads, ip_cksum, is_ipv4, &l_pkt);

		/* dp1: <d[0], d[1], d[2], d[3], ... > */
		dp1 = vld1q_u16(dst_port);

		for (i = FWDSTEP; i != k; i += FWDSTEP) {
			processx4_step3(&pkts[i], &dst_port[i], tx_offloads, ip_cksum, is_ipv4,
					&l_pkt);

			/*
			 * dp2:
			 * <d[j-3], d[j-2], d[j-1], d[j], ... >
			 */
			dp2 = vld1q_u16(&dst_port[i - FWDSTEP + 1]);
			lp = port_groupx4(&pnum[i - FWDSTEP], lp, dp1, dp2);

			/*
			 * dp1:
			 * <d[j], d[j+1], d[j+2], d[j+3], ... >
			 */
			dp1 = vextq_u16(dp2, dp1, FWDSTEP - 1);
		}

		/*
		 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
		 */
		dp2 = vextq_u16(dp1, dp1, 1);
		dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3);
		lp = port_groupx4(&pnum[i - FWDSTEP], lp, dp1, dp2);

		/*
		 * remove values added by the last repeated
		 * dst port.
		 */
		lp[0]--;
		dlp = dst_port[i - 1];
	} else {
		/* set dlp and lp to values that are never used. */
		dlp = BAD_PORT - 1;
		lp = pnum + MAX_PKT_BURST;
	}

	/* Process up to last 3 packets one by one. */
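	/*
	 * GROUP_PORT_STEP() continues the grouping for these tail packets:
	 * it either extends the current run in pnum[] or starts a new run
	 * when the destination port changes.
	 */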
	switch (nb_rx % FWDSTEP) {
	case 3:
		process_packet(pkts[i], dst_port + i, tx_offloads, ip_cksum, is_ipv4, &l_pkt);
		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, i);
		i++;
		/* fallthrough */
	case 2:
		process_packet(pkts[i], dst_port + i, tx_offloads, ip_cksum, is_ipv4, &l_pkt);
		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, i);
		i++;
		/* fallthrough */
	case 1:
		process_packet(pkts[i], dst_port + i, tx_offloads, ip_cksum, is_ipv4, &l_pkt);
		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, i);
	}

	/*
	 * Send packets out through the destination port.
	 * Consecutive packets with the same destination port
	 * are already grouped together.
	 * If the destination port for the packet equals BAD_PORT,
	 * then free the packet without sending it out.
	 */
	for (i = 0; i < nb_rx; i += k) {

		uint16_t pn;

		pn = dst_port[i];
		k = pnum[i];

		if (likely(pn != BAD_PORT)) {
			if (l_pkt)
				/* A large packet is present; send packets
				 * one at a time so they can be fragmented.
				 */
				send_packets(pkts + i, pn, k, is_ipv4);
			else
				send_packetsx4(pkts + i, pn, k);

		} else {
			free_pkts(&pkts[i], k);
			if (is_ipv4)
				core_statistics[lcoreid].lpm4.miss++;
			else
				core_statistics[lcoreid].lpm6.miss++;
		}
	}
}
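
/*
 * Illustrative usage only (everything except the functions above is a
 * placeholder): a worker loop would typically receive a burst, resolve a
 * destination port per packet, and hand the burst to send_multi_pkts():
 *
 *	nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, MAX_PKT_BURST);
 *	// fill dst_port[0..nb_rx-1] from the route/SA lookup,
 *	// using BAD_PORT for packets that must be dropped
 *	send_multi_pkts(pkts, dst_port, nb_rx, tx_offloads, ip_cksum, is_ipv4);
 */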

#endif /* IPSEC_NEON_H */