f-stack/lib/ff_dpdk_kni.c

614 lines
17 KiB
C
Raw Normal View History

2017-04-21 10:43:26 +00:00
/*
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <stdlib.h>
#include <arpa/inet.h>
#include <netinet/icmp6.h>
2017-04-21 10:43:26 +00:00
#include <rte_config.h>
#include <rte_ether.h>
#include <rte_bus_pci.h>
2017-04-21 10:43:26 +00:00
#include <rte_ethdev.h>
#include <rte_kni.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include "ff_dpdk_kni.h"
#include "ff_config.h"
/* Callback for request of changing MTU */
/* Total octets in ethernet header */
#define KNI_ENET_HEADER_SIZE 14
/* Total octets in the FCS */
#define KNI_ENET_FCS_SIZE 4

/*
 * Bit helpers for the port bitmaps.
 *
 * Bit index 0 selects the most significant bit of a byte (mask 0x80) and
 * bit index 7 the least significant one (mask 0x1), as laid out in
 * magic_bits[]. The macros only compute a value; they never modify `n`.
 *
 * Every argument and every expansion is parenthesized so that the helpers
 * compose correctly with arbitrary expressions (e.g. a ternary passed as n).
 */
#define set_bit(n, m)   ((n) | magic_bits[(m)])
#define clear_bit(n, m) ((n) & (~magic_bits[(m)]))
#define get_bit(n, m)   ((n) & magic_bits[(m)])

/* Single-bit masks indexed by bit position, MSB first. */
static const int magic_bits[8] = {
    0x80, 0x40, 0x20, 0x10,
    0x8, 0x4, 0x2, 0x1
};
/*
 * Port filter bitmaps, one for UDP and one for TCP. Each is allocated as a
 * zeroed 8192-byte buffer in ff_kni_init() and filled from the configured
 * port lists; a set bit makes protocol_filter_l4() return FILTER_KNI for
 * that destination port.
 */
static unsigned char *udp_port_bitmap = NULL;
static unsigned char *tcp_port_bitmap = NULL;
/* Structure type for recording kni interface specific stats */
struct kni_interface_stats {
/* KNI device handle of the port, set from rte_kni_alloc() in ff_kni_alloc() */
struct rte_kni *kni;
/* number of pkts received from NIC, and sent to KNI */
uint64_t rx_packets;
/* number of pkts received from NIC, but failed to send to KNI */
uint64_t rx_dropped;
/* number of pkts received from KNI, and sent to NIC */
uint64_t tx_packets;
/* number of pkts received from KNI, but failed to send to NIC */
uint64_t tx_dropped;
};
/* Array of rings indexed by port id; entries are filled in by ff_kni_alloc() */
struct rte_ring **kni_rp;
/* Array of per-port KNI handle/counters indexed by port id (primary process) */
struct kni_interface_stats **kni_stat;
/*
 * Last link state recorded by log_link_state(); also forced to DOWN when the
 * interface is configured down. ff_kni_process() polls the link while DOWN.
 */
int kni_link = ETH_LINK_DOWN;
2017-04-21 10:43:26 +00:00
/*
 * Mark a port in a port bitmap.
 *
 * The port is converted with htons() before it is used as the bit index, so
 * the bitmap is keyed by the port value as it appears on the wire.
 *
 * port   - port number to mark (converted with htons() here)
 * bitmap - bitmap to update; must cover all 65536 bit positions
 */
static void
set_bitmap(uint16_t port, unsigned char *bitmap)
{
    const uint16_t net_port = htons(port);
    unsigned char *slot = &bitmap[net_port >> 3];

    *slot = set_bit(*slot, net_port & 7);
}
/*
 * Test whether a port is marked in a port bitmap.
 *
 * Unlike set_bitmap(), no byte-order conversion is applied: `port` is used
 * directly as the bit index.
 *
 * Returns 1 when the bit is set, 0 otherwise.
 */
static int
get_bitmap(uint16_t port, unsigned char *bitmap)
{
    const unsigned char byte = bitmap[port >> 3];

    if (get_bit(byte, port & 7) > 0)
        return 1;

    return 0;
}
/*
 * Parse a comma-separated port list and mark every listed port in a bitmap.
 *
 * Each list entry is either a single port ("80") or an inclusive range
 * ("8000-8010"). Numbers are parsed with atoi(). Values outside 0..65535
 * are not allowed to wrap around onto unrelated ports: out-of-range single
 * ports are skipped and ranges are clamped to the valid port space.
 *
 * p           - port list string; NULL means "no ports" and is a no-op
 * port_bitmap - bitmap that receives the marks (see set_bitmap())
 */
static void
kni_set_bitmap(const char *p, unsigned char *port_bitmap)
{
    const char *head, *comma, *dash;
    int first, last, port;

    if (!p)
        return;

    for (head = p; ; head = comma + 1) {
        comma = strstr(head, ",");
        dash = strstr(head, "-");

        /* A '-' belongs to this entry only if it precedes the next ','. */
        if (dash && (!comma || dash < comma - 1)) {
            first = atoi(head);
            last = atoi(dash + 1);  /* parsed once, not per iteration */
        } else {
            first = last = atoi(head);
        }

        /* Keep the loop inside the 16-bit port space. */
        if (first < 0)
            first = 0;
        if (last > UINT16_MAX)
            last = UINT16_MAX;

        for (port = first; port <= last; ++port)
            set_bitmap(port, port_bitmap);

        if (!comma)
            break;
    }
}
/*
 * KNI callback for MTU change requests.
 *
 * Changing the MTU is currently not supported: the request is ignored and
 * success (0) is reported.
 */
static int
kni_change_mtu(uint16_t port_id, unsigned new_mtu)
{
    (void)port_id;
    (void)new_mtu;

    return 0;
}
/*
 * Record and print a link state transition of a KNI interface.
 *
 * kni  - KNI device whose name is printed; NULL makes the call a no-op
 * prev - previous link state (ETH_LINK_UP / ETH_LINK_DOWN)
 * link - freshly read link information; NULL makes the call a no-op
 *
 * Only DOWN->UP and UP->DOWN transitions are acted upon; the global
 * kni_link is updated to the new state in those two cases.
 */
static void
log_link_state(struct rte_kni *kni, int prev, struct rte_eth_link *link)
{
    int went_up, went_down;

    if (!kni || !link)
        return;

    went_up = (prev == ETH_LINK_DOWN) && (link->link_status == ETH_LINK_UP);
    went_down = (prev == ETH_LINK_UP) && (link->link_status == ETH_LINK_DOWN);

    if (went_up) {
        const char *autoneg = link->link_autoneg ? "(AutoNeg)" : "(Fixed)";
        const char *duplex = link->link_duplex ? "Full Duplex" : "Half Duplex";

        kni_link = ETH_LINK_UP;
        printf("%s NIC Link is Up %d Mbps %s %s.\n",
            rte_kni_get_name(kni),
            link->link_speed,
            autoneg,
            duplex);
    } else if (went_down) {
        kni_link = ETH_LINK_DOWN;
        printf("%s NIC Link is Down.\n",
            rte_kni_get_name(kni));
    }
}
/*
 * Read the current link status of one port (without waiting) and push it to
 * the KNI interface of that port, logging any state transition.
 *
 * Despite its name, this function handles only the port given by port_id.
 * It always returns NULL.
 */
static void *
monitor_all_ports_link_status(uint16_t port_id)
{
    struct rte_eth_link link;
    int prev;

    memset(&link, 0, sizeof(link));
    rte_eth_link_get_nowait(port_id, &link);

    /* rte_kni_update_link() hands back the previous state of the interface. */
    prev = rte_kni_update_link(kni_stat[port_id]->kni, link.link_status);
    log_link_state(kni_stat[port_id]->kni, prev, &link);

    return NULL;
}
2017-04-21 10:43:26 +00:00
/*
 * KNI callback for requests to bring the network interface up or down.
 *
 * port_id - ethdev port backing the KNI interface
 * if_up   - non-zero to bring the link up, zero to bring it down
 *
 * The link is first switched with rte_eth_dev_set_link_up()/_down(). When
 * that reports -ENOTSUP, the device is stopped instead, and started again
 * if the request was "up". Bringing the interface down also resets the
 * global kni_link to ETH_LINK_DOWN.
 *
 * Returns -EINVAL for an invalid port id, otherwise the result of the last
 * ethdev call (0 when the fallback only stopped the device).
 */
static int
kni_config_network_interface(uint16_t port_id, uint8_t if_up)
{
    int ret = 0;

    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }

    printf("Configure network interface of %d %s\n",
        port_id, if_up ? "up" : "down");

    ret = (if_up) ?
        rte_eth_dev_set_link_up(port_id) :
        rte_eth_dev_set_link_down(port_id);

    if (-ENOTSUP == ret) {
        /* Fallback: "up" is a stop + start cycle, "down" is a plain stop. */
        rte_eth_dev_stop(port_id);
        ret = (if_up != 0) ? rte_eth_dev_start(port_id) : 0;
    }

    if (!if_up)
        kni_link = ETH_LINK_DOWN;

    if (ret < 0)
        printf("Failed to Configure network interface of %d %s\n",
            port_id, if_up ? "up" : "down");

    return ret;
}
/*
 * Print an ethernet address as "\t<name><formatted address>\n".
 *
 * name     - label printed directly in front of the address
 * mac_addr - address to format with ether_format_addr()
 */
static void
print_ethaddr(const char *name, struct ether_addr *mac_addr)
{
    char text[ETHER_ADDR_FMT_SIZE];

    ether_format_addr(text, sizeof(text), mac_addr);
    printf("\t%s%s\n", name, text);
}
/*
 * KNI callback for requests to change the MAC address.
 *
 * port_id  - ethdev port backing the KNI interface
 * mac_addr - new address, interpreted as a struct ether_addr
 *
 * Returns -EINVAL for an invalid port id, otherwise the result of
 * rte_eth_dev_default_mac_addr_set().
 */
static int
kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[])
{
    struct ether_addr *addr = (struct ether_addr *)mac_addr;
    int rc;

    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }

    print_ethaddr("Address:", addr);

    rc = rte_eth_dev_default_mac_addr_set(port_id, addr);
    if (rc < 0)
        printf("Failed to config mac_addr for port %d\n", port_id);

    return rc;
}
2017-04-21 10:43:26 +00:00
static int
kni_process_tx(uint16_t port_id, uint16_t queue_id,
2017-04-21 10:43:26 +00:00
struct rte_mbuf **pkts_burst, unsigned count)
{
/* read packet from kni ring(phy port) and transmit to kni */
uint16_t nb_tx, nb_kni_tx;
nb_tx = rte_ring_dequeue_burst(kni_rp[port_id], (void **)pkts_burst, count, NULL);
2017-04-21 10:43:26 +00:00
/* NB.
* if nb_tx is 0,it must call rte_kni_tx_burst
* must Call regularly rte_kni_tx_burst(kni, NULL, 0).
* detail https://embedded.communities.intel.com/thread/6668
*/
nb_kni_tx = rte_kni_tx_burst(kni_stat[port_id]->kni, pkts_burst, nb_tx);
rte_kni_handle_request(kni_stat[port_id]->kni);
if(nb_kni_tx < nb_tx) {
uint16_t i;
for(i = nb_kni_tx; i < nb_tx; ++i)
rte_pktmbuf_free(pkts_burst[i]);
kni_stat[port_id]->rx_dropped += (nb_tx - nb_kni_tx);
}
kni_stat[port_id]->rx_packets += nb_kni_tx;
return 0;
}
static int
kni_process_rx(uint16_t port_id, uint16_t queue_id,
2017-04-21 10:43:26 +00:00
struct rte_mbuf **pkts_burst, unsigned count)
{
uint16_t nb_kni_rx, nb_rx;
/* read packet from kni, and transmit to phy port */
nb_kni_rx = rte_kni_rx_burst(kni_stat[port_id]->kni, pkts_burst, count);
if (nb_kni_rx > 0) {
nb_rx = rte_eth_tx_burst(port_id, queue_id, pkts_burst, nb_kni_rx);
if (nb_rx < nb_kni_rx) {
uint16_t i;
for(i = nb_rx; i < nb_kni_rx; ++i)
rte_pktmbuf_free(pkts_burst[i]);
kni_stat[port_id]->tx_dropped += (nb_kni_rx - nb_rx);
}
kni_stat[port_id]->tx_packets += nb_rx;
}
return 0;
}
/*
 * Classify a transport-layer destination port against a port bitmap.
 *
 * port   - destination port exactly as read from the packet header
 *          (callers pass hdr->dst_port without conversion)
 * bitmap - TCP or UDP port bitmap
 *
 * Returns FILTER_KNI when the port is marked in the bitmap, otherwise
 * FILTER_UNKNOWN.
 */
static enum FilterReturn
protocol_filter_l4(uint16_t port, unsigned char *bitmap)
{
    if (get_bitmap(port, bitmap)) {
        return FILTER_KNI;
    }

    return FILTER_UNKNOWN;
}
/*
 * Classify a TCP segment by its destination port.
 *
 * data - start of the TCP header
 * len  - number of bytes available at `data`
 *
 * Returns FILTER_UNKNOWN when fewer bytes than a TCP header are available,
 * otherwise the result of looking the destination port up in the TCP port
 * bitmap.
 */
static enum FilterReturn
protocol_filter_tcp(const void *data, uint16_t len)
{
    const struct tcp_hdr *tcp = (const struct tcp_hdr *)data;

    if (len >= sizeof(struct tcp_hdr))
        return protocol_filter_l4(tcp->dst_port, tcp_port_bitmap);

    return FILTER_UNKNOWN;
}
/*
 * Classify a UDP datagram by its destination port.
 *
 * data - start of the UDP header
 * len  - number of bytes available at `data`
 *
 * Returns FILTER_UNKNOWN when fewer bytes than a UDP header are available,
 * otherwise the result of looking the destination port up in the UDP port
 * bitmap.
 */
static enum FilterReturn
protocol_filter_udp(const void* data,uint16_t len)
{
    const struct udp_hdr *udp = (const struct udp_hdr *)data;

    if (len >= sizeof(struct udp_hdr))
        return protocol_filter_l4(udp->dst_port, udp_port_bitmap);

    return FILTER_UNKNOWN;
}
#ifdef INET6
/*
* https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml
*/
#ifndef IPPROTO_HIP
#define IPPROTO_HIP 139
#endif
#ifndef IPPROTO_SHIM6
#define IPPROTO_SHIM6 140
#endif

/*
 * Walk a chain of IPv6 extension headers.
 *
 * proto - in: protocol number of the header starting at `data`;
 *         out: protocol number of the first header that was not skipped
 * data  - first byte after the fixed IPv6 header (or after a previously
 *         skipped extension header)
 * len   - number of bytes available at `data`
 *
 * Returns the total number of bytes occupied by the skipped extension
 * headers. When an extension header is truncated or fills the remaining
 * bytes completely, `len` is returned and *proto is left at that header's
 * protocol number.
 *
 * Header sizes:
 *  - Hop-by-Hop, Routing, Destination Options, Mobility, HIP, Shim6: the
 *    length byte counts 8-octet units NOT including the first 8 octets,
 *    i.e. (len_byte + 1) * 8 bytes.
 *  - Fragment: fixed 8 bytes.
 *  - AH: the length byte counts 4-octet units minus 2,
 *    i.e. (len_byte + 2) * 4 bytes.
 *
 * The chain is walked iteratively so that a crafted packet cannot drive
 * deep recursion, and the length byte is read only when it is in bounds.
 */
static int
get_ipv6_hdr_len(uint8_t *proto, void *data, uint16_t len)
{
    const uint8_t *cur = (const uint8_t *)data;
    uint16_t remaining = len;
    int total = 0;

    for (;;) {
        int ext_hdr_len;

        switch (*proto) {
        case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS:
        case IPPROTO_MH: case IPPROTO_HIP: case IPPROTO_SHIM6:
            if (remaining < 2)
                return total + remaining;
            ext_hdr_len = (cur[1] + 1) * 8;
            break;
        case IPPROTO_FRAGMENT:
            ext_hdr_len = 8;
            break;
        case IPPROTO_AH:
            if (remaining < 2)
                return total + remaining;
            ext_hdr_len = (cur[1] + 2) * 4;
            break;
        case IPPROTO_NONE:
#ifdef FF_IPSEC
        case IPPROTO_ESP:
            /* ESP is not parsed; it ends the walk like any upper layer. */
#endif
        default:
            return total;
        }

        /* Truncated header, or nothing left behind it. */
        if (ext_hdr_len >= remaining)
            return total + remaining;

        /* First byte of every extension header is "Next Header". */
        *proto = cur[0];
        cur += ext_hdr_len;
        remaining = (uint16_t)(remaining - ext_hdr_len);
        total += ext_hdr_len;
    }
}
/*
 * Classify an ICMPv6 message.
 *
 * data - start of the ICMPv6 header
 * len  - number of bytes available at `data`
 *
 * Returns FILTER_NDP for message types in the range
 * ND_ROUTER_SOLICIT..ND_REDIRECT, otherwise (or when fewer bytes than an
 * ICMPv6 header are available) FILTER_UNKNOWN.
 */
static enum FilterReturn
protocol_filter_icmp6(void *data, uint16_t len)
{
    const struct icmp6_hdr *icmp6 = (const struct icmp6_hdr *)data;

    if (len < sizeof(struct icmp6_hdr))
        return FILTER_UNKNOWN;

    if (icmp6->icmp6_type < ND_ROUTER_SOLICIT || icmp6->icmp6_type > ND_REDIRECT)
        return FILTER_UNKNOWN;

    return FILTER_NDP;
}
#endif
/*
 * Classify an IP packet.
 *
 * data           - start of the IP header
 * len            - number of bytes available at `data`
 * eth_frame_type - ETHER_TYPE_IPv4, or ETHER_TYPE_IPv6 when built with INET6
 *
 * The IP header (and, for IPv6, its extension headers) is skipped and the
 * payload is dispatched by protocol:
 *  - TCP / UDP: looked up in the port bitmaps, but only when built with
 *    FF_KNI and enable_kni is set; otherwise FILTER_UNKNOWN
 *  - IP-in-IP / IPv6-in-IP: the inner packet is classified recursively
 *  - ICMPv6 (INET6 only): see protocol_filter_icmp6()
 * Everything else, and every truncated or malformed header, yields
 * FILTER_UNKNOWN.
 */
static enum FilterReturn
protocol_filter_ip(const void *data, uint16_t len, uint16_t eth_frame_type)
{
    uint8_t proto;
    int hdr_len;
    void *next;
    uint16_t next_len;

    if (eth_frame_type == ETHER_TYPE_IPv4) {
        if (len < sizeof(struct ipv4_hdr))
            return FILTER_UNKNOWN;

        const struct ipv4_hdr *hdr = (const struct ipv4_hdr *)data;
        hdr_len = (hdr->version_ihl & 0x0f) << 2;
        /*
         * An IHL below 5 words is invalid. Rejecting it also guarantees
         * that the IP-in-IP recursion below always consumes bytes; with
         * IHL == 0 it would re-parse the same header forever.
         */
        if (hdr_len < (int)sizeof(struct ipv4_hdr) || len < hdr_len)
            return FILTER_UNKNOWN;

        proto = hdr->next_proto_id;
#ifdef INET6
    } else if (eth_frame_type == ETHER_TYPE_IPv6) {
        if (len < sizeof(struct ipv6_hdr))
            return FILTER_UNKNOWN;

        hdr_len = sizeof(struct ipv6_hdr);
        proto = ((const struct ipv6_hdr *)data)->proto;
        /* Skip extension headers; proto becomes the upper-layer protocol. */
        hdr_len += get_ipv6_hdr_len(&proto, (uint8_t *)data + hdr_len, len - hdr_len);

        if (len < hdr_len)
            return FILTER_UNKNOWN;
#endif
    } else {
        return FILTER_UNKNOWN;
    }

    next = (uint8_t *)data + hdr_len;
    next_len = len - hdr_len;

    switch (proto) {
        case IPPROTO_TCP:
#ifdef FF_KNI
            if (!enable_kni)
                break;
#else
            break;
#endif
            return protocol_filter_tcp(next, next_len);
        case IPPROTO_UDP:
#ifdef FF_KNI
            if (!enable_kni)
                break;
#else
            break;
#endif
            return protocol_filter_udp(next, next_len);
        case IPPROTO_IPIP:
            return protocol_filter_ip(next, next_len, ETHER_TYPE_IPv4);
#ifdef INET6
        case IPPROTO_IPV6:
            return protocol_filter_ip(next, next_len, ETHER_TYPE_IPv6);
        case IPPROTO_ICMPV6:
            return protocol_filter_icmp6(next, next_len);
#endif
    }

    return FILTER_UNKNOWN;
}
/*
 * Public entry point of the KNI packet classifier.
 *
 * data           - start of the IP header of the received frame
 * len            - number of bytes available at `data`
 * eth_frame_type - ethernet frame type of the packet
 *
 * Returns the classification computed by protocol_filter_ip().
 */
enum FilterReturn
ff_kni_proto_filter(const void *data, uint16_t len, uint16_t eth_frame_type)
{
    return protocol_filter_ip(data, len, eth_frame_type);
}
void
ff_kni_init(uint16_t nb_ports, const char *tcp_ports, const char *udp_ports)
2017-04-21 10:43:26 +00:00
{
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
kni_stat = rte_zmalloc("kni:stat",
sizeof(struct kni_interface_stats *) * nb_ports,
RTE_CACHE_LINE_SIZE);
if (kni_stat == NULL)
rte_exit(EXIT_FAILURE, "rte_zmalloc(1 (struct netio_kni_stat *)) "
"failed\n");
rte_kni_init(nb_ports);
}
uint16_t lcoreid = rte_lcore_id();
char name_buf[RTE_RING_NAMESIZE];
snprintf(name_buf, RTE_RING_NAMESIZE, "kni::ring_%d", lcoreid);
kni_rp = rte_zmalloc(name_buf,
sizeof(struct rte_ring *) * nb_ports,
RTE_CACHE_LINE_SIZE);
if (kni_rp == NULL) {
rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) "
"failed\n", name_buf);
}
snprintf(name_buf, RTE_RING_NAMESIZE, "kni:tcp_port_bitmap_%d", lcoreid);
tcp_port_bitmap = rte_zmalloc("kni:tcp_port_bitmap", 8192,
RTE_CACHE_LINE_SIZE);
if (tcp_port_bitmap == NULL) {
rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (tcp_port_bitmap)) "
"failed\n", name_buf);
}
snprintf(name_buf, RTE_RING_NAMESIZE, "kni:udp_port_bitmap_%d", lcoreid);
udp_port_bitmap = rte_zmalloc("kni:udp_port_bitmap", 8192,
RTE_CACHE_LINE_SIZE);
if (udp_port_bitmap == NULL) {
rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (udp_port_bitmap)) "
"failed\n",name_buf);
}
memset(tcp_port_bitmap, 0, 8192);
memset(udp_port_bitmap, 0, 8192);
kni_set_bitmap(tcp_ports, tcp_port_bitmap);
kni_set_bitmap(udp_ports, udp_port_bitmap);
}
void
ff_kni_alloc(uint16_t port_id, unsigned socket_id,
2017-06-12 08:12:22 +00:00
struct rte_mempool *mbuf_pool, unsigned ring_queue_size)
2017-04-21 10:43:26 +00:00
{
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
struct rte_kni_conf conf;
struct rte_kni_ops ops;
struct rte_eth_dev_info dev_info;
2018-12-06 14:17:51 +00:00
const struct rte_pci_device *pci_dev;
2018-12-06 14:41:39 +00:00
const struct rte_bus *bus = NULL;
2017-04-21 10:43:26 +00:00
kni_stat[port_id] = (struct kni_interface_stats*)rte_zmalloc(
"kni:stat_lcore",
sizeof(struct kni_interface_stats),
RTE_CACHE_LINE_SIZE);
if (kni_stat[port_id] == NULL)
rte_panic("rte_zmalloc kni_interface_stats failed\n");
/* only support one kni */
memset(&conf, 0, sizeof(conf));
snprintf(conf.name, RTE_KNI_NAMESIZE, "veth%u", port_id);
conf.core_id = rte_lcore_id();
conf.force_bind = 1;
conf.group_id = port_id;
uint16_t mtu;
rte_eth_dev_get_mtu(port_id, &mtu);
conf.mbuf_size = mtu + KNI_ENET_HEADER_SIZE + KNI_ENET_FCS_SIZE;
memset(&dev_info, 0, sizeof(dev_info));
rte_eth_dev_info_get(port_id, &dev_info);
2019-05-21 14:19:57 +00:00
2018-12-06 14:17:51 +00:00
if (dev_info.device)
bus = rte_bus_find_by_device(dev_info.device);
if (bus && !strcmp(bus->name, "pci")) {
pci_dev = RTE_DEV_TO_PCI(dev_info.device);
conf.addr = pci_dev->addr;
conf.id = pci_dev->id;
}
/* Get the interface default mac address */
rte_eth_macaddr_get(port_id,
(struct ether_addr *)&conf.mac_addr);
2017-04-21 10:43:26 +00:00
memset(&ops, 0, sizeof(ops));
ops.port_id = port_id;
ops.change_mtu = kni_change_mtu;
ops.config_network_if = kni_config_network_interface;
ops.config_mac_address = kni_config_mac_address;
2017-04-21 10:43:26 +00:00
kni_stat[port_id]->kni = rte_kni_alloc(mbuf_pool, &conf, &ops);
if (kni_stat[port_id]->kni == NULL)
rte_panic("create kni on port %u failed!\n", port_id);
else
printf("create kni on port %u success!\n", port_id);
kni_stat[port_id]->rx_packets = 0;
kni_stat[port_id]->rx_dropped = 0;
kni_stat[port_id]->tx_packets = 0;
kni_stat[port_id]->tx_dropped = 0;
}
char ring_name[RTE_KNI_NAMESIZE];
snprintf((char*)ring_name, RTE_KNI_NAMESIZE, "kni_ring_%u", port_id);
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
2017-06-12 08:12:22 +00:00
kni_rp[port_id] = rte_ring_create(ring_name, ring_queue_size,
2017-04-21 10:43:26 +00:00
socket_id, RING_F_SC_DEQ);
if (rte_ring_lookup(ring_name) != kni_rp[port_id])
rte_panic("lookup kni ring failed!\n");
2017-04-21 10:43:26 +00:00
} else {
kni_rp[port_id] = rte_ring_lookup(ring_name);
}
if (kni_rp[port_id] == NULL)
rte_panic("create kni ring failed!\n");
printf("create kni ring success, %u ring entries are now free!\n",
rte_ring_free_count(kni_rp[port_id]));
}
void
ff_kni_process(uint16_t port_id, uint16_t queue_id,
2017-04-21 10:43:26 +00:00
struct rte_mbuf **pkts_burst, unsigned count)
{
if (unlikely(kni_link == ETH_LINK_DOWN)) {
monitor_all_ports_link_status(port_id);
}
2017-04-21 10:43:26 +00:00
kni_process_tx(port_id, queue_id, pkts_burst, count);
kni_process_rx(port_id, queue_id, pkts_burst, count);
}
/* enqueue the packet, and own it */
int
ff_kni_enqueue(uint16_t port_id, struct rte_mbuf *pkt)
2017-04-21 10:43:26 +00:00
{
int ret = rte_ring_enqueue(kni_rp[port_id], pkt);
if (ret < 0)
rte_pktmbuf_free(pkt);
return 0;
}