/*
 * Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <netinet/in.h>
#include <netinet/icmp6.h>

#include <rte_config.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_ring.h>
#ifdef FF_KNI_KNI
#include <rte_kni.h>
#endif

#include "ff_dpdk_kni.h"
#include "ff_config.h"

#ifndef IPPROTO_OSPFIGP
#define IPPROTO_OSPFIGP 89 /**< OSPFIGP */
#endif

/* Total octets in ethernet header */
#define KNI_ENET_HEADER_SIZE 14

/* Total octets in the FCS */
#define KNI_ENET_FCS_SIZE 4

#ifndef RTE_KNI_NAMESIZE
#define RTE_KNI_NAMESIZE 16
#endif

#define set_bit(n, m)   ((n) | magic_bits[(m)])
#define clear_bit(n, m) ((n) & ~magic_bits[(m)])
#define get_bit(n, m)   ((n) & magic_bits[(m)])

/* Bit masks for the most-significant-bit-first bitmap layout. */
static const int magic_bits[8] = {
    0x80, 0x40, 0x20, 0x10,
    0x8, 0x4, 0x2, 0x1
};

/* One bit per TCP/UDP port: 65536 bits = 8192 bytes per bitmap. */
static unsigned char *udp_port_bitmap = NULL;
static unsigned char *tcp_port_bitmap = NULL;

/* Structure type for recording kni interface specific stats */
struct kni_interface_stats {
#ifdef FF_KNI_KNI
    struct rte_kni *kni;
#endif

    /* port id of dev or virtio_user */
    uint16_t port_id;

    /* number of pkts received from NIC, and sent to KNI */
    uint64_t rx_packets;

    /* number of pkts received from NIC, but failed to send to KNI */
    uint64_t rx_dropped;

    /* number of pkts received from KNI, and sent to NIC */
    uint64_t tx_packets;

    /* number of pkts received from KNI, but failed to send to NIC */
    uint64_t tx_dropped;
};

struct rte_ring **kni_rp;
struct kni_interface_stats **kni_stat;
struct kni_ratelimit kni_rate_limt = {0, 0, 0};

static void
set_bitmap(uint16_t port, unsigned char *bitmap)
{
    port = htons(port);
    unsigned char *p = bitmap + port / 8;
    *p = set_bit(*p, port % 8);
}

static int
get_bitmap(uint16_t port, unsigned char *bitmap)
{
    unsigned char *p = bitmap + port / 8;
    return get_bit(*p, port % 8) > 0 ? 1 : 0;
}

/*
 * Parse a comma-separated port list such as "80,443,8000-8010" and set the
 * corresponding bits; '-' denotes an inclusive range.
 */
static void
kni_set_bitmap(const char *p, unsigned char *port_bitmap)
{
    int i;
    const char *head, *tail, *tail_num;

    if (!p)
        return;

    head = p;
    while (1) {
        tail = strstr(head, ",");
        tail_num = strstr(head, "-");
        if (tail_num && (!tail || tail_num < tail - 1)) {
            /* A range: set every port from atoi(head) to atoi(tail_num + 1). */
            for (i = atoi(head); i <= atoi(tail_num + 1); ++i) {
                set_bitmap(i, port_bitmap);
            }
        } else {
            set_bitmap(atoi(head), port_bitmap);
        }

        if (!tail)
            break;
        head = tail + 1;
    }
}
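/*
 * Usage sketch (illustrative only, excluded from the build with #if 0).
 * Note the byte-order asymmetry: set_bitmap() converts the host-order port
 * from the config file with htons(), so get_bitmap() can later be indexed
 * directly with the network-order dst_port taken from a packet header.
 * For port 80 (0x0050) on a little-endian host, htons() yields 0x5000, so
 * bit 0x5000 % 8 == 0 of byte 0x5000 / 8 == 2560 is set.
 */
#if 0
#include <assert.h>

static void
port_bitmap_example(void)
{
    unsigned char bitmap[8192] = {0};

    /* As parsed from the config file, host byte order. */
    kni_set_bitmap("80,8000-8002", bitmap);

    /* As seen in a TCP/UDP header, network byte order. */
    assert(get_bitmap(htons(80), bitmap) == 1);
    assert(get_bitmap(htons(8001), bitmap) == 1);
    assert(get_bitmap(htons(443), bitmap) == 0);
}
#endif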
#ifdef FF_KNI_KNI
/* Callback for request of changing MTU. Currently we don't support changing the MTU. */
static int
kni_change_mtu(uint16_t port_id, unsigned new_mtu)
{
    return 0;
}

/* Callback for request of configuring the network interface up/down */
static int
kni_config_network_interface(uint16_t port_id, uint8_t if_up)
{
    int ret = 0;

    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }

    printf("Configure network interface of %d %s\n",
        port_id, if_up ? "up" : "down");

    ret = (if_up) ?
        rte_eth_dev_set_link_up(port_id) :
        rte_eth_dev_set_link_down(port_id);

    /*
     * Some NIC drivers crash in a secondary process after the kni interface
     * is configured, such as ENA with DPDK-21.22.3. If you hit this crash,
     * try disabling the code below and returning 0 directly. Alternatively,
     * run the primary process first, configure the kni interface in the
     * kernel, and start the secondary processes last.
     */
    if (-ENOTSUP == ret) {
        if (if_up != 0) {
            /* Configure network interface up */
            rte_eth_dev_stop(port_id);
            ret = rte_eth_dev_start(port_id);
        } else {
            /* Configure network interface down */
            rte_eth_dev_stop(port_id);
            ret = 0;
        }
    }

    if (ret < 0)
        printf("Failed to configure network interface of %d %s\n",
            port_id, if_up ? "up" : "down");

    return ret;
}

static void
print_ethaddr(const char *name, struct rte_ether_addr *mac_addr)
{
    char buf[RTE_ETHER_ADDR_FMT_SIZE];
    rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, mac_addr);
    printf("\t%s%s\n", name, buf);
}

/* Callback for request of configuring mac address */
static int
kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[])
{
    int ret = 0;

    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }

    print_ethaddr("Address:", (struct rte_ether_addr *)mac_addr);

    ret = rte_eth_dev_default_mac_addr_set(port_id,
        (struct rte_ether_addr *)mac_addr);
    if (ret < 0)
        printf("Failed to config mac_addr for port %d\n", port_id);

    return ret;
}
#endif

static int
kni_process_tx(uint16_t port_id, uint16_t queue_id,
    struct rte_mbuf **pkts_burst, unsigned count)
{
    /* read packets from the kni ring (phy port) and transmit them to kni */
    uint16_t nb_tx, nb_to_tx, nb_kni_tx = 0;
    nb_tx = rte_ring_dequeue_burst(kni_rp[port_id], (void **)pkts_burst, count, NULL);

    /*
     * Rate-limit the total number of packets forwarded to the kernel. A few
     * extra packets may be sent past the limit, but that does not matter.
     * With many processes, control packets may also end up rate-limited.
     */
    if (ff_global_cfg.kni.kernel_packets_ratelimit) {
        if (likely(kni_rate_limt.kernel_packets < ff_global_cfg.kni.kernel_packets_ratelimit)) {
            nb_to_tx = nb_tx;
        } else {
            nb_to_tx = 0;
        }
        kni_rate_limt.kernel_packets += nb_tx;
    } else {
        nb_to_tx = nb_tx;
    }

#ifdef FF_KNI_KNI
    if (ff_global_cfg.kni.type == KNI_TYPE_KNI) {
        /*
         * NB: rte_kni_tx_burst() must be called regularly, even when nb_to_tx
         * is 0 (i.e. rte_kni_tx_burst(kni, NULL, 0)), so that pending kernel
         * requests keep being serviced.
         * Details: https://embedded.communities.intel.com/thread/6668
         */
        nb_kni_tx = rte_kni_tx_burst(kni_stat[port_id]->kni, pkts_burst, nb_to_tx);
        rte_kni_handle_request(kni_stat[port_id]->kni);
    } else if (ff_global_cfg.kni.type == KNI_TYPE_VIRTIO)
#endif
    {
        nb_kni_tx = rte_eth_tx_burst(kni_stat[port_id]->port_id, 0, pkts_burst, nb_to_tx);
    }

    if (nb_kni_tx < nb_tx) {
        uint16_t i;
        for (i = nb_kni_tx; i < nb_tx; ++i)
            rte_pktmbuf_free(pkts_burst[i]);

        kni_stat[port_id]->rx_dropped += (nb_tx - nb_kni_tx);
    }

    kni_stat[port_id]->rx_packets += nb_kni_tx;
    return 0;
}
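/*
 * Exception-path data flow (descriptive sketch; ff_kni_enqueue() is defined
 * at the bottom of this file):
 *
 *   NIC rx queue --ff_kni_enqueue()--> kni_rp[port_id] ring
 *       --kni_process_tx()--> kernel interface (rte_kni or virtio_user)
 *
 *   kernel interface --kni_process_rx()--> NIC tx queue
 */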
static int
kni_process_rx(uint16_t port_id, uint16_t queue_id,
    struct rte_mbuf **pkts_burst, unsigned count)
{
    uint16_t nb_kni_rx = 0, nb_rx;

#ifdef FF_KNI_KNI
    if (ff_global_cfg.kni.type == KNI_TYPE_KNI) {
        /* read packets from kni, and transmit them to the phy port */
        nb_kni_rx = rte_kni_rx_burst(kni_stat[port_id]->kni, pkts_burst, count);
    } else if (ff_global_cfg.kni.type == KNI_TYPE_VIRTIO)
#endif
    {
        nb_kni_rx = rte_eth_rx_burst(kni_stat[port_id]->port_id, 0, pkts_burst, count);
    }

    if (nb_kni_rx > 0) {
        nb_rx = rte_eth_tx_burst(port_id, queue_id, pkts_burst, nb_kni_rx);
        if (nb_rx < nb_kni_rx) {
            uint16_t i;
            for (i = nb_rx; i < nb_kni_rx; ++i)
                rte_pktmbuf_free(pkts_burst[i]);

            kni_stat[port_id]->tx_dropped += (nb_kni_rx - nb_rx);
        }

        kni_stat[port_id]->tx_packets += nb_rx;
    }
    return 0;
}

static enum FilterReturn
protocol_filter_l4(uint16_t port, unsigned char *bitmap)
{
    if (get_bitmap(port, bitmap)) {
        return FILTER_KNI;
    }

    return FILTER_UNKNOWN;
}

static enum FilterReturn
protocol_filter_tcp(const void *data, uint16_t len)
{
    if (len < sizeof(struct rte_tcp_hdr))
        return FILTER_UNKNOWN;

    const struct rte_tcp_hdr *hdr;
    hdr = (const struct rte_tcp_hdr *)data;

    return protocol_filter_l4(hdr->dst_port, tcp_port_bitmap);
}

static enum FilterReturn
protocol_filter_udp(const void *data, uint16_t len)
{
    if (len < sizeof(struct rte_udp_hdr))
        return FILTER_UNKNOWN;

    const struct rte_udp_hdr *hdr;
    hdr = (const struct rte_udp_hdr *)data;

    return protocol_filter_l4(hdr->dst_port, udp_port_bitmap);
}

#ifdef INET6
/*
 * IPv6 extension header protocol numbers, see
 * https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml
 */
#ifndef IPPROTO_HIP
#define IPPROTO_HIP 139
#endif

#ifndef IPPROTO_SHIM6
#define IPPROTO_SHIM6 140
#endif

#ifndef IPPROTO_MH
#define IPPROTO_MH 135
#endif

/*
 * Walk the chain of IPv6 extension headers. On return, *proto holds the
 * upper-layer protocol and the return value is the total length of all
 * extension headers.
 */
static int
get_ipv6_hdr_len(uint8_t *proto, void *data, uint16_t len)
{
    int ext_hdr_len = 0;

    switch (*proto) {
    case IPPROTO_HOPOPTS:
    case IPPROTO_ROUTING:
    case IPPROTO_DSTOPTS:
    case IPPROTO_MH:
    case IPPROTO_HIP:
    case IPPROTO_SHIM6:
        /* Hdr Ext Len counts 8-octet units, not including the first unit (RFC 8200). */
        ext_hdr_len = (*((uint8_t *)data + 1) + 1) * 8;
        break;
    case IPPROTO_FRAGMENT:
        ext_hdr_len = 8;
        break;
    case IPPROTO_AH:
        /* Payload Len counts 4-octet units, minus 2 (RFC 4302). */
        ext_hdr_len = (*((uint8_t *)data + 1) + 2) * 4;
        break;
    case IPPROTO_NONE:
#ifdef FF_IPSEC
    case IPPROTO_ESP:
        //proto = *((uint8_t *)data + len - 1 - 4);
        //ext_hdr_len = len;
#endif
    default:
        return ext_hdr_len;
    }

    if (ext_hdr_len >= len) {
        return len;
    }

    /* The first octet of each extension header is the Next Header field. */
    *proto = *((uint8_t *)data);
    ext_hdr_len += get_ipv6_hdr_len(proto, data + ext_hdr_len, len - ext_hdr_len);

    return ext_hdr_len;
}

static enum FilterReturn
protocol_filter_icmp6(void *data, uint16_t len)
{
    if (len < sizeof(struct icmp6_hdr))
        return FILTER_UNKNOWN;

    const struct icmp6_hdr *hdr;
    hdr = (const struct icmp6_hdr *)data;

    /* Neighbor Discovery messages (RS/RA/NS/NA/Redirect, types 133-137) go to the kernel. */
    if (hdr->icmp6_type >= ND_ROUTER_SOLICIT && hdr->icmp6_type <= ND_REDIRECT)
        return FILTER_NDP;

    return FILTER_UNKNOWN;
}
#endif
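/*
 * Worked example (illustrative only, excluded from the build with #if 0):
 * an IPv6 payload that starts with a minimal Hop-by-Hop options header
 * followed by a TCP header. Hdr Ext Len = 0 means "0 additional 8-octet
 * units", i.e. 8 bytes in total (RFC 8200).
 */
#if 0
static void
ipv6_ext_hdr_example(void)
{
    /* Hop-by-Hop options header (8 bytes) followed by a 20-byte TCP header. */
    uint8_t buf[28] = {
        IPPROTO_TCP,    /* Next Header */
        0,              /* Hdr Ext Len: 0 extra 8-octet units */
        /* 6 bytes of padding options, then the TCP header */
    };
    uint8_t proto = IPPROTO_HOPOPTS;

    int hlen = get_ipv6_hdr_len(&proto, buf, sizeof(buf));
    /* Now hlen == 8 and proto == IPPROTO_TCP; the TCP header starts at buf + hlen. */
}
#endif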
static enum FilterReturn
protocol_filter_ip(const void *data, uint16_t len, uint16_t eth_frame_type)
{
    uint8_t proto;
    int hdr_len;
    void *next;
    uint16_t next_len;

    if (eth_frame_type == RTE_ETHER_TYPE_IPV4) {
        if (len < sizeof(struct rte_ipv4_hdr))
            return FILTER_UNKNOWN;

        const struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *)data;
        hdr_len = (hdr->version_ihl & 0x0f) << 2;
        if (len < hdr_len)
            return FILTER_UNKNOWN;

        proto = hdr->next_proto_id;
#ifdef INET6
    } else if (eth_frame_type == RTE_ETHER_TYPE_IPV6) {
        if (len < sizeof(struct rte_ipv6_hdr))
            return FILTER_UNKNOWN;

        hdr_len = sizeof(struct rte_ipv6_hdr);
        proto = ((struct rte_ipv6_hdr *)data)->proto;
        hdr_len += get_ipv6_hdr_len(&proto, (void *)data + hdr_len, len - hdr_len);
        if (len < hdr_len)
            return FILTER_UNKNOWN;
#endif
    } else {
        return FILTER_UNKNOWN;
    }

    next = (void *)data + hdr_len;
    next_len = len - hdr_len;

    switch (proto) {
#ifdef FF_KNI
    /* OSPF packets are forwarded to kni and rate-limited separately. */
    case IPPROTO_OSPFIGP:
        return FILTER_OSPF;
#endif
    case IPPROTO_TCP:
#ifdef FF_KNI
        if (!enable_kni)
#endif
            break;

        return protocol_filter_tcp(next, next_len);
    case IPPROTO_UDP:
#ifdef FF_KNI
        if (!enable_kni)
#endif
            break;

        return protocol_filter_udp(next, next_len);
    case IPPROTO_IPIP:
        return protocol_filter_ip(next, next_len, RTE_ETHER_TYPE_IPV4);
#ifdef INET6
    case IPPROTO_IPV6:
        return protocol_filter_ip(next, next_len, RTE_ETHER_TYPE_IPV6);
    case IPPROTO_ICMPV6:
        return protocol_filter_icmp6(next, next_len);
#endif
    }

    return FILTER_UNKNOWN;
}

enum FilterReturn
ff_kni_proto_filter(const void *data, uint16_t len, uint16_t eth_frame_type)
{
    return protocol_filter_ip(data, len, eth_frame_type);
}
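/*
 * Hypothetical caller sketch (illustrative only, excluded from the build with
 * #if 0): how an RX loop might classify a received frame and divert matches
 * to the kernel. The function name and its parameters are assumptions of this
 * sketch, not part of this file's API.
 */
#if 0
static void
rx_filter_example(uint16_t port_id, struct rte_mbuf *pkt)
{
    struct rte_ether_hdr *eh = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
    uint16_t ether_type = rte_be_to_cpu_16(eh->ether_type);

    enum FilterReturn f = ff_kni_proto_filter((void *)(eh + 1),
        rte_pktmbuf_data_len(pkt) - sizeof(struct rte_ether_hdr), ether_type);
    if (f != FILTER_UNKNOWN)
        ff_kni_enqueue(f, port_id, pkt);    /* ff_kni_enqueue() takes ownership */
}
#endif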
void
ff_kni_init(uint16_t nb_ports, int type, const char *tcp_ports, const char *udp_ports)
{
    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        kni_stat = rte_zmalloc("kni:stat",
            sizeof(struct kni_interface_stats *) * nb_ports,
            RTE_CACHE_LINE_SIZE);
        if (kni_stat == NULL)
            rte_exit(EXIT_FAILURE,
                "rte_zmalloc(kni:stat (struct kni_interface_stats **)) failed\n");

        if (type == KNI_TYPE_KNI) {
#ifdef FF_KNI_KNI
            rte_kni_init(nb_ports);
#endif
        }
    }

    uint16_t lcoreid = rte_lcore_id();
    char name_buf[RTE_RING_NAMESIZE];
    snprintf(name_buf, RTE_RING_NAMESIZE, "kni::ring_%d", lcoreid);
    kni_rp = rte_zmalloc(name_buf,
        sizeof(struct rte_ring *) * nb_ports,
        RTE_CACHE_LINE_SIZE);
    if (kni_rp == NULL) {
        rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) failed\n",
            name_buf);
    }

    /* 8192 bytes = 65536 bits, one bit for every TCP/UDP port. */
    snprintf(name_buf, RTE_RING_NAMESIZE, "kni:tcp_port_bitmap_%d", lcoreid);
    tcp_port_bitmap = rte_zmalloc("kni:tcp_port_bitmap", 8192, RTE_CACHE_LINE_SIZE);
    if (tcp_port_bitmap == NULL) {
        rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (tcp_port_bitmap)) failed\n",
            name_buf);
    }

    snprintf(name_buf, RTE_RING_NAMESIZE, "kni:udp_port_bitmap_%d", lcoreid);
    udp_port_bitmap = rte_zmalloc("kni:udp_port_bitmap", 8192, RTE_CACHE_LINE_SIZE);
    if (udp_port_bitmap == NULL) {
        rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (udp_port_bitmap)) failed\n",
            name_buf);
    }

    memset(tcp_port_bitmap, 0, 8192);
    memset(udp_port_bitmap, 0, 8192);

    kni_set_bitmap(tcp_ports, tcp_port_bitmap);
    kni_set_bitmap(udp_ports, udp_port_bitmap);
}

void
ff_kni_alloc(uint16_t port_id, unsigned socket_id, int type, int port_idx,
    struct rte_mempool *mbuf_pool, unsigned ring_queue_size)
{
    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        struct rte_eth_dev_info dev_info;
        struct rte_ether_addr addr = {{0}};
        int ret;

        kni_stat[port_id] = (struct kni_interface_stats *)rte_zmalloc(
            "kni:stat_lcore",
            sizeof(struct kni_interface_stats),
            RTE_CACHE_LINE_SIZE);
        if (kni_stat[port_id] == NULL) {
            rte_panic("rte_zmalloc kni_interface_stats failed\n");
        }

        kni_stat[port_id]->rx_packets = 0;
        kni_stat[port_id]->rx_dropped = 0;
        kni_stat[port_id]->tx_packets = 0;
        kni_stat[port_id]->tx_dropped = 0;

        memset(&dev_info, 0, sizeof(dev_info));
        ret = rte_eth_dev_info_get(port_id, &dev_info);
        if (ret != 0) {
            rte_panic("kni get dev info %u failed!\n", port_id);
        }

        /* Get the interface default mac address */
        rte_eth_macaddr_get(port_id, (struct rte_ether_addr *)&addr);
        printf("ff_kni_alloc get Port %u MAC:"RTE_ETHER_ADDR_PRT_FMT"\n",
            (unsigned)port_id, RTE_ETHER_ADDR_BYTES(&addr));

#ifdef FF_KNI_KNI
        if (type == KNI_TYPE_KNI) {
            struct rte_kni_conf conf;
            struct rte_kni_ops ops;

            /* only support one kni */
            memset(&conf, 0, sizeof(conf));
            snprintf(conf.name, RTE_KNI_NAMESIZE, "veth%u", port_id);
            conf.core_id = rte_lcore_id();
            conf.force_bind = 1;
            conf.group_id = port_id;

            uint16_t mtu;
            rte_eth_dev_get_mtu(port_id, &mtu);
            conf.mbuf_size = mtu + KNI_ENET_HEADER_SIZE + KNI_ENET_FCS_SIZE;
            rte_memcpy(&conf.addr, addr.addr_bytes, RTE_ETHER_ADDR_LEN);

            memset(&ops, 0, sizeof(ops));
            ops.port_id = port_id;
            ops.change_mtu = kni_change_mtu;
            ops.config_network_if = kni_config_network_interface;
            ops.config_mac_address = kni_config_mac_address;

            kni_stat[port_id]->kni = rte_kni_alloc(mbuf_pool, &conf, &ops);
            if (kni_stat[port_id]->kni == NULL)
                rte_panic("create kni on port %u failed!\n", port_id);
            else
                printf("create kni on port %u success!\n", port_id);

            kni_stat[port_id]->port_id = port_id;
        } else if (type == KNI_TYPE_VIRTIO)
#endif
        {
            /*
             * Add a virtio_user port for the exception path (KNI replacement), see
             * https://doc.dpdk.org/guides/howto/virtio_user_as_exception_path.html#virtio-user-as-exception-path
             */
            char port_name[32];
            char port_args[256];

            /* set the name and arguments */
            snprintf(port_name, sizeof(port_name), "virtio_user%u", port_id);
            snprintf(port_args, sizeof(port_args),
                "path=/dev/vhost-net,queues=1,queue_size=%u,iface=veth%d,mac=" RTE_ETHER_ADDR_PRT_FMT,
                ring_queue_size, port_id, RTE_ETHER_ADDR_BYTES(&addr));

            printf("ff_kni_alloc to rte_eal_hotplug_add virtio user port, portname:%s, portargs:%s\n",
                port_name, port_args);

            /* add the vdev for virtio_user */
            if (rte_eal_hotplug_add("vdev", port_name, port_args) < 0) {
                rte_exit(EXIT_FAILURE,
                    "ff_kni_alloc cannot create virtio user paired port for port %u\n",
                    port_id);
            }

            kni_stat[port_id]->port_id = port_idx + nb_dev_ports;
        }
    }

    char ring_name[RTE_KNI_NAMESIZE];
    snprintf((char *)ring_name, RTE_KNI_NAMESIZE, "kni_ring_%u", port_id);

    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        kni_rp[port_id] = rte_ring_create(ring_name, ring_queue_size,
            socket_id, RING_F_SC_DEQ);

        if (rte_ring_lookup(ring_name) != kni_rp[port_id])
            rte_panic("lookup kni ring failed!\n");
    } else {
        kni_rp[port_id] = rte_ring_lookup(ring_name);
    }

    if (kni_rp[port_id] == NULL)
        rte_panic("create kni ring failed!\n");

    printf("create kni ring success, %u ring entries are now free!\n",
        rte_ring_free_count(kni_rp[port_id]));
}

void
ff_kni_process(uint16_t port_id, uint16_t queue_id,
    struct rte_mbuf **pkts_burst, unsigned count)
{
    kni_process_tx(port_id, queue_id, pkts_burst, count);
    kni_process_rx(port_id, queue_id, pkts_burst, count);
}
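/*
 * Lifecycle sketch (illustrative only, excluded from the build with #if 0):
 * a minimal single-port setup, assuming EAL and the NIC port are already
 * initialized. The function name, the mbuf_pool parameter, and the burst
 * size of 32 are assumptions of this sketch.
 */
#if 0
static void
kni_lifecycle_example(struct rte_mempool *mbuf_pool)
{
    struct rte_mbuf *pkts_burst[32];

    ff_kni_init(1, KNI_TYPE_VIRTIO, "80,443", "53");
    ff_kni_alloc(0, rte_socket_id(), KNI_TYPE_VIRTIO, 0, mbuf_pool, 1024);

    for (;;) {
        /* The real loop also polls the NIC and runs the filter/enqueue step. */
        ff_kni_process(0, 0, pkts_burst, 32);   /* pump both directions */
    }
}
#endif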
/*
 * Enqueue the packet for the kernel. This function takes ownership of the
 * mbuf: on failure it is freed here, so the caller must not touch it again.
 */
int
ff_kni_enqueue(enum FilterReturn filter, uint16_t port_id, struct rte_mbuf *pkt)
{
    if (filter >= FILTER_ARP) {
        /* Control packets (ARP, NDP, OSPF) fall under the console packet ratelimit. */
        if (ff_global_cfg.kni.console_packets_ratelimit) {
            kni_rate_limt.console_packets++;
            if (kni_rate_limt.console_packets > ff_global_cfg.kni.console_packets_ratelimit) {
                goto error;
            }
        }
    } else {
        if (ff_global_cfg.kni.general_packets_ratelimit) {
            kni_rate_limt.gerneal_packets++;
            if (kni_rate_limt.gerneal_packets > ff_global_cfg.kni.general_packets_ratelimit) {
                goto error;
            }
        }
    }

    int ret = rte_ring_enqueue(kni_rp[port_id], pkt);
    if (ret < 0) {
        goto error;
    }

    return 0;

error:
    rte_pktmbuf_free(pkt);
    return -1;
}