Sync some features from branch of dev.

1. Added F-Stack FreeBSD support. see 9f7a1423a0 .
2. Enable INET6 by default in helloworld. see 51c91ab0ad .
3. Added FDIR support. see 4854315d0d .
4. To avoid compiling errors when gcc version >= 10. see 6daadb0b5c .
5. Make `struct linux_sockaddr` match the layout of `struct sockaddr` on Linux. see d96a9d10f4 .
6. Sync some modifications of ff_config.c, including setting the dpdk log level, avoiding memory leaks, supporting vip_addr and vip_addr6, etc. see git log lib/ff_config.c in branch of dev.
7. Sync some modified of ff_syscall_wrapper.c, include ff_sendmsg, ff_recvmsg, ip6_opt_convert, etc. see git log lib/ff_syscall_wrapper.c in branch of dev.
8. The CPU time spent in packet_dispatcher() is now accounted as usr. see 0508c8b8ec .
9. If process_dispatch_ring() has data packets to process, the loop iteration is considered to be in a non-idle state. see 81dd6c7a24 .
10. Fix multi-segment packets not being fully counted in ff_traffic.rx_packets and ff_traffic.rx_bytes. see 0b4a084c8a .
11. Added FF_IPSEC=1 in lib/Makefile, disable by default.
12. Some other modified.
This commit is contained in:
fengbojiang 2023-09-12 21:01:47 +08:00
parent 785f85dbe6
commit ef24ad2478
10 changed files with 885 additions and 126 deletions

View File

@ -38,30 +38,35 @@ Currently, besides authorized DNS server of DNSPod, there are various products i
yum install numactl-devel # on Centos
#sudo apt-get install libnuma-dev # on Ubuntu
# Install dependencies (FreeBSD only)
#pkg install meson pkgconf py38-pyelftools
cd f-stack
# Compile DPDK
cd dpdk/usertools
./dpdk-setup.sh # compile with x86_64-native-linuxapp-gcc
# Set hugepage
# Set hugepage (Linux only)
# single-node system
echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# or NUMA
# or NUMA (Linux only)
echo 1024 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
echo 1024 > /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages
# Using Hugepage with the DPDK
# Using Hugepage with the DPDK (Linux only)
mkdir /mnt/huge
mount -t hugetlbfs nodev /mnt/huge
# Close ASLR; it is necessary in multiple process
# Close ASLR; it is necessary in multiple process (Linux only)
echo 0 > /proc/sys/kernel/randomize_va_space
# Install python for running DPDK python scripts
sudo apt install python # On ubuntu
#sudo pkg install python # On FreeBSD
# Offload NIC
# For Linux:
modprobe uio
insmod /data/f-stack/dpdk/x86_64-native-linuxapp-gcc/kmod/igb_uio.ko
insmod /data/f-stack/dpdk/x86_64-native-linuxapp-gcc/kmod/rte_kni.ko carrier=on # carrier=on is necessary, otherwise need to be up `veth0` via `echo 1 > /sys/class/net/veth0/carrier`
@ -69,6 +74,15 @@ Currently, besides authorized DNS server of DNSPod, there are various products i
ifconfig eth0 down
python dpdk-devbind.py --bind=igb_uio eth0 # assuming that use 10GE NIC and eth0
# For FreeBSD:
# Refer DPDK FreeBSD guide to set tunables in /boot/loader.conf
# Below is an example used for our testing machine
#echo "hw.nic_uio.bdfs=\"2:0:0\"" >> /boot/loader.conf
#echo "hw.contigmem.num_buffers=1" >> /boot/loader.conf
#echo "hw.contigmem.buffer_size=1073741824" >> /boot/loader.conf
#kldload contigmem
#kldload nic_uio
# Install DPDK
cd ../x86_64-native-linuxapp-gcc
make install
@ -77,20 +91,23 @@ Currently, besides authorized DNS server of DNSPod, there are various products i
#sudo apt-get install gawk # or execute `sudo update-alternatives --config awk` to choose gawk.
# Install dependencies for F-Stack
sudo apt install gcc make libssl-dev # On ubuntu
sudo apt install gcc make libssl-dev # On ubuntu
#sudo pkg install gcc gmake openssl pkgconf libepoll-shim # On FreeBSD
# Compile F-Stack
export FF_PATH=/data/f-stack
export FF_DPDK=/data/f-stack/dpdk/x86_64-native-linuxapp-gcc
cd ../../lib/
make
make # On Linux
#gmake # On FreeBSD
# Install F-STACK
# libfstack.a will be installed to /usr/local/lib
# ff_*.h will be installed to /usr/local/include
# start.sh will be installed to /usr/local/bin/ff_start
# config.ini will be installed to /etc/f-stack.conf
make install
make install # On Linux
#gmake install # On FreeBSD
#### Nginx

View File

@ -18,6 +18,22 @@ extern "C" {
typedef cpuset_t rte_cpuset_t;
#ifdef RTE_EAL_FREEBSD_CPUSET_LEGACY
#if __FreeBSD_version >= 1301000
#define RTE_CPU_AND(dst, src1, src2) do \
{ \
cpuset_t tmp; \
CPU_COPY(src1, &tmp); \
CPU_AND(&tmp, &tmp, src2); \
CPU_COPY(&tmp, dst); \
} while (0)
#define RTE_CPU_OR(dst, src1, src2) do \
{ \
cpuset_t tmp; \
CPU_COPY(src1, &tmp); \
CPU_OR(&tmp, &tmp, src2); \
CPU_COPY(&tmp, dst); \
} while (0)
#else
#define RTE_CPU_AND(dst, src1, src2) do \
{ \
cpuset_t tmp; \
@ -32,6 +48,7 @@ typedef cpuset_t rte_cpuset_t;
CPU_OR(&tmp, src2); \
CPU_COPY(&tmp, dst); \
} while (0)
#endif
#define RTE_CPU_FILL(set) CPU_FILL(set)
/* In FreeBSD 13 CPU_NAND macro is CPU_ANDNOT */
@ -44,6 +61,15 @@ typedef cpuset_t rte_cpuset_t;
CPU_COPY(&tmp, dst); \
} while (0)
#else
#if __FreeBSD_version >= 1301000
#define RTE_CPU_NOT(dst, src) do \
{ \
cpuset_t tmp; \
CPU_FILL(&tmp); \
CPU_ANDNOT(&tmp, &tmp, src); \
CPU_COPY(&tmp, dst); \
} while (0)
#else
#define RTE_CPU_NOT(dst, src) do \
{ \
cpuset_t tmp; \
@ -51,6 +77,7 @@ typedef cpuset_t rte_cpuset_t;
CPU_ANDNOT(&tmp, src); \
CPU_COPY(&tmp, dst); \
} while (0)
#endif
#endif /* CPU_NAND */
#else /* RTE_EAL_FREEBSD_CPUSET_LEGACY */

View File

@ -14,7 +14,7 @@ LIBS+= -Wl,--no-whole-archive -lrt -lm -ldl -lcrypto -pthread -lnuma
TARGET="helloworld"
all:
cc -O -gdwarf-2 -I../lib -o ${TARGET} main.c ${LIBS}
cc -O -gdwarf-2 -I../lib -DINET6 -o ${TARGET} main.c ${LIBS}
cc -O -gdwarf-2 -I../lib -o ${TARGET}_epoll main_epoll.c ${LIBS}
.PHONY: clean

View File

@ -1,6 +1,6 @@
#
#
# Copyright (c) 2013 Patrick Kelsey. All rights reserved.
# Copyright (C) 2017 THL A29 Limited, a Tencent company.
# Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
# All rights reserved.
#
# Derived in part from libuinet's Makefile.
@ -20,19 +20,31 @@ PREFIX_INCLUDE=/usr/local/include
PREFIX_BIN=/usr/local/bin
F-STACK_CONF=/etc/f-stack.conf
F-STACK_VERSION=1.21
TGT_OS=$(shell uname)
ifeq ($(TGT_OS),FreeBSD)
CC=gcc
endif
HOST_OS:=$(shell uname -s)
#DEBUG=-O0 -gdwarf-2 -g3 -Wno-format-truncation
# No DPDK KNI support on FreeBSD
ifneq ($(TGT_OS),FreeBSD)
FF_KNI=1
endif
#FF_FLOW_ISOLATE=1
#FF_FDIR=1
# NETGRAPH drivers ipfw
#FF_NETGRAPH=1
#FF_IPFW=1
#FF_USE_PAGE_ARRAY=1
#FF_ZC_SEND=1
FF_INET6=1
#FF_IPSEC=1
include ${TOPDIR}/mk/kern.pre.mk
@ -45,7 +57,7 @@ endif
endif
ifdef RTE_SDK
ifeq (${MACHINE_CPUARCH},aarch64)
ifeq (${MACHINE_CPUARCH},aarch64)
FF_DPDK=${RTE_SDK}/build
else
FF_DPDK=${RTE_SDK}/x86_64-native-linuxapp-gcc
@ -65,10 +77,15 @@ INCLUDES+= -I./opt
# Include search path for files that only include host OS headers
HOST_INCLUDES= -I.
# Use libepoll shim on FreeBSD
ifeq ($(TGT_OS),FreeBSD)
HOST_INCLUDES+= -I/usr/local/include/libepoll-shim
endif
ifndef DEBUG
HOST_CFLAGS = -O2 -frename-registers -funswitch-loops -fweb -Wno-format-truncation
else
HOST_CFLAGS = ${DEBUG}
HOST_CFLAGS = ${DEBUG}
endif
ifdef FF_KNI
@ -78,6 +95,10 @@ endif
HOST_CFLAGS+= ${DPDK_CFLAGS}
HOST_CFLAGS+= ${CONF_CFLAGS}
ifdef FF_FDIR
HOST_CFLAGS+= -DFF_FDIR
endif
ifdef FF_FLOW_ISOLATE
HOST_CFLAGS+= -DFF_FLOW_ISOLATE
endif
@ -94,11 +115,29 @@ ifdef FF_USE_PAGE_ARRAY
HOST_CFLAGS+= -DFF_USE_PAGE_ARRAY
endif
HOST_CFLAGS+= -DINET
CFLAGS+= -DINET
ifdef FF_INET6
HOST_CFLAGS+= -DINET6
CFLAGS+= -DINET6
endif
ifdef FF_IPSEC
HOST_CFLAGS+= -DIPSEC
CFLAGS+= -DIPSEC
endif
GCCVERGE10 = $(shell expr `gcc -dumpversion | cut -f1 -d.` \>= 10)
ifeq "$(GCCVERGE10)" "1"
CFLAGS+= -Wno-error=stringop-overflow
endif
GCCVERGE11 = $(shell expr `gcc -dumpversion | cut -f1 -d.` \>= 11)
ifeq "$(GCCVERGE11)" "1"
CFLAGS+= -Wno-error=stringop-overread
endif
HOST_C= ${CC} -c $(HOST_CFLAGS) ${HOST_INCLUDES} ${WERROR} ${PROF} $<
@ -223,7 +262,7 @@ FF_HOST_SRCS+= \
ff_dpdk_if.c \
ff_dpdk_pcap.c \
ff_epoll.c \
ff_init.c
ff_init.c
ifdef FF_KNI
FF_HOST_SRCS+= \
@ -331,7 +370,7 @@ LIBKERN_SRCS+= \
jenkins_hash.c \
strlcpy.c \
strnlen.c \
zlib.c
zlib.c
endif
@ -495,11 +534,15 @@ NETINET6_SRCS+= \
#ip6_ipsec.c
#sctp6_usrreq.c
#in6_rss.c
ifneq ($(TGT_OS),FreeBSD)
ifndef FF_KNI
FF_HOST_SRCS+= \
ff_dpdk_kni.c
endif
endif
endif #FF_KNI
endif #FreeBSD OS Check
endif #INET6
ifdef FF_IPFW
NETIPFW_SRCS+= \
@ -600,7 +643,7 @@ all: libfstack.a
libfstack.a: machine_includes ff_api.symlist ${MHEADERS} ${MSRCS} ${HOST_OBJS} ${ASM_OBJS} ${OBJS}
${LD} -d -r -o $*.ro ${ASM_OBJS} ${OBJS}
nm $*.ro | grep -v ' U ' | cut -d ' ' -f 3 > $*_localize_list.tmp
objcopy --localize-symbols=$*_localize_list.tmp $*.ro
objcopy --localize-symbols=$*_localize_list.tmp $*.ro
rm $*_localize_list.tmp
objcopy --globalize-symbols=ff_api.symlist $*.ro
rm -f $@

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
* Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -41,7 +41,7 @@ extern "C" {
struct linux_sockaddr {
short sa_family;
char sa_data[126];
char sa_data[14];
};
#define AF_INET6_LINUX 10
@ -64,6 +64,15 @@ int ff_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
int ff_ioctl(int fd, unsigned long request, ...);
/*
* While get sockfd from this API, and then need set it to non-blocking mode like this,
* Otherwise, sometimes the socket interface will not work properly, such as `ff_write()`
*
* int on = 1;
* ff_ioctl(sockfd, FIONBIO, &on);
*
* See also `example/main.c`
*/
int ff_socket(int domain, int type, int protocol);
int ff_setsockopt(int s, int level, int optname, const void *optval,
@ -87,6 +96,21 @@ int ff_getsockname(int s, struct linux_sockaddr *name,
ssize_t ff_read(int d, void *buf, size_t nbytes);
ssize_t ff_readv(int fd, const struct iovec *iov, int iovcnt);
/*
* Write data to the socket sendspace buf.
*
* Note:
* The `fd` parameter need set non-blocking mode in advance if F-Stack's APP.
* Otherwise if the `nbytes` parameter is greater than
* `net.inet.tcp.sendspace + net.inet.tcp.sendbuf_inc`,
* the API will return -1, but not the length that has been sent.
*
* You also can modify the value of `net.inet.tcp.sendspace`(default 16384 bytes)
* and `net.inet.tcp.sendbuf_inc`(default 16384 bytes) with `config.ini`.
* But it should be noted that not all parameters can take effect, such as 32768 and 32768.
* `ff_sysctl` can be used to read these values while the APP is running.
*/
ssize_t ff_write(int fd, const void *buf, size_t nbytes);
ssize_t ff_writev(int fd, const struct iovec *iov, int iovcnt);
@ -106,10 +130,10 @@ int ff_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
int ff_poll(struct pollfd fds[], nfds_t nfds, int timeout);
int ff_kqueue(void);
int ff_kevent(int kq, const struct kevent *changelist, int nchanges,
int ff_kevent(int kq, const struct kevent *changelist, int nchanges,
struct kevent *eventlist, int nevents, const struct timespec *timeout);
int ff_kevent_do_each(int kq, const struct kevent *changelist, int nchanges,
void *eventlist, int nevents, const struct timespec *timeout,
int ff_kevent_do_each(int kq, const struct kevent *changelist, int nchanges,
void *eventlist, int nevents, const struct timespec *timeout,
void (*do_each)(void **, struct kevent *));
int ff_gettimeofday(struct timeval *tv, struct timezone *tz);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
* Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -191,6 +191,7 @@ freebsd_conf_handler(struct ff_config *cfg, const char *section,
}
} else {
fprintf(stderr, "freebsd conf section[%s] error\n", section);
free(newconf);
return 0;
}
@ -366,6 +367,81 @@ parse_port_slave_list(struct ff_port_cfg *cfg, const char *v_str)
return res;
}
/*
 * Split the ';'-separated string in cur->vip_addr_str into tokens with
 * rte_strsplit() and store the token pointers on the port configuration
 * (cur->vip_addr_array, with the count in cur->nb_vip).
 *
 * Only the pointers are copied; presumably they point into
 * cur->vip_addr_str itself, so that string must stay alive as long as the
 * array is used -- TODO confirm against rte_strsplit().
 *
 * Returns 1 when the list was stored, and also when no token was found
 * (that case is only reported on stdout). Returns 0 when the pointer array
 * cannot be allocated.
 */
static int
vip_cfg_handler(struct ff_port_cfg *cur)
{
    char *tokens[VIP_MAX_NUM];
    int ntok;
    int i;

    ntok = rte_strsplit(cur->vip_addr_str, strlen(cur->vip_addr_str),
        &tokens[0], VIP_MAX_NUM, ';');
    if (ntok <= 0) {
        fprintf(stdout, "vip_cfg_handler nb_vip is 0, not set vip_addr or set invalid vip_addr %s\n",
            cur->vip_addr_str);
        return 1;
    }

    cur->nb_vip = ntok;
    cur->vip_addr_array = (char **)calloc(cur->nb_vip, sizeof(char *));
    if (cur->vip_addr_array == NULL) {
        /* Nothing was allocated, so only the count has to be rolled back. */
        fprintf(stderr, "vip_cfg_handler malloc failed\n");
        cur->nb_vip = 0;
        return 0;
    }

    for (i = 0; i < ntok; i++) {
        cur->vip_addr_array[i] = tokens[i];
    }

    return 1;
}
#ifdef INET6
/*
 * Split the ';'-separated string in cur->vip_addr6_str into tokens with
 * rte_strsplit() and store the token pointers on the port configuration
 * (cur->vip_addr6_array, with the count in cur->nb_vip6).
 *
 * Only the pointers are copied; presumably they point into
 * cur->vip_addr6_str itself, so that string must stay alive as long as the
 * array is used -- TODO confirm against rte_strsplit().
 *
 * Returns 1 when the list was stored, and also when no usable token was
 * found (that case is only reported on stdout). Returns 0 when the pointer
 * array cannot be allocated.
 */
static int
vip6_cfg_handler(struct ff_port_cfg *cur)
{
    char *vip_addr6_array[VIP_MAX_NUM];
    int ret;

    ret = rte_strsplit(cur->vip_addr6_str, strlen(cur->vip_addr6_str),
        &vip_addr6_array[0], VIP_MAX_NUM, ';');
    /*
     * Reject negative results as well as zero, exactly like
     * vip_cfg_handler(): nb_vip6 is unsigned, so a negative value would
     * otherwise be stored as a huge element count and fed to calloc().
     */
    if (ret <= 0) {
        fprintf(stdout, "vip6_cfg_handler nb_vip6 is 0, not set vip_addr6 or set invalid vip_addr6 %s\n",
            cur->vip_addr6_str);
        return 1;
    }

    cur->nb_vip6 = ret;
    cur->vip_addr6_array = (char **)calloc(cur->nb_vip6, sizeof(char *));
    if (cur->vip_addr6_array == NULL) {
        /* Nothing was allocated, so only the count has to be rolled back. */
        fprintf(stderr, "vip6_cfg_handler malloc failed\n");
        cur->nb_vip6 = 0;
        return 0;
    }

    memcpy(cur->vip_addr6_array, vip_addr6_array,
        cur->nb_vip6 * sizeof(char *));

    return 1;
}
#endif
static int
port_cfg_handler(struct ff_config *cfg, const char *section,
const char *name, const char *value) {
@ -414,7 +490,9 @@ port_cfg_handler(struct ff_config *cfg, const char *section,
cur->port_id = portid;
}
if (strcmp(name, "addr") == 0) {
if (strcmp(name, "if_name") == 0) {
cur->ifname = strdup(value);
} else if (strcmp(name, "addr") == 0) {
cur->addr = strdup(value);
} else if (strcmp(name, "netmask") == 0) {
cur->netmask = strdup(value);
@ -422,26 +500,33 @@ port_cfg_handler(struct ff_config *cfg, const char *section,
cur->broadcast = strdup(value);
} else if (strcmp(name, "gateway") == 0) {
cur->gateway = strdup(value);
} else if (strcmp(name, "pcap") == 0) {
cur->pcap = strdup(value);
} else if (strcmp(name, "lcore_list") == 0) {
return parse_port_lcore_list(cur, value);
} else if (strcmp(name, "slave_port_list") == 0) {
return parse_port_slave_list(cur, value);
} else if (strcmp(name, "vip_addr") == 0) {
cur->vip_addr_str = strdup(value);
if (cur->vip_addr_str) {
return vip_cfg_handler(cur);
}
} else if (strcmp(name, "vip_ifname") == 0) {
cur->vip_ifname = strdup(value);
}
#ifdef INET6
else if (0 == strcmp(name, "addr6"))
{
else if (0 == strcmp(name, "addr6")) {
cur->addr6_str = strdup(value);
}
else if (0 == strcmp(name, "prefix_len"))
{
} else if (0 == strcmp(name, "prefix_len")) {
cur->prefix_len = atoi(value);
}
else if (0 == strcmp(name, "gateway6"))
{
} else if (0 == strcmp(name, "gateway6")) {
cur->gateway6_str = strdup(value);
} else if (strcmp(name, "vip_addr6") == 0) {
cur->vip_addr6_str = strdup(value);
if (cur->vip_addr6_str) {
return vip6_cfg_handler(cur);
}
} else if (0 == strcmp(name, "vip_prefix_len")) {
cur->vip_prefix_len = atoi(value);
}
#endif
@ -571,7 +656,9 @@ ini_parse_handler(void* user, const char* section, const char* name,
printf("[%s]: %s=%s\n", section, name, value);
#define MATCH(s, n) strcmp(section, s) == 0 && strcmp(name, n) == 0
if (MATCH("dpdk", "channel")) {
if (MATCH("dpdk", "log_level")) {
pconfig->dpdk.log_level = atoi(value);
} else if (MATCH("dpdk", "channel")) {
pconfig->dpdk.nb_channel = atoi(value);
} else if (MATCH("dpdk", "memory")) {
pconfig->dpdk.memory = atoi(value);
@ -640,9 +727,9 @@ ini_parse_handler(void* user, const char* section, const char* name,
return bond_cfg_handler(pconfig, section, name, value);
} else if (strcmp(section, "pcap") == 0) {
if (strcmp(name, "snaplen") == 0) {
pconfig->pcap.snap_len = (uint16_t)atoi(value);
pconfig->pcap.snap_len = (uint16_t)atoi(value);
} else if (strcmp(name, "savelen") == 0) {
pconfig->pcap.save_len = (uint32_t)atoi(value);
pconfig->pcap.save_len = (uint32_t)atoi(value);
} else if (strcmp(name, "enable") == 0) {
pconfig->pcap.enable = (uint16_t)atoi(value);
} else if (strcmp(name, "savepath") == 0) {
@ -675,6 +762,10 @@ dpdk_args_setup(struct ff_config *cfg)
sprintf(temp, "-m%d", cfg->dpdk.memory);
dpdk_argv[n++] = strdup(temp);
}
if (cfg->dpdk.log_level) {
sprintf(temp, "--log-level=%d", cfg->dpdk.log_level);
dpdk_argv[n++] = strdup(temp);
}
if (cfg->dpdk.proc_type) {
sprintf(temp, "--proc-type=%s", cfg->dpdk.proc_type);
dpdk_argv[n++] = strdup(temp);
@ -688,8 +779,14 @@ dpdk_args_setup(struct ff_config *cfg)
dpdk_argv[n++] = strdup(temp);
}
if (cfg->dpdk.pci_whitelist) {
sprintf(temp, "--pci-whitelist=%s", cfg->dpdk.pci_whitelist);
dpdk_argv[n++] = strdup(temp);
char* token;
char* rest = cfg->dpdk.pci_whitelist;
while ((token = strtok_r(rest, ",", &rest))){
sprintf(temp, "--pci-whitelist=%s", token);
dpdk_argv[n++] = strdup(temp);
}
}
if (cfg->dpdk.nb_vdev) {
@ -785,6 +882,7 @@ dpdk_args_setup(struct ff_config *cfg)
for (i=0; i<n; i++)
printf("%s ", dpdk_argv[i]);
printf("\n");
return n;
}
@ -948,7 +1046,16 @@ ff_load_config(int argc, char *const argv[])
ret = ini_parse(ff_global_cfg.filename, ini_parse_handler,
&ff_global_cfg);
if (ret != 0) {
printf("parse %s failed on line %d\n", ff_global_cfg.filename, ret);
switch(ret) {
case -1:
printf("failed to open file %s\n", ff_global_cfg.filename);
break;
case -2:
printf("failed to allocate memory for config parsing\n");
break;
default:
printf("parse %s failed on line %d\n", ff_global_cfg.filename, ret);
}
return -1;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
* Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -44,6 +44,8 @@ extern char *dpdk_argv[DPDK_CONFIG_NUM + 1];
#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
#define VIP_MAX_NUM 64
struct ff_hw_features {
uint8_t rx_csum;
uint8_t rx_lro;
@ -54,6 +56,7 @@ struct ff_hw_features {
struct ff_port_cfg {
char *name;
char *ifname;
uint8_t port_id;
uint8_t mac[6];
struct ff_hw_features hw_features;
@ -62,15 +65,21 @@ struct ff_port_cfg {
char *broadcast;
char *gateway;
#ifdef INET6
char *addr6_str;
char *gateway6_str;
uint8_t prefix_len;
#endif
char *vip_ifname;
char *vip_addr_str;
char **vip_addr_array;
uint32_t nb_vip;
char *pcap;
uint16_t snaplen;
uint32_t savelen;
#ifdef INET6
char *addr6_str;
char *gateway6_str;
uint8_t prefix_len;
char *vip_addr6_str;
char **vip_addr6_array;
uint32_t nb_vip6;
uint8_t vip_prefix_len;
#endif
int nb_lcores;
int nb_slaves;
@ -155,6 +164,9 @@ struct ff_config {
int nb_ports;
uint16_t max_portid;
uint16_t *portid_list;
// load dpdk log level
uint16_t log_level;
// MAP(portid => struct ff_port_cfg*)
struct ff_port_cfg *port_cfgs;
struct ff_vdev_cfg *vdev_cfgs;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
* Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -78,6 +78,7 @@ static int numa_on;
static unsigned idle_sleep;
static unsigned pkt_tx_delay;
static uint64_t usr_cb_tsc;
static struct rte_timer freebsd_clock;
@ -357,7 +358,7 @@ init_mem_pool(void)
} else {
printf("create mbuf pool on socket %d\n", socketid);
}
#ifdef FF_USE_PAGE_ARRAY
nb_mbuf = RTE_ALIGN_CEIL (
nb_ports*nb_lcores*MAX_PKT_BURST +
@ -704,6 +705,9 @@ init_port_start(void)
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
pconf->hw_features.tx_tso = 1;
}
else {
printf("TSO is not supported\n");
}
} else {
printf("TSO is disabled\n");
}
@ -736,7 +740,7 @@ init_port_start(void)
uint16_t q;
for (q = 0; q < nb_queues; q++) {
if (numa_on) {
uint16_t lcore_id = lcore_conf.port_cfgs[port_id].lcore_list[q];
uint16_t lcore_id = lcore_conf.port_cfgs[u_port_id].lcore_list[q];
socketid = rte_lcore_to_socket_id(lcore_id);
}
mbuf_pool = pktmbuf_pool[socketid];
@ -792,8 +796,8 @@ init_port_start(void)
if (ret < 0) {
return ret;
}
//RSS reta update will failed when enable flow isolate
#ifndef FF_FLOW_ISOLATE
//RSS reta update will failed when enable flow isolate
#ifndef FF_FLOW_ISOLATE
if (nb_queues > 1) {
/* set HW rss hash function to Toeplitz. */
if (!rte_eth_dev_filter_supported(port_id, RTE_ETH_FILTER_HASH)) {
@ -810,7 +814,7 @@ init_port_start(void)
set_rss_table(port_id, dev_info.reta_size, nb_queues);
}
#endif
#endif
/* Enable RX in promiscuous mode for the Ethernet device. */
if (ff_global_cfg.dpdk.promiscuous) {
@ -848,7 +852,7 @@ init_clock(void)
return 0;
}
#ifdef FF_FLOW_ISOLATE
#if defined(FF_FLOW_ISOLATE) || defined(FF_FDIR)
/** Print a message out of a flow error. */
static int
port_flow_complain(struct rte_flow_error *error)
@ -875,7 +879,7 @@ port_flow_complain(struct rte_flow_error *error)
const char *errstr;
char buf[32];
int err = rte_errno;
if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
!errstrlist[error->type])
errstr = "unknown type";
@ -889,12 +893,15 @@ port_flow_complain(struct rte_flow_error *error)
rte_strerror(err));
return -err;
}
#endif
#ifdef FF_FLOW_ISOLATE
static int
port_flow_isolate(uint16_t port_id, int set)
{
struct rte_flow_error error;
/* Poisoning to make sure PMDs update it in case of error. */
memset(&error, 0x66, sizeof(error));
if (rte_flow_isolate(port_id, set, &error))
@ -1055,6 +1062,110 @@ init_flow(uint16_t port_id, uint16_t tcp_port) {
#endif
#ifdef FF_FDIR
/*
* Flow director allows the traffic to specific port to be processed on the
* specific queue. Unlike FF_FLOW_ISOLATE, the FF_FDIR implementation uses
* general flow rule so that most FDIR supported NIC will support. The best
* using case of FDIR is (but not limited to), using multiple processes to
* listen on different ports.
*
* This function can be called either in FSTACK or in end-application.
*
* Example:
* Given 2 fstack instances A and B. Instance A listens on port 80, and
* instance B listens on port 81. We want to process the traffic to port 80
* on rx queue 0, and the traffic to port 81 on rx queue 1.
* // port 80 rx queue 0
* ret = fdir_add_tcp_flow(port_id, 0, FF_FLOW_INGRESS, 0, 80);
* // port 81 rx queue 1
* ret = fdir_add_tcp_flow(port_id, 1, FF_FLOW_INGRESS, 0, 81);
*/
#define FF_FLOW_EGRESS 1
#define FF_FLOW_INGRESS 2
/**
 * Create a flow rule that moves TCP-over-IPv4 packets with matching source
 * and destination TCP ports to the target queue.
 *
 * This function uses general flow rules and doesn't rely on the flow
 * isolation mode, which not every FDIR-capable NIC supports.
 *
 * @param port_id
 *   The selected port.
 * @param queue
 *   The target rx queue index.
 * @param dir
 *   The direction of the traffic, a bit mask of FF_FLOW_EGRESS (1) and
 *   FF_FLOW_INGRESS (2); use the sum (1+2) for both.
 * @param tcp_sport
 *   The source TCP port to match, in host byte order. 0 matches any port
 *   (the mask for the field is left at 0).
 * @param tcp_dport
 *   The destination TCP port to match, in host byte order. 0 matches any
 *   port (the mask for the field is left at 0).
 *
 * @return
 *   0 when the rule was created, 1 when rte_flow_validate() rejects the
 *   rule, otherwise the value returned by port_flow_complain() after
 *   rte_flow_create() failed.
 */
static int
fdir_add_tcp_flow(uint16_t port_id, uint16_t queue, uint16_t dir,
    uint16_t tcp_sport, uint16_t tcp_dport)
{
    struct rte_flow_attr attr;
    struct rte_flow_item flow_pattern[4];
    struct rte_flow_action flow_action[2];
    struct rte_flow *flow = NULL;
    struct rte_flow_action_queue flow_action_queue = { .index = queue };
    struct rte_flow_item_tcp tcp_spec;
    struct rte_flow_item_tcp tcp_mask;
    struct rte_flow_error rfe;
    int res;

    memset(flow_pattern, 0, sizeof(flow_pattern));
    memset(flow_action, 0, sizeof(flow_action));

    /*
     * Start from a fully defined error object so that
     * port_flow_complain() never reads indeterminate fields when the
     * driver fails without filling it in.
     */
    memset(&rfe, 0, sizeof(rfe));

    /*
     * set the rule attribute.
     */
    memset(&attr, 0, sizeof(struct rte_flow_attr));
    attr.ingress = ((dir & FF_FLOW_INGRESS) > 0);
    attr.egress = ((dir & FF_FLOW_EGRESS) > 0);

    /*
     * create the action sequence.
     * one action only, move packet to queue
     */
    flow_action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
    flow_action[0].conf = &flow_action_queue;
    flow_action[1].type = RTE_FLOW_ACTION_TYPE_END;

    /* The first two pattern levels carry no spec/mask: any ETH / IPv4. */
    flow_pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
    flow_pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;

    /*
     * set the third level of the pattern (TCP).
     * A port of 0 keeps its mask at 0, i.e. the field is not compared.
     */
    memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
    memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
    tcp_spec.hdr.src_port = htons(tcp_sport);
    tcp_mask.hdr.src_port = (tcp_sport == 0 ? 0: 0xffff);
    tcp_spec.hdr.dst_port = htons(tcp_dport);
    tcp_mask.hdr.dst_port = (tcp_dport == 0 ? 0: 0xffff);
    flow_pattern[2].type = RTE_FLOW_ITEM_TYPE_TCP;
    flow_pattern[2].spec = &tcp_spec;
    flow_pattern[2].mask = &tcp_mask;

    flow_pattern[3].type = RTE_FLOW_ITEM_TYPE_END;

    /* Check the rule first; only create it when the driver accepts it. */
    res = rte_flow_validate(port_id, &attr, flow_pattern, flow_action, &rfe);
    if (res)
        return (1);

    flow = rte_flow_create(port_id, &attr, flow_pattern, flow_action, &rfe);
    if (!flow)
        return port_flow_complain(&rfe);

    return (0);
}
#endif
int
ff_dpdk_init(int argc, char **argv)
{
@ -1097,8 +1208,8 @@ ff_dpdk_init(int argc, char **argv)
#ifdef FF_USE_PAGE_ARRAY
ff_mmap_init();
#endif
#ifdef FF_FLOW_ISOLATE
#ifdef FF_FLOW_ISOLATE
// run once in primary process
if (0 == lcore_conf.tx_queue_id[0]){
ret = port_flow_isolate(0, 1);
@ -1106,7 +1217,7 @@ ff_dpdk_init(int argc, char **argv)
rte_exit(EXIT_FAILURE, "init_port_isolate failed\n");
}
#endif
ret = init_port_start();
if (ret < 0) {
rte_exit(EXIT_FAILURE, "init_port_start failed\n");
@ -1114,8 +1225,8 @@ ff_dpdk_init(int argc, char **argv)
init_clock();
#ifdef FF_FLOW_ISOLATE
//Only give a example usage: port_id=0, tcp_port= 80.
//Recommend:
//Only give a example usage: port_id=0, tcp_port= 80.
//Recommend:
//1. init_flow should replace `set_rss_table` in `init_port_start` loop, This can set all NIC's port_id_list instead only 0 device(port_id).
//2. using config options `tcp_port` replace magic number of 80
ret = init_flow(0, 80);
@ -1123,6 +1234,16 @@ ff_dpdk_init(int argc, char **argv)
rte_exit(EXIT_FAILURE, "init_port_flow failed\n");
}
#endif
#ifdef FF_FDIR
/*
* Refer function header section for usage.
*/
ret = fdir_add_tcp_flow(0, 0, FF_FLOW_INGRESS, 0, 80);
if (ret)
rte_exit(EXIT_FAILURE, "fdir_add_tcp_flow failed\n");
#endif
return 0;
}
@ -1192,7 +1313,8 @@ protocol_filter(const void *data, uint16_t len)
if(ether_type == RTE_ETHER_TYPE_ARP)
return FILTER_ARP;
#ifdef INET6
#if (!defined(__FreeBSD__) && defined(INET6) ) || \
( defined(__FreeBSD__) && defined(INET6) && defined(FF_KNI))
if (ether_type == RTE_ETHER_TYPE_IPV6) {
return ff_kni_proto_filter(data,
len, ether_type);
@ -1313,12 +1435,14 @@ process_packets(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **bufs,
uint16_t len = rte_pktmbuf_data_len(rtem);
if (!pkts_from_ring) {
ff_traffic.rx_packets++;
ff_traffic.rx_bytes += len;
ff_traffic.rx_packets += rtem->nb_segs;
ff_traffic.rx_bytes += rte_pktmbuf_pkt_len(rtem);
}
if (!pkts_from_ring && packet_dispatcher) {
uint64_t cur_tsc = rte_rdtsc();
int ret = (*packet_dispatcher)(data, &len, queue_id, nb_queues);
usr_cb_tsc += rte_rdtsc() - cur_tsc;
if (ret == FF_DISPATCH_RESPONSE) {
rte_pktmbuf_pkt_len(rtem) = rte_pktmbuf_data_len(rtem) = len;
/*
@ -1423,7 +1547,7 @@ process_dispatch_ring(uint16_t port_id, uint16_t queue_id,
process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1);
}
return 0;
return nb_rb;
}
static inline void
@ -1521,7 +1645,7 @@ handle_ipfw_msg(struct ff_msg *msg)
case FF_IPFW_SET:
ret = ff_setsockopt_freebsd(fd, msg->ipfw.level,
msg->ipfw.optname, msg->ipfw.optval,
*(msg->ipfw.optlen));
*(msg->ipfw.optlen));
break;
default:
ret = -1;
@ -1660,11 +1784,11 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
if (unlikely(ff_global_cfg.pcap.enable)) {
uint16_t i;
for (i = 0; i < n; i++) {
ff_dump_packets( ff_global_cfg.pcap.save_path, m_table[i],
ff_dump_packets( ff_global_cfg.pcap.save_path, m_table[i],
ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len);
}
}
ret = rte_eth_tx_burst(port, queueid, m_table, n);
ff_traffic.tx_packets += ret;
uint16_t i;
@ -1674,7 +1798,7 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
if (qconf->tx_mbufs[port].bsd_m_table[i])
ff_enq_tx_bsdmbuf(port, qconf->tx_mbufs[port].bsd_m_table[i], m_table[i]->nb_segs);
#endif
}
}
if (unlikely(ret < n)) {
do {
rte_pktmbuf_free(m_table[ret]);
@ -1716,7 +1840,7 @@ ff_dpdk_if_send(struct ff_dpdk_if_context *ctx, void *m,
#ifdef FF_USE_PAGE_ARRAY
struct lcore_conf *qconf = &lcore_conf;
int len = 0;
len = ff_if_send_onepkt(ctx, m,total);
if (unlikely(len == MAX_PKT_BURST)) {
send_burst(qconf, MAX_PKT_BURST, ctx->port_id);
@ -1868,6 +1992,7 @@ main_loop(void *arg)
idle = 1;
sys_tsc = 0;
usr_tsc = 0;
usr_cb_tsc = 0;
/*
* TX burst queue drain
@ -1904,7 +2029,7 @@ main_loop(void *arg)
}
#endif
process_dispatch_ring(port_id, queue_id, pkts_burst, ctx);
idle &= !process_dispatch_ring(port_id, queue_id, pkts_burst, ctx);
nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts_burst,
MAX_PKT_BURST);
@ -1949,12 +2074,13 @@ main_loop(void *arg)
end_tsc = idle_sleep_tsc;
}
usr_tsc = usr_cb_tsc;
if (usch_tsc == cur_tsc) {
usr_tsc = idle_sleep_tsc - div_tsc;
usr_tsc += idle_sleep_tsc - div_tsc;
}
if (!idle) {
sys_tsc = div_tsc - cur_tsc;
sys_tsc = div_tsc - cur_tsc - usr_cb_tsc;
ff_top_status.sys_tsc += sys_tsc;
}

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2010 Kip Macy. All rights reserved.
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
* Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -87,6 +87,10 @@
#define LINUX_IP_TTL 2
#define LINUX_IP_HDRINCL 3
#define LINUX_IP_OPTIONS 4
#define LINUX_IP_RECVTTL 12
#define LINUX_IP_RECVTOS 13
#define LINUX_IP_TRANSPARENT 19
#define LINUX_IP_MINTTL 21
#define LINUX_IP_MULTICAST_IF 32
#define LINUX_IP_MULTICAST_TTL 33
@ -94,6 +98,10 @@
#define LINUX_IP_ADD_MEMBERSHIP 35
#define LINUX_IP_DROP_MEMBERSHIP 36
#define LINUX_IPV6_V6ONLY 26
#define LINUX_IPV6_RECVPKTINFO 49
#define LINUX_IPV6_TRANSPARENT 75
#define LINUX_TCP_NODELAY 1
#define LINUX_TCP_MAXSEG 2
#define LINUX_TCP_KEEPIDLE 4
@ -194,6 +202,44 @@ struct linux_msghdr {
/* msghdr define end */
/* cmsghdr define start */
/*
 * Control-message (ancillary data) header as laid out for the Linux side of
 * the API. It is the header type the LINUX_CMSG_* macros in this file operate
 * on, and the type freebsd2linux_cmsghdr() writes into.
 */
struct linux_cmsghdr
{
size_t cmsg_len; /* Length of the data in cmsg_data plus the length
of the cmsghdr structure.
!! The type should be socklen_t, but the
kernel's definition is incompatible
with that. */
int cmsg_level; /* Originating protocol. */
int cmsg_type; /* Protocol specific type. */
};
/*
* LINUX_CMSG_XXXX has the same effect as FreeBSD's CMSG_XXXX,
* because aligned to 8 bytes, but still redefine them.
*/
#define LINUX_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \
_ALIGN(sizeof(struct linux_cmsghdr)))
#define LINUX_CMSG_SPACE(l) (_ALIGN(sizeof(struct linux_cmsghdr)) + _ALIGN(l))
#define LINUX_CMSG_LEN(l) (_ALIGN(sizeof(struct linux_cmsghdr)) + (l))
#define LINUX_CMSG_FIRSTHDR(mhdr) \
((mhdr)->msg_controllen >= sizeof(struct linux_cmsghdr) ? \
(struct linux_cmsghdr *)(mhdr)->msg_control : \
(struct linux_cmsghdr *)0)
#define LINUX_CMSG_NXTHDR(mhdr, cmsg) \
((char *)(cmsg) == (char *)0 ? LINUX_CMSG_FIRSTHDR(mhdr) : \
((char *)(cmsg) + _ALIGN(((struct linux_cmsghdr *)(cmsg))->cmsg_len) + \
_ALIGN(sizeof(struct linux_cmsghdr)) > \
(char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \
(struct linux_cmsghdr *)0 : \
(struct linux_cmsghdr *)(void *)((char *)(cmsg) + \
_ALIGN(((struct linux_cmsghdr *)(cmsg))->cmsg_len)))
/* cmsghdr define end */
extern int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static long
@ -377,7 +423,30 @@ ip_opt_convert(int optname)
case LINUX_IP_ADD_MEMBERSHIP:
return IP_ADD_MEMBERSHIP;
case LINUX_IP_DROP_MEMBERSHIP:
return IP_DROP_MEMBERSHIP;
return IP_DROP_MEMBERSHIP;
case LINUX_IP_RECVTTL:
return IP_RECVTTL;
case LINUX_IP_RECVTOS:
return IP_RECVTOS;
case LINUX_IP_TRANSPARENT:
return IP_BINDANY;
case LINUX_IP_MINTTL:
return IP_MINTTL;
default:
return optname;
}
}
static int
ip6_opt_convert(int optname)
{
switch(optname) {
case LINUX_IPV6_V6ONLY:
return IPV6_V6ONLY;
case LINUX_IPV6_RECVPKTINFO:
return IPV6_RECVPKTINFO;
case LINUX_IPV6_TRANSPARENT:
return IPV6_BINDANY;
default:
return optname;
}
@ -414,6 +483,8 @@ linux2freebsd_opt(int level, int optname)
return so_opt_convert(optname);
case IPPROTO_IP:
return ip_opt_convert(optname);
case IPPROTO_IPV6:
return ip6_opt_convert(optname);
case IPPROTO_TCP:
return tcp_opt_convert(optname);
default:
@ -425,7 +496,7 @@ static void
linux2freebsd_sockaddr(const struct linux_sockaddr *linux,
socklen_t addrlen, struct sockaddr *freebsd)
{
if (linux == NULL) {
if (linux == NULL || freebsd == NULL) {
return;
}
@ -433,20 +504,201 @@ linux2freebsd_sockaddr(const struct linux_sockaddr *linux,
freebsd->sa_family = linux->sa_family == LINUX_AF_INET6 ? AF_INET6 : linux->sa_family;
freebsd->sa_len = addrlen;
bcopy(linux->sa_data, freebsd->sa_data, addrlen - sizeof(linux->sa_family));
if (linux->sa_data != freebsd->sa_data) {
bcopy(linux->sa_data, freebsd->sa_data, addrlen - sizeof(linux->sa_family));
}
}
static void
freebsd2linux_sockaddr(struct linux_sockaddr *linux,
struct sockaddr *freebsd)
{
if (linux == NULL) {
if (linux == NULL || freebsd == NULL) {
return;
}
/* #linux and #freebsd may point to the same address */
if (linux->sa_data != freebsd->sa_data) {
bcopy(freebsd->sa_data, linux->sa_data, freebsd->sa_len - sizeof(linux->sa_family));
}
linux->sa_family = freebsd->sa_family == AF_INET6 ? LINUX_AF_INET6 : freebsd->sa_family;
}
bcopy(freebsd->sa_data, linux->sa_data, freebsd->sa_len - sizeof(linux->sa_family));
/*
 * Convert the control messages (ancillary data) attached to freebsd_msg
 * into Linux layout inside linux_msg's control buffer.
 *
 * Both cmsg chains are walked in lockstep and the loop ends as soon as either
 * chain is exhausted. For IPPROTO_IP, IP_RECVTOS/IP_RECVTTL are renamed to
 * LINUX_IP_TOS/LINUX_IP_TTL and a single payload byte is carried over; every
 * other message has its payload copied with memcpy().
 *
 * Always returns 0.
 *
 * NOTE(review): cmsg_len is not validated here; a value smaller than the
 * header size would make the memcpy() length computation wrap, assuming
 * cmsg_len is unsigned. Confirm that the caller only passes kernel-produced
 * data.
 */
static inline int
freebsd2linux_cmsghdr(struct linux_msghdr *linux_msg, const struct msghdr *freebsd_msg)
{
struct cmsghdr *freebsd_cmsg = CMSG_FIRSTHDR(freebsd_msg);
struct linux_cmsghdr *linux_cmsg = LINUX_CMSG_FIRSTHDR(linux_msg);
while (freebsd_cmsg && linux_cmsg) {
unsigned char *freebsd_optval = CMSG_DATA(freebsd_cmsg);
unsigned char *linux_optval = LINUX_CMSG_DATA(linux_cmsg);
/*
 * During recvmsg, linux_cmsg and freebsd_cmsg refer to the same address
 * (linux2freebsd_msghdr() hands the Linux control buffer to FreeBSD),
 * but the header fields sit at different offsets in the two layouts.
 * The fields are therefore assigned in reverse order: type, level, len.
 * Do not reorder the three assignments below.
 *
 * Although the two header structs differ in size (the original comment
 * names the msghdr structs; presumably the cmsghdr structs are meant),
 * the payload of both starts after the same 16 bytes, because both are
 * aligned to 8 bytes.
 */
linux_cmsg->cmsg_type = freebsd_cmsg->cmsg_type;
linux_cmsg->cmsg_level = freebsd_cmsg->cmsg_level;
linux_cmsg->cmsg_len = LINUX_CMSG_LEN(freebsd_cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)));
/*
 * When the buffers coincide, freebsd_cmsg's cmsg_level and cmsg_type have
 * been overwritten by the assignments above, so linux_cmsg must be used
 * to dispatch and to compute the data length.
 * Only the payload is copied; alignment padding bytes are not.
 */
switch (linux_cmsg->cmsg_level) {
case IPPROTO_IP:
switch (linux_cmsg->cmsg_type) {
case IP_RECVTOS:
/* Linux reports the received TOS under LINUX_IP_TOS; one byte is copied. */
linux_cmsg->cmsg_type = LINUX_IP_TOS;
*linux_optval = *freebsd_optval;
break;
case IP_RECVTTL:
/* Linux reports the received TTL under LINUX_IP_TTL; one byte is copied. */
linux_cmsg->cmsg_type = LINUX_IP_TTL;
*linux_optval = *freebsd_optval;
break;
/*case XXXX:
break;*/
default:
memcpy(linux_optval, freebsd_optval, linux_cmsg->cmsg_len - sizeof(struct linux_cmsghdr));
break;
}
break;
default:
memcpy(linux_optval, freebsd_optval, linux_cmsg->cmsg_len - sizeof(struct linux_cmsghdr));
break;
}
/* Advance both chains; either macro yields a null pointer at the end of its buffer. */
linux_cmsg = LINUX_CMSG_NXTHDR(linux_msg, linux_cmsg);
freebsd_cmsg = CMSG_NXTHDR(freebsd_msg, freebsd_cmsg);
}
return 0;
}
static inline int
linux2freebsd_cmsg(const struct linux_msghdr *linux_msg, struct msghdr *freebsd_msg)
{
struct cmsghdr *freebsd_cmsg = CMSG_FIRSTHDR(freebsd_msg);
struct linux_cmsghdr *linux_cmsg = LINUX_CMSG_FIRSTHDR(linux_msg);
while (freebsd_cmsg && linux_cmsg) {
unsigned char *freebsd_optval = CMSG_DATA(freebsd_cmsg);
unsigned char *linux_optval = LINUX_CMSG_DATA(linux_cmsg);
freebsd_cmsg->cmsg_type = linux_cmsg->cmsg_type;
freebsd_cmsg->cmsg_level = linux_cmsg->cmsg_level;
freebsd_cmsg->cmsg_len = CMSG_LEN(linux_cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct linux_cmsghdr)));
switch (linux_cmsg->cmsg_level) {
case IPPROTO_IP:
switch (linux_cmsg->cmsg_type) {
case LINUX_IP_TOS:
freebsd_cmsg->cmsg_type = IP_TOS;
freebsd_cmsg->cmsg_len = CMSG_LEN(sizeof(char));
if (linux_cmsg->cmsg_len == LINUX_CMSG_LEN(sizeof(int))) {
*freebsd_optval = *(int *)linux_optval;
} else if (linux_cmsg->cmsg_len == LINUX_CMSG_LEN(sizeof(char))) {
*freebsd_optval = *linux_optval;
}
break;
case LINUX_IP_TTL:
freebsd_cmsg->cmsg_type = IP_TTL;
freebsd_cmsg->cmsg_len = CMSG_LEN(sizeof(char));
*freebsd_optval = *(int *)linux_optval;
break;
/*case XXXX:
break;*/
default:
memcpy(freebsd_optval, linux_optval, linux_cmsg->cmsg_len - sizeof(struct linux_cmsghdr));
break;
}
break;
default:
memcpy(freebsd_optval, linux_optval, linux_cmsg->cmsg_len - sizeof(struct linux_cmsghdr));
break;
}
linux_cmsg = LINUX_CMSG_NXTHDR(linux_msg, linux_cmsg);
freebsd_cmsg = CMSG_NXTHDR(freebsd_msg, freebsd_cmsg);
}
return 0;
}
/*
 * For sendmsg, msg_name and msg_control must be converted from Linux to FreeBSD.
 * For recvmsg, msg_name and msg_control must be converted from FreeBSD to Linux.
 */
/*
 * Copy the fields of a FreeBSD msghdr back into the caller's Linux msghdr.
 *
 * msg_iov, msg_iovlen and msg_flags are always copied. The peer address
 * (msg_name/msg_namelen) and the control data (msg_control/msg_controllen)
 * are converted only on the receive path (send_flag == 0), and only when
 * both headers carry a non-NULL pointer for them.
 *
 * Returns 0 on success, -1 if either header pointer is NULL.
 */
static int
freebsd2linux_msghdr(struct linux_msghdr *linux_msg, struct msghdr *freebsd_msg, int send_flag)
{
    const int receiving = (send_flag == 0);

    if (linux_msg == NULL || freebsd_msg == NULL)
        return -1;

    if (receiving && linux_msg->msg_name != NULL && freebsd_msg->msg_name != NULL) {
        linux_msg->msg_name = freebsd_msg->msg_name;
        freebsd2linux_sockaddr(linux_msg->msg_name, freebsd_msg->msg_name);
        linux_msg->msg_namelen = freebsd_msg->msg_namelen;
    }

    linux_msg->msg_iov = freebsd_msg->msg_iov;
    linux_msg->msg_iovlen = freebsd_msg->msg_iovlen;

    if (receiving && freebsd_msg->msg_control != NULL && linux_msg->msg_control != NULL) {
        /* Convert first: the walk still relies on the Linux msg_controllen. */
        freebsd2linux_cmsghdr(linux_msg, freebsd_msg);
        linux_msg->msg_controllen = freebsd_msg->msg_controllen;
    }

    linux_msg->msg_flags = freebsd_msg->msg_flags;

    return 0;
}
/*
 * Fill a FreeBSD msghdr from the caller's Linux msghdr.
 *
 * linux_msg   - source header in Linux layout.
 * freebsd_msg - destination. On the send path the caller may pre-set
 *               msg_name (destination sockaddr buffer) and msg_control
 *               (destination control buffer); they are filled by conversion.
 * send_flag   - non-zero for sendmsg, zero for recvmsg.
 *
 * On the send path the address and control data are converted into the
 * destination buffers. In every other case the Linux pointers are passed
 * through unchanged, so on receive FreeBSD writes straight into the caller's
 * buffers.
 *
 * Returns 0 on success, -1 if either header pointer is NULL, or the negative
 * value returned by linux2freebsd_cmsg().
 */
static int
linux2freebsd_msghdr(const struct linux_msghdr *linux_msg, struct msghdr *freebsd_msg, int send_flag)
{
    if (linux_msg == NULL || freebsd_msg == NULL) {
        return -1;
    }

    /*
     * send_flag is tested first so that freebsd_msg->msg_name, which callers
     * leave uninitialized on the receive path, is never read there.
     */
    if (send_flag && linux_msg->msg_name && freebsd_msg->msg_name) {
        linux2freebsd_sockaddr(linux_msg->msg_name, linux_msg->msg_namelen, freebsd_msg->msg_name);
    } else {
        freebsd_msg->msg_name = linux_msg->msg_name;
    }
    freebsd_msg->msg_namelen = linux_msg->msg_namelen;

    freebsd_msg->msg_iov = linux_msg->msg_iov;
    freebsd_msg->msg_iovlen = linux_msg->msg_iovlen;

    /* Must be set before the cmsg conversion, which walks freebsd_msg's chain. */
    freebsd_msg->msg_controllen = linux_msg->msg_controllen;
    if (send_flag && linux_msg->msg_control) {
        int ret = linux2freebsd_cmsg(linux_msg, freebsd_msg);

        if (ret < 0) {
            return ret;
        }
    } else {
        freebsd_msg->msg_control = linux_msg->msg_control;
    }

    freebsd_msg->msg_flags = linux_msg->msg_flags;

    return 0;
}
int
@ -604,7 +856,7 @@ ff_close(int fd)
{
int rc;
if ((rc = kern_close(curthread, fd)))
if ((rc = kern_close(curthread, fd)))
goto kern_fail;
return (rc);
@ -619,7 +871,7 @@ ff_read(int fd, void *buf, size_t nbytes)
struct uio auio;
struct iovec aiov;
int rc;
if (nbytes > INT_MAX) {
rc = EINVAL;
goto kern_fail;
@ -686,7 +938,7 @@ ff_write(int fd, const void *buf, size_t nbytes)
if ((rc = kern_writev(curthread, fd, &auio)))
goto kern_fail;
rc = curthread->td_retval[0];
return (rc);
kern_fail:
ff_os_errno(rc);
@ -709,7 +961,7 @@ ff_writev(int fd, const struct iovec *iov, int iovcnt)
if ((rc = kern_writev(curthread, fd, &auio)))
goto kern_fail;
rc = curthread->td_retval[0];
return (rc);
kern_fail:
ff_os_errno(rc);
@ -760,25 +1012,39 @@ kern_fail:
ssize_t
ff_sendmsg(int s, const struct msghdr *msg, int flags)
{
int rc;
int rc, ret;
struct sockaddr_storage freebsd_sa;
void *linux_sa = msg->msg_name;
struct msghdr freebsd_msg;
struct cmsghdr *freebsd_cmsg = NULL;
if (linux_sa != NULL) {
linux2freebsd_sockaddr(linux_sa,
sizeof(struct linux_sockaddr), (struct sockaddr *)&freebsd_sa);
__DECONST(struct msghdr *, msg)->msg_name = &freebsd_sa;
freebsd_msg.msg_name = &freebsd_sa;
if ((__DECONST(struct linux_msghdr *, msg))->msg_control) {
freebsd_cmsg = malloc((__DECONST(struct linux_msghdr *, msg))->msg_controllen, NULL, 0);
if (freebsd_cmsg == NULL) {
rc = ENOMEM;
goto kern_fail;
}
}
freebsd_msg.msg_control = freebsd_cmsg;
ret = linux2freebsd_msghdr((const struct linux_msghdr *)msg, &freebsd_msg, 1);
if (ret < 0) {
rc = EINVAL;
goto kern_fail;
}
rc = sendit(curthread, s, __DECONST(struct msghdr *, msg), flags);
__DECONST(struct msghdr *, msg)->msg_name = linux_sa;
rc = sendit(curthread, s, &freebsd_msg, flags);
if (rc)
goto kern_fail;
rc = curthread->td_retval[0];
freebsd2linux_msghdr(__DECONST(struct linux_msghdr *, msg), &freebsd_msg, 1);
if (freebsd_cmsg) {
free(freebsd_cmsg, NULL);
}
return (rc);
kern_fail:
ff_os_errno(rc);
@ -819,7 +1085,7 @@ ff_recvfrom(int s, void *buf, size_t len, int flags,
if (fromlen != NULL)
*fromlen = msg.msg_namelen;
if (from)
if (from && msg.msg_namelen != 0)
freebsd2linux_sockaddr(from, (struct sockaddr *)&bsdaddr);
return (rc);
@ -828,27 +1094,25 @@ kern_fail:
return (-1);
}
/*
* It is considered here that the upper 4 bytes of
* msg->iovlen and msg->msg_controllen in linux_msghdr are 0.
*/
ssize_t
ff_recvmsg(int s, struct msghdr *msg, int flags)
{
int rc;
struct linux_msghdr *linux_msg = (struct linux_msghdr *)msg;
int rc, ret;
struct msghdr freebsd_msg;
msg->msg_flags = flags;
ret = linux2freebsd_msghdr((struct linux_msghdr *)msg, &freebsd_msg, 0);
if (ret < 0) {
rc = EINVAL;
goto kern_fail;
}
freebsd_msg.msg_flags = flags;
if ((rc = kern_recvit(curthread, s, msg, UIO_SYSSPACE, NULL))) {
msg->msg_flags = 0;
if ((rc = kern_recvit(curthread, s, &freebsd_msg, UIO_SYSSPACE, NULL))) {
goto kern_fail;
}
rc = curthread->td_retval[0];
freebsd2linux_sockaddr(linux_msg->msg_name, msg->msg_name);
linux_msg->msg_flags = msg->msg_flags;
msg->msg_flags = 0;
freebsd2linux_msghdr((struct linux_msghdr *)msg, &freebsd_msg, 0);
return (rc);
kern_fail:
@ -866,7 +1130,7 @@ ff_fcntl(int fd, int cmd, ...)
va_start(ap, cmd);
argp = va_arg(ap, uintptr_t);
va_end(ap);
va_end(ap);
if ((rc = kern_fcntl(curthread, fd, cmd, argp)))
goto kern_fail;
@ -897,11 +1161,11 @@ ff_accept(int s, struct linux_sockaddr * addr,
if (addrlen)
*addrlen = pf->sa_len;
if(pf != NULL)
free(pf, M_SONAME);
return (rc);
kern_fail:
if(pf != NULL)
free(pf, M_SONAME);
@ -929,7 +1193,7 @@ kern_fail:
int
ff_bind(int s, const struct linux_sockaddr *addr, socklen_t addrlen)
{
int rc;
int rc;
struct sockaddr_storage bsdaddr;
linux2freebsd_sockaddr(addr, addrlen, (struct sockaddr *)&bsdaddr);
@ -974,7 +1238,7 @@ ff_getpeername(int s, struct linux_sockaddr * name,
if(pf != NULL)
free(pf, M_SONAME);
return (rc);
kern_fail:
if(pf != NULL)
free(pf, M_SONAME);
@ -1006,7 +1270,7 @@ kern_fail:
return (-1);
}
int
int
ff_shutdown(int s, int how)
{
int rc;
@ -1031,7 +1295,7 @@ ff_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
int rc;
size_t retval;
rc = userland_sysctl(curthread, __DECONST(int *, name), namelen, oldp, oldlenp,
rc = userland_sysctl(curthread, __DECONST(int *, name), namelen, oldp, oldlenp,
1, __DECONST(void *, newp), newlen, &retval, 0);
if (rc)
goto kern_fail;
@ -1143,8 +1407,8 @@ kevent_copyin(void *arg, struct kevent *kevp, int count)
}
int
ff_kevent_do_each(int kq, const struct kevent *changelist, int nchanges,
void *eventlist, int nevents, const struct timespec *timeout,
ff_kevent_do_each(int kq, const struct kevent *changelist, int nchanges,
void *eventlist, int nevents, const struct timespec *timeout,
void (*do_each)(void **, struct kevent *))
{
int rc;
@ -1168,7 +1432,7 @@ ff_kevent_do_each(int kq, const struct kevent *changelist, int nchanges,
kevent_copyin
};
if ((rc = kern_kevent(curthread, kq, nchanges, nevents, &k_ops,
if ((rc = kern_kevent(curthread, kq, nchanges, nevents, &k_ops,
&ts)))
goto kern_fail;
@ -1180,7 +1444,7 @@ kern_fail:
}
int
ff_kevent(int kq, const struct kevent *changelist, int nchanges,
ff_kevent(int kq, const struct kevent *changelist, int nchanges,
struct kevent *eventlist, int nevents, const struct timespec *timeout)
{
return ff_kevent_do_each(kq, changelist, nchanges, eventlist, nevents, timeout, NULL);

View File

@ -69,10 +69,17 @@ struct ff_veth_softc {
in_addr_t broadcast;
in_addr_t gateway;
uint8_t nb_vip;
in_addr_t vip[VIP_MAX_NUM];
#ifdef INET6
struct in6_addr ip6;
struct in6_addr gateway6;
uint8_t prefix_length;
uint8_t nb_vip6;
uint8_t vip_prefix_length;
struct in6_addr vip6[VIP_MAX_NUM];
#endif /* INET6 */
struct ff_dpdk_if_context *host_ctx;
@ -81,12 +88,26 @@ struct ff_veth_softc {
static int
ff_veth_config(struct ff_veth_softc *sc, struct ff_port_cfg *cfg)
{
int i, j;
memcpy(sc->mac, cfg->mac, ETHER_ADDR_LEN);
inet_pton(AF_INET, cfg->addr, &sc->ip);
inet_pton(AF_INET, cfg->netmask, &sc->netmask);
inet_pton(AF_INET, cfg->broadcast, &sc->broadcast);
inet_pton(AF_INET, cfg->gateway, &sc->gateway);
if (cfg->nb_vip) {
for (i = 0, j = 0; i < cfg->nb_vip; ++i) {
if (inet_pton(AF_INET, cfg->vip_addr_array[i], &sc->vip[j])) {
j++;
} else {
printf("ff_veth_config inet_pton vip %s failed.\n", cfg->vip_addr_array[i]);
}
}
sc->nb_vip = j;
}
#ifdef INET6
if (cfg->addr6_str) {
inet_pton(AF_INET6_LINUX, cfg->addr6_str, &sc->ip6);
@ -103,6 +124,19 @@ ff_veth_config(struct ff_veth_softc *sc, struct ff_port_cfg *cfg)
} else {
printf("%s: No addr6 config found.\n", sc->host_ifname);
}
if (cfg->nb_vip6) {
for (i = 0, j = 0; i < cfg->nb_vip6; ++i) {
if (inet_pton(AF_INET6_LINUX, cfg->vip_addr6_array[i], &sc->vip6[j])) {
j++;
} else {
printf("ff_veth_config inet_pton vip6 %s failed.\n", cfg->vip_addr6_array[i]);
}
}
sc->nb_vip6 = j;
sc->vip_prefix_length = cfg->vip_prefix_len == 0 ? 64 : cfg->vip_prefix_len;
}
#endif /* INET6 */
return 0;
@ -299,7 +333,7 @@ ff_mbuf_get(void *p, void *m, void *data, uint16_t len)
struct mbuf *mb = m_get(M_NOWAIT, MT_DATA);
if (mb == NULL) {
return NULL;
return NULL;
}
m_extadd(mb, data, len, ff_mbuf_ext_free, m, NULL, 0, EXT_DISPOSABLE);
@ -393,6 +427,52 @@ ff_veth_set_gateway(struct ff_veth_softc *sc)
(struct sockaddr *)&nm, RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
}
/*
 * Add every configured IPv4 virtual IP (sc->vip[0 .. nb_vip-1]) as an alias
 * address via SIOCAIFADDR.
 *
 * The aliases are added to the interface named by cfg->vip_ifname, or to
 * sc->ifp->if_dname when no vip_ifname is configured.
 *
 * Returns 0 on success (also when there is no VIP to add), otherwise the
 * error code reported by socreate() or ifioctl(). Processing stops at the
 * first address that fails.
 */
static int
ff_veth_setvaddr(struct ff_veth_softc *sc, struct ff_port_cfg *cfg)
{
    struct in_aliasreq req;
    struct sockaddr_in sa;
    struct socket *so = NULL;
    int i;
    int ret = 0;

    bzero(&req, sizeof req);
    if (cfg->vip_ifname) {
        strlcpy(req.ifra_name, cfg->vip_ifname, IFNAMSIZ);
    } else {
        strlcpy(req.ifra_name, sc->ifp->if_dname, IFNAMSIZ);
    }

    bzero(&sa, sizeof(sa));
    sa.sin_len = sizeof(sa);
    sa.sin_family = AF_INET;

    /* Without a socket there is nothing to issue the ioctl on (or to free). */
    ret = socreate(AF_INET, &so, SOCK_DGRAM, 0, curthread->td_ucred, curthread);
    if (ret != 0) {
        printf("ff_veth_setvaddr socreate error\n");
        return ret;
    }

    for (i = 0; i < sc->nb_vip; ++i) {
        sa.sin_addr.s_addr = sc->vip[i];
        bcopy(&sa, &req.ifra_addr, sizeof(sa));

        // Only support '255.255.255.255' netmask now
        sa.sin_addr.s_addr = 0xFFFFFFFF;
        bcopy(&sa, &req.ifra_mask, sizeof(sa));

        // Only support 'x.x.x.255' broadaddr now
        // NOTE(review): the OR sets the last octet only if the host is little-endian - confirm.
        sa.sin_addr.s_addr = sc->vip[i] | 0xFF000000;
        bcopy(&sa, &req.ifra_broadaddr, sizeof(sa));

        /* ifioctl() reports failure as a non-zero errno value, not a negative one. */
        ret = ifioctl(so, SIOCAIFADDR, (caddr_t)&req, curthread);
        if (ret != 0) {
            printf("ff_veth_setvaddr ifioctl SIOCAIFADDR error\n");
            goto done;
        }
    }

done:
    sofree(so);

    return ret;
}
#ifdef INET6
static int
ff_veth_setaddr6(struct ff_veth_softc *sc)
@ -410,7 +490,8 @@ ff_veth_setaddr6(struct ff_veth_softc *sc)
uint8_t mask_size_mod = sc->prefix_length % 8;
if (mask_size_mod)
{
ifr6.ifra_prefixmask.sin6_addr.__u6_addr.__u6_addr8[sc->prefix_length / 8] = ((1 << mask_size_mod) - 1) << (8 - mask_size_mod);
ifr6.ifra_prefixmask.sin6_addr.__u6_addr.__u6_addr8[sc->prefix_length / 8] = \
((1 << mask_size_mod) - 1) << (8 - mask_size_mod);
}
ifr6.ifra_lifetime.ia6t_pltime = ifr6.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
@ -442,6 +523,52 @@ ff_veth_set_gateway6(struct ff_veth_softc *sc)
return rtrequest_fib(RTM_ADD, (struct sockaddr *)&dst, (struct sockaddr *)&gw,
(struct sockaddr *)&nm, RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
}
/*
 * Add every configured IPv6 virtual IP (sc->vip6[0 .. nb_vip6-1]) as an
 * alias address via SIOCAIFADDR_IN6.
 *
 * The aliases are added to the interface named by cfg->vip_ifname, or to
 * sc->ifp->if_dname when no vip_ifname is configured. The prefix mask is
 * built from sc->vip_prefix_length, the prefix length configured for the
 * VIPs, and both lifetimes are set to ND6_INFINITE_LIFETIME.
 *
 * Returns 0 on success (also when there is no VIP to add), -1 when the VIP
 * prefix length does not fit an IPv6 address, otherwise the error code
 * reported by socreate() or ifioctl(). Processing stops at the first address
 * that fails.
 */
static int
ff_veth_setvaddr6(struct ff_veth_softc *sc, struct ff_port_cfg *cfg)
{
    struct in6_aliasreq ifr6;
    struct socket *so = NULL;
    uint8_t prefix_len = sc->vip_prefix_length;
    uint8_t mask_size_mod = prefix_len % 8;
    int i;
    int ret = 0;

    /* A longer prefix would make the mask writes below run past sin6_addr. */
    if (prefix_len > 8 * sizeof(ifr6.ifra_prefixmask.sin6_addr)) {
        printf("ff_veth_setvaddr6 invalid vip prefix length %u\n", (unsigned)prefix_len);
        return -1;
    }

    bzero(&ifr6, sizeof(ifr6));
    if (cfg->vip_ifname) {
        strlcpy(ifr6.ifra_name, cfg->vip_ifname, IFNAMSIZ);
    } else {
        strlcpy(ifr6.ifra_name, sc->ifp->if_dname, IFNAMSIZ);
    }

    ifr6.ifra_addr.sin6_len = sizeof ifr6.ifra_addr;
    ifr6.ifra_addr.sin6_family = AF_INET6;

    /* Whole 0xff bytes first, then the leading bits of the partial byte. */
    ifr6.ifra_prefixmask.sin6_len = sizeof ifr6.ifra_prefixmask;
    memset(&ifr6.ifra_prefixmask.sin6_addr, 0xff, prefix_len / 8);
    if (mask_size_mod)
    {
        ifr6.ifra_prefixmask.sin6_addr.__u6_addr.__u6_addr8[prefix_len / 8] = \
            ((1 << mask_size_mod) - 1) << (8 - mask_size_mod);
    }

    ifr6.ifra_lifetime.ia6t_pltime = ifr6.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;

    /* Without a socket there is nothing to issue the ioctl on (or to free). */
    ret = socreate(AF_INET6, &so, SOCK_DGRAM, 0, curthread->td_ucred, curthread);
    if (ret != 0) {
        printf("ff_veth_setvaddr6 socreate error\n");
        return ret;
    }

    for (i = 0; i < sc->nb_vip6; ++i) {
        ifr6.ifra_addr.sin6_addr = sc->vip6[i];

        /* ifioctl() reports failure as a non-zero errno value, not a negative one. */
        ret = ifioctl(so, SIOCAIFADDR_IN6, (caddr_t)&ifr6, curthread);
        if (ret != 0) {
            printf("ff_veth_setvaddr6 ifioctl SIOCAIFADDR error\n");
            goto done;
        }
    }

done:
    sofree(so);

    return ret;
}
#endif /* INET6 */
static int
@ -485,7 +612,7 @@ ff_veth_setup_interface(struct ff_veth_softc *sc, struct ff_port_cfg *cfg)
return -1;
}
//set ip
// Set IP
int ret = ff_veth_setaddr(sc);
if (ret != 0) {
printf("ff_veth_setaddr failed\n");
@ -495,6 +622,10 @@ ff_veth_setup_interface(struct ff_veth_softc *sc, struct ff_port_cfg *cfg)
printf("ff_veth_set_gateway failed\n");
}
if (sc->nb_vip) {
ret = ff_veth_setvaddr(sc, cfg);
}
#ifdef INET6
// Set IPv6
if (cfg->addr6_str) {
@ -510,6 +641,10 @@ ff_veth_setup_interface(struct ff_veth_softc *sc, struct ff_port_cfg *cfg)
}
}
}
if (sc->nb_vip6) {
ret = ff_veth_setvaddr6(sc, cfg);
}
#endif /* INET6 */
return (0);
@ -528,7 +663,11 @@ ff_veth_attach(struct ff_port_cfg *cfg)
}
memset(sc, 0, sizeof(struct ff_veth_softc));
snprintf(sc->host_ifname, sizeof(sc->host_ifname), ff_IF_NAME, cfg->port_id);
if(cfg->ifname){
snprintf(sc->host_ifname, sizeof(sc->host_ifname), "%s", cfg->ifname);
} else {
snprintf(sc->host_ifname, sizeof(sc->host_ifname), ff_IF_NAME, cfg->port_id);
}
error = ff_veth_config(sc, cfg);
if (0 != error) {
@ -573,7 +712,7 @@ ff_veth_softc_to_hostc(void *softc)
/********************
* get next mbuf's addr, current mbuf's data and datalen.
*
*
********************/
int ff_next_mbuf(void **mbuf_bsd, void **data, unsigned *len)
{
@ -605,7 +744,7 @@ void* ff_rte_frm_extcl(void* mbuf)
bsd_mbuf->m_ext.ext_type == EXT_DISPOSABLE && bsd_mbuf->m_ext.ext_free == ff_mbuf_ext_free ) {
return bsd_mbuf->m_ext.ext_arg1;
}
else
else
return NULL;
}